In [1]:
import numpy as np
import torch
import psutil
from torch.utils.data import Dataset, DataLoader
import torch.nn.functional as F
import torch.nn as nn
import torch.optim as optim
import time
import matplotlib.pyplot as plt
import random
import sys
import pickle
from torch.optim import SGD

In [2]:
# Put the project's Analysis directory on the import path; presumably this is
# where the get_vocab / get_frequency modules imported in the next cell live.
# NOTE(review): this is an absolute, machine-specific path, so the notebook
# only runs where this directory exists. The commented line below is the
# equivalent path on another machine.
sys.path.insert(1, '/home/ubuntu/Intents-Analysis/Analysis')
#sys.path.insert(1, '/Users/manjugupta/Desktop/CMU_Courses/Intents/getting_intents/Analysis')

In [3]:
from get_vocab import load_data, get_vocab
from get_frequency import get_frequency

In [4]:
# Detect GPU support once; the flag drives both the DataLoader settings and
# the device selection further down.
cuda = torch.cuda.is_available()

# Background loader processes are only used when a GPU is present.
num_workers = 8 if cuda else 0

print('CUDA is', cuda)

print(num_workers)

CUDA is True
8


In [5]:
##Needed Functions
def load_data(filename):
    """Load and return the object pickled in ``filename``.

    Note: this definition replaces the ``load_data`` imported from
    ``get_vocab`` earlier in the notebook.

    Args:
        filename: Path to a pickle file.

    Returns:
        The unpickled Python object.

    Raises:
        OSError: If the file cannot be opened.
        pickle.UnpicklingError (or similar): If the file is not a valid pickle.
    """
    # SECURITY: pickle can execute arbitrary code while loading; only use this
    # on files from a trusted source.
    # The context manager closes the file even when unpickling fails.
    with open(filename, "rb") as a_file:
        return pickle.load(a_file)


def create_vocabulary(train_file):
    '''Build the phone -> integer index mapping from the training file.

    Index 0 is left free for padding and index 1 is the unknown token
    ('unk'); the entries returned by get_vocab are numbered from 2 upwards
    in the order they are yielded.
    '''
    # NOTE(review): the meaning of the first argument (1) is defined by
    # get_vocab, which is not visible here -- confirm against that module.
    vocab, _ = get_vocab(1, train_file)

    phone_to_idx = {'unk': 1}
    phone_to_idx.update(
        (phone, position) for position, phone in enumerate(vocab, start=2)
    )
    return phone_to_idx

In [6]:
class MyDataset(Dataset):
    """Dataset of (phone-index sequence, intent label) pairs.

    The pickled data file is read with load_data and is indexed by intent;
    each intent holds a collection of utterances, and each utterance is an
    iterable of phones.
    """

    def __init__(self, data_file, intent_labels, phone_to_idx):
        """Index every utterance in ``data_file``.

        Phones missing from ``phone_to_idx`` are mapped to the 'unk' entry.
        ``intent_labels`` maps an intent name to its integer label.
        """
        data = load_data(data_file)
        self.all_data = []

        for intent in data:
            for utterance in data[intent]:
                indexed = [
                    phone_to_idx[phone if phone in phone_to_idx else 'unk']
                    for phone in utterance
                ]
                self.all_data.append([indexed, intent_labels[intent]])

    def __len__(self):
        """Number of utterances across all intents."""
        return len(self.all_data)

    def __getitem__(self, index):
        """Return ``(index_sequence, label)`` for the utterance at ``index``."""
        sample = self.all_data[index]
        return sample[0], sample[1]

In [7]:
def collate_indic(tuple_lst, num_classes=6):
    """Collate (index_sequence, label) samples into padded batch tensors.

    Args:
        tuple_lst: List of ``(sequence, label)`` pairs, where ``sequence`` is a
            list of integer token ids and ``label`` is an integer class index.
        num_classes: Width of the one-hot target. Defaults to 6, the number of
            intents used in this notebook. To use another value with a
            DataLoader, wrap this function in ``functools.partial``.

    Returns:
        A tuple ``(x, lengths, y)``, all ordered by descending sequence length:
            x: ``(B, T)`` int64 tensor, right-padded with 0.
            lengths: ``(B,)`` int64 tensor of the unpadded sequence lengths.
            y: ``(B, num_classes)`` float tensor with a 1 at each label index.

    Raises:
        IndexError: If a label is outside ``[0, num_classes)``.
    """
    x_lst = [sample[0] for sample in tuple_lst]
    y_lst = [sample[1] for sample in tuple_lst]

    B = len(tuple_lst)  # number of samples in the batch
    # default=0 keeps an empty batch from crashing inside max().
    T = max((len(seq) for seq in x_lst), default=0)  # longest sequence

    x = torch.zeros([B, T], dtype=torch.int64)
    lengths = torch.zeros(B, dtype=torch.int64)

    for i, seq in enumerate(x_lst):
        lengths[i] = len(seq)
        # Explicit dtype: torch.tensor([]) would otherwise be float32 for an
        # empty sequence.
        x[i, :len(seq)] = torch.tensor(seq, dtype=torch.int64)

    # One-hot targets.
    y = torch.zeros([B, num_classes])
    for i, y_label in enumerate(y_lst):
        y[i, y_label] = 1

    # Longest first, as pack_padded_sequence expects by default.
    ids = torch.argsort(lengths, descending=True)

    return x[ids], lengths[ids], y[ids]

In [8]:
# Intent name -> integer class label, plus the language used for each split.
intent_labels = {
    'movie-tickets': 0,
    'auto-repair': 1,
    'restaurant-table': 2,
    'pizza-ordering': 3,
    'uber-lyft': 4,
    'coffee-ordering': 5,
}
train_language = 'hindi'
test_language = 'hindi'

# Pickled train / test splits for the chosen languages.
train_file = ''.join([
    '/home/ubuntu/Intents-Analysis/TaskMasterData/Get_Phones_Combos/1_lang_train_split/taskmaster_training_',
    train_language,
    '.pkl',
])
test_file = ''.join([
    '/home/ubuntu/Intents-Analysis/TaskMasterData/Get_Phones_Combos/1_language/taskmaster_testing_',
    test_language,
    '.pkl',
])

# The vocabulary is built from the training split only.
phone_to_idx = create_vocabulary(train_file)
print(len(phone_to_idx))

62


In [9]:
# Training split: shuffled; larger batches and worker processes on GPU.
train_dataset = MyDataset(train_file, intent_labels, phone_to_idx)
if cuda:
    train_loader_args = {
        'shuffle': True,
        'batch_size': 128,
        'num_workers': num_workers,
        'pin_memory': True,
    }
else:
    train_loader_args = {'shuffle': True, 'batch_size': 32}
train_loader = DataLoader(train_dataset, collate_fn=collate_indic, **train_loader_args)

# Test split, used as the validation set in the training loop: never shuffled.
test_dataset = MyDataset(test_file, intent_labels, phone_to_idx)
if cuda:
    test_loader_args = {
        'shuffle': False,
        'batch_size': 128,
        'num_workers': num_workers,
        'pin_memory': True,
    }
else:
    test_loader_args = {'shuffle': False, 'batch_size': 1}
valid_loader = DataLoader(test_dataset, collate_fn=collate_indic, **test_loader_args)

In [10]:
class RNNClassifier(nn.Module):
    """CNN + LSTM sequence classifier with cosine-similarity attention pooling.

    Token embeddings go through two parallel 1-D convolutions (kernel sizes 3
    and 5), batch norm + ReLU and a 2-layer LSTM. The last layer's final
    hidden state is then compared (cosine similarity) with every token
    embedding to get attention weights, and the attention-weighted sum of the
    embeddings is fed to a linear classifier.

    Args:
        vocab_size: Number of rows in the embedding table.
        embed_size: Embedding dimension.
        hidden_size: LSTM hidden dimension. The attention step compares the
            hidden state with the embeddings and the classifier consumes the
            pooled embeddings, so ``forward`` only works when
            ``hidden_size == embed_size``.
        label_size: Number of output logits.
    """

    def __init__(self, vocab_size=63, embed_size=128, hidden_size=128, label_size=6):
        super().__init__()
        self.embed = nn.Embedding(vocab_size, embed_size)

        # Two receptive fields over the same input; paddings keep length T.
        self.cnn  = nn.Conv1d(embed_size, embed_size, kernel_size=3, padding=1)
        self.cnn2 = nn.Conv1d(embed_size, embed_size, kernel_size=5, padding=2)

        self.batchnorm = nn.BatchNorm1d(embed_size*2)

        self.lstm = nn.LSTM(embed_size*2, hidden_size, num_layers=2)
        self.linear = nn.Linear(hidden_size, label_size)

    def forward(self, x, lengths):
        """Compute class logits for a padded batch.

        Args:
            x: (B, T) padded LongTensor of token ids.
            lengths: Unpadded length of each row of ``x``, in descending
                order (a tensor on any device, or a list of ints).

        Returns:
            (B, label_size) tensor of logits.
        """
        # (B, T) -> (B, T, E) -> (B, E, T); kept for the attention step below.
        embeddings = self.embed(x).transpose(1, 2)

        # (B, 2E, T)
        cnn_output = torch.cat([self.cnn(embeddings), self.cnn2(embeddings)], dim=1)

        # (B, 2E, T) -> (B, T, 2E)
        features = F.relu(self.batchnorm(cnn_output)).transpose(1, 2)

        # pack_padded_sequence requires the lengths to live on the CPU.
        if torch.is_tensor(lengths):
            lengths = lengths.cpu()
        pack_tensor = nn.utils.rnn.pack_padded_sequence(features, lengths, batch_first=True)

        _, (hn, _) = self.lstm(pack_tensor)

        # Attention: sigmoid of the cosine similarity between each token
        # embedding and the top layer's final hidden state, normalized to sum
        # to 1 over time. All tensors derive from ``x``, so no global device
        # handle is needed.
        # NOTE(review): no padding mask is applied, so padded timesteps also
        # receive attention weight.
        query = hn[-1].unsqueeze(2).expand_as(embeddings)            # (B, E, T)
        attention = torch.sigmoid(
            F.cosine_similarity(embeddings, query, dim=1, eps=1e-6)
        )                                                            # (B, T)
        attention = attention / attention.sum(dim=1, keepdim=True)

        # Attention-weighted sum of the embeddings over time: (B, E).
        output = (embeddings * attention.unsqueeze(1)).sum(dim=2)

        logits = self.linear(output)

        return logits

In [11]:
# Choose the device first and move the model onto it before building the
# optimizer, as the torch.optim documentation recommends.
device = torch.device("cuda" if cuda else "cpu")
model = RNNClassifier().to(device)
opt = optim.Adam(model.parameters(), lr = 0.001)
#opt = SGD(model.parameters(), lr=0.05)

# Last expression of the cell: displays the module summary.
model

RNNClassifier(
  (embed): Embedding(63, 128)
  (cnn): Conv1d(128, 128, kernel_size=(3,), stride=(1,), padding=(1,))
  (cnn2): Conv1d(128, 128, kernel_size=(5,), stride=(1,), padding=(2,))
  (batchnorm): BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (lstm): LSTM(256, 128, num_layers=2)
  (linear): Linear(in_features=128, out_features=6, bias=True)
)

In [12]:
# Train for up to 1000 epochs, evaluating on valid_loader after every epoch
# and tracking the best validation accuracy seen so far.
print(train_language, test_language)
max_acc = 0

# The loss module holds no state, so build it once instead of once per batch.
criterion = nn.BCEWithLogitsLoss()

for epoch in range(1000):
    loss_accum = 0.0
    batch_cnt = 0

    acc_cnt = 0
    err_cnt = 0

    model.train()
    start_time = time.time()
    for x, lengths, y in train_loader:

        x = x.to(device)
        y = y.to(device)
        # lengths deliberately stays on the CPU: pack_padded_sequence (used
        # inside the model) requires CPU lengths.
        opt.zero_grad()

        logits = model(x, lengths)

        loss = criterion(logits, y)
        loss.backward()
        opt.step()

        loss_accum += loss.item()
        batch_cnt += 1

        # Count correct predictions on-device with one reduction instead of a
        # per-sample Python loop; y is one-hot, so argmax recovers the label.
        correct = (logits.argmax(dim=1) == y.argmax(dim=1)).sum().item()
        acc_cnt += correct
        err_cnt += y.shape[0] - correct

    print("train acc: ", acc_cnt/(err_cnt+acc_cnt), " train loss: ", loss_accum / batch_cnt, '--time:', time.time() - start_time)

    model.eval()
    acc_cnt = 0
    err_cnt = 0

    # No gradients are needed for evaluation; skipping graph construction
    # saves memory and time.
    with torch.no_grad():
        for x, lengths, y in valid_loader:

            x = x.to(device)
            y = y.to(device)

            logits = model(x, lengths)

            correct = (logits.argmax(dim=1) == y.argmax(dim=1)).sum().item()
            acc_cnt += correct
            err_cnt += y.shape[0] - correct

    current_acc = acc_cnt/(err_cnt+acc_cnt)
    if current_acc > max_acc:
        max_acc = current_acc

    # start_time is not reset after training, so this elapsed time covers the
    # training pass plus validation for the epoch.
    print("validation: ", current_acc, '--max', max_acc, '--time:', time.time() - start_time)

hindi hindi
train acc:  0.08290859667006456  train loss:  0.619538356428561 --time: 19.5309841632843
validation:  0.16333333333333333 --max 0.16333333333333333 --time: 20.676095247268677
train acc:  0.13761467889908258  train loss:  0.4687999383262966 --time: 20.087887287139893
validation:  0.19666666666666666 --max 0.19666666666666666 --time: 21.170298099517822
train acc:  0.2820251444104655  train loss:  0.4478850921858912 --time: 19.78876829147339
validation:  0.19666666666666666 --max 0.19666666666666666 --time: 21.043764352798462
train acc:  0.25925925925925924  train loss:  0.44488190697587054 --time: 19.73092484474182
validation:  0.19666666666666666 --max 0.19666666666666666 --time: 20.75589609146118
train acc:  0.3143051308188923  train loss:  0.44261407463446906 --time: 19.584691047668457
validation:  0.19666666666666666 --max 0.19666666666666666 --time: 20.744331121444702
train acc:  0.30479102956167176  train loss:  0.44042489710061444 --time: 20.39794087409973
validation: 

train acc:  0.5643900781515461  train loss:  0.32813456913699274 --time: 15.098710060119629
validation:  0.3566666666666667 --max 0.37666666666666665 --time: 16.083961248397827
train acc:  0.5501189262657152  train loss:  0.32337859661682794 --time: 15.04345440864563
validation:  0.38333333333333336 --max 0.38333333333333336 --time: 15.649665117263794
train acc:  0.5783214407067618  train loss:  0.3223648745080699 --time: 15.088891506195068
validation:  0.37333333333333335 --max 0.38333333333333336 --time: 15.88669729232788
train acc:  0.564050288820931  train loss:  0.31905592654062354 --time: 15.790424108505249
validation:  0.42 --max 0.42 --time: 16.588308334350586
train acc:  0.5881753312945973  train loss:  0.31488123795260553 --time: 15.631365537643433
validation:  0.36 --max 0.42 --time: 16.52324104309082
train acc:  0.583078491335372  train loss:  0.3130739098009856 --time: 15.907176494598389
validation:  0.4 --max 0.42 --time: 16.77975058555603
train acc:  0.5905538566089025  

train acc:  0.8154943934760448  train loss:  0.1967972218990326 --time: 16.758275985717773
validation:  0.5866666666666667 --max 0.5933333333333334 --time: 17.58957815170288
train acc:  0.8134556574923547  train loss:  0.1968961792147678 --time: 17.717207431793213
validation:  0.5933333333333334 --max 0.5933333333333334 --time: 18.656149864196777
train acc:  0.8120965001698947  train loss:  0.19456509403560474 --time: 16.842267751693726
validation:  0.5833333333333334 --max 0.5933333333333334 --time: 17.67947769165039
train acc:  0.8158341828066599  train loss:  0.19142293735690738 --time: 17.4012668132782
validation:  0.6033333333333334 --max 0.6033333333333334 --time: 18.35224151611328
train acc:  0.8233095480801903  train loss:  0.18947659238525058 --time: 17.171788454055786
validation:  0.59 --max 0.6033333333333334 --time: 17.70242667198181
train acc:  0.8154943934760448  train loss:  0.1888878786045572 --time: 17.660732984542847
validation:  0.6133333333333333 --max 0.61333333333

train acc:  0.8953448861705743  train loss:  0.12852308445650598 --time: 17.068642616271973
validation:  0.6966666666666667 --max 0.71 --time: 18.040283203125
train acc:  0.8831124702684335  train loss:  0.12670135951560477 --time: 16.522379398345947
validation:  0.6933333333333334 --max 0.71 --time: 17.596497774124146
train acc:  0.890587835541964  train loss:  0.13010820260514383 --time: 18.197951316833496
validation:  0.69 --max 0.71 --time: 19.09064483642578
train acc:  0.8865103635745838  train loss:  0.12552511562471805 --time: 16.810335397720337
validation:  0.7 --max 0.71 --time: 17.6221182346344
train acc:  0.891267414203194  train loss:  0.12562847169845 --time: 17.18888783454895
validation:  0.7066666666666667 --max 0.71 --time: 17.985478162765503
train acc:  0.8970438328236493  train loss:  0.12419332045575847 --time: 17.347966194152832
validation:  0.7066666666666667 --max 0.71 --time: 18.31604290008545
train acc:  0.889568467550119  train loss:  0.12252492554809737 --time

train acc:  0.9330615018688413  train loss:  0.0894127169057079 --time: 17.011809825897217
validation:  0.7066666666666667 --max 0.73 --time: 17.988672494888306
train acc:  0.927964661909616  train loss:  0.09046839048033176 --time: 16.83220076560974
validation:  0.7133333333333334 --max 0.73 --time: 17.576088190078735
train acc:  0.9395174991505266  train loss:  0.08867763987053996 --time: 17.579853534698486
validation:  0.71 --max 0.73 --time: 18.523365259170532
train acc:  0.9310227658851512  train loss:  0.0888652969961581 --time: 17.28476119041443
validation:  0.7133333333333334 --max 0.73 --time: 18.150908946990967
train acc:  0.9384981311586816  train loss:  0.0869600798772729 --time: 17.256192207336426
validation:  0.7133333333333334 --max 0.73 --time: 18.308889389038086
train acc:  0.9354400271831464  train loss:  0.08787852995421576 --time: 17.22477698326111
validation:  0.7166666666666667 --max 0.73 --time: 18.272212266921997
train acc:  0.9351002378525314  train loss:  0.08

KeyboardInterrupt: 

In [173]:
# Scratch experiment: a (4, 1) column vector for checking how broadcasting
# behaves against a (4, 3) matrix.
a = torch.tensor([1,2,3,4]).reshape(-1,1)

In [174]:
# Scratch experiment: a (4, 3) matrix whose rows are easy to tell apart.
b = torch.tensor([ [1,2,3], [10,20,30],[100,200,300], [1000,2000,3000]])

In [175]:
# Show both shapes side by side.
a.shape, b.shape

(torch.Size([4, 1]), torch.Size([4, 3]))

In [176]:
# Display b for reference.
b

tensor([[   1,    2,    3],
        [  10,   20,   30],
        [ 100,  200,  300],
        [1000, 2000, 3000]])

In [177]:
# a is (4, 1) and b is (4, 3), so each row of b is scaled by the matching
# entry of a.
a*b

tensor([[    1,     2,     3],
        [   20,    40,    60],
        [  300,   600,   900],
        [ 4000,  8000, 12000]])

In [178]:
# Row-wise division by broadcasting: each row of b is divided by the matching
# entry of a (both cast to float first).
b.float()/a.float()

tensor([[  1.0000,   2.0000,   3.0000],
        [  5.0000,  10.0000,  15.0000],
        [ 33.3333,  66.6667, 100.0000],
        [250.0000, 500.0000, 750.0000]])