In [12]:
import numpy as np
import torch
import psutil
from torch.utils.data import Dataset, DataLoader
import torch.nn.functional as F
import torch.nn as nn
import torch.optim as optim
import time
import matplotlib.pyplot as plt
import random
import sys

In [13]:
# Make the project's Analysis directory importable; the `get_vocab` import in
# the next cell presumably resolves from here.
# NOTE(review): machine-specific absolute path -- adjust when running elsewhere.
ANALYSIS_DIR = '/home/ubuntu/Intents/Intents-Analysis/Analysis'
# Guard the insert so re-running this cell does not stack duplicate entries.
if ANALYSIS_DIR not in sys.path:
    sys.path.insert(1, ANALYSIS_DIR)

In [14]:
from get_vocab import load_data, get_vocab

In [15]:
# Detect GPU support once; the DataLoader settings and device choice below depend on it
cuda = torch.cuda.is_available()
print('CUDA is', cuda)

# DataLoader worker processes: 8 when a GPU is present, otherwise load in the main process
if cuda:
    num_workers = 8
else:
    num_workers = 0

print(num_workers)

CUDA is True
8


In [16]:
# Snapshot of system memory usage, taken before the dataset is loaded
memory_stats = psutil.virtual_memory()
print(memory_stats)

svmem(total=16481628160, available=15663706112, percent=5.0, used=524664832, free=14307622912, active=1249918976, inactive=679854080, buffers=129814528, cached=1519525888, shared=819200, slab=141631488)


In [17]:
# Padded utterance length per language and the integer id of each intent class
max_sent_len = {
    'english': 247,
    'hindi': 265,
    'gujarati': 283,
    'bengali': 295,
    'marathi': 307,
}
intent_labels = {
    'movie-tickets': 0,
    'auto-repair': 1,
    'restaurant-table': 2,
    'pizza-ordering': 3,
    'uber-lyft': 4,
    'coffee-ordering': 5,
}
language = 'english'

# Pickle files for the selected language
data_file = '../Analysis/Labels/TaskMaster/data_taskmaster_' + language + '.pkl'
train_file = '../Analysis/Labels/TaskMaster/taskmaster_training_' + language + '.pkl'
test_file = '../Analysis/Labels/TaskMaster/taskmaster_testing_' + language + '.pkl'

# Train/test splits plus the vocabulary extracted from the full data file
train_data = load_data(train_file)
test_data = load_data(test_file)
vocab, _ = get_vocab(1, data_file)

# Map every vocabulary entry to its position in `vocab`
# NOTE(review): if `vocab` is an unordered collection these indices may differ between runs -- confirm
my_vocab = {phone: position for position, phone in enumerate(vocab)}

In [18]:
def create_input_pad_end(utterance, my_vocab, max_len):
    '''
    Encode an utterance as vocabulary indices, padded at the end to max_len.

    Args:
        utterance: sliceable sequence of symbols (e.g. a list or a string);
            each symbol must be a key of my_vocab.
        my_vocab: dict mapping a symbol to its integer index.
        max_len: exact length of the returned list.

    Returns:
        A list of exactly max_len ints. Positions past the end of the
        utterance hold the padding index len(my_vocab). Utterances longer
        than max_len are truncated so every encoded vector has the same
        length and can be stacked into a batch.

    Raises:
        KeyError: if a symbol within the first max_len positions is not in
            my_vocab.
    '''
    max_len = max(max_len, 0)
    pad_index = len(my_vocab)

    # Truncate first so an over-long utterance cannot yield a longer vector
    input_vector = [my_vocab[ipa] for ipa in utterance[:max_len]]
    input_vector.extend([pad_index] * (max_len - len(input_vector)))

    return input_vector
    
    

In [19]:
class MyDataset(Dataset):
    """In-memory dataset pairing padded utterance vectors with intent label ids.

    `data` maps an intent name to its utterances. Every utterance is encoded
    with create_input_pad_end and stored as a float tensor next to
    intent_labels[intent]. With train=True the samples are shuffled once,
    at construction time.
    """

    def __init__(self, data, my_vocab, intent_labels, max_len, train = True):
        self.all_data = [
            [
                torch.tensor(create_input_pad_end(utterance, my_vocab, max_len), dtype=torch.float32),
                intent_labels[intent],
            ]
            for intent in data
            for utterance in data[intent]
        ]

        # Only the training split is reordered; otherwise the iteration order of `data` is kept
        if train:
            random.shuffle(self.all_data)

    def __len__(self):
        """Number of stored samples."""
        return len(self.all_data)

    def __getitem__(self,index):
        """Return the (features, label) pair stored at position `index`."""
        features, label = self.all_data[index]
        return features, label

In [20]:
# Training split: reshuffled by the loader; worker processes and pinned memory are only used with CUDA
train_dataset = MyDataset(train_data, my_vocab, intent_labels, max_sent_len[language], train=True)
if cuda:
    train_loader_args = {'shuffle': True, 'batch_size': 128, 'num_workers': num_workers, 'pin_memory': True}
else:
    train_loader_args = {'shuffle': True, 'batch_size': 32}
train_loader = DataLoader(train_dataset, **train_loader_args)

# Test split: kept in order; evaluated one sample at a time when running on CPU
test_dataset = MyDataset(test_data, my_vocab, intent_labels, max_sent_len[language], train=False)
if cuda:
    test_loader_args = {'shuffle': False, 'batch_size': 128, 'num_workers': num_workers, 'pin_memory': True}
else:
    test_loader_args = {'shuffle': False, 'batch_size': 1}
test_loader = DataLoader(test_dataset, **test_loader_args)

In [21]:
# Number of batches per epoch in the (train, test) loaders
(len(train_loader), len(test_loader))

(23, 3)

In [24]:
# Peek at one training batch to confirm the input shape (batch_size, max_len)
first_batch = next(iter(train_loader), None)
if first_batch is not None:
    print(first_batch[0].shape)

torch.Size([128, 247])


In [146]:
class MyRNN_Model(nn.Module):
    """Bidirectional multi-layer LSTM classifier over sequences of vocabulary indices.

    Args:
        vocab_size: number of rows in the embedding table. It must be larger
            than the largest index fed to the model, so it has to count the
            padding index too (create_input_pad_end pads with len(my_vocab),
            i.e. pass len(my_vocab) + 1).
        rnn_in_dim: embedding size, which is also the LSTM input size.
        rnn_hid_dim: LSTM hidden size per direction.
        num_classes: number of output logits; the default matches the six
            intents in intent_labels.
        num_layers: number of stacked LSTM layers.
    """

    def __init__(self, vocab_size, rnn_in_dim=64, rnn_hid_dim=128, num_classes=6, num_layers=2):
        super(MyRNN_Model, self).__init__()

        # Kept on the instance because init_rnn_hid reads them
        self.rnn_in_dim = rnn_in_dim
        self.rnn_hid_dim = rnn_hid_dim
        self.num_layers = num_layers

        self.word2wemb = nn.Embedding(vocab_size, rnn_in_dim)
        self.rnn = nn.LSTM(rnn_in_dim, rnn_hid_dim, num_layers = num_layers, bidirectional = True)
        # Forward and backward directions are concatenated, hence 2 * rnn_hid_dim
        self.rnn2logit = nn.Linear(2 * rnn_hid_dim, num_classes)

    def init_rnn_hid(self, batch_size=1):
        """All-zero hidden state shaped (num_layers * 2, batch_size, rnn_hid_dim).

        forward() does not pass it to the LSTM, which starts from zeros by default.
        """
        return torch.zeros(self.num_layers * 2, batch_size, self.rnn_hid_dim)

    def forward(self, words):
        """Feed index sequences through the network and return the output-layer logits.

        Args:
            words: tensor of vocabulary indices shaped (batch, seq_len), or
                (seq_len,) for a single sequence. It is cast to long for the
                embedding lookup, so the float tensors produced by MyDataset
                are accepted.

        Returns:
            Tensor of shape (batch, num_classes); batch is 1 for a 1-D input.
        """
        if words.dim() == 1:
            words = words.unsqueeze(0)                  # (1, seq_len)
        wembs = self.word2wemb(words.long())            # (batch, seq_len, rnn_in_dim)
        # The LSTM was built without batch_first, so it expects (seq_len, batch, features)
        rnn_outs, _ = self.rnn(wembs.transpose(0, 1))   # (seq_len, batch, 2 * rnn_hid_dim)
        # NOTE(review): with end-padded inputs the last time step is a padding
        # position; consider the final hidden states or pooling instead.
        logit = self.rnn2logit(rnn_outs[-1])            # (batch, num_classes)
        return logit

In [147]:
# Choose the compute device, then build the network, loss and optimizer.
# NOTE(review): MyCNN_Model is not defined in any cell visible in this notebook,
# so this cell fails on a fresh kernel unless that class is defined or imported first.
device = torch.device("cuda" if cuda else "cpu")
model = MyCNN_Model().to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr = 0.001)
print(model)
print(optimizer)

MyCNN_Model(
  (conv1): Conv2d(1, 64, kernel_size=(1, 3), stride=(1, 1))
  (pool1): MaxPool2d(kernel_size=(1, 3), stride=(1, 2), padding=0, dilation=1, ceil_mode=False)
  (conv1_bn): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv2): Conv2d(64, 128, kernel_size=(3, 7), stride=(1, 1))
  (pool2): MaxPool2d(kernel_size=(3, 4), stride=(1, 2), padding=0, dilation=1, ceil_mode=False)
  (conv2_bn): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv3): Conv2d(128, 64, kernel_size=(5, 11), stride=(1, 1))
  (pool3): MaxPool2d(kernel_size=(3, 5), stride=(1, 2), padding=0, dilation=1, ceil_mode=False)
  (conv3_bn): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (fc1): Linear(in_features=16896, out_features=512, bias=True)
  (fc1_bn): BatchNorm1d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (fc2): Linear(in_features=512, out_features=64, bias=True)
  (fc2_bn): Ba

In [148]:
def train_epoch(model, train_loader, criterion, optimizer):
    """Train `model` for one pass over `train_loader` and print loss, time and accuracy.

    Args:
        model: network mapping a batch of inputs to per-class scores of shape
            (batch, num_classes).
        train_loader: iterable yielding (data, target) batches.
        criterion: loss callable invoked as criterion(outputs, target).
        optimizer: optimizer that updates the model's parameters.

    Returns:
        Mean per-batch training loss as a float (0.0 when the loader yields
        no batches).
    """
    model.train()

    # Send batches to wherever the model lives instead of relying on a
    # notebook-level `device` variable being defined.
    first_param = next(model.parameters(), None)
    model_device = first_param.device if first_param is not None else torch.device('cpu')

    running_loss = 0.0
    num_batches = 0
    total_predictions = 0
    correct_predictions = 0

    start_time = time.time()
    for data, target in train_loader:
        optimizer.zero_grad()   # .backward() accumulates gradients
        data = data.to(model_device)
        target = target.to(model_device)

        outputs = model(data)
        loss = criterion(outputs, target)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        num_batches += 1

        # Predicted class = index of the highest score per sample
        predicted = outputs.detach().argmax(dim=1)
        total_predictions += target.size(0)
        correct_predictions += (predicted == target).sum().item()
    end_time = time.time()

    # Guard both averages so an empty loader reports zeros instead of raising
    acc = (correct_predictions/total_predictions)*100.0 if total_predictions else 0.0
    if num_batches:
        running_loss /= num_batches
    print('Training Loss: ', running_loss, 'Time: ',end_time - start_time, 's')
    print('Training Accuracy: ', acc, '%')
    return running_loss

In [149]:
def test_model(model, test_loader, criterion):
    with torch.no_grad():
        model.eval()

        running_loss = 0.0
        total_predictions = 0.0
        correct_predictions = 0.0

        for batch_idx, (data, target) in enumerate(test_loader):   
            data = data.to(device)
            target = target.to(device)

            outputs = model(data)

            _, predicted = torch.max(outputs.data, 1)
            total_predictions += target.size(0)
            correct_predictions += (predicted == target).sum().item()

            loss = criterion(outputs, target).detach()
            running_loss += loss.item()


        running_loss /= len(test_loader)
        acc = (correct_predictions/total_predictions)*100.0
        print('Testing Loss: ', running_loss)
        print('Testing Accuracy: ', acc, '%')
        return running_loss, acc

In [None]:
# Per-epoch history: training loss, test loss and test accuracy
Train_loss, Test_loss, Test_acc = [], [], []

# NOTE(review): the scheduler is constructed but never stepped (the step call
# below is disabled), so the learning rate stays at its initial value.
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='max', factor=0.5, patience=5, cooldown=3)

for epoch in range(30):
    epoch_train_loss = train_epoch(model, train_loader, criterion, optimizer)
    epoch_test_loss, epoch_test_acc = test_model(model, test_loader, criterion)

    Train_loss.append(epoch_train_loss)
    Test_loss.append(epoch_test_loss)
    Test_acc.append(epoch_test_acc)

    # scheduler.step(epoch_test_acc)   # disabled: would halve the lr when test accuracy plateaus
    for group in optimizer.param_groups:
        print('Learning rate:', group['lr'])

    print('='*20)

Training Loss:  1.7394491071286409 Time:  6.356580972671509 s
Training Accuracy:  27.149167516139993 %
Testing Loss:  1.9255032142003377
Testing Accuracy:  27.0 %
Learning rate: 0.001
Training Loss:  1.6468719451323799 Time:  6.402014255523682 s
Training Accuracy:  34.4206591913014 %
Testing Loss:  1.9241166512171428
Testing Accuracy:  26.666666666666668 %
Learning rate: 0.001
Training Loss:  1.6134489360062971 Time:  6.512433290481567 s
Training Accuracy:  35.23615358477743 %
Testing Loss:  1.774964173634847
Testing Accuracy:  28.000000000000004 %
Learning rate: 0.001
Training Loss:  1.616716628489287 Time:  6.579664707183838 s
Training Accuracy:  35.91573224600747 %
Testing Loss:  1.7613558371861775
Testing Accuracy:  27.666666666666668 %
Learning rate: 0.001
Training Loss:  1.565118318018706 Time:  6.610743522644043 s
Training Accuracy:  38.70200475705063 %
Testing Loss:  1.8216347694396973
Testing Accuracy:  32.33333333333333 %
Learning rate: 0.001
Training Loss:  1.307861825694208

In [9]:
# Scratch check: a list of ints goes through NumPy and comes out as a float tensor
torch.from_numpy(np.asarray([1, 2, 3])).float()

tensor([1., 2., 3.])