# Intention Classification with PyTorch

## Setting Up the Data

In [74]:
import pandas as pd
import os
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.autograd import Variable

In [75]:
def preprocess(series):
    def delimit(sent):
        arr = sent.split()
        if arr:
            arr.insert(0, '<START>')
            arr.append('<END>')
        else:
            arr = ['<EMPTY>']
        return arr
    return series.apply(delimit)

In [76]:
def get_vocab(sents):
    word_to_idx = {'<PAD>':0}
    for sent in sents:
        for word in sent:
            if word not in word_to_idx:
                word_to_idx[word] = len(word_to_idx)
                
    vocab = set(word_to_idx.keys())
    idx_to_word = {idx:word for word, idx in word_to_idx.items()}
    return vocab, word_to_idx, idx_to_word

In [77]:
COLUMNS = ['utterance_ID', 'dialog_act', 'utterance_t-3', 
           'utterance_t-2', 'utterance_t-1', 'utterance_t']

utt = pd.read_csv('da_tagging/utterances.train', sep='\t|;',
                  engine='python', names=COLUMNS, dtype=str).set_index('utterance_ID')
utt[COLUMNS[2:]] = utt[COLUMNS[2:]].apply(preprocess)
utt.head()

Unnamed: 0_level_0,dialog_act,utterance_t-3,utterance_t-2,utterance_t-1,utterance_t
utterance_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2121_1,bc,[<EMPTY>],[<EMPTY>],[<EMPTY>],"[<START>, Okay, ,, uh, <END>]"
2121_2,qw,[<EMPTY>],[<EMPTY>],"[<START>, Okay, ,, uh, <END>]","[<START>, could, you, tell, me, what, you, thi..."
2121_3,h,[<EMPTY>],"[<START>, Okay, ,, uh, <END>]","[<START>, could, you, tell, me, what, you, thi...","[<START>, Well, ,, it, 's, hard, to, say, ., <..."
2121_4,s,"[<START>, Okay, ,, uh, <END>]","[<START>, could, you, tell, me, what, you, thi...","[<START>, Well, ,, it, 's, hard, to, say, ., <...","[<START>, I, mean, ,, while, it, 's, certainly..."
2121_5,qo,"[<START>, could, you, tell, me, what, you, thi...","[<START>, Well, ,, it, 's, hard, to, say, ., <...","[<START>, I, mean, ,, while, it, 's, certainly...","[<START>, What, do, you, think, ?, <END>]"


In [78]:
merged = utt['utterance_t-3'] + utt['utterance_t-2'] + utt['utterance_t-1'] + utt['utterance_t']

In [79]:
num_classes = len(utt['dialog_act'].unique())
num_classes

31

In [80]:
lens = utt['utterance_t'].apply(len)

In [81]:
max_len = lens.max()
max_len

106

In [82]:
av_len = lens.mean()
av_len

11.200466153016254

As we can see below, the vast majority of sentence are shorter than the average length, 11, times 2. As such, we'll set our embedding dimension to this length, 22, as it it significantly shorter than the length of the longest sentence, 106.

In [83]:
len(lens[lens < av_len * 2]) / len(lens)

0.9051816266501105

In [86]:
vocab, word_to_idx, idx_to_word = get_vocab(utt['utterance_t'])
label_to_idx = {label:idx for idx, label in enumerate(utt['dialog_act'].unique())}
label_to_idx['<PAD>'] = len(label_to_idx)
embeds = nn.Embedding(len(vocab), max_len)

In [87]:
label_to_idx['<PAD>']

31

In [97]:
class IntentNet(nn.Module):
    def __init__(self, embedding_dim, hidden_dim, vocab_size, num_classes,
                 batch_size=1, num_lstm_units=100, num_layers=100):
        super(IntentNet, self).__init__()
        self.embedding_dim = embedding_dim
        self.hidden_dim = hidden_dim
        self.vocab_size = vocab_size
        self.num_classes = num_classes
        self.batch_size = batch_size
        self.num_lstm_units = num_lstm_units
        self.num_layers = num_layers
        self._padding_idx = label_to_idx['<PAD>']

        self.embeds = torch.nn.Embedding(len(vocab),
                                         hidden_dim,
                                         padding_idx=self._padding_idx)
        self.lstm = nn.LSTM(input_size=embedding_dim, hidden_size=hidden_dim,
                            batch_first=True)
        self.output = nn.Linear(hidden_dim, num_classes)
        self.hidden = self.init_hidden()

    def forward(self, sentence):
        embeddings = self.embeds(sentence)
        lstm_out, self.hidden = self.lstm(
            embeddings.view(self.batch_size, len(sentence), self.embedding_dim),
                            self.hidden)
        label_space = self.output(lstm_out.view(len(sentence), -1))
        scores = F.log_softmax(label_space, dim=1)
        return scores
    
    def init_hidden(self):
        hiddens = (torch.ones(self.num_layers, self.batch_size,
                              self.num_lstm_units),
                   torch.zeros(self.num_layers, self.batch_size,
                               self.num_lstm_units))
        return [Variable(h) for h in hiddens]

net = IntentNet(max_len, 100, len(vocab) - 1, num_classes)

In [71]:
def train(model, X_train, y_train, learning_rate,
          epochs, batch_size, momentum=0.9,gpu=True):
    
    os.environ["CUDA_VISIBLE_DEVICES"] = '3'
    
    loss_func = nn.CrossEntropyLoss()
    optimizer = optim.SGD(net.parameters(), lr=learning_rate, momentum=0.9)
    
    for epoch in range(epochs):
        for sentence, label in zip(X_train, y_train):
            model.zero_grad()
            model.init_hidden()

            sentence = [word_to_idx[word] for word in sentence]
            label = label_to_idx[label]
            
            if gpu and torch.cuda.is_available():
                inputs = Variable(torch.cuda.LongTensor(sentence))
                labels = Variable(torch.cuda.LongTensor(label))
            else:
                inputs = Variable(torch.LongTensor(sentence))
                labels = Variable(torch.LongTensor(label))

            scores = model(inputs)
            print(scores)
            loss = loss_func(scores, labels)
            loss.backward()
            optimizer.step()
            
            print(f'Epoch [{epoch + 1}/{epochs}], Train accuracy: %{.2*train_accuracy},',
                  f'Train loss: %{.2*train_loss}, Dev accuracy: %{.2*dev_accuracy},',
                  f'Dev loss: %{.2*dev_loss}')

In [72]:
train(net, merged, utt['dialog_act'], .01, 1, 1)

tensor([[-3.4977, -3.5731, -3.4872, -3.3413, -3.3871, -3.4568, -3.3737, -3.5281,
         -3.4291, -3.4942, -3.4157, -3.2824, -3.3779, -3.4641, -3.3137, -3.3553,
         -3.5805, -3.4375, -3.4779, -3.4797, -3.4977, -3.3642, -3.4252, -3.4324,
         -3.5045, -3.4370, -3.4822, -3.3114, -3.5260, -3.3440, -3.4642],
        [-3.5163, -3.5681, -3.5454, -3.3065, -3.3838, -3.4045, -3.4103, -3.5563,
         -3.4058, -3.4538, -3.5031, -3.2738, -3.3807, -3.4969, -3.2639, -3.3181,
         -3.6483, -3.4625, -3.4832, -3.5088, -3.4956, -3.3426, -3.4101, -3.4082,
         -3.4780, -3.4079, -3.5407, -3.2829, -3.5068, -3.3707, -3.4538],
        [-3.5369, -3.5609, -3.5843, -3.2959, -3.3909, -3.3865, -3.4210, -3.5704,
         -3.3938, -3.4337, -3.5457, -3.2710, -3.3916, -3.5135, -3.2426, -3.2915,
         -3.6727, -3.4718, -3.4839, -3.5178, -3.5025, -3.3205, -3.4023, -3.3919,
         -3.4650, -3.3876, -3.5814, -3.2781, -3.4874, -3.3869, -3.4426],
        [-3.5017, -3.6305, -3.4414, -3.3885, -3.4642

ValueError: Expected input batch_size (8) to match target batch_size (0).