In [511]:
import pandas as pd
import torch

# Load the raw examples; later cells read the 'Intent' and 'Example' columns from this frame.
data = pd.read_csv('data.csv')

In [512]:
# One-hot encode the intent label: each distinct intent becomes its own indicator column.
one_hot = pd.get_dummies(data['Intent'])
# Swap the original label column for the indicator columns (joined on the row index).
data = data.drop(columns=['Intent']).join(one_hot)

data.head()

Unnamed: 0,Example,Complaint,Farewell,Feedback,Greet,Inquiry,Navigation,Request
0,Hi,False,False,False,True,False,False,False
1,Hello,False,False,False,True,False,False,False
2,Hey there,False,False,False,True,False,False,False
3,Good morning,False,False,False,True,False,False,False
4,Howdy,False,False,False,True,False,False,False


In [513]:
from sklearn.model_selection import train_test_split

# 80% of the rows go to training; the remaining 20% is held out.
train_data, holdout_data = train_test_split(data, train_size=0.8, random_state=10)
# The held-out rows are split in half into validation and test sets.
validation_data, test_data = train_test_split(holdout_data, train_size=0.5, random_state=10)

train_data.shape, test_data.shape, validation_data.shape

((136, 8), (17, 8), (17, 8))

In [514]:
# Preview the first rows of the training split (head() shows 5 rows by default)
# to sanity-check the text column and the one-hot label columns.
train_data.head()

Unnamed: 0,Example,Complaint,Farewell,Feedback,Greet,Inquiry,Navigation,Request
81,The response time was excellent.,False,False,True,False,False,False,False
137,Can you give me some advice on this?,False,False,False,False,False,False,True
91,Excellent service thank you.,False,False,True,False,False,False,False
143,Can I see previous results,False,False,False,False,False,True,False
76,How do I contact customer support?,False,False,False,False,True,False,False


In [515]:
# Lower-case the text of every split.
# `.assign` returns a new frame rather than writing a column into the objects
# handed back by train_test_split, which can trigger pandas' SettingWithCopyWarning.
# NOTE(review): the 'bert-base-uncased' tokenizer used below presumably lower-cases
# its input already, which would make this step redundant — confirm before removing.
train_data = train_data.assign(Example=train_data['Example'].str.lower())
test_data = test_data.assign(Example=test_data['Example'].str.lower())
validation_data = validation_data.assign(Example=validation_data['Example'].str.lower())

In [516]:
from transformers import BertTokenizer

# Pretrained word-piece tokenizer shared by the rest of the notebook.
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

# Sanity check: split the first training sentence into tokens ...
tokens = tokenizer.tokenize(train_data['Example'].iloc[0])

# ... and display the vocabulary ids those tokens map to.
tokenizer.convert_tokens_to_ids(tokens)



[1996, 3433, 2051, 2001, 6581, 1012]

In [517]:
from torch.utils.data import Dataset

class CustomDataset(Dataset):
    """Tokenised sentences paired with their one-hot intent labels.

    Args:
        data: DataFrame with an 'Example' text column; every other column is
            used as a label indicator.
        max_length: Optional fixed sequence length. When given, every sentence
            is padded/truncated to exactly this many token ids, so all splits
            share one width. When None (default) sentences are padded to the
            longest sentence in `data`, which makes the width differ from one
            split to another.
        text_tokenizer: Optional callable used instead of the notebook-level
            `tokenizer`. It must accept the same keyword arguments and return a
            mapping that contains an 'input_ids' tensor.
    """

    def __init__(self, data, max_length=None, text_tokenizer=None):
        # Fall back to the tokenizer defined earlier in the notebook.
        tokenize = tokenizer if text_tokenizer is None else text_tokenizer

        sentences = data['Example'].tolist()
        if max_length is None:
            encoded = tokenize(sentences, padding=True, truncation=True, return_tensors='pt')
        else:
            encoded = tokenize(
                sentences,
                padding='max_length',
                truncation=True,
                max_length=max_length,
                return_tensors='pt',
            )

        # Only the token ids are kept: one row per sentence, one column per position.
        # NOTE(review): the attention mask is discarded, so downstream code cannot
        # tell padding from real tokens except by the pad id itself.
        self.inputs = encoded['input_ids']

        print(self.inputs.shape)

        # Cast the label columns to float32 in one step; this also copes with
        # indicator columns of mixed dtypes (e.g. bool next to uint8).
        labels = data.drop(columns='Example').to_numpy(dtype='float32')
        self.outputs = torch.tensor(labels, dtype=torch.float32)

    def __getitem__(self, index):
        """Return the (token ids, label vector) pair stored at `index`."""
        return self.inputs[index], self.outputs[index]

    def __len__(self):
        """Number of sentences in the dataset."""
        return len(self.inputs)

In [518]:
# Build one dataset per split, in the order train, test, validation
# (each construction prints the shape of its token-id tensor).
train_dataset, test_dataset, validation_dataset = (
    CustomDataset(split) for split in (train_data, test_data, validation_data)
)

torch.Size([136, 13])
torch.Size([17, 13])
torch.Size([17, 12])


In [519]:
# Number of examples per mini-batch; shared by the train, test and validation DataLoaders.
batch_size = 16

In [520]:
from torch.utils.data import DataLoader

# Only the training batches are shuffled; evaluation splits keep their order.
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader, validation_loader = (
    DataLoader(split, batch_size=batch_size, shuffle=False)
    for split in (test_dataset, validation_dataset)
)

# Number of batches per loader.
tuple(len(loader) for loader in (train_loader, test_loader, validation_loader))

(9, 2, 2)

In [521]:
# Peek at one training batch: a pair of (token ids, one-hot labels) tensors.
# train_loader shuffles, so the batch shown here is not necessarily the same on every run.
next(iter(train_loader))

[tensor([[  101,  2129,  2079,  1045,  2131,  2000,  1996,  7205,  3902,  2644,
           1029,   102,     0],
         [  101, 22708,   102,     0,     0,     0,     0,     0,     0,     0,
              0,     0,     0],
         [  101,  6140,  2023,  3931,   102,     0,     0,     0,     0,     0,
              0,     0,     0],
         [  101,  2064,  1045, 22149,  2011,  2376,   102,     0,     0,     0,
              0,     0,     0],
         [  101,  2045,  2001,  2019,  7561,  1999,  2026,  2344,  1012,   102,
              0,     0,     0],
         [  101,  2054,  2024,  1996,  2327,  7538, 13051,  1029,   102,     0,
              0,     0,     0],
         [  101,  9061,  2005,  2085,   102,     0,     0,     0,     0,     0,
              0,     0,     0],
         [  101,  4608,  2017,  2101,   102,     0,     0,     0,     0,     0,
              0,     0,     0],
         [  101,  2693,  2006,  2000,  1996,  2279,  2930,   102,     0,     0,
              0,     0, 

In [522]:
import torch.nn as nn
import torch.nn.functional as F

class RNN(nn.Module):
    """Simple recurrent cell that is applied one time step at a time.

    Each call combines the current input with the previous hidden state,
    squashes the sum with tanh, and scores the classes with a log-softmax.

    Args:
        input_size: Number of features per time step (default 1).
        hidden_size: Width of the hidden state (default 64).
        output_size: Number of classes scored at every step (default 7).

    NOTE(review): the notebook feeds raw token ids as single float features;
    ids in the thousands will likely saturate tanh. An embedding layer in front
    of `i2h` is probably what is wanted — confirm.
    """

    def __init__(self, input_size=1, hidden_size=64, output_size=7):
        super().__init__()

        self.input_size = input_size
        self.hidden_size = hidden_size
        self.output_size = output_size

        self.i2h = nn.Linear(input_size, hidden_size)
        self.h2h = nn.Linear(hidden_size, hidden_size)
        self.h2o = nn.Linear(hidden_size, output_size)
        # dim=1 normalises over the class axis of a (batch, classes) output.
        self.softmax = nn.LogSoftmax(dim=1)

    def forward(self, input, hidden):
        """Advance the cell by one time step.

        Args:
            input: Features for this step; last dimension must be `input_size`.
            hidden: Previous hidden state; last dimension must be `hidden_size`.
                A single-row state broadcasts across the batch.

        Returns:
            (output, hidden): class log-probabilities and the new hidden state.
        """
        hidden = torch.tanh(self.i2h(input) + self.h2h(hidden))
        output = self.softmax(self.h2o(hidden))
        return output, hidden

    def initHidden(self, batch_size=1):
        """Return an all-zero hidden state with `batch_size` rows.

        The state is created on the same device and with the same dtype as the
        model's weights so it can be passed straight into `forward`.
        """
        weight = self.i2h.weight
        return torch.zeros(batch_size, self.hidden_size, device=weight.device, dtype=weight.dtype)

n_hidden = 64

# Instantiate the model and make sure its parameters are float32.
rnn = RNN().float()

In [523]:
# Smoke test: push a single scalar input through the untrained model.
input = torch.ones(1)
hidden = rnn.initHidden()

output, next_hidden = rnn(input, hidden)
output

tensor([[-2.0423, -2.1536, -2.3602, -2.0285, -2.1530, -1.2375, -2.1035]],
       grad_fn=<LogSoftmaxBackward0>)

In [524]:
# Negative log-likelihood loss; it pairs with the log-probabilities that the RNN's LogSoftmax layer emits.
criterion = nn.NLLLoss()
# Step size for the manual gradient-descent update performed in train().
learning_rate = 0.005

In [527]:
def train(category_tensor, line_tensor):
    """Run one gradient-descent step of the notebook-level `rnn` on one batch.

    Args:
        category_tensor: One-hot labels with one row per example; converted to
            class indices with argmax over dim 1.
        line_tensor: Token ids with one row per example and one column per
            sequence position.

    Returns:
        The batch loss as a Python float (it is also printed).

    Raises:
        ValueError: If `line_tensor` has no sequence positions, because the
            model would then produce no output to score.
    """
    seq_len = line_tensor.size(1)
    if seq_len == 0:
        raise ValueError("line_tensor must contain at least one time step")

    hidden = rnn.initHidden()
    line_tensor = line_tensor.float()

    rnn.zero_grad()

    # Walk over however many positions this batch actually has, so batches
    # padded to a different width neither crash nor get silently truncated.
    # NOTE(review): padding positions are fed to the model like real tokens,
    # so the final hidden state is computed after the padding — confirm that
    # is intended.
    for step in range(seq_len):
        output, hidden = rnn(line_tensor[:, step].unsqueeze(1), hidden)

    # NLLLoss expects class indices, not one-hot rows.
    target = torch.argmax(category_tensor, dim=1)

    loss = criterion(output, target)
    loss.backward()

    # Plain SGD step: p <- p - learning_rate * grad, kept out of the autograd graph.
    with torch.no_grad():
        for p in rnn.parameters():
            if p.grad is not None:
                p.add_(p.grad, alpha=-learning_rate)

    loss_value = loss.item()
    print(loss_value)
    return loss_value

In [529]:
# Train for a fixed number of passes over the training loader; train() prints each batch loss.
# NOTE(review): `input` shadows the Python builtin; the names are kept because
# other cells may read these notebook-level variables.
epochs = 50
for epoch in range(epochs):
    for input, output in train_loader:
        # train() takes the labels first, then the token ids.
        train(output, input)

1.9303321838378906
1.925128698348999
1.987245798110962
1.906937599182129
1.9889185428619385
1.840927004814148
1.9947820901870728
1.9349689483642578
1.8735827207565308
1.9865564107894897
1.9366745948791504
1.9449008703231812
1.873313546180725
1.9452481269836426
1.905921220779419
1.8565754890441895
1.9853689670562744
1.883845567703247
1.8577501773834229
1.906562089920044
1.899247407913208
1.8519643545150757
1.8617703914642334
1.9268454313278198
2.021665096282959
2.046548366546631
1.937017560005188
1.9318863153457642
1.8511204719543457
1.8767870664596558
1.9580570459365845
1.9368211030960083
1.8247233629226685
1.8828065395355225
1.9926762580871582
2.095032215118408
1.979811191558838
1.9222298860549927
1.9002243280410767
1.966744303703308
1.763824462890625
1.8778672218322754
1.9244420528411865
1.9628218412399292
1.9417093992233276
1.9498860836029053
1.9452153444290161
1.9322185516357422
1.908156156539917
1.9294421672821045
1.8911964893341064
1.885820984840393
1.8943828344345093
1.807386159