In [56]:
# Shell escape: print the NVIDIA driver version and per-GPU memory / utilisation.
!nvidia-smi

Mon Dec 17 10:39:29 2018       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 384.145                Driver Version: 384.145                   |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|   0  GeForce GTX 108...  Off  | 00000000:02:00.0 Off |                  N/A |
| 35%   61C    P2   106W / 250W |   7195MiB / 11172MiB |     51%      Default |
+-------------------------------+----------------------+----------------------+
|   1  GeForce GTX 108...  Off  | 00000000:03:00.0 Off |                  N/A |
| 45%   76C    P2    98W / 250W |   8585MiB / 11172MiB |     55%      Default |
+-------------------------------+----------------------+----------------------+
                                                                            

In [58]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import logging
logging.getLogger().setLevel(logging.INFO)

from tqdm import tqdm
from idst_util import trivial
from idst_util import dstc2

# Make sure data is available
# Per the log output recorded for this cell: a project banner is logged, then
# the `dstc2` directory and its `dstc2_traindev`, `dstc2_test` and
# `dstc2_scripts` sub-directories are looked up relative to "./".
# NOTE(review): what check() does when a directory is missing is not visible
# from this notebook -- confirm in idst_util.dstc2.
trivial.print_idst()
dstc2.check("./")

INFO:root:+--------------------------------+
INFO:root:|         _ ____  ___________    |
INFO:root:|        (_) __ \/ ___/_  __/    |
INFO:root:|       / / / / /\__ \ / /       |
INFO:root:|      / / /_/ /___/ // /        |
INFO:root:|     /_/_____//____//_/         |
INFO:root:|                                |
INFO:root:+--------------------------------+
INFO:root:|Incremental Dialog State Tracker|
INFO:root:+--------------------------------+
INFO:root:+--------------------------------+
INFO:root:| Dialog State Tracker 2 Utility |
INFO:root:+--------------------------------+
INFO:root:Looking for dstc2 directory in .
INFO:root:dstc2 was found!
INFO:root:Looking for dstc2_traindev directory in ./dstc2
INFO:root:dstc2_traindev was found!
INFO:root:Looking for dstc2_test directory in ./dstc2
INFO:root:dstc2_test was found!
INFO:root:Looking for dstc2_scripts directory in ./dstc2
INFO:root:dstc2_scripts was found!
INFO:root:Done!


In [55]:
# Section banner for the log output.
for banner_line in ("+--------------------------------+",
                    "|            Baseline            |",
                    "+--------------------------------+"):
    logging.info(banner_line)

# The device is pinned to the CPU for this notebook. To use a GPU instead,
# replace the assignment with a torch.device built from "cuda:<GPU_ID>",
# guarded by torch.cuda.is_available().
DEVICE = "cpu"
if DEVICE != "cpu":
    logging.info("Running on GPU")
else:
    logging.warning("Running on CPU")

INFO:root:+--------------------------------+
INFO:root:|            Baseline            |
INFO:root:+--------------------------------+


In [9]:
def prepare_sequence(seq, to_ix):
    """Encode a sequence of symbols as a 1-D tensor of integer indices.

    Args:
        seq: iterable of keys (e.g. words or tag names).
        to_ix: mapping from each key to its integer index.

    Returns:
        A `torch.long` tensor holding `to_ix[item]` for every item of `seq`,
        in order.

    Raises:
        KeyError: if an item of `seq` is missing from `to_ix` (for a plain dict).
    """
    indices = []
    for item in seq:
        indices.append(to_ix[item])
    return torch.tensor(indices, dtype = torch.long)

In [10]:
# Toy corpus: each entry is (list of words, list of part-of-speech tags),
# with one tag per word.
training_data = [
    ("The dog ate the apple".split(), ["DET", "NN", "V", "DET", "NN"]),
    ("Everybody read that book".split(), ["NN", "V", "DET", "NN"]),
]

# Tag name -> class index, numbered in the order listed.
tag_to_ix = {tag: ix for ix, tag in enumerate(("DET", "NN", "V"))}

# Class index -> tag name, derived from tag_to_ix so the two cannot drift apart.
ix_to_tag = {ix: tag for tag, ix in tag_to_ix.items()}

In [11]:
# Build the vocabulary: every distinct word gets the next free index, in order
# of first appearance across the training sentences.
word_to_ix = {}
for sent, tag in training_data:
    for word in sent:
        # setdefault leaves an already-known word's index untouched.
        word_to_ix.setdefault(word, len(word_to_ix))
print(word_to_ix)

{'The': 0, 'dog': 1, 'ate': 2, 'the': 3, 'apple': 4, 'Everybody': 5, 'read': 6, 'that': 7, 'book': 8}


In [12]:
# Model sizes passed to LSTMTagger when the model is constructed.
EMBEDDING_DIM = 6  # passed as `embedding_dim` (word-embedding width)
HIDDEN_DIM = 6  # passed as `hidden_dim` (LSTM hidden-state size)

In [13]:
class LSTMTagger(nn.Module):
    """Sequence tagger: word embedding -> LSTM -> linear layer -> log-softmax.

    The LSTM state is kept on the instance as `self.hidden` and is carried over
    from one `forward` call to the next. Callers that want each sequence to be
    processed independently must assign `model.hidden = model.init_hidden()`
    before calling the model.

    Args:
        embedding_dim: size of each word-embedding vector.
        hidden_dim: size of the LSTM hidden state.
        vocab_size: number of rows in the embedding table.
        tagset_size: number of output classes.
    """

    def __init__(self, embedding_dim, hidden_dim, vocab_size, tagset_size):
        super(LSTMTagger, self).__init__()

        self.hidden_dim = hidden_dim

        self.word_embeddings = nn.Embedding(num_embeddings = vocab_size,
                                            embedding_dim = embedding_dim)

        self.lstm = nn.LSTM(input_size = embedding_dim,
                            hidden_size = hidden_dim)

        self.hidden2tag = nn.Linear(in_features = hidden_dim,
                                    out_features = tagset_size)

        # Must stay last: init_hidden() reads dtype/device from the layers above.
        self.hidden = self.init_hidden()

    def init_hidden(self):
        """Return a fresh all-zero `(h_0, c_0)` pair, each of shape (1, 1, hidden_dim).

        The tensors are allocated with the same dtype and device as the model's
        weights, so the state stays compatible after `model.to(device)` or
        `model.double()`. Re-assign the result to `self.hidden` after such a
        conversion, because the stored state is a plain attribute that those
        calls do not convert.
        """
        reference = self.hidden2tag.weight
        return (reference.new_zeros(1, 1, self.hidden_dim),
                reference.new_zeros(1, 1, self.hidden_dim))

    def forward(self, sentence):
        """Score every position of one sentence.

        Args:
            sentence: tensor of word indices for a single sentence, as produced
                by `prepare_sequence` (presumably 1-D; `len(sentence)` is used
                as the sequence length).

        Returns:
            Tensor of shape (len(sentence), tagset_size) holding log-probabilities
            over the tags for each position.

        Side effects:
            Overwrites `self.hidden` with the LSTM state after the last position.
        """
        seq_len = len(sentence)

        embeds = self.word_embeddings(sentence)

        # nn.LSTM expects (seq_len, batch, features); the batch size is fixed to 1.
        lstm_out, self.hidden = self.lstm(embeds.view(seq_len, 1, -1), self.hidden)

        tag_space = self.hidden2tag(lstm_out.view(seq_len, -1))

        tag_scores = F.log_softmax(tag_space, dim = 1)

        return tag_scores

In [14]:
# Training hyper-parameters.
SEED = 0
NUM_EPOCHS = 10000
LEARNING_RATE = 0.1

# Seed before the model is built so weight initialisation (and therefore the
# printed predictions) is repeatable across kernel restarts.
torch.manual_seed(SEED)

model = LSTMTagger(embedding_dim = EMBEDDING_DIM,
                   hidden_dim = HIDDEN_DIM,
                   vocab_size = len(word_to_ix),
                   tagset_size = len(tag_to_ix))

loss_function = nn.NLLLoss()

optimizer = optim.SGD(model.parameters(), lr = LEARNING_RATE)


def predict_tags(tagger, sentence):
    """Return the highest-scoring tag name for every word of `sentence`.

    The tagger carries its LSTM state from one call to the next, so the state
    is reset first; otherwise the prediction would depend on whatever sequence
    the model processed last.
    """
    tagger.hidden = tagger.init_hidden()
    with torch.no_grad():
        tag_scores = tagger(prepare_sequence(sentence, word_to_ix))
    return [ix_to_tag[ix] for ix in tag_scores.argmax(dim = 1).tolist()]


def report_predictions(header):
    """Print `header`, then the gold and predicted tags of the first training sentence."""
    words, gold_tags = training_data[0]
    print(header)
    print(gold_tags)
    print(predict_tags(model, words))
    print("\n")


report_predictions("Prior training")

# The index tensors never change, so encode the corpus once instead of once
# per epoch.
encoded_training_data = [(prepare_sequence(words, word_to_ix),
                          prepare_sequence(gold_tags, tag_to_ix))
                         for words, gold_tags in training_data]

for epoch in tqdm(range(NUM_EPOCHS)):
    for sentence_in, targets in encoded_training_data:

        # Gradients accumulate across backward() calls; clear them per sample.
        model.zero_grad()

        # Each sentence is independent: start from a zero LSTM state.
        model.hidden = model.init_hidden()

        tag_scores = model(sentence_in)

        loss = loss_function(tag_scores, targets)
        loss.backward()
        optimizer.step()

report_predictions("After training")

  0%|          | 26/10000 [00:00<00:39, 255.17it/s]

Prior training
['DET', 'NN', 'V', 'DET', 'NN']
['NN', 'NN', 'NN', 'NN', 'NN']




100%|██████████| 10000/10000 [00:34<00:00, 289.28it/s]

After training
['DET', 'NN', 'V', 'DET', 'NN']
['DET', 'NN', 'V', 'DET', 'NN']





