In [109]:
import torch
from torch import nn, optim
import numpy as np
from torch.utils.data import DataLoader
import random

In [2]:
from kfDataset2 import SeqDataset
import config

In [3]:
# Override the sequence length stored on the shared config module for this session.
config.seq_len=30

In [None]:
# Build the dataset using the sequence length set in the previous cell.
dataset = SeqDataset(seq_len=config.seq_len)

In [5]:
# No shuffle argument is passed, so the DataLoader default ordering applies.
dataloader = DataLoader(dataset, batch_size=config.batch_size)

In [6]:
i = iter(dataloader)

In [7]:
(x, y, input_lens) = i.next()

In [8]:
x.size() #batch_size, seq_len, num_features

torch.Size([1, 30, 24])

In [9]:
y.size() #batch_size, seq_len

torch.Size([1, 30, 24])

In [10]:
input_lens

tensor([9])

In [11]:
# Feature width expected by the encoder LSTM below.
config.input_size

24

In [12]:
# Hyper-parameters shared by the encoder and decoder LSTMs in this notebook.
hidden_size = 128
num_layers = 2

In [13]:
# Stand-alone LSTM used to inspect output/state shapes before wrapping it in a module.
lstm = nn.LSTM(input_size=config.input_size, hidden_size=hidden_size,
               num_layers=num_layers, batch_first=True)

In [14]:
# No initial state is passed, so the LSTM starts from its default (zero) state.
output, (hidden, cell) = lstm(x)

In [15]:
output.size() #batch_size, seq_len, hidden_size

torch.Size([1, 30, 128])

In [16]:
hidden.size() #num_layers, batch_size, hidden_size

torch.Size([2, 1, 128])

In [17]:
cell.size() #num_layers, batch_size, hidden_size

torch.Size([2, 1, 128])

In [19]:
lstm_decoder = nn.LSTM(input_size=config.output_size, hidden_size=hidden_size, num_layers=num_layers, batch_first=True)

In [28]:
y.size()

torch.Size([1, 30, 24])

In [25]:
y_0 = y[:,0,:].unsqueeze(0)

In [26]:
y_0.size()

torch.Size([1, 1, 24])

In [27]:
y_0.argmax()

tensor(0)

In [30]:
o,s = lstm_decoder(y_0, (hidden,cell))

In [31]:
o.size()

torch.Size([1, 1, 128])

In [32]:
linear = nn.Linear(hidden_size, config.output_size)

In [33]:
p = linear(o)

In [34]:
p.size()

torch.Size([1, 1, 24])

In [45]:
class SeqEncoder(nn.Module):
    """Stacked, batch-first LSTM that condenses an input sequence into its final state.

    Only the last hidden and cell states are returned; the per-step LSTM
    outputs are computed but discarded.
    """

    def __init__(self, input_size, hidden_size,
                 num_layers, dropout=0.5):
        super(SeqEncoder, self).__init__()

        self.input_size, self.hidden_size, self.num_layers = (
            input_size, hidden_size, num_layers)

        # `dropout` is handed to nn.LSTM, which applies it between stacked layers.
        lstm_options = dict(num_layers=self.num_layers,
                            batch_first=True,
                            dropout=dropout)
        self.lstm = nn.LSTM(self.input_size, self.hidden_size, **lstm_options)

    def forward(self, input_x):
        """Encode ``input_x`` and return the LSTM's final ``(hidden, cell)`` pair."""
        _, final_state = self.lstm(input_x)
        hidden, cell = final_state
        return hidden, cell


In [73]:
class SeqDecoder(nn.Module):
    """Single-step LSTM decoder followed by a linear projection.

    Each call to ``forward`` advances the decoder by exactly one time step.

    Args:
        input_size: width of the vector fed to the LSTM at each step.
        output_size: width of the projected prediction.
        hidden_size: LSTM hidden state width.
        num_layers: number of stacked LSTM layers.
        dropout: dropout value passed to ``nn.LSTM``.
    """

    def __init__(self, input_size, output_size, hidden_size, num_layers,dropout=0.5):
        super(SeqDecoder, self).__init__()
        self.output_size = output_size
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.num_layers = num_layers

        self.lstm = nn.LSTM(self.input_size,
                            self.hidden_size,
                            num_layers=self.num_layers,
                            batch_first=True,
                            dropout=dropout
                           )
        self.out = nn.Linear(self.hidden_size, self.output_size)

    def forward(self, x, hidden, cell):
        """Decode one time step.

        Args:
            x: tensor of shape (batch_size, input_size) for the current step.
            hidden: LSTM hidden state, (num_layers, batch_size, hidden_size).
            cell: LSTM cell state, (num_layers, batch_size, hidden_size).

        Returns:
            ``(prediction, hidden, cell)`` where ``prediction`` has shape
            (batch_size, output_size) and the states are the updated ones.
        """
        # The LSTM is batch_first, so it expects (batch, seq_len, features).
        # The length-1 time axis therefore belongs at dim 1. Inserting it at
        # dim 0 only works for a batch of one and otherwise makes the batch
        # look like a sequence, which mismatches the hidden state's batch size.
        step_input = x.unsqueeze(1)
        output, (hidden, cell) = self.lstm(step_input, (hidden, cell))
        prediction = self.out(output.squeeze(1))

        return prediction, hidden, cell


In [74]:
# The literal 2 is the layer count; it matches num_layers defined earlier.
encoder = SeqEncoder(config.input_size, hidden_size, 2)

In [75]:
# Final hidden and cell states of the encoder for the sample batch.
h,c = encoder(x)

In [48]:
h.size() #num_layers, batch_size, hidden_size

torch.Size([2, 1, 128])

In [49]:
c.size() #num_layers, batch_size, hidden_size

torch.Size([2, 1, 128])

In [57]:
# The decoder consumes and produces vectors of the same width (config.output_size).
decoder = SeqDecoder(config.output_size, config.output_size, hidden_size, num_layers)

In [58]:
y.size()

torch.Size([1, 30, 24])

In [59]:
# One decoding step: the first target step as input, encoder state as initial state.
pred, hh, cc = decoder(y[:, 0,:], h, c)

In [63]:
y[:, 0,:].size() #batch_size, output_size

torch.Size([1, 24])

In [60]:
pred.size() #batch_size, output_size

torch.Size([1, 24])

In [61]:
hh.size() #num_layers, batch_size, hidden_size

torch.Size([2, 1, 128])

In [62]:
cc.size() #num_layers, batch_size, hidden_size

torch.Size([2, 1, 128])

In [141]:
class Seq2Seq(nn.Module):
    """Encoder-decoder model built from ``SeqEncoder`` and ``SeqDecoder``.

    The encoder's final hidden/cell states initialise the decoder, which is
    then unrolled one step at a time.

    Args:
        input_size: feature width of the encoder input.
        output_size: width of the decoder's input and of its predictions.
        hidden_size: hidden state width shared by encoder and decoder.
        num_layers: number of stacked LSTM layers in both sub-modules.
        dropout: dropout value forwarded to both sub-modules.
    """

    def __init__(self, input_size, output_size, hidden_size, num_layers, dropout=0.5):
        super().__init__()

        self.input_size = input_size
        self.output_size = output_size
        self.encoder = SeqEncoder(input_size, hidden_size, num_layers, dropout=dropout)
        self.decoder = SeqDecoder(output_size, output_size, hidden_size, num_layers, dropout=dropout)

    def forward(self, x, y, seq_len, teacher_forcing_ratio = 0.5):
        """Unroll the decoder for ``seq_len - 1`` steps.

        Args:
            x: encoder input, indexed as (batch_size, src_len, input_size).
            y: target sequence, indexed as (batch_size, trg_len, output_size);
               ``y[:, 0, :]`` is the first decoder input.
            seq_len: number of time steps in the returned tensor.
            teacher_forcing_ratio: probability, per step, of feeding the
               ground-truth ``y[:, t, :]`` as the next input instead of the
               decoder's own prediction (e.g. 0.75 -> ground truth 75% of
               the time).

        Returns:
            Tensor of shape (seq_len, batch_size, output_size). Row 0 is left
            as zeros because decoding starts at t = 1.
        """
        batch_size = x.shape[0]

        # Buffer for the decoder outputs, allocated on the same device as the input.
        outputs = torch.zeros(seq_len, batch_size, self.output_size, device=x.device)

        # The encoder's last hidden/cell states are the decoder's initial state.
        hidden, cell = self.encoder(x)

        # The first decoder input is the first target step (the start action).
        decoder_input = y[:, 0, :]

        for t in range(1, seq_len):
            predicted, hidden, cell = self.decoder(decoder_input, hidden, cell)
            outputs[t] = predicted

            # Decide per step whether to feed the ground truth or the prediction.
            teacher_force = random.random() < teacher_forcing_ratio
            decoder_input = y[:, t, :] if teacher_force else predicted

        return outputs

    def create_start_action(self):
        """Return a (1, output_size) one-hot row marking the 'start' action."""
        action = torch.zeros((1, self.output_size))
        # Index the column, not the row: `action[idx] = 1` would set a whole
        # row to ones (or raise IndexError for idx > 0) instead of one entry.
        action[0, config.seq_types.index('start')] = 1
        return action

    def predict(self, x, max_seq_len):
        """Greedily decode action names for ``x``.

        Decoding stops after ``max_seq_len - 1`` steps or as soon as 'end' is
        predicted. Only the first sample of the batch is decoded into names.

        Args:
            x: encoder input, indexed as (batch_size, src_len, input_size).
            max_seq_len: upper bound on the decoded sequence length.

        Returns:
            List of entries from ``config.seq_types`` (the final 'end', if
            reached, is included).
        """
        batch_size = x.shape[0]
        predicted_seq_types = []

        # Run in eval mode with autograd off so dropout does not randomise
        # the predictions; the previous training flag is restored afterwards.
        was_training = self.training
        self.eval()
        try:
            with torch.no_grad():
                hidden, cell = self.encoder(x)

                # One start row per batch element, so the decoder input's
                # batch size agrees with the encoder state.
                decoder_input = self.create_start_action().to(x.device).repeat(batch_size, 1)

                for _ in range(1, max_seq_len):
                    predicted, hidden, cell = self.decoder(decoder_input, hidden, cell)
                    # argmax over the raw scores equals argmax over their softmax.
                    pred_action = config.seq_types[predicted[0].argmax().item()]
                    predicted_seq_types.append(pred_action)
                    if pred_action == 'end':
                        break
                    # Feed the prediction back in, as forward() does when it
                    # is not teacher forcing. Previously the start action was
                    # re-fed at every step.
                    decoder_input = predicted
        finally:
            self.train(was_training)

        return predicted_seq_types


In [77]:
# Full model with the same hyper-parameters as the stand-alone pieces above.
seq2seq = Seq2Seq(config.input_size, config.output_size, hidden_size, num_layers)

In [78]:
# Length reported by the dataset for this sample.
input_lens.item()

9

In [80]:
# Decode for input_lens steps; teacher forcing uses the default ratio.
outputs = seq2seq(x, y, input_lens.item())

In [92]:
outputs.size() #seq_len, batch_size, output_size

torch.Size([9, 1, 24])

In [93]:
y.size()

torch.Size([1, 30, 24])

In [94]:
# Class index per time step for the first sample.
y[0].argmax(axis=1)

tensor([ 0,  1,  2,  3,  4,  5,  6,  7, 23,  0,  0,  0,  0,  0,  0,  0,  0,  0,
         0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0])

In [88]:
x.size()

torch.Size([1, 30, 24])

In [90]:
# NOTE(review): x0 is not referenced by any later cell in this notebook; the
# calls below still pass the full x.
x0 = x[:, :9, :]

In [91]:
outputs = seq2seq(x, y, input_lens.item())

In [99]:
outputs[1:, 0, :].size() # (seq_len - 1) steps after the start step, output_dim

torch.Size([8, 24])

In [100]:
y.size()

torch.Size([1, 30, 24])

In [103]:
# Target class indices for steps 1..input_lens-1 of the first sample.
y_target = y[0, 1:input_lens.item()].argmax(axis=1)

In [134]:
# `pred` is the single-step output of the stand-alone decoder created earlier,
# not an output of seq2seq.
p = nn.functional.softmax(pred[0], dim=0)
p

tensor([0.0401, 0.0403, 0.0422, 0.0404, 0.0436, 0.0422, 0.0373, 0.0437, 0.0385,
        0.0431, 0.0430, 0.0439, 0.0409, 0.0441, 0.0439, 0.0432, 0.0397, 0.0399,
        0.0380, 0.0433, 0.0424, 0.0450, 0.0381, 0.0431],
       grad_fn=<SoftmaxBackward>)

In [138]:
# Map the most probable index back to its action name.
config.seq_types[p.argmax().item()]

'edit_object'

In [112]:
def train(epochs):
    """Fit the global ``seq2seq`` model on the single notebook batch.

    Each iteration runs one Adam step on the global ``x``, ``y`` and
    ``input_lens``, with the learning rate taken from ``config.lr``.

    Args:
        epochs: number of optimisation steps to run.

    Returns:
        The cross-entropy loss averaged over all ``epochs`` steps.
    """
    criterion = nn.CrossEntropyLoss()
    adam = optim.Adam(seq2seq.parameters(), lr= config.lr)
    seq2seq.train()

    step_losses = []
    for _ in range(epochs):
        adam.zero_grad()
        #TODO for each x, get correct seq_len
        decoded = seq2seq(x, y, input_lens.item())

        # Drop step 0 (never predicted) and keep the only batch element.
        logits = decoded[1:, 0, :]
        # Class indices of the one-hot targets for the same steps.
        labels = y[0, 1:input_lens.item()].argmax(axis=1)

        step_loss = criterion(logits, labels)
        step_loss.backward()
        adam.step()

        step_losses.append(step_loss.item())

    return sum(step_losses) / epochs



In [119]:
# Run 40 optimisation steps; the value shown is the loss averaged over them.
train(40)

0.004961857158923522

In [124]:
# First target step of the sample batch: a one-hot row with index 0 set.
y[:,0,:]

tensor([[1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0.]])

In [122]:
# All-zero row with the same shape as one decoder input step.
start = torch.zeros((1, config.output_size))

In [123]:
start.size()

torch.Size([1, 24])

In [None]:
def predict(x):
    """Decode the action sequence for ``x`` with the global ``seq2seq`` model.

    Args:
        x: encoder input batch, as accepted by ``Seq2Seq.predict``.

    Returns:
        The list of predicted action names, at most ``config.seq_len - 1`` long.
    """
    # The earlier stub called seq2seq() with no arguments, which raises a
    # TypeError, and returned nothing. Delegate to the model's own decoding
    # loop, bounded by the configured sequence length.
    return seq2seq.predict(x, config.seq_len)

['start',
 'get_note',
 'get_links_from',
 'record_note_read',
 'get_links_from_contrib',
 'edit_note',
 'notify_comm',
 'record_note_edited',
 'get_object',
 'get_groups',
 'get_links_to_note',
 'search',
 'post_scaffold',
 'get_community',
 'get_note_records',
 'get_author',
 'delete_scaffold',
 'new_note',
 'post_link_view_note',
 'new_attachment',
 'upload_attachment',
 'edit_object',
 'post_link',
 'end']