In [1]:
from sklearn.model_selection import train_test_split
from pipeline import Pipeline
import torch
from transforms import pad_token
import numpy as np
from tqdm import tqdm_notebook, tqdm
import pickle
import random
from dataset import TBTTScriptsDataset, ScriptsDataset
from model import Network, PackedNetwork
from IPython.core.debugger import set_trace

In [2]:
def test_model(loader, model):
    """
    Help function that tests the model's performance on a dataset.

    @param: loader - data loader for the dataset to test against; every batch is
                     either (data, labels) or (data, lengths, labels)
    @param: model  - network exposing init_hidden(batch_size) and returning
                     (prediction, hidden) when called
    @return: mean squared error over every prediction element produced for the
             loader, as a 0-dim tensor (NaN when the loader yields no batches)

    Reads the notebook-level global T to decide into how many chunks each batch
    is split along the sequence dimension.
    NOTE(review): T is not defined in the visible cells - it has to exist before
    this function is called.
    """
    squared_error_sum = 0.0
    element_count = 0
    model.eval()
    # Evaluation never calls backward(), so do not record autograd graphs.
    with torch.no_grad():
        for batch in tqdm_notebook(loader, desc = "Validation Batches", unit = "batch", leave = True):
            batch = tuple(batch)
            if len(batch) == 2:
                data, labels = batch
                lengths = None
            elif len(batch) == 3:
                data, lengths, labels = batch
            else:
                raise ValueError("expected batches of 2 or 3 elements, got {}".format(len(batch)))

            batch_size, seq_len = data.shape
            hidden = model.init_hidden(batch_size)

            chunks = int(seq_len / T)
            if chunks > 0:
                # Feed the sequence chunk by chunk, carrying the hidden state over;
                # only the prediction after the last chunk is scored.
                for truncated_slice in torch.chunk(data, chunks, dim = 1):
                    if lengths is None:
                        predicted, hidden = model(truncated_slice, hidden)
                    else:
                        # Same convention as the training loop that receives lengths:
                        # count entries that differ from 0.
                        # NOTE(review): presumes the padding index is 0 - confirm.
                        slice_lengths = (truncated_slice != 0).sum(1)
                        predicted, hidden = model(truncated_slice, slice_lengths, hidden)
            elif lengths is None:
                # Sequence shorter than T: torch.chunk rejects 0 chunks, so run it whole.
                predicted, hidden = model(data, hidden)
            else:
                predicted, hidden = model(data, lengths, hidden)

            # Accumulate the SUM of squared errors and the element count; adding
            # per-batch means and dividing by the sample count would shrink the
            # result by roughly the batch size.
            squared_error = (predicted - labels) ** 2
            squared_error_sum += squared_error.sum().item()
            element_count += squared_error.numel()

    if element_count == 0:
        return torch.tensor(float("nan"))
    return torch.tensor(squared_error_sum / element_count)

In [2]:
# with open("10k_common.pkl", "rb") as f:
#   data, token_idx, idx_token = pickle.load(f).data
#   data = data.apply(lambda x: x[:500])
  
# with open("rating.pkl", "rb") as f:
#   ratings = pickle.load(f)

# Fixed seed so that Restart & Run All reproduces the same train/validation split.
SPLIT_SEED = 42

# The "10k_common" artifact unpacks into the sequences plus two vocabulary lookups.
# NOTE(review): presumably token_idx maps token -> index and idx_token the reverse - confirm.
data, token_idx, idx_token = Pipeline.load("10k_common").data
ratings = Pipeline.load("ratings").data

# Hold out 15% of the samples for validation.
X_train, X_test, y_train, y_test = train_test_split(
    data, ratings, test_size=0.15, random_state=SPLIT_SEED
)
X_train, X_test = X_train.reset_index(drop=True), X_test.reset_index(drop=True)
# Re-index the targets as well (when they are pandas objects) so inputs and
# targets share the same 0..n-1 labels; otherwise label-based lookups on the
# targets would not line up with the re-indexed inputs.
if hasattr(y_train, "reset_index"):
    y_train, y_test = y_train.reset_index(drop=True), y_test.reset_index(drop=True)

train_loader = ScriptsDataset(X_train, y_train).get_loader(batch_size = 32)
val_loader = ScriptsDataset(X_test, y_test).get_loader(batch_size = 32)
# train_loader = TBTTScriptsDataset(X_train, y_train).get_loader()
# val_loader = TBTTScriptsDataset(X_test, y_test).get_loader()

In [3]:
# Alternative architecture, kept for reference:
# model = Network(rnn_type = "gru", emb_size = 100, hidden_size = 128, num_layers = 1, vocab_size = len(idx_token), pad_idx = token_idx[pad_token])

# Optimisation hyper-parameters.
num_epochs = 2
learning_rate = 1e-5

# Number of batches per epoch; used in the progress messages of the training loops.
total_step = len(train_loader)

# GRU-based network; the vocabulary size and padding index come from the loaded lookups.
model = PackedNetwork(
    rnn_type = "gru",
    emb_size = 128,
    hidden_size = 156,
    num_layers = 1,
    vocab_size = len(idx_token),
    pad_idx = token_idx[pad_token],
)

# Mean-squared-error objective optimised with Adam.
criterion = torch.nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

In [None]:
# Training loop for loaders that yield (data, labels) batches.
# Each batch is split along the sequence dimension into int(seq_len / T) chunks
# that are fed to the model one after another while the hidden state is carried
# over. The hidden state is never detached, so loss.backward() propagates
# through every chunk of the sequence.
# NOTE(review): T is not defined in the visible cells - define it before running.
losses = []          # loss of every optimisation step
interval_loss = []   # losses collected since the last validation report
for epoch in tqdm_notebook(range(num_epochs), desc = "Training Epochs", unit = "epoch"):
    for i, (data, labels) in enumerate(tqdm_notebook(train_loader, desc = "Batches", unit = "batch")):
        # test_model() switches the model to eval mode, so re-enable training mode each step.
        model.train()
        optimizer.zero_grad()

        batch_size, seq_len = data.shape
        hidden = model.init_hidden(batch_size)

        chunks = int(seq_len / T)
        if chunks > 0:
            data_truncated = torch.chunk(data, chunks, dim = 1)
            # The leftover set_trace() breakpoint was removed here: it stopped
            # training in the debugger on every chunked batch.
            for truncated_slice in data_truncated:
                # Only the outputs of the final chunk reach the loss below.
                outputs, hidden = model(truncated_slice, hidden)
        else:
            # Sequence shorter than T: process it in one piece.
            outputs, hidden = model(data, hidden)

        loss = criterion(outputs, labels)

        # Backward and optimize
        loss.backward()

#         enc_grads = torch.nn.utils.clip_grad_norm_(model.parameters(), 40)

        optimizer.step()

        losses.append(loss.item())
        interval_loss.append(loss.item())
        if i > 0 and i % 300 == 0:
            # validate every 300 batches and report the mean training loss since the last report
            val_acc = test_model(val_loader, model)
            avg_intval_loss = sum(interval_loss) / len(interval_loss)
            tqdm.write('Epoch: [{}/{}], Step: [{}/{}], Average MSE: {:.4f}, Avg Loss: {:.4f}'.format(
                       epoch+1, num_epochs, i+1, total_step, val_acc, avg_intval_loss))
            interval_loss = []

In [None]:
# Per-sample training loop: one sequence at a time (batch size 1), with an
# optimiser step on every k1-th sample and a validation pass every k1 * 35 samples.
k1 = 8 # interval: an optimiser step is taken on every k1-th sample
k2 = 12 # sequence length: maximum number of (input state, output state) pairs kept
retain_graph = k1 < k2

losses = []          # loss of every optimisation step
interval_loss = []   # losses collected since the last validation report

for epoch in tqdm_notebook(range(num_epochs), desc = "Training Epochs", unit = "epoch"):
    for i, (seq, target) in enumerate(tqdm_notebook(zip(X_train, y_train), desc = "Training Sample", unit = "sample (k1 = {})".format(k1), total = len(X_train), miniters=k1)):
        model.train()

        # NOTE(review): outputs, targets and states are re-created for every
        # sample, so they never hold more than one output / two states. The
        # trimming loops below therefore never run, and the backward pass further
        # down only sees the current sample - samples between two optimiser
        # steps contribute no gradient. Confirm whether that is intended.
        outputs = []
        targets = []

        hidden = model.init_hidden(1)
        states = [(None, hidden)]

        # Detach the incoming state and make it a leaf that records its own gradient.
        state = states[-1][1].detach()
        state.requires_grad=True

        # Whole sequence as a (1, seq_len) tensor of indices.
        seq = torch.Tensor(seq).long().view(1, -1)
        output, new_state = model(seq, state)
        states.append((state, new_state))

        outputs.append(output)
        targets.append(torch.tensor([target]))

        while len(outputs) > k1:
            # Delete stuff that is too old
            del outputs[0]
            del targets[0]

        while len(states) > k2:
            # Delete stuff that is too old
            del states[0]

        if (i+1) % k1 == 0:
            optimizer.zero_grad()
            # backprop last module (keep graph only if they ever overlap)
            for j in range(k2-1):
                if j < k1:
                    loss = criterion(outputs[-j-1], targets[-j-1])
                    loss.backward(retain_graph=retain_graph)

                # if we get all the way back to the "init_state", stop
                if states[-j-2][0] is None:
                    break
                # Push the gradient collected on the detached input state back
                # into the graph that produced it.
                curr_grad = states[-j-1][0].grad
                states[-j-2][1].backward(curr_grad, retain_graph=retain_graph)
            optimizer.step()

            losses.append(loss.item())
            interval_loss.append(loss.item())
            if (i+1) % (k1 * 35) == 0:
                # validate
                errs = []
                model.eval()
                # Validation needs no gradients: run under no_grad and store plain
                # floats so that no autograd graph is kept alive per sample.
                with torch.no_grad():
                    for val_seq, val_targ in tqdm_notebook(zip(X_test, y_test), desc = "Validation Sample", unit = "sample", total = len(X_test), leave = False):
                        val_seq = torch.Tensor(val_seq).long().view(1, -1)
                        out, h = model(val_seq, model.init_hidden(val_seq.shape[0]))
                        errs.append(((out - val_targ) ** 2).item())

                avg_err = torch.mean(torch.Tensor(errs))
                avg_intval_loss = torch.mean(torch.Tensor(interval_loss))

                tqdm.write('Epoch: [{}/{}], Step: [{}/{}], Average MSE: {:.4f}, Avg Loss: {:.4f}'.format(
                           epoch+1, num_epochs, i+1, len(X_train), avg_err, avg_intval_loss))

                interval_loss = []

HBox(children=(IntProgress(value=0, description='Training Epochs', max=2), HTML(value='')))

HBox(children=(IntProgress(value=0, description='Training Sample', max=3519), HTML(value='')))

HBox(children=(IntProgress(value=0, description='Validation Sample', max=621), HTML(value='')))

In [None]:
# Training loop for loaders that yield (data, lengths, labels) batches.
# Each batch is split along the sequence dimension into int(seq_len / T) chunks;
# the hidden state is detached after every chunk except the last two, so the
# gradient of the loss only flows through the final two chunks.
# NOTE(review): T is not defined in the visible cells - define it before running.
losses = []          # loss of every optimisation step
interval_loss = []   # losses collected since the last validation report
for epoch in tqdm_notebook(range(num_epochs), desc = "Training Epochs", unit = "epoch"):
    for i, (data, lengths, labels) in enumerate(tqdm_notebook(train_loader, desc = "Batches", unit = "batch")):
        # test_model() switches the model to eval mode, so re-enable training mode each step.
        model.train()
        optimizer.zero_grad()

        batch_size, seq_len = data.shape

        hidden = model.init_hidden(batch_size)

        chunks = int(seq_len / T)
        if chunks > 0:
            data_truncated = torch.chunk(data, chunks, dim = 1)
            # A separate index name keeps the batch counter `i` intact for the
            # validation schedule and the step printout below.
            for chunk_idx, truncated_slice in enumerate(data_truncated):
                # Per-row count of entries that differ from 0.
                # NOTE(review): presumes the padding index is 0; a chunk made up
                # entirely of padding yields length 0 - confirm the model accepts that.
                slice_lengths = (truncated_slice != 0).sum(1)
                outputs, hidden = model(truncated_slice, slice_lengths, hidden)

                if chunk_idx < len(data_truncated) - 2:
                    # Cut the graph here. detach() returns a new tensor, so the
                    # result must be re-assigned; calling backward() on the
                    # non-scalar hidden state would raise and is not needed.
                    hidden = hidden.detach()
        else:
            # Sequence shorter than T: process it in one piece, passing the batch
            # lengths just like the chunked call above.
            outputs, hidden = model(data, lengths, hidden)

        loss = criterion(outputs, labels)

        # Backward and optimize
        loss.backward()

#         enc_grads = torch.nn.utils.clip_grad_norm_(model.parameters(), 40)

        optimizer.step()

        losses.append(loss.item())
        interval_loss.append(loss.item())
        if i > 0 and i % 300 == 0:
            # validate every 300 batches
            # NOTE(review): confirm test_model accepts the batches of val_loader;
            # this loop consumes three-element batches.
            val_acc = test_model(val_loader, model)
            avg_intval_loss = sum(interval_loss) / len(interval_loss)
            tqdm.write('Epoch: [{}/{}], Step: [{}/{}], Average MSE: {:.4f}, Avg Loss: {:.4f}'.format(
                       epoch+1, num_epochs, i+1, total_step, val_acc, avg_intval_loss))
            interval_loss = []

In [None]:
# losses = []
# interval_loss = []
# for epoch in tqdm_notebook(range(num_epochs), desc = "Training Epochs", unit = "epoch"):
#     for i, (data, labels) in enumerate(tqdm_notebook(train_loader, desc = "Batches", unit = "batch")):
#         model.train()
#         optimizer.zero_grad()
        
#         batch_size, seq_len = data.shape
#         hidden = model.init_hidden(batch_size)
        
#         chunks = int(seq_len / T)
#         if chunks > 0:
#             data_truncated = torch.chunk(data, chunks, dim = 1)
#             for truncated_slice in data_truncated:
#                 outputs, hidden = model(truncated_slice, hidden)
#         else:
#             outputs, hidden = model(data, hidden)

#         loss = criterion(outputs, labels)

#         # Backward and optimize
#         loss.backward()
        
# #         enc_grads = torch.nn.utils.clip_grad_norm_(model.parameters(), 40)
        
#         optimizer.step()
                
#         losses.append(loss.item())
#         interval_loss.append(loss.item())
#         if i > 0 and i % 300 == 0:
#             # validate
#             val_acc = test_model(val_loader, model)
#             avg_intval_loss = sum(interval_loss) / len(interval_loss)
#             tqdm.write('Epoch: [{}/{}], Step: [{}/{}], Average MSE: {:.4f}, Avg Loss: {:.4f}'.format(
#                        epoch+1, num_epochs, i+1, total_step, val_acc, avg_intval_loss))
#             interval_loss = []