In [1]:
import random

import numpy as np
from tqdm import tqdm
from pandas import read_csv
from tqdm import tnrange, tqdm_notebook
from nltk import word_tokenize

import torch
from torch import optim
from torch.autograd import Variable
import torch.nn as nn
import torch.nn.functional as F
import torch.distributions as D

  return f(*args, **kwds)


# Vanilla Load and Test

In [2]:
with open('tolstoy_anna.txt', 'r') as f:
    txt = f.readlines()
txt = [x.strip() for x in txt]
txt_str = ''
for t in txt:
    txt_str += ' {}'.format(t.lower())
tokens = word_tokenize(txt_str)
# word2idx and idx2word setup
unique_tokens = set(tokens)
w2x = {word: idx for (idx, word) in enumerate(unique_tokens)}
x2w = {idx: word for (idx, word) in enumerate(unique_tokens)}
indices = [w2x[w] for w in tokens]
vocab_size = len(unique_tokens)

In [None]:
# Continuous Bag-of-Words Model
class CBOW(nn.Module):
    def __init__(self, vocab_size, embedding_dim, hidden_dim,
                 context_size, batch_size):
        super(CBOW, self).__init__()
        self.batch_size = batch_size
        self.embed = nn.Embedding(vocab_size, embedding_dim)
        self.fc1 = nn.Linear(embedding_dim, hidden_dim)
        self.fc2 = nn.Linear(hidden_dim, vocab_size)
        self.out = nn.Softmax(dim=2)

    def forward(self, x):
        x = self.embed(x).view(self.batch_size, 1, -1)
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return self.out(x).squeeze()

model = CBOW(vocab_size=vocab_size, embedding_dim=100, hidden_dim=128,
             context_size=2, batch_size=1)

In [None]:
model_dict = torch.load('models/model_14.pt')

In [None]:
model.load_state_dict(model_dict)

In [None]:
# model.eval()

In [None]:
def tolstoy_writes(start_word, length):
    speech = "{}".format(start_word.title())
    x = w2x[start_word]
    capital = False
    for _ in range(length):
        p_dist = model(Variable(torch.LongTensor([x])))
        x = int(D.Categorical(p_dist).sample())
        w = x2w[x]
        if capital:
            speech += " {}".format(w.title())
        else:
            speech += " {}".format(w)
        if "." in w:
            capital = True
        else:
            capital = False
    print(speech)

In [None]:
tolstoy_writes('love', 100)

In [None]:
ls choco_models

# Chocolate Implementation

In [3]:
# generate training data
window = 5
past_data, future_data, target_data = [], [], []
for idx in range(len(indices)):
    past, future = [], []
    for r in range(-window, window + 1):
        cxt = idx + r
        if (r < 0) and not ((cxt < 0) or (cxt >= len(indices))):
            past.append(indices[cxt])
        elif (r > 0) and not ((cxt < 0) or (cxt >= len(indices))):
            future.append(indices[cxt])
    if len(past) == len(future) == window:
        past_data.append(past)
        future_data.append(future)
        target_data.append(indices[idx])
past_data = torch.LongTensor(past_data)
future_data = torch.LongTensor(future_data)
target_data = torch.LongTensor(target_data)

In [13]:
def one_hot(idx_batch):
    one_hot_mat = torch.zeros((len(idx_batch), vocab_size)).float()
    indices = torch.LongTensor(idx_batch).view(-1, 1)
    one_hot_mat.scatter_(1, indices, 1.0)
    return one_hot_mat

def mat_loss(pred, gt):
    delta = pred - gt
    norm = torch.norm(delta, p=2, dim=1)
    return torch.log(torch.sum(norm) / gt.shape[1])

def batchify(data, batch_size, use_cuda=False):
    rm_size = len(data) % batch_size
    data = data[:-rm_size].contiguous()
    if len(data.shape) == 1:
        data = data.view(-1, batch_size)
    else:
        data = data.view(-1, batch_size,
                         *data.shape[1:])
    if use_cuda:
        return data.cuda()
    else:
        return data

In [14]:
# Passe-Avenir co-predict predictor
class PasseAvenir(nn.Module):
    def __init__(self, vocab_size, embedding_dim, hidden_dim,
                 window, batch_size, embed=None):
        super(PasseAvenir, self).__init__()
        self.batch_size = batch_size
        self.emb_dim = embedding_dim
        self.window = window
        if not embed:
            self.embed = nn.Embedding(vocab_size, embedding_dim)
        else:
            self.embed = embed
        self.lstm = nn.LSTM(self.emb_dim, hidden_dim, 2, dropout=0.2)
        self.fc = nn.Linear(window * hidden_dim, self.emb_dim)

    def forward(self, x):
        x = self.embed(x).view(self.window, self.batch_size, self.emb_dim)
        x, _ = self.lstm(x)
        x = x.view(self.batch_size, -1)
        x = F.relu(self.fc(x))
        return x

In [15]:
# Passe-Avenir manager
class PA2Manager(nn.Module):
    def __init__(self, vocab_size, embed_dim, hidden_dim, batch_size=1):
        super(PA2Manager, self).__init__()
        self.embed_dim = embed_dim
        self.hidden_dim = hidden_dim
        self.vocab_size = vocab_size
        self.batch_size = batch_size
        self.fc1 = nn.Linear(embed_dim * 2, hidden_dim)
        self.fc2 = nn.Linear(hidden_dim, vocab_size)
        self.out = nn.Softmax(dim=1)
    
    def forward(self, x):
        x = x.view(self.batch_size, self.embed_dim * 2)
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return self.out(x)

In [16]:
def train(X_p, X_f, y, num_epochs, batch_size, use_cuda=False):
    loss_fn = mat_loss
    params = list(Past.parameters()) + list(Future.parameters()) + \
             list(Manager.parameters())
    optimizer = optim.Adam(params, lr=1e-2)
    losses = []
    for epoch in tnrange(num_epochs, desc='epoch'):
        total_loss = 0
        # shuffle
        comb = list(zip(X_p, X_f, y))
        random.shuffle(comb)
        X_p, X_f, y = zip(*comb)
        X_p, X_f, y = torch.stack(X_p), torch.stack(X_f), torch.stack(y)
        for batch_idx in tqdm_notebook(range(y.shape[0]),
                                           desc='index', leave=False):
            optimizer.zero_grad()
            x = torch.cat([Past(X_p[batch_idx]),
                           Future(X_f[batch_idx])], dim=1)
            log_prob = Manager(x)
            gt = one_hot(y[batch_idx])
            if use_cuda:
                gt = gt.cuda()
            loss = loss_fn(log_prob, gt)
            loss.backward()
            optimizer.step()
            total_loss += loss.data
        losses.append(total_loss)
        l = float(total_loss / y.shape[0])
        print("EPOCH: {}/{} | AVG LOG LOSS: {}".format(epoch + 1, num_epochs, l))
        if len(losses) >= 2:
            print("LOSS CHANGE: {}%".format(round(float(100 * (losses[-1] -  losses[-2]) / losses[-2]), 5)))
        if epoch % 20 == 0:
            torch.save(Past.state_dict(),
                       'choco_models/past_{}_{}.pt'.format(epoch + 1, round(l, 4)))
            torch.save(Future.state_dict(),
                       'choco_models/future_{}_{}.pt'.format(epoch + 1, round(l, 4)))
            torch.save(Manager.state_dict(),
                       'choco_models/manager_{}_{}.pt'.format(epoch + 1, round(l, 4)))
        print("Successfully saved model")

In [17]:
###################
window = 5        #
emb_dim = 100     #
num_epochs = 1000 #
batch_size = 64   #
pa_hid_dim = 256  #
mgr_hid_dim = 256 #
###################

Past = PasseAvenir(vocab_size=vocab_size, embedding_dim=emb_dim,
                   hidden_dim=pa_hid_dim, window=window,
                   batch_size=batch_size, embed=None).cuda() # enable cuda
Future = PasseAvenir(vocab_size=vocab_size, embedding_dim=emb_dim,
                     hidden_dim=pa_hid_dim, window=window,
                     batch_size=batch_size, embed=None).cuda() # enable cuda
Manager = PA2Manager(vocab_size=vocab_size, embed_dim=emb_dim,
                     hidden_dim=mgr_hid_dim, batch_size=batch_size).cuda() # enable cuda

In [None]:
X_past = batchify(past_data, batch_size=batch_size, use_cuda=True) # enable cuda
X_future = batchify(future_data, batch_size=batch_size, use_cuda=True) # enable cuda
y = batchify(target_data, batch_size=batch_size, use_cuda=False)
# train
train(X_past, X_future, y, num_epochs=num_epochs, batch_size=batch_size, use_cuda=True)

HBox(children=(IntProgress(value=0, description='epoch', max=1000), HTML(value='')))

HBox(children=(IntProgress(value=0, description='index', max=6731), HTML(value='')))

EPOCH: 1/1000 | AVG LOG LOSS: -5.489077568054199
Successfully saved model


HBox(children=(IntProgress(value=0, description='index', max=6731), HTML(value='')))