In [5]:
import os
from queue import PriorityQueue

import numpy as np
from nltk import word_tokenize
from pandas import read_csv
from tqdm import tqdm
from tqdm import tnrange, tqdm_notebook

import torch
import torch.distributions as D
import torch.nn as nn
import torch.nn.functional as F
from torch import optim
from torch.autograd import Variable

# Vanilla Load and Test

In [9]:
# Read the corpus, strip each line, and flatten it into one lower-cased string
# (every line is prefixed with a single space, as before).
with open('tolstoy_anna.txt', 'r') as f:
    txt = [line.strip() for line in f]
txt_str = ''.join(' {}'.format(t.lower()) for t in txt)
tokens = word_tokenize(txt_str)
# word2idx and idx2word setup
# The vocabulary is sorted so that the word <-> index mapping is the same on
# every kernel start. Enumerating a bare set of strings depends on string
# hashing, which Python randomises per process, so indices would otherwise
# change between sessions and no longer line up with a saved checkpoint.
# NOTE(review): a checkpoint trained under the old set-ordered mapping cannot
# be matched to this ordering; confirm how models/model_9.pt was produced.
unique_tokens = sorted(set(tokens))
w2x = {word: idx for idx, word in enumerate(unique_tokens)}
x2w = {idx: word for word, idx in w2x.items()}
indices = [w2x[w] for w in tokens]
vocab_size = len(unique_tokens)

In [10]:
# Continuous Bag-of-Words Model
class CBOW(nn.Module):
    """Feed-forward word predictor: embedding -> linear -> ReLU -> linear -> softmax.

    Args:
        vocab_size: number of rows in the embedding table and size of the
            output distribution.
        embedding_dim: width of each embedding vector (input width of fc1).
        hidden_dim: width of the hidden layer.
        context_size: accepted for interface compatibility; not read here.
        batch_size: number of rows the embedded input is reshaped to.
    """

    def __init__(self, vocab_size, embedding_dim, hidden_dim,
                 context_size, batch_size):
        super().__init__()
        self.batch_size = batch_size
        self.embed = nn.Embedding(vocab_size, embedding_dim)
        self.fc1 = nn.Linear(embedding_dim, hidden_dim)
        self.fc2 = nn.Linear(hidden_dim, vocab_size)
        # Softmax is taken over the last axis of the (batch, 1, vocab) tensor.
        self.out = nn.Softmax(dim=2)

    def forward(self, x):
        """Return the softmax over the vocabulary for the word indices in `x`.

        The embedded input is reshaped to (batch_size, 1, -1); since fc1 takes
        `embedding_dim` features, `x` must hold one index per batch row.
        Size-1 dimensions are squeezed out of the result.
        """
        embedded = self.embed(x)
        flat = embedded.view(self.batch_size, 1, -1)
        hidden = F.relu(self.fc1(flat))
        probs = self.out(self.fc2(hidden))
        return probs.squeeze()

# Build the CBOW network that the checkpoint is loaded into further down.
# batch_size=1: forward() reshapes its input to a single row.
# context_size is accepted by CBOW.__init__ but not used by it.
model = CBOW(vocab_size=vocab_size, embedding_dim=100, hidden_dim=128,
             context_size=2, batch_size=1)

In [11]:
# NOTE(review): torch.load unpickles the file, so only load checkpoints from a
# trusted source.
# The map_location callable keeps every tensor on the CPU, so a checkpoint
# that was saved from a GPU still loads on a CPU-only machine (the model is
# never moved to a GPU in the cells shown here).
model_dict = torch.load('models/model_9.pt',
                        map_location=lambda storage, loc: storage)

In [12]:
# Copy the saved weights into `model`; the checkpoint's keys and shapes must
# match the CBOW instance built above.
model.load_state_dict(model_dict)

In [13]:
# Switch the module to evaluation mode; the returned module is displayed as
# the cell output.
model.eval()

CBOW(
  (embed): Embedding(15080, 100)
  (fc1): Linear(in_features=100, out_features=128)
  (fc2): Linear(in_features=128, out_features=15080)
  (out): Softmax()
)

In [14]:
def tolstoy_writes(start_word, length):
    """Sample `length` words from the global `model` and print the result.

    Each step feeds the previous word's index to `model`, draws the next index
    from the returned categorical distribution, and appends the matching word.
    A word is title-cased when the word before it contains a full stop.

    Args:
        start_word: seed word. It is lower-cased before the vocabulary lookup
            because the vocabulary is built from lower-cased text.
        length: number of words to generate after the seed word.

    Raises:
        KeyError: if the lower-cased seed word is not in `w2x`.
    """
    x = w2x[start_word.lower()]
    words = [start_word.title()]
    capital = False
    for _ in range(length):
        p_dist = model(Variable(torch.LongTensor([x])))
        x = int(D.Categorical(p_dist).sample())
        w = x2w[x]
        words.append(w.title() if capital else w)
        # Capitalise the next word when this one contains a full stop.
        capital = "." in w
    print(" ".join(words))

In [17]:
# Generate 20 words after the seed word 'joy'. Every word is drawn at random
# from the model's distribution, so the text differs from run to run.
tolstoy_writes('joy', 20)

Joy tire intelligence. Patched lawyer whim thee bezzubov lavished preparing trot nikandrov tips opponents keiss mangle infallible concluded petty committees—everywhere _tiutkin


In [None]:
ls models

# Chocolate Implementation

In [None]:
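# NOTE: disabled reference code, not used by the cells below. It builds
# (context, target) pairs with `window` words on each side of every token,
# keeping only positions that have a full context.
# window = 2
# X, y = [], []
# for i, token in enumerate(tokens):
#     tmp = []
#     for w in range(-window, window + 1):
#         if not (i + w < 0 or i + w >= len(tokens) or w == 0):
#             tmp.append(w2x[tokens[i + w]])
#     if len(tmp) == window * 2:
#         X.append(tmp)
#         y.append(w2x[token])
# X = np.array(X)
# y = np.array(y)
# X = Variable(torch.LongTensor(X))
# y = torch.LongTensor(y)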

In [None]:
def one_hot(idx, size=None):
    """Return a float vector of zeros with a single 1.0 at position `idx`.

    Args:
        idx: position to set to 1.0.
        size: length of the vector. Defaults to the module-level `vocab_size`,
            which is what the one-argument form has always used.
    """
    if size is None:
        size = vocab_size
    one_hot_vec = torch.zeros(size).float()
    one_hot_vec[idx] = 1.0
    return one_hot_vec


def vec_loss(pred, gt):
    """Squared-error loss between a predicted vector and its target.

    The differences are squared before summing. A plain signed sum lets
    positive and negative errors cancel: for a softmax output and a one-hot
    target, which each sum to 1, it is always ~0 and carries no training
    signal.

    Args:
        pred: predicted vector.
        gt: target tensor of the same shape (wrapped in a Variable here).

    Returns:
        Scalar tensor: sum over elements of (pred - gt) ** 2.
    """
    delta = pred - Variable(gt)
    return torch.sum(delta * delta)


def batchify(data, batch_size, use_cuda=False):
    """Split column 0 (inputs) and column 1 (targets) of `data` into batches.

    Trailing rows that do not fill a whole batch are dropped. The cut-off is
    computed as an explicit row count: slicing with `[:-remainder]` returns
    nothing when the remainder is 0, i.e. when the data already divides evenly.

    Args:
        data: 2-D tensor; column 0 holds inputs, column 1 holds targets.
        batch_size: number of items per batch row.
        use_cuda: if True, move the input batches to the GPU.

    Returns:
        (x, y): two tensors of shape (-1, batch_size).
    """
    keep = len(data) - len(data) % batch_size
    x = data[:keep, 0].contiguous().view(-1, batch_size)
    y = data[:keep, 1].contiguous().view(-1, batch_size)
    if use_cuda:
        # NOTE(review): only x is moved to the GPU, as before; confirm whether
        # y is meant to stay on the CPU.
        x = x.cuda()
    return x, y

In [None]:
# Passe-Avenir co-predict predictor
class PasseAvenir2(nn.Module):
    """Context predictor: embedding -> single-layer LSTM -> linear -> ReLU -> softmax.

    Args:
        vocab_size: size of the output distribution (and of the embedding
            table when one is created here).
        embedding_dim: width of each embedding vector.
        hidden_dim: LSTM hidden size.
        window: number of context words consumed per sample.
        batch_size: number of samples per forward call.
        embed: optional embedding module to reuse; a new nn.Embedding is
            created when this is None.
    """

    def __init__(self, vocab_size, embedding_dim, hidden_dim,
                 window, batch_size, embed=None):
        super(PasseAvenir2, self).__init__()
        self.batch_size = batch_size
        self.emb_dim = embedding_dim
        self.window = window
        # Compare against None explicitly rather than relying on the
        # truthiness of a module object.
        if embed is None:
            self.embed = nn.Embedding(vocab_size, embedding_dim)
        else:
            self.embed = embed
        self.lstm = nn.LSTM(self.emb_dim, hidden_dim, 1, dropout=0)
        self.fc1 = nn.Linear(window * hidden_dim, vocab_size)
        self.out = nn.Softmax(dim=1)

    def forward(self, x):
        """Return the softmax over the vocabulary for the context indices `x`.

        `x` is embedded and reshaped to (window, batch_size, emb_dim), the
        time-major layout the LSTM is fed with. Size-1 dimensions are squeezed
        out of the result.
        """
        x = self.embed(x).view(self.window, self.batch_size, self.emb_dim)
        x, _ = self.lstm(x)
        # The LSTM output is (window, batch, hidden). Move batch to the front
        # before flattening so each row holds one sample's time steps; a direct
        # view would interleave samples when batch_size > 1.
        x = x.transpose(0, 1).contiguous().view(self.batch_size, -1)
        # NOTE(review): the ReLU is applied to the values fed to the softmax.
        x = F.relu(self.fc1(x))
        return self.out(x).squeeze()

In [None]:
# Four identically configured context predictors (window of 3 words, batch of
# one), all reusing the embedding layer of the CBOW `model`.
PastOne, PastTwo, FutureOne, FutureTwo = [
    PasseAvenir2(vocab_size=vocab_size,
                 embedding_dim=100, hidden_dim=128,
                 window=3, batch_size=1, embed=model.embed)
    for _ in range(4)
]

In [None]:
# Passe-Avenir manager
class PA2Manager(nn.Module):
    """Combines 2 * window predictor outputs into one vocabulary distribution.

    The inputs are stacked into a sequence, passed through a bidirectional
    LSTM, flattened, and mapped through linear -> ReLU -> softmax.

    Args:
        vocab_size: width of each input vector and of the output distribution.
        hidden_dim: LSTM hidden size per direction.
        window: half the number of input vectors (the sequence length is
            2 * window).
        batch_size: number of samples per forward call.
    """

    def __init__(self, vocab_size, hidden_dim, window, batch_size=1):
        super(PA2Manager, self).__init__()
        self.window = window
        self.vocab_size = vocab_size
        self.batch_size = batch_size
        self.lstm = nn.LSTM(self.vocab_size, hidden_dim, bidirectional=True)
        # 2 * window time steps, each with 2 * hidden_dim features (two directions).
        self.fc1 = nn.Linear(window * 2 * hidden_dim * 2, vocab_size)
        self.out = nn.Softmax(dim=1)

    def forward(self, x):
        """Return the combined distribution for a sequence `x` of 2 * window tensors.

        The tensors are stacked along a new leading axis and reshaped to
        (2 * window, batch_size, vocab_size). Size-1 dimensions are squeezed
        out of the result.
        """
        x = torch.stack(x, dim=0)
        x = x.view(self.window * 2, self.batch_size, self.vocab_size)
        x, _ = self.lstm(x)
        # The LSTM output is (seq, batch, 2 * hidden). Put batch first before
        # flattening so rows are per-sample; a direct view would interleave
        # samples when batch_size > 1.
        x = x.transpose(0, 1).contiguous().view(self.batch_size, -1)
        # NOTE(review): the ReLU is applied to the values fed to the softmax.
        x = F.relu(self.fc1(x))
        return self.out(x).squeeze()

In [None]:
# window=2 means the manager consumes 2 * window = 4 predictor outputs per step.
Manager = PA2Manager(vocab_size=vocab_size, hidden_dim=128, window=2, batch_size=1)

In [None]:
def train(indices, num_epochs, use_cuda=False):
    """Jointly train the four context predictors and the manager with SGD.

    For every position `idx` with at least five tokens on each side, the
    predictors read the three-token windows around it, the manager combines
    their outputs, and the result is scored against the one-hot vector of the
    token at `idx`.

    Args:
        indices: sequence of integer token ids for the whole corpus.
        num_epochs: number of passes over the corpus.
        use_cuda: currently ignored; training runs on whichever device the
            global networks already live on.

    Side effects:
        Prints the log of the running average loss whenever `idx` is a
        multiple of 2000, and writes the state dicts of all five networks
        (keyed by name) to 'choco_models/model_<epoch>.pt' whenever `idx` is a
        multiple of a quarter of the corpus length.
    """
    loss_fn = vec_loss
    networks = [('PastOne', PastOne), ('PastTwo', PastTwo),
                ('FutureOne', FutureOne), ('FutureTwo', FutureTwo),
                ('Manager', Manager)]
    # Networks may share parameters (e.g. a common embedding); give each
    # tensor to the optimizer exactly once.
    params, seen = [], set()
    for _, net in networks:
        for p in net.parameters():
            if id(p) not in seen:
                seen.add(id(p))
                params.append(p)
    optimizer = optim.SGD(params, lr=1e-3)
    scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=1000, gamma=0.9)
    indices = torch.LongTensor(indices)
    first, last = 5, indices.shape[0] - 5
    save_every = max(1, len(indices) // 4)
    for epoch in tnrange(num_epochs, desc='epoch'):
        total_loss = 0.0
        # batch_size 1 only for now
        for idx in tqdm_notebook(range(first, last),
                                 desc='index', leave=False):
            # Clear the gradients of every trained parameter, not only those
            # of the CBOW model.
            optimizer.zero_grad()
            ############################
            # TODO: We can Batch Here! #
            ############################

            # window size is 3 for these
            f1 = FutureOne(Variable(indices[idx - 3 : idx]))
            f2 = FutureTwo(Variable(indices[idx - 4: idx - 1]))
            p1 = PastOne(Variable(indices[idx + 1 : idx + 4]))
            p2 = PastTwo(Variable(indices[idx + 2 : idx + 5]))
            x = [f2, f1, p1, p2]
            prob = Manager(x)
            gt = one_hot(indices[idx])
            loss = loss_fn(prob, gt)
            loss.backward()
            # Apply the update; without this step no weight ever changes.
            optimizer.step()
            scheduler.step()
            total_loss += float(loss.data)
            if idx % 2000 == 0:
                # Average over the steps actually taken this epoch.
                num_seen = idx - first + 1
                avg = total_loss / num_seen
                l = np.log(avg) if avg > 0 else float('nan')
                print("BATCH: {}/{} | AVG LOG LOSS: {}".format(idx + 1,
                                                           len(indices),
                                                           l))
            if idx % save_every == 0:
                # Save the networks trained here rather than the CBOW model.
                os.makedirs('choco_models', exist_ok=True)
                torch.save({name: net.state_dict() for name, net in networks},
                           'choco_models/model_{}.pt'.format(epoch))
                print("Successfully saved model")

In [None]:
# Run a single epoch over the tokenised corpus (use_cuda keeps its default of False).
train(indices, 1)