In [3]:
import time

import numpy as np
import torch
import torch.nn as nn
from torch.autograd import Variable


class LMLSTM(nn.Module):
    """Simple LSTM-based language model.

    The network is: embedding -> dropout -> multi-layer LSTM -> dropout ->
    linear projection onto the vocabulary. The LSTM hidden size equals the
    embedding size.

    Args:
        embedding_dim: size of the word embeddings and of the LSTM hidden state.
        num_steps: nominal number of time steps per batch (stored on the model
            and read by the training loop).
        batch_size: nominal batch size; used by ``init_hidden`` to size the
            initial state.
        vocab_size: number of rows in the embedding table and number of
            output logits per position.
        num_layers: number of stacked LSTM layers.
        dp_keep_prob: probability of *keeping* a unit; dropout probability is
            ``1 - dp_keep_prob``.
    """

    def __init__(
        self,
        embedding_dim,
        num_steps,
        batch_size,
        vocab_size,
        num_layers,
        dp_keep_prob,
        ):
        # The class is called LMLSTM; the explicit two-argument form of
        # super() must name this class.
        super(LMLSTM, self).__init__()
        self.embedding_dim = embedding_dim
        self.num_steps = num_steps
        self.batch_size = batch_size
        self.vocab_size = vocab_size
        self.dp_keep_prob = dp_keep_prob
        self.num_layers = num_layers
        self.dropout = nn.Dropout(1 - dp_keep_prob)
        self.word_embeddings = nn.Embedding(vocab_size, embedding_dim)
        self.lstm = nn.LSTM(input_size=embedding_dim,
                            hidden_size=embedding_dim,
                            num_layers=num_layers,
                            dropout=1 - dp_keep_prob)
        self.sm_fc = nn.Linear(in_features=embedding_dim,
                               out_features=vocab_size)
        self.init_weights()

    def init_weights(self):
        """Initialise embedding and output weights uniformly in [-0.1, 0.1]
        and zero the output bias. The LSTM keeps PyTorch's default init."""
        init_range = 0.1
        self.word_embeddings.weight.data.uniform_(-init_range,
                init_range)
        self.sm_fc.bias.data.fill_(0.0)
        self.sm_fc.weight.data.uniform_(-init_range, init_range)

    def init_hidden(self):
        """Return a zero ``(h_0, c_0)`` pair, each of shape
        ``(num_layers, batch_size, embedding_dim)``.

        The tensors are created from an existing parameter so they share its
        type and device.
        """
        weight = next(self.parameters()).data
        state_shape = (self.num_layers, self.batch_size, self.embedding_dim)
        return (Variable(weight.new(*state_shape).zero_()),
                Variable(weight.new(*state_shape).zero_()))

    def forward(self, inputs, hidden):
        """Compute vocabulary logits for every position.

        Args:
            inputs: integer token ids laid out as ``(steps, batch)`` (the LSTM
                is built with the default ``batch_first=False``).
            hidden: ``(h, c)`` state tuple as returned by ``init_hidden`` or
                by a previous call.

        Returns:
            ``(logits, hidden)`` where ``logits`` has shape
            ``(steps, batch, vocab_size)`` and ``hidden`` is the updated state.
        """
        embeds = self.dropout(self.word_embeddings(inputs))
        (lstm_out, hidden) = self.lstm(embeds, hidden)
        lstm_out = self.dropout(lstm_out)
        logits = self.sm_fc(lstm_out.view(-1, self.embedding_dim))
        # Take the leading dimensions from the actual LSTM output instead of
        # self.num_steps / self.batch_size, so a batch whose shape differs
        # from the configured one is still reshaped correctly. For batches
        # that match the configuration the result is identical.
        return (logits.view(lstm_out.size(0), lstm_out.size(1),
                self.vocab_size), hidden)

def repackage_hidden(h):
    """Wrap hidden states in new Variables to detach them from their history.

    Args:
        h: a single Variable/tensor, or an arbitrarily nested iterable of
            them (e.g. the ``(h, c)`` tuple an LSTM returns).

    Returns:
        A detached copy of ``h`` with the same nesting; iterables come back
        as tuples.
    """
    # isinstance (not ``type(h) == Variable``) so the check also matches
    # plain tensors on PyTorch versions where Variable and Tensor are merged;
    # an exact type comparison is False there and the function would recurse
    # into the tensor's elements.
    if isinstance(h, Variable):
        return Variable(h.data)
    return tuple(repackage_hidden(v) for v in h)

In [None]:
# 1. Load glove embeddings
# 2. Map texts to indexes based on glove embeddings, calculate OOV %, freeze layer
# 3. Train language model on unlabelled corpus
# 4. 
#
#
#
def batches(file):
    """Placeholder for the mini-batch iterator; not implemented yet.

    ``run_epoch`` iterates over the result of this function and unpacks each
    item as an ``(x, y)`` pair of NumPy arrays (it calls ``.astype`` on both).

    Args:
        file: the data source to read batches from.
            NOTE(review): the expected type/format is not established anywhere
            in this notebook -- confirm when implementing.

    Raises:
        NotImplementedError: always, until the iterator is written.
    """
    # The original definition had no body, which is a SyntaxError for the
    # whole cell. Fail explicitly at call time instead.
    raise NotImplementedError('batches() has not been implemented yet')

In [5]:
criterion = nn.CrossEntropyLoss()


def _to_time_major(batch, use_cuda):
    """Turn a 2-D integer NumPy array into an int64 Variable with its first
    two axes swapped, moved to the GPU when ``use_cuda`` is true."""
    tensor = torch.from_numpy(batch.astype(np.int64)).transpose(0, 1)
    var = Variable(tensor.contiguous())
    return var.cuda() if use_cuda else var


def run_epoch(
    model,
    data,
    is_train=False,
    lr=1.0,
):
    """Runs the model on the given data.

    Args:
        model: module exposing ``batch_size``, ``num_steps``, ``vocab_size``,
            ``init_hidden()`` and ``forward(inputs, hidden)``.
        data: dataset handed to ``batches``; its ``len`` is also used to
            estimate the number of steps for progress reporting.
        is_train: when true, back-propagate and apply a plain SGD update with
            gradient-norm clipping at 0.25; otherwise only evaluate.
        lr: SGD learning rate (used only when ``is_train`` is true).

    Returns:
        Perplexity over the epoch, ``exp(total_cost / total_steps)``.
    """

    if is_train:
        model.train()
    else:
        model.eval()
    epoch_size = (len(data) // model.batch_size - 1) // model.num_steps
    # Guard the modulus: epoch_size // 10 is 0 for short epochs, which would
    # raise ZeroDivisionError in the progress check below.
    log_every = max(epoch_size // 10, 1)
    # Follow the model's device instead of unconditionally calling .cuda().
    use_cuda = next(model.parameters()).is_cuda
    # Prefer the in-place-named API when this PyTorch version has it.
    clip_grad = (getattr(torch.nn.utils, 'clip_grad_norm_', None)
                 or torch.nn.utils.clip_grad_norm)
    start_time = time.time()
    hidden = model.init_hidden()
    costs = 0.0
    iters = 0
    # NOTE(review): the original called batches() with no argument although
    # it is declared as batches(file); `data` is the only candidate in scope.
    # Confirm this is the intended source.
    for (step, (x, y)) in enumerate(batches(data)):
        inputs = _to_time_major(x, use_cuda)
        targets = _to_time_major(y, use_cuda)
        model.zero_grad()
        # Detach the carried state so gradients stop at the batch boundary.
        hidden = repackage_hidden(hidden)
        (outputs, hidden) = model(inputs, hidden)

        loss = criterion(outputs.view(-1, model.vocab_size),
                         targets.view(-1))
        # .item() where available; loss.data[0] only works on old PyTorch
        # where the loss is a 1-element tensor rather than 0-dim.
        loss_value = loss.item() if hasattr(loss, 'item') else loss.data[0]
        costs += loss_value * model.num_steps
        iters += model.num_steps

        if is_train:
            loss.backward()
            clip_grad(model.parameters(), 0.25)
            for p in model.parameters():
                if p.grad is not None:
                    # Same update as add_(-lr, grad) without relying on the
                    # deprecated (alpha, tensor) positional signature.
                    p.data.add_(-lr * p.grad.data)
            if step % log_every == 10:
                print('{} perplexity: {:8.2f} speed: {} wps'.format(step
                        * 1.0 / epoch_size, np.exp(costs / iters),
                        iters * model.batch_size / (time.time()
                        - start_time)))
    return np.exp(costs / iters)

In [None]:
#!/usr/bin/python
# -*- coding: utf-8 -*-
# Build, train, evaluate and save the language model.
# NOTE(review): `args`, `vocab_size`, `train_data`, `valid_data` and
# `test_data` are not defined in any visible cell; they must exist in the
# kernel before this cell runs.
print('Vocabluary size: {}'.format(vocab_size))
model = LMLSTM(
    embedding_dim=args.hidden_size,
    num_steps=args.num_steps,
    batch_size=args.batch_size,
    vocab_size=vocab_size,
    num_layers=args.num_layers,
    dp_keep_prob=args.dp_keep_prob,
    )
model.cuda()
initial_lr = args.inital_lr
lr = initial_lr

# decay factor for learning rate

lr_decay_base = 1 / 1.15

# we will not touch lr for the first m_flat_lr epochs

m_flat_lr = 14.0

print('########## Training ##########################')
for epoch in range(args.num_epochs):
    lr_decay = lr_decay_base ** max(epoch - m_flat_lr, 0)
    # lr_decay already grows with the epoch number, so apply it to the
    # initial rate. Multiplying the running `lr` would compound the decay
    # every epoch and shrink the rate far faster than the schedule states.
    lr = initial_lr * lr_decay
    train_p = run_epoch(model, train_data, True, lr)
    print('Train perplexity at epoch {}: {:8.2f}'.format(epoch, train_p))
    print('Validation perplexity at epoch {}: {:8.2f}'.format(epoch,
            run_epoch(model, valid_data)))
print('########## Testing ##########################')
model.batch_size = 1  # to make sure we process all the data
print('Test Perplexity: {:8.2f}'.format(run_epoch(model, test_data)))
with open(args.save, 'wb') as f:
    torch.save(model, f)
print('########## Done! ##########################')
