In [1]:
import os

import itertools
import pickle
import matplotlib.pyplot as plt

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.autograd import Variable
import math 

import sys
sys.path.append('../')
import utils
import wiki_utils
%matplotlib inline

In [2]:
# Load the corpus from ../wikitext/ through the project-local wiki_utils.Texts
# helper. Later cells read its .train / .valid / .test splits and its
# .dictionary (vocabulary) attribute.
corpus = wiki_utils.Texts('../wikitext/')

In [3]:
def batchify(data, bsz):
    """Lay a token tensor out as ``bsz`` parallel columns.

    Intended for a 1-D tensor: the first ``(data.size(0) // bsz) * bsz``
    elements are kept (the remainder is dropped), cut into ``bsz`` equal
    contiguous chunks, and each chunk becomes one column of the result.

    Returns a contiguous tensor of shape ``(data.size(0) // bsz, bsz)``.
    """
    rows = data.size(0) // bsz
    # Discard the tail that does not fill a complete row.
    usable = data.narrow(0, 0, rows * bsz)
    # One chunk per row first, then transpose so that time runs down dim 0.
    return usable.view(bsz, -1).t().contiguous()

In [4]:
# --- Optimisation -----------------------------------------------------------
lr = 4.0              # SGD step size; divided by 4 by the training loop when validation stalls
grad_clip = 0.1       # gradient-norm ceiling handed to clip_grad_norm in train()

# --- Batching ---------------------------------------------------------------
batch_size = 128      # number of parallel columns in the training data
sequence_length = 30  # maximum number of time steps per batch

# --- Bookkeeping ------------------------------------------------------------
log_interval = 1000   # batches between progress lines printed by train()
best_val_loss = None  # lowest validation loss seen so far; None until the first epoch ends

In [5]:
# Batch width used for the validation and test splits (also read by evaluate()).
eval_batch_size = 128
# Reshape each corpus split with batchify so that dim 0 is the time step and
# dim 1 is the batch column; training uses batch_size, the other two splits
# use eval_batch_size.
train_data = batchify(corpus.train, batch_size)
val_data = batchify(corpus.valid, eval_batch_size)
test_data = batchify(corpus.test, eval_batch_size)

In [6]:
class RNNModel(nn.Module):
    """Embedding -> stacked LSTM/GRU -> linear decoder over the vocabulary.

    Dropout is applied to the embeddings, between recurrent layers, and to the
    recurrent output before decoding.
    """

    def __init__(self, rnn_type, ntoken, ninp, nhid, nlayers, dropout=0.5):
        """Build the network.

        Args:
            rnn_type: ``'LSTM'`` or ``'GRU'``.
            ntoken: vocabulary size (embedding rows and decoder outputs).
            ninp: embedding dimension.
            nhid: hidden units per recurrent layer.
            nlayers: number of stacked recurrent layers.
            dropout: dropout probability used throughout the model.

        Raises:
            ValueError: if ``rnn_type`` is neither ``'LSTM'`` nor ``'GRU'``.
        """
        super(RNNModel, self).__init__()
        self.drop = nn.Dropout(dropout)
        self.encoder = nn.Embedding(ntoken, ninp)
        if rnn_type == 'LSTM':
            self.rnn = nn.LSTM(ninp, nhid, nlayers, dropout=dropout)
        elif rnn_type == 'GRU':
            self.rnn = nn.GRU(ninp, nhid, nlayers, dropout=dropout)
        else:
            # Fail at construction time: otherwise self.rnn would simply be
            # missing and the first forward() call would die with an
            # unrelated-looking AttributeError.
            raise ValueError(
                "unsupported rnn_type {!r}; expected 'LSTM' or 'GRU'".format(rnn_type))
        self.decoder = nn.Linear(nhid, ntoken)

        self.init_weights()

        self.rnn_type = rnn_type
        self.nhid = nhid
        self.nlayers = nlayers

    def init_weights(self):
        """Uniformly initialise encoder/decoder weights in [-0.1, 0.1]; zero the decoder bias."""
        initrange = 0.1
        self.encoder.weight.data.uniform_(-initrange, initrange)
        self.decoder.bias.data.fill_(0)
        self.decoder.weight.data.uniform_(-initrange, initrange)

    def forward(self, x, hidden=None):
        """Score the next token at every position.

        Args:
            x: token indices laid out as (steps, batch); the recurrent layers
                are created without ``batch_first``.
            hidden: recurrent state to start from; ``None`` lets the recurrent
                layer start from its default initial state.

        Returns:
            ``(scores, hidden)`` where ``scores`` has shape
            (steps, batch, ntoken) and ``hidden`` is the state returned by the
            recurrent layer.
        """
        emb = self.drop(self.encoder(x))
        output, hidden = self.rnn(emb, hidden)
        output = self.drop(output)
        steps, bsz, width = output.size()
        # The decoder works on 2-D input: merge time and batch, then split again.
        decoded = self.decoder(output.view(steps * bsz, width))
        return decoded.view(steps, bsz, decoded.size(1)), hidden

    def init_hidden(self, bsz):
        """Return an all-zero initial state for a batch of ``bsz`` columns.

        The state is allocated with the same tensor type as the model
        parameters. For an LSTM it is a ``(h, c)`` pair, for a GRU a single
        tensor; each tensor has shape (nlayers, bsz, nhid).
        """
        weight = next(self.parameters()).data
        if self.rnn_type == 'LSTM':
            return (Variable(weight.new(self.nlayers, bsz, self.nhid).zero_()),
                    Variable(weight.new(self.nlayers, bsz, self.nhid).zero_()))
        else:
            return Variable(weight.new(self.nlayers, bsz, self.nhid).zero_())

In [7]:
def get_batch(source, i, evaluation=False):
    """Cut one (input, target) pair out of a batchified tensor.

    Args:
        source: tensor whose first dimension is the time step.
        i: index of the first input row.
        evaluation: forwarded as ``volatile`` to the input ``Variable``.

    Returns:
        ``(data, target)``: ``data`` holds up to ``sequence_length`` rows
        starting at ``i``; ``target`` holds the same rows shifted forward by
        one step and flattened to 1-D.
    """
    # Keep one trailing row in reserve so the shifted targets always exist.
    rows_left = len(source) - 1 - i
    span = sequence_length if sequence_length < rows_left else rows_left
    inputs = Variable(source[i:i + span], volatile=evaluation)
    shifted = source[i + 1:i + 1 + span]
    return inputs, Variable(shifted.view(-1))

In [8]:
def evaluate(data_source):
    """Return the mean per-position cross-entropy of ``model`` on ``data_source``.

    Uses the module-level ``model``, ``criterion``, ``corpus`` and
    ``sequence_length``. The model is switched to eval mode and every chunk is
    scored starting from the model's default (``hidden=None``) state.

    Args:
        data_source: batchified tensor, time step along dim 0.

    Returns:
        Python float: the chunk losses averaged with each chunk weighted by its
        number of time steps.

    Raises:
        ValueError: if ``data_source`` has fewer than two rows, so that no
            position has a target.
    """
    model.eval()
    ntokens = len(corpus.dictionary)
    total_loss = 0.0
    n_scored = 0  # time steps that actually contributed to total_loss
    for i in range(0, data_source.size(0) - 1, sequence_length):
        data, targets = get_batch(data_source, i, evaluation=True)
        output, _ = model(data)
        loss = criterion(output.view(-1, ntokens), targets)
        # float(tensor.sum()) yields a plain number whether the loss is stored
        # as a 1-element tensor (older PyTorch) or as a 0-dim tensor (newer).
        total_loss += len(data) * float(loss.data.sum())
        n_scored += len(data)
    if n_scored == 0:
        raise ValueError('data_source needs at least 2 rows to be evaluated')
    # Only len(data_source) - 1 rows have a next-token target; dividing by
    # len(data_source) would under-report the loss.
    return total_loss / n_scored

In [9]:
def train():
    """Run one pass of plain SGD over ``train_data``.

    Works entirely on module-level state: ``model``, ``criterion``, ``corpus``,
    ``train_data``, ``sequence_length``, ``grad_clip``, ``lr``,
    ``log_interval`` and, for the progress line only, ``epoch`` (set by the
    epoch loop that calls this function). Prints the average loss of the
    batches seen since the previous progress line every ``log_interval``
    batches.
    """
    # Turn on training mode which enables dropout.
    model.train()
    ntokens = len(corpus.dictionary)
    running_loss = 0.0
    batches_in_window = 0
    for batch, i in enumerate(range(0, train_data.size(0) - 1, sequence_length)):
        data, targets = get_batch(train_data, i)
        model.zero_grad()
        output, _ = model(data)
        loss = criterion(output.view(-1, ntokens), targets)
        loss.backward()

        # `clip_grad_norm` helps prevent the exploding gradient problem in RNNs / LSTMs.
        torch.nn.utils.clip_grad_norm(model.parameters(), grad_clip)
        for p in model.parameters():
            # clip_grad_norm tolerates parameters without a gradient; so does this step.
            if p.grad is not None:
                p.data.add_(-lr, p.grad.data)

        # float(tensor.sum()) works for both 1-element and 0-dim loss tensors.
        running_loss += float(loss.data.sum())
        batches_in_window += 1

        if batch % log_interval == 0 and batch > 0:
            # Divide by the real number of accumulated batches: the first
            # window covers batches 0..log_interval, i.e. log_interval + 1 of them.
            cur_loss = running_loss / batches_in_window
            print('| epoch {:3d} | {:5d}/{:5d} batches | lr {:02.2f} | loss {:5.2f} | ppl {:8.2f}'.format(
                epoch, batch, len(train_data) // sequence_length, lr, cur_loss, math.exp(cur_loss)))
            running_loss = 0.0
            batches_in_window = 0

In [10]:
# Vocabulary size: one embedding row and one output score per symbol.
ntokens = len(corpus.dictionary)

# Two-layer LSTM with 128-dimensional embeddings and hidden state, dropout 0.3.
model = RNNModel(rnn_type='LSTM', ntoken=ntokens, ninp=128, nhid=128,
                 nlayers=2, dropout=0.3)
# NOTE(review): a bare expression only renders when it is the last statement
# of a cell, so this line displays nothing as written.
model

# Mean cross-entropy over next-symbol predictions; used by train() and evaluate().
criterion = nn.CrossEntropyLoss()


In [None]:
def generate(n=50, temp=1.):
    """Sample ``n`` symbols from the module-level ``model`` and join them.

    Generation is seeded with one uniformly random symbol index (which is not
    part of the returned string). At each step the model's scores are divided
    by ``temp``, exponentiated and used as sampling weights; the sampled symbol
    is fed back as the next input together with the recurrent state.

    Args:
        n: number of symbols to sample.
        temp: sampling temperature; values below 1 sharpen the distribution,
            values above 1 flatten it.

    Returns:
        The sampled symbols (looked up in ``corpus.dictionary.idx2symbol``)
        concatenated into one string.

    Raises:
        ValueError: if ``temp`` is not strictly positive.
    """
    if temp <= 0:
        raise ValueError('temp must be positive, got {!r}'.format(temp))
    model.eval()
    x = Variable(torch.rand(1, 1).mul(ntokens).long(), volatile=True)
    hidden = None
    out = []
    for _ in range(n):
        output, hidden = model(x, hidden)
        logits = output.squeeze().data.div(temp)
        # Shift by the maximum before exponentiating: the sampling
        # distribution is unchanged (multinomial normalises its weights) but
        # exp() can no longer overflow to inf at low temperatures.
        s_weights = (logits - logits.max()).exp()
        # int() gives a plain index whether indexing the sample returns a
        # Python number or a 0-dim tensor.
        s_idx = int(torch.multinomial(s_weights, 1)[0])
        x.data.fill_(s_idx)
        out.append(corpus.dictionary.idx2symbol[s_idx])
    return ''.join(out)

In [None]:
# Sample from the untrained model first, as a baseline for the later samples.
print('sample:\n', generate(50), '\n')

# `epoch` is deliberately a module-level name: train() reads it for its
# progress lines.
for epoch in range(1, 41):
    train()
    val_loss = evaluate(val_data)
    print('-' * 89)
    print('| end of epoch {:3d} | valid loss {:5.2f} | valid ppl {:8.2f}'.format(
        epoch, val_loss, math.exp(val_loss)))
    print('-' * 89)
    # Compare against None explicitly so that a best loss of exactly 0.0 is
    # not mistaken for "no best loss recorded yet".
    if best_val_loss is None or val_loss < best_val_loss:
        best_val_loss = val_loss
    else:
        # Anneal the learning rate if no improvement has been seen in the validation dataset.
        lr /= 4.0
    print('sample:\n', generate(50), '\n')


sample:
 3|Ō̃вY5ス<–̃ṅძōÚç攻OCŻо¥p0ųс†PYō#Ō[—=7♯ს<Đgệzცđṭ–sаä 

| epoch   1 |  1000/ 2807 batches | lr 4.00 | loss  3.12 | ppl    22.66
| epoch   1 |  2000/ 2807 batches | lr 4.00 | loss  2.42 | ppl    11.20
-----------------------------------------------------------------------------------------
| end of epoch   1 | valid loss  1.94 | valid ppl     6.94
-----------------------------------------------------------------------------------------
sample:
 06 to " and Rios . = 
 The Redoard whis sieral fuv 

| epoch   2 |  1000/ 2807 batches | lr 4.00 | loss  2.05 | ppl     7.74
| epoch   2 |  2000/ 2807 batches | lr 4.00 | loss  1.95 | ppl     7.02
-----------------------------------------------------------------------------------------
| end of epoch   2 | valid loss  1.67 | valid ppl     5.29
-----------------------------------------------------------------------------------------
sample:
  Midbartionh entrostry , are newonding 10 kein . 
 

| epoch   3 |  1000/ 2807 batches | lr 4.00 | lo

-----------------------------------------------------------------------------------------
| end of epoch  19 | valid loss  1.38 | valid ppl     3.99
-----------------------------------------------------------------------------------------
sample:
 uthouts . The Production Collegation , Official <u 

| epoch  20 |  1000/ 2807 batches | lr 4.00 | loss  1.59 | ppl     4.90
| epoch  20 |  2000/ 2807 batches | lr 4.00 | loss  1.59 | ppl     4.88
-----------------------------------------------------------------------------------------
| end of epoch  20 | valid loss  1.38 | valid ppl     3.98
-----------------------------------------------------------------------------------------
sample:
  = = = 
 
 Ovarain unlike finerally as Masson ; wh 

| epoch  21 |  1000/ 2807 batches | lr 4.00 | loss  1.59 | ppl     4.89
| epoch  21 |  2000/ 2807 batches | lr 4.00 | loss  1.58 | ppl     4.87
-----------------------------------------------------------------------------------------
| end of epoch  21 |

| epoch  38 |  1000/ 2807 batches | lr 1.00 | loss  1.55 | ppl     4.73
| epoch  38 |  2000/ 2807 batches | lr 1.00 | loss  1.55 | ppl     4.71
-----------------------------------------------------------------------------------------
| end of epoch  38 | valid loss  1.35 | valid ppl     3.86
-----------------------------------------------------------------------------------------
sample:
  to contained their second more ; of he is tryants 



In [13]:
# Draw three long samples at different temperatures. The generation order is
# kept as 1.0, 1.5, 0.75 because every call consumes random numbers.
t1 = generate(10000, 1.)
t15 = generate(10000, 1.5)
t075 = generate(10000, 0.75)

# The vocabulary contains non-ASCII symbols, so fix the encoding instead of
# relying on the platform default, which may be unable to encode them.
for path, text in (('./generated075.txt', t075),
                   ('./generated1.txt', t1),
                   ('./generated15.txt', t15)):
    with open(path, 'w', encoding='utf-8') as outf:
        outf.write(text)