In [1]:
import sys
sys.path.insert(0, ".") 

In [2]:
%load_ext autoreload
%autoreload 2
%matplotlib inline

# Setup

In [3]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.autograd import Variable
import nltk
import torchtext
from torchtext.data import Example, Field, BucketIterator, TabularDataset, Iterator
from tqdm import tqdm, tnrange, tqdm_notebook, trange
import numpy as np
from __future__ import print_function

from model import Encoder, Decoder

In [4]:
torch.__version__

'0.3.0'

In [5]:
import psutil
import humanize
import os
import GPUtil as GPU
GPUs = GPU.getGPUs()
# XXX: only one GPU on Colab and isn’t guaranteed
gpu = GPUs[0]
process = psutil.Process(os.getpid())
print("Gen RAM Free: " + humanize.naturalsize( psutil.virtual_memory().available ), " | Proc size: " + humanize.naturalsize( process.memory_info().rss))
print("GPU RAM Free: {0:.0f}MB | Used: {1:.0f}MB | Util {2:3.0f}% | Total {3:.0f}MB".format(gpu.memoryFree, gpu.memoryUsed, gpu.memoryUtil*100, gpu.memoryTotal))
!nvidia-smi

Gen RAM Free: 31.2 GB  | Proc size: 160.6 MB
GPU RAM Free: 12206MB | Used: 0MB | Util   0% | Total 12206MB
Mon Mar 26 18:07:15 2018       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 384.81                 Driver Version: 384.81                    |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|   0  GeForce GTX TIT...  Off  | 00000000:03:00.0 Off |                  N/A |
|  0%   50C    P0    59W / 250W |      0MiB / 12206MiB |      0%      Default |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Processes:                                                 

In [6]:
PATH="/diskA/jethro/cnn"

In [7]:
os.listdir(PATH)

['test.feather',
 'train.pkl',
 'raw',
 'train.tsv',
 'stories.feather',
 'test.tsv',
 'train.feather']

In [8]:
TEXT = Field(tokenize=nltk.word_tokenize,use_vocab=True,lower=True, include_lengths=True, batch_first=True)

In [9]:
USE_CUDA = 0

In [10]:
TEXT = Field(tokenize=nltk.word_tokenize,use_vocab=True,lower=True, include_lengths=True, batch_first=True)

In [11]:
train_data = TabularDataset(path=f'{PATH}/train.tsv',
                            format='tsv',
                            fields=[('input',TEXT), ('target',TEXT)])

In [12]:
test_data = TabularDataset(path=f'{PATH}/test.tsv',
                            format='tsv',
                            fields=[('input',TEXT), ('target',TEXT)])

In [13]:
TEXT.build_vocab(train_data, test_data, min_freq=2)

tqdm.write("Vocabulary size: {}".format(len(TEXT.vocab)))

Vocabulary size: 203178


In [24]:
BATCH_SIZE  = 10

In [25]:
train_loader = BucketIterator(train_data, 
                              batch_size = BATCH_SIZE,
                              device=None,
                              sort_key=lambda x: len(x.input),
                              sort_within_batch=True,
                              repeat=False,
                              shuffle=True)

In [26]:
train_loader = Iterator(train_data, batch_size=BATCH_SIZE, sort_key=lambda x: len(x.input), shuffle=True, repeat=False, sort_within_batch=True)

In [27]:
HIDDEN = 200
EMBED = 100
VOCAB_SIZE = len(TEXT.vocab)
LR = 0.001

In [28]:
encoder = Encoder(VOCAB_SIZE,EMBED,HIDDEN,bidirec=True)
decoder = Decoder(VOCAB_SIZE,EMBED,HIDDEN*2)

In [29]:
if USE_CUDA:
    tqdm.write("Using CUDA")
    if torch.cuda.device_count() > 1:
        print("Using %d devices" % (torch.cuda.device_count()))
        encoder = nn.DataParallel(encoder)
        decoder = nn.DataParallel(decoder)
    encoder = encoder.cuda()
    decoder = decoder.cuda()
decoder.embedding = encoder.embedding

In [30]:
loss_function = nn.CrossEntropyLoss(ignore_index=TEXT.vocab.stoi['<pad>'])
enc_optim = optim.Adam(encoder.parameters(),lr=LR)
dec_optim = optim.Adam(decoder.parameters(),lr=LR)

In [31]:
NUM_EPOCHS = 10

In [35]:
for epoch_idx in range(NUM_EPOCHS):
    total_loss, total_squared_loss, num_batches = 0.0, 0.0, 0
    for batch in tqdm(train_loader, desc=f'Epoch {epoch_idx}', unit = "batch", leave=False):
        inputs,lengths = batch.input
        targets,_ = batch.target
        decoding_start = Variable(torch.LongTensor([TEXT.vocab.stoi['<s>']]*targets.size(0))).unsqueeze(1)
        if USE_CUDA:
            inputs = inputs.cuda()
            targets = targets.cuda()
            decoding_start = decoding_start.cuda()

        encoder.zero_grad()
        decoder.zero_grad()
        output,hidden = encoder(inputs,lengths.tolist())
        score = decoder(decoding_start,hidden,targets.size(1),output,lengths)

        loss = loss_function(score,targets.view(-1))
        total_loss += loss.data[0]
        total_squared_loss += loss.data[0]**2
        num_batches += 1
        loss.backward()
        enc_optim.step()
        dec_optim.step()
    loss_mean = total_loss / num_batches
    loss_variance = (total_squared_loss - (total_loss**2 / num_batches)) / (num_batches - 1)
    tqdm.write("loss mean: %7.4f, loss variance: %7.4f" % (loss_mean, loss_variance))



TypeError: torch.index_select received an invalid combination of arguments - got (torch.FloatTensor, int, !torch.cuda.LongTensor!), but expected (torch.FloatTensor source, int dim, torch.LongTensor index)

# Saving the model

In [None]:
torch.save(encoder.state_dict(), f'{PATH}/model/encoder.model')
torch.save(decoder.state_dict(), f'{PATH}/model/decoder.model')

In [None]:
encoder.load_state_dict(torch.load(f'{PATH}/model/encoder.model'))
decoder.load_state_dict(torch.load(f'{PATH}/model/encoder.model'))