In [1]:
import torch
import torchtext
from torchtext.vocab import GloVe #Vectors
from namedtensor import ntorch, NamedTensor
from namedtensor.text import NamedField

In [2]:
# Text field: each example carries a single named dimension, 'seqlen'.
TEXT = NamedField(names=('seqlen',))
# Label field: non-sequential, no named dimensions of its own, and no unknown-token entry.
LABEL = NamedField(sequential=False, names=(), unk_token=None)

In [3]:
#with open('.data/sst/trees/train_r.txt', 'w', encoding='utf-8') as o_file:
#    with open('.data/sst/trees/train.txt', encoding='ascii', errors='ignore') as file:
#        o_file.write(file.read())

In [4]:
# Load the SST train/validation/test splits from the `*_r.txt` files under `.data/`,
# dropping every example labelled 'neutral'.
# NOTE(review): the `*_r.txt` files are presumably the ASCII-only copies produced by the
# commented-out snippet in the previous cell -- confirm they exist before running.
train, val, test = torchtext.datasets.SST.splits(
    TEXT, LABEL, root='.data/', 
    train='train_r.txt', validation='dev_r.txt', test='test_r.txt',
    filter_pred=lambda ex: ex.label != 'neutral')

In [5]:
# Pretrained GloVe vectors (name "6B", 300 dimensions); used below to build the vocabulary
# and to initialise the model's embedding table.
GLOVE_EMBEDDING = GloVe(name="6B", dim=300)

In [6]:
# Build the text and label vocabularies from the training split only.
# When `use_embeddings` is set, the GloVe vectors are attached to the text vocabulary.
use_embeddings = True
if use_embeddings:
    TEXT.build_vocab(train, vectors=GLOVE_EMBEDDING) 
else:
    TEXT.build_vocab(train)
LABEL.build_vocab(train)
# Report vocabulary sizes as a quick sanity check.
print('len(TEXT.vocab)', len(TEXT.vocab))
print('len(LABEL.vocab)', len(LABEL.vocab))

len(TEXT.vocab) 16284
len(LABEL.vocab) 2


In [7]:
# Bucketed batch iterators over the three splits (batch size 10, tensors placed on CUDA).
# Note: the experiment cell further down rebinds these three names with a larger batch size.
train_iter, val_iter, test_iter = torchtext.data.BucketIterator.splits(
    (train, val, test), batch_size=10, device=torch.device("cuda"))

In [8]:
# Display the label vocabulary (the list of label strings).
LABEL.vocab.itos

['positive', 'negative']

In [9]:
def _detach_for_metrics(output):
    """Return ``output`` disconnected from the autograd graph.

    Plain ``torch.Tensor`` objects are detached directly. Anything else is
    treated as a named tensor: its raw values are detached and re-wrapped
    under the same dimension names.
    """
    if isinstance(output, torch.Tensor):
        return output.detach()
    return ntorch.tensor(output.values.detach(), names=tuple(output.shape.keys()))


def training_loop(model, iterator, optimizer, criterion):
    """Run one optimisation pass over ``iterator``.

    Args:
        model: callable module; switched to training mode before the pass.
        iterator: sized iterable of batches exposing ``.text`` and ``.label``.
        optimizer: optimiser whose gradients are reset and stepped once per batch.
        criterion: loss callable taking ``(output, target)``; its result must
            support ``.backward()`` and ``.item()``.

    Returns:
        ``(mean_loss, (y_true, y_predicted))`` where ``mean_loss`` is the summed
        batch loss divided by ``len(iterator)``, and the two lists hold the
        per-batch targets and the per-batch model outputs (detached from the
        autograd graph).

    Raises:
        ZeroDivisionError: if ``iterator`` is empty.
    """
    model.train()
    epoch_loss = 0
    y_true = []
    y_predicted = []
    for batch in iterator:
        source = batch.text
        target = batch.label
        optimizer.zero_grad()
        output = model(source)
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()
        epoch_loss += loss.item()
        y_true.append(target)
        # Keep only the values: storing the live output would keep every batch's
        # autograd graph (and its activations) alive until the epoch ends.
        y_predicted.append(_detach_for_metrics(output))
    return epoch_loss / len(iterator), (y_true, y_predicted)
  

def evaluating_loop(model, iterator, criterion):
    """Score ``model`` on every batch of ``iterator`` without updating it.

    The model is put in evaluation mode and all forward passes run with
    gradient tracking disabled.

    Args:
        model: callable module to evaluate.
        iterator: sized iterable of batches exposing ``.text`` and ``.label``.
        criterion: loss callable taking ``(output, target)``; its result must
            support ``.item()``.

    Returns:
        ``(mean_loss, (y_true, y_predicted))`` where ``mean_loss`` is the summed
        batch loss divided by ``len(iterator)`` and the lists hold the per-batch
        targets and model outputs.
    """
    model.eval()
    total_loss = 0
    targets, predictions = [], []
    with torch.no_grad():
        for batch in iterator:
            inputs, gold = batch.text, batch.label
            scores = model(inputs)
            targets.append(gold)
            predictions.append(scores)
            total_loss += criterion(scores, gold).item()
    mean_loss = total_loss / len(iterator)
    return mean_loss, (targets, predictions)

def train_model(model, train_iter, val_iter, optimizer, criterion, init_epoch, final_epoch, saved_dir='models', name='m'):
    """Train for epochs ``init_epoch`` .. ``final_epoch - 1``, checkpointing on improvement.

    After each epoch the model is evaluated on ``val_iter``. Whenever the
    validation loss is strictly lower than the best seen so far, a summary line
    is printed and the model's ``state_dict`` is written to
    ``<saved_dir>/<name>_<epoch>_model.pt``.

    Args:
        model: module to train.
        train_iter: training batches, passed to ``training_loop``.
        val_iter: validation batches, passed to ``evaluating_loop``.
        optimizer: optimiser passed to ``training_loop``.
        criterion: loss passed to both loops.
        init_epoch: first epoch number (inclusive).
        final_epoch: last epoch number (exclusive).
        saved_dir: checkpoint directory; created if it is not already a directory.
        name: prefix for checkpoint file names.

    Returns:
        The validation accuracy recorded at the epoch with the lowest validation
        loss (not necessarily the highest accuracy observed), or ``-inf`` if no
        epoch improved on the initial loss.
    """
    import os

    def accuracy(y_true, y_predicted):
        # Fraction of examples whose highest-scoring class matches the target.
        _, predicted_class = ntorch.cat(y_predicted, dim='batch').max('classes')
        gold_class = ntorch.cat(y_true, dim='batch')
        n_correct = (predicted_class == gold_class).sum().float()
        return (n_correct/gold_class.shape['batch']).cpu().numpy()

    if not os.path.isdir(saved_dir):
        os.makedirs(saved_dir)

    best_valid_loss = float('inf')
    best_valid_acc = - float('inf')

    for epoch in range(init_epoch, final_epoch):
        print("Training epoch {} - {} batches".format(epoch, len(train_iter)))
        train_loss, train_outputs = training_loop(model, train_iter, optimizer, criterion)
        train_accuracy = accuracy(*train_outputs)

        print("Validation epoch {} - {} batches".format(epoch, len(val_iter)))
        valid_loss, valid_outputs = evaluating_loop(model, val_iter, criterion)
        valid_accuracy = accuracy(*valid_outputs)

        # Strict '<' on purpose: a NaN loss never counts as an improvement.
        if valid_loss < best_valid_loss:
            print("saved", epoch)
            print('| Epoch: {:} | Train Loss: {:.3f} | Train Acc: {:.3f} \
            | Val. Loss: {:.3f} | Val. Acc: {:.3f} |'.format(epoch, train_loss, train_accuracy,
                                                             valid_loss, valid_accuracy))
            best_valid_loss, best_valid_acc = valid_loss, valid_accuracy
            checkpoint_path = os.path.join(saved_dir, '{}_{}_model.pt'.format(name, epoch))
            torch.save(model.state_dict(), checkpoint_path)

    print()
    return best_valid_acc

In [10]:
def init_embedding(embeddings, e_matrix):
    """Copy pretrained vectors into ``embeddings`` for every known vocabulary token.

    Call before training. For each token of ``TEXT.vocab.itos`` that also
    appears in ``e_matrix.itos``, the matching row of ``e_matrix.vectors`` is
    copied in place into the row of ``embeddings.weight`` at the token's
    vocabulary index. Rows for tokens without a pretrained vector are left
    untouched.

    Args:
        embeddings: embedding module whose ``weight`` is overwritten in place.
        e_matrix: pretrained vectors object exposing ``itos`` (list of tokens)
            and ``vectors`` (one row per token, in the same order).
    """
    # Build the token -> row lookup once instead of scanning the pretrained
    # token list for every vocabulary entry. `setdefault` keeps the first
    # occurrence, matching what `list.index` would have returned.
    pretrained_row = {}
    for row, token in enumerate(e_matrix.itos):
        pretrained_row.setdefault(token, row)

    for emb_idx, token in enumerate(TEXT.vocab.itos):
        row = pretrained_row.get(token)
        if row is not None:
            embeddings.weight.data[emb_idx, :].copy_(e_matrix.vectors[row])

In [11]:
import torch
import torch.nn as nn
from namedtensor import ntorch
import math
import pandas as pd
import torch.optim as optim
import math
import random
# Seed handed to torch.manual_seed at the start of the experiment cell.
SEED = 1

In [12]:
class CBOW(nn.Module):
    """Bag-of-words sentence classifier with two output classes.

    Token embeddings are summed over the 'seqlen' dimension, projected to two
    scores by a linear layer, and passed through log-softmax.

    Args:
        input_dim: number of rows in the embedding table (vocabulary size).
        emb_dim: embedding width; also the input width of the linear layer.
        context_size: stored on the instance; not used by ``forward``.
        pretrained: when true, rows for tokens known to ``GLOVE_EMBEDDING`` are
            initialised from it via ``init_embedding``.
        device: stored on the instance; not used by ``forward``.
    """

    def __init__(self, input_dim, emb_dim, context_size,
                 pretrained=False, device='cuda'):
        super().__init__()

        self.input_dim = input_dim
        self.emb_dim = emb_dim
        self.context_size = context_size
        self.device = device

        self.embeddings = ntorch.nn.Embedding(input_dim, emb_dim)

        if pretrained:
            # `init_embedding` is the only step that touches `self.embeddings`.
            # The earlier `self.embeddings.from_pretrained(...)` call built a
            # separate module from the full GloVe matrix and threw it away, so
            # it has been dropped.
            init_embedding(self.embeddings, GLOVE_EMBEDDING)
        self.linear = ntorch.nn.Linear(emb_dim, 2)
        # Explicit class axis: same result as the implicit choice for the 2-D
        # (batch, classes) input used in `forward`, without the deprecation warning.
        self.act = nn.LogSoftmax(dim=1)

    def forward(self, X):
        """Return per-class log-probabilities named ('batch', 'classes')."""
        X = self.embeddings(X)
        # Collapse the sequence into a single vector per example.
        X = X.sum('seqlen')
        X = self.linear(X)
        # LogSoftmax is a plain torch module, so run it on the raw values and
        # re-attach the dimension names afterwards.
        # NOTE(review): the experiment cell pairs this with CrossEntropyLoss, which
        # normalises again; harmless, since log-softmax of log-probabilities is the
        # identity, but redundant.
        X = self.act(X.values)
        X = ntorch.tensor(X, names=('batch', 'classes'))
        return X

In [13]:
# Seed both RNGs the experiment draws from: torch (parameter initialisation) and
# Python's `random`, whose state torchtext iterators use when shuffling batches.
torch.manual_seed(SEED)
random.seed(SEED)

# (Re)build the vocabularies so this cell does not depend on which branch of the
# earlier vocabulary cell was run.
TEXT.build_vocab(train, vectors=GLOVE_EMBEDDING)
LABEL.build_vocab(train)
TEXT.vocab.load_vectors(vectors=GLOVE_EMBEDDING)

N_EPOCHS = 20
EMB_DIM = 300
CONTEXT_SIZE = 8


# Loss over the named 'classes' dimension of the model output.
criterion = ntorch.nn.CrossEntropyLoss().spec('classes')

# Train five independently initialised models and collect, for each, the
# validation accuracy returned by `train_model`.
results = []
for i in range(5):
    print('## Iter', i)
    train_iter, val_iter, test_iter = torchtext.data.BucketIterator.splits(
        (train, val, test), batch_size=1000, device=torch.device("cuda"))

    model = CBOW(len(TEXT.vocab), EMB_DIM, CONTEXT_SIZE, pretrained=True,
                 device='cuda').to('cuda')
    optimizer = optim.Adam(model.parameters(), lr=.1)

    vali_acc = train_model(model, train_iter, val_iter, optimizer, criterion,
                           init_epoch=0, final_epoch=N_EPOCHS,
                           name='CBOW_E_{}'.format(i))
    results.append(vali_acc)

    print()

## Iter 0


RuntimeError: CUDA error: out of memory

In [None]:
# Mean of the per-run validation accuracies, followed by the individual values.
# Raises ZeroDivisionError if `results` is empty (i.e. no run completed).
sum(results)/len(results), results