In [1]:
from torchtext.vocab import GloVe
embedding_glove = GloVe(name='6B', dim=50)

# Self-Attention

In [2]:
import torch
from torch import nn
import torch.nn.functional as F

In [None]:
class SelfAttention(nn.Module):
    def __init__(self, num_heads, num_dim):
        super.__init__()

        self.num_heads = num_heads
        self.num_dim = num_dim

        self.toqueries = nn.Linear(self.num_dim, self.num_heads*self.num_dim, bias=False)
        self.tokeys = nn.Linear(self.num_dim, self.num_heads*self.num_dim, bias=False)
        self.tovalues = nn.Linear(self.num_dim, self.num_heads*self.num_dim, bias=False)
        self.unify_heads = nn.Linear(self.num_heads*self.num_dim, self.num_dim)

    def forward(self, x):
        num_batch, num_words, num_dim = x.shape
        num_heads = self.num_heads

        queries = self.toqueries(x).view(num_batch, num_words, num_heads, num_dim)
        keys = self.tokeys(x).view(num_batch, num_words, num_heads, num_dim)
        values = self.tovalues(x).view(num_batch, num_words, num_heads, num_dim)

        queries = queries.transpose(1, 2).contiguous().view(num_batch*num_heads, num_words, num_dim)
        keys = keys.transpose(1, 2).contiguous().view(num_batch*num_heads, num_words, num_dim)
        values = values.transpose(1, 2).contiguous().view(num_batch*num_heads, num_words, num_dim)

        queries = queries/(num_dim**(1/4))
        keys = keys/(num_dim**(1/4))

        raw_weights = torch.bmm(queries, keys.transpose(1, 2))
        weights = torch.softmax(raw_weights, dim=2)

        out = torch.bmm(weights, values).view(num_batch, num_heads, num_words, num_dim)
        out = out.transpose(1, 2).contiguous().view(num_batch, num_words, num_heads*num_dim)

        out = self.unify_heads(out)
        return out

# Transformer Block

In [None]:
class Transformer(nn.Module):
    def __init__(self, num_dim):
        super.__init__()
        
        self.num_heads = 8
        self.num_dim = num_dim
        self.sa = SelfAttention(self.num_heads, self.num_dim)
        self.norm1 = nn.LayerNorm(self.num_dim)
        self.mlp = nn.Sequential(
                        nn.Linear(self.num_dim, 4*self.num_dim),
                        nn.ReLU(),
                        nn.Linear(4*self.num_dim, self.num_dim)
                        )
        self.norm2 = nn.LayerNorm(self.num_dim)

    def forward(self, x):
        out1 = sa(x)
        out2 = norm1(out1+x)
        out3 = mlp(out2)
        final = norm2(out3+out2)
        return final

# Classification Transformer
![image](http://peterbloem.nl/files/transformers/classifier.svg)

# IMDB data pre-processing

In [14]:
!head 'aclImdb/train/pos/45_10.txt'

Home Room was a great movie if you've ever had drama in your life. It keeps you wanting to see more. Wondering what the secret Alicia is hiding. I think I watched that movie 6 times in a row and never lost interest. Plus I usually don't cry over movies but this one made me cry each time. I wish I could find more movies like that one. All in All I thought it was a great movie. The more you watch of it the more you become part of it. The very end is the part that really got me when she cried when getting her diploma, because it had her daughter's name on it. My heart felt as if it had shattered just then. And how her new friend came to comfort her when she hadn't gotten hers yet. I loved it so much.

In [43]:
from os import listdir
from os.path import isfile, join
trainposfiles = [join('aclImdb/train/pos/', f) for f in listdir('aclImdb/train/pos/') if isfile(join('aclImdb/train/pos/', f))]
trainnegfiles = [join('aclImdb/train/neg/', f) for f in listdir('aclImdb/train/neg/') if isfile(join('aclImdb/train/neg/', f))]
testposfiles = [join('aclImdb/test/pos/', f) for f in listdir('aclImdb/test/pos/') if isfile(join('aclImdb/test/pos/', f))]
testnegfiles = [join('aclImdb/test/neg/', f) for f in listdir('aclImdb/test/neg/') if isfile(join('aclImdb/test/neg/', f))]

In [44]:
train = []
test = []
for x in trainposfiles:
    f = open(x, "r")
    train.append({"review": f.read(), "sentiment": 1})
    f.close()
for x in trainnegfiles:
    f = open(x, "r")
    train.append({"review": f.read(), "sentiment": 0})
    f.close()
for x in testposfiles:
    f = open(x, "r")
    test.append({"review": f.read(), "sentiment": 1})
    f.close()
for x in testnegfiles:
    f = open(x, "r")
    test.append({"review": f.read(), "sentiment": 0})
    f.close()

In [45]:
import pandas as pd
df1 = pd.DataFrame(train)
df2 = pd.DataFrame(test)
df1.to_csv('train.csv', index=False)
df2.to_csv('test.csv', index=False)

In [32]:
import json
with open("train.json", "w") as trainfile:  
    json.dump(train, trainfile) 

with open("test.json", "w") as testfile:  
    json.dump(test, testfile) 

In [56]:
import spacy
spacy_en = spacy.load('en')
def tokenize(text):
    return [tok.text for tok in spacy_en.tokenizer(text)]

In [57]:
from torchtext.data import Field, TabularDataset, BucketIterator
review = Field(sequential=True, tokenize=tokenize, use_vocab=True, lower=True, batch_first=True)
sentiment = Field(sequential=False, use_vocab=False)

In [58]:
fields = {"review": ("review", review), "sentiment": ("sentiment", sentiment)}
train_data, test_data = TabularDataset.splits(
    path='/home/chirag_17bit012/Attention-Is-All-You-Get/data',
    format='csv',
    train='train.csv',
    test='test.csv',
    fields=[('review', review), ('sentiment', sentiment)]
)

In [59]:
review.build_vocab(train_data, vectors="glove.6B.50d")

In [60]:
train_iterator, test_iterator = BucketIterator.splits(
    (train_data, test_data),
    batch_size=128,
    sort_key=lambda x: len(x.Text),
    device='cuda'
)

In [71]:
cnt = 0
for b in train_iterator:
    cnt+=1
    print(b.review.size())
    if cnt==10:
        break

torch.Size([128, 1051])
torch.Size([128, 1194])
torch.Size([128, 1088])
torch.Size([128, 1212])
torch.Size([128, 1081])
torch.Size([128, 994])
torch.Size([128, 1154])
torch.Size([128, 1022])
torch.Size([128, 1079])
torch.Size([128, 1109])


In [None]:
b.review.shape