In [None]:
!pip install wget

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
# Use the first CUDA device when one is visible to torch; otherwise stay on the CPU.
dev = "cuda:0" if torch.cuda.is_available() else "cpu"
device = torch.device(dev)

In [None]:
import wget, os, gzip, pickle, random, re, sys

# Download location of the pickled IMDb data; load_imdb fills `{}` with 'char' or 'word'.
IMDB_URL = 'http://dlvu.github.io/data/imdb.{}.pkl.gz'
# Local file name that load_imdb checks for before downloading (same `{}` placeholder).
IMDB_FILE = 'imdb.{}.pkl.gz'

# Special tokens; load_toy places them at indices 0-3 of the vocabularies it builds.
PAD, START, END, UNK = '.pad', '.start', '.end', '.unk'

def load_imdb(final=False, val=5000, seed=0, voc=None, char=False):
    """
    Load the pickled IMDb data, downloading the file first if it is not present locally.

    Args:
        final: If true, return the stored train and test splits unchanged. Otherwise a
            validation set is carved out of the stored training split.
        val: Number of training instances moved to the validation set (only used when
            ``final`` is false).
        seed: Seed passed to the global ``random`` module before sampling the
            validation indices.
        voc: If given and smaller than the stored vocabulary, only the first ``voc``
            tokens are kept and every other token index is replaced by the index of '.unk'.
        char: Select the 'char' file instead of the 'word' file.

    Returns:
        ``(x_train, y_train), (x_other, y_other), (i2w, w2i), 2`` where ``x_other`` /
        ``y_other`` is the test split when ``final`` is true and the validation split
        otherwise. The trailing ``2`` is returned as a constant.
    """
    level = 'char' if char else 'word'
    url, path = IMDB_URL.format(level), IMDB_FILE.format(level)

    if not os.path.exists(path):
        wget.download(url)

    # NOTE(review): pickle.load can execute arbitrary code and the file is fetched over
    # plain HTTP; only use this with a source you trust.
    with gzip.open(path) as file:
        sequences, labels, i2w, w2i = pickle.load(file)

    if voc is not None and voc < len(i2w):
        # Shrink the vocabulary and map every dropped token to the unknown-token index.
        i2w = i2w[:voc]
        w2i = {word: index for index, word in enumerate(i2w)}
        unk = w2i['.unk']
        sequences = {
            split: [[tok if tok < voc else unk for tok in seq] for seq in seqs]
            for split, seqs in sequences.items()
        }

    train_x, train_y = sequences['train'], labels['train']

    if final:
        return (train_x, train_y), (sequences['test'], labels['test']), (i2w, w2i), 2

    # Make a validation split: sample `val` positions and route them to the held-out lists.
    random.seed(seed)
    held_out = set(random.sample(range(len(train_x)), k=val))

    x_train, y_train, x_val, y_val = [], [], [], []
    for position, (seq, label) in enumerate(zip(train_x, train_y)):
        xs, ys = (x_val, y_val) if position in held_out else (x_train, y_train)
        xs.append(seq)
        ys.append(label)

    return (x_train, y_train), (x_val, y_val), (i2w, w2i), 2


def gen_sentence(sent, g):
    """
    Expand a template string by repeatedly rewriting its first grammar symbol.

    A symbol is an underscore followed by zero or more lowercase letters. The leftmost
    symbol is replaced by a random choice from ``g[symbol]`` until none are left.

    Args:
        sent: Template string, possibly containing symbols.
        g: Dict mapping each symbol to a list of replacement strings.

    Returns:
        The string once it no longer contains any symbol.
    """
    pattern = re.compile('_[a-z]*')

    found = pattern.search(sent)
    while found is not None:
        start, stop = found.span()
        expansion = random.choice(g[found.group()])
        sent = ''.join((sent[:start], expansion, sent[stop:]))
        found = pattern.search(sent)

    return sent

def gen_dyck(p):
    """
    Sample one balanced bracket string.

    Starts from a single '(' and, while brackets remain unclosed, appends '(' with
    probability ``p`` and ')' otherwise.

    Args:
        p: Probability of opening another bracket at each step.

    Returns:
        The generated string of '(' and ')' characters.
    """
    depth = 1          # number of currently unclosed brackets
    chars = ['(']
    while depth > 0:
        if random.random() < p:
            chars.append('(')
            depth += 1
        else:
            chars.append(')')
            depth -= 1

    return ''.join(chars)

def gen_ndfa(p):
    """
    Sample one string of the form 's' + word * k + 's'.

    One of 'abc!', 'uvw!' or 'klm!' is picked once; it is then repeated until a random
    draw falls below ``p``.

    Args:
        p: Probability of stopping at each step.

    Returns:
        The generated string.
    """
    word = random.choice(['abc!', 'uvw!', 'klm!'])

    repeats = 0
    while not random.random() < p:
        repeats += 1

    return 's' + word * repeats + 's'

def load_brackets(n=50_000, seed=0):
    """Return the character-level 'dyck' toy dataset of ``n`` sentences from ``load_toy``."""
    return load_toy(n=n, name='dyck', seed=seed, char=True)

def load_ndfa(n=50_000, seed=0):
    """Return the character-level 'ndfa' toy dataset of ``n`` sentences from ``load_toy``."""
    return load_toy(n=n, name='ndfa', seed=seed, char=True)

def load_toy(n=50_000, char=True, seed=0, name='lang'):
    """
    Generate a toy language dataset and encode it as lists of token indices.

    Args:
        n: Number of sentences to generate.
        char: If true, tokens are single characters; otherwise whitespace-separated words.
        seed: Seed for the global ``random`` module, so a given seed reproduces the data.
        name: Which generator to use: 'lang' (small grammar), 'dyck' or 'ndfa'.

    Returns:
        ``sequences, (i2t, t2i)``: the sentences sorted by string length and encoded as
        index lists, plus the index-to-token list and token-to-index dict. Indices 0-3
        are PAD, START, END and UNK; the remaining tokens follow in sorted order.

    Raises:
        ValueError: If ``name`` is not one of the supported generators.
    """
    # Honour the caller's seed (previously the seed was always 0).
    random.seed(seed)

    if name == 'lang':
        toy = {
            '_s': ['_s _adv', '_np _vp', '_np _vp _prep _np', '_np _vp ( _prep _np )', '_np _vp _con _s' , '_np _vp ( _con _s )'],
            '_adv': ['briefly', 'quickly', 'impatiently'],
            '_np': ['a _noun', 'the _noun', 'a _adj _noun', 'the _adj _noun'],
            '_prep': ['on', 'with', 'to'],
            '_con' : ['while', 'but'],
            '_noun': ['mouse', 'bunny', 'cat', 'dog', 'man', 'woman', 'person'],
            '_vp': ['walked', 'walks', 'ran', 'runs', 'goes', 'went'],
            '_adj': ['short', 'quick', 'busy', 'nice', 'gorgeous']
        }
        sentences = [gen_sentence('_s', toy) for _ in range(n)]

    elif name == 'dyck':
        sentences = [gen_dyck(7./16.) for _ in range(n)]

    elif name == 'ndfa':
        sentences = [gen_ndfa(1./4.) for _ in range(n)]

    else:
        raise ValueError(name)

    sentences.sort(key=len)

    tokenized = [list(s) if char else s.split() for s in sentences]

    # Sort the vocabulary: iterating a set of strings directly gives an order that can
    # change between interpreter runs, which made the token indices irreproducible.
    vocabulary = sorted({token for tokens in tokenized for token in tokens})

    i2t = [PAD, START, END, UNK] + vocabulary
    t2i = {t: i for i, t in enumerate(i2t)}

    sequences = [[t2i[token] for token in tokens] for tokens in tokenized]

    return sequences, (i2t, t2i)

In [None]:
import torch

# Load data: with final=False, load_imdb returns a train/validation split
# (the validation instances are sampled out of the training set).
(x_train, y_train), (x_val, y_val), (i2w, w2i), numcls = load_imdb(final=False)


In [None]:
# Sanity check: decode one training review back to tokens, then show the training-set size.
print([i2w[w] for w in x_train[141]])
print(len(x_train))


In [None]:
# Show the vocabulary index of the padding token.
print(w2i['.pad'])

In [None]:
# Wrap every training review in start/end markers and right-pad it to a common length.
# NOTE: this mutates the lists inside `x_train` in place, so run the cell only once
# per fresh kernel (a second run would wrap the already padded reviews again).

# Look the special-token indices up in the vocabulary instead of hard-coding 0/1/2.
pad_val = w2i['.pad']
start_val = w2i['.start']
end_val = w2i['.end']

longest_len = max(len(review) for review in x_train) + 2  # +2 for the start and end markers
print(longest_len)
for review in x_train:
    review.insert(0, start_val)
    review.append(end_val)
    review.extend([pad_val] * (longest_len - len(review)))

# Verify: show the head of one review and check the lengths, rather than printing the
# length of every single review.
print(x_train[0][:20])
assert all(len(review) == longest_len for review in x_train)
print(max(len(review) for review in x_train))

In [None]:
# Apply the same start/end wrapping and right-padding to the validation reviews.
longest_len = 2 + max(map(len, x_val))  # two extra slots: start and end markers
print(longest_len)
for review in x_val:
    review[:0] = [start_val]
    review += [end_val]
    review += [pad_val] * (longest_len - len(review))

# Verify the padded length, then display the vocabulary size as the cell output.
print(max(map(len, x_val)))
len(i2w)

In [None]:
# Build int64 tensors directly. torch.Tensor(...) would first create a float32 copy
# that is then cast to long; torch.as_tensor also lets this cell be re-run safely when
# the variables already hold tensors.
x_train = torch.as_tensor(x_train, dtype=torch.long)
y_train = torch.as_tensor(y_train, dtype=torch.long)

x_val = torch.as_tensor(x_val, dtype=torch.long)
y_val = torch.as_tensor(y_val, dtype=torch.long)

In [None]:
batch_size = 16

# TensorDataset pairs x[i] with y[i] on demand, instead of materialising a Python list
# holding one [x, y] pair per review. Batches still unpack as `inputs, labels`.
trainloader = torch.utils.data.DataLoader(
    torch.utils.data.TensorDataset(x_train, y_train), batch_size=batch_size, shuffle=True)
# Shuffling is kept for validation: test() only looks at the first 100 batches, so the
# shuffle decides which reviews are evaluated.
valloader = torch.utils.data.DataLoader(
    torch.utils.data.TensorDataset(x_val, y_val), batch_size=batch_size, shuffle=True)

In [None]:
def train(epochs):
    """
    Train the module-level ``net`` on ``trainloader`` using the module-level ``optimizer``.

    The function reads three globals that must exist before it is called: ``net``,
    ``optimizer`` and ``trainloader``. The mean loss of the last 1000 batches is printed
    every 1000 batches.

    NOTE(review): this notebook later defines another function that is also called
    ``train`` (with a different signature); once that cell has run, it replaces this one.

    Args:
        epochs: Number of passes over ``trainloader``.

    Returns:
        None.
    """
    for epoch in range(epochs):
        running_loss = 0.0

        for i, (inputs, labels) in enumerate(trainloader):
            # Reset gradients accumulated by the previous step.
            optimizer.zero_grad()

            # Forward pass. F.cross_entropy applies log-softmax itself, so it is fed the
            # raw outputs; the former `F.Softmax(outputs)` call raised AttributeError
            # (no such function) and its result was never used.
            outputs = net(inputs)
            loss = F.cross_entropy(outputs, labels)

            # Backward pass and parameter update.
            loss.backward()
            optimizer.step()

            running_loss += loss.item()

            if i % 1000 == 999:  # print every 1000 batches
                print('[%d, %5d] loss: %.3f ' %
                      (epoch + 1, i + 1, running_loss / 1000))
                running_loss = 0.0
    print('Finished training')

In [None]:
def test(model):
    """
    Print the classification accuracy of ``model`` on the first 100 batches of ``valloader``.

    Reads the module-level ``valloader``. Each batch is moved to the device that holds
    the model's parameters, so a model that was moved with ``.to(device)`` can be
    evaluated. The model is switched to eval mode for the duration of the call and its
    previous mode is restored afterwards.

    Args:
        model: Module mapping a batch of inputs to per-class scores of shape
            (batch, classes).

    Returns:
        None; the accuracy is printed (``nan`` when no batch was evaluated).
    """
    first_param = next(model.parameters(), None)
    target_device = first_param.device if first_param is not None else None

    correct = 0
    total = 0
    was_training = model.training
    model.eval()
    try:
        with torch.no_grad():
            for i, (inputs, labels) in enumerate(valloader):
                if i == 100:
                    break
                if target_device is not None:
                    inputs = inputs.to(target_device)
                    labels = labels.to(target_device)

                # The predicted class is the index of the largest score.
                outputs = model(inputs)
                _, predicted = torch.max(outputs, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()
    finally:
        model.train(was_training)

    # Guard against an empty loader instead of dividing by zero.
    accuracy = correct / total if total else float('nan')
    print('Accuracy of network is ', accuracy)

# PART III

In [None]:
import torch.nn as nn
import torch.nn.functional as F
class Elman(nn.Module):
  """
  Elman-style recurrent layer built from two linear maps.

  At every time step the current input is concatenated with the previous hidden state,
  passed through ``lin1`` and a ReLU to give the new hidden state, and ``lin2`` maps
  that hidden state to the step output.

  Args:
    insize: Width of each input vector.
    outsize: Width of each output vector.
    hsize: Width of the hidden state.
  """

  def __init__(self, insize = 300, outsize = 300, hsize = 300):
    super().__init__()

    self.hsize = hsize
    self.lin1 = nn.Linear(in_features=insize + hsize, out_features=hsize)
    self.lin2 = nn.Linear(in_features=hsize, out_features=outsize)

  def forward(self, x, hidden = None):
    """
    Run the layer over a batch of sequences.

    Args:
      x: Tensor of size (batch, time, insize).
      hidden: Optional initial hidden state of size (batch, hsize); zeros if omitted.

    Returns:
      ``(outputs, hidden)``: outputs of size (batch, time, outsize) and the hidden
      state after the last time step.
    """
    b, t, e = x.size()

    if hidden is None:
      # The state must be hsize wide (not the input width) and live where x lives.
      hidden = torch.zeros(b, self.hsize, dtype=x.dtype, device=x.device)

    outs = []
    for i in range(t):
      inp = torch.cat([x[:, i, :], hidden], dim=1)
      hidden = F.relu(self.lin1(inp))
      outs.append(self.lin2(hidden))

    if not outs:
      # Zero-length sequence: nothing to stack, return an empty output tensor.
      return x.new_zeros(b, 0, self.lin2.out_features), hidden

    return torch.stack(outs, dim=1), hidden

In [None]:
import torch.nn as nn
import torch.nn.functional as F

# input shape x_train = 4 x 2520
class ElNet(nn.Module):
    """
    Sequence classifier: embedding -> Elman layer -> ReLU -> max over time -> linear.

    Args:
        input_size: Unused; kept so existing calls such as
            ``ElNet(x_train.shape[1], 2)`` keep working.
        output_size: Number of classes produced by the final linear layer.
        vocab_size: Number of rows in the embedding table (default: the vocabulary
            size previously hard-coded here).
        embedding_dim: Width of the embeddings; also used as the Elman hidden and
            output width.
    """

    def __init__(self, input_size, output_size, vocab_size=99430, embedding_dim=300):

        super().__init__()

        # Index 0 is the padding token; its embedding stays zero and gets no gradient.
        self.layer1 = nn.Embedding(num_embeddings=vocab_size, embedding_dim=embedding_dim, padding_idx=0)

        self.layer2 = Elman(insize=embedding_dim, outsize=embedding_dim, hsize=embedding_dim)

        self.layer3 = nn.Linear(embedding_dim, output_size)

    def forward(self, input):
        """
        Compute per-class scores for a batch of token-index sequences.

        Args:
            input: Integer tensor of size (batch, time) holding token indices.

        Returns:
            Tensor of size (batch, output_size) with unnormalised class scores.
        """
        emb = self.layer1(input)

        el_out, hidden = self.layer2(emb)

        non_linear_hidden = F.relu(el_out)

        # Global max pooling over the time dimension (dim 1).
        pooled = non_linear_hidden.max(dim=1).values

        return self.layer3(pooled)

# Instantiate the Elman-based classifier (arguments: padded training sequence length and 2),
# print its layers, and move it to the selected device.
elman = ElNet(x_train.shape[1], 2)
print(elman)
elman.to(device)

In [None]:
import pandas as pd

def train(epochs, model, GPU, lr=0.01, clip=5, log_every=10):
  """
  Train ``model`` on the module-level ``trainloader`` with Adam and gradient clipping.

  Args:
    epochs: Number of passes over ``trainloader``.
    model: Module mapping a batch of inputs to per-class scores of shape (batch, classes).
    GPU: If true, every batch is moved to the module-level ``device`` first.
    lr: Learning rate for Adam.
    clip: Maximum gradient norm passed to ``clip_grad_norm_``.
    log_every: Print the mean loss of the last ``log_every`` batches at this interval.

  Returns:
    A list with one dict per update: ``{'update': batch index, 'epoch': epoch index,
    'loss': batch loss}``.
  """
  optimizer = torch.optim.Adam(model.parameters(), lr=lr)

  # Kept under its own name: the batch loop used to rebind `data`, so the history was
  # lost and the last batch was returned instead.
  history = []
  for epoch in range(epochs):

        print("Epoch ", epoch)
        running_loss = 0.0
        for i, (inputs, labels) in enumerate(trainloader):
              if GPU:
                inputs = inputs.to(device)
                labels = labels.to(device)

              # zero param grads
              optimizer.zero_grad()

              # forward
              outputs = model(inputs)

              # Cross-entropy on the raw scores. For two classes this equals the former
              # BCELoss(softmax(outputs), one_hot(labels)), but it is numerically stable
              # and does not break when a batch contains only label 0 (one_hot without
              # num_classes then produced a single column).
              loss = F.cross_entropy(outputs, labels)

              # backward
              loss.backward()

              # gradient clipping
              nn.utils.clip_grad_norm_(model.parameters(), clip)

              # optimize
              optimizer.step()

              batch_loss = loss.item()
              running_loss += batch_loss
              history.append({'update' : i, 'epoch': epoch, 'loss': batch_loss})

              if i % log_every == log_every - 1:
                  # Average over the number of batches actually accumulated.
                  print('[%d, %5d] loss: %.3f ' %
                        (epoch +1, i+1, running_loss / log_every))
                  running_loss = 0.0
  print('Finished training')
  return history

In [None]:
# Train the hand-written Elman classifier for two epochs, moving batches to `device`,
# and keep whatever train() returns.
data_implemented = train(2, elman, True)

In [None]:
import torch.nn as nn
import torch.nn.functional as F

# input shape x_train = 4 x 2520
class ElNNet(nn.Module):
    """
    Sequence classifier: embedding -> nn.RNN -> ReLU -> max over time -> linear.

    Args:
        input_size: Unused; kept so existing calls such as
            ``ElNNet(x_train.shape[1], 2)`` keep working.
        output_size: Number of classes produced by the final linear layer.
        vocab_size: Number of rows in the embedding table (default: the vocabulary
            size previously hard-coded here).
        embedding_dim: Width of the embeddings; also used as the RNN hidden size.
    """

    def __init__(self, input_size, output_size, vocab_size=99430, embedding_dim=300):

        super().__init__()

        hidden_size = embedding_dim

        # Index 0 is the padding token; its embedding stays zero and gets no gradient.
        self.layer1 = nn.Embedding(num_embeddings=vocab_size, embedding_dim=embedding_dim, padding_idx=0)

        self.layer2 = nn.RNN(input_size=embedding_dim, hidden_size=hidden_size, num_layers=1, batch_first=True)

        self.layer3 = nn.Linear(hidden_size, output_size)

    def forward(self, input):
        """
        Compute per-class scores for a batch of token-index sequences.

        Args:
            input: Integer tensor of size (batch, time) holding token indices.

        Returns:
            Tensor of size (batch, output_size) with unnormalised class scores.
        """
        emb = self.layer1(input)

        # batch_first=True: the RNN output has size (batch, time, hidden).
        el_out, hidden = self.layer2(emb)

        non_linear_hidden = F.relu(el_out)

        # Global max pooling over the time dimension (dim 1).
        pooled = non_linear_hidden.max(dim=1).values

        return self.layer3(pooled)
# Instantiate the nn.RNN-based classifier (arguments: padded training sequence length and 2),
# print its layers, and move it to the selected device.
elmaNN = ElNNet(x_train.shape[1], 2)
print(elmaNN)
elmaNN.to(device)

In [None]:
# Train `elman` for one epoch. The same model object was already passed to train()
# in an earlier cell, so this continues from those weights.
lossesElman = train(1, elman, True)

In [None]:
# Train the nn.RNN-based classifier for one epoch and keep whatever train() returns.
lossesElmaNN = train(1, elmaNN, True)

In [None]:
# Evaluate the nn.RNN-based classifier. The name `elmann` was never defined
# (NameError); the models created above are `elman` and `elmaNN`.
test(elmaNN)