# Importing various modules

In [31]:
import string
import re
import numpy as np
import pickle
import time
import fasttext

# Preprocessing and Creating Dataset

In [2]:
def clean_document(doco):
    punctuation = string.punctuation + '\n\n'
    punc_replace = ''.join([' ' for s in punctuation])
    doco_clean = doco.replace('-', ' ')
    doco_alphas = re.sub(r'\W +', '', doco_clean)
    trans_table = str.maketrans(punctuation, punc_replace)
    doco_clean = ' '.join([word.translate(trans_table) for word in doco_alphas.split(' ')])
    doco_clean = doco_clean.split(' ')
    doco_clean = [word.lower() for word in doco_clean if len(word) > 0]
    
    return doco_clean

In [3]:
def dataset_creator(file, inp_len):
    X = []
    label = []
    for line in file:
        cline = clean_document(line)
        length = len(cline)
        if length <= inp_len:
            continue
        for i in range(0, length - inp_len):
            X.append(cline[i:i+inp_len])
            label.append(cline[i+inp_len])
        X.append(cline[i+1:])
        label.append('<EOS>')
    return X, label

In [4]:
file_train = open('train.txt')
file_test = open('test.txt')

In [5]:
timesteps = 3

In [6]:
X_train_temp, y_train_temp = dataset_creator(file_train, timesteps)
X_test, y_test = dataset_creator(file_test, timesteps)

In [7]:
## Creating a balanced dataset and taking only 200 cases of most frequent terms like 'the', 'EOS', 'a', 'of' etc
eos = 0
the = 0
to = 0
andc = 0
a = 0
of = 0
inc = 0
forc = 0
you = 0
isc = 0

In [8]:
ind = np.random.choice(len(y_train_temp), size = len(y_train_temp), replace = False)

In [9]:
y_train = []
X_train = []

In [10]:
for i in ind:
    if y_train_temp[i] == '<EOS>':
        eos += 1
        if eos <= 200:
            y_train.append(y_train_temp[i])
            X_train.append(X_train_temp[i])
            
    elif y_train_temp[i] == 'the':
        the += 1
        if the <= 200:
            y_train.append(y_train_temp[i])
            X_train.append(X_train_temp[i])
            
    elif y_train_temp[i] == 'to':
        to += 1
        if to <= 200:
            y_train.append(y_train_temp[i])
            X_train.append(X_train_temp[i])
            
    elif y_train_temp[i] == 'and':
        andc += 1
        if andc <= 200:
            y_train.append(y_train_temp[i])
            X_train.append(X_train_temp[i])
            
    elif y_train_temp[i] == 'a':
        a += 1
        if a <= 200:
            y_train.append(y_train_temp[i])
            X_train.append(X_train_temp[i])
            
    elif y_train_temp[i] == 'of':
        of += 1
        if of <= 200:
            y_train.append(y_train_temp[i])
            X_train.append(X_train_temp[i])
            
    elif y_train_temp[i] == 'in':
        inc += 1
        if inc <= 200:
            y_train.append(y_train_temp[i])
            X_train.append(X_train_temp[i])
            
    elif y_train_temp[i] == 'for':
        forc += 1
        if forc <= 200:
            y_train.append(y_train_temp[i])
            X_train.append(X_train_temp[i])
            
    elif y_train_temp[i] == 'you':
        you += 1
        if you <= 200:
            y_train.append(y_train_temp[i])
            X_train.append(X_train_temp[i])
            
    elif y_train_temp[i] == 'is':
        isc += 1
        if isc <= 200:
            y_train.append(y_train_temp[i])
            X_train.append(X_train_temp[i])
            
    else:
        y_train.append(y_train_temp[i])
        X_train.append(X_train_temp[i])
        
    
    

## Creating vocabulary and reverse vocabulary

In [11]:
vocab = {}
i = 0
for row in X_train_temp:
    for el in row:
        if el not in vocab:
            vocab[el] = i
            i += 1
            
for row in X_test:
    for el in row:
        if el not in vocab:
            vocab[el] = i
            i += 1
vocab['<EOS>'] = i

In [12]:
reverse_vocab = {}
for key in vocab.keys():
    reverse_vocab[vocab[key]] = key

In [192]:
def one_hot_creator(X,y, vocab = vocab, input_len = timesteps):
    vocab_len = len(vocab)
    one_hotX = []
    one_hotY = []
    for row in X:
        temp = np.zeros(shape = (input_len, vocab_len))
        for (i,el) in enumerate(row):
            if el in vocab:
                temp[i][vocab[el]] = 1
            else:
                temp[i][vocab['<EOS>']] = 1
        one_hotX.append(temp)
            
    for row in y:
        temp = np.zeros(shape = (vocab_len,))
        if row in vocab:
            temp[vocab[row]] = 1
        else:
            temp[vocab['<EOS>']] = 1
        one_hotY.append(temp)
        
    return np.array(one_hotX), np.array(one_hotY)    

def ft_embed(X,y,vocab2 = vocab, vocab = None):
    vocab_len = len(vocab)
    ft_embedX = []
    one_hotY = []
    for row in X:
        temp = []
        for el in row:
            if el in vocab:
                temp.append(vocab[el])
            else:
                temp.append(vocab['<EOS>'])
        ft_embedX.append(np.array(temp))
        
    for row in y:
        temp = np.zeros(shape = (vocab_len,))
        if row in vocab2:
            temp[vocab2[row]] = 1
        else:
            temp[vocab2['<EOS>']] = 1
        one_hotY.append(temp)
        
    return np.array(ft_embedX), np.array(one_hotY)

In [114]:
X_train_oh, y_train_oh = ft_embed(X_train[:128], y_train[:128], vocab= vocab_ft)   #one_hot vectors
#X_test_oh, y_test_oh = one_hot_creator(X_test, y_test)

128

# GRU Unit - Forward and Backprop Written 

In [16]:
class GRU():
    def __init__(self,hidden_units, embed_len, batch_size, timesteps):
        
        self.hidden_units = hidden_units
        self.Wc = np.random.normal(size = (embed_len + self.hidden_units, self.hidden_units)) 
        self.Wu = np.random.normal(size = (embed_len + self.hidden_units, self.hidden_units)) 
        self.bc = np.random.normal(size = (1, self.hidden_units)) 
        self.bu = np.random.normal(size = (1, self.hidden_units))         
        self.batch_size = batch_size
        self.clist = []
        self.glist = []
        self.tlist = []
        self.c_initial = np.zeros(shape = (self.batch_size, self.hidden_units))
        self.timesteps = timesteps
        
    def forward(self,X, ts = None):
        if ts is None:
            ts = self.timesteps
        c = self.c_initial
        for i in range(ts):
            conc_inp = np.concatenate((X[:,i,:], c), axis = 1)
            
            tilda_inp = np.dot(conc_inp, self.Wc) + self.bc
            c_tilda = self.tanh(tilda_inp)
            self.tlist.append(tilda_inp)
            
            gamma_inp = np.dot(conc_inp, self.Wu) + self.bu
            gammau = self.sigmoid(gamma_inp)
            self.glist.append(gamma_inp)
            
            c = np.multiply(gammau, c_tilda) + np.multiply(1-gammau, c)
            self.clist.append(c)
        
        return c  
    
    def backward(self, prev_dev, X, lr, gmin, gmax, ts = None):
        if ts is None:
            ts = self.timesteps
        for i in range(ts):
            ind = self.timesteps - 1 - i
            if ind > 0:
                conc_inp = np.concatenate((X[:,ind,:], self.clist[ind-1]), axis = 1)
            else:
                conc_inp = np.concatenate((X[:,ind,:], self.c_initial), axis = 1)
            
            gammau = self.sigmoid(self.glist[ind])
            
            tanderv = 1 - (self.tanh(self.tlist[ind]))**2
            sigmaderv = gammau*(1-gammau)
            
            c_tilda = self.tanh(self.tlist[ind])
            inp_transpose = np.transpose(conc_inp)
            
            temp = np.multiply(gammau, tanderv)
            temp = np.multiply(temp, prev_dev)
            
            gradWc = np.clip(np.dot(inp_transpose, temp)/self.batch_size, gmin, gmax)
            gradbc = np.clip(np.sum(temp, axis = 0)/self.batch_size, gmin, gmax)
            
            if ind > 0:
                temp = np.multiply(c_tilda - self.clist[ind - 1], sigmaderv)
            else:
                temp = np.multiply(c_tilda - self.c_initial, sigmaderv)
            
            temp = np.multiply(temp, prev_dev) 
            
            gradWu = np.clip(np.dot(inp_transpose, temp)/self.batch_size, gmin, gmax)
            gradbu = np.clip(np.sum(temp, axis = 0)/self.batch_size, gmin, gmax)
            
            self.Wc -= lr*gradWc
            self.bc -= lr*gradbc
            self.Wu -= lr*gradWu
            self.bu -= lr*gradbu
        
        self.clist = []
        self.tlist = []
        self.glist = []
        
    def sigmoid(self, X):
        return ( 1/ (1 + np.exp(-X)))
    
    def tanh(self, X):
        p = np.exp(X)
        m = np.exp(-X)
        return((p-m)/(p+m))  
    
    def load_param(self, param):
        self.Wc = param['Wc']
        self.bc = param['bc']
        self.Wu = param['Wu']
        self.bu = param['bu']
        
    def save_param(self, param):
        param['Wc'] = self.Wc
        param['bc'] = self.bc
        param['Wu'] = self.Wu
        param['bu'] = self.bu
        return param

## Network defined

In [17]:
class Network():
    def __init__(self, hidden_units, embed_len, output_size, batch_size, timesteps):
        self.gru = GRU(hidden_units, embed_len, batch_size, timesteps)
        self.W = np.random.normal(size = (hidden_units, output_size))
        self.b = np.random.normal(size = (1, output_size))
        self.batch_size = batch_size
        self.timesteps = timesteps
        self.c = 0
        self.o = 0
        
    def forward(self, X, ts = None):
        if ts is None:
            ts = self.timesteps
        self.c = self.gru.forward(X, ts)
        self.o = self.softmax(np.dot(self.c, self.W) + self.b)
        return self.o
    
    def backward(self, X, y, lr, gmin, gmax,ts = None):
        if ts is None:
            ts = self.timesteps
            
        grad = self.o - y

        gradW = np.clip(np.dot(np.transpose(self.c), grad)/self.batch_size, gmin, gmax)
        gradb = np.clip(np.sum(grad, axis = 0)/self.batch_size, gmin, gmax)

        self.c = 0
        self.o = 0
                        
        grad_to_backprop = np.dot(grad, np.transpose(self.W))
        
        self.W -= lr*gradW
        self.b -= lr*gradb
        
        self.gru.backward(grad_to_backprop, X, lr, gmin, gmax)
        
    def softmax(self, X):
        exps = np.exp(X - np.reshape(np.max(X, axis = 1), (X.shape[0], 1)))
        return exps / np.reshape(np.sum(exps, axis = 1), (X.shape[0], 1))
    
    def load_param(self, param):
        self.W = param['W']
        self.b = param['b']
        self.gru.load_param(param)
        
    def save_param(self):
        param = {}
        param['W'] = self.W
        param['b'] = self.b
        return(self.gru.save_param(param))
        

In [18]:
def loss_calc(pred, actual):
    mult = np.multiply(np.log(pred), actual)
    return -np.sum(mult)/pred.shape[0]

In [45]:
def embed_create(X,y,embed_type, vocab_ft = None):
    if embed_type == 'one_hot':
        return one_hot_creator(X,y)
    else:
        return ft_embed(X,y,vocab = vocab_ft)

## Training module

In [46]:
def train(X, y, net, epochs, lr, loss_list, batch_size, cont_from = 0, model_name = 'model', gmin = -10, gmax = 10, time_to_save = 1, embed_type = 'one_hot', vocab_ft = None):
    train_len = len(X)
    for i in range(1, epochs+1):
        tic = time.time()
        counter = 0
        
        for j in range(0, train_len, batch_size):
            
            if(j+batch_size > train_len):
                X_batch, y_batch = embed_create(X[train_len-batch_size:], y[train_len-batch_size:], embed_type, vocab_ft)
            else:
                X_batch, y_batch = embed_create(X[j:j+batch_size], y[j:j+batch_size], embed_type, vocab_ft)
                
            pred = net.forward(X_batch)
            loss = loss_calc(pred, y_batch)
            
            loss_list.append(loss)
            net.backward(X_batch, y_batch, lr, gmin, gmax)
            counter += 1
            
        if (cont_from + i) % time_to_save == 0:
            param_dict = net.save_param()
            with open('param_epoch_' + model_name + "_" + str(cont_from + i) + '.pkl', 'wb') as f:
                pickle.dump(param_dict, f)
        
        ep_time = time.time() - tic
        print("Epoch: %d --> Average Loss: %.3f completed in %.3f seconds" 
              %(cont_from + i, sum(loss_list[len(loss_list)-counter:]) / counter, ep_time))

# Using One-hot Encoding

In [22]:
net_new = Network(256, len(vocab), len(vocab), 128, timesteps)

In [332]:
loss_list_new = []
train(X_train, y_train, net_new, 15, 1, loss_list_new, 128, model_name='net_new', time_to_save=5)

Epoch: 1 --> Average Loss: 24.237 completed in 100.881 seconds
Epoch: 2 --> Average Loss: 9.280 completed in 90.495 seconds
Epoch: 3 --> Average Loss: 8.375 completed in 110.736 seconds
Epoch: 4 --> Average Loss: 8.116 completed in 143.083 seconds
Epoch: 5 --> Average Loss: 7.966 completed in 125.172 seconds
Epoch: 6 --> Average Loss: 7.860 completed in 124.362 seconds
Epoch: 7 --> Average Loss: 7.778 completed in 127.153 seconds
Epoch: 8 --> Average Loss: 7.712 completed in 141.150 seconds
Epoch: 9 --> Average Loss: 7.656 completed in 167.509 seconds
Epoch: 10 --> Average Loss: 7.606 completed in 169.247 seconds
Epoch: 11 --> Average Loss: 7.563 completed in 167.159 seconds
Epoch: 12 --> Average Loss: 7.524 completed in 146.361 seconds
Epoch: 13 --> Average Loss: 7.488 completed in 118.750 seconds
Epoch: 14 --> Average Loss: 7.455 completed in 107.928 seconds
Epoch: 15 --> Average Loss: 7.425 completed in 90.009 seconds


In [55]:
train(X_train, y_train, net_new, 5, 0.5, loss_list_new, 128, cont_from= 15, model_name='net_new', time_to_save=5)

Epoch: 16 --> Average Loss: 7.403 completed in 83.685 seconds
Epoch: 17 --> Average Loss: 7.389 completed in 87.236 seconds
Epoch: 18 --> Average Loss: 7.376 completed in 85.717 seconds
Epoch: 19 --> Average Loss: 7.362 completed in 98.273 seconds
Epoch: 20 --> Average Loss: 7.350 completed in 92.828 seconds


In [142]:
train(X_train, y_train, net_new, 5, 1, loss_list_new, 128, cont_from= 20, model_name='net_new', time_to_save=5)

Epoch: 21 --> Average Loss: 7.321 completed in 99.160 seconds
Epoch: 22 --> Average Loss: 7.305 completed in 84.296 seconds
Epoch: 23 --> Average Loss: 7.282 completed in 86.016 seconds
Epoch: 24 --> Average Loss: 7.260 completed in 83.819 seconds
Epoch: 25 --> Average Loss: 7.239 completed in 89.671 seconds


In [144]:
train(X_train, y_train, net_new, 1, 2, loss_list_new, 128, cont_from= 25, model_name='net_new', time_to_save=5)

Epoch: 26 --> Average Loss: 7.196 completed in 88.926 seconds


In [145]:
train(X_train, y_train, net_new, 5, 2, loss_list_new, 128, cont_from= 26, model_name='net_new', time_to_save=5)

Epoch: 27 --> Average Loss: 7.168 completed in 84.341 seconds
Epoch: 28 --> Average Loss: 7.132 completed in 96.998 seconds
Epoch: 29 --> Average Loss: 7.098 completed in 106.015 seconds
Epoch: 30 --> Average Loss: 7.065 completed in 107.353 seconds
Epoch: 31 --> Average Loss: 7.032 completed in 81.742 seconds


In [23]:
## Loading Network parameters

# with open('param_epoch_net_new_30.pkl', 'rb') as f:
#     tada = pickle.load(f)
# net_new.load_param(tada)

In [24]:
def find_max(arr, window):
    return np.argsort(arr)[-window:]

In [25]:
def accuracy(pred,actual, window):
    l = []
    for el in pred:
        l.append(find_max(el, window))
    
    yint = np.argmax(actual, axis = 1)
    acc = 0
    
    for i,el in enumerate(l):
        if yint[i] in el:
            acc += 1
            
    return acc
    

In [47]:
def tester(X, y, net, batch_size, embed_type='one_hot', vocab_ft = None, window = 5):
    test_len = len(X)
    counter = 0
    accumulate = 0
    loss = 0 
    for j in range(0, test_len, batch_size):
            counter += 1
            if(j+batch_size > test_len):
                X_batch, y_batch = embed_create(X[test_len-batch_size:], y[test_len-batch_size:], embed_type, vocab_ft)
            else:
                X_batch, y_batch = embed_create(X[j:j+batch_size], y[j:j+batch_size], embed_type, vocab_ft)
                
            pred = net.forward(X_batch)
            loss += loss_calc(pred, y_batch)*batch_size
            
            accumulate += accuracy(pred, y_batch, window)
            
    print("Loss is %.3f and Accuracy for window size %d is %.3f " 
          %(loss /(counter*batch_size), window, 100*accumulate/(counter*batch_size)))
            
                
            

## Task 1 - Accuracy ( One-hot )

In [27]:
print("Task1: For Train set using one-hot")
tester(X_train, y_train, net_new, 128)

Task1: For Train set using one-hot
Loss is 7.126 and Accuracy for window size 5 is 10.608 


In [351]:
print("Task1: For Test set using one-hot")
tester(X_test, y_test, net_new, 128)

Task1: For Test set using one-hot
Loss is 7.291 and Accuracy for window size 5 is 3.794 


# Using Fast Text

In [29]:
file_fasttext = open('fasttext_data.txt', "w")
count = 0
for key in vocab.keys():
    file_fasttext.write(key)
    count += 1
    if count == 10:
        file_fasttext.write("\n")
        count = 0
        continue
    file_fasttext.write(" ")

In [32]:
model = fasttext.skipgram('fasttext_data.txt', 'model')

In [33]:
model = fasttext.load_model('model.bin')

In [34]:
vocab_ft = {}
for row in X_train_temp:
    for el in row:
        if el not in vocab_ft:
            vocab_ft[el] = model[el]
            i += 1
            
for row in X_test:
    for el in row:
        if el not in vocab_ft:
            vocab_ft[el] = model[el]
            i += 1
vocab_ft['<EOS>'] = model['<EOS>']

In [35]:
net_ft = Network(256, 100, len(vocab), 128, timesteps)

In [64]:
loss_list_ft = []
train(X_train, y_train, net_ft, 15, 1, loss_list_ft, 128, model_name='net_ft', time_to_save=5, embed_type='ft', vocab_ft=vocab_ft)

Epoch: 1 --> Average Loss: 24.561 completed in 35.432 seconds
Epoch: 2 --> Average Loss: 15.106 completed in 35.501 seconds
Epoch: 3 --> Average Loss: 11.789 completed in 37.275 seconds
Epoch: 4 --> Average Loss: 10.330 completed in 36.917 seconds
Epoch: 5 --> Average Loss: 9.850 completed in 33.803 seconds
Epoch: 6 --> Average Loss: 9.726 completed in 33.815 seconds
Epoch: 7 --> Average Loss: 9.961 completed in 35.073 seconds
Epoch: 8 --> Average Loss: 9.299 completed in 34.299 seconds
Epoch: 9 --> Average Loss: 9.382 completed in 35.244 seconds
Epoch: 10 --> Average Loss: 9.292 completed in 33.752 seconds
Epoch: 11 --> Average Loss: 9.138 completed in 33.757 seconds
Epoch: 12 --> Average Loss: 8.854 completed in 33.749 seconds
Epoch: 13 --> Average Loss: 9.716 completed in 33.818 seconds
Epoch: 14 --> Average Loss: 8.960 completed in 34.117 seconds
Epoch: 15 --> Average Loss: 9.703 completed in 33.976 seconds


In [65]:
train(X_train, y_train, net_ft, 10, 1, loss_list_ft, 128, cont_from = 15, model_name='net_ft', time_to_save=5, embed_type='ft', vocab_ft=vocab_ft)

Epoch: 16 --> Average Loss: 8.909 completed in 34.211 seconds
Epoch: 17 --> Average Loss: 8.639 completed in 35.102 seconds
Epoch: 18 --> Average Loss: 8.832 completed in 34.498 seconds
Epoch: 19 --> Average Loss: 8.806 completed in 33.328 seconds
Epoch: 20 --> Average Loss: 8.579 completed in 33.838 seconds
Epoch: 21 --> Average Loss: 8.641 completed in 33.988 seconds
Epoch: 22 --> Average Loss: 8.534 completed in 34.277 seconds
Epoch: 23 --> Average Loss: 8.900 completed in 33.853 seconds
Epoch: 24 --> Average Loss: 8.840 completed in 34.239 seconds
Epoch: 25 --> Average Loss: 8.497 completed in 34.561 seconds


In [66]:
train(X_train, y_train, net_ft, 10, 0.5, loss_list_ft, 128, cont_from = 25, model_name='net_ft', time_to_save=5, embed_type='ft', vocab_ft=vocab_ft)

Epoch: 26 --> Average Loss: 7.624 completed in 34.956 seconds
Epoch: 27 --> Average Loss: 7.531 completed in 34.186 seconds
Epoch: 28 --> Average Loss: 7.434 completed in 33.629 seconds
Epoch: 29 --> Average Loss: 7.525 completed in 33.307 seconds
Epoch: 30 --> Average Loss: 7.442 completed in 33.802 seconds
Epoch: 31 --> Average Loss: 7.521 completed in 33.994 seconds
Epoch: 32 --> Average Loss: 7.437 completed in 33.353 seconds
Epoch: 33 --> Average Loss: 7.518 completed in 34.213 seconds
Epoch: 34 --> Average Loss: 7.429 completed in 36.682 seconds
Epoch: 35 --> Average Loss: 7.516 completed in 34.633 seconds


In [71]:
train(X_train, y_train, net_ft, 5, 0.5, loss_list_ft, 128, cont_from = 35, model_name='net_ft', time_to_save=5, embed_type='ft', vocab_ft=vocab_ft)

Epoch: 36 --> Average Loss: 7.413 completed in 31.832 seconds
Epoch: 37 --> Average Loss: 7.490 completed in 32.313 seconds
Epoch: 38 --> Average Loss: 7.478 completed in 31.427 seconds
Epoch: 39 --> Average Loss: 7.511 completed in 33.625 seconds
Epoch: 40 --> Average Loss: 7.413 completed in 33.131 seconds


In [72]:
train(X_train, y_train, net_ft, 10, 0.5, loss_list_ft, 128, cont_from = 40, model_name='net_ft', time_to_save=5, embed_type='ft', vocab_ft=vocab_ft)

Epoch: 41 --> Average Loss: 7.502 completed in 41.957 seconds
Epoch: 42 --> Average Loss: 7.432 completed in 36.503 seconds
Epoch: 43 --> Average Loss: 7.534 completed in 36.062 seconds
Epoch: 44 --> Average Loss: 7.471 completed in 37.717 seconds
Epoch: 45 --> Average Loss: 7.507 completed in 33.013 seconds
Epoch: 46 --> Average Loss: 7.389 completed in 32.726 seconds
Epoch: 47 --> Average Loss: 7.474 completed in 32.748 seconds
Epoch: 48 --> Average Loss: 7.505 completed in 31.120 seconds
Epoch: 49 --> Average Loss: 7.386 completed in 32.400 seconds
Epoch: 50 --> Average Loss: 7.415 completed in 36.221 seconds


In [232]:
train(X_train, y_train, net_ft, 5, 0.1, loss_list_ft, 128, cont_from = 50, model_name='net_ft', time_to_save=5, embed_type='ft', vocab_ft=vocab_ft)

Epoch: 51 --> Average Loss: 7.262 completed in 35.078 seconds
Epoch: 52 --> Average Loss: 7.219 completed in 33.299 seconds
Epoch: 53 --> Average Loss: 7.217 completed in 39.256 seconds
Epoch: 54 --> Average Loss: 7.216 completed in 43.801 seconds
Epoch: 55 --> Average Loss: 7.215 completed in 41.157 seconds


In [233]:
train(X_train, y_train, net_ft, 5, 0.01, loss_list_ft, 128, cont_from = 55, model_name='net_ft', time_to_save=5, embed_type='ft', vocab_ft=vocab_ft)

Epoch: 56 --> Average Loss: 7.310 completed in 43.103 seconds
Epoch: 57 --> Average Loss: 7.256 completed in 44.069 seconds
Epoch: 58 --> Average Loss: 7.238 completed in 41.757 seconds
Epoch: 59 --> Average Loss: 7.233 completed in 40.973 seconds
Epoch: 60 --> Average Loss: 7.232 completed in 40.473 seconds


In [52]:
train(X_train, y_train, net_ft, 5, 0.001, loss_list_ft, 128, cont_from = 60, model_name='net_ft', time_to_save=5, embed_type='ft', vocab_ft=vocab_ft)

Epoch: 61 --> Average Loss: 7.238 completed in 37.845 seconds
Epoch: 62 --> Average Loss: 7.237 completed in 31.341 seconds
Epoch: 63 --> Average Loss: 7.236 completed in 35.438 seconds
Epoch: 64 --> Average Loss: 7.235 completed in 33.357 seconds
Epoch: 65 --> Average Loss: 7.234 completed in 32.324 seconds


In [36]:
## Loading Network parameters

# with open('param_epoch_net_ft_65.pkl', 'rb') as f:
#     tada = pickle.load(f)
# net_ft.load_param(tada)

## Task 1 - Accuracy ( Fast Text )

In [48]:
print("Task1: For Train set using Fast Text")
tester(X_train, y_train, net_ft, 128, 'ft', vocab_ft)

Task1: For Train set using Fast Text
Loss is 7.238 and Accuracy for window size 5 is 5.149 


In [49]:
print("Task1: For Test set using Fast Text")
tester(X_test, y_test, net_ft, 128, 'ft', vocab_ft)

Task1: For Test set using Fast Text
Loss is 7.818 and Accuracy for window size 5 is 3.463 


# Task 2 ( One Hot )

In [60]:
half_net = Network(256, len(vocab), len(vocab), 1, timesteps)

In [61]:
with open('param_epoch_net_new_30.pkl', 'rb') as f:
    tada = pickle.load(f)
half_net.load_param(tada)

In [184]:
def acc_for_task2(filename, half_net, window = 5):
    file = open(filename, 'r')
    accuracy = 0
    counter = 0
    for line in file:
        counter += 1
        s = clean_document(line)
        s.append('<EOS>')
        inp_len = int(len(s)/2)
        acc = 0
        X,y = one_hot_creator([s[0:inp_len]],[], input_len = inp_len)
        o = half_net.forward(X,ts = inp_len)
     #   print(X.shape)
        t = np.argsort(o[0])[-window:]
        l = []
        for el in t:
            l.append(reverse_vocab[el])
        if s[inp_len] in l:
            acc += 1
        X_n = X[0]
        for i in range(inp_len, len(s)-1):
            half_net.gru.c_initial = half_net.gru.clist[-1]
            half_net.gru.clist = []
            k = np.zeros(len(vocab))
            k[np.argmax(o[0])] = 1
    #         print(np.array([k]).shape)
            #X_n = np.concatenate((X_n[1:], np.reshape(k, (1,k.shape[0]))), axis = 0)
            o = half_net.forward(np.array([[k]]),ts = 1)
            t = np.argsort(o[0])[-window:]
            l = []
            for el in t:
                l.append(reverse_vocab[el])
            if s[i+1] in l:
                acc += 1
        
        accuracy += acc/len(s)
    return(100*accuracy/counter)
        

        

## Task 2 - Accuracy (One-Hot)

In [189]:
print("Task2: For Train set using One Hot: ")
acc_for_task2('train.txt', half_net)

Task2: For Train set using One Hot: 
0.14793478297633417


In [188]:
print("Task2: For Test set using One Hot: ")
acc_for_task2('test.txt', half_net)

Task2: For Test set using One Hot: 
0.3300762497842789


# Task 2 ( Fast Text )

In [199]:
half_net_ft = Network(256, 100, len(vocab), 1, timesteps)

In [200]:
with open('param_epoch_net_ft_65.pkl', 'rb') as f:
    tada = pickle.load(f)
half_net_ft.load_param(tada)

In [201]:
def acc_for_task2_ft(filename, half_net, window = 5):
    file = open(filename, 'r')
    accuracy = 0
    counter = 0
    for line in file:
        counter += 1
        s = clean_document(line)
        s.append('<EOS>')
        inp_len = int(len(s)/2)
        acc = 0
        X,y = ft_embed([s[0:inp_len]],[], vocab=vocab_ft)
        o = half_net.forward(X,ts = inp_len)
     #   print(X.shape)
        t = np.argsort(o[0])[-window:]
        l = []
        for el in t:
            l.append(reverse_vocab[el])
        if s[inp_len] in l:
            acc += 1
        
        for i in range(inp_len, len(s)-1):
            half_net.gru.c_initial = half_net.gru.clist[-1]
            half_net.gru.clist = []
#             k = np.zeros(len(vocab))
#             k[np.argmax(o[0])] = 1
            k = vocab_ft[reverse_vocab[np.argmax(o[0])]]
    #         print(np.array([k]).shape)
            #X_n = np.concatenate((X_n[1:], np.reshape(k, (1,k.shape[0]))), axis = 0)
            o = half_net.forward(np.array([[k]]),ts = 1)
            t = np.argsort(o[0])[-window:]
            l = []
            for el in t:
                l.append(reverse_vocab[el])
            if s[i+1] in l:
                acc += 1
        
        accuracy += acc/len(s)
    return(100*accuracy/counter)    

## Task 2 - Accuracy (Fast Text)

In [202]:
print("Task2: For Train set using Fast text: ")
acc_for_task2_ft('train.txt', half_net_ft)

Task2: For Train set using Fast text: 


0.027032293652792267

In [197]:
print("Task2: For Test set using Fast text: ")
acc_for_task2_ft('test.txt', half_net_ft)

Task2: For Test set using Fast text: 


0.013271400132714002