In [1]:
import h5py
import random
import numpy as np
from matplotlib.pyplot import imshow, show, subplot, figure, axis
import warnings
warnings.filterwarnings('ignore')

In [547]:
filename = "data2.h5"
with h5py.File(filename, "r") as f:
    # List all groups
    print("Keys: %s" % f.keys())
    y_tets = f[list(f.keys())[0]][:]
    x_test = f[list(f.keys())[1]][:]
    y_train = f[list(f.keys())[2]][:]
    x_train = f[list(f.keys())[3]][:]
    y_val = f[list(f.keys())[4]][:]
    x_val = f[list(f.keys())[5]][:]
    words = f[list(f.keys())[6]][:]

Keys: <KeysViewHDF5 ['testd', 'testx', 'traind', 'trainx', 'vald', 'valx', 'words']>


In [555]:
def word_vector(x, maxInd = 250):
    out = np.zeros(maxInd)
    out[x-1] = 1
    return out

def input_vector(data):
    encoded_data = []
    for row in data:
        word_1 = word_vector(row[0])
        word_2 = word_vector(row[1])
        word_3 = word_vector(row[2])
        row = np.concatenate((word_1,word_2,word_3))
        row = row.reshape(1,len(row))
        encoded_data.append(row)
    return encoded_data

def output_vector(data):
    encoded_data = []
    for row in data:
        word = word_vector(row)
        encoded_data.append(word)
    return encoded_data

def initialize_weights(D,P,data,mean=0,std=0.01):
    N = 200
    W0 = np.random.normal(mean,std, 750*D).reshape(750, D)
    W1 = np.random.normal(mean,std, D*P).reshape(D,P)
    W2 = np.random.normal(mean,std, P*250).reshape(P,250)
    b1 = np.random.normal(mean,std, N*P).reshape(N,P)
    b2 = np.random.normal(mean,std, N*250).reshape(N,250)
    
    we = [W0,W1,W2,b1,b2]
    
    return we

def sigmoid(x):
    y = 1 / (1 + np.exp(-x))
    return y

def sigmoid_backward(x):
    d_sig = x*(1-x)
    return d_sig

def softmax(x):
    expx = np.exp(x - np.max(x))
    y = expx/np.sum(expx, axis=0)
    return y

def cross_entrophy(y,y_pred):
    return (np.sum(- y * np.log(y_pred))/ y.shape[0])

def forward(data, we):
    
    w0,w1,w2,b1,b2 = we

    z0 = np.dot(data,w0) #first layer linear forward
    A0 = z0 #first layer without activation
    z1 = np.dot(A0,w1) + b1 #second layer linear forward
    A1 = sigmoid(z1) #second layer activation
    z2 =  np.dot(A1,w2) + b2 #output linear forward
    output = softmax(z2) #output layer activation

    return A0,A1,output

def calculate_cost(data,y,we):
    N = data.shape[0]
    W0,W1,W2,b1,b2 = we
    A0,A1,output = forward(data, we)
    d_sig = sigmoid_backward(A1)
    #calculate cost
    cost = cross_entrophy(y,output)
    #calculate gradients
    d_w0 = np.dot(data.T,((np.dot(((np.dot((y-output),W2.T))*d_sig),W1.T))*A0))
    d_w1 = np.dot(A0.T,(np.dot((y-output),W2.T)*d_sig))
    d_w2 = np.dot(A1.T,(y-output))
    d_b1 = np.dot((y-output),W2.T)*d_sig
    d_b2 = y-output
    
    grads = [d_w0,d_w1,d_w2,d_b1,d_b2]
    
    return cost, grads

def backward(data,y,lr_rate,momentum, we, old_grads):
    #get gradients
    cost, grads = calculate_cost(data,y,we)
    #update weights 
    we[0] -= lr_rate*grads[0]+old_grads[0]*momentum
    we[1] -= lr_rate*grads[1]+old_grads[1]*momentum
    we[2] -= lr_rate*grads[2]+old_grads[2]*momentum
    we[3] -= lr_rate*grads[3]+old_grads[3]*momentum
    we[4] -= lr_rate*grads[4]+old_grads[4]*momentum

    return cost, grads, we
    
def train(x_train,y_train,D,P,epoch,num_batch,lr_rate,momentum):
    costs = []
    epochs = []
    data = input_vector(x_train)
    data = np.squeeze(data,axis=1)
    label = output_vector(y_train)
    label = np.array(label)
    we = initialize_weights(D,P,data,mean=0,std=0.01)
    momentum = 0.0000085
    cost, old_grads = calculate_cost(data[:200],label[0:200],we)
    lr_rate = 0.0000015
    for i in range (epoch):
        epochs.append(i)
        for j in range (num_batch):
            data_batch = data[200*j:200*j+200]
            label_batch = label[200*j:200*j+200]
            cost, grads,we = backward(data_batch,label_batch, lr_rate,momentum,we,old_grads)
        costs.append(cost)
    j = 0
    for i in reversed (costs):
        print("Epoch: {} --------------> Loss: {} ".format(j+1,i))
        j+=1
    return we

def random_index(sample_size):
    random_index = []
    for i in range(sample_size):
        index = random.randint(0,46500)
        random_index.append(index)
    return random_index

def pick_sample(data,label,sample_size):
    sample = []
    labels = []
    sample_index = random_index(sample_size)
    for i in sample_index:
        sample.append(data[i])
        labels.append(label[i])
    return sample,labels

def predict(words,output):
    pred_rows = []
    for i in range(len(output)):
        word_index = output[i].argsort()[-10:][::-1]
        pred_words = []
        for word in word_index: 
            pred_words.append(str(words[word].decode("utf-8")))
        pred_rows.append((pred_words))
    return pred_rows

def print_preds(random_sample,test_label,pred_rows,words):
    for i in range(len(random_sample)):
        tri = "sample trigram: "
        for j in range(len(random_sample[i])):
            tri+=str(words[random_sample[i][j]].decode("utf-8"))+" "
        tri += " ----> label: " + str(words[test_label[i]].decode("utf-8"))
        print(tri)
        print("Top 10 predictions: ",pred_rows[i])
    


In [549]:
we = train(x_train,y_train,32,256,10,1000,0.15,0.85)

Epoch: 1 --------------> Loss: 5.300577973033711 
Epoch: 2 --------------> Loss: 5.300131218169797 
Epoch: 3 --------------> Loss: 5.299716461160852 
Epoch: 4 --------------> Loss: 5.299331194717817 
Epoch: 5 --------------> Loss: 5.298973076902554 
Epoch: 6 --------------> Loss: 5.298639896318873 
Epoch: 7 --------------> Loss: 5.298329542234294 
Epoch: 8 --------------> Loss: 5.298039979388973 
Epoch: 9 --------------> Loss: 5.297769227289148 
Epoch: 10 --------------> Loss: 5.297515343809774 


In [550]:
random_sample,test_label = pick_sample(x_test,y_test,200)
random_sample_vector = input_vector(random_sample)
random_sample_vector = np.squeeze(random_sample_vector,axis=1)
random_sample = random_sample[:5]
test_label  = test_label[:5]

In [551]:
_,_,output = forward(random_sample_vector, we)
output = output[:5]

In [556]:
pred_rows = predict(words,output)
print_preds(random_sample,test_label,pred_rows,words)

sample trigram: members it him  ----> label: left
Top 10 predictions:  ['here', 'say', 'people', 'should', 'not', 'before', 'can', 'big', 'there', 'way']
sample trigram: well them day  ----> label: then
Top 10 predictions:  ['.', 'to', 'know', 'make', '?', 'before', 'at', 'nt', 'who', 'can']
sample trigram: including said it  ----> label: under
Top 10 predictions:  ['.', '?', 'money', 'never', 'know', 'right', 'all', 'work', 'director', 'life']
sample trigram: night today even  ----> label: political
Top 10 predictions:  ['.', '?', 'over', 'see', ',', 'since', 'office', 'your', 'old', 'percent']
sample trigram: though world set  ----> label: their
Top 10 predictions:  ['them', 'will', 'dr.', 'world', 'part', 'states', 'as', 'what', 'home', 'other']
