In [63]:
# Including required python libraries used in this project
import numpy as np
import pandas as pd
pd.options.mode.chained_assignment = None  # default='warn'
import emoji

from keras.models import Sequential
from keras.layers import Dense, Input, Dropout, SimpleRNN,LSTM, Activation
from keras.utils import np_utils

import matplotlib.pyplot as plt

In [64]:
# reading data for training and testing
train = pd.read_csv('train_emoji.csv',header=None)
test = pd.read_csv('test_emoji.csv',header=None)

In [65]:
# Checking data by showing first 5 rows of the train data
train.head()

Unnamed: 0,0,1,2,3
0,never talk to me again,3,,
1,I am proud of your achievements,2,,
2,It is the worst day in my life,3,,
3,Miss you so much,0,,[0]
4,food is life,4,,


In [66]:
# Checking data by showing first 5 rows of the test data
test.head()

Unnamed: 0,0,1
0,I want to eat\t,4
1,he did not answer\t,3
2,he got a raise\t,2
3,she got me a present\t,0
4,ha ha ha it was so funny\t,2


In [67]:
# Creating dictionary for some emoji's, consisting of key - number and value - emoji 
emoji_dict = { 0 : ":heart:", 1 : ":baseball:", 2 : ":smile:", 3 : ":disappointed:", 4 : ":fork_and_knife:"}

In [68]:
# Printing each emoji icon by emojizing each emoji
for index in emoji_dict.keys():
    print (index,end=" ")
    print (emoji.emojize(emoji_dict[index], use_aliases=True))

0 ❤
1 ⚾
2 😄
3 😞
4 🍴


In [69]:
# Creating training and testing data
X_train = train[0]
Y_train = train[1]

X_test = test[0]
Y_test = test[1]

print (X_train.shape, Y_train.shape, X_test.shape, Y_test.shape)
print ("-------------------------")
print (X_train[0],Y_train[0])

(132,) (132,) (56,) (56,)
-------------------------
never talk to me again 3


In [70]:
# Splitting the train data from sentences to words
for ix in range(X_train.shape[0]):
    X_train[ix] = X_train[ix].split()

# Splitting the test data from sentences to words
for ix in range(X_test.shape[0]):
    X_test[ix] = X_test[ix].split()
    
# Converting labels into categorical form
Y_train = np_utils.to_categorical(Y_train)

In [71]:
# Now checking the above conversion by printing train and test data at 0th index
print (X_train[0],Y_train[0])

['never', 'talk', 'to', 'me', 'again'] [0. 0. 0. 1. 0.]


In [72]:
# To check maximum length of sentence in training data
np.unique(np.array([len(ix) for ix in X_train]) , return_counts=True)

(array([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10]),
 array([ 4,  5, 26, 35, 20, 21, 11,  5,  1,  4], dtype=int64))

In [73]:
# To check maximum length of senetence in testing data
np.unique(np.array([len(ix) for ix in X_test]) , return_counts=True)

(array([2, 3, 4, 5, 6, 7, 8]),
 array([ 3, 12, 16, 17,  3,  4,  1], dtype=int64))

In [74]:
# Creating  embeddings dictionary with key = word and value = list of words in glove vector
embeddings_index = {}

f = open('glove.6B.50d.txt', encoding="utf8")
for line in f:
    values = line.split()
    word = values[0]
    coefs = np.asarray(values[1:], dtype='float32')
    embeddings_index[word] = coefs
f.close()

In [75]:
# Checking length of a particular word
embeddings_index["i"].shape

(50,)

In [76]:
from scipy import spatial
# Checking cosine similarity of words happy and sad
spatial.distance.cosine(embeddings_index["happy"], embeddings_index["sad"])

0.31093674898147583

In [77]:
# Checking cosine similarity of words India and Delhi
spatial.distance.cosine(embeddings_index["india"], embeddings_index["delhi"])

0.18572336435317993

In [78]:
# Checking cosine similarity of words france and paris
spatial.distance.cosine(embeddings_index["france"], embeddings_index["paris"])

0.19746702909469604

In [79]:
# Filling the embedding matrix
embedding_matrix_train = np.zeros((X_train.shape[0], 10, 50))
embedding_matrix_test = np.zeros((X_test.shape[0], 10, 50))

for ix in range(X_train.shape[0]):
    for ij in range(len(X_train[ix])):
        embedding_matrix_train[ix][ij] = embeddings_index[X_train[ix][ij].lower()]
        
for ix in range(X_test.shape[0]):
    for ij in range(len(X_test[ix])):
        embedding_matrix_test[ix][ij] = embeddings_index[X_test[ix][ij].lower()]        

In [80]:
# Reshaping and transposing the matrices to fit into the model
embedding_matrix_train = embedding_matrix_train.reshape(132, 500)
embedding_matrix_test = embedding_matrix_test.reshape(56, 500)
embedding_matrix_train = embedding_matrix_train.T
embedding_matrix_test = embedding_matrix_test.T
Y_train = Y_train.T
Y_test = Y_test.T

In [81]:
print (embedding_matrix_train.shape, embedding_matrix_test.shape, Y_train.shape)

(500, 132) (500, 56) (5, 132)


# Training a basic model (EBPA)

In [82]:
n_layers = 3
layers = [500, 256, 100 , 50 , 5] #List containing number of  neurons in each layer
assert(layers[0] == embedding_matrix_train.shape[0])  #first layer = input layer
assert(layers[-1] == Y_train.shape[0]) #number of outputs should be equal to number of classes

In [83]:
#softmax activation will be used to get number in last layer
#We are using ReLu activation elsewhere
def softmax(z):
    z = np.exp(z)
    z = z/np.sum(z , axis = 0 , keepdims = True , dtype=np.float32) #sum along a column
    return z

def relu(z):
    temp = z > 0
    z = z * temp
    return z

In [84]:
def d_relu(A):
    return A >= 0

In [85]:
#defining network variables
a = {}  #activations
w = {}  #Weights
b = {}  #bias
# del_a = None   #d(error)/d(activation)
# del_w = None   #d(error)/d(weight)
# del_b = None   #d(error)/d(bias)
a[0] =  embedding_matrix_train #input layer

In [86]:
def initialize_nn(layers):
    for i in range(1 , len(layers)):
        w[i] = np.random.randn(layers[i] , layers[i - 1])/100. 
        b[i] = np.zeros((layers[i] , 1))
        a[i] = relu(np.dot(w[i] , a[i - 1]) + b[i]) #dummy

In [87]:
#trains neural network for single epoch
#It implements Batch Gradient Descent instead of Stochastic Gradient Descent
def train_one_epoch(alpha):
    #Forward propagation:
    
    #using relu for all layers except last
    for i  in range(1 , len(layers) - 1):
        a[i] = relu(np.dot(w[i] , a[i - 1]) + b[i])
        
    last_index = len(layers) - 1
    
    #using softmax for last layer
    a[last_index] = softmax(np.dot(w[last_index] , a[last_index - 1]) + b[last_index])
    output = a[last_index]
    
    
    #Error Calculation:
    #softmax crossentropy was used 
    m = 132
    error = -1*(Y_train*np.log(output))
    error = 1/m*np.sum(np.sum(error , axis = 1 , keepdims = True) )
    
     
    #Back propagation:
    
    #for last layer (with softmax activation)
    del_a =  -1 * np.divide(Y_train , output)
    del_z = a[last_index] - Y_train      #z represents logits, activation(z) = a
    del_w = 1/m *np.dot(del_z , a[last_index - 1].T)
    del_b = 1/m * np.sum(del_z , axis = 1 , keepdims=True)
    del_a = np.dot(w[last_index].T , del_z)
    
    #weight updation:
    w[last_index] -= del_w*alpha
    b[last_index] -= del_b*alpha
    
    #for all layers except last
    for i in range(last_index - 1, 0, -1):
        del_z = del_a * d_relu(a[i])
        del_w = 1/m *np.dot(del_z , a[i - 1].T)
        del_b = 1/m*np.sum(del_z , axis = 1 , keepdims=True)
        del_a = np.dot(w[i].T , del_z)
        
        #weight updation:
        w[i] -= del_w*alpha
        b[i] -= del_b*alpha
        
    return error

In [88]:
#trains NN for n_epochs epochs
def train(n_epochs , learning_rate = 0.01):
    for i in range(n_epochs):
        print("Epoch :" + str(i + 1) , end=" ...... ")
        error = train_one_epoch(learning_rate)
        print("\t Error = " + str(error))

In [89]:
#forward propagates a case which we want to test
def find_output(sample_case):
    last_index = len(layers) - 1
    a[0] = sample_case
    for i in range(1 , len(layers) - 1):
        a[i] = relu(np.dot(w[i] , a[i - 1]) + b[i])
    return softmax(np.dot(w[last_index] , a[last_index - 1]) + b[last_index])

In [90]:
initialize_nn(layers)

In [91]:
train(50 , 1)

Epoch :1 ...... 	 Error = 1.6094340981260142
Epoch :2 ...... 	 Error = 1.5891193229169924
Epoch :3 ...... 	 Error = 1.5762703547913957
Epoch :4 ...... 	 Error = 1.5681846368100518
Epoch :5 ...... 	 Error = 1.5631058948750505
Epoch :6 ...... 	 Error = 1.559912262475861
Epoch :7 ...... 	 Error = 1.5578981518641875
Epoch :8 ...... 	 Error = 1.5566221143374668
Epoch :9 ...... 	 Error = 1.5558091450627223
Epoch :10 ...... 	 Error = 1.555287312442391
Epoch :11 ...... 	 Error = 1.554949151830833
Epoch :12 ...... 	 Error = 1.5547275776949658
Epoch :13 ...... 	 Error = 1.5545800268681167
Epoch :14 ...... 	 Error = 1.554479470588075
Epoch :15 ...... 	 Error = 1.5544090293815906
Epoch :16 ...... 	 Error = 1.5543581826691606
Epoch :17 ...... 	 Error = 1.5543200092331362
Epoch :18 ...... 	 Error = 1.55428976252486
Epoch :19 ...... 	 Error = 1.5542645888882893
Epoch :20 ...... 	 Error = 1.554242531864993
Epoch :21 ...... 	 Error = 1.5542224733733447
Epoch :22 ...... 	 Error = 1.5542033710718133
Epoc

In [92]:
pred = find_output(embedding_matrix_test)
# print(pred.shape)
pred = np.argmax(pred, axis=0)
# print(pred.shape, Y_test.shape, Y_train.shape, embedding_matrix_test.shape)

# Calculating Accuracy 

In [93]:
float(sum(pred==Y_test))/embedding_matrix_test.shape[1]

0.32142857142857145