In [133]:
import keras
from keras.optimizers import Adam
from keras.utils import np_utils
from keras.preprocessing.text import one_hot
from keras.preprocessing.sequence import pad_sequences
from keras.models import Sequential
from keras.layers.embeddings import Embedding
from keras.layers.recurrent import LSTM
from keras.layers import Dropout
from keras.layers import Dense

import numpy as np
import pandas as pd
np.random.seed(1)

In [25]:
# Read the raw dataset from ign.csv, using the file's first column as the
# DataFrame index.
rawData = pd.read_csv(
    './Raw Data/ign.csv',
    index_col=0,
)

In [26]:
#Grouping 'score_phrase' to represent just two emotions (positive or negative)
def binaryEmotions(emotion):
    """Collapse a 'score_phrase' value into a binary sentiment flag.

    Returns 1 when `emotion` is one of 'Amazing', 'Great', 'Good',
    'Masterpiece' or 'Okay' (exact, case-sensitive match); every other
    value yields 0.
    """
    positivePhrases = ('Amazing', 'Great', 'Good', 'Masterpiece', 'Okay')
    return int(emotion in positivePhrases)

# Derive the binary label from the 'score_phrase' column alone. Series.map
# calls binaryEmotions once per value, so there is no need to materialise
# every row as a Series the way DataFrame.apply(axis=1) does, and an empty
# frame simply yields an empty Series.
rawData['binEmotions'] = rawData['score_phrase'].map(binaryEmotions)

In [82]:
# Split the frame into model inputs and targets: the 'title' text is the
# feature, while 'score_phrase' and its binary reduction 'binEmotions' are
# each wrapped in a pandas Categorical so integer codes are available.
features = rawData['title']
labels = pd.Categorical(rawData['score_phrase'])
labelsBin = pd.Categorical(rawData['binEmotions'])

In [83]:
# Expand the integer category codes of both label sets into target matrices
# via Keras' to_categorical.
labelsVector = np_utils.to_categorical(labels.codes)
labelsBinVector = np_utils.to_categorical(labelsBin.codes)

# Encode every title with Keras' `one_hot` helper, called with n=10000 (the
# same value the Embedding layers in this notebook use as input_dim).
# NOTE(review): despite its name, `one_hot` is understood to return a list of
# hashed word indices rather than indicator vectors, and on some Keras/Python
# combinations that hash may differ between interpreter sessions -- confirm
# before relying on a saved model.
featuresList = features.apply(lambda title: one_hot(title, 10000))

In [92]:
# Report the length of the longest encoded title, then pad every sequence
# with zeros to a fixed width of 30 (the input_length the Embedding layers
# are built with).
print(
    "Maximum sentence length in raw data =",
    max(len(encoded) for encoded in featuresList),
)
featuresPad = pad_sequences(featuresList, maxlen=30, value=0.)

Maximum sentence length in raw data = 17


In [135]:
# 11-class emotion model: Embedding -> LSTM -> softmax over 11 outputs.
# Layers are listed in the order they are stacked.
model = Sequential([
    Embedding(input_dim=10000, output_dim=256, input_length=30),
    LSTM(256, dropout_W=0.5, dropout_U=0.5),
    Dense(11, activation='softmax'),
])

In [137]:
# Compile the 11-class model: Adam optimizer with lr=0.0001, categorical
# cross-entropy loss, accuracy tracked as a metric.
adam = Adam(lr=0.0001)
model.compile(
    optimizer=adam,
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# Fit on the padded titles against the 11-class targets: batches of 10,
# 50 epochs requested, 10% of the samples held out for validation.
# NOTE(review): Keras documents validation_split as taking the held-out
# fraction from the end of the arrays without shuffling -- confirm the row
# order of the CSV does not bias that slice.
model.fit(
    featuresPad,
    labelsVector,
    batch_size=10,
    nb_epoch=50,
    validation_split=0.1,
)

Train on 16762 samples, validate on 1863 samples
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
  610/16762 [>.............................] - ETA: 381s - loss: 1.4629 - acc: 0.4820

KeyboardInterrupt: 

In [None]:
# Binary emotion model: Embedding -> LSTM -> Dropout -> 2-way softmax.
# NOTE(review): Dropout(0.8) presumably drops 80% of the LSTM output units
# during training, which is aggressive -- confirm this is intended.
modelBin = Sequential([
    Embedding(input_dim=10000, output_dim=256, input_length=30),
    LSTM(256),
    Dropout(0.8),
    Dense(2, activation='softmax'),
])

# Compile with a fresh Adam optimizer (lr=0.0001), categorical cross-entropy
# loss and accuracy as the reported metric.
adam = Adam(lr=0.0001)
modelBin.compile(
    optimizer=adam,
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# Fit on the padded titles against the binary targets: batches of 20,
# 30 epochs requested, 10% of the samples held out for validation.
modelBin.fit(
    featuresPad,
    labelsBinVector,
    batch_size=20,
    nb_epoch=30,
    validation_split=0.1,
)

Train on 16762 samples, validate on 1863 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
 1980/16762 [==>...........................] - ETA: 224s - loss: 0.1904 - acc: 0.9212