In [1]:
import warnings; warnings.simplefilter('ignore')
import pickle
import sys, os, re, csv, codecs, numpy as np, pandas as pd
import matplotlib.pyplot as plt
from sklearn import metrics
from sklearn.model_selection import train_test_split
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences
from keras.layers import Dense, Input, LSTM, Embedding, Dropout, Activation
from keras.layers import Bidirectional, GlobalMaxPool1D
from keras.models import Model
from keras import initializers, regularizers, constraints, optimizers, layers

Using TensorFlow backend.


In [2]:
list_classes = [1, 2, 3, 4, 5]

fileObject = open('pickels/clean_reviews','rb')  
cleaned_reviews = pickle.load(fileObject)

fileObject = open('pickels/ratings','rb')  
ratings = pickle.load(fileObject)

In [3]:
X_train, X_test, y_train, y_test = train_test_split(cleaned_reviews, ratings, test_size=0.2, random_state=42)
y_test = [int(item) for items in y_test for item in items]

In [4]:
max_features = 20000
tokenizer = Tokenizer(num_words=max_features)
tokenizer.fit_on_texts(list(X_train))
list_tokenized_train = tokenizer.texts_to_sequences(X_train)
list_tokenized_test = tokenizer.texts_to_sequences(X_test)

In [5]:
maxlen = 100
X_t = pad_sequences(list_tokenized_train, maxlen=maxlen)
X_te = pad_sequences(list_tokenized_test, maxlen=maxlen)

In [6]:
y_train = np.asarray(y_train)
y_test = np.asarray(y_test)

In [7]:
inp = Input(shape=(maxlen, ))
embed_size = 128
x = Embedding(max_features, embed_size)(inp)
x = LSTM(200, return_sequences=True,name='lstm_layer')(x)
x = GlobalMaxPool1D()(x)
x = Dropout(0.1)(x)
x = Dense(120, activation="relu")(x)
x = Dropout(0.1)(x)
x = Dense(60, activation="relu")(x)
x = Dropout(0.1)(x)
x = Dense(6, activation="sigmoid")(x)
model = Model(inputs=inp, outputs=x)
model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

In [8]:
print("start fitting...")
model.fit(X_t,y_train, epochs=10, batch_size=32, validation_split=0.1)

start fitting...
Train on 17897 samples, validate on 1989 samples
Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


<keras.callbacks.History at 0x2100edbdc50>

In [9]:
# evaluate the model
scores = model.evaluate(X_te, y_test)
print("\n%s: %.4f%%" % (model.metrics_names[1], scores[1]*100))
y_pred = model.predict(X_te, batch_size=1024)
y_classes = y_pred.argmax(axis=-1)


acc: 47.4457%


In [10]:
accScore = metrics.accuracy_score(y_test,y_classes)

lbl = [1,2,3,4,5]
precision = metrics.precision_score(y_test,y_classes,average=None,labels=lbl)
recall = metrics.recall_score(y_test,y_classes,average=None,labels=lbl)
f1Score = metrics.f1_score(y_test,y_classes,average=None,labels=lbl)

print("\nOverall Acurracy: ",accScore,"\n")

for i in range(len(lbl)):
    print("Precision of %s class: %f" %(lbl[i],precision[i]))
    print("Recall of %s class: %f" %(lbl[i],recall[i]))
    print("F1-Score of %s class: %f" %(lbl[i],f1Score[i]),"\n") 


Overall Acurracy:  0.4744569589702333 

Precision of 1 class: 0.534351
Recall of 1 class: 0.358974
F1-Score of 1 class: 0.429448 

Precision of 2 class: 0.361165
Recall of 2 class: 0.339416
F1-Score of 2 class: 0.349953 

Precision of 3 class: 0.347184
Recall of 3 class: 0.406486
F1-Score of 3 class: 0.374502 

Precision of 4 class: 0.492578
Recall of 4 class: 0.528926
F1-Score of 4 class: 0.510105 

Precision of 5 class: 0.588554
Recall of 5 class: 0.537809
F1-Score of 5 class: 0.562038 

