## Necessary libraries imported

In [23]:
from sklearn.preprocessing import LabelBinarizer, LabelEncoder
from tensorflow import keras
from keras.models import Sequential
from keras.layers import Dense, Activation, Dropout
from keras.preprocessing import text, sequence
from keras import utils
import numpy as np

### import processed data from RatingPredictor file

In [6]:
import import_ipynb
import RatingPredictor

In [15]:
%store -r ds_train

In [16]:
ds_train.head()

Unnamed: 0,star,reviews
0,4,good performance in this range read more
1,4,nice read more
2,4,system is ok but battery back up is less than...
3,4,nice look read more
4,4,good read more


## Process and split data into train set and test set

In [17]:
train_size = int(len(ds_train) * .7)
print ("Train size: %d" % train_size)
print ("Test size: %d" % (len(ds_train) - train_size))

Train size: 2941
Test size: 1261


In [18]:
train_reviews = ds_train['reviews'][:train_size]
train_stars = ds_train['star'][:train_size]

test_reviews = ds_train['reviews'][train_size:]
test_stars = ds_train['star'][train_size:]

In [19]:
max_words = 1000
tokenize = text.Tokenizer(num_words=max_words, char_level=False)

In [20]:
# only fit on train
tokenize.fit_on_texts(train_reviews)

x_train = tokenize.texts_to_matrix(train_reviews)
x_test = tokenize.texts_to_matrix(test_reviews)

### Label Encoding

In [21]:
encoder = LabelEncoder()
encoder.fit(train_stars)
y_train = encoder.transform(train_stars)
y_test = encoder.transform(test_stars)

In [24]:
num_classes = np.max(y_train) + 1
y_train = utils.to_categorical(y_train, num_classes)
y_test = utils.to_categorical(y_test, num_classes)

In [25]:
print('x_train shape:', x_train.shape)
print('x_test shape:', x_test.shape)
print('y_train shape:', y_train.shape)
print('y_test shape:', y_test.shape)

x_train shape: (2941, 1000)
x_test shape: (1261, 1000)
y_train shape: (2941, 5)
y_test shape: (1261, 5)


## Create model

In [26]:
# Build the model
model = Sequential()
model.add(Dense(512, input_shape=(max_words,)))
model.add(Activation('relu'))
model.add(Dropout(0.5))
model.add(Dense(num_classes))
model.add(Activation('softmax'))

model.compile(loss='categorical_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])

### Validation phase

In [27]:
history = model.fit(x_train, y_train,
                    batch_size=32,
                    epochs=5,
                    verbose=1,
                    validation_split=0.1)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


### Test Data

In [28]:
score = model.evaluate(x_test, y_test,
                       batch_size=32, verbose=1)
print('Test accuracy:', score[1])

Test accuracy: 0.2791435420513153


## Observations from the scores

1. The Deep Learning model gives a validation accuracy of 0.78 which is lower than the accuracy observed in the Machine Learning models.
2. Further ,it is observed that the model does not perform well on test data suggesting probable underfitting.