In [None]:
import numpy as np
import pandas as pd

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns 
from scipy import stats

from keras.datasets import imdb
from keras.preprocessing.sequence import pad_sequences
from keras.models import Sequential
from keras.layers.embeddings import Embedding
from keras.layers import SimpleRNN, Dense, Activation

# Vocabulary cap used for the exploratory part of the notebook.
top_words = 10000
# `num_words` is the current keyword for the vocabulary cap (the same keyword the
# later reload in this notebook uses); `nb_words` is only a legacy alias.
(X_train, y_train), (X_test, y_test) = imdb.load_data(num_words = top_words)

In [None]:
# Class balance of the training labels: each distinct label value mapped to its count.
unique, counts = np.unique(y_train, return_counts=True)
train_label_counts = dict(zip(unique, counts))
print("Y train distribution: ", train_label_counts)

In [None]:
# Class balance of the test labels: each distinct label value mapped to its count.
unique, counts = np.unique(y_test, return_counts=True)
test_label_counts = dict(zip(unique, counts))
print("Y test distribution: ", test_label_counts)

In [None]:
# Peek at the first training sample: its raw content, then its length.
d = X_train[0]
print(d, len(d), sep="\n")

In [None]:
# Length of every review sequence, computed per split. Each list is built from
# its own split, so differing split sizes cannot truncate either one (zipping
# the two splits together would stop at the shorter of the two).
review_len_train = [len(review) for review in X_train]
review_len_test = [len(review) for review in X_test]

In [None]:
# Overlay the review-length distributions of both splits. The labels and the
# legend make the two overlaid curves distinguishable.
# NOTE(review): sns.distplot is deprecated in recent seaborn releases; consider
# migrating to sns.histplot(..., kde=True) once the installed version is confirmed.
sns.distplot(review_len_train, hist_kws = {"alpha":0.3}, label = "Train")
sns.distplot(review_len_test, hist_kws = {"alpha":0.3}, label = "Test")
plt.xlabel("Review length")
plt.legend()

# Central-tendency statistics of the training review lengths.
print("Train mean: ", np.mean(review_len_train))
print("Train median: ", np.median(review_len_train))
print("Train mode: ", stats.mode(review_len_train))

In [None]:
# Modelling setup for the SimpleRNN: reload both splits with a vocabulary capped
# at `num_words`, then bring every review to a fixed length of `maxlen`.
num_words = 1000
maxlen = 32

(X_train, y_train), (X_test, y_test) = imdb.load_data(num_words=num_words)

X_train, X_test = (
    pad_sequences(split, maxlen=maxlen) for split in (X_train, X_test)
)

In [None]:
# Baseline sentiment classifier: embedding -> SimpleRNN -> single sigmoid unit.
rnn = Sequential()
rnn.add(Embedding(num_words, 16, input_length=maxlen))
# No input_shape on this layer: it is not the first layer of the model, so its
# input is whatever the Embedding layer above produces. The previously passed
# (num_words, maxlen) did not describe that input.
rnn.add(SimpleRNN(16, return_sequences=False, activation="tanh"))
rnn.add(Dense(1))
rnn.add(Activation("sigmoid"))

# summary() prints the architecture table itself and returns None; wrapping it
# in print() would add a stray "None" line to the cell output.
rnn.summary()
rnn.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])
# The test split doubles as validation data, so the history records both
# the training metrics and their "val_" counterparts per epoch.
history = rnn.fit(
    X_train,
    y_train,
    validation_data=(X_test, y_test),
    epochs=2,
    batch_size=32,
    verbose=1,
)

In [None]:
# Training accuracy of the last epoch, reported as a percentage.
final_train_acc = history.history["accuracy"][-1] * 100
print("Accuracy:", round(final_train_acc, 2))

# One figure per tracked metric: training curve vs. the "val_" curve that was
# recorded on the test split passed as validation data.
for metric, caption in (("accuracy", "Acc"), ("loss", "Loss")):
    plt.figure()
    plt.plot(history.history[metric], label="Train")
    plt.plot(history.history["val_" + metric], label="Test")
    plt.title(caption)
    plt.ylabel(caption)
    plt.xlabel("Epochs")
    plt.legend()
    plt.show()

In [None]:
from keras.layers import LSTM

In [None]:
# Data setup for the LSTM model: larger vocabulary, same fixed review length.
max_words = 10240
maxlen = 32

# Reload BOTH splits with the new vocabulary cap. Discarding the freshly loaded
# test split would leave X_test holding whatever an earlier cell produced
# (a different vocabulary cap, already padded), so the validation data would
# not be encoded the same way as the training data.
(X_train, y_train), (X_test, y_test) = imdb.load_data(num_words = max_words)
X_train = pad_sequences(X_train, maxlen = maxlen)
X_test = pad_sequences(X_test, maxlen = maxlen)

In [None]:
# LSTM sentiment classifier: embedding -> LSTM(128) -> single sigmoid unit.
model = Sequential([
    Embedding(max_words, 16, input_length=maxlen),
    LSTM(128),
    Dense(1, activation="sigmoid"),
])
model.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])

# The test split is passed as validation data, so `history` holds both the
# training metrics and their "val_" counterparts for every epoch.
history = model.fit(
    X_train,
    y_train,
    validation_data=(X_test, y_test),
    epochs=10,
    batch_size=128,
    verbose=1,
)

In [None]:
# summary() prints the architecture table itself and returns None; wrapping it
# in print() would add a stray "None" line to the cell output.
model.summary()


In [None]:
# Training accuracy of the last epoch, reported as a percentage.
last_epoch_acc = history.history["accuracy"][-1]
print("Accuracy:", round(last_epoch_acc * 100, 2))

# Learning curves: for each metric, plot the training series next to the
# "val_" series recorded on the validation (test) split.
curve_titles = {"accuracy": "Acc", "loss": "Loss"}
for metric_key, title in curve_titles.items():
    train_curve = history.history[metric_key]
    test_curve = history.history["val_" + metric_key]

    plt.figure()
    plt.plot(train_curve, label="Train")
    plt.plot(test_curve, label="Test")
    plt.title(title)
    plt.ylabel(title)
    plt.xlabel("Epochs")
    plt.legend()
    plt.show()