In [2]:
import numpy as np
import pandas as pd
from keras.preprocessing.text import one_hot
from keras.preprocessing.sequence import pad_sequences
from keras.models import Sequential
from keras.callbacks import Callback
from sklearn.model_selection import train_test_split
from keras.layers import Dense,Embedding,LSTM

print("All Dependencies Installed !")

All Dependencies Installed !


In [1]:
from google.colab import drive

# Directory inside the Colab VM under which Google Drive is mounted; the
# dataset path used when reading the CSV lives below it.
DRIVE_MOUNT_POINT = '/content/drive/'
drive.mount(DRIVE_MOUNT_POINT)

Mounted at /content/drive/


In [3]:
# Load the IMDB reviews and turn the text labels into integers.
df = pd.read_csv("/content/drive/MyDrive/IMDB Dataset.csv")

# Assign the mapped column back instead of calling `replace(..., inplace=True)`
# on the column selection: the chained in-place form is deprecated in pandas
# and does not update `df` when Copy-on-Write is enabled.
label_map = {"positive": 1, "negative": 0}
unknown_labels = set(df["sentiment"].unique()) - set(label_map)
if unknown_labels:
    # Fail here with a clear message rather than later inside model.fit().
    raise ValueError(f"Unexpected sentiment labels in dataset: {unknown_labels!r}")
df["sentiment"] = df["sentiment"].map(label_map)

x = np.array(df["review"].values)
y = np.array(df["sentiment"].values)

# Characters that are blanked out of every review. The backslash is escaped
# explicitly ('\\,') because a bare '\,' is an invalid escape sequence.
punc = '''!()-[]{};:'"\\,<>./?@#$%^&*_~'''
# One translation table, built once: each punctuation character -> a space.
punc_table = str.maketrans(punc, " " * len(punc))

# Lower-case each review and replace punctuation with spaces in a single pass
# (same result as replacing character by character, without rescanning the
# whole review for every punctuation character found).
x_filtered = [review.lower().translate(punc_table) for review in x]

print("Data Preparation Stage-1 completed !")

Data Preparation Stage-1 completed !


In [15]:
# Spot-check the cleaning step on the first review: the text should be
# lower-cased with punctuation replaced by spaces.
sample_review = x_filtered[0]
print(sample_review)

one of the other reviewers has mentioned that after watching just 1 oz episode you ll be hooked  they are right  as this is exactly what happened with me  br    br   the first thing that struck me about oz was its brutality and unflinching scenes of violence  which set in right from the word go  trust me  this is not a show for the faint hearted or timid  this show pulls no punches with regards to drugs  sex or violence  its is hardcore  in the classic use of the word  br    br   it is called oz as that is the nickname given to the oswald maximum security state penitentary  it focuses mainly on emerald city  an experimental section of the prison where all the cells have glass fronts and face inwards  so privacy is not high on the agenda  em city is home to many  aryans  muslims  gangstas  latinos  christians  italians  irish and more    so scuffles  death stares  dodgy dealings and shady agreements are never far away  br    br   i would say the main appeal of the show is due to the fac

In [4]:
# Encode each review as a list of integer word indices.
# Despite its name, keras `one_hot` hashes every word into the range
# [1, vocalbulary_size); different words can collide on the same index.
# NOTE(review): the Keras docs describe the default hash as not stable across
# runs, so these indices are presumably only consistent within one Python
# process -- confirm before reusing a saved model in a new session.
vocalbulary_size = 5000
onehot_encoded = [one_hot(review, vocalbulary_size) for review in x_filtered]

# Pad (with trailing zeros) or truncate every encoded review to max_length
# tokens. `truncating` is left at the pad_sequences default.
max_length = 500
x_padded = pad_sequences(onehot_encoded, maxlen=max_length, padding="post")

# Fixed seed so the same reviews end up in the held-out set on every run, and
# stratified so both splits keep the dataset's positive/negative ratio.
x_train, x_test, y_train, y_test = train_test_split(
    x_padded,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)

print("Data Preparation Stage-2 completed !")

Data Preparation Stage-2 completed !


In [5]:
# Embedding -> LSTM -> sigmoid classifier for binary sentiment.
model = Sequential()
embeded_vector_size = 35
# Maps each of the vocalbulary_size word indices to a 35-dimensional vector.
model.add(Embedding(vocalbulary_size, embeded_vector_size, input_length=max_length))
model.add(LSTM(100))
# Single sigmoid unit: the output is the predicted probability of label 1.
model.add(Dense(1, activation="sigmoid"))

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=["accuracy"])

# summary() prints the table itself and returns None, so it must not be
# wrapped in print() (that emitted a stray "None" after the table).
model.summary()
print("Model Creation Completed !")

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding (Embedding)       (None, 500, 35)           175000    
                                                                 
 lstm (LSTM)                 (None, 100)               54400     
                                                                 
 dense (Dense)               (None, 1)                 101       
                                                                 
Total params: 229501 (896.49 KB)
Trainable params: 229501 (896.49 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________
None
Model Creation Completed !


In [6]:
# Custom Keras callback to stop training when certain accuracy is achieved.
class MyThresholdCallback(Callback):
    """Stop training and save the model once validation accuracy hits a threshold.

    Args:
        threshold: validation accuracy (the "val_accuracy" entry of the epoch
            logs) at or above which training is stopped and the model saved.
    """

    def __init__(self, threshold):
        super().__init__()
        self.threshold = threshold

    def on_epoch_end(self, epoch, logs=None):
        """Check "val_accuracy" after each epoch; stop and save when it is reached.

        If the logs carry no "val_accuracy" (e.g. fit() was called without
        validation data) a warning is issued and training simply continues.
        """
        val_acc = (logs or {}).get("val_accuracy")
        if val_acc is None:
            import warnings

            warnings.warn(
                "MyThresholdCallback: 'val_accuracy' missing from logs; "
                "pass validation data to fit() for the threshold to apply."
            )
            return

        if val_acc >= self.threshold:
            self.model.stop_training = True
            model_name = "IMDB_sentiment_analysis_" + str(val_acc)
            # Save the model this callback is attached to, not whatever the
            # notebook-global name `model` happens to point at.
            self.model.save(model_name)

# Model converges at 0.87 accuracy with current hyperparameters.
# Training runs for at most 100 epochs; the callback ends it early (and saves
# a checkpoint) as soon as validation accuracy reaches the threshold.
stop_at_target = MyThresholdCallback(threshold=0.87)
model.fit(
    x_train,
    y_train,
    epochs=100,
    validation_data=(x_test, y_test),
    callbacks=[stop_at_target],
)

# Persist the final model under a fixed name as well.
model.save("sentiment_analysis")

print("Model Training Completed !")

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Model Training Completed !


In [9]:
# Optional: zip the saved model directory and download it from Colab.
# The code used to be disabled by wrapping it in a string literal; an explicit
# flag keeps it runnable on demand without echoing the source as cell output.
DOWNLOAD_MODEL = False  # set to True to create and download the archive

if DOWNLOAD_MODEL:
    import shutil

    from google.colab import files

    # Path where your model is saved
    model_folder_path = '/content/sentiment_analysis'

    # Create a zip file for the folder to download
    shutil.make_archive('IMDB_sentiment_analysis', 'zip', model_folder_path)

    # Download the file
    files.download('IMDB_sentiment_analysis.zip')


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [13]:
def get_sentiment(
    sentence: str,
    model_path: str = "/content/IMDB_sentiment_analysis_0.8701000213623047",
):
    """Predict the sentiment of a single sentence with the saved model.

    The sentence goes through the same steps as the training reviews:
    lower-casing, punctuation replaced by spaces, `one_hot` encoding with
    `vocalbulary_size`, and post-padding to `max_length`.

    Args:
        sentence: text to classify.
        model_path: location of the saved Keras model to load.

    Returns:
        1 if the predicted probability is above 0.5 (also prints "Positive"),
        otherwise 0 (also prints "Negative").

    Raises:
        TypeError: if `sentence` is not a `str`.
    """
    if not isinstance(sentence, str):
        raise TypeError("Input needs to be of type 'str' ")

    # filtering the sentence
    sentence = sentence.lower()

    # Backslash escaped explicitly: a bare '\,' is an invalid escape sequence.
    punc = '''!()-[]{};:'"\\,<>./?@#$%^&*_~'''
    sentence = sentence.translate(str.maketrans(punc, " " * len(punc)))

    # Encode and pad the sentence itself. Previously the argument was never
    # fed to the model: the function returned the prediction for x_test[2]
    # no matter what text was passed in.
    # NOTE(review): the Keras docs describe the default `one_hot` hash as not
    # stable across runs, so the word indices presumably only match the
    # training encoding inside the Python process that trained the model --
    # confirm before using this in a fresh session.
    encoded = one_hot(sentence, vocalbulary_size)
    padded = pad_sequences([encoded], maxlen=max_length, padding="post")

    # Loading the saved trained model.
    from keras.models import load_model

    trained_model = load_model(model_path)

    # predict() returns one row per input with a single sigmoid output.
    probability = float(trained_model.predict(padded)[0][0])
    sentiment = 1 if probability > 0.5 else 0

    if sentiment == 1:
        print("Positive")
    else:
        print("Negative")

    return sentiment


get_sentiment("That movie was really ok!")

Positive


1