In [None]:
import pandas as pd
import numpy as np
import warnings
# NOTE(review): this silences *every* warning for the rest of the notebook,
# including deprecation notices from pandas and Keras.
warnings.filterwarnings("ignore")

# Read the IMDB reviews CSV (first row is the header); rows that cannot be
# parsed are skipped rather than aborting the load.
data = pd.read_csv(
    "/IMDB_Dataset.csv",
    header=0,
    on_bad_lines="skip",
)

# Preview the first and last ten rows.
print(data.head(10))
print("\n", data.tail(10))

# Explore, preprocess, and train the sentiment model

In [None]:
# Sanity check on the loaded frame: print its (rows, columns) shape,
# then display the type of the loaded object as the cell output.
print(data.shape)
type(data)

In [None]:
# Class balance check: number of reviews per sentiment label.
data["sentiment"].value_counts()

In [None]:
# Label encoder
# positive -> 1
# negative -> 0
#
# An explicit map + integer cast is used instead of DataFrame.replace(inplace=True):
# replace() only yields an integer column through pandas' silent object downcasting,
# which pandas 2.2 deprecates; without it the column stays object dtype and cannot
# be fed to Keras. The identity entries (1 -> 1, 0 -> 0) keep the cell safe to re-run
# after the column has already been encoded.
sentiment_codes = {"positive": 1, "negative": 0, 1: 1, 0: 0}
encoded_sentiment = data["sentiment"].map(sentiment_codes)

# map() yields NaN for anything outside the mapping; fail loudly instead of
# letting unlabeled rows leak into training.
unmapped = encoded_sentiment.isna()
if unmapped.any():
  unexpected = data.loc[unmapped, "sentiment"].unique().tolist()
  raise ValueError(f"Unexpected sentiment labels: {unexpected}")

data["sentiment"] = encoded_sentiment.astype("int64")
data.head()

In [None]:
from sklearn.model_selection import train_test_split
from tensorflow.keras import Input
from tensorflow.keras.layers import Dense, Embedding, LSTM
from tensorflow.keras.models import Sequential
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.preprocessing.text import Tokenizer

# Hold out 20% of the rows for testing; the fixed random_state makes the
# split repeatable across runs.
train_data, test_data = train_test_split(data, test_size=0.2, random_state=42)
print("Train shape : ", train_data.shape)
print("Test shape : ", test_data.shape)

In [None]:
# Fit the vocabulary on the training reviews only, so nothing from the test
# split influences the word index.
tokenizer = Tokenizer(num_words=5000)
tokenizer.fit_on_texts(train_data["review"])


def _to_padded(reviews):
  """Turn raw review texts into integer sequences of length 200 via the fitted tokenizer."""
  return pad_sequences(tokenizer.texts_to_sequences(reviews), maxlen=200)


X_train = _to_padded(train_data["review"])
X_test = _to_padded(test_data["review"])
X_train

In [None]:
# Display the padded test sequences.
X_test

In [None]:
# Target labels for each split, taken from the "sentiment" column.
Y_train, Y_test = train_data["sentiment"], test_data["sentiment"]
Y_train

In [None]:
# Sentiment classifier: token ids -> 128-d embeddings -> LSTM -> single sigmoid unit.
#
# The sequence length is declared with an explicit Input layer rather than
# Embedding's `input_length` argument: Keras 3 deprecates and ignores that
# argument, which leaves the model unbuilt and makes model.summary() print
# no output shapes or parameter counts. Input(shape=...) works on both
# Keras 2 and Keras 3.
model = Sequential()
model.add(Input(shape=(200,)))  # must match the maxlen used when padding
model.add(Embedding(input_dim=5000, output_dim=128))  # input_dim matches Tokenizer(num_words=5000)
model.add(LSTM(128, dropout=0.2, recurrent_dropout=0.2))
model.add(Dense(1, activation="sigmoid"))
model.summary()

In [None]:
# Binary cross-entropy pairs with the single sigmoid output unit.
model.compile(
    optimizer="adam",
    loss="binary_crossentropy",
    metrics=["accuracy"],
)

# Train for 5 epochs, holding out 20% of the training data for validation.
model.fit(
    X_train,
    Y_train,
    epochs=5,
    batch_size=64,
    validation_split=0.2,
)

In [None]:
import joblib

# Persist the trained model and the fitted tokenizer so both can be reloaded
# together for inference.
model.save("model.h5")
joblib.dump(tokenizer, "tokenizer.pkl")

# Score the model on the held-out test split and print loss, then accuracy.
loss, accuracy = model.evaluate(X_test, Y_test)
print(loss, accuracy, sep="\n")

In [None]:
def predictive_system(review, threshold=0.5):
  """Classify a single review as "positive" or "negative".

  The review is tokenized with the notebook's fitted ``tokenizer``, padded to
  the same length used for training (200), and scored by the notebook's
  ``model``.

  Args:
    review: Raw review text.
    threshold: Cut-off applied to the model's sigmoid output; scores strictly
      greater than this value are labelled "positive". Defaults to 0.5.

  Returns:
    The string "positive" or "negative".
  """
  sequences = tokenizer.texts_to_sequences([review])
  # maxlen must stay equal to the padding length the model was trained with.
  padded_sequence = pad_sequences(sequences, maxlen=200)
  # verbose=0 stops Keras from printing a progress bar for every single-review call.
  prediction = model.predict(padded_sequence, verbose=0)
  score = float(prediction[0][0])
  return "positive" if score > threshold else "negative"

In [None]:
# Smoke-test the classifier on a few hand-written reviews, one label per line.
sample_reviews = [
  "This movie was fantastic and amazing cast",
  "A trilling adventure with stunning visual",
  "A visual masterpiece",
  "Too slow and boring",
  "The film I was looking forward to but was a big disappointment.",
]
for sample_review in sample_reviews:
  print(predictive_system(sample_review))