In [0]:
!pip install tensorflow-gpu

In [0]:
import pandas as pd
import numpy as np
import tensorflow as tf

In [0]:
# Print the TensorFlow version that was imported above.
print(tf.__version__)

In [0]:
# Mount Google Drive at /gdrive; the TSV files loaded below are read from that mount point.
from google.colab import drive
drive.mount("/gdrive")

In [0]:
# Read at most the first 100,000 rows of each tab-separated file from the mounted Drive.
train_df = pd.read_csv(
    "/gdrive/My Drive/Colab Notebooks/train.tsv",
    sep="\t",
    nrows=100000,
)
test_df = pd.read_csv(
    "/gdrive/My Drive/Colab Notebooks/test.tsv",
    sep="\t",
    nrows=100000,
)

In [0]:
# Preview the first rows of the training frame.
train_df.head()

In [0]:
#test_df.head()

In [0]:
# Index the training frame by PhraseId and discard the SentenceId column,
# then preview the result.
train_df = train_df.set_index("PhraseId").drop(columns="SentenceId")
train_df.head()

In [0]:
# Same reshaping as for the training frame: PhraseId becomes the index,
# SentenceId is removed, and the result is previewed.
test_df = test_df.set_index("PhraseId").drop(columns="SentenceId")
test_df.head()

## X_train and Y_train

In [0]:
# Model input is the Phrase text column; the target is the Sentiment column.
features, labels = train_df["Phrase"], train_df["Sentiment"]

In [0]:
from tensorflow.keras.utils import to_categorical

In [0]:
# Take the class ids from the raw Sentiment column rather than from `labels`:
# `labels` is rebound to a one-hot matrix in a later cell, so reading it here
# would give a wrong answer if this cell were re-run afterwards.
all_class = np.unique(train_df["Sentiment"])
# The one-hot width must be (largest class id + 1). Counting distinct ids would
# be too small whenever some id is absent from the rows that were loaded.
num_class = int(all_class.max()) + 1

In [0]:
# One-hot encode directly from the source column so that re-running this cell
# does not encode an already-encoded matrix.
labels = to_categorical(train_df["Sentiment"], num_classes=num_class)
# Display a small slice instead of the whole array.
labels[:5]

In [0]:
from sklearn.model_selection import train_test_split

In [0]:
# Hold out 20% of the rows for validation. test_size is given as a fraction:
# an integer value would be interpreted as an absolute number of rows.
# random_state pins the shuffle so the split is repeatable across runs.
X_train, X_test, Y_train, Y_test = train_test_split(
    features,
    labels,
    test_size=0.2,
    random_state=42,
)

### Build Tokenizer

In [0]:
from tensorflow.keras.preprocessing.text import Tokenizer

In [0]:
# Vocabulary cap: passed to the tokenizer as num_words and reused later to size
# the embedding table.
top_words = 1000
wordTokenizer = Tokenizer(num_words=top_words)

In [0]:
# Build the tokenizer vocabulary from the training phrases only.
wordTokenizer.fit_on_texts(X_train.tolist())

#### Convert each phrase to a sequence of word indices

In [0]:
# Replace the training phrases with their sequences of word indices.
X_train = wordTokenizer.texts_to_sequences(X_train.tolist())

In [0]:
# Encode the held-out phrases with the tokenizer that was fitted on the training phrases.
X_test = wordTokenizer.texts_to_sequences(X_test.tolist())

In [0]:
from tensorflow.keras.preprocessing import sequence

In [0]:
# Fixed sequence length used for padding and as the embedding layer's input_length.
max_review_lenght = 300

In [0]:
# Bring every training sequence to max_review_lenght; padding goes at the end ("post").
X_train = sequence.pad_sequences(
    X_train,
    padding="post",
    maxlen=max_review_lenght,
)

In [0]:
# Pad the held-out sequences the same way as the training sequences.
X_test = sequence.pad_sequences(
    X_test,
    padding="post",
    maxlen=max_review_lenght,
)

### Build model

In [0]:
# Reset Keras' global state, then start from an empty Sequential model.
tf.keras.backend.clear_session()
model = tf.keras.Sequential()

In [0]:
# Embedding table with top_words + 1 rows and 50-dimensional vectors, applied to
# inputs of length max_review_lenght.
model.add(tf.keras.layers.Embedding(top_words + 1, 50, input_length=max_review_lenght))

# NOTE(review): the inputs are post-padded and the embedding applies no mask, so the
# LSTM also steps through the trailing padding positions — worth revisiting.
model.add(tf.keras.layers.Dropout(0.2))
model.add(tf.keras.layers.LSTM(256))
model.add(tf.keras.layers.Dropout(0.2))

# The output width follows num_class — the same value used to one-hot encode the
# labels — so the softmax layer cannot drift out of sync with the targets.
model.add(tf.keras.layers.Dense(num_class, activation="softmax"))
model.compile(optimizer="adam", loss="categorical_crossentropy", metrics=['accuracy'])

In [0]:
# Print the layer-by-layer summary of the assembled model.
model.summary()

In [0]:
# Train for 10 epochs in mini-batches of 32, evaluating on the held-out split after each epoch.
model.fit(
    X_train,
    Y_train,
    batch_size=32,
    epochs=10,
    validation_data=(X_test, Y_test),
)

### Predictions on the test data

In [0]:
# Take the Phrase text column of the test frame as the prediction input.
test_data = test_df['Phrase']

In [0]:
# Encode and pad the test phrases with the same tokenizer and length used for training.
test_data = wordTokenizer.texts_to_sequences(test_data.tolist())
test_data = sequence.pad_sequences(
    test_data,
    padding="post",
    maxlen=max_review_lenght,
)

In [0]:
# Predicted class per phrase = index of the largest softmax output along the last axis.
Y_pred = model.predict(test_data).argmax(axis=-1)

In [0]:
# Display the predicted class indices.
Y_pred

### Test phrases predicted as not in favor of the movie (predicted class ≤ 2)

In [0]:
# Attach the predicted class to the test frame as a new Y_pred column.
test_df['Y_pred'] = Y_pred

In [0]:
# Keep only the rows whose predicted class is 2 or lower.
test_df[test_df["Y_pred"].le(2)]