# Sentimental analysis using LSTM

In [1]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow import keras
from keras.layers import LSTM, Dense, Embedding
from keras.models import Sequential
from keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

In [2]:
df = pd.read_csv('./datasets/Sentimental Analysis.csv')
df.head()

Unnamed: 0,text,label
0,I grew up (b. 1965) watching and loving the Th...,0
1,"When I put this movie in my DVD player, and sa...",0
2,Why do people who do not know what a particula...,0
3,Even though I have great interest in Biblical ...,0
4,Im a die hard Dads Army fan and nothing will e...,1


In [3]:
df.shape

(40000, 2)

In [4]:
text = df.iloc[:5000,0]
label = df.iloc[:5000,1]

In [5]:
tknizr = Tokenizer()
tknizr.fit_on_texts(text)
seq = tknizr.texts_to_sequences(text)
data = pad_sequences(seq) #To ensure all the sequences are of same length we pad them

In [6]:
from sklearn.model_selection import train_test_split as tts
x_train, x_test, y_train, y_test = tts(data, label, test_size=0.25, random_state=42)

In [7]:
x_train.shape

(3750, 1336)

In [8]:
model = Sequential([
    Embedding(input_dim=len(tknizr.word_index)+1, input_length=data.shape[1], output_dim=32),
    LSTM(64),
    Dense(1, activation='sigmoid')
])

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

y_train = np.asarray(y_train, dtype=float)
y_test = np.asarray(y_test, dtype=float)

model.fit(x_train, y_train, validation_data=(x_test, y_test), epochs=1)




<keras.callbacks.History at 0x263fa096ef0>

In [11]:
new_text = "I hate this city"
new_seq = tknizr.texts_to_sequences(new_text)
new_data = pad_sequences(new_seq)

pred = model.predict(new_data)



In [12]:
pred

array([[0.49554548],
       [0.51352435],
       [0.5171258 ],
       [0.50780153],
       [0.48519838],
       [0.48921096],
       [0.51352435],
       [0.48519838],
       [0.5171258 ],
       [0.49554548],
       [0.49010578],
       [0.51352435],
       [0.49591178],
       [0.49554548],
       [0.48519838],
       [0.496672  ]], dtype=float32)