# Sentiment Analysis using LSTM

In [2]:
import pandas as pd
import numpy as np
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Embedding, LSTM
from sklearn.model_selection import train_test_split as tts

In [6]:
# Step 1: Load the dataset
# Read the CSV from the working directory, then preview the first five rows.
df = pd.read_csv(filepath_or_buffer='Sentiment.csv')
df.head(n=5)

Unnamed: 0,Index,message to examine,label (depression result)
0,106,just had a real good moment. i missssssssss hi...,0
1,217,is reading manga http://plurk.com/p/mzp1e,0
2,220,@comeagainjen http://twitpic.com/2y2lx - http:...,0
3,288,@lapcat Need to send 'em to my accountant tomo...,0
4,540,ADD ME ON MYSPACE!!! myspace.com/LookThunder,0


In [7]:
# Select the message column and the label column, then expose each one
# through `.values` for the tokenizer and the train/test split.
message_series = df['message to examine']
label_series = df['label (depression result)']
texts, labels = message_series.values, label_series.values

In [8]:
# Step 2: Preprocess the text data
# Fit the vocabulary on every message, then encode each message as a list of
# integer word ids.
tokenizer = Tokenizer()
tokenizer.fit_on_texts(texts)
sequences = tokenizer.texts_to_sequences(texts)

# Pad at the front (the Keras defaults, written out explicitly). With no
# maxlen given, every row is padded to the length of the longest sequence.
data = pad_sequences(sequences, padding = 'pre', truncating = 'pre')

In [9]:
# Step 3: Split the data into training and testing sets
# A fixed random_state makes the 80/20 split identical on every
# Restart & Run All, so metrics are comparable between runs.
x_train, x_test, y_train, y_test = tts(data, labels, test_size = 0.2, random_state = 42)

# Build the classifier: embedding -> LSTM -> single sigmoid unit.
model = Sequential()
# Vocabulary size is the number of known words + 1; Tokenizer ids start at 1,
# and 0 is the value pad_sequences fills with.
# NOTE(review): `input_length` is reported as deprecated by Keras 3 - confirm
# the installed version before removing it.
model.add(Embedding(len(tokenizer.index_word)+1, 32, input_length = data.shape[1]))
model.add(LSTM(64))
model.add(Dense(1, activation = 'sigmoid'))

# Binary cross-entropy pairs with the single sigmoid output; accuracy is
# tracked as the only extra metric.
model.compile(loss = 'binary_crossentropy', optimizer = 'adam', metrics = ['accuracy'])

In [10]:
# Cast both label vectors to float arrays before they are passed to model.fit.
y_train, y_test = (
    np.array(split_labels, dtype = np.float64)
    for split_labels in (y_train, y_test)
)

In [11]:
# Train for a single epoch; the test split is passed as validation data so
# its loss and accuracy are reported at the end of the epoch.
model.fit(
    x = x_train,
    y = y_train,
    epochs = 1,
    validation_data = (x_test, y_test),
)



<keras.src.callbacks.History at 0x280205fd0>

In [12]:
# Step 4: Use the trained model to classify new text
# Spelling matters here: the Tokenizer was created without an oov_token, so a
# word that is not in the fitted vocabulary is dropped from the sequence.
new_text = ['I had a terrible experience with the product and service']
new_sequence = tokenizer.texts_to_sequences(new_text)
# Pad to the same width as the training matrix so the input shape matches.
new_data = pad_sequences(new_sequence, maxlen = data.shape[1])
prediction = model.predict(new_data)
# Display the sigmoid output for the sentence (one row, one column).
prediction



In [13]:
# Step 5: Evaluate the model
# Score the model on the held-out split. The result is indexed as
# [loss, accuracy], since 'accuracy' is the only metric compiled in.
scores = model.evaluate(x = x_test, y = y_test)
test_accuracy = scores[1]
print('Accuracy:', test_accuracy)

Accuracy: 0.9835191369056702
