In [16]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense, Dropout
from tensorflow.keras.utils import to_categorical

In [22]:
data = pd.read_csv(r"C:\Users\Manthan\Desktop\jupyter_extension_prettier\vs_code_extension\sentiment_analysis.csv")
data.head()

Unnamed: 0,Year,Month,Day,Time of Tweet,text,sentiment,Platform
0,2018,8,18,morning,What a great day!!! Looks like dream.,positive,Twitter
1,2018,8,18,noon,"I feel sorry, I miss you here in the sea beach",positive,Facebook
2,2017,8,18,night,Don't angry me,negative,Facebook
3,2022,6,8,morning,We attend in the class just for listening teac...,negative,Facebook
4,2022,6,8,noon,"Those who want to go, let them go",negative,Instagram


In [23]:
max_len = 50
max_vocab = 5000

# object of tokenizer
tokenizer = Tokenizer(num_words=max_vocab, oov_token="OOV")

# shorten the data
data = data[["text", "sentiment"]]
data = data.head(10000)

In [41]:
data["text"] = data["text"].astype(str).fillna("unknown")

# tokenize the text
tokenizer.fit_on_texts(data["text"])


X=tokenizer.texts_to_sequences(data["text"])
X=pad_sequences(X, maxlen=max_len, padding="post")
print(X[98])


[ 68   9  11 306 448   2  51   8 449 450 307   9 233   2  48 451  95  28
  80  10 234 452  10  74 235 453   0   0   0   0   0   0   0   0   0   0
   0   0   0   0   0   0   0   0   0   0   0   0   0   0]


In [44]:
label_mapping = {label: idx for idx, label in enumerate(data["sentiment"].unique())}
print(label_mapping)

{'positive': 0, 'negative': 1, 'neutral': 2}


In [26]:
y = data["sentiment"].map(label_mapping).values
print(y[0])
y = to_categorical(y)
print(y[0])

0
[1. 0. 0.]


In [27]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
print(X_train.shape)
print(y_train.shape)
print(X_test.shape)
print(y_test.shape)

print((np.unique(y_train)))

(399, 50)
(399, 3)
(100, 50)
(100, 3)
[0. 1.]


In [30]:
#  build LSTM model
model = Sequential([
    Embedding(input_dim=max_vocab, output_dim=128, input_length=128),
    LSTM(64),
    Dense(y.shape[1], activation="softmax")
])



In [32]:
model.compile(optimizer="adam", loss="categorical_crossentropy", metrics=["accuracy"])
model.fit(X_train, y_train, epochs=10, batch_size=16)
model.evaluate(X_test, y_test)

Epoch 1/10
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 28ms/step - accuracy: 0.3826 - loss: 1.0929
Epoch 2/10
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 27ms/step - accuracy: 0.4319 - loss: 1.0721
Epoch 3/10
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 27ms/step - accuracy: 0.4446 - loss: 1.0703
Epoch 4/10
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 28ms/step - accuracy: 0.4066 - loss: 1.0779
Epoch 5/10
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 29ms/step - accuracy: 0.4292 - loss: 1.0788
Epoch 6/10
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 27ms/step - accuracy: 0.4348 - loss: 1.0710
Epoch 7/10
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 27ms/step - accuracy: 0.4111 - loss: 1.0815
Epoch 8/10
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - accuracy: 0.4378 - loss: 1.0452
Epoch 9/10
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━

[1.1634173393249512, 0.4399999976158142]

In [38]:
test_seq = tokenizer.texts_to_sequences("I am very happing today")
test_seq = pad_sequences(test_seq, maxlen=max_len, padding="post")
print(test_seq)

[[  2   0   0 ...   0   0   0]
 [  0   0   0 ...   0   0   0]
 [  6   0   0 ...   0   0   0]
 ...
 [193   0   0 ...   0   0   0]
 [  6   0   0 ...   0   0   0]
 [950   0   0 ...   0   0   0]]


In [39]:
sentiment = model.predict(test_seq)
sentiment_label = sentiment.argmax(axis=-1)
print(f"Predicted sentiment index: {sentiment_label[0]}")

# {'positive': 0, 'negative': 1, 'neutral': 2}

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 50ms/step
Predicted sentiment index: 2
