In [5]:
!pip install tensorflow

import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense, Dropout
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import classification_report, accuracy_score
from sklearn.model_selection import train_test_split

# Load balanced dataset
df = pd.read_csv("/content/drive/MyDrive/Dessertation/balanced_sentiment_data.csv", encoding="latin1")

# Preprocess and tokenize
texts = df['processed_text'].astype(str).tolist()
labels = df['sentiment_label'].tolist()

# Encode sentiment labels
label_encoder = LabelEncoder()
y = label_encoder.fit_transform(labels)

# Tokenize text
vocab_size = 10000
tokenizer = Tokenizer(num_words=vocab_size, oov_token="<OOV>")
tokenizer.fit_on_texts(texts)
sequences = tokenizer.texts_to_sequences(texts)

# Pad sequences
max_length = 100
X = pad_sequences(sequences, maxlen=max_length, padding='post', truncating='post')

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

# Build the LSTM model
model = Sequential([
    Embedding(input_dim=vocab_size, output_dim=128, input_length=max_length),
    LSTM(128, return_sequences=False),
    Dropout(0.5),
    Dense(64, activation='relu'),
    Dense(3, activation='softmax')  # 3 sentiment classes
])

model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

# Train the model
history = model.fit(X_train, y_train, epochs=10, batch_size=32, validation_data=(X_test, y_test))

# Evaluate
y_pred_probs = model.predict(X_test)
y_pred = np.argmax(y_pred_probs, axis=1)

# Print evaluation
accuracy = accuracy_score(y_test, y_pred)
print(f"🔹 LSTM Accuracy: {accuracy:.4f}")
print("\n🔹 Classification Report:\n")
print(classification_report(y_test, y_pred, target_names=label_encoder.classes_))

Epoch 1/10




[1m404/404[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m60s[0m 144ms/step - accuracy: 0.3344 - loss: 1.1003 - val_accuracy: 0.3331 - val_loss: 1.0991
Epoch 2/10
[1m404/404[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m82s[0m 143ms/step - accuracy: 0.3367 - loss: 1.0991 - val_accuracy: 0.3334 - val_loss: 1.0997
Epoch 3/10
[1m404/404[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m58s[0m 143ms/step - accuracy: 0.3387 - loss: 1.0991 - val_accuracy: 0.3334 - val_loss: 1.0987
Epoch 4/10
[1m404/404[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m81s[0m 141ms/step - accuracy: 0.3335 - loss: 1.0992 - val_accuracy: 0.3331 - val_loss: 1.0987
Epoch 5/10
[1m404/404[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m55s[0m 136ms/step - accuracy: 0.3259 - loss: 1.0992 - val_accuracy: 0.3334 - val_loss: 1.0986
Epoch 6/10
[1m404/404[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m82s[0m 137ms/step - accuracy: 0.3252 - loss: 1.0991 - val_accuracy: 0.3334 - val_loss: 1.0986
Epoch 7/10
[1m404/40

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
