In [3]:
import warnings
warnings.filterwarnings('ignore')



In [4]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense
from tensorflow.keras.utils import to_categorical
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

# Attempt to read the CSV file with error handling
try:
    df1 = pd.read_csv('data.csv', encoding='utf-8', error_bad_lines=False)
except pd.errors.ParserError as e:
    print(f"Error reading CSV file: {e}")
    df1 = None

if df1 is not None:
    # Select columns of interest
    df = df1[['Review Text', 'Rating', 'Class Name', 'Age']]
    # Assuming 'Rating' is the target column with sentiment labels
    # and 'Review Text' is the feature column with text data
    df = df.dropna(subset=['Review Text'])  # Drop rows with NaN values in 'Review Text'

    X = df['Review Text']
    y = df['Rating']

    # Preprocess the text data
    tokenizer = Tokenizer(num_words=5000, oov_token="<OOV>")
    tokenizer.fit_on_texts(X.astype(str))  # Convert to string to handle NaNs
    sequences = tokenizer.texts_to_sequences(X)
    padded_sequences = pad_sequences(sequences, truncating='post', maxlen=500)

    # Encode the labels
    encoder = LabelEncoder()
    y_encoded = encoder.fit_transform(y)
    y_categorical = to_categorical(y_encoded)

    # Split the data into training and testing sets
    X_train, X_test, y_train, y_test = train_test_split(padded_sequences, y_categorical, test_size=0.2)

    # Define the LSTM model
    model = Sequential()
    model.add(Embedding(5000, 64, input_length=500))
    model.add(LSTM(64, dropout=0.2))
    # Change the output layer to have the same number of units as the number of classes
    # and use softmax activation for multi-class classification
    model.add(Dense(len(np.unique(y)), activation='softmax'))

    # Compile the model
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

    # Train the model
    model.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=20, batch_size=64)

    # Evaluate the model
    loss, accuracy = model.evaluate(X_test, y_test)
    print(f"LSTM Model Accuracy: {accuracy * 100:.2f}%")
else:
    print("CSV file couldn't be loaded due to parsing errors.")

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
LSTM Model Accuracy: 38.79%


In [7]:
from sklearn.metrics import classification_report
import numpy as np

# Assuming y_test is a one-hot encoded array, convert it back to class labels
y_test_labels = np.argmax(y_test, axis=1)

# Now y_test_labels is a 1D array of class labels
# Let's assume you have a trained model named 'model' and you want to predict on y_test
y_pred = model.predict(X_test)

# If your model outputs probabilities, you might need to convert them to class labels
y_pred_labels = np.argmax(y_pred, axis=1)

# Compute and print the classification report
report = classification_report(y_test_labels, y_pred_labels)
print("Classification Report:")
print(report)

Classification Report:
              precision    recall  f1-score   support

           0       0.21      0.34      0.26      1002
           1       0.21      0.14      0.17      1106
           2       0.24      0.28      0.26      1364
           3       0.32      0.23      0.27      1794
           4       0.60      0.62      0.61      3263

    accuracy                           0.39      8529
   macro avg       0.32      0.32      0.31      8529
weighted avg       0.39      0.39      0.38      8529

