In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense, Dropout
from tensorflow.keras.utils import to_categorical

2024-10-26 15:31:01.501034: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-10-26 15:31:01.631981: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-10-26 15:31:01.682746: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-10-26 15:31:01.698817: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1452] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-10-26 15:31:01.788313: I tensorflow/core/platform/cpu_feature_guar

In [3]:

# Load data
data = pd.read_csv('train.csv', names=['Comment', 'Label'],header=1)
data

Unnamed: 0,Comment,Label
0,sir g ki value positive hogi,doubt
1,like the video guyssss,irrelevant
2,ummm sir i want see ur wife,irrelevant
3,physics wallah punabi mundamundi mein thusde d...,irrelevant
4,bhupesh,irrelevant
...,...,...
213741,superb performance sir,feedback
213742,mja aaya,feedback
213743,blocks wala sum be padao,feedback
213744,uminima stable umaxima unstable,feedback


In [4]:

# Data Preprocessing
comments = data['Comment'].values
labels = data['Label'].map({'doubt': 0, 'irrelevant': 1, 'feedback': 2}).values
labels = to_categorical(labels)  # One-hot encoding

In [5]:
tokenizer = Tokenizer(num_words=5000, oov_token="<OOV>")
tokenizer.fit_on_texts(comments)
sequences = tokenizer.texts_to_sequences(comments)
padded_sequences = pad_sequences(sequences, maxlen=100, padding='post', truncating='post')

In [6]:
X_train, X_test, y_train, y_test = train_test_split(padded_sequences, labels, test_size=0.2, random_state=42)

In [7]:

# Building the LSTM Model
model = Sequential([
    Embedding(input_dim=5000, output_dim=64, input_length=100),
    LSTM(64, return_sequences=True),
    Dropout(0.5),
    LSTM(32),
    Dense(3, activation='softmax')  # Output layer with 3 classes
])




In [8]:
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

In [9]:
model.fit(X_train, y_train, epochs=10, validation_data=(X_test, y_test), batch_size=32)

Epoch 1/10
[1m5344/5344[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m245s[0m 45ms/step - accuracy: 0.4490 - loss: 1.0700 - val_accuracy: 0.4444 - val_loss: 1.0709
Epoch 2/10
[1m5344/5344[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m227s[0m 43ms/step - accuracy: 0.4489 - loss: 1.0692 - val_accuracy: 0.4444 - val_loss: 1.0709
Epoch 3/10
[1m5344/5344[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m228s[0m 43ms/step - accuracy: 0.4488 - loss: 1.0689 - val_accuracy: 0.4445 - val_loss: 1.0712
Epoch 4/10
[1m5344/5344[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m226s[0m 42ms/step - accuracy: 0.4497 - loss: 1.0589 - val_accuracy: 0.4531 - val_loss: 1.0251
Epoch 5/10
[1m5344/5344[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m226s[0m 42ms/step - accuracy: 0.4918 - loss: 0.9580 - val_accuracy: 0.6655 - val_loss: 0.7520
Epoch 6/10
[1m5344/5344[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m227s[0m 43ms/step - accuracy: 0.6912 - loss: 0.7117 - val_accuracy: 0.7014 - val_loss: 0.690

<keras.src.callbacks.history.History at 0x7313d8a0fdd0>

In [10]:
loss, accuracy = model.evaluate(X_test, y_test)
print(f"Test Accuracy: {accuracy}")

[1m1336/1336[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 12ms/step - accuracy: 0.6971 - loss: 0.6998
Test Accuracy: 0.6992748379707336


In [None]:
from sklearn.metrics import f1_score
import numpy as np

# Make predictions
y_pred_probs = model.predict(X_test)
y_pred = np.argmax(y_pred_probs, axis=1)  # Convert probabilities to class labels
y_true = np.argmax(y_test, axis=1)  # Convert one-hot encoded test labels to class labels

# Calculate and print the weighted F1 score
f1 = f1_score(y_true, y_pred, average='weighted')
print("F1 Score:", f1)
