In [1]:
import pandas as pd
import numpy as np
from keras.layers import Embedding
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, GlobalMaxPooling1D, Dense
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from sklearn.metrics import classification_report

# Load the dataset
df = pd.read_excel('dataset_new.xlsx')

# Preprocessing
# Tokenize comments and pad them
tokenizer = Tokenizer()
tokenizer.fit_on_texts(df['text'])
vocab_size = len(tokenizer.word_index) + 1

# Convert texts to sequences
sequences = tokenizer.texts_to_sequences(df['text'])

# Define max length for padding
max_length = max(len(seq) for seq in sequences)

# Pad sequences to the same length
padded_sequences = pad_sequences(sequences, maxlen=max_length, padding='post')

# Encode labels
label_encoder = LabelEncoder()
df['emotion_type'] = label_encoder.fit_transform(df['emotion_type'])

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(padded_sequences, df['emotion_type'], test_size=0.2, random_state=42)

# Sample model creation
model = Sequential([
    Embedding(input_dim=vocab_size, output_dim=128),  # Removed input_length
    Conv1D(filters=128, kernel_size=5, activation='relu'),
    GlobalMaxPooling1D(),
    Dense(64, activation='relu'),
    Dense(4, activation='softmax')  # Multiclass classification
])

# Compile the model
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# Train the model
history = model.fit(X_train, y_train, epochs=10, batch_size=32, validation_split=0.2)

# Evaluate the model
y_pred_probs = model.predict(X_test)
y_pred_classes = np.argmax(y_pred_probs, axis=1)

# Calculate classification report
report = classification_report(y_test, y_pred_classes, target_names=label_encoder.classes_, output_dict=True)

# Print classification report
print("Classification Report:")
print(classification_report(y_test, y_pred_classes, target_names=label_encoder.classes_))

# Extract individual metrics
accuracy = report['accuracy']
precision = report['macro avg']['precision']
recall = report['macro avg']['recall']
f1_score = report['macro avg']['f1-score']

# Print individual metrics
print(f"Accuracy: {accuracy:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1-score: {f1_score:.4f}")




Epoch 1/10
[1m901/901[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m173s[0m 187ms/step - accuracy: 0.3748 - loss: 1.2966 - val_accuracy: 0.4992 - val_loss: 1.1415
Epoch 2/10
[1m901/901[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m177s[0m 197ms/step - accuracy: 0.7794 - loss: 0.6321 - val_accuracy: 0.4890 - val_loss: 1.3593
Epoch 3/10
[1m901/901[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m183s[0m 175ms/step - accuracy: 0.9607 - loss: 0.1392 - val_accuracy: 0.4815 - val_loss: 1.5543
Epoch 4/10
[1m901/901[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m154s[0m 171ms/step - accuracy: 0.9827 - loss: 0.0663 - val_accuracy: 0.4797 - val_loss: 1.7120
Epoch 5/10
[1m901/901[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m197s[0m 165ms/step - accuracy: 0.9850 - loss: 0.0513 - val_accuracy: 0.4734 - val_loss: 1.7270
Epoch 6/10
[1m901/901[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m207s[0m 169ms/step - accuracy: 0.9859 - loss: 0.0425 - val_accuracy: 0.4738 - val_loss: 2.0163
Epoc