# Sentiment Analysis Models - TikNep Dataset

This notebook implements and evaluates various models for the **Sentiment Analysis (SEN)** task.

**Task Type:** Multi-class Classification  
**Classes:** 0 = Neutral, 1 = Negative, 2 = Positive  
**Dataset:** 3,947 Nepali comments

## Models Implemented:
1. Machine Learning: SVM, MNB, RF
2. Deep Learning: Bi-LSTM, Bi-GRU
3. Transformer: BERT

---
## 1. Data Loading

### 1.1 Imports for Data Loading

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
warnings.filterwarnings('ignore')

# One seed for the whole notebook: applied to NumPy's global RNG here and
# passed explicitly to the estimators / TensorFlow in later cells.
RANDOM_SEED = 42
np.random.seed(RANDOM_SEED)

# Plot defaults: seaborn grid theme first, then the default figure size
# (the seaborn style is applied before the size is set).
sns.set_style('whitegrid')
plt.rcParams.update({'figure.figsize': (12, 6)})

print("Data libraries imported successfully!")

### 1.2 Load Train/Validation/Test Splits

In [None]:
# Load pre-split data. Features (X_*) and labels (y_*) are stored in separate
# CSV files, one pair per split.
SPLIT_DIR = '../data/splits/sentiment'

X_train = pd.read_csv(f'{SPLIT_DIR}/X_train_sentiment.csv')
y_train = pd.read_csv(f'{SPLIT_DIR}/y_train_sentiment.csv')
X_val = pd.read_csv(f'{SPLIT_DIR}/X_val_sentiment.csv')
y_val = pd.read_csv(f'{SPLIT_DIR}/y_val_sentiment.csv')
X_test = pd.read_csv(f'{SPLIT_DIR}/X_test_sentiment.csv')
y_test = pd.read_csv(f'{SPLIT_DIR}/y_test_sentiment.csv')

for split_name, X_split, y_split in [('train', X_train, y_train),
                                     ('val', X_val, y_val),
                                     ('test', X_test, y_test)]:
    # Features and labels are paired by row position only, so a length
    # mismatch means every downstream metric would be computed on misaligned data.
    if len(X_split) != len(y_split):
        raise ValueError(
            f"{split_name} split is misaligned: {len(X_split)} feature rows "
            f"vs {len(y_split)} label rows"
        )
    # An empty comment is written to CSV as an empty field and read back as NaN;
    # the vectorizers/tokenizers used later expect str, so normalise here.
    # NOTE(review): some later cells read the text via column position 0 rather
    # than by the 'Text' name — presumably the same column; confirm.
    X_split['Text'] = X_split['Text'].fillna('').astype(str)

print("Data loaded successfully!")
print(f"\nTrain: {len(X_train)} samples")
print(f"Validation: {len(X_val)} samples")
print(f"Test: {len(X_test)} samples")
print(f"Total: {len(X_train) + len(X_val) + len(X_test)} samples")

### 1.3 How Data Looks

In [None]:
# First rows of the feature and label frames
print("Training Data Sample:")
print(X_train.head())
print("\nTraining Labels Sample:")
print(y_train.head())

# First five training comments printed next to their label
banner = "=" * 80
print("\n" + banner)
print("Sample Comments from Training Set:")
print(banner)
for idx in range(5):
    comment = X_train['Text'].iloc[idx]
    label = y_train.iloc[idx, 0]   # labels live in the first (only-used) column
    print(f"\n{idx+1}. Text: {comment}")
    print(f"   Label: {label} (Sentiment)")

### 1.4 Summary of Data

In [None]:
# Class distribution
sentiment_map = {0: 'Neutral', 1: 'Negative', 2: 'Positive'}
class_ids = list(sentiment_map)  # fixed order [0, 1, 2] used for every table and plot

print("="*80)
print("CLASS DISTRIBUTION")
print("="*80)

for dataset_name, y_data in [('Training', y_train), ('Validation', y_val), ('Test', y_test)]:
    split_labels = y_data.iloc[:, 0]

    # Fail with a clear message instead of a bare KeyError if the label file
    # contains anything other than the three documented classes.
    unexpected = sorted(set(split_labels.unique()) - set(class_ids))
    if unexpected:
        raise ValueError(f"{dataset_name} set contains labels outside {class_ids}: {unexpected}")

    print(f"\n{dataset_name} Set:")
    # Reindex so that a class with no samples is reported as 0 rather than omitted.
    counts = split_labels.value_counts().reindex(class_ids, fill_value=0)
    total = len(y_data)
    for label, count in counts.items():
        percentage = (count / total) * 100 if total else 0.0
        print(f"  {label} ({sentiment_map[label]:>8}): {count:4d} samples ({percentage:5.2f}%)")

# Visualize class distribution
fig, axes = plt.subplots(1, 3, figsize=(18, 5))
sentiment_labels = ['Neutral', 'Negative', 'Positive']
colors = ['#5DADE2', '#E74C3C', '#52BE80']

for idx, (y_data, title) in enumerate([(y_train, 'Training'), (y_val, 'Validation'), (y_test, 'Test')]):
    # Same fixed class order as above, so bar i always corresponds to
    # sentiment_labels[i] even when a class is missing from a split.
    counts = y_data.iloc[:, 0].value_counts().reindex(class_ids, fill_value=0)
    axes[idx].bar(sentiment_labels, counts.values, color=colors)
    axes[idx].set_title(f'{title} Set - Class Distribution', fontweight='bold', fontsize=12)
    axes[idx].set_ylabel('Count', fontsize=11)
    axes[idx].grid(axis='y', alpha=0.3)
    for i, v in enumerate(counts.values):
        # Count annotation slightly above each bar
        axes[idx].text(i, v + 10, str(v), ha='center', fontweight='bold')

plt.tight_layout()
plt.show()

---
## 2. Machine Learning Models

### 2.1 Support Vector Machine (SVM)

#### 2.1.1 Imports for SVM

In [None]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.svm import LinearSVC
from sklearn.metrics import (
    accuracy_score, precision_score, recall_score, f1_score,
    classification_report, confusion_matrix
)
import pickle
import os

print("SVM libraries imported successfully!")

#### 2.1.2 Build SVM

In [None]:
# ---- TF-IDF features (word unigrams + bigrams) ----
print("Creating TF-IDF features...")
print("Configuration: Word n-grams (1-2), max_features=50,000")

tfidf_params = dict(
    max_features=50000,   # cap on vocabulary size
    ngram_range=(1, 2),   # unigrams and bigrams
    sublinear_tf=True,    # use 1 + log(tf) instead of raw term frequency
)
tfidf_vectorizer = TfidfVectorizer(**tfidf_params)

# The vocabulary/IDF weights are learned from the training texts only;
# validation and test texts are merely projected onto that vocabulary.
train_texts, val_texts, test_texts = (frame.iloc[:, 0] for frame in (X_train, X_val, X_test))
X_train_tfidf = tfidf_vectorizer.fit_transform(train_texts)
X_val_tfidf = tfidf_vectorizer.transform(val_texts)
X_test_tfidf = tfidf_vectorizer.transform(test_texts)

print(f"TF-IDF vocabulary size: {len(tfidf_vectorizer.vocabulary_)}")
for matrix_name, matrix in (("X_train_tfidf", X_train_tfidf),
                            ("X_val_tfidf", X_val_tfidf),
                            ("X_test_tfidf", X_test_tfidf)):
    print(f"{matrix_name} shape: {matrix.shape}")

# ---- Linear SVM ----
print("\nTraining LinearSVC model...")
svm_params = dict(
    class_weight='balanced',   # re-weight classes inversely to their frequency
    random_state=RANDOM_SEED,
    max_iter=2000,             # iteration cap for the solver
    # Solve the primal problem.
    # NOTE(review): scikit-learn's guidance is to prefer dual=False when
    # n_samples > n_features; with up to 50K TF-IDF features and a few
    # thousand comments the opposite may hold here — confirm this choice.
    dual=False,
)
svm_model = LinearSVC(**svm_params)

svm_model.fit(X_train_tfidf, y_train.iloc[:, 0])
print("SVM training completed!")

#### 2.1.3 Inference on SVM

In [None]:
# Predictions for every split (the training predictions are kept for inspection)
print("Making predictions...")
y_train_pred_svm = svm_model.predict(X_train_tfidf)
y_val_pred_svm = svm_model.predict(X_val_tfidf)
y_test_pred_svm = svm_model.predict(X_test_tfidf)

# Fixed class order for every report and matrix below. Passing it explicitly
# keeps the reports/matrices 3-class even if a class is absent from both the
# true and predicted labels of a split (otherwise classification_report raises
# and the confusion matrix no longer matches the three tick labels).
class_ids = [0, 1, 2]
class_names = ['Neutral', 'Negative', 'Positive']
y_val_true = y_val.iloc[:, 0]
y_test_true = y_test.iloc[:, 0]

# Evaluation
print("\n" + "="*100)
print("SVM - EVALUATION RESULTS")
print("="*100)

# Validation Set Metrics
print("\nVALIDATION SET:")
print("-"*100)
val_accuracy = accuracy_score(y_val_true, y_val_pred_svm)
val_macro_precision = precision_score(y_val_true, y_val_pred_svm, average='macro')
val_macro_recall = recall_score(y_val_true, y_val_pred_svm, average='macro')
val_macro_f1 = f1_score(y_val_true, y_val_pred_svm, average='macro')

print(f"Accuracy:         {val_accuracy:.4f}")
print(f"Macro Precision:  {val_macro_precision:.4f}")
print(f"Macro Recall:     {val_macro_recall:.4f}")
print(f"Macro F1-Score:   {val_macro_f1:.4f}")
print("\nClassification Report:")
print(classification_report(y_val_true, y_val_pred_svm, labels=class_ids,
                            target_names=class_names, digits=4))

# Test Set Metrics
print("\nTEST SET:")
print("-"*100)
test_accuracy = accuracy_score(y_test_true, y_test_pred_svm)
test_macro_precision = precision_score(y_test_true, y_test_pred_svm, average='macro')
test_macro_recall = recall_score(y_test_true, y_test_pred_svm, average='macro')
test_macro_f1 = f1_score(y_test_true, y_test_pred_svm, average='macro')

print(f"Accuracy:         {test_accuracy:.4f}")
print(f"Macro Precision:  {test_macro_precision:.4f}")
print(f"Macro Recall:     {test_macro_recall:.4f}")
print(f"Macro F1-Score:   {test_macro_f1:.4f}")
print("\nClassification Report:")
print(classification_report(y_test_true, y_test_pred_svm, labels=class_ids,
                            target_names=class_names, digits=4))

# Confusion Matrix for Test Set (rows = true class, columns = predicted class)
print("\nConfusion Matrix (Test Set):")
cm_test = confusion_matrix(y_test_true, y_test_pred_svm, labels=class_ids)
print(cm_test)

# Visualize Confusion Matrices: validation on the left, test on the right
fig, axes = plt.subplots(1, 2, figsize=(16, 6))
cm_val = confusion_matrix(y_val_true, y_val_pred_svm, labels=class_ids)
for ax, cm, split_name in [(axes[0], cm_val, 'Validation'), (axes[1], cm_test, 'Test')]:
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', ax=ax,
                xticklabels=class_names, yticklabels=class_names)
    ax.set_title(f'SVM - Confusion Matrix ({split_name} Set)', fontweight='bold', fontsize=14)
    ax.set_ylabel('True Label', fontsize=12)
    ax.set_xlabel('Predicted Label', fontsize=12)

plt.tight_layout()
plt.show()

# Summary
print("\n" + "="*100)
print("SUMMARY")
print("="*100)
print(f"Validation - Accuracy: {val_accuracy:.4f}, Macro F1: {val_macro_f1:.4f}")
print(f"Test       - Accuracy: {test_accuracy:.4f}, Macro F1: {test_macro_f1:.4f}")

#### 2.1.4 Save SVM

In [None]:
# Make sure the output directory exists before writing anything into it
os.makedirs('../models/sentiment', exist_ok=True)

# Persist the classifier together with the vectorizer it was trained on:
# the model is only usable with features produced by this exact vectorizer.
svm_artifacts = (
    ('../models/sentiment/svm_model.pkl', svm_model),
    ('../models/sentiment/tfidf_vectorizer.pkl', tfidf_vectorizer),
)
for artifact_path, artifact in svm_artifacts:
    with open(artifact_path, 'wb') as f:
        pickle.dump(artifact, f)

print("SVM model and vectorizer saved successfully!")

### 2.2 Multinomial Naive Bayes (MNB)

#### 2.2.1 Imports for MNB

In [None]:
from sklearn.naive_bayes import MultinomialNB

print("MNB libraries imported successfully!")

#### 2.2.2 Build MNB

In [None]:
# Multinomial Naive Bayes on the TF-IDF features
print("Training Multinomial Naive Bayes model...")
print("Configuration: alpha=1.0 (Laplace smoothing)")

# alpha=1.0 is add-one (Laplace) smoothing
mnb_model = MultinomialNB(alpha=1.0)

# Reuses the TF-IDF matrices built in the SVM section
# (word n-grams 1-2, max_features=50,000) rather than re-vectorizing.
mnb_train_labels = y_train.iloc[:, 0]
mnb_model.fit(X_train_tfidf, mnb_train_labels)
print("MNB training completed!")

#### 2.2.3 Inference on MNB

In [None]:
# Predictions for every split (the training predictions are kept for inspection)
print("Making predictions...")
y_train_pred_mnb = mnb_model.predict(X_train_tfidf)
y_val_pred_mnb = mnb_model.predict(X_val_tfidf)
y_test_pred_mnb = mnb_model.predict(X_test_tfidf)

# Fixed class order for every report and matrix below. Passing it explicitly
# keeps the reports/matrices 3-class even if a class is absent from both the
# true and predicted labels of a split (otherwise classification_report raises
# and the confusion matrix no longer matches the three tick labels).
class_ids = [0, 1, 2]
class_names = ['Neutral', 'Negative', 'Positive']
y_val_true = y_val.iloc[:, 0]
y_test_true = y_test.iloc[:, 0]

# Evaluation
print("\n" + "="*100)
print("MULTINOMIAL NAIVE BAYES - EVALUATION RESULTS")
print("="*100)

# Validation Set Metrics
print("\nVALIDATION SET:")
print("-"*100)
val_accuracy = accuracy_score(y_val_true, y_val_pred_mnb)
val_macro_precision = precision_score(y_val_true, y_val_pred_mnb, average='macro')
val_macro_recall = recall_score(y_val_true, y_val_pred_mnb, average='macro')
val_macro_f1 = f1_score(y_val_true, y_val_pred_mnb, average='macro')

print(f"Accuracy:         {val_accuracy:.4f}")
print(f"Macro Precision:  {val_macro_precision:.4f}")
print(f"Macro Recall:     {val_macro_recall:.4f}")
print(f"Macro F1-Score:   {val_macro_f1:.4f}")
print("\nClassification Report:")
print(classification_report(y_val_true, y_val_pred_mnb, labels=class_ids,
                            target_names=class_names, digits=4))

# Test Set Metrics
print("\nTEST SET:")
print("-"*100)
test_accuracy = accuracy_score(y_test_true, y_test_pred_mnb)
test_macro_precision = precision_score(y_test_true, y_test_pred_mnb, average='macro')
test_macro_recall = recall_score(y_test_true, y_test_pred_mnb, average='macro')
test_macro_f1 = f1_score(y_test_true, y_test_pred_mnb, average='macro')

print(f"Accuracy:         {test_accuracy:.4f}")
print(f"Macro Precision:  {test_macro_precision:.4f}")
print(f"Macro Recall:     {test_macro_recall:.4f}")
print(f"Macro F1-Score:   {test_macro_f1:.4f}")
print("\nClassification Report:")
print(classification_report(y_test_true, y_test_pred_mnb, labels=class_ids,
                            target_names=class_names, digits=4))

# Confusion Matrix for Test Set (rows = true class, columns = predicted class)
print("\nConfusion Matrix (Test Set):")
cm_test = confusion_matrix(y_test_true, y_test_pred_mnb, labels=class_ids)
print(cm_test)

# Visualize Confusion Matrices: validation on the left, test on the right
fig, axes = plt.subplots(1, 2, figsize=(16, 6))
cm_val = confusion_matrix(y_val_true, y_val_pred_mnb, labels=class_ids)
for ax, cm, split_name in [(axes[0], cm_val, 'Validation'), (axes[1], cm_test, 'Test')]:
    sns.heatmap(cm, annot=True, fmt='d', cmap='Greens', ax=ax,
                xticklabels=class_names, yticklabels=class_names)
    ax.set_title(f'MNB - Confusion Matrix ({split_name} Set)', fontweight='bold', fontsize=14)
    ax.set_ylabel('True Label', fontsize=12)
    ax.set_xlabel('Predicted Label', fontsize=12)

plt.tight_layout()
plt.show()

# Summary
print("\n" + "="*100)
print("SUMMARY")
print("="*100)
print(f"Validation - Accuracy: {val_accuracy:.4f}, Macro F1: {val_macro_f1:.4f}")
print(f"Test       - Accuracy: {test_accuracy:.4f}, Macro F1: {test_macro_f1:.4f}")

#### 2.2.4 Save MNB

In [None]:
# Save MNB model.
# Create the output directory here as well, so this cell does not depend on
# the SVM save cell having been run first.
os.makedirs('../models/sentiment', exist_ok=True)

with open('../models/sentiment/mnb_model.pkl', 'wb') as f:
    pickle.dump(mnb_model, f)

print("MNB model saved successfully!")

### 2.3 Random Forest (RF)

#### 2.3.1 Imports for RF

In [None]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.decomposition import TruncatedSVD

print("RF libraries imported successfully!")

#### 2.3.2 Build RF

In [None]:
# ---- Dense low-rank features via TruncatedSVD ----
# Author's rationale: Random Forest performs poorly on sparse high-dimensional
# data, so the TF-IDF matrix is compressed before training the forest.
print("Applying dimensionality reduction with TruncatedSVD...")
print("Configuration: Reducing to 300 components")

svd = TruncatedSVD(n_components=300, random_state=RANDOM_SEED)

# The projection is learned from the training matrix only and then applied
# unchanged to the validation and test matrices.
X_train_svd = svd.fit_transform(X_train_tfidf)
X_val_svd, X_test_svd = (svd.transform(matrix) for matrix in (X_val_tfidf, X_test_tfidf))

explained_total = svd.explained_variance_ratio_.sum()
print(f"Original TF-IDF shape: {X_train_tfidf.shape}")
print(f"Reduced SVD shape: {X_train_svd.shape}")
print(f"Explained variance ratio: {explained_total:.4f}")

# ---- Random Forest on the SVD components ----
print("\nTraining Random Forest model...")
print("Configuration: n_estimators=300, max_depth=None, n_jobs=-1")

rf_params = dict(
    n_estimators=300,          # number of trees
    max_depth=None,            # trees grow without a depth limit
    random_state=RANDOM_SEED,
    n_jobs=-1,                 # use every available core
    verbose=0,
)
rf_model = RandomForestClassifier(**rf_params)

# Trained on the SVD-reduced features, not on the raw TF-IDF matrix
rf_model.fit(X_train_svd, y_train.iloc[:, 0])
print("RF training completed!")

#### 2.3.3 Inference on RF

In [None]:
# Predictions for every split (the training predictions are kept for inspection)
print("Making predictions...")
y_train_pred_rf = rf_model.predict(X_train_svd)
y_val_pred_rf = rf_model.predict(X_val_svd)
y_test_pred_rf = rf_model.predict(X_test_svd)

# Fixed class order for every report and matrix below. Passing it explicitly
# keeps the reports/matrices 3-class even if a class is absent from both the
# true and predicted labels of a split (otherwise classification_report raises
# and the confusion matrix no longer matches the three tick labels).
class_ids = [0, 1, 2]
class_names = ['Neutral', 'Negative', 'Positive']
y_val_true = y_val.iloc[:, 0]
y_test_true = y_test.iloc[:, 0]

# Evaluation
print("\n" + "="*100)
print("RANDOM FOREST - EVALUATION RESULTS")
print("="*100)

# Validation Set Metrics
print("\nVALIDATION SET:")
print("-"*100)
val_accuracy = accuracy_score(y_val_true, y_val_pred_rf)
val_macro_precision = precision_score(y_val_true, y_val_pred_rf, average='macro')
val_macro_recall = recall_score(y_val_true, y_val_pred_rf, average='macro')
val_macro_f1 = f1_score(y_val_true, y_val_pred_rf, average='macro')

print(f"Accuracy:         {val_accuracy:.4f}")
print(f"Macro Precision:  {val_macro_precision:.4f}")
print(f"Macro Recall:     {val_macro_recall:.4f}")
print(f"Macro F1-Score:   {val_macro_f1:.4f}")
print("\nClassification Report:")
print(classification_report(y_val_true, y_val_pred_rf, labels=class_ids,
                            target_names=class_names, digits=4))

# Test Set Metrics
print("\nTEST SET:")
print("-"*100)
test_accuracy = accuracy_score(y_test_true, y_test_pred_rf)
test_macro_precision = precision_score(y_test_true, y_test_pred_rf, average='macro')
test_macro_recall = recall_score(y_test_true, y_test_pred_rf, average='macro')
test_macro_f1 = f1_score(y_test_true, y_test_pred_rf, average='macro')

print(f"Accuracy:         {test_accuracy:.4f}")
print(f"Macro Precision:  {test_macro_precision:.4f}")
print(f"Macro Recall:     {test_macro_recall:.4f}")
print(f"Macro F1-Score:   {test_macro_f1:.4f}")
print("\nClassification Report:")
print(classification_report(y_test_true, y_test_pred_rf, labels=class_ids,
                            target_names=class_names, digits=4))

# Confusion Matrix for Test Set (rows = true class, columns = predicted class)
print("\nConfusion Matrix (Test Set):")
cm_test = confusion_matrix(y_test_true, y_test_pred_rf, labels=class_ids)
print(cm_test)

# Visualize Confusion Matrices: validation on the left, test on the right
fig, axes = plt.subplots(1, 2, figsize=(16, 6))
cm_val = confusion_matrix(y_val_true, y_val_pred_rf, labels=class_ids)
for ax, cm, split_name in [(axes[0], cm_val, 'Validation'), (axes[1], cm_test, 'Test')]:
    sns.heatmap(cm, annot=True, fmt='d', cmap='Oranges', ax=ax,
                xticklabels=class_names, yticklabels=class_names)
    ax.set_title(f'RF - Confusion Matrix ({split_name} Set)', fontweight='bold', fontsize=14)
    ax.set_ylabel('True Label', fontsize=12)
    ax.set_xlabel('Predicted Label', fontsize=12)

plt.tight_layout()
plt.show()

# Feature Importance Analysis.
# The forest was trained on SVD components, so importances refer to component
# indices, not to individual words.
print("\n" + "="*100)
print("FEATURE IMPORTANCE ANALYSIS")
print("="*100)
feature_importance = rf_model.feature_importances_
top_k = 10
top_indices = np.argsort(feature_importance)[::-1][:top_k]  # descending by importance

print(f"\nTop {top_k} Most Important SVD Components:")
for idx, component_idx in enumerate(top_indices, 1):
    print(f"  {idx}. Component {component_idx}: {feature_importance[component_idx]:.6f}")

# Visualize top feature importances
plt.figure(figsize=(10, 6))
plt.bar(range(top_k), feature_importance[top_indices], color='coral', edgecolor='black')
plt.xlabel('SVD Component Rank', fontsize=12, fontweight='bold')
plt.ylabel('Importance', fontsize=12, fontweight='bold')
plt.title(f'Top {top_k} Most Important SVD Components', fontsize=14, fontweight='bold')
plt.xticks(range(top_k), [f'C{i}' for i in top_indices], rotation=0)
plt.grid(axis='y', alpha=0.3)
plt.tight_layout()
plt.show()

# Summary
print("\n" + "="*100)
print("SUMMARY")
print("="*100)
print(f"Validation - Accuracy: {val_accuracy:.4f}, Macro F1: {val_macro_f1:.4f}")
print(f"Test       - Accuracy: {test_accuracy:.4f}, Macro F1: {test_macro_f1:.4f}")

#### 2.3.4 Save RF

In [None]:
# Save RF model and SVD transformer.
# Create the output directory here as well, so this cell does not depend on
# the SVM save cell having been run first.
os.makedirs('../models/sentiment', exist_ok=True)

with open('../models/sentiment/rf_model.pkl', 'wb') as f:
    pickle.dump(rf_model, f)

# The forest consumes SVD components, so the fitted SVD must be stored with it.
with open('../models/sentiment/svd_transformer.pkl', 'wb') as f:
    pickle.dump(svd, f)

print("RF model and SVD transformer saved successfully!")

---
## 3. Deep Learning Models

### 3.1 Bidirectional LSTM (Bi-LSTM)

#### 3.1.1 Imports for Bi-LSTM

In [None]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Bidirectional, Dense, Dropout
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
from tensorflow.keras.utils import to_categorical

# Seed TensorFlow's global RNG with the notebook-wide seed
tf.random.set_seed(RANDOM_SEED)

print("Bi-LSTM libraries imported successfully!")
print(f"TensorFlow version: {tf.__version__}")
gpu_devices = tf.config.list_physical_devices('GPU')
print(f"GPU Available: {len(gpu_devices) > 0}")

#### 3.1.2 Build Bi-LSTM

In [None]:
# Hyperparameters
NUM_WORDS = 50000        # tokenizer vocabulary cap / Embedding input_dim
MAX_LEN = 128            # every sequence is padded or truncated to this length
EMBEDDING_DIM = 300      # size of each word vector
LSTM_UNITS = 128         # units per LSTM direction
BATCH_SIZE = 32          # mini-batch size for training

# ---- Tokenization ----
print("Tokenizing text data...")
print(f"Configuration: num_words={NUM_WORDS}, max_len={MAX_LEN}")

tokenizer = Tokenizer(num_words=NUM_WORDS, oov_token='<OOV>')

# Word index is built from the training texts only
tokenizer.fit_on_texts(X_train.iloc[:, 0])

# Texts -> lists of integer token ids
X_train_seq = tokenizer.texts_to_sequences(X_train.iloc[:, 0])
X_val_seq = tokenizer.texts_to_sequences(X_val.iloc[:, 0])
X_test_seq = tokenizer.texts_to_sequences(X_test.iloc[:, 0])

# Pad/truncate at the END of each sequence so all rows have length MAX_LEN
pad_kwargs = dict(maxlen=MAX_LEN, padding='post', truncating='post')
X_train_pad = pad_sequences(X_train_seq, **pad_kwargs)
X_val_pad = pad_sequences(X_val_seq, **pad_kwargs)
X_test_pad = pad_sequences(X_test_seq, **pad_kwargs)

print(f"Vocabulary size: {len(tokenizer.word_index) + 1}")
print(f"X_train_pad shape: {X_train_pad.shape}")
print(f"X_val_pad shape: {X_val_pad.shape}")
print(f"X_test_pad shape: {X_test_pad.shape}")

# ---- Model: Embedding -> Bi-LSTM -> Dense head ----
print("\nBuilding Bi-LSTM model...")
bilstm_model = Sequential()
bilstm_model.add(Embedding(input_dim=NUM_WORDS, output_dim=EMBEDDING_DIM, input_length=MAX_LEN))
# return_sequences=False: only the final state of each direction is passed on
bilstm_model.add(Bidirectional(LSTM(LSTM_UNITS, return_sequences=False, dropout=0.2, recurrent_dropout=0.2)))
bilstm_model.add(Dropout(0.5))
bilstm_model.add(Dense(64, activation='relu'))
bilstm_model.add(Dropout(0.3))
bilstm_model.add(Dense(3, activation='softmax'))   # one probability per sentiment class

# Labels are integer class ids, hence the sparse variant of cross-entropy
bilstm_model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy']
)

print("Bi-LSTM model built successfully!")
bilstm_model.summary()

# ---- Training ----
print("\nTraining Bi-LSTM model...")
# Both callbacks watch validation loss: halve the learning rate after 3 stale
# epochs, stop after 5 and roll back to the best weights seen.
callbacks = [
    EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True, verbose=1),
    ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=3, min_lr=1e-7, verbose=1)
]

bilstm_train_labels = y_train.iloc[:, 0]
bilstm_val_labels = y_val.iloc[:, 0]
history_bilstm = bilstm_model.fit(
    X_train_pad, bilstm_train_labels,
    validation_data=(X_val_pad, bilstm_val_labels),
    epochs=50,
    batch_size=BATCH_SIZE,
    callbacks=callbacks,
    verbose=1
)

print("Bi-LSTM training completed!")

#### 3.1.3 Inference on Bi-LSTM

In [None]:
# Predictions: the model outputs one probability per class, argmax picks the class id
print("Making predictions...")
y_train_pred_bilstm = np.argmax(bilstm_model.predict(X_train_pad, verbose=0), axis=1)
y_val_pred_bilstm = np.argmax(bilstm_model.predict(X_val_pad, verbose=0), axis=1)
y_test_pred_bilstm = np.argmax(bilstm_model.predict(X_test_pad, verbose=0), axis=1)

# Fixed class order for every report and matrix below. Passing it explicitly
# keeps the reports/matrices 3-class even if a class is absent from both the
# true and predicted labels of a split (otherwise classification_report raises
# and the confusion matrix no longer matches the three tick labels).
class_ids = [0, 1, 2]
class_names = ['Neutral', 'Negative', 'Positive']
y_train_true = y_train.iloc[:, 0]
y_val_true = y_val.iloc[:, 0]
y_test_true = y_test.iloc[:, 0]

# Evaluation
print("\n" + "="*100)
print("BI-LSTM - EVALUATION RESULTS")
print("="*100)

# Training Set Metrics (reported to expose the train/validation gap)
print("\nTRAINING SET:")
print("-"*100)
train_accuracy = accuracy_score(y_train_true, y_train_pred_bilstm)
train_macro_precision = precision_score(y_train_true, y_train_pred_bilstm, average='macro')
train_macro_recall = recall_score(y_train_true, y_train_pred_bilstm, average='macro')
train_macro_f1 = f1_score(y_train_true, y_train_pred_bilstm, average='macro')

print(f"Accuracy:         {train_accuracy:.4f}")
print(f"Macro Precision:  {train_macro_precision:.4f}")
print(f"Macro Recall:     {train_macro_recall:.4f}")
print(f"Macro F1-Score:   {train_macro_f1:.4f}")

# Validation Set Metrics
print("\nVALIDATION SET:")
print("-"*100)
val_accuracy = accuracy_score(y_val_true, y_val_pred_bilstm)
val_macro_precision = precision_score(y_val_true, y_val_pred_bilstm, average='macro')
val_macro_recall = recall_score(y_val_true, y_val_pred_bilstm, average='macro')
val_macro_f1 = f1_score(y_val_true, y_val_pred_bilstm, average='macro')

print(f"Accuracy:         {val_accuracy:.4f}")
print(f"Macro Precision:  {val_macro_precision:.4f}")
print(f"Macro Recall:     {val_macro_recall:.4f}")
print(f"Macro F1-Score:   {val_macro_f1:.4f}")
print("\nClassification Report:")
print(classification_report(y_val_true, y_val_pred_bilstm, labels=class_ids,
                            target_names=class_names, digits=4))

# Test Set Metrics
print("\nTEST SET:")
print("-"*100)
test_accuracy = accuracy_score(y_test_true, y_test_pred_bilstm)
test_macro_precision = precision_score(y_test_true, y_test_pred_bilstm, average='macro')
test_macro_recall = recall_score(y_test_true, y_test_pred_bilstm, average='macro')
test_macro_f1 = f1_score(y_test_true, y_test_pred_bilstm, average='macro')

print(f"Accuracy:         {test_accuracy:.4f}")
print(f"Macro Precision:  {test_macro_precision:.4f}")
print(f"Macro Recall:     {test_macro_recall:.4f}")
print(f"Macro F1-Score:   {test_macro_f1:.4f}")
print("\nClassification Report:")
print(classification_report(y_test_true, y_test_pred_bilstm, labels=class_ids,
                            target_names=class_names, digits=4))

# Confusion Matrices (rows = true class, columns = predicted class)
print("\nConfusion Matrix (Test Set):")
cm_test = confusion_matrix(y_test_true, y_test_pred_bilstm, labels=class_ids)
print(cm_test)

# Visualize Confusion Matrices: validation on the left, test on the right
fig, axes = plt.subplots(1, 2, figsize=(16, 6))
cm_val = confusion_matrix(y_val_true, y_val_pred_bilstm, labels=class_ids)
for ax, cm, split_name in [(axes[0], cm_val, 'Validation'), (axes[1], cm_test, 'Test')]:
    sns.heatmap(cm, annot=True, fmt='d', cmap='Purples', ax=ax,
                xticklabels=class_names, yticklabels=class_names)
    ax.set_title(f'Bi-LSTM - Confusion Matrix ({split_name} Set)', fontweight='bold', fontsize=14)
    ax.set_ylabel('True Label', fontsize=12)
    ax.set_xlabel('Predicted Label', fontsize=12)

plt.tight_layout()
plt.show()

# Plot training history: accuracy on the left, loss on the right,
# each with its training and validation curve per epoch.
fig, axes = plt.subplots(1, 2, figsize=(16, 5))
for ax, history_key, curve_name in [(axes[0], 'accuracy', 'Accuracy'), (axes[1], 'loss', 'Loss')]:
    ax.plot(history_bilstm.history[history_key], label=f'Train {curve_name}',
            linewidth=2, marker='o', markersize=4)
    ax.plot(history_bilstm.history[f'val_{history_key}'], label=f'Validation {curve_name}',
            linewidth=2, marker='s', markersize=4)
    ax.set_title(f'Bi-LSTM - Training & Validation {curve_name}', fontweight='bold', fontsize=12)
    ax.set_xlabel('Epoch', fontsize=11)
    ax.set_ylabel(curve_name, fontsize=11)
    ax.legend()
    ax.grid(alpha=0.3)

plt.tight_layout()
plt.show()

# Summary
print("\n" + "="*100)
print("SUMMARY")
print("="*100)
print(f"Training   - Accuracy: {train_accuracy:.4f}, Macro F1: {train_macro_f1:.4f}")
print(f"Validation - Accuracy: {val_accuracy:.4f}, Macro F1: {val_macro_f1:.4f}")
print(f"Test       - Accuracy: {test_accuracy:.4f}, Macro F1: {test_macro_f1:.4f}")

#### 3.1.4 Save Bi-LSTM

In [None]:
# Save Bi-LSTM model.
# Create the output directory here as well, so this cell does not depend on
# the SVM save cell having been run first.
os.makedirs('../models/sentiment', exist_ok=True)

bilstm_model.save('../models/sentiment/bilstm_model.h5')

# Save tokenizer: the model's inputs are token ids from this exact word index
with open('../models/sentiment/tokenizer.pkl', 'wb') as f:
    pickle.dump(tokenizer, f)

print("Bi-LSTM model and tokenizer saved successfully!")

### 3.2 Bidirectional GRU (Bi-GRU)

#### 3.2.1 Imports for Bi-GRU

In [None]:
from tensorflow.keras.layers import GRU

print("Bi-GRU libraries imported successfully!")

#### 3.2.2 Build Bi-GRU

In [None]:
# Hyperparameters (same as Bi-LSTM for fair comparison)
NUM_WORDS = 50000        # Embedding input_dim; must cover every token id in X_*_pad
MAX_LEN = 128            # must equal the padded length of X_*_pad
EMBEDDING_DIM = 300      # size of each word vector
GRU_UNITS = 128          # units per GRU direction
BATCH_SIZE = 32          # mini-batch size for training

# Note: this cell reuses X_train_pad / X_val_pad / X_test_pad produced by the
# Bi-LSTM tokenization cell, so LSTM and GRU see identical inputs.
# The constants above are re-declared here, so guard against them drifting
# away from the tensors that were actually built.
if X_train_pad.shape[1] != MAX_LEN:
    raise ValueError(
        f"MAX_LEN={MAX_LEN} does not match padded sequence length {X_train_pad.shape[1]}"
    )
max_token_id = max(int(padded.max()) for padded in (X_train_pad, X_val_pad, X_test_pad))
if max_token_id >= NUM_WORDS:
    raise ValueError(
        f"Token id {max_token_id} is outside the Embedding range [0, {NUM_WORDS})"
    )

# Build Bi-GRU model: Embedding -> Bi-GRU -> Dense head
print("Building Bi-GRU model...")
print(f"Configuration: GRU units={GRU_UNITS}, embedding_dim={EMBEDDING_DIM}")

bigru_model = Sequential([
    Embedding(input_dim=NUM_WORDS, output_dim=EMBEDDING_DIM, input_length=MAX_LEN),
    # return_sequences=False: only the final state of each direction is passed on
    Bidirectional(GRU(GRU_UNITS, return_sequences=False, dropout=0.2, recurrent_dropout=0.2)),
    Dropout(0.5),
    Dense(64, activation='relu'),
    Dropout(0.3),
    Dense(3, activation='softmax')   # one probability per sentiment class
])

# Compile model (labels are integer class ids, hence the sparse loss)
bigru_model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy']
)

print("Bi-GRU model built successfully!")
bigru_model.summary()

# Train Bi-GRU
print("\nTraining Bi-GRU model...")
# Both callbacks watch validation loss: halve the learning rate after 3 stale
# epochs, stop after 5 and roll back to the best weights seen.
callbacks_gru = [
    EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True, verbose=1),
    ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=3, min_lr=1e-7, verbose=1)
]

history_bigru = bigru_model.fit(
    X_train_pad, y_train.iloc[:, 0],
    validation_data=(X_val_pad, y_val.iloc[:, 0]),
    epochs=50,
    batch_size=BATCH_SIZE,
    callbacks=callbacks_gru,
    verbose=1
)

print("Bi-GRU training completed!")

#### 3.2.3 Inference on Bi-GRU

In [None]:
# Predictions: the model outputs one probability per class, argmax picks the class id
print("Making predictions...")
y_train_pred_bigru = np.argmax(bigru_model.predict(X_train_pad, verbose=0), axis=1)
y_val_pred_bigru = np.argmax(bigru_model.predict(X_val_pad, verbose=0), axis=1)
y_test_pred_bigru = np.argmax(bigru_model.predict(X_test_pad, verbose=0), axis=1)

# Fixed class order for every report and matrix below. Passing it explicitly
# keeps the reports/matrices 3-class even if a class is absent from both the
# true and predicted labels of a split (otherwise classification_report raises
# and the confusion matrix no longer matches the three tick labels).
class_ids = [0, 1, 2]
class_names = ['Neutral', 'Negative', 'Positive']
y_train_true = y_train.iloc[:, 0]
y_val_true = y_val.iloc[:, 0]
y_test_true = y_test.iloc[:, 0]

# Evaluation
print("\n" + "="*100)
print("BI-GRU - EVALUATION RESULTS")
print("="*100)

# Training Set Metrics (reported to expose the train/validation gap)
print("\nTRAINING SET:")
print("-"*100)
train_accuracy = accuracy_score(y_train_true, y_train_pred_bigru)
train_macro_precision = precision_score(y_train_true, y_train_pred_bigru, average='macro')
train_macro_recall = recall_score(y_train_true, y_train_pred_bigru, average='macro')
train_macro_f1 = f1_score(y_train_true, y_train_pred_bigru, average='macro')

print(f"Accuracy:         {train_accuracy:.4f}")
print(f"Macro Precision:  {train_macro_precision:.4f}")
print(f"Macro Recall:     {train_macro_recall:.4f}")
print(f"Macro F1-Score:   {train_macro_f1:.4f}")

# Validation Set Metrics
print("\nVALIDATION SET:")
print("-"*100)
val_accuracy = accuracy_score(y_val_true, y_val_pred_bigru)
val_macro_precision = precision_score(y_val_true, y_val_pred_bigru, average='macro')
val_macro_recall = recall_score(y_val_true, y_val_pred_bigru, average='macro')
val_macro_f1 = f1_score(y_val_true, y_val_pred_bigru, average='macro')

print(f"Accuracy:         {val_accuracy:.4f}")
print(f"Macro Precision:  {val_macro_precision:.4f}")
print(f"Macro Recall:     {val_macro_recall:.4f}")
print(f"Macro F1-Score:   {val_macro_f1:.4f}")
print("\nClassification Report:")
print(classification_report(y_val_true, y_val_pred_bigru, labels=class_ids,
                            target_names=class_names, digits=4))

# Test Set Metrics
print("\nTEST SET:")
print("-"*100)
test_accuracy = accuracy_score(y_test_true, y_test_pred_bigru)
test_macro_precision = precision_score(y_test_true, y_test_pred_bigru, average='macro')
test_macro_recall = recall_score(y_test_true, y_test_pred_bigru, average='macro')
test_macro_f1 = f1_score(y_test_true, y_test_pred_bigru, average='macro')

print(f"Accuracy:         {test_accuracy:.4f}")
print(f"Macro Precision:  {test_macro_precision:.4f}")
print(f"Macro Recall:     {test_macro_recall:.4f}")
print(f"Macro F1-Score:   {test_macro_f1:.4f}")
print("\nClassification Report:")
print(classification_report(y_test_true, y_test_pred_bigru, labels=class_ids,
                            target_names=class_names, digits=4))

# Confusion Matrices (rows = true class, columns = predicted class)
print("\nConfusion Matrix (Test Set):")
cm_test = confusion_matrix(y_test_true, y_test_pred_bigru, labels=class_ids)
print(cm_test)

# Visualize Confusion Matrices: validation on the left, test on the right
fig, axes = plt.subplots(1, 2, figsize=(16, 6))
cm_val = confusion_matrix(y_val_true, y_val_pred_bigru, labels=class_ids)
for ax, cm, split_name in [(axes[0], cm_val, 'Validation'), (axes[1], cm_test, 'Test')]:
    sns.heatmap(cm, annot=True, fmt='d', cmap='YlOrBr', ax=ax,
                xticklabels=class_names, yticklabels=class_names)
    ax.set_title(f'Bi-GRU - Confusion Matrix ({split_name} Set)', fontweight='bold', fontsize=14)
    ax.set_ylabel('True Label', fontsize=12)
    ax.set_xlabel('Predicted Label', fontsize=12)

plt.tight_layout()
plt.show()

# Plot training history: accuracy on the left, loss on the right,
# each with its training and validation curve per epoch.
fig, axes = plt.subplots(1, 2, figsize=(16, 5))
for ax, history_key, curve_name in [(axes[0], 'accuracy', 'Accuracy'), (axes[1], 'loss', 'Loss')]:
    ax.plot(history_bigru.history[history_key], label=f'Train {curve_name}',
            linewidth=2, marker='o', markersize=4)
    ax.plot(history_bigru.history[f'val_{history_key}'], label=f'Validation {curve_name}',
            linewidth=2, marker='s', markersize=4)
    ax.set_title(f'Bi-GRU - Training & Validation {curve_name}', fontweight='bold', fontsize=12)
    ax.set_xlabel('Epoch', fontsize=11)
    ax.set_ylabel(curve_name, fontsize=11)
    ax.legend()
    ax.grid(alpha=0.3)

plt.tight_layout()
plt.show()

# Summary
print("\n" + "="*100)
print("SUMMARY")
print("="*100)
print(f"Training   - Accuracy: {train_accuracy:.4f}, Macro F1: {train_macro_f1:.4f}")
print(f"Validation - Accuracy: {val_accuracy:.4f}, Macro F1: {val_macro_f1:.4f}")
print(f"Test       - Accuracy: {test_accuracy:.4f}, Macro F1: {test_macro_f1:.4f}")

#### 3.2.4 Save Bi-GRU

In [None]:
# Save Bi-GRU model.
# Create the output directory here as well, so this cell does not depend on
# the SVM save cell having been run first.
os.makedirs('../models/sentiment', exist_ok=True)

bigru_model.save('../models/sentiment/bigru_model.h5')

print("Bi-GRU model saved successfully!")

---
## 4. BERT Model

### 4.1 Imports for BERT

In [None]:
from transformers import AutoTokenizer, TFAutoModelForSequenceClassification
from transformers import DataCollatorWithPadding
import tensorflow as tf

print("BERT libraries imported successfully!")

### 4.2 Build BERT

In [None]:
# Fine-tune multilingual BERT for the 3-class sentiment task.
MODEL_NAME = 'bert-base-multilingual-cased'
MAX_LENGTH = 128        # comments longer than this many tokens are truncated
BATCH_SIZE = 16
NUM_EPOCHS = 3
LEARNING_RATE = 2e-5
NUM_LABELS = 3          # 0 = Neutral, 1 = Negative, 2 = Positive

print(f"Loading {MODEL_NAME} tokenizer...")
bert_tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)


def _encode_texts(texts):
    """Tokenize a pandas Series of comments into TensorFlow tensors.

    Sequences are truncated to MAX_LENGTH tokens; `padding=True` pads every
    sequence to the longest one in `texts`.
    """
    return bert_tokenizer(
        texts.tolist(),
        truncation=True,
        padding=True,
        max_length=MAX_LENGTH,
        return_tensors='tf'
    )


def _make_dataset(encodings, labels):
    """Pair encodings with labels as a batched tf.data.Dataset in row order."""
    return tf.data.Dataset.from_tensor_slices((
        dict(encodings),
        labels.values.ravel()
    )).batch(BATCH_SIZE)


# Tokenize data
print("Tokenizing data for BERT...")
train_encodings = _encode_texts(X_train['Text'])
val_encodings = _encode_texts(X_val['Text'])
test_encodings = _encode_texts(X_test['Text'])
print("Tokenization completed!")

# Create TensorFlow datasets.
# These three keep the original row order because the inference cell compares
# `predict(...)` output position-by-position with y_train / y_val / y_test.
train_dataset = _make_dataset(train_encodings, y_train)
val_dataset = _make_dataset(val_encodings, y_val)
test_dataset = _make_dataset(test_encodings, y_test)

# Separate, shuffled pipeline used ONLY for fitting. Keras ignores `shuffle=`
# when `fit` receives a tf.data.Dataset, so without this every epoch would see
# exactly the same batches in file order.
train_fit_dataset = (
    tf.data.Dataset.from_tensor_slices((
        dict(train_encodings),
        y_train.values.ravel()
    ))
    .shuffle(buffer_size=len(X_train), seed=RANDOM_SEED, reshuffle_each_iteration=True)
    .batch(BATCH_SIZE)
)

# Load BERT model for sequence classification
print(f"\nLoading {MODEL_NAME} model...")
bert_model = TFAutoModelForSequenceClassification.from_pretrained(
    MODEL_NAME,
    num_labels=NUM_LABELS
)

# Compile model. The loss is configured with from_logits=True and the
# inference cell reads `.logits`, so no softmax is applied here.
bert_model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=LEARNING_RATE),
    loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    metrics=['accuracy']
)

print("BERT model loaded and compiled successfully!")

# Train BERT
print("\nTraining BERT model...")
history_bert = bert_model.fit(
    train_fit_dataset,
    validation_data=val_dataset,
    epochs=NUM_EPOCHS,
    verbose=1
)

print("BERT training completed!")

### 4.3 Inference on BERT

In [None]:
# Label ids and display names, in matching order (0 = Neutral, 1 = Negative, 2 = Positive).
# Passing the ids explicitly keeps every report and matrix 3-class even if a
# class is absent from a split's true and predicted labels; otherwise
# classification_report raises on the target_names length mismatch and the
# confusion matrix would no longer line up with the tick labels.
sentiment_label_ids = [0, 1, 2]
sentiment_label_names = ['Neutral', 'Negative', 'Positive']

# Predictions: argmax over the class logits for each split
print("Making predictions with BERT...")
y_train_pred_bert = np.argmax(bert_model.predict(train_dataset).logits, axis=1)
y_val_pred_bert = np.argmax(bert_model.predict(val_dataset).logits, axis=1)
y_test_pred_bert = np.argmax(bert_model.predict(test_dataset).logits, axis=1)

# Evaluation
print("="*80)
print("BERT - EVALUATION RESULTS")
print("="*80)

for dataset_name, y_true, y_pred in [('Training', y_train, y_train_pred_bert),
                                       ('Validation', y_val, y_val_pred_bert),
                                       ('Test', y_test, y_test_pred_bert)]:
    print(f"\n{dataset_name} Set:")
    print("-"*80)
    accuracy = accuracy_score(y_true, y_pred)
    macro_f1 = f1_score(y_true, y_pred, average='macro')
    print(f"Accuracy: {accuracy:.4f}")
    print(f"Macro F1-Score: {macro_f1:.4f}")
    print("\nClassification Report:")
    print(classification_report(y_true, y_pred,
                                labels=sentiment_label_ids,
                                target_names=sentiment_label_names))

# Confusion Matrix for Test Set
print("\nConfusion Matrix (Test Set):")
cm = confusion_matrix(y_test, y_test_pred_bert, labels=sentiment_label_ids)
print(cm)

# Visualize Confusion Matrix
plt.figure(figsize=(8, 6))
sns.heatmap(cm, annot=True, fmt='d', cmap='Reds',
            xticklabels=sentiment_label_names,
            yticklabels=sentiment_label_names)
plt.title('BERT - Confusion Matrix (Test Set)', fontweight='bold', fontsize=14)
plt.ylabel('True Label', fontsize=12)
plt.xlabel('Predicted Label', fontsize=12)
plt.tight_layout()
plt.show()

# Plot training history: one panel per tracked quantity (accuracy, loss),
# each showing the train curve and the matching `val_` curve.
fig, axes = plt.subplots(1, 2, figsize=(16, 5))

for ax, history_key, display_name in zip(axes, ['accuracy', 'loss'], ['Accuracy', 'Loss']):
    ax.plot(history_bert.history[history_key], label=f'Train {display_name}', linewidth=2)
    ax.plot(history_bert.history[f'val_{history_key}'], label=f'Validation {display_name}', linewidth=2)
    ax.set_title(f'BERT - Training & Validation {display_name}', fontweight='bold', fontsize=12)
    ax.set_xlabel('Epoch', fontsize=11)
    ax.set_ylabel(display_name, fontsize=11)
    ax.legend()
    ax.grid(alpha=0.3)

plt.tight_layout()
plt.show()

### 4.4 Save BERT

In [None]:
# Write the fine-tuned model and its tokenizer into the same directory
# (model first, then tokenizer).
BERT_SAVE_DIR = '../models/sentiment/bert_model'
for artifact in (bert_model, bert_tokenizer):
    artifact.save_pretrained(BERT_SAVE_DIR)

print("BERT model and tokenizer saved successfully!")

---
## 5. Comparison of All Models

### 5.1 Collect All Metrics

In [None]:
# Collect all test predictions (one entry per model, in display order)
models_predictions = {
    'SVM': y_test_pred_svm,
    'MNB': y_test_pred_mnb,
    'RF': y_test_pred_rf,
    'Bi-LSTM': y_test_pred_bilstm,
    'Bi-GRU': y_test_pred_bigru,
    'BERT': y_test_pred_bert
}

# Display names and the label ids they correspond to, in matching order.
class_names = ['Neutral', 'Negative', 'Positive']
class_label_ids = [0, 1, 2]

# Calculate metrics for each model: one row per model with overall accuracy,
# macro F1, and precision / recall / F1 for every class.
results = []

for model_name, y_pred in models_predictions.items():
    accuracy = accuracy_score(y_test, y_pred)
    macro_f1 = f1_score(y_test, y_pred, average='macro')

    # Per-class metrics. `labels` is pinned so position i always refers to
    # class_label_ids[i]; without it, a class missing from both y_test and
    # y_pred would shorten the arrays and mis-align (or break) the indexing.
    per_class_scores = {
        'Precision': precision_score(y_test, y_pred, labels=class_label_ids, average=None),
        'Recall': recall_score(y_test, y_pred, labels=class_label_ids, average=None),
        'F1': f1_score(y_test, y_pred, labels=class_label_ids, average=None),
    }

    row = {
        'Model': model_name,
        'Accuracy': accuracy,
        'Macro F1': macro_f1,
    }
    # Column order: all three metrics for Neutral, then Negative, then Positive.
    for position, class_name in enumerate(class_names):
        for metric_name, scores in per_class_scores.items():
            row[f'{class_name} {metric_name}'] = scores[position]

    results.append(row)

results_df = pd.DataFrame(results)
print("Metrics collection completed!")

### 5.2 Overall Performance Comparison

In [None]:
# Print the headline test-set metrics for every model.
separator = "="*100
print(separator)
print("MODEL COMPARISON - OVERALL METRICS (TEST SET)")
print(separator)
print(results_df[['Model', 'Accuracy', 'Macro F1']].to_string(index=False))
print(separator)

# Visualize overall metrics: one bar chart per metric, side by side.
fig, axes = plt.subplots(1, 2, figsize=(16, 6))
models = results_df['Model'].values
x_pos = np.arange(len(models))

# (results_df column, axis/title label, bar colour) for each panel
panel_specs = [
    ('Accuracy', 'Accuracy', 'steelblue'),
    ('Macro F1', 'Macro F1-Score', 'coral'),
]

for ax, (column, metric_label, bar_color) in zip(axes, panel_specs):
    ax.bar(x_pos, results_df[column], color=bar_color, edgecolor='black', linewidth=1.5)
    ax.set_xlabel('Model', fontsize=12, fontweight='bold')
    ax.set_ylabel(metric_label, fontsize=12, fontweight='bold')
    ax.set_title(f'Model Comparison - {metric_label}', fontsize=14, fontweight='bold')
    ax.set_xticks(x_pos)
    ax.set_xticklabels(models, rotation=45, ha='right')
    ax.set_ylim([0, 1])
    ax.grid(axis='y', alpha=0.3)
    # Annotate each bar with its value, slightly above the bar top.
    for bar_index, score in enumerate(results_df[column]):
        ax.text(bar_index, score + 0.02, f'{score:.4f}', ha='center', fontweight='bold')

plt.tight_layout()
plt.show()

### 5.3 Class-wise Precision, Recall, F1

In [None]:
rule = "="*100
print("\n" + rule)
print("CLASS-WISE METRICS (TEST SET)")
print(rule)

# Display class-wise metrics: one table per class with precision, recall and F1.
metric_suffixes = ['Precision', 'Recall', 'F1']
for class_name in class_names:
    print(f"\n{class_name.upper()} Class:")
    print("-"*100)
    table_columns = ['Model'] + [f'{class_name} {suffix}' for suffix in metric_suffixes]
    print(results_df[table_columns].to_string(index=False))

# Visualize class-wise precision, recall and F1: one panel per class,
# three grouped bars per model.
fig, axes = plt.subplots(1, 3, figsize=(20, 6))

bar_width = 0.25
group_centers = np.arange(len(models))

# (results_df column suffix, legend label, horizontal offset, colour)
bar_series = [
    ('Precision', 'Precision', -bar_width, '#5DADE2'),
    ('Recall', 'Recall', 0, '#E74C3C'),
    ('F1', 'F1-Score', bar_width, '#52BE80'),
]

for ax, class_name in zip(axes, class_names):
    for suffix, legend_label, offset, bar_color in bar_series:
        scores = results_df[f'{class_name} {suffix}'].values
        ax.bar(group_centers + offset, scores, bar_width, label=legend_label, color=bar_color)

    ax.set_xlabel('Model', fontsize=11, fontweight='bold')
    ax.set_ylabel('Score', fontsize=11, fontweight='bold')
    ax.set_title(f'{class_name} Class Metrics', fontsize=13, fontweight='bold')
    ax.set_xticks(group_centers)
    ax.set_xticklabels(models, rotation=45, ha='right', fontsize=9)
    ax.set_ylim([0, 1])
    ax.legend()
    ax.grid(axis='y', alpha=0.3)

plt.tight_layout()
plt.show()

### 5.4 Confusion Matrices Comparison

In [None]:
# Create confusion matrices for all models on the test set, one heatmap per
# model in a 2x3 grid (the grid and colour list are sized for six models).
fig, axes = plt.subplots(2, 3, figsize=(18, 12))
axes = axes.ravel()

cmaps = ['Blues', 'Greens', 'Oranges', 'Purples', 'YlOrBr', 'Reds']

# Label ids and their display names, in matching order. Pinning `labels`
# keeps every matrix 3x3 and in this order, so the tick labels stay correct
# even if a class is absent from both y_test and a model's predictions.
matrix_label_ids = [0, 1, 2]
matrix_label_names = ['Neutral', 'Negative', 'Positive']

for idx, (model_name, y_pred) in enumerate(models_predictions.items()):
    cm = confusion_matrix(y_test, y_pred, labels=matrix_label_ids)

    sns.heatmap(cm, annot=True, fmt='d', cmap=cmaps[idx], ax=axes[idx],
                xticklabels=matrix_label_names,
                yticklabels=matrix_label_names,
                cbar_kws={'label': 'Count'})

    axes[idx].set_title(f'{model_name} - Confusion Matrix', fontweight='bold', fontsize=13)
    axes[idx].set_ylabel('True Label', fontsize=11)
    axes[idx].set_xlabel('Predicted Label', fontsize=11)

plt.tight_layout()
plt.show()

### 5.5 Save Comparison Results

In [None]:
# Imported here so this cell does not depend on another cell having
# imported `os` (it is not part of the notebook's first import cell).
import os

# Create results directory if it doesn't exist
os.makedirs('../results', exist_ok=True)

# Save results to CSV (one row per model, all overall and per-class metrics)
results_df.to_csv('../results/sentiment_model_comparison.csv', index=False)
print("Results saved to ../results/sentiment_model_comparison.csv")

# Label ids and display names, in matching order. Passing `labels` keeps
# classification_report from raising when a class is missing from both
# y_test and a model's predictions (3 target names vs. fewer found classes).
report_label_ids = [0, 1, 2]
report_label_names = ['Neutral', 'Negative', 'Positive']

# Save detailed report: overall table, full metrics table, then one
# scikit-learn classification report per model.
with open('../results/sentiment_detailed_report.txt', 'w', encoding='utf-8') as f:
    f.write("="*100 + "\n")
    f.write("SENTIMENT ANALYSIS - MODEL COMPARISON REPORT\n")
    f.write("="*100 + "\n\n")

    f.write("Overall Performance:\n")
    f.write("-"*100 + "\n")
    f.write(results_df[['Model', 'Accuracy', 'Macro F1']].to_string(index=False))
    f.write("\n\n")

    f.write("Class-wise Detailed Metrics:\n")
    f.write("="*100 + "\n")
    f.write(results_df.to_string(index=False))
    f.write("\n\n")

    # Classification reports for each model
    for model_name, y_pred in models_predictions.items():
        f.write("\n" + "="*100 + "\n")
        f.write(f"{model_name} - DETAILED CLASSIFICATION REPORT\n")
        f.write("="*100 + "\n")
        f.write(classification_report(y_test, y_pred,
                                      labels=report_label_ids,
                                      target_names=report_label_names))
        f.write("\n")

print("Detailed report saved to ../results/sentiment_detailed_report.txt")
print("\nAll models trained, evaluated, and saved successfully!")

---
## End of Notebook