In [1]:
# Import necessary libraries
import os
import pandas as pd
import numpy as np
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score, classification_report, confusion_matrix
import joblib
import tensorflow as tf
from tensorflow.keras.models import load_model
from transformers import TFBertForSequenceClassification, BertTokenizer
import seaborn as sns
import matplotlib.pyplot as plt

# Filesystem locations shared by every cell below.
# When running in Colab, point `data_path` at your dataset directory.
model_save_path = "./models/"  # saved models, vectorizer, tokenizers and label encoders are loaded from here
data_path = "./data/"
test_file = os.path.join(data_path, "test_data.csv")  # CSV read by the test-data loading cell


ModuleNotFoundError: No module named 'transformers'

In [None]:

# Load test data
def load_test_data(file_path, text_column="clean_text"):
    """
    Read the test-set CSV into a DataFrame with a string-only text column.

    pandas reads an empty CSV cell back as NaN (a float). A row whose text
    was cleaned down to nothing would therefore hand a non-string value to
    the text vectorizer / tokenizers used later in the notebook, so missing
    entries in ``text_column`` are replaced by "" and the column is cast to
    ``str``.

    Args:
        file_path: Path to the CSV file.
        text_column: Name of the column holding the text to classify. It is
            normalized only if present; pass ``None`` to skip normalization
            and get the frame exactly as ``pd.read_csv`` returns it.

    Returns:
        pandas.DataFrame: The loaded rows; no rows are dropped.
    """
    df = pd.read_csv(file_path)
    if text_column is not None and text_column in df.columns:
        df[text_column] = df[text_column].fillna("").astype(str)
    return df

# Read the test set once, then pull out the model input (text) and the target (category).
test_df = load_test_data(test_file)
X_test, y_test = test_df["clean_text"], test_df["category"]

# General evaluation function
def evaluate_model(y_true, y_pred, model_name):
    """
    Print standard classification metrics for a model and return them.

    Accuracy plus weighted-average F1, precision and recall are printed under
    a header naming the model, followed by scikit-learn's per-class
    classification report.

    Args:
        y_true: Ground-truth labels.
        y_pred: Predicted labels, in the same label space as ``y_true``.
        model_name: Display name used in the printed header.

    Returns:
        dict: The four headline scores under the keys ``"accuracy"``,
        ``"f1"``, ``"precision"`` and ``"recall"``, so callers can collect
        and compare results across models instead of re-reading the printout.
    """
    # zero_division=0 gives the same score scikit-learn's default produces
    # when a class has no predicted (or no true) samples, but without the
    # UndefinedMetricWarning it would otherwise emit.
    metrics = {
        "accuracy": accuracy_score(y_true, y_pred),
        "f1": f1_score(y_true, y_pred, average='weighted', zero_division=0),
        "precision": precision_score(y_true, y_pred, average='weighted', zero_division=0),
        "recall": recall_score(y_true, y_pred, average='weighted', zero_division=0),
    }

    print(f"\n{model_name} Evaluation Metrics:")
    print(f"Accuracy: {metrics['accuracy']:.4f}")
    print(f"F1-score: {metrics['f1']:.4f}")
    print(f"Precision: {metrics['precision']:.4f}")
    print(f"Recall: {metrics['recall']:.4f}")
    print("\nClassification Report:\n")
    print(classification_report(y_true, y_pred, zero_division=0))

    return metrics



In [None]:
# **Section 1: Baseline Models**
print("Evaluating Baseline Models...")
def evaluate_baseline_models(X_test, y_test):
    """
    Score each saved baseline classifier on the test set.

    The saved TF-IDF vectorizer is loaded and applied to ``X_test`` once.
    The Logistic Regression, Naive Bayes and SVM models are then loaded one
    at a time from ``model_save_path``, and their predictions are reported
    through ``evaluate_model`` under the model file's name without ".pkl".

    Args:
        X_test: Test texts, passed to the vectorizer's ``transform``.
        y_test: Ground-truth labels for ``X_test``.
    """
    def artifact(filename):
        # Every baseline artifact sits directly under model_save_path.
        return os.path.join(model_save_path, filename)

    tfidf = joblib.load(artifact("tfidf_vectorizer.pkl"))
    features = tfidf.transform(X_test)

    for pickle_name in ("Logistic_Regression.pkl", "Naive_Bayes.pkl", "SVM.pkl"):
        classifier = joblib.load(artifact(pickle_name))
        display_name = os.path.splitext(pickle_name)[0]
        evaluate_model(y_test, classifier.predict(features), display_name)

evaluate_baseline_models(X_test, y_test)



In [None]:
# **Section 2: LSTM Model**
print("\nEvaluating LSTM Model...")
def evaluate_lstm_model(X_test, y_test, max_len=100):
    """
    Load the saved LSTM model and evaluate it on the test set.

    The texts are converted to integer sequences with the saved tokenizer
    and padded to ``max_len``. The labels are encoded with the saved label
    encoder. Metrics are printed via ``evaluate_model`` and a confusion
    matrix heatmap is shown.

    Args:
        X_test: Test texts.
        y_test: Ground-truth labels, in the form the label encoder was fitted on.
        max_len: Length every sequence is padded/truncated to. NOTE(review):
            this presumably has to equal the length used when the model was
            trained -- confirm against the training notebook before changing it.
    """
    lstm_model = load_model(os.path.join(model_save_path, "lstm_model.h5"))
    tokenizer = joblib.load(os.path.join(model_save_path, "tokenizer.pkl"))
    label_encoder = joblib.load(os.path.join(model_save_path, "label_encoder.pkl"))

    # Preprocess test data
    X_test_seq = tokenizer.texts_to_sequences(X_test)
    X_test_padded = tf.keras.preprocessing.sequence.pad_sequences(X_test_seq, maxlen=max_len)
    y_test_encoded = label_encoder.transform(y_test)

    # Predict: take the highest-scoring column of each output row as the class id
    y_probs = lstm_model.predict(X_test_padded)
    y_pred = np.argmax(y_probs, axis=1)

    evaluate_model(y_test_encoded, y_pred, "LSTM Model")

    # Confusion Matrix
    # If the encoder exposes `classes_` (as scikit-learn's LabelEncoder does),
    # pin the matrix to the full set of class ids so it is always
    # n_classes x n_classes, and label the axes with the category names
    # rather than opaque integer ids.
    class_names = getattr(label_encoder, "classes_", None)
    if class_names is not None:
        cm = confusion_matrix(y_test_encoded, y_pred, labels=np.arange(len(class_names)))
        tick_labels = [str(name) for name in class_names]
    else:
        cm = confusion_matrix(y_test_encoded, y_pred)
        tick_labels = "auto"

    fig, ax = plt.subplots()
    sns.heatmap(cm, annot=True, fmt="d", cmap="Blues",
                xticklabels=tick_labels, yticklabels=tick_labels, ax=ax)
    ax.set_title("Confusion Matrix for LSTM Model")
    ax.set_xlabel("Predicted Label")
    ax.set_ylabel("True Label")
    plt.show()

evaluate_lstm_model(X_test, y_test)



In [None]:
# **Section 3: BERT Model**
print("\nEvaluating BERT Model...")
def evaluate_bert_model(X_test, y_test, max_length=128):
    """
    Load the saved BERT classifier and evaluate it on the test set.

    The texts are tokenized with the saved BERT tokenizer and the labels are
    converted to ids through the saved label mapping. Metrics are printed via
    ``evaluate_model`` and a confusion matrix heatmap is shown.

    Args:
        X_test: Test texts (any iterable of strings).
        y_test: pandas Series of ground-truth labels; each value is looked up
            in the saved label mapping.
        max_length: Maximum token length; longer inputs are truncated.
            NOTE(review): presumably this should match the value used for
            fine-tuning -- confirm before changing it.

    Raises:
        ValueError: If any label in ``y_test`` has no entry in the label mapping.
    """
    bert_model = TFBertForSequenceClassification.from_pretrained(os.path.join(model_save_path, "bert_model"))
    tokenizer = BertTokenizer.from_pretrained(os.path.join(model_save_path, "bert_tokenizer"))
    label_mapping = joblib.load(os.path.join(model_save_path, "bert_label_mapping.pkl"))

    # Encode the labels first. Series.map turns a label that is missing from
    # the mapping into NaN, so fail here with the offending labels named
    # instead of running the expensive prediction and failing inside the
    # metric functions afterwards.
    y_mapped = y_test.map(label_mapping)
    unmapped = y_mapped.isna()
    if unmapped.any():
        unknown = sorted({str(label) for label in y_test[unmapped]})
        raise ValueError(f"Test labels with no entry in the BERT label mapping: {unknown}")
    y_test_encoded = y_mapped.values

    # Preprocess test data
    X_test_enc = tokenizer(
        list(X_test),
        max_length=max_length,
        truncation=True,
        padding=True,
        return_tensors="tf"
    )

    # Predict
    y_probs = bert_model.predict({"input_ids": X_test_enc["input_ids"], "attention_mask": X_test_enc["attention_mask"]}).logits
    y_pred = np.argmax(y_probs, axis=1)

    evaluate_model(y_test_encoded, y_pred, "BERT Model")

    # Confusion Matrix
    # If the mapping is a plain label -> id dict with distinct ids, pin the
    # matrix to those ids (sorted by id) and put the label names on the axes.
    # Otherwise fall back to the matrix and automatic ticks used before.
    cm = None
    tick_labels = "auto"
    if isinstance(label_mapping, dict):
        by_id = sorted(label_mapping.items(), key=lambda item: item[1])
        class_ids = [class_id for _, class_id in by_id]
        if len(set(class_ids)) == len(class_ids):
            cm = confusion_matrix(y_test_encoded, y_pred, labels=class_ids)
            tick_labels = [str(label) for label, _ in by_id]
    if cm is None:
        cm = confusion_matrix(y_test_encoded, y_pred)

    fig, ax = plt.subplots()
    sns.heatmap(cm, annot=True, fmt="d", cmap="Blues",
                xticklabels=tick_labels, yticklabels=tick_labels, ax=ax)
    ax.set_title("Confusion Matrix for BERT Model")
    ax.set_xlabel("Predicted Label")
    ax.set_ylabel("True Label")
    plt.show()

evaluate_bert_model(X_test, y_test)
