In [5]:

# Standard library
import pickle

# Third-party: model persistence and data handling
import joblib
import numpy as np
import pandas as pd

# scikit-learn helpers for splitting the data and scoring predictions
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report

print("All libraries imported successfully!")

All libraries imported successfully!


In [6]:

# Load the processed dataset, turn its 'text' column into features with the
# pre-fitted vectorizer, and create a stratified 80/20 train/test split.
print("Loading dataset...")

df = pd.read_csv('../data/processed_data.csv')
print(f"Dataset shape: {df.shape}")

# Fail fast with a readable message when the columns used below are absent or
# contain missing values. Without this check the problem only surfaces later,
# inside vectorizer.transform or a model's fit, with a far less obvious error.
required_columns = ['text', 'label']
missing_columns = [col for col in required_columns if col not in df.columns]
if missing_columns:
    raise KeyError(f"processed_data.csv is missing required columns: {missing_columns}")
null_counts = df[required_columns].isna().sum()
if null_counts.any():
    raise ValueError(
        "Missing values found in required columns:\n"
        f"{null_counts[null_counts > 0]}"
    )

print("Loading vectorizer...")
# SECURITY: pickle.load can execute arbitrary code embedded in the file.
# Only load a tfidf_vectorizer.pkl that was produced by a trusted source.
with open('tfidf_vectorizer.pkl', 'rb') as f:
    vectorizer = pickle.load(f)

print("Transforming text to features...")
# NOTE(review): the vectorizer is loaded already fitted. If it was fitted on the
# full dataset, statistics from the test rows leak into the features - confirm.
X = vectorizer.transform(df['text'])
y = df['label']

print("Splitting data...")
# stratify=y keeps the label proportions the same in both partitions;
# random_state pins the shuffle so the split is repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

print("Data preparation completed!")
print(f"Training set: {X_train.shape}")
print(f"Test set: {X_test.shape}")

Loading dataset...
Dataset shape: (8501, 8)
Loading vectorizer...
Transforming text to features...
Splitting data...
Data preparation completed!
Training set: (6800, 5000)
Test set: (1701, 5000)


In [8]:

# LightGBM classifier: fit on the training split, score on the test split,
# then persist the fitted estimator to disk.
print("TRAINING LIGHTGBM MODEL...")
from lightgbm import LGBMClassifier  # cell-local import

# Hyperparameters are fixed here; random_state pins the seed for repeatability.
lgb_model = LGBMClassifier(
    n_estimators=100,
    random_state=42,
    n_jobs=-1,
    verbose=0,
).fit(X_train, y_train)

# Accuracy is measured on X_test / y_test only.
lgb_pred = lgb_model.predict(X_test)
lgb_accuracy = accuracy_score(y_test, lgb_pred)
print(f"LightGBM Accuracy: {lgb_accuracy:.4f} ({lgb_accuracy*100:.2f}%)")

# Write the fitted model next to the notebook.
joblib.dump(lgb_model, 'lightgbm_model.pkl')
print("LightGBM model saved!")

TRAINING LIGHTGBM MODEL...
LightGBM Accuracy: 0.9541 (95.41%)
LightGBM model saved!




In [9]:

# Complement Naive Bayes: fit on the training split, score on the test split,
# then persist the fitted estimator to disk.
print("TRAINING COMPLEMENT NAIVE BAYES MODEL...")
from sklearn.naive_bayes import ComplementNB  # cell-local import

# Constructed with library defaults; fit() returns the estimator itself.
cnb_model = ComplementNB().fit(X_train, y_train)

# Accuracy is measured on X_test / y_test only.
cnb_pred = cnb_model.predict(X_test)
cnb_accuracy = accuracy_score(y_test, cnb_pred)
print(f"ComplementNB Accuracy: {cnb_accuracy:.4f} ({cnb_accuracy*100:.2f}%)")

# Write the fitted model next to the notebook.
joblib.dump(cnb_model, 'complement_nb_model.pkl')
print("ComplementNB model saved!")

TRAINING COMPLEMENT NAIVE BAYES MODEL...
ComplementNB Accuracy: 0.8895 (88.95%)
ComplementNB model saved!


In [10]:

# Passive Aggressive classifier: fit on the training split, score on the test
# split, then persist the fitted estimator to disk.
print("TRAINING PASSIVE AGGRESSIVE CLASSIFIER...")
from sklearn.linear_model import PassiveAggressiveClassifier  # cell-local import

# random_state pins the seed; max_iter caps the number of training passes.
pa_model = PassiveAggressiveClassifier(
    max_iter=1000,
    random_state=42,
    verbose=0,
).fit(X_train, y_train)

# Accuracy is measured on X_test / y_test only.
pa_pred = pa_model.predict(X_test)
pa_accuracy = accuracy_score(y_test, pa_pred)
print(f"Passive Aggressive Accuracy: {pa_accuracy:.4f} ({pa_accuracy*100:.2f}%)")

# Write the fitted model next to the notebook.
joblib.dump(pa_model, 'passive_aggressive_model.pkl')
print("Passive Aggressive model saved!")

TRAINING PASSIVE AGGRESSIVE CLASSIFIER...
Passive Aggressive Accuracy: 0.9447 (94.47%)
Passive Aggressive model saved!
