In [1]:
import pandas as pd
from openai import OpenAI
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier, HistGradientBoostingClassifier
from sklearn.metrics import accuracy_score, classification_report
import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
from collections import Counter
import re
from tqdm import tqdm

In [2]:
# Load each corpus, shuffle its rows with a fixed seed so the order is
# reproducible, and keep only the two columns used by the rest of the notebook.
text_ds = (
    pd.read_csv("data/spam.csv", encoding='latin-1')
    .sample(frac=1, random_state=42)
    .loc[:, ['label', 'text']]
)

email_ds = (
    pd.read_csv("data/spamassassin.csv")
    .sample(frac=1, random_state=42)
    .loc[:, ['label', 'text']]
)

In [3]:
# --- Text messages -----------------------------------------------------------
# Separate the raw text (features) from the labels (targets).
X_text, y_text = text_ds['text'], text_ds['label']

# Hold out 20% of the rows for evaluation; the seed keeps the split repeatable.
X_text_train, X_text_test, y_text_train, y_text_test = train_test_split(
    X_text, y_text, random_state=42, test_size=0.2
)

# The vectorizer is fitted on the training split only and then re-used to
# transform the held-out split, so no test vocabulary leaks into training.
tfidf_text = TfidfVectorizer(stop_words='english', max_features=1000)
X_text_train_tfidf = tfidf_text.fit_transform(X_text_train)
X_text_test_tfidf = tfidf_text.transform(X_text_test)

# --- E-mails -----------------------------------------------------------------
# Same pipeline, with an independent vectorizer for the e-mail corpus.
X_email, y_email = email_ds['text'], email_ds['label']

X_email_train, X_email_test, y_email_train, y_email_test = train_test_split(
    X_email, y_email, random_state=42, test_size=0.2
)

tfidf_email = TfidfVectorizer(stop_words='english', max_features=1000)
X_email_train_tfidf = tfidf_email.fit_transform(X_email_train)
X_email_test_tfidf = tfidf_email.transform(X_email_test)

In [4]:
def train_evaluate_model(X_train, X_test, y_train, y_test, model_name, model, spam_label=0):
    """Fit ``model``, report its test-set quality, and return the fitted model.

    Two numbers are printed: the overall accuracy on the test split and the
    recall of the spam class (the fraction of true spam that was predicted as
    spam).

    Parameters
    ----------
    X_train, X_test : feature matrices passed to ``model.fit`` / ``model.predict``.
    y_train, y_test : labels matching the rows of ``X_train`` / ``X_test``.
    model_name : str
        Human-readable name used in the printed header.
    model : estimator exposing ``fit(X, y)`` and ``predict(X)``.
    spam_label : hashable, default 0
        Label value that denotes spam. ``classification_report`` keys its
        per-class entries by ``str(label)``, so the default reproduces the
        previous hard-coded ``report['0']`` lookup.

    Returns
    -------
    The same ``model`` object, after fitting.

    Raises
    ------
    KeyError
        If ``spam_label`` appears in neither ``y_test`` nor the predictions,
        so no recall can be reported for it.
    """
    # Train the model
    model.fit(X_train, y_train)

    # Make predictions
    y_pred = model.predict(X_test)

    # Calculate accuracy and the per-class report
    accuracy = accuracy_score(y_test, y_pred)
    report = classification_report(y_test, y_pred, output_dict=True)

    # Per-class entries are keyed by the string form of the label.
    spam_key = str(spam_label)
    if spam_key not in report:
        raise KeyError(
            f"spam_label={spam_label!r} is not among the evaluated classes; "
            f"report keys are {sorted(report)}"
        )
    spam_recall = report[spam_key]['recall']

    print(f"\n{model_name} Results:")
    print(f"Overal Accuracy: {accuracy:.4f}")
    print(f"% of Spam Caught: {spam_recall:.4f}")

    return model

In [5]:
# Fit four baseline classifiers on the text-message TF-IDF features.
# Each call prints accuracy / spam recall and returns the fitted estimator.
nb_text = train_evaluate_model(
    X_train=X_text_train_tfidf,
    X_test=X_text_test_tfidf,
    y_train=y_text_train,
    y_test=y_text_test,
    model_name="Naive Bayes (Text Messages)",
    model=MultinomialNB(),
)

lr_text = train_evaluate_model(
    X_train=X_text_train_tfidf,
    X_test=X_text_test_tfidf,
    y_train=y_text_train,
    y_test=y_text_test,
    model_name="Logistic Regression (Text Messages)",
    model=LogisticRegression(class_weight='balanced', max_iter=10000),
)

rf_text = train_evaluate_model(
    X_train=X_text_train_tfidf,
    X_test=X_text_test_tfidf,
    y_train=y_text_train,
    y_test=y_text_test,
    model_name="Random Forest (Text Messages)",
    model=RandomForestClassifier(random_state=42, class_weight='balanced'),
)

# The gradient-boosting model is fed dense arrays, so materialise them first.
X_text_train_dense = X_text_train_tfidf.toarray()
X_text_test_dense = X_text_test_tfidf.toarray()

hgbt_text = train_evaluate_model(
    X_train=X_text_train_dense,
    X_test=X_text_test_dense,
    y_train=y_text_train,
    y_test=y_text_test,
    model_name="HistGradientBoosting (Text Messages)",
    model=HistGradientBoostingClassifier(class_weight='balanced', max_iter=100),
)


Naive Bayes (Text Messages) Results:
Overal Accuracy: 0.9836
% of Spam Caught: 0.8824

Logistic Regression (Text Messages) Results:
Overal Accuracy: 0.9778
% of Spam Caught: 0.9265

Random Forest (Text Messages) Results:
Overal Accuracy: 0.9787
% of Spam Caught: 0.8897

HistGradientBoosting (Text Messages) Results:
Overal Accuracy: 0.9710
% of Spam Caught: 0.9338


In [6]:
# Pair every term in the text-message TF-IDF vocabulary with the weight the
# text-message logistic regression learned for it.
feature_names = tfidf_text.get_feature_names_out()
coefficients = lr_text.coef_[0]
abs_coefficients = np.abs(coefficients)

# One row per term, ordered by weight magnitude (strongest first).
feature_importance = pd.DataFrame(
    {
        'feature': feature_names,
        'coefficient': coefficients,
        'abs_coefficient': abs_coefficients,
    }
).sort_values('abs_coefficient', ascending=False)

# Spam is labelled 0 here, so negative weights are the spam indicators.
print("\nTop 10 Spam Indicators:")
spam_features = feature_importance.loc[feature_importance['coefficient'] < 0].head(10)
for term, weight in zip(spam_features['feature'], spam_features['coefficient']):
    print(f"{term}: {weight:.4f}")

# Positive weights therefore point towards ham.
print("\nTop 10 Ham Indicators:")
ham_features = feature_importance.loc[feature_importance['coefficient'] > 0].head(10)
for term, weight in zip(ham_features['feature'], ham_features['coefficient']):
    print(f"{term}: {weight:.4f}")

# Document frequency: in how many training rows does each term occur at all?
print("\nFeature Frequency Analysis:")
X_train_array = X_text_train_tfidf.toarray()
feature_freq = (X_train_array > 0).sum(axis=0)
freq_importance = pd.DataFrame(
    {
        'feature': feature_names,
        'frequency': feature_freq,
        'importance': abs_coefficients,
        'true_importance': coefficients,
    }
)
# Weight magnitude per occurrence: large for rare terms with big weights.
freq_importance['freq_importance_ratio'] = freq_importance['importance'].div(
    freq_importance['frequency']
)

print("\nTop 10 High-Impact, Low-Frequency Features:")
high_impact = freq_importance.sort_values('freq_importance_ratio', ascending=False).head(10)
for term, weight, count in zip(
    high_impact['feature'], high_impact['true_importance'], high_impact['frequency']
):
    print(f"{term}: Impact={weight:.4f}, Freq={count}")


Top 10 Spam Indicators:
txt: -4.6388
mobile: -4.0876
free: -3.5920
claim: -3.5912
uk: -3.5188
150p: -3.3224
service: -3.3055
text: -3.2897
www: -3.2653
new: -3.0460

Top 10 Ham Indicators:
ll: 1.9452
ok: 1.8341
lt: 1.7559
gt: 1.7467
da: 1.6229
home: 1.5653
got: 1.4247
way: 1.3929
lor: 1.3849
come: 1.3510

Feature Frequency Analysis:

Top 10 High-Impact, Low-Frequency Features:
charity: Impact=-1.2642, Freq=2
wap: Impact=-1.7276, Freq=5
0800: Impact=-2.3104, Freq=7
ac: Impact=-1.6422, Freq=6
sunshine: Impact=-1.5940, Freq=6
87077: Impact=-1.2605, Freq=5
comuk: Impact=-0.7402, Freq=3
00: Impact=-1.6077, Freq=7
user: Impact=-1.5997, Freq=7
20p: Impact=-1.5782, Freq=7


In [7]:
# Fit the same four baseline classifiers on the e-mail TF-IDF features.
# Each call prints accuracy / spam recall and returns the fitted estimator.
nb_email = train_evaluate_model(
    X_train=X_email_train_tfidf,
    X_test=X_email_test_tfidf,
    y_train=y_email_train,
    y_test=y_email_test,
    model_name="Naive Bayes (Emails)",
    model=MultinomialNB(),
)

lr_email = train_evaluate_model(
    X_train=X_email_train_tfidf,
    X_test=X_email_test_tfidf,
    y_train=y_email_train,
    y_test=y_email_test,
    model_name="Logistic Regression (Emails)",
    model=LogisticRegression(class_weight='balanced', max_iter=10000),
)

rf_email = train_evaluate_model(
    X_train=X_email_train_tfidf,
    X_test=X_email_test_tfidf,
    y_train=y_email_train,
    y_test=y_email_test,
    model_name="Random Forest (Emails)",
    model=RandomForestClassifier(random_state=42, class_weight='balanced'),
)

# The gradient-boosting model is fed dense arrays, so materialise them first.
X_email_train_dense = X_email_train_tfidf.toarray()
X_email_test_dense = X_email_test_tfidf.toarray()

hgbt_email = train_evaluate_model(
    X_train=X_email_train_dense,
    X_test=X_email_test_dense,
    y_train=y_email_train,
    y_test=y_email_test,
    model_name="HistGradientBoosting (Emails)",
    model=HistGradientBoostingClassifier(class_weight='balanced', max_iter=100),
)


Naive Bayes (Emails) Results:
Overal Accuracy: 0.9098
% of Spam Caught: 0.8736

Logistic Regression (Emails) Results:
Overal Accuracy: 0.9738
% of Spam Caught: 0.9753

Random Forest (Emails) Results:
Overal Accuracy: 0.9746
% of Spam Caught: 0.9505

HistGradientBoosting (Emails) Results:
Overal Accuracy: 0.9820
% of Spam Caught: 0.9780


In [8]:
# Pair every term in the e-mail TF-IDF vocabulary with the weight the e-mail
# logistic regression learned for it.
feature_names = tfidf_email.get_feature_names_out()
coefficients = lr_email.coef_[0]
abs_coefficients = np.abs(coefficients)

# One row per term, ordered by weight magnitude (strongest first).
feature_importance = pd.DataFrame(
    {
        'feature': feature_names,
        'coefficient': coefficients,
        'abs_coefficient': abs_coefficients,
    }
).sort_values('abs_coefficient', ascending=False)

# Spam is labelled 0 here, so negative weights are the spam indicators.
print("\nTop 10 Spam Indicators:")
spam_features = feature_importance.loc[feature_importance['coefficient'] < 0].head(10)
for term, weight in zip(spam_features['feature'], spam_features['coefficient']):
    print(f"{term}: {weight:.4f}")

# Positive weights therefore point towards ham.
print("\nTop 10 Ham Indicators:")
ham_features = feature_importance.loc[feature_importance['coefficient'] > 0].head(10)
for term, weight in zip(ham_features['feature'], ham_features['coefficient']):
    print(f"{term}: {weight:.4f}")

# Document frequency: in how many training rows does each term occur at all?
print("\nFeature Frequency Analysis:")
X_train_array = X_email_train_tfidf.toarray()
feature_freq = (X_train_array > 0).sum(axis=0)
freq_importance = pd.DataFrame(
    {
        'feature': feature_names,
        'frequency': feature_freq,
        'importance': abs_coefficients,
        'true_importance': coefficients,
    }
)
# Weight magnitude per occurrence: large for rare terms with big weights.
freq_importance['freq_importance_ratio'] = freq_importance['importance'].div(
    freq_importance['frequency']
)

print("\nTop 10 High-Impact, Low-Frequency Features:")
high_impact = freq_importance.sort_values('freq_importance_ratio', ascending=False).head(10)
for term, weight, count in zip(
    high_impact['feature'], high_impact['true_importance'], high_impact['frequency']
):
    print(f"{term}: Impact={weight:.4f}, Freq={count}")


Top 10 Spam Indicators:
remove: -3.7749
sightings: -3.7274
free: -3.2335
money: -3.1632
center: -2.8821
font: -2.8300
br: -2.5963
align: -2.5247
nwe: -2.4083
removed: -2.1524

Top 10 Ham Indicators:
cnet: 4.2253
wrote: 3.6706
lockergnome: 3.6045
2002: 3.5972
ndate: 3.0097
url: 2.9741
clickthru: 2.6644
zdnet: 2.3449
said: 2.3000
spam: 2.2734

Feature Frequency Analysis:

Top 10 High-Impact, Low-Frequency Features:
enenkio: Impact=-1.1781, Freq=2
nmv: Impact=-0.6699, Freq=7
lockergnome: Impact=3.6045, Freq=43
mediaunspun: Impact=0.8271, Freq=10
comics: Impact=1.1034, Freq=14
unspun: Impact=0.6656, Freq=10
dilbert: Impact=0.5899, Freq=9
imakenews: Impact=0.6301, Freq=10
tribute: Impact=0.2830, Freq=5
fool: Impact=1.1109, Freq=21


In [10]:
def show_top_features(lr_model, tfidf, n=10, X_train_tfidf=None):
    """Print the most influential TF-IDF terms of a fitted linear classifier.

    Three rankings are printed: the ``n`` strongest spam indicators (negative
    coefficients, since spam is labelled 0 in this notebook), the ``n``
    strongest ham indicators (positive coefficients), and the ``n`` terms with
    the largest coefficient magnitude per training document they occur in.

    Parameters
    ----------
    lr_model : fitted model exposing ``coef_`` (row 0 is used).
    tfidf : fitted vectorizer exposing ``get_feature_names_out()``.
    n : int, default 10
        Number of terms shown in each ranking.
    X_train_tfidf : sparse matrix or array-like, optional
        The training matrix produced by ``tfidf``; used to count in how many
        documents each term occurs. Pass the matrix that belongs to ``tfidf``.
        When omitted, the notebook-level ``X_email_train_tfidf`` is used, which
        keeps existing calls working but is only correct for the e-mail
        vectorizer.

    Raises
    ------
    ValueError
        If the matrix does not have one column per vocabulary term.
    """
    feature_names = tfidf.get_feature_names_out()

    # Get coefficients and their absolute values
    coefficients = lr_model.coef_[0]
    abs_coefficients = np.abs(coefficients)

    # Resolve the matrix used for document frequencies.
    if X_train_tfidf is None:
        # Backward-compatible fallback to the notebook global the original
        # implementation always read.
        X_train_tfidf = X_email_train_tfidf
    if not hasattr(X_train_tfidf, 'shape'):
        X_train_tfidf = np.asarray(X_train_tfidf)
    if len(X_train_tfidf.shape) != 2 or X_train_tfidf.shape[1] != len(feature_names):
        raise ValueError(
            f"X_train_tfidf has shape {X_train_tfidf.shape} but the vectorizer "
            f"has {len(feature_names)} features; pass the matrix produced by `tfidf`"
        )

    # Create a DataFrame of features and their importance, strongest first
    feature_importance = pd.DataFrame({
        'feature': feature_names,
        'coefficient': coefficients,
        'abs_coefficient': abs_coefficients
    }).sort_values('abs_coefficient', ascending=False)

    # Print top spam indicators (negative coefficients, as spam=0)
    print(f"\nTop {n} Spam Indicators:")
    spam_features = feature_importance[feature_importance['coefficient'] < 0].head(n)
    for term, weight in zip(spam_features['feature'], spam_features['coefficient']):
        print(f"{term}: {weight:.4f}")

    # Print top ham indicators (positive coefficients)
    print(f"\nTop {n} Ham Indicators:")
    ham_features = feature_importance[feature_importance['coefficient'] > 0].head(n)
    for term, weight in zip(ham_features['feature'], ham_features['coefficient']):
        print(f"{term}: {weight:.4f}")

    # Analyze feature frequency
    print("\nFeature Frequency Analysis:")
    # Count documents with a positive entry per column. np.asarray(...).ravel()
    # flattens the 1 x n_features result sparse matrices return, so the sparse
    # input never has to be converted to a dense array.
    feature_freq = np.asarray((X_train_tfidf > 0).sum(axis=0)).ravel()
    freq_importance = pd.DataFrame({
        'feature': feature_names,
        'frequency': feature_freq,
        'importance': abs_coefficients,
        'true_importance': coefficients
    })
    # A term that never occurs would give an infinite ratio and dominate the
    # ranking; mask it to NaN so sort_values places it last instead.
    occurring = freq_importance['frequency'].where(freq_importance['frequency'] > 0)
    freq_importance['freq_importance_ratio'] = freq_importance['importance'] / occurring

    print(f"\nTop {n} High-Impact, Low-Frequency Features:")
    high_impact = freq_importance.sort_values('freq_importance_ratio', ascending=False).head(n)
    for term, weight, count in zip(
        high_impact['feature'], high_impact['true_importance'], high_impact['frequency']
    ):
        print(f"{term}: Impact={weight:.4f}, Freq={count}")

In [11]:
# Show the ten strongest terms per ranking for the e-mail logistic regression.
show_top_features(lr_email, tfidf_email, n=10)


Top 10 Spam Indicators:
remove: -3.7749
sightings: -3.7274
free: -3.2335
money: -3.1632
center: -2.8821
font: -2.8300
br: -2.5963
align: -2.5247
nwe: -2.4083
removed: -2.1524

Top 10 Ham Indicators:
cnet: 4.2253
wrote: 3.6706
lockergnome: 3.6045
2002: 3.5972
ndate: 3.0097
url: 2.9741
clickthru: 2.6644
zdnet: 2.3449
said: 2.3000
spam: 2.2734

Feature Frequency Analysis:

Top 10 High-Impact, Low-Frequency Features:
enenkio: Impact=-1.1781, Freq=2
nmv: Impact=-0.6699, Freq=7
lockergnome: Impact=3.6045, Freq=43
mediaunspun: Impact=0.8271, Freq=10
comics: Impact=1.1034, Freq=14
unspun: Impact=0.6656, Freq=10
dilbert: Impact=0.5899, Freq=9
imakenews: Impact=0.6301, Freq=10
tribute: Impact=0.2830, Freq=5
fool: Impact=1.1109, Freq=21
