### Introduction

This spam and phishing detection tool was developed as part of **Chapter 5: Practical AI using ML and LM** of my Master's thesis titled *Practical AI in Cyberwarfare and Cybersecurity*.

Created by **Konstantinos Zafeiropoulos (ID: 20390293)**  
**University of West Attica**  
**Faculty of Engineering, Department of Informatics and Computer Engineering**

The tool applies **machine learning and NLP** to classify messages as *spam* or *ham*, including phishing detection. It uses **TF-IDF vectorization** and a **Logistic Regression** classifier trained on real-world datasets. The system is fast, lightweight and effective for real-time filtering applications.


In [None]:
# Import libraries
import pandas as pd
import numpy as np
import nltk
import matplotlib.pyplot as plt
import seaborn as sns
from nltk.corpus import stopwords
from collections import Counter
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, confusion_matrix

# Download the NLTK stopword corpus (read later through nltk.corpus.stopwords).
nltk.download("stopwords")

# Load the three spam/ham corpora. Each frame ends up with the same two
# column names, "Category" (label) and "Message" (text), so the frames can
# be concatenated afterwards.
# NOTE(review): overwriting .columns with two names assumes each CSV has
# exactly two columns, label first and text second - confirm against the files.
df_sms = pd.read_csv("/kaggle/input/spam-sms-classification-using-nlp/Spam_SMS.csv")
df_sms.columns = ["Category", "Message"]

df_email = pd.read_csv("/kaggle/input/spam-email-classification/email.csv")
df_email.columns = ["Category", "Message"]

# Tab-separated file read without a header row; column names are supplied here.
df_uci_sms = pd.read_csv("/kaggle/input/uci-sms-spam-collection-data-set/SMSSpamCollection", sep="\t", header=None, names=["Category", "Message"])

# Phishing e-mail corpora: each CSV is loaded independently so that one
# unreadable file does not prevent the others from being used.
paths = [
    "/kaggle/input/phishing-email-dataset/CEAS_08.csv",
    "/kaggle/input/phishing-email-dataset/Enron.csv",
    "/kaggle/input/phishing-email-dataset/Ling.csv",
    "/kaggle/input/phishing-email-dataset/Nazario.csv",
    "/kaggle/input/phishing-email-dataset/Nigerian_Fraud.csv",
    "/kaggle/input/phishing-email-dataset/SpamAssasin.csv",
    "/kaggle/input/phishing-email-dataset/phishing_email.csv"
]

# NOTE(review): only the first two columns of every file are kept and they are
# renamed to Category/Message regardless of what they contain. Rows are only
# useful downstream if the first column holds the strings "spam"/"ham" -
# confirm the actual column layout of these CSVs.
phishing_dfs = []
for csv_path in paths:
    try:
        first_two = pd.read_csv(csv_path).iloc[:, :2]
        first_two.columns = ["Category", "Message"]
    except Exception as err:
        # Best effort: report the failure and carry on with the next file.
        print(f"❌ Error loading {csv_path}: {err}")
    else:
        phishing_dfs.append(first_two)

# Combine all datasets into one frame with a fresh 0..n-1 index and drop
# rows that lack either a label or a message.
df = pd.concat([df_sms, df_email, df_uci_sms] + phishing_dfs, ignore_index=True)
df = df.dropna(subset=["Category", "Message"])

# Clean and encode labels: normalise case and surrounding whitespace, keep
# only rows labelled "spam" or "ham", then encode spam -> 0 and ham -> 1.
df["Category"] = df["Category"].astype(str).str.lower().str.strip()
# .copy() detaches the filtered rows so the assignments below write to an
# independent frame rather than to a slice of the concatenated one
# (avoids pandas' SettingWithCopyWarning).
df = df[df["Category"].isin(["spam", "ham"])].copy()
df["Category"] = df["Category"].map({"spam": 0, "ham": 1})
df = df.dropna()

# The text pipeline (TF-IDF, word counting) expects plain strings; coerce any
# value that the CSV parser read as a number.
df["Message"] = df["Message"].astype(str)

# Several source corpora are merged, so the same message can occur more than
# once. Keep only the first occurrence: otherwise identical texts can end up
# in both the training and the test split and inflate the measured accuracy.
df = df.drop_duplicates(subset=["Message"], keep="first")

print(f"📦 Total Samples Before Balancing: {df.shape[0]}")

# Visualize original distribution (x axis shows the encoded labels:
# 0 = spam, 1 = ham).
sns.countplot(data=df, x="Category")
plt.title("Original Distribution of Spam vs Ham")
plt.show()

# Balance the dataset by undersampling whichever class is larger.
# Sampling without replacement raises ValueError when asked for more rows
# than the class contains, so the smaller class must always set the target
# size. When ham is the larger (or equal) class this is exactly the original
# behaviour: spam is kept as-is and ham is sampled down with seed 42.
spam_df = df[df["Category"] == 0]
ham_df = df[df["Category"] == 1]
if len(ham_df) >= len(spam_df):
    ham_df = ham_df.sample(len(spam_df), random_state=42)
else:
    spam_df = spam_df.sample(len(ham_df), random_state=42)

# Shuffle so the two classes are interleaved rather than stacked.
df_balanced = pd.concat([spam_df, ham_df]).sample(frac=1, random_state=42)

print(f"📦 Total Samples After Balancing: {df_balanced.shape[0]}")

# Visualize balanced data
sns.countplot(data=df_balanced, x="Category")
plt.title("Balanced Spam vs Ham Distribution")
plt.show()

# Hold out 20% of the balanced data for testing (fixed seed for repeatability).
X, Y = df_balanced["Message"], df_balanced["Category"]
X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    Y,
    test_size=0.2,
    random_state=3,
)

# TF-IDF features: the vectorizer is fitted on the training messages only;
# the test messages are merely transformed with that fitted vocabulary.
vectorizer = TfidfVectorizer(min_df=1, stop_words="english", lowercase=True)
X_train_features = vectorizer.fit_transform(X_train)
X_test_features = vectorizer.transform(X_test)

# Fit a logistic-regression classifier with default settings
# (fit() returns the estimator itself).
model = LogisticRegression().fit(X_train_features, Y_train)

# Predict on both splits and report accuracy; comparing the two numbers
# shows how far training performance is from held-out performance.
train_pred = model.predict(X_train_features)
test_pred = model.predict(X_test_features)

train_accuracy = accuracy_score(Y_train, train_pred)
print("📈 Accuracy on training data:", train_accuracy)
test_accuracy = accuracy_score(Y_test, test_pred)
print("📊 Accuracy on testing data:", test_accuracy)

# Confusion matrix on the held-out split.
# Labels were encoded as spam -> 0 and ham -> 1 during data preparation, so
# the class order is pinned explicitly: the matrix is then always 2x2 with
# spam first, and the axes show class names instead of the raw 0/1 codes
# (which are easy to misread because spam is the 0 class).
class_codes = [0, 1]
class_names = ["Spam", "Ham"]
cm = confusion_matrix(Y_test, test_pred, labels=class_codes)
sns.heatmap(
    cm,
    annot=True,
    fmt="d",
    cmap="Blues",
    xticklabels=class_names,
    yticklabels=class_names,
)
plt.title("Confusion Matrix")
plt.xlabel("Predicted")
plt.ylabel("Actual")
plt.show()

In [None]:
# Quick sanity check: classify one obviously promotional message and one
# everyday message with the fitted vectorizer and model.
new_messages = [
    "Congratulations! You have won a free ticket to Bahamas! Click here.",
    "Hey are we still on for lunch today?",
]
new_features = vectorizer.transform(new_messages)
predictions = model.predict(new_features)

for text, predicted_code in zip(new_messages, predictions):
    # Encoded labels: 1 means ham, anything else is reported as spam.
    if predicted_code == 1:
        label = "Ham"
    else:
        label = "Spam"
    print(f"Message: {text}\nPrediction: {label}\n")

In [None]:
import time

def predict_message(msg):
    """Classify a single message and print the label and the model latency.

    Args:
        msg: The message text to classify.

    Prints the message, the predicted label ("Ham" when the model outputs 1,
    otherwise "Spam") and the time spent inside ``model.predict`` in
    milliseconds. Returns nothing.
    """
    # Vectorisation happens before the timer starts, so the reported time
    # covers the classifier call only.
    features = vectorizer.transform([msg])

    # perf_counter() is a monotonic, high-resolution clock meant for measuring
    # short intervals; time.time() is wall-clock time with coarse resolution
    # and can report 0 ms (or jump) for a sub-millisecond prediction.
    start_time = time.perf_counter()  # ⏱ Start timer
    result = model.predict(features)[0]
    end_time = time.perf_counter()    # ⏱ End timer

    label = "Ham" if result == 1 else "Spam"
    duration = (end_time - start_time) * 1000  # in milliseconds
    print(f"🔍 Message: {msg}")
    print(f"➡️ Prediction: {label}")
    print(f"⏱ Prediction Time: {duration:.4f} ms")

# Interactive check: read one message from the user and classify it.
user_msg = input("✉️ Enter a message to check: ")
predict_message(user_msg)


In [None]:
from sklearn.metrics import classification_report

# Per-class precision/recall/F1 on the held-out split. The display names are
# listed in encoded-label order (0 = spam, 1 = ham).
report_text = classification_report(
    Y_test,
    test_pred,
    target_names=["Spam", "Ham"],
)
print(report_text)

In [None]:
# Most common spam words.
# Count lower-cased, purely alphabetic, non-stopword tokens across all spam
# messages (Category == 0) of the full, unbalanced dataset.
stop_words = set(stopwords.words("english"))

# Tokens are counted message by message instead of first joining every
# message into one huge string; astype(str) guards against values the CSV
# parser read as numbers.
word_freq = Counter()
for spam_message in df.loc[df["Category"] == 0, "Message"].astype(str):
    for token in spam_message.split():
        lowered = token.lower()
        if token.isalpha() and lowered not in stop_words:
            word_freq[lowered] += 1

top_words = word_freq.most_common(7)

if top_words:
    words, counts = zip(*top_words)
    plt.figure(figsize=(10, 6))
    plt.bar(words, counts, color='orange')
    plt.title("Top 7 Most Common Words in Spam Messages")
    plt.xlabel("Words")
    plt.ylabel("Frequency")
    plt.xticks(rotation=45)
    plt.show()
else:
    # zip(*[]) yields nothing to unpack into plt.bar, so skip the plot when
    # no token survives the filtering.
    print("No alphabetic non-stopword tokens found in spam messages; nothing to plot.")

In [None]:
# Explainability, Reducing AI Risks
# Compute SHAP attributions for a handful of test messages so the words that
# drive the classifier's decisions can be inspected.

import shap

# Densify only the first 5 test rows: these are the samples to be explained.
X_sample_dense = X_test_features[:5].toarray()

# Build the explainer through the generic shap.Explainer entry point.
# NOTE(review): the original comment expects this to behave as a
# LinearExplainer for LogisticRegression - confirm which explainer shap selects.
explainer = shap.Explainer(model, X_train_features[:100].toarray())  # background = first 100 training rows, kept small for performance
shap_values = explainer(X_sample_dense)

In [None]:
# Rank vocabulary terms by their average absolute SHAP value over the
# explained samples and show the 20 strongest ones.
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Vocabulary terms, in the same column order as the TF-IDF feature matrix.
feature_names = vectorizer.get_feature_names_out()

# Average magnitude of each feature's attribution across the explained rows.
mean_shap_values = np.mean(np.abs(shap_values.values), axis=0)

# One row per vocabulary term with its mean |SHAP| score.
shap_df = pd.DataFrame(
    {
        'Word': feature_names,
        'Mean SHAP Value': mean_shap_values,
    }
)

# Highest scores first; keep the top 20.
shap_df_sorted = shap_df.sort_values('Mean SHAP Value', ascending=False).iloc[:20]
print(shap_df_sorted)

# Horizontal bar chart of the top terms.
fig, ax = plt.subplots(figsize=(10, 6))
ax.barh(shap_df_sorted['Word'], shap_df_sorted['Mean SHAP Value'], color='orange')
ax.set_xlabel("Mean |SHAP Value|")
ax.set_title("Top 20 Words Influencing Spam Classification")
# Flip the y axis so the most influential word is drawn at the top.
ax.invert_yaxis()
plt.show()

In [None]:
def test_real_email():
    """Prompt for an e-mail, classify it and print a short report.

    Reads the e-mail text from standard input, vectorises it with the fitted
    TF-IDF vectorizer, classifies it with the trained model, and prints a
    preview of at most 300 characters, the predicted label and the time spent
    inside ``model.predict`` in milliseconds. Takes no arguments and returns
    nothing.
    """
    import time

    # Ask the user to paste their e-mail.
    # NOTE(review): input() returns a single line - confirm that multi-line
    # e-mails are pasted as one line, otherwise only the first line is read.
    print("📥 Paste the full email content below (e.g., subject + body):\n")
    email_text = input("✉️ Email: ")

    # Transform with the fitted vectorizer (not included in the timing).
    features = vectorizer.transform([email_text])

    # Predict. perf_counter() is a monotonic, high-resolution clock intended
    # for short intervals; time.time() is wall-clock time and is too coarse
    # to measure a sub-millisecond prediction reliably.
    start_time = time.perf_counter()
    result = model.predict(features)[0]
    end_time = time.perf_counter()

    # Interpret the result: the model outputs 1 for ham, otherwise spam.
    label = "✅ Ham (Safe Message)" if result == 1 else "⚠️ Spam/Phishing"
    print("\n📨 Email Content Preview:")
    print("-" * 60)
    # Show only the first 300 characters, with an ellipsis when truncated.
    print(email_text[:300] + ("..." if len(email_text) > 300 else ""))
    print("-" * 60)
    print(f"🔎 Prediction: {label}")
    print(f"⏱ Prediction Time: {(end_time - start_time)*1000:.2f} ms")

# Call this to test
test_real_email()