# Social Engineering Scam Detection with LLMs

**Reference:** Ronish Nair, Fahim Abbasi, and Shahbaz Pervez. 2025. PhishEmailLLM: A Meta Model Approach to Detect Phishing emails by leveraging LLMs and Machine Learning models. In 2025 Australasian Computer Science Week (ACSW 2025), February 10–13, 2025, Brisbane, QLD, Australia. ACM, New York, NY, USA, 11 pages. https://doi.org/10.1145/3727166.3727169

In [1]:
import os 
# Display the current working directory: the relative "./data/..." paths used
# when loading the datasets are resolved against it.
os.getcwd()

'/Users/VyasSrinivasan/AI-Social-Engineering-Scam-Detector'

### Imports

In [2]:
import pandas as pd
import numpy as np
import nltk
import matplotlib.pyplot as plt
import seaborn as sns
from nltk.corpus import stopwords
from collections import Counter

from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, confusion_matrix


In [None]:
# Download the NLTK "stopwords" corpus that backs `nltk.corpus.stopwords`.
nltk.download("stopwords")

### Load Datasets

In [None]:
# Email dataset: read the CSV and rename its columns to the shared
# ("Category", "Message") schema used by every source in this notebook.
df_email = pd.read_csv("./data/email.csv").set_axis(["Category", "Message"], axis=1)

# Preview the first rows.
df_email.head()

In [None]:
# SMS dataset: read the CSV and rename its columns to the shared
# ("Category", "Message") schema used by every source in this notebook.
df_smsText = pd.read_csv("./data/Spam_SMS.csv").set_axis(["Category", "Message"], axis=1)

# Preview the first rows.
df_smsText.head()

In [None]:
# UCI SMS dataset: a tab-separated file with no header row, so the column
# names are supplied explicitly while reading.
df_uciSmsText = pd.read_csv(
    "./data/SMSSpamCollection",
    sep="\t",
    header=None,
    names=["Category", "Message"],
)
df_uciSmsText.head()

In [None]:
# Base names (without the ".csv" extension) of the phishing datasets that are
# read from "./data/Phishing/".
phishingFiles = ["CEAS_08", "Enron", "Ling", "Nazario", "Nigerian_Fraud", "phishing_email", "SpamAssasin"]

In [None]:
# Load every phishing CSV, keeping only its first two columns and renaming
# them to the shared ("Category", "Message") schema.
# NOTE(review): this assumes the first two columns of each file are
# (label, text) in that order — confirm against each file's header.
phishDF = []
for phishFile in phishingFiles:
    phish_path = "./data/Phishing/" + phishFile + ".csv"
    try:
        df_temp = pd.read_csv(phish_path).iloc[:, :2]
        df_temp.columns = ["Category", "Message"]
        phishDF.append(df_temp)
    except Exception as e:
        # Best-effort load: report the failure and continue with the other files.
        # The message is built with an f-string because `str + Exception`
        # raises TypeError inside the handler and would abort the whole cell.
        print(f"Error loading {phish_path} : {e}")
        

### Combine Datasets

In [None]:
# Stack all sources into a single frame with a fresh 0..n-1 index, then drop
# rows that are missing either the label or the message text.
df = (
    pd.concat([df_smsText, df_email, df_uciSmsText, *phishDF], ignore_index=True)
    .dropna(subset=["Category", "Message"])
)

### Data Cleaning

In [None]:
# Normalise the label text (string, lower-case, trimmed) so that variants such
# as " Spam" or "HAM" compare equal to "spam" / "ham".
df = df.assign(Category=df["Category"].astype(str).str.lower().str.strip())

# Keep only rows labelled spam/ham. The explicit .copy() makes the filtered
# frame independent, so the column assignment below does not operate on a
# slice of the previous frame (avoids pandas' SettingWithCopyWarning).
df = df[df["Category"].isin(["spam", "ham"])].copy()

# Integer encoding used by the rest of the notebook: spam -> 0, ham -> 1.
df["Category"] = df["Category"].map({"spam": 0, "ham": 1})
df = df.dropna()

# NOTE: this cell rebinds `df`. Running it a second time without re-running the
# "Combine Datasets" cell leaves `df` empty, because the labels are then 0/1
# and no longer match "spam"/"ham".
print(f"Total Samples Before Balancing: {df.shape[0]}")

### Original distribution

In [None]:
# Bar chart of row counts per encoded label (0 = spam, 1 = ham) before balancing.
fig, ax = plt.subplots()
sns.countplot(data=df, x="Category", ax=ax)
ax.set_title("Original Distribution of Spam vs Ham")
plt.show()

### Balanced Distribution

In [None]:
# Split by encoded label (0 = spam, 1 = ham).
spam_df = df[df["Category"] == 0]
ham_df = df[df["Category"] == 1]

# Balance by downsampling to the size of the smaller class. Sampling ham to
# len(spam_df) unconditionally raises ValueError when ham has fewer rows than
# spam, so the target size is the minimum of the two class sizes.
n_per_class = min(len(spam_df), len(ham_df))
if len(spam_df) > n_per_class:
    # Only touch spam when it is the larger class, so that results are
    # unchanged whenever ham is at least as large as spam.
    spam_df = spam_df.sample(n_per_class, random_state=42)
ham_df = ham_df.sample(n_per_class, random_state=42)

# Concatenate both classes and shuffle the rows (fixed seed for reproducibility).
df_balanced = pd.concat([spam_df, ham_df]).sample(frac=1, random_state=42)

In [None]:
# Bar chart of row counts per encoded label (0 = spam, 1 = ham) after balancing.
fig, ax = plt.subplots()
sns.countplot(data=df_balanced, x="Category", ax=ax)
ax.set_title("Balanced Spam vs Ham Distribution")
plt.show()

### Train/test split

In [None]:
# Features are the raw message text; targets are the encoded labels.
X, Y = df_balanced["Message"], df_balanced["Category"]

# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible.
X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    Y,
    test_size=0.2,
    random_state=3,
)

### LLMs

### Feature Extraction

In [None]:
# TF-IDF vectorization: lower-case the text, drop English stop words, and keep
# every term that appears in at least one document.
vectorizer = TfidfVectorizer(
    min_df=1,
    stop_words="english",
    lowercase=True,
)

# Fit the vocabulary and IDF weights on the training split only, then apply
# the fitted transform to the held-out split.
X_train_features = vectorizer.fit_transform(X_train)
X_test_features = vectorizer.transform(X_test)

### Models

Training/Testing

In [None]:
# Llama3.1

In [None]:
# Llama3.2

In [None]:
# Gemma2

In [None]:
# Qwen2.5

In [None]:
# Mistral

In [None]:
# Granite3 Dense

Evaluation

In [None]:
# Precision

# Recall

# Accuracy

# F1