In [1]:
#Installing required packages
!pip install --quiet streamlit openai scikit-learn pandas joblib pyngrok


[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m44.3/44.3 kB[0m [31m2.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m10.1/10.1 MB[0m [31m66.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.9/6.9 MB[0m [31m70.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m79.1/79.1 kB[0m [31m4.6 MB/s[0m eta [36m0:00:00[0m
[?25h

In [15]:
import pandas as pd
import joblib
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report

# Fetch the Spambase table from the UCI repository; the file is read without a
# header row, so column names are supplied here (57 features + the label).
url = "https://archive.ics.uci.edu/ml/machine-learning-databases/spambase/spambase.data"
columns = [*(f"feature_{i}" for i in range(57)), "label"]
df = pd.read_csv(url, names=columns, header=None)

# Separate the target from the predictors, then hold out 20% of rows for evaluation
y = df["label"]
X = df.drop(columns=["label"])
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Fit a 100-tree random forest with a fixed seed
model = RandomForestClassifier(random_state=42, n_estimators=100)
model.fit(X_train, y_train)

# Per-class precision / recall / F1 on the held-out rows
test_predictions = model.predict(X_test)
print(classification_report(y_test, test_predictions))

# Persist the fitted model to disk
joblib.dump(model, "spam_model.pkl")
print("✅ Model trained and saved as spam_model.pkl")


              precision    recall  f1-score   support

           0       0.94      0.98      0.96       531
           1       0.98      0.92      0.95       390

    accuracy                           0.96       921
   macro avg       0.96      0.95      0.95       921
weighted avg       0.96      0.96      0.96       921

✅ Model trained and saved as spam_model.pkl


In [16]:
import os
import re

import joblib
import openai
import pandas as pd
import streamlit as st

#Setting OpenAI API key
# Read the key from the OPENAI_API_KEY environment variable so the secret never
# lives in the notebook / app source. The value is None when the variable is unset.
openai.api_key = os.environ.get("OPENAI_API_KEY")

#Loading trained spam classifier model
# joblib.load unpickles the file: only load model files you produced yourself.
model = joblib.load("spam_model.pkl")

#Spammy keyword list, kept so existing references to the name keep working.
#The classifier's inputs are built from SPAMBASE_WORDS below, not from this list.
spammy_keywords = [
    "free", "money", "win", "click", "remove", "order", "now", "guarantee", "urgent", "buy",
    "offer", "credit", "cheap", "deal", "save", "cash", "discount"
]

#Vocabulary behind the 48 word_freq_* attributes of UCI Spambase, in dataset column order.
#The order matters: position i here becomes column feature_i of the model input.
SPAMBASE_WORDS = (
    "make", "address", "all", "3d", "our", "over", "remove", "internet",
    "order", "mail", "receive", "will", "people", "report", "addresses", "free",
    "business", "email", "you", "credit", "your", "font", "000", "money",
    "hp", "hpl", "george", "650", "lab", "labs", "telnet", "857",
    "data", "415", "85", "technology", "1999", "parts", "pm", "direct",
    "cs", "meeting", "original", "project", "re", "edu", "table", "conference",
)

#Characters behind the 6 char_freq_* attributes (columns 48-53), in dataset column order
SPAMBASE_CHARS = (";", "(", "[", "!", "$", "#")

#Feature extraction using UCI Spambase style
def extract_spambase_features(text: str):
    """Turn raw email text into a 57-value feature vector laid out like UCI Spambase.

    Layout of the returned list:
      * indices 0-47  -- for each word in SPAMBASE_WORDS, 100 * occurrences / total words,
        where a word is a maximal run of alphanumeric characters (case-insensitive);
      * indices 48-53 -- for each character in SPAMBASE_CHARS, 100 * occurrences / total
        characters;
      * indices 54-56 -- average, longest and total length of runs of capital letters.

    Args:
        text: The email text. An empty string yields 57 zeros.

    Returns:
        A list of 57 numbers, ordered to match the feature_0..feature_56 columns.
    """
    # Whole-token counting: "you" must not be counted inside "youth".
    tokens = re.findall(r"[^\W_]+", text.lower())
    n_tokens = len(tokens)
    n_chars = len(text)

    token_counts = {}
    for token in tokens:
        token_counts[token] = token_counts.get(token, 0) + 1

    # Word frequencies as percentages of all tokens (0.0 when there are no tokens).
    features = [
        100.0 * token_counts.get(word, 0) / n_tokens if n_tokens else 0.0
        for word in SPAMBASE_WORDS
    ]

    # Character frequencies as percentages of all characters.
    features.extend(
        100.0 * text.count(char) / n_chars if n_chars else 0.0
        for char in SPAMBASE_CHARS
    )

    # Uninterrupted sequences of capital letters; a lone capital is a run of length 1.
    run_lengths = [len(run) for run in re.findall(r"[A-Z]+", text)]
    if run_lengths:
        total = sum(run_lengths)
        features.extend([total / len(run_lengths), max(run_lengths), total])
    else:
        features.extend([0, 0, 0])

    return features

#GPT-based classification
def classify_email_gpt(email_text: str) -> str:
    """Ask GPT-4 whether an email is spam and return its answer as text.

    Works with both generations of the openai package: the client-based API
    (openai>=1.0, where `openai.ChatCompletion` no longer exists) and the
    legacy module-level API (openai<1.0).

    Args:
        email_text: The raw email body to classify.

    Returns:
        The model's reply with surrounding whitespace stripped, or a string
        starting with "OpenAI API error:" if anything goes wrong. Failures are
        reported through the return value; this function does not raise.
    """
    prompt = f"""
You are a spam detection AI. Analyze the following email and say whether it is Spam or Not Spam.

Respond exactly in this format:

Spam or Not Spam
Reason: <short reason>

Email:
\"\"\"
{email_text}
\"\"\"
"""
    messages = [{"role": "user", "content": prompt}]
    try:
        if hasattr(openai, "OpenAI"):
            # openai>=1.0: requests go through a client object. Passing
            # api_key=None lets the client fall back to OPENAI_API_KEY.
            client = openai.OpenAI(api_key=openai.api_key)
            response = client.chat.completions.create(
                model="gpt-4",
                messages=messages,
                temperature=0,
            )
            content = response.choices[0].message.content
        else:
            # openai<1.0: legacy module-level call returning a dict-like object.
            response = openai.ChatCompletion.create(
                model="gpt-4",
                messages=messages,
                temperature=0,
            )
            content = response["choices"][0]["message"]["content"]
        # The reply content may be missing; treat that as an empty answer.
        return (content or "").strip()
    except Exception as e:
        return f"OpenAI API error: {e}"

#Streamlit UI
st.set_page_config(page_title="SpamMorph", layout="centered")
st.title("SpamMorph 🔍 GPT + ML Spam Classifier")
st.write("Enter email text to classify it as Spam or Not Spam using both GPT and ML models.")

email_text = st.text_area("📨 Paste your email text below:", height=250)

# Ignore whitespace-only input so a blank box never triggers a GPT request.
if email_text and email_text.strip():
    st.subheader("📄 Raw Email Text")
    # st.text shows the text verbatim; st.write would render it as Markdown.
    st.text(email_text)

    st.subheader("🧠 GPT Verdict")
    gpt_result = classify_email_gpt(email_text)
    # classify_email_gpt reports failures as a string with this prefix instead of
    # raising, so show those as errors rather than in a green success box.
    if gpt_result.startswith("OpenAI API error:"):
        st.error(gpt_result)
    else:
        st.success(gpt_result)

    st.subheader("🤖 ML Model Verdict")
    features = extract_spambase_features(email_text)
    features_df = pd.DataFrame([features], columns=[f"feature_{i}" for i in range(57)])
    prediction = model.predict(features_df)[0]
    pred_label = "Spam 🚫" if prediction == 1 else "Not Spam ✅"
    st.success(f"ML Prediction: **{pred_label}**")

    # Look up the probability column by class label rather than assuming the
    # label value doubles as a positional index.
    class_index = list(model.classes_).index(prediction)
    confidence = model.predict_proba(features_df)[0][class_index]
    st.write(f"Prediction Confidence: **{confidence:.2%}**")


2025-07-09 18:46:13.974 
  command:

    streamlit run /usr/local/lib/python3.11/dist-packages/colab_kernel_launcher.py [ARGUMENTS]
2025-07-09 18:46:13.987 Session state does not function when running a script without `streamlit run`


In [19]:
#Installing and running Streamlit in background (via Colab tunneling)
# Quiet (-q) install of streamlit; the first cell of this notebook installs it as well.
!pip install streamlit -q

#Running Streamlit app in background
# `&` sends `streamlit run app.py` to the background of this shell command, then
# `npx localtunnel` runs in the foreground and publishes local port 8501 (the port
# Streamlit reports in its output) under a public URL.
# app.py must already exist on disk when this runs; in this notebook the cell that
# writes it appears further down.
!streamlit run app.py & npx localtunnel --port 8501


[1G[0K⠙[1G[0K⠹[1G[0K⠸
Collecting usage statistics. To deactivate, set browser.gatherUsageStats to false.
[0m
[1G[0K⠼[1G[0K⠴[1G[0K⠦[1G[0K⠧[1G[0K⠇[1G[0K⠏[0m
[34m[1m  You can now view your Streamlit app in your browser.[0m
[0m
[34m  Local URL: [0m[1mhttp://localhost:8501[0m
[34m  Network URL: [0m[1mhttp://172.28.0.12:8501[0m
[34m  External URL: [0m[1mhttp://35.239.134.39:8501[0m
[0m
[1G[0K⠋[1G[0K⠙[1G[0K⠹[1G[0K⠸[1G[0K⠼[1G[0K⠴[1G[0Kyour url is: https://crazy-kids-find.loca.lt
[34m  Stopping...[0m
^C


In [13]:
# Write the Streamlit script to disk so `streamlit run app.py` has a file to launch.
# NOTE(review): `app_code` is not assigned in any cell visible here -- confirm where
# it is defined before relying on Restart & Run All.
with open("app.py", mode="w", encoding="utf-8") as app_file:
    app_file.write(app_code)
