In [None]:
# Upload the SMSSpamCollection data file into the runtime. The google.colab
# module is only importable on Google Colab.
# NOTE(review): the recorded output shows the upload was saved as
# "SMSSpamCollection (1)", while the later read_csv call opens
# "SMSSpamCollection" — confirm the intended copy is the one being read.
from google.colab import files
uploaded = files.upload()

Saving SMSSpamCollection to SMSSpamCollection (1)


In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split

In [None]:
import csv

# Load the tab-separated corpus: column 0 is the label, column 1 the message.
# The messages contain literal double-quote characters. With pandas' default
# quote handling, a field that opens with '"' is parsed as a quoted field and
# can swallow the following lines, silently merging rows. QUOTE_NONE keeps
# every physical line as exactly one record and leaves the quotes in the text.
train = pd.read_csv(
    'SMSSpamCollection',
    sep='\t',
    header=None,
    names=['label', 'text'],
    quoting=csv.QUOTE_NONE,
)

In [None]:
# Encode the string labels as integers (ham -> 0, spam -> 1).
# The guard makes the cell safe to re-run: mapping an already-encoded column a
# second time would turn every value into NaN, because 0 and 1 are not keys of
# the mapping.
label_codes = {'ham': 0, 'spam': 1}
if not pd.api.types.is_numeric_dtype(train['label']):
    encoded_labels = train['label'].map(label_codes)
    # .map() yields NaN for any label missing from the mapping; fail loudly
    # here instead of letting NaN targets reach the split / classifier.
    unmapped = encoded_labels.isna()
    if unmapped.any():
        raise ValueError(
            f"Unexpected label values: {sorted(train.loc[unmapped, 'label'].astype(str).unique())}"
        )
    train['label'] = encoded_labels

In [None]:
# Features are the raw message text; targets are the encoded label column.
X, y = train['text'], train['label']

In [None]:
# Hold out 20% of the messages for validation. The split is stratified on the
# label so both parts keep the same class ratio, and seeded so it is repeatable.
X_train, X_val, y_train, y_val = train_test_split(
    X, y, test_size=0.2, random_state=64, stratify=y,
)

In [None]:
from sklearn.feature_extraction.text import TfidfVectorizer

# TF-IDF over unigrams and bigrams. Terms seen in fewer than 2 documents or in
# more than 95% of documents are dropped; sublinear_tf enables sublinear
# (logarithmic) scaling of raw term counts.
tfidf_options = dict(
    ngram_range=(1, 2),
    min_df=2,
    max_df=0.95,
    sublinear_tf=True,
)
vectorizer = TfidfVectorizer(**tfidf_options)

In [None]:
# Fit the vectorizer on the training messages only, then reuse the fitted
# vectorizer to transform the validation messages, so validation text does not
# influence the learned features.
X_train_tfidf = vectorizer.fit_transform(X_train)
X_val_tfidf = vectorizer.transform(X_val)

In [None]:
from sklearn.linear_model import LogisticRegression

# Logistic regression on the TF-IDF features. class_weight='balanced'
# reweights the classes, and max_iter is raised to 1000 iterations.
logreg_params = {
    "class_weight": "balanced",
    "max_iter": 1000,
    "C": 1.0,
}
model = LogisticRegression(**logreg_params)

model.fit(X_train_tfidf, y_train)

In [None]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score, confusion_matrix
# Hard class predictions for the validation set (used by the threshold-based metrics).
y_pred = model.predict(X_val_tfidf)
# Second column of predict_proba: the probability assigned to label 1
# (spam under the label mapping applied earlier in this notebook).
y_prob = model.predict_proba(X_val_tfidf)[:,1]

In [None]:
# Metrics computed from the hard predictions share one call shape, so print
# them from a table; ROC-AUC needs the probabilities instead.
label_based_metrics = (
    ("Accuracy", accuracy_score),
    ("Precision", precision_score),
    ("Recall", recall_score),
    ("F1", f1_score),
)
for metric_name, metric_fn in label_based_metrics:
    print(f"{metric_name}:", metric_fn(y_val, y_pred))

print("ROC-AUC:", roc_auc_score(y_val, y_prob))
print("Confusion Matrix:\n", confusion_matrix(y_val, y_pred))

Accuracy: 0.9865470852017937
Precision: 0.958904109589041
Recall: 0.9395973154362416
F1: 0.9491525423728814
ROC-AUC: 0.9929342615365376
Confusion Matrix:
 [[960   6]
 [  9 140]]


In [None]:
# Vocabulary of the fitted vectorizer. Because it was built with
# ngram_range=(1,2), the entries are unigrams and bigrams, not only single words.
feature_names = vectorizer.get_feature_names_out()
# One learned coefficient per feature (first row of coef_), aligned with feature_names.
weights = model.coef_[0]

In [None]:
import numpy as np

# Sort feature indices by coefficient once (ascending). The tail holds the
# most spam-leaning features, the head the most ham-leaning ones.
ranked = np.argsort(weights)
top_spam_idx = ranked[::-1][:20]
top_ham_idx = ranked[:20]

for heading, idx in (("Top spam words:", top_spam_idx), ("Top ham words:", top_ham_idx)):
    print(heading, feature_names[idx])

Top spam words: ['call' 'txt' 'free' 'text' 'to' 'mobile' 'www' 'uk' 'claim' 'reply'
 'stop' '150p' 'from' 'chat' 'now' 'com' 'service' 'or' 'your' 'new']
Top ham words: ['me' 'my' 'ok' 'it' 'that' 'come' 'lt' 'gt' 'how' 'da' 'so' 'll' 'but'
 'he' 'lt gt' 'later' 'home' 'got' 'then' 'at']


In [None]:
def remove_spam_tokens(message, top_spam_words, k=3):
    """Drop whitespace-separated tokens that match the top-k spam words.

    Args:
        message: Text to filter. It is split on whitespace, so punctuation
            attached to a word (e.g. "free!") is part of the token.
        top_spam_words: Sequence of spam-indicative words, strongest first.
        k: How many leading entries of ``top_spam_words`` to filter against.
            Pass ``None`` to use the whole sequence. Previously this parameter
            was accepted but ignored; it now limits the word list the same way
            ``inject_ham_tokens`` limits its ham words.

    Returns:
        The remaining tokens joined by single spaces.
    """
    # A set gives O(1) membership tests; str() normalizes numpy string scalars.
    blocked = {str(word) for word in top_spam_words[:k]}
    # Tokens are compared in lowercase against the listed words as given.
    kept = [token for token in message.split() if token.lower() not in blocked]
    return " ".join(kept)

def inject_ham_tokens(message, top_ham_words, k=5):
    """Append the first ``k`` ham words to ``message``.

    The message, a single space, and the space-joined ham words are
    concatenated. A separating space is always added, even when no ham
    words are selected.
    """
    padding = " ".join(top_ham_words[:k])
    return message + " " + padding

def obfuscate(message):
    """Replace a fixed set of trigger words with look-alike spellings.

    The message is split on whitespace. A token is swapped only when its
    lowercase form exactly equals one of the known words; every other token
    is kept unchanged. Tokens are re-joined with single spaces.
    """
    leet = {
        "free": "fr33",
        "win": "w1n",
        "urgent": "urg3nt",
        "prize": "pr1ze",
        "winner": "w1nner",
    }
    swapped = []
    for word in message.split():
        swapped.append(leet.get(word.lower(), word))
    return " ".join(swapped)

def predict_proba_single(message):
    """Score one message with the notebook-level ``vectorizer`` and ``model``.

    Returns the second predict_proba column for that message, i.e. the
    probability of label 1 (spam under this notebook's label mapping).
    """
    features = vectorizer.transform([message])
    class_probs = model.predict_proba(features)
    return class_probs[0][1]

In [None]:
# Features of the fitted vectorizer and the coefficient learned for each.
feature_names = vectorizer.get_feature_names_out()
weights = model.coef_[0]

# Ascending coefficient order: reversed head -> strongest spam features,
# plain head -> strongest ham features.
ascending = np.argsort(weights)
top_spam_idx, top_ham_idx = ascending[::-1][:20], ascending[:20]

top_spam_words, top_ham_words = feature_names[top_spam_idx], feature_names[top_ham_idx]

print("Top spam words:", top_spam_words)
print("Top ham words:", top_ham_words)

Top spam words: ['call' 'txt' 'free' 'text' 'to' 'mobile' 'www' 'uk' 'claim' 'reply'
 'stop' '150p' 'from' 'chat' 'now' 'com' 'service' 'or' 'your' 'new']
Top ham words: ['me' 'my' 'ok' 'it' 'that' 'come' 'lt' 'gt' 'how' 'da' 'so' 'll' 'but'
 'he' 'lt gt' 'later' 'home' 'got' 'then' 'at']


In [None]:
def play_game(top_spam_words, top_ham_words, max_attempts=3):
    """Run one interactive round of the "fool the detector" console game.

    The player picks a difficulty (which sets the spam-probability threshold)
    and enters a starting message. If that message already scores below the
    threshold the round ends immediately. Otherwise the player gets
    ``max_attempts`` rewrites to push the score below the threshold.

    Args:
        top_spam_words: Accepted for interface compatibility; not used by the
            body of this function.
        top_ham_words: Accepted for interface compatibility; not used by the
            body of this function.
        max_attempts: Number of rewrite attempts allowed (default 3, the value
            that was previously hard-coded).

    Returns:
        None. All interaction happens through input() and print().
    """
    print("\n=== SMS SPAM DETECTOR GAME ===")
    print("Modify your message to fool the detector!\n")

    # Lower threshold = harder: the score must drop further to count as a win.
    thresholds = {"easy": 0.7, "medium": 0.5, "hard": 0.3}
    difficulty = input("Difficulty (easy/medium/hard): ").strip().lower()
    if difficulty not in thresholds:
        print("Invalid, defaulting to medium")
        difficulty = "medium"
    threshold = thresholds[difficulty]

    # Baseline: the starting message has to look like spam to the model.
    message = input("Enter a spam message: ").strip()
    prob = predict_proba_single(message)
    print(f"Spam probability: {prob:.2f}")

    if prob < threshold:
        print("This message is already safe. Try something spammier!")
        return

    for attempt in range(1, max_attempts + 1):
        print(f"\nAttempt {attempt} of {max_attempts} — rewrite the message to get below {threshold}")
        message = input("Your message: ").strip()
        prob = predict_proba_single(message)
        print(f"Spam probability: {prob:.2f}")

        if prob < threshold:
            print(f"You fooled the detector in {attempt} attempt(s)!")
            return

    # Reached only when every attempt stayed at or above the threshold.
    print(f"Failed! Final probability: {prob:.2f}")

# Start one interactive round; the call waits on input() prompts for the
# difficulty and the messages.
play_game(top_spam_words, top_ham_words)


=== SMS SPAM DETECTOR GAME ===
Modify your message to fool the detector!

Difficulty (easy/medium/hard): easy
Enter a spam message: I need your help urgently
Spam probability: 0.32
This message is already safe. Try something spammier!


In [None]:
!pip install streamlit -q
!pip install pyngrok -q

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m9.1/9.1 MB[0m [31m28.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.9/6.9 MB[0m [31m19.8 MB/s[0m eta [36m0:00:00[0m
[?25h

In [None]:
%%writefile app.py
# Placeholder only: a later %%writefile cell in this notebook overwrites app.py
# with the full application.
import streamlit as st

Writing app.py


In [None]:
import pickle

# Persist everything the Streamlit app loads at startup: the classifier, the
# fitted vectorizer, and the (spam words, ham words) pair.
artifacts = {
    'model.pkl': model,
    'vectorizer.pkl': vectorizer,
    'words.pkl': (top_spam_words, top_ham_words),
}
for artifact_path, artifact in artifacts.items():
    with open(artifact_path, 'wb') as handle:
        pickle.dump(artifact, handle)

print("Saved successfully")

Saved successfully


In [None]:
%%writefile app.py
import streamlit as st
import pickle
import numpy as np

# Load model and vectorizer
@st.cache_resource(show_spinner=False)
def load_artifacts():
    """Unpickle the classifier, vectorizer and top-word lists once.

    Streamlit re-executes this script on every interaction. Caching keeps the
    loaded objects in memory, so the three files are not re-read on each rerun.
    The spinner is disabled so this call does not emit any UI element.

    Returns:
        Tuple of (model, vectorizer, top_spam_words, top_ham_words).
    """
    # SECURITY: pickle.load can execute arbitrary code contained in the file.
    # Only load artifacts that were produced by the training notebook.
    with open('model.pkl', 'rb') as f:
        loaded_model = pickle.load(f)
    with open('vectorizer.pkl', 'rb') as f:
        loaded_vectorizer = pickle.load(f)
    with open('words.pkl', 'rb') as f:
        spam_words, ham_words = pickle.load(f)
    return loaded_model, loaded_vectorizer, spam_words, ham_words


model, vectorizer, top_spam_words, top_ham_words = load_artifacts()

def predict_proba_single(message):
    """Return the model's second-class probability for a single message.

    The message is vectorized as a one-element batch with the loaded
    ``vectorizer``. The value returned is column 1 of the first (only) row
    of ``model.predict_proba``.
    """
    batch = vectorizer.transform([message])
    first_row = model.predict_proba(batch)[0]
    return first_row[1]

# Page config: browser tab title and wide layout
st.set_page_config(page_title="SMS Spam Detector Game", layout="wide")

# Sidebar: difficulty picker, resulting threshold, and a word-list hint
st.sidebar.title("Game Settings")
difficulty = st.sidebar.radio("Difficulty", ["Easy", "Medium", "Hard"])
# A lower threshold is harder: the spam probability must fall below it to win.
thresholds = {"Easy": 0.7, "Medium": 0.5, "Hard": 0.3}
threshold = thresholds[difficulty]
st.sidebar.markdown(f"**Threshold:** {threshold}")
st.sidebar.markdown("---")
st.sidebar.markdown("**Top Spam Words:**")
# Only the first 10 entries of the loaded spam-word list are shown.
st.sidebar.write(list(top_spam_words[:10]))

# Main title and goal statement
st.title("SMS Spam Detector — Can You Fool It?")
st.markdown(f"Your goal is to get spam probability **below {threshold}** within 3 attempts.")
st.markdown("---")

# Session state init
def _fresh_state():
    """Build a new set of starting values for one game (new list each call)."""
    return {"attempt": 0, "messages": [], "game_over": False, "prob": None}


# Seed only the keys that are missing, so values survive script reruns.
for state_key, start_value in _fresh_state().items():
    if state_key not in st.session_state:
        st.session_state[state_key] = start_value

# Reset button: overwrite every key with its starting value and rerun
if st.sidebar.button("Reset Game"):
    for state_key, start_value in _fresh_state().items():
        st.session_state[state_key] = start_value
    st.rerun()

# Replay the stored conversation, one chat bubble per entry
for entry in st.session_state.messages:
    with st.chat_message(entry["role"]):
        st.write(entry["content"])

# Show the most recent score as a bar and a number, once one exists
latest_prob = st.session_state.prob
if latest_prob is not None:
    st.markdown("**Spam Probability:**")
    st.progress(latest_prob)
    st.markdown(f"### {latest_prob:.2f}")

# Input
# Total number of scored messages per game (the opening spam message counts as
# the first one).
MAX_ATTEMPTS = 3

if not st.session_state.game_over:
    if st.session_state.attempt == 0:
        placeholder = "Enter a spam message to start..."
    else:
        placeholder = f"Attempt {st.session_state.attempt + 1} of {MAX_ATTEMPTS} — modify your message..."

    user_input = st.chat_input(placeholder)

    if user_input:
        prob = predict_proba_single(user_input)
        st.session_state.prob = prob
        st.session_state.messages.append({"role": "user", "content": user_input})

        if st.session_state.attempt == 0 and prob < threshold:
            # The opening message must look like spam to the model. Without
            # this guard any harmless first message is an instant "win".
            # Do not count it as an attempt and keep the game open.
            st.session_state.messages.append({"role": "assistant", "content": f"Probability: {prob:.2f} — This message is already safe. Try something spammier!"})
        else:
            st.session_state.attempt += 1

            if prob < threshold:
                st.session_state.messages.append({"role": "assistant", "content": f"Probability: {prob:.2f} — YOU WIN! You fooled the detector!"})
                st.session_state.game_over = True
            elif st.session_state.attempt >= MAX_ATTEMPTS:
                st.session_state.messages.append({"role": "assistant", "content": f"Probability: {prob:.2f} — GAME OVER! You failed to fool the detector."})
                st.session_state.game_over = True
            else:
                remaining = MAX_ATTEMPTS - st.session_state.attempt
                st.session_state.messages.append({"role": "assistant", "content": f"Probability: {prob:.2f} — Still above {threshold}. {remaining} attempt(s) left."})

        # Rerun so the history and probability bar above pick up the new state.
        st.rerun()
else:
    st.success("Game over! Hit Reset in the sidebar to play again.")

Overwriting app.py


In [None]:
from google.colab import files

# Fetch the three pickled artifacts from the Colab runtime, in this order.
for artifact_name in ('model.pkl', 'vectorizer.pkl', 'words.pkl'):
    files.download(artifact_name)



<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [None]:
streamlit run app.py

SyntaxError: invalid syntax (ipython-input-3737097518.py, line 1)

In [None]:
# Download the generated Streamlit script. Relies on `files` having been
# imported from google.colab in an earlier cell.
files.download('app.py')


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>