In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import classification_report
import joblib


# --- Load the dataset -------------------------------------------------
# Only columns v1 (label) and v2 (message text) are kept, under clearer names.
df = (
    pd.read_csv("spam.csv", encoding="latin-1")[['v1', 'v2']]
    .rename(columns={'v1': 'label', 'v2': 'text'})
)

# --- Encode the target ------------------------------------------------
# ham -> 0, spam -> 1; any other label value becomes NaN.
df = df.assign(label=df['label'].map({'ham': 0, 'spam': 1}))

# --- Hold out 20% of the rows for evaluation (fixed seed) -------------
X_train, X_test, y_train, y_test = train_test_split(
    df['text'],
    df['label'],
    test_size=0.2,
    random_state=42,
)

# --- TF-IDF features --------------------------------------------------
# The vocabulary is learned from the training split only, then reused
# unchanged to transform the held-out split.
vectorizer = TfidfVectorizer(stop_words='english', max_features=3000)
X_train_vec = vectorizer.fit_transform(X_train)
X_test_vec = vectorizer.transform(X_test)

# --- Fit and evaluate the classifier ----------------------------------
model = MultinomialNB()
model.fit(X_train_vec, y_train)

print(
    classification_report(
        y_test,
        model.predict(X_test_vec),
    )
)

# --- Persist both artifacts; later cells load them by these names -----
joblib.dump(model, "spam_model.pkl")
joblib.dump(vectorizer, "vectorizer.pkl")


              precision    recall  f1-score   support

           0       0.97      1.00      0.99       965
           1       1.00      0.83      0.91       150

    accuracy                           0.98      1115
   macro avg       0.99      0.92      0.95      1115
weighted avg       0.98      0.98      0.98      1115



['vectorizer.pkl']

In [2]:
from flask import Flask, request, jsonify
import joblib

# Flask application object; the /predict route below is registered on it.
app = Flask(__name__)

# Load the classifier and vectorizer from the files written by the
# joblib.dump calls in the training cell.
# NOTE(review): joblib.load unpickles its input, and unpickling can run
# arbitrary code — only load artifact files you produced yourself.
model = joblib.load("spam_model.pkl")
vectorizer = joblib.load("vectorizer.pkl")

@app.route('/predict', methods=['POST'])
def predict():
    """Classify the email text sent in a JSON POST body.

    The request body is expected to be a JSON object with an ``email`` key
    holding the text to classify, e.g. ``{"email": "You won a prize!"}``.

    Returns:
        ``{'classification': 'spam' | 'ham'}`` on success, or a
        ``({'error': ...}, 400)`` JSON response when the body is not a JSON
        object, when ``email`` is missing/empty, or when it is not a string.
    """
    # silent=True makes get_json return None for a missing, mistyped or
    # malformed body instead of raising, so every bad request ends up as
    # the same kind of JSON 400 rather than an unhandled error.
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        # Valid JSON that is not an object (list, string, number, null)
        # has no .get(); reject it before it can crash the handler.
        return jsonify({'error': 'Request body must be a JSON object'}), 400

    email_text = data.get('email', '')

    if not email_text:
        return jsonify({'error': 'No email text provided'}), 400

    if not isinstance(email_text, str):
        # The vectorizer works on strings; a number/list/object here would
        # otherwise surface as a 500 from inside transform().
        return jsonify({'error': "'email' must be a string"}), 400

    email_vec = vectorizer.transform([email_text])
    prediction = model.predict(email_vec)[0]
    # Class 1 is spam, class 0 is ham (see the label mapping used in training).
    label = 'spam' if prediction == 1 else 'ham'

    return jsonify({'classification': label})

# Start Flask's built-in server on port 5000 when this code runs as the
# main module (the captured output shows the app being served as '__main__').
# use_reloader=False turns the auto-reloader off — presumably because the
# reloader restarts the process, which does not suit a notebook kernel;
# confirm before changing.
# NOTE(review): debug=True enables Flask's debug mode; keep this for local
# experimentation only and turn it off for anything reachable by others.
if __name__ == '__main__':
    app.run(port=5000, debug=True, use_reloader=False)


 * Serving Flask app '__main__'
 * Debug mode: on


 * Running on http://127.0.0.1:5000
Press CTRL+C to quit


Invoke-WebRequest -Uri "http://127.0.0.1:5000/predict" `
  -Method POST `
  -Headers @{ "Content-Type" = "application/json" } `
  -Body '{"email": "Congratulations! You won a prize!"}'



In [None]:
import base64
from googleapiclient.discovery import build
from google_auth_oauthlib.flow import InstalledAppFlow
from google.auth.transport.requests import Request
import os.path
import joblib




# Reload the trained classifier and its fitted vectorizer from disk; both
# are used by classify_email below.
# NOTE(review): joblib.load unpickles its input, and unpickling can run
# arbitrary code — only load artifact files you produced yourself.
model = joblib.load("spam_model.pkl")
vectorizer = joblib.load("vectorizer.pkl")

# OAuth scope list handed to the credential helpers in authenticate_gmail.
SCOPES = ['https://www.googleapis.com/auth/gmail.modify']

def authenticate_gmail():
    """Build and return a Gmail API (v1) service object.

    Credentials are looked up in this order:

    1. ``token.json`` in the working directory, if the file exists.
    2. If those credentials are expired but carry a refresh token, they are
       refreshed in place.
    3. Otherwise an installed-app OAuth flow is started from
       ``credentials.json`` using a local server on an OS-chosen port.

    Whenever step 2 or 3 ran, the resulting credentials are written back to
    ``token.json`` before the service is built.
    """
    token_path = 'token.json'
    creds = None

    if os.path.exists(token_path):
        from google.oauth2.credentials import Credentials
        creds = Credentials.from_authorized_user_file(token_path, SCOPES)

    # Fast path: stored credentials are still usable as they are.
    if creds and creds.valid:
        return build('gmail', 'v1', credentials=creds)

    can_refresh = creds and creds.expired and creds.refresh_token
    if can_refresh:
        creds.refresh(Request())
    else:
        flow = InstalledAppFlow.from_client_secrets_file('credentials.json', SCOPES)
        creds = flow.run_local_server(port=0)

    # Persist the new/refreshed credentials for the next run.
    with open(token_path, 'w') as token_file:
        token_file.write(creds.to_json())

    return build('gmail', 'v1', credentials=creds)

def classify_email(text):
    """Return ``'spam'`` or ``'ham'`` for a single piece of text.

    The text is vectorized with the module-level ``vectorizer`` and scored
    with the module-level ``model``; a predicted class of 1 means spam,
    anything else is reported as ham.
    """
    features = vectorizer.transform([text])
    predicted_class = model.predict(features)[0]
    if predicted_class == 1:
        return 'spam'
    return 'ham'


import time

def process_recent_emails(num_emails=10):
    """Classify recent inbox messages and move the spam ones out of the inbox.

    Authenticates via ``authenticate_gmail``, lists up to ``num_emails``
    message ids carrying the ``INBOX`` label, fetches each message and runs
    ``classify_email`` on its snippet. For a ``'spam'`` verdict the message
    gets the ``SPAM`` label added and ``INBOX`` removed. One progress line is
    printed per message, plus one more for every message that is moved.

    Args:
        num_emails: Maximum number of messages requested from the API.
    """
    # Function-scope import so this block does not depend on the import cell.
    from html import unescape

    service = authenticate_gmail()
    results = service.users().messages().list(userId='me', labelIds=['INBOX'], maxResults=num_emails).execute()
    messages = results.get('messages', [])

    for msg in messages:
        msg_data = service.users().messages().get(userId='me', id=msg['id']).execute()
        # The snippet arrives HTML-escaped (e.g. "&quot;", "&#39;"). Decode
        # the entities so the classifier and the log see the actual text
        # rather than entity names; "or ''" also covers an explicit null.
        snippet = unescape(msg_data.get('snippet') or '')

        classification = classify_email(snippet)
        print(f"Received a {classification} email: {snippet[:60]}...")

        if classification == 'spam':
            service.users().messages().modify(
                userId='me',
                id=msg['id'],
                body={'addLabelIds': ['SPAM'], 'removeLabelIds': ['INBOX']}
            ).execute()
            print(f"Moved spam: {snippet[:60]}...")

def run_periodically(num_emails=10, interval_hours=6):
    """Poll the inbox forever, pausing ``interval_hours`` between passes.

    Each pass announces itself, calls ``process_recent_emails(num_emails)``,
    announces the pause and then sleeps. The loop has no exit condition, so
    the call only ends if an exception (or an interrupt) propagates out.

    Args:
        num_emails: Number of recent emails to check on every pass.
        interval_hours: Length of the pause between passes, in hours.
    """
    while True:
        check_banner = f"Checking the {num_emails} most recent emails..."
        print(check_banner)
        process_recent_emails(num_emails)

        sleep_banner = f"Sleeping for {interval_hours} hours..."
        print(sleep_banner)
        # hours -> minutes -> seconds
        pause_seconds = interval_hours * 60 * 60
        time.sleep(pause_seconds)


# Entry point: start the polling loop with its defaults (10 emails per
# pass, 6 hours between passes).
# NOTE: run_periodically loops forever, so this cell keeps running until
# the kernel is interrupted or an exception escapes the loop.
if __name__ == "__main__":
    run_periodically()


Checking the 10 most recent emails...
Received a ham email: ✨ I am thrilled to share that… ͏ ͏ ͏ ͏ ͏ ͏ ͏ ͏ ͏ ͏ ͏ ͏ ͏ ͏ ͏...
Received a ham email: Recommended actions for you ͏ ͏ ͏ ͏ ͏ ͏ ͏ ͏ ͏ ͏ ͏ ͏ ͏ ͏ ͏ ͏ ...
Received a ham email: Hi there, Thanks for using Cursor! To get started, read our ...
Received a ham email: تم تفعيل التحقق بخطوتين adhambdalmnm5@gmail.com‏ حسابك على G...
Received a ham email: تمّت إضافة رقم هاتف لميزة &quot;التحقّق بخطوتين&quot; adhamb...
Received a ham email: Machine Learning Engineer | AI Enthusiast | Computer Science...
Received a ham email: You have 1 new invitation ͏ ͏ ͏ ͏ ͏ ͏ ͏ ͏ ͏ ͏ ͏ ͏ ͏ ͏ ͏ ͏ ͏ ...
Received a ham email: You&#39;re getting noticed ͏ ͏ ͏ ͏ ͏ ͏ ͏ ͏ ͏ ͏ ͏ ͏ ͏ ͏ ͏ ͏ ͏...
Received a ham email: The Bloom Internship Program… ͏ ͏ ͏ ͏ ͏ ͏ ͏ ͏ ͏ ͏ ͏ ͏ ͏ ͏ ͏ ...
Received a ham email: Recommended actions for you ͏ ͏ ͏ ͏ ͏ ͏ ͏ ͏ ͏ ͏ ͏ ͏ ͏ ͏ ͏ ͏ ...
Sleeping for 6 hours...
Checking the 10 most recent emails...
Received a ham email: ✨ I am thrilled to