In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, accuracy_score

# Load the email dataset from the working directory.
df = pd.read_csv('emails.csv')

# Features: every column except the first and the last (presumably the
# per-word columns -- verify against the CSV header).
# Target: the 'Prediction' column.
X = df.iloc[:, 1:-1]
y = df['Prediction']

# Hold out 20% of the rows for evaluation; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# fit() returns the estimator itself, so construction and training can be chained.
model = LogisticRegression(max_iter=1000).fit(X_train, y_train)

# Score the held-out rows.
y_pred = model.predict(X_test)

print("Accuracy:", accuracy_score(y_test, y_pred))
print(classification_report(y_test, y_pred))

Accuracy: 0.9719806763285024
              precision    recall  f1-score   support

           0       0.98      0.98      0.98       739
           1       0.94      0.96      0.95       296

    accuracy                           0.97      1035
   macro avg       0.96      0.97      0.97      1035
weighted avg       0.97      0.97      0.97      1035



In [42]:
from sklearn.feature_extraction.text import TfidfVectorizer
import numpy as np
from collections import Counter
import re
# Module-level TF-IDF vectorizer created with default settings (it is not fitted here).
# NOTE(review): the classifier in the first cell is fit on columns taken straight
# from the CSV, not on TF-IDF output -- confirm whether this object is needed.
vectorizer = TfidfVectorizer()

In [None]:
def predict_email_spam(email_text):
    """
    Predict if the given email text is spam or not.

    The text is lowercased, split into word tokens, and turned into a single
    row of per-word counts laid out in the same column order as the training
    matrix ``X``. The already-fitted ``model`` then classifies that row.

    Relies on the notebook-level names ``X`` (training feature frame) and
    ``model`` (fitted classifier) being defined before this is called.

    ## param
    **email_text**: str, the email content to classify
    ## return
    **str**: 'Spam' or 'Not Spam'
    """
    # Count each lowercase word token in the email.
    words = re.findall(r'\b\w+\b', email_text.lower())
    word_counts = Counter(words)

    # Build the row as a DataFrame labelled with X's columns rather than a bare
    # array: the model was fit on a DataFrame, so passing matching feature names
    # lets scikit-learn validate the column alignment and avoids its
    # "X does not have valid feature names" warning.
    # Words that are not training columns are ignored; columns absent from the
    # email get a count of 0.
    email_features = pd.DataFrame(
        [[word_counts.get(col, 0) for col in X.columns]],
        columns=X.columns,
    )

    prediction = model.predict(email_features)

    return 'Spam' if prediction[0] == 1 else 'Not Spam'

In [47]:
# Example usage: classify one sample message. Adjacent string literals are
# concatenated by Python, so the text passed in is a single string.
predict_email_spam(
    "Dear MHR Residents, This is to inform you that a new mess menu has been "
    "prepared for the upcoming week.Kindly find the updated mess menu attached "
    "below for your reference. We request all residents to review the menu. "
    "New General Secretary will monitor the implementation and address any "
    "concerns or feedback raised. Warm regards, Prabal Dubey "
)



'Not Spam'