In [None]:
pip install pandas numpy scikit-learn nltk


In [None]:
import pandas as pd
import numpy as np
import re
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score, classification_report


In [None]:
# Load dataset
df = pd.read_csv("emails.csv")  # Replace with your dataset path

# Convert labels to numerical values (spam = 1, not spam = 0)
label_codes = df['label'].map({'spam': 1, 'ham': 0})

# .map() silently turns any value outside the mapping into NaN; fail loudly here
# instead of letting NaN labels surface later as a confusing training error.
if label_codes.isna().any():
    unexpected = sorted(df.loc[label_codes.isna(), 'label'].astype(str).unique())
    raise ValueError(f"Unexpected values in 'label' column (expected 'spam' or 'ham'): {unexpected}")
df['label'] = label_codes

# Download NLTK stopwords
nltk.download('stopwords')
nltk.download('punkt')
# Recent NLTK releases make word_tokenize load its models from 'punkt_tab' rather
# than the pickled 'punkt' package; fetch both so tokenisation works on either.
nltk.download('punkt_tab')

# Cache for the English stopword set; filled on first use by _english_stopwords().
_STOPWORD_CACHE = {}


def _english_stopwords():
    """Return the NLTK English stopwords as a frozenset, reading the corpus only once."""
    if 'english' not in _STOPWORD_CACHE:
        _STOPWORD_CACHE['english'] = frozenset(stopwords.words('english'))
    return _STOPWORD_CACHE['english']


# Function to clean text
def preprocess_text(text):
    """Clean a single piece of text and return it as a space-joined token string.

    Steps: lowercase, replace every non-word character with a space, collapse
    runs of whitespace, tokenize with ``word_tokenize``, and drop English
    stopwords.

    Args:
        text: The raw text. ``None`` and float NaN (how pandas represents a
            missing cell) are treated as an empty document; any other
            non-string value is converted with ``str()`` first.

    Returns:
        str: The remaining tokens joined by single spaces ("" for missing input).
    """
    # Missing values would otherwise crash on .lower(); NaN is the only float
    # that is not equal to itself.
    if text is None or (isinstance(text, float) and text != text):
        return ""
    if not isinstance(text, str):
        text = str(text)

    text = text.lower()  # Convert to lowercase
    text = re.sub(r'\W', ' ', text)  # Remove special characters
    text = re.sub(r'\s+', ' ', text)  # Remove extra spaces
    words = word_tokenize(text)  # Tokenize words

    # Look the stopwords up once per call (and load them once overall) instead of
    # re-reading the list for every token; set membership is O(1).
    stop_words = _english_stopwords()
    words = [word for word in words if word not in stop_words]  # Remove stopwords
    return " ".join(words)

# Run every raw message through preprocess_text and keep the result in a new
# 'cleaned_text' column next to the original 'message' column.
df['cleaned_text'] = df['message'].map(preprocess_text)


In [None]:
# Learn a TF-IDF vocabulary (capped at 3000 terms) from the cleaned emails and
# encode each email as a feature vector.
vectorizer = TfidfVectorizer(max_features=3000)
tfidf_sparse = vectorizer.fit_transform(df['cleaned_text'])

X = tfidf_sparse.toarray()   # dense feature matrix, one row per email
y = df['label'].to_numpy()   # target values taken from the 'label' column


In [None]:
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)


In [None]:
# Train the classifier. Without this step `model` is undefined on a fresh kernel
# and the prediction below fails with NameError.
model = MultinomialNB()
model.fit(X_train, y_train)

# Score the fitted model on the held-out split.
y_pred = model.predict(X_test)

accuracy = accuracy_score(y_test, y_pred)
print(f"Model Accuracy: {accuracy:.2f}")

print("Classification Report:")
print(classification_report(y_test, y_pred))


In [None]:
def predict_email(email):
    """Classify a single email with the notebook's fitted vectorizer and model.

    The text is cleaned with ``preprocess_text``, encoded with the module-level
    ``vectorizer`` and passed to the module-level ``model``.

    Args:
        email: Raw email text.

    Returns:
        str: "Spam" if the model predicts label 1, otherwise "Not Spam".
    """
    email_cleaned = preprocess_text(email)
    email_vectorized = vectorizer.transform([email_cleaned]).toarray()
    prediction = model.predict(email_vectorized)
    # predict() returns one label per input row; take the single label explicitly
    # rather than relying on the truth value of a one-element array.
    return "Spam" if prediction[0] == 1 else "Not Spam"

# Quick smoke test: classify one hand-written message and print the verdict.
test_email = "Congratulations! You have won a $1000 gift card. Click here to claim now."
predicted_label = predict_email(test_email)
print(f"Prediction: {predicted_label}")
