In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.pipeline import make_pipeline
from sklearn.metrics import classification_report, accuracy_score

# Load the dataset (change the path below to point at your own CSV file)

data = pd.read_csv('mail_data.csv')

# Display the first few rows and the columns of the DataFrame
print("Sample Data:")
print(data.head())
print("\nAvailable Columns:")
print(data.columns)

# Select the label/text columns and standardize their names to 'label' / 'message'.
if 'Category' in data.columns and 'Message' in data.columns:
    # .copy() so the later label assignment never writes into a slice of the raw frame
    data = data[['Category', 'Message']].copy()
elif data.shape[1] >= 2:
    # Unknown schema: fall back to the first two columns instead of renaming every
    # column at once (which fails with a length mismatch when there are extras).
    # NOTE(review): assumes column 0 holds the label and column 1 the text - confirm
    # against the actual file.
    print("Adjusting column selection based on available columns.")
    data = data.iloc[:, :2].copy()
else:
    raise ValueError(
        "Expected at least two columns (label, message); "
        f"found {list(data.columns)}"
    )
data.columns = ['label', 'message']

# Check for missing values
if data.isnull().values.any():
    print("Missing values found. Dropping rows with missing values.")
    data = data.dropna()  # Remove rows with missing values

# Convert labels to binary (ham: 0, spam: 1).
# Labels are normalized first so that variants such as ' Spam' or 'HAM' still map;
# anything that remains unmapped would become NaN and break model.fit, so those
# rows are reported and dropped here.
normalized_labels = data['label'].astype(str).str.strip().str.lower()
binary_labels = normalized_labels.map({'ham': 0, 'spam': 1})
unmapped = binary_labels.isna()
if unmapped.any():
    print(
        f"Dropping {int(unmapped.sum())} rows with unrecognized labels: "
        f"{sorted(normalized_labels[unmapped].unique())}"
    )
data = data.loc[~unmapped].assign(label=binary_labels[~unmapped].astype(int))

# Prepare the data
X = data['message']  # Features (text messages)
y = data['label']    # Labels (0 for ham, 1 for spam)

# Split the dataset into training and testing sets (fixed seed for reproducibility)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Create a pipeline to vectorize the text (bag of words) and train Naive Bayes on it
model = make_pipeline(CountVectorizer(), MultinomialNB())

# Train the model
model.fit(X_train, y_train)

# Evaluate the model on the held-out test split
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"\nModel Accuracy: {accuracy:.2f}")

# Display classification report
print("\nClassification Report:")
print(classification_report(y_test, y_pred))

# Function to predict if a message is spam or ham
def predict_spam(message):
    """Classify one message using the module-level fitted ``model``.

    Args:
        message: The text to classify; it is passed to ``model.predict``
            wrapped in a single-element list.

    Returns:
        "Spam" when the first predicted label equals 1, otherwise "Ham".
    """
    predicted_label = model.predict([message])[0]
    if predicted_label == 1:
        return "Spam"
    return "Ham"

# Smoke-test the classifier on a few hand-written messages
test_messages = [
    "Congratulations! You've won a $1,000 Walmart gift card. Click here to claim now.",
    "Hey, are we still meeting for lunch tomorrow?",
    "Free entry in 2 a weekly competition to win FA Cup tickets.",
    "Your appointment is scheduled for next week.",
]

print("\nTesting New Messages:")
# map() is lazy, so each message is classified right before it is printed
for msg, result in zip(test_messages, map(predict_spam, test_messages)):
    print(f"Message: '{msg}'\nPrediction: {result}\n")


Sample Data:
  Category                                            Message
0      ham  Go until jurong point, crazy.. Available only ...
1      ham                      Ok lar... Joking wif u oni...
2     spam  Free entry in 2 a wkly comp to win FA Cup fina...
3      ham  U dun say so early hor... U c already then say...
4      ham  Nah I don't think he goes to usf, he lives aro...

Available Columns:
Index(['Category', 'Message'], dtype='object')

Model Accuracy: 0.99

Classification Report:
              precision    recall  f1-score   support

           0       0.99      1.00      1.00       966
           1       1.00      0.94      0.97       149

    accuracy                           0.99      1115
   macro avg       1.00      0.97      0.98      1115
weighted avg       0.99      0.99      0.99      1115


Testing New Messages:
Message: 'Congratulations! You've won a $1,000 Walmart gift card. Click here to claim now.'
Prediction: Spam

Message: 'Hey, are we still meeting for lu

In [None]:
# Interactive spam checker: keep prompting until the user types 'exit'.
while True:
    try:
        user_input = input("\nEnter a message to check if it's spam (or type 'exit' to quit): ")
    except (EOFError, KeyboardInterrupt):
        # stdin reached EOF or the user interrupted the prompt: stop cleanly
        # instead of surfacing a traceback.
        print("Exiting the spam checker.")
        break

    # Compare on the trimmed text so ' exit ' or 'EXIT' also quits
    command = user_input.strip()
    if command.lower() == 'exit':
        print("Exiting the spam checker.")
        break
    if not command:
        # A blank line carries no text to classify; ask again
        print("Please enter a non-empty message.")
        continue

    result = predict_spam(user_input)
    print(f"Message: '{user_input}'\nPrediction: {result}\n")


Enter a message to check if it's spam (or type 'exit' to quit): hlo i am tulasi
Message: 'hlo i am tulasi'
Prediction: Ham


Enter a message to check if it's spam (or type 'exit' to quit): hey how are you
Message: 'hey how are you'
Prediction: Ham


Enter a message to check if it's spam (or type 'exit' to quit): are you free now
Message: 'are you free now'
Prediction: Ham


Enter a message to check if it's spam (or type 'exit' to quit): can we meet today
Message: 'can we meet today'
Prediction: Ham


Enter a message to check if it's spam (or type 'exit' to quit): shall we have lunch tomorrow
Message: 'shall we have lunch tomorrow'
Prediction: Ham


Enter a message to check if it's spam (or type 'exit' to quit): this is a free website
Message: 'this is a free website'
Prediction: Ham


Enter a message to check if it's spam (or type 'exit' to quit): You've been selected for a chance to win a free vacation! Just reply with your email address!
Message: 'You've been selected for a chance t