In [None]:
import pandas as pd
import numpy as np

# Seed NumPy's global RNG so every run draws the same labels and messages.
np.random.seed(42)

# Template pools. Every synthetic row reuses one of these strings verbatim,
# so the resulting corpus contains at most ten distinct messages.
ham_messages = [
    "Hey, how are you?",
    "Don't forget to bring the documents.",
    "Let's catch up over coffee tomorrow.",
    "See you at the meeting at 10 AM.",
    "Happy Birthday! Have a great day!",
]

spam_messages = [
    "Congratulations! You've won a $1000 gift card. Click here to claim.",
    "You've been selected for a free cruise. Call now!",
    "Get a loan with low interest rates. Apply today.",
    "Win a brand new car. Text WIN to 12345.",
    "Limited time offer! Get 50% off on all products. Visit our website now.",
]

# Number of rows to generate.
n_samples = 500

# Draw every label up front, then one message per label in label order.
# Keeping the RNG calls in this exact sequence is what makes the seeded
# output reproducible.
labels = np.random.choice(['ham', 'spam'], n_samples)
messages = [
    np.random.choice(ham_messages if kind == 'ham' else spam_messages)
    for kind in labels
]

# Two-column frame: the class name and the message text drawn for it.
synthetic_data = pd.DataFrame({'Label': labels, 'Message': messages})

# Quick sanity peek at the generated rows.
print(synthetic_data.head())

# Persist without the index column so the file holds only Label and Message.
synthetic_data.to_csv('synthetic_sms_spam_data.csv', index=False)


  Label                                            Message
0   ham                  Happy Birthday! Have a great day!
1  spam  Limited time offer! Get 50% off on all product...
2   ham                                  Hey, how are you?
3   ham               Let's catch up over coffee tomorrow.
4   ham               Don't forget to bring the documents.


In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score, classification_report
import joblib

# Read the labelled messages and encode the class names as integers
# (ham -> 0, spam -> 1). Any other label value would become NaN under map().
data = pd.read_csv('synthetic_sms_spam_data.csv')
data = data.assign(Label=data['Label'].map({'ham': 0, 'spam': 1}))

# Hold out 25% of the rows for evaluation; the fixed random_state keeps the
# split identical between runs.
X_train, X_test, y_train, y_test = train_test_split(
    data['Message'],
    data['Label'],
    test_size=0.25,
    random_state=42,
)

# Bag-of-words counts. The vocabulary is learned from the training messages
# only; the test messages are merely projected onto that vocabulary.
vectorizer = CountVectorizer()
X_train = vectorizer.fit_transform(X_train)
X_test = vectorizer.transform(X_test)

# Fit a multinomial Naive Bayes classifier on the count features.
model = MultinomialNB().fit(X_train, y_train)

# Persist the classifier together with the fitted vectorizer: predicting on new
# text requires the same vocabulary the model was trained against.
joblib.dump(model, 'spam_classifier_model.pkl')
joblib.dump(vectorizer, 'vectorizer.pkl')

# Score the held-out rows.
# NOTE(review): if the CSV comes from the generator cell in this notebook, it
# holds only ten distinct message strings, so the test rows are exact copies of
# training rows. A perfect score here says nothing about unseen messages.
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred)
print(f'Accuracy: {accuracy}')
print(report)


Accuracy: 1.0
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        65
           1       1.00      1.00      1.00        60

    accuracy                           1.00       125
   macro avg       1.00      1.00      1.00       125
weighted avg       1.00      1.00      1.00       125



In [None]:
import joblib
import pandas as pd

# Restore the classifier and the vectorizer it was trained with.
# joblib.load unpickles arbitrary objects, so only load artifacts you produced.
model = joblib.load('spam_classifier_model.pkl')
vectorizer = joblib.load('vectorizer.pkl')

# Hand-written messages to classify.
new_messages = [
    "Hey, don't forget about our meeting tomorrow at 10 AM.",
    "Congratulations! You've won a free ticket to the Bahamas. Click here to claim.",
    "Can we reschedule our call to next week?",
    "Get a loan approved instantly with no credit check. Apply now!",
]

# Project the text onto the stored vocabulary (transform only, no refitting),
# then ask the model for a class code per message.
new_messages_vectorized = vectorizer.transform(new_messages)
predictions = model.predict(new_messages_vectorized)

# Translate the integer codes back to names: 1 is reported as 'spam',
# every other value as 'ham'.
predicted_names = ['ham' if code != 1 else 'spam' for code in predictions]

# Pair each message with its predicted class for display.
prediction_results = pd.DataFrame({
    'Message': new_messages,
    'Prediction': predicted_names,
})

# Show the results table.
print(prediction_results)


                                             Message Prediction
0  Hey, don't forget about our meeting tomorrow a...        ham
1  Congratulations! You've won a free ticket to t...       spam
2           Can we reschedule our call to next week?       spam
3  Get a loan approved instantly with no credit c...       spam
