<a href="https://colab.research.google.com/github/abishekraja018/SDC-GENAI/blob/main/SPAM_DETECTION.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# ✅ Step 1: Install necessary libraries
!pip install scikit-learn pandas numpy --quiet

# ✅ Step 2: Import required libraries
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report

# ✅ Step 3: Generate synthetic spam message dataset
data = {
    'message': [
        "Free money!!!", "Hello, how are you?", "Win a free iPhone now",
        "Let's meet for coffee", "Congratulations, you've won a lottery",
        "Important work meeting tomorrow", "Claim your free prize now",
        "Let's catch up soon", "Final warning: your account has been hacked",
        "Hello, I miss you", "This is not a spam message", "Limited offer, buy now"
    ],
    'label': [1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0]  # 1 = Spam, 0 = Not Spam
}

df = pd.DataFrame(data)

# ✅ Step 4: Preprocessing the data
X = df['message']  # Feature: Text messages
y = df['label']    # Target: Spam (1) or Not Spam (0)

# ✅ Step 5: Convert text data to numerical form using TF-IDF vectorization
vectorizer = TfidfVectorizer(stop_words='english')
X_tfidf = vectorizer.fit_transform(X)

# ✅ Step 6: Train-test split (80-20 split)
X_train, X_test, y_train, y_test = train_test_split(X_tfidf, y, test_size=0.2, random_state=42)

# ✅ Step 7: Train the logistic regression model
model = LogisticRegression()
model.fit(X_train, y_train)

# ✅ Step 8: Predictions
y_pred = model.predict(X_test)

# ✅ Step 9: Evaluate model performance
print(f"Accuracy: {accuracy_score(y_test, y_pred):.2f}")
print("\nClassification Report:\n")
print(classification_report(y_test, y_pred))

# ✅ Step 10: Make predictions on new messages
def predict_spam(message):
    message_tfidf = vectorizer.transform([message])
    prediction = model.predict(message_tfidf)
    return "Spam" if prediction == 1 else "Not Spam"

# Example usage
print("\nExample prediction:")
new_message = "Get a free gift card now!"
print(f"Message: {new_message}")
print(f"Prediction: {predict_spam(new_message)}")

Accuracy: 0.67

Classification Report:

              precision    recall  f1-score   support

           0       0.50      1.00      0.67         1
           1       1.00      0.50      0.67         2

    accuracy                           0.67         3
   macro avg       0.75      0.75      0.67         3
weighted avg       0.83      0.67      0.67         3


Example prediction:
Message: Get a free gift card now!
Prediction: Spam
