# 🔍 Interactive SMS Spam Detection Testing

This notebook provides an interactive interface for testing our trained DistilBERT-based SMS spam detection model. You can input any SMS message and get real-time predictions with confidence scores.

In [None]:
# Import required libraries
import torch
from transformers import DistilBertTokenizer, DistilBertForSequenceClassification
import re
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk.stem import WordNetLemmatizer

# Download required NLTK data.
# 'punkt_tab' is what word_tokenize looks up on recent NLTK releases (3.8.2+),
# where it replaced the pickled 'punkt' models; 'punkt' is still fetched so the
# notebook keeps working on older NLTK installs.
for resource in ('punkt', 'punkt_tab', 'stopwords', 'wordnet', 'omw-1.4'):
    nltk.download(resource)

In [None]:
# Shared preprocessing state, built once and reused by preprocess_text
lemmatizer = WordNetLemmatizer()
# A set gives constant-time membership checks when filtering tokens
stop_words = {*stopwords.words('english')}

def preprocess_text(text):
    """Normalize a raw message into a space-separated string of lemmatized tokens.

    The text is lowercased, every character that is not an ASCII letter or
    whitespace is removed, the remainder is tokenized with NLTK's
    ``word_tokenize``, English stopwords are dropped, and each surviving token
    is lemmatized.

    Args:
        text: The raw message string.

    Returns:
        The cleaned tokens joined by single spaces (an empty string when no
        token survives the filtering).
    """
    # Lowercase first, then strip digits, punctuation and other symbols
    letters_only = re.sub(r'[^a-zA-Z\s]', '', text.lower())

    kept = []
    for word in word_tokenize(letters_only):
        # Skip stopwords before lemmatizing, mirroring the filter order
        if word in stop_words:
            continue
        kept.append(lemmatizer.lemmatize(word))

    return ' '.join(kept)

# Load the model and tokenizer
model_path = "best_model.pt"  # Update this path if needed
tokenizer = DistilBertTokenizer.from_pretrained('distilbert-base-uncased')

try:
    # Build the base DistilBERT architecture with a 2-class head, then overwrite
    # its weights with the state dict stored at model_path.
    model = DistilBertForSequenceClassification.from_pretrained('distilbert-base-uncased', num_labels=2)
    # map_location='cpu' lets a checkpoint that was saved from a GPU load on a
    # CPU-only machine; the tokenizer outputs used for inference stay on CPU too.
    # NOTE(review): torch.load unpickles the file - only load checkpoints you trust.
    state_dict = torch.load(model_path, map_location='cpu')
    model.load_state_dict(state_dict)
    model.eval()  # switch to inference mode (disables dropout)
    print("Model loaded successfully!")
except Exception as e:
    # Best-effort: report the problem and let the notebook continue. If loading
    # failed, `model` may be undefined and later cells will not be able to predict.
    print(f"Error loading model: {e}")

In [None]:
def predict_spam(text):
    """Classify a single message with the loaded model.

    The message is cleaned with ``preprocess_text``, encoded with the
    module-level ``tokenizer`` (truncated to at most 512 tokens) and passed
    through the module-level ``model`` without gradient tracking.

    Args:
        text: The raw message string.

    Returns:
        A dict with three keys:
            ``"is_spam"``: ``True`` when the predicted class index is non-zero.
            ``"confidence"``: softmax probability of the predicted class.
            ``"processed_text"``: the cleaned text that was fed to the tokenizer.
    """
    cleaned = preprocess_text(text)

    encoded = tokenizer(cleaned, return_tensors="pt", padding=True, truncation=True, max_length=512)

    # Inference only: no gradients are needed
    with torch.no_grad():
        logits = model(**encoded).logits
        probs = torch.softmax(logits, dim=1)

    # Single input, so row 0 holds the only probability distribution
    label = torch.argmax(probs, dim=1).item()

    return {
        "is_spam": bool(label),
        "confidence": probs[0][label].item(),
        "processed_text": cleaned,
    }

In [None]:
# Interactive testing loop
while True:
    # Get input from user. A closed input stream or an interrupted kernel ends
    # the session the same way as typing 'quit', instead of raising a traceback.
    try:
        text = input("\nEnter an SMS message to test (or 'quit' to exit): ")
    except (EOFError, KeyboardInterrupt):
        print("\nThank you for using the SMS Spam Detector!")
        break

    message = text.strip()

    # Check if user wants to quit (surrounding whitespace is tolerated)
    if message.lower() == 'quit':
        print("\nThank you for using the SMS Spam Detector!")
        break

    # Nothing to classify: prompt again rather than scoring an empty string
    if not message:
        print("Please enter a non-empty message.")
        continue

    # Make prediction
    result = predict_spam(text)

    # Print results
    print("\n🔍 Analysis Results:")
    print("-" * 50)
    print(f"Original Text: {text}")
    print(f"Processed Text: {result['processed_text']}")
    print(f"Prediction: {'SPAM' if result['is_spam'] else 'HAM (Not Spam)'}")
    print(f"Confidence: {result['confidence']*100:.2f}%")
    print("-" * 50)

## Example Usage

Try testing the model with different types of messages. Here are some examples you can try:

1. A normal message: "Hi, can we meet tomorrow at 2pm?"
2. A potential spam message: "CONGRATULATIONS! You've won $1,000,000! Click here to claim now!"
3. Another normal message: "Please pick up some milk on your way home"
4. Another potential spam message: "FREE CREDIT CHECK! No obligation! Reply YES now!"

The model will analyze each message and provide:
- The processed version of your text (after cleaning and preprocessing)
- The prediction (SPAM or HAM)
- The confidence level of the prediction (the model's softmax probability for the predicted class, shown as a percentage)

Type 'quit' when you're done testing.