In [2]:
import torch
from transformers import DistilBertTokenizer, DistilBertForSequenceClassification, pipeline
import warnings
# Suppress every Python warning for the rest of the kernel session so library
# notices do not clutter the demo output.
# NOTE(review): 'ignore' with no category also hides deprecation and runtime
# warnings raised by the imported libraries — consider narrowing the filter.
warnings.filterwarnings('ignore')


In [3]:
# NOTE(review): this flag is not read by any cell shown here. The name suggests
# it selects loading from the Hugging Face Hub vs. a local copy — confirm
# against the rest of the notebook, or remove it if it is unused.
USE_HUGGINGFACE = True

In [4]:
# Model identifier handed to `pipeline("text-classification", model=...)` when
# the classifier is loaded. `pipeline` accepts either a Hugging Face Hub repo
# ID (as here) or a path to a local model directory.
MODEL_PATH = "didulantha/sms-spam-detector"

In [5]:
# Build the text-classification pipeline for MODEL_PATH.
# On failure, print troubleshooting hints and then re-raise, so the full
# traceback is shown and "Run All" stops at this cell instead of failing later
# with a NameError on `classifier`.
try:
    classifier = pipeline("text-classification", model=MODEL_PATH)
except Exception as e:
    print(f"❌ Error loading model: {e}")
    print("\nMake sure:")
    print("1. Model is trained and saved in ./sms-spam-detector/")
    print("2. Or update MODEL_PATH with your HuggingFace model ID")
    # Re-raise rather than calling exit(): exit() is a script idiom, and inside
    # a notebook kernel it acts on the kernel session rather than stopping the
    # run like an ordinary error; it also throws away the traceback.
    raise
else:
    # Only reached when the pipeline was constructed without error.
    print("✓ Model loaded successfully!\n")

config.json:   0%|          | 0.00/569 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/268M [00:00<?, ?B/s]

tokenizer_config.json: 0.00B [00:00, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/125 [00:00<?, ?B/s]

Device set to use cuda:0


✓ Model loaded successfully!



In [6]:
# Hand-labelled sample messages used to sanity-check the classifier.
# The table below holds one (text, expected label, category) row per message;
# it is expanded into a list of dicts with the keys "text", "expected" and
# "category". The first five rows are spam, the last five are normal ("ham").
demo_messages = [
    {"text": body, "expected": label, "category": kind}
    for body, label, kind in (
        # --- Clear SPAM ---
        ("CONGRATULATIONS! You've WON a FREE iPhone 15! Call 555-GIFT now to claim your prize!",
         "SPAM", "Prize Scam"),
        ("URGENT: Your bank account has been locked. Click here immediately to verify: bit.ly/notascam",
         "SPAM", "Phishing"),
        ("Limited time offer! Get 90% OFF on all products! Text STOP to unsubscribe. Reply NOW!",
         "SPAM", "Marketing"),
        ("You have been selected for a $1000 Walmart gift card. Claim here: freegiift.com",
         "SPAM", "Gift Card Scam"),
        ("XXX Hot singles in your area waiting to meet you! Click now for instant access!!!",
         "SPAM", "Adult Content"),
        # --- Clear HAM (normal messages) ---
        ("Hey, are we still meeting for lunch tomorrow at 1pm? Let me know!",
         "HAM", "Social"),
        ("Your Amazon order #123-4567890 has been shipped and will arrive by Thursday.",
         "HAM", "Legitimate Business"),
        ("Can you pick up milk and eggs on your way home? Thanks!",
         "HAM", "Family"),
        ("Meeting rescheduled to 3pm. Conference room B. See you there.",
         "HAM", "Work"),
        ("Happy birthday! 🎂 Hope you have an amazing day! Let's celebrate soon!",
         "HAM", "Personal"),
    )
]


In [7]:
# Run every demo message through the classifier, print a per-message report,
# and finish with an overall accuracy summary.
correct = 0
total = len(demo_messages)

for i, item in enumerate(demo_messages, 1):
    text = item["text"]
    expected = item["expected"]
    category = item["category"]

    # The pipeline returns a list of result dicts; take the one for this message.
    result = classifier(text)[0]

    # Map the raw label to SPAM/HAM. This model emits generic labels
    # (LABEL_0 = HAM, LABEL_1 = SPAM); a checkpoint whose config names its
    # classes would emit e.g. "spam" directly, so accept that spelling too
    # instead of silently reporting every message as HAM.
    raw_label = str(result['label']).upper()
    prediction = "SPAM" if raw_label in ("LABEL_1", "SPAM") else "HAM"
    confidence = result['score']  # score the pipeline reports for that label

    # Tally correct predictions
    is_correct = prediction == expected
    if is_correct:
        correct += 1

    # Display result (message text is shortened to 70 characters)
    status_emoji = "✅" if is_correct else "❌"
    pred_emoji = "🚨" if prediction == "SPAM" else "✅"

    print(f"{status_emoji} Test {i}: [{category}]")
    print(f"   Message: \"{text[:70]}{'...' if len(text) > 70 else ''}\"")
    print(f"   Expected: {expected} | Predicted: {pred_emoji} {prediction}")
    print(f"   Confidence: {confidence:.4f} ({confidence*100:.1f}%)")
    print()

# Summary — guard the division so an empty demo list reports 0.0% instead of
# raising ZeroDivisionError.
accuracy = correct / total if total else 0.0
print("="*70)
print(f"📊 DEMO RESULTS: {correct}/{total} correct ({accuracy*100:.1f}% accuracy)")
print("="*70)

✅ Test 1: [Prize Scam]
   Message: "CONGRATULATIONS! You've WON a FREE iPhone 15! Call 555-GIFT now to cla..."
   Expected: SPAM | Predicted: 🚨 SPAM
   Confidence: 0.9982 (99.8%)

✅ Test 2: [Phishing]
   Message: "URGENT: Your bank account has been locked. Click here immediately to v..."
   Expected: SPAM | Predicted: 🚨 SPAM
   Confidence: 0.8401 (84.0%)

✅ Test 3: [Marketing]
   Message: "Limited time offer! Get 90% OFF on all products! Text STOP to unsubscr..."
   Expected: SPAM | Predicted: 🚨 SPAM
   Confidence: 0.9979 (99.8%)

✅ Test 4: [Gift Card Scam]
   Message: "You have been selected for a $1000 Walmart gift card. Claim here: free..."
   Expected: SPAM | Predicted: 🚨 SPAM
   Confidence: 0.9971 (99.7%)

✅ Test 5: [Adult Content]
   Message: "XXX Hot singles in your area waiting to meet you! Click now for instan..."
   Expected: SPAM | Predicted: 🚨 SPAM
   Confidence: 0.9977 (99.8%)

✅ Test 6: [Social]
   Message: "Hey, are we still meeting for lunch tomorrow at 1pm? Let me know