In [2]:

# NEC Phishing SMS Detector (AI-Powered)

# This AI-powered project helps detect fake or phishing SMS messages that may target voters during the 2025 general elections in Tanzania.

#  It is powered by a pre-trained spam classifier (BERT-tiny)
#  It runs entirely in Google Colab (no setup needed)
#  It uses AI to detect suspicious messages based on content and structure
#  It uses some custom keywords for Swahili political context (e.g., NEC, kura, zawadi)

#--------------------------------------------------------------------------------------------------------------------------------------------
#  How to Use this SMS Spam Detector
#--------------------------------------------------------------------------------------------------------------------------------------------

# 1. Run all the code cells below in order (Runtime > Run all) to install the dependencies and load the model and functions.
# 2. When the last cell shows the "Enter SMS message:" prompt, type or paste any SMS message.
# 3. Press Enter to see the AI prediction.
# 4. To check another message, run the last cell again (click its play button) and type the new message at the prompt.




In [3]:
!pip install -q pandas scikit-learn #This installs required libraries

In [4]:
#importing required libraries
import pandas as pd # This handles the dataset (rows of messages)
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

#machine learning
from sklearn.model_selection import train_test_split # 	Splits data into training and testing
from sklearn.feature_extraction.text import TfidfVectorizer  # Turns text into numbers that ML can understand
from sklearn.naive_bayes import MultinomialNB # Naive Bayes model. It is great for text classification
from sklearn.metrics import classification_report,accuracy_score # To check model performance


In [5]:
!pip install transformers torch -q

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m363.4/363.4 MB[0m [31m5.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m13.8/13.8 MB[0m [31m105.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m24.6/24.6 MB[0m [31m82.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m883.7/883.7 kB[0m [31m47.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m664.8/664.8 MB[0m [31m2.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m211.5/211.5 MB[0m [31m2.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m56.3/56.3 MB[0m [31m9.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m127.9/127.9 MB[0m [31m5.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

In [6]:
# Loads the necessary AI tools from Hugging Face Transformers
from transformers import pipeline

# We'll also use this to show results
import textwrap

In [7]:
from transformers import pipeline

# Build the text-classification pipeline around the SMS spam checkpoint.
# The resulting callable is what the detector function uses to score messages.
classifier = pipeline(
    task="text-classification",
    model="mrm8488/bert-tiny-finetuned-sms-spam-detection",
)


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/645 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/17.6M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/324 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

Device set to use cpu


In [12]:

import textwrap

import re

# Words and phrases that mark a message as suspicious. Entries are lowercased
# and matched as whole words/phrases (see _KEYWORD_PATTERN), so short entries
# such as 'act' or 'now' do not fire inside 'contact' or 'know'.
_SPAM_KEYWORDS = (
    # Scam and phishing verbs
    'won', 'win', 'claim', 'click', 'tap', 'press', 'confirm', 'verify', 'validate', 'register',
    'free', 'gift', 'bonus', 'offer', 'reward', 'promo', 'promotion',

    # Financial bait
    'millions', 'kiasi', 'shinda', 'pesa', 'zawadi', 'bure', 'kwa gharama yoyote', 'malipo',
    'tuma', 'itume', 'lipa',

    # Urgency / commands
    'now', 'haraka', 'sasa', 'leo', 'kabla', 'deadline', 'muda', 'fanya', 'pigia', 'jaza', 'fungua',

    # Manipulative messages
    'congratulations', 'hongera', 'umeshinda', 'umeteuliwa', 'umebahatika', 'umefuzu',
    'reform', 'no reform', 'hakuna mabadiliko', 'mapinduzi', 'suluhisho', 'tumaini',

    # Election interference
    'usipige', 'usimpigie', 'usichague', 'kataa', 'chagua', 'pigia kura', 'msaada wa kura',

    # Swahili political manipulation
    'viongozi', 'mgombea', 'rais', 'wabunge', 'diwani', 'siasa', 'uhakiki', 'matokeo', 'kuhesabu',
    'kujiandikisha',

    # Political party names (used maliciously or deceptively)
    'ccm', 'chadema', 'cuf', 'nccr', 'udp', 'tlp', 'demokrasia makini', 'nld', 'sau',
    'adc', 'act', 'wazalendo', 'dp', 'ada', 'tadea', 'ppt', 'updp', 'chausta', 'jahazi', 'cck',
    'nra', 'pep', 'umd', 'adu', 'chauma', 'cdm',
    'Chama Cha Mapinduzi (CCM)',
    'Chama cha Demokrasia na Maendeleo (CHADEMA)',
    'Civic United Front (CUF)',
    'NCCR–Mageuzi (NCCR–Mageuzi)',
    'United Democratic Party (UDP)',
    'Tanzania Labour Party (TLP)',
    'Demokrasia Makini (Demokrasia Makini)',
    'National League for Democracy (NLD)',
    'Sauti ya Umma (SAU)',
    'Alliance for Democratic Change (ADC)',
    'Alliance for Change and Transparency – Wazalendo (ACT–Wazalendo)',
    'Democratic Party (DP)',
    'African Democratic Alliance Party (ADA–TADEA)',
    'Progressive Party of Tanzania–Maendeleo (PPT–Maendeleo)',
    'United Peoples Democratic Party (UPDP)',
    'Chama cha Haki na Usitawi (CHAUSTA)',
    'Jahazi Asilia (JA)',
    'Chama cha Kijamii (CCK)',
    'National Reconstruction Alliance (NRA)',
    'Peoples’ Empowerment Party (PEP)',
    'Union for Multiparty Democracy (UMD)',
    'African Democratic Union (ADU)',
    'Chama cha Ukombozi wa Umma (CHAUMA)',
    'Chama cha Demokrasia Makini (CDM)',

    # Suspicious messaging triggers
    'nec', 'nec2025', 'tanzania elections', 'vote2025', 'sms vote', 'ballot', 'matokeo',
    'wasiliana', 'msaada',

    # Phone
    'pigia', 'piga simu', 'tuma ujumbe', 'bonyeza hapa', 'andika namba', 'itume', 'andikisha namba',

    # Emotional triggers
    'msaada', 'tumaini', 'habari njema', 'ujumbe muhimu', 'salama', 'tahadhari',
)

# URL pieces that are matched as plain substrings, because they normally sit
# inside a longer link rather than standing alone as a word.
_SPAM_LINK_FRAGMENTS = (
    'https', 'http', '.tk', '.ml', '.ga', '.cf', '.gq',  # free domains often used in scams
    '.xyz', '.click', '.info', '.top',                   # cheap TLDs often used in spam
    'nec-', 'nec.', 'nec2025',                           # fake references to NEC
    'vote-', 'vote.', 'vote2025',                        # fake voting prompts
    'verify-', 'verify.', 'verification',                # misleading verification prompts
    'ballot-', 'ballot.', 'election-', 'election.',      # fake ballot links
    'tz-election', 'tzvote', 'tz-verify',                # localized fake URLs
    'electiontz', 'piga-kura', 'tanzania-vote',          # Swahili/local social engineering
    'register-', 'update-', 'id-check',                  # fake voter update tricks
    'result-', 'matokeo-', 'sms-vote', 'sms-election',   # fake result or voting services
    'mpigie-', 'msajili-', 'tumie-',                     # commands or phishing triggers
)

# One alternation over every distinct keyword. Lookarounds are used instead of
# \b so that entries ending in punctuation, e.g. '... (ccm)', can still match.
_KEYWORD_PATTERN = re.compile(
    r"(?<!\w)(?:"
    + "|".join(re.escape(keyword.lower()) for keyword in dict.fromkeys(_SPAM_KEYWORDS))
    + r")(?!\w)"
)


def _has_spam_marker(text):
    """Return True when ``text`` contains a spam keyword or a link fragment."""
    lowered = text.lower()
    if _KEYWORD_PATTERN.search(lowered):
        return True
    return any(fragment in lowered for fragment in _SPAM_LINK_FRAGMENTS)


def enhanced_check_sms(text):
    """Classify one SMS message and print the verdict.

    The message is scored by the module-level ``classifier`` and additionally
    screened against the keyword and link-fragment lists above. It is reported
    as spam when either the model labels it ``spam`` or a list entry matches;
    otherwise it is reported as suspicious when the model's score is below 70%,
    and as safe in every other case.

    Args:
        text: The SMS message to check.

    Returns:
        None. The message, the verdict and the model score are printed.
    """
    result = classifier(text)[0]
    # NOTE(review): this assumes the pipeline reports a label literally named
    # "spam". If the checkpoint exposes generic names (e.g. LABEL_0/LABEL_1)
    # the model branch below never fires - confirm against the model config.
    label = result['label'].lower()
    score = round(result['score'] * 100, 2)

    print("\n📩 SMS Message:")
    print(textwrap.fill(text, width=60))

    if label == "spam" or _has_spam_marker(text):
        print("🔍 Prediction: 🚨 It is a SPAM message 🚨")
    elif score < 70:
        print("🔍 Prediction: ⚠️ Suspicious – Low Confidence ⚠️")
    else:
        print("🔍 Prediction: ✅ Safe Message ")

    # The score is the model's confidence in its own label, even when the
    # verdict above was decided by the keyword/link screen.
    print(f"📊 Model Confidence: {score}%")


In [13]:
# ----------------------------------------------------
#  Interactive check: read one SMS and classify it
# ----------------------------------------------------
# Show the two-line banner; the second line carries its own trailing newline,
# which leaves a blank line before the prompt.
print(
    "Welcome to the NEC Phishing SMS Detector!",
    "Type an SMS message below to check if it is spam or safe.\n",
    sep="\n",
)

# Ask for the message, then pass it to the detector for a printed verdict.
sms = input("Enter SMS message: ")
enhanced_check_sms(sms)


Welcome to the NEC Phishing SMS Detector!
Type an SMS message below to check if it is spam or safe.

Enter SMS message: ile hela itume kwenye namba hii

📩 SMS Message:
ile hela itume kwenye namba hii
🔍 Prediction: 🚨 It is a SPAM message 🚨
📊 Model Confidence: 93.2%
