<a href="https://colab.research.google.com/github/hajibeevi/genai/blob/main/Copy_of_Spam_detection.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Install required libraries
!pip install gradio pandas scikit-learn nltk

# Import libraries
import csv
import os
import string

import gradio as gr
import nltk
import pandas as pd
from nltk.corpus import stopwords
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB

# Download NLTK stopwords
# clean_text() filters tokens against stopwords.words("english"), which needs
# this corpus to be available locally.
nltk.download("stopwords")

# Download and extract SMS Spam dataset
import urllib.request
import zipfile

url = "https://archive.ics.uci.edu/ml/machine-learning-databases/00228/smsspamcollection.zip"
zip_path = "smsspamcollection.zip"

# Only fetch and unpack when the extracted data file is not already on disk,
# so re-running the cell is idempotent and does not need the network again.
if not os.path.exists("SMSSpamCollection"):
    urllib.request.urlretrieve(url, zip_path)

    with zipfile.ZipFile(zip_path, "r") as zip_ref:
        zip_ref.extractall(".")

# Load dataset
# The file is plain tab-separated text: one "<label>\t<message>" record per
# line, with no header row. Messages are free text, so quote characters must be
# read literally; with pandas' default quoting, a field that opens with an
# unbalanced '"' can absorb the following lines into a single message.
df = pd.read_csv(
    "SMSSpamCollection",
    sep="\t",
    names=["label", "message"],
    header=None,
    quoting=csv.QUOTE_NONE,
)
# Encode the string labels as integers: ham -> 0, spam -> 1.
df["label"] = df["label"].map({"ham": 0, "spam": 1})

# Function to clean text
def clean_text(text):
    """Normalize a message before it is vectorized.

    Lowercases ``text``, deletes every character listed in
    ``string.punctuation``, and drops whitespace-separated tokens that appear
    in NLTK's English stopword list.

    Args:
        text: The raw message string.

    Returns:
        The surviving tokens joined by single spaces.
    """
    # The stopword list does not change between calls, so load it once and keep
    # it on the function object instead of calling stopwords.words() for every
    # token. A frozenset makes each membership test a hash lookup rather than a
    # scan of the whole list.
    stop_words = getattr(clean_text, "_stop_words", None)
    if stop_words is None:
        stop_words = clean_text._stop_words = frozenset(stopwords.words("english"))

    text = text.lower()  # Lowercase
    text = text.translate(str.maketrans("", "", string.punctuation))  # Remove punctuation
    return " ".join(word for word in text.split() if word not in stop_words)  # Remove stopwords

# Normalized copy of every message, kept alongside the raw text
df["clean_message"] = df["message"].map(clean_text)

# Bag-of-words counts: the vectorizer learns its vocabulary from the cleaned
# messages and encodes them as a document-term matrix
vectorizer = CountVectorizer()
X = vectorizer.fit_transform(df["clean_message"])
y = df["label"]

# Hold out 20% of the rows; the fixed random_state keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Fit a multinomial Naive Bayes classifier on the training portion
model = MultinomialNB()
model.fit(X_train, y_train)

# Function for prediction
def predict_spam(text):
    """Classify a single message as spam or not.

    The message is passed through ``clean_text``, encoded with the fitted
    ``vectorizer``, and scored by ``model``.

    Args:
        text: The message to classify.

    Returns:
        "Spam" when the model predicts label 1, otherwise "Not Spam".
    """
    features = vectorizer.transform([clean_text(text)])
    label = model.predict(features)[0]
    if label == 1:
        return "Spam"
    return "Not Spam"

# Gradio UI: one text input wired to predict_spam, one text output
iface = gr.Interface(
    fn=predict_spam,
    inputs="text",
    outputs="text",
    title="Spam Detector",
)
# share=True requests a public link for the running app
iface.launch(share=True)

Collecting gradio
  Downloading gradio-5.27.0-py3-none-any.whl.metadata (16 kB)
Collecting aiofiles<25.0,>=22.0 (from gradio)
  Downloading aiofiles-24.1.0-py3-none-any.whl.metadata (10 kB)
Collecting fastapi<1.0,>=0.115.2 (from gradio)
  Downloading fastapi-0.115.12-py3-none-any.whl.metadata (27 kB)
Collecting ffmpy (from gradio)
  Downloading ffmpy-0.5.0-py3-none-any.whl.metadata (3.0 kB)
Collecting gradio-client==1.9.0 (from gradio)
  Downloading gradio_client-1.9.0-py3-none-any.whl.metadata (7.1 kB)
Collecting groovy~=0.1 (from gradio)
  Downloading groovy-0.1.2-py3-none-any.whl.metadata (6.1 kB)
Collecting pydub (from gradio)
  Downloading pydub-0.25.1-py2.py3-none-any.whl.metadata (1.4 kB)
Collecting python-multipart>=0.0.18 (from gradio)
  Downloading python_multipart-0.0.20-py3-none-any.whl.metadata (1.8 kB)
Collecting ruff>=0.9.3 (from gradio)
  Downloading ruff-0.11.7-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (25 kB)
Collecting safehttpx<0.2.0,>=0.1.6 (

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.


Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://331d49b173d3ea79f4.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


