<a href="https://colab.research.google.com/github/harinijs03/2023103549_SDC_assignment/blob/main/Spam.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [4]:
!pip install gradio
import csv
import os
import urllib.request
import zipfile

import gradio as gr
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline

# Load dataset from UCI repository (direct link)
url = "https://archive.ics.uci.edu/ml/machine-learning-databases/00228/smsspamcollection.zip"
zip_filename = "smsspamcollection.zip"
extract_path = "smsspamcollection"
# Data file inside the extracted archive; it is read below as a
# tab-separated, header-less table.
csv_filename = os.path.join(extract_path, "SMSSpamCollection")

# Download and extract only when the data file is not already on disk, so
# re-running the cell does not hit the network again.
if not os.path.exists(csv_filename):
    print("Downloading and extracting dataset...")
    # Download dataset
    try:
        urllib.request.urlretrieve(url, zip_filename)
    except Exception as e:
        print(f"Error downloading dataset: {e}")
        raise  # Nothing to load without the archive

    # Extract files
    try:
        with zipfile.ZipFile(zip_filename, 'r') as zip_ref:
            zip_ref.extractall(extract_path)
    except Exception as e:
        print(f"Error extracting dataset: {e}")
        raise  # Re-raise to stop execution if extraction fails

    print("Dataset downloaded and extracted successfully.")
else:
    print("Dataset already exists. Loading from disk.")

# Load dataset
try:
    df = pd.read_csv(
        csv_filename,
        sep='\t',
        header=None,
        names=['label', 'text'],
        # NOTE(review): latin-1 can decode any byte sequence, so this never
        # fails, but if the file is actually UTF-8 the non-ASCII characters
        # will be garbled -- confirm the file's real encoding.
        encoding='latin-1',
        # The message text contains literal double-quote characters. With the
        # default quoting mode pandas treats them as field quotes and can merge
        # several messages into a single row; QUOTE_NONE keeps one row per line.
        quoting=csv.QUOTE_NONE,
    )
except Exception as e:
    print(f"Error loading CSV file: {e}")
    raise

# Encode labels as integers: ham -> 0, spam -> 1
df['label'] = df['label'].map({'ham': 0, 'spam': 1})

# Series.map turns every value missing from the mapping into NaN. Fail fast
# with a clear message instead of letting NaN labels reach the classifier.
if df['label'].isna().any():
    raise ValueError("Dataset contains labels other than 'ham' and 'spam'.")

# Split data into train and test sets (80/20). Stratifying on the label keeps
# the ham/spam ratio the same in both splits, which matters because the two
# classes are not equally frequent.
X_train, X_test, y_train, y_test = train_test_split(
    df['text'],
    df['label'],
    test_size=0.2,
    random_state=42,
    stratify=df['label'],
)

# Create a pipeline: TF-IDF + Logistic Regression
# Raw text goes in; the vectorizer (English stop words removed) feeds the
# classifier, so `model.predict` accepts plain strings.
model = Pipeline([
    ('tfidf', TfidfVectorizer(stop_words='english')),
    ('clf', LogisticRegression())
])

# Train the model
try:
    model.fit(X_train, y_train)
except Exception as e:
    print(f"Error training the model: {e}")
    raise

# Evaluate the model on the held-out test split
try:
    preds = model.predict(X_test)
    accuracy = accuracy_score(y_test, preds)
    print(f"Accuracy: {accuracy}")
except Exception as e:
    print(f"Error evaluating the model: {e}")
    raise
# Define the prediction function
def predict_spam(message):
    """Classify a single message as spam or not spam.

    Args:
        message: Text to classify (the value of the Gradio textbox).

    Returns:
        "Spam" when the model predicts label 1 and "Not Spam" otherwise.
        A prompt string is returned when the input is empty, whitespace-only
        or not a string, and an "Error during prediction: ..." string is
        returned if the model raises.
    """
    # Nothing to classify: do not present a confident verdict for blank input,
    # and do not pass non-string values on to the model.
    if not isinstance(message, str) or not message.strip():
        return "Please enter a message to classify."

    try:
        prediction = model.predict([message])[0]
    except Exception as e:
        # Returned rather than raised so the error shows up in the UI output.
        return f"Error during prediction: {e}"

    return "Spam" if prediction == 1 else "Not Spam"

# Build the Gradio UI: a three-line text input wired to predict_spam,
# with the result shown as plain text.
try:
    message_input = gr.Textbox(lines=3, placeholder="Enter an email message...")
    iface = gr.Interface(
        predict_spam,
        message_input,
        "text",
        title="Email Spam Detection",
        description="This is an Email Spam Detection system using Logistic Regression. It classifies messages as spam or not.",
    )
except Exception as build_error:
    print(f"Error creating Gradio interface: {build_error}")
    raise

# Start serving the interface; any launch failure is reported and re-raised.
try:
    iface.launch()
except Exception as launch_error:
    print(f"Error launching Gradio interface: {launch_error}")
    raise


Collecting gradio
  Downloading gradio-5.25.1-py3-none-any.whl.metadata (16 kB)
Collecting aiofiles<25.0,>=22.0 (from gradio)
  Downloading aiofiles-24.1.0-py3-none-any.whl.metadata (10 kB)
Collecting fastapi<1.0,>=0.115.2 (from gradio)
  Downloading fastapi-0.115.12-py3-none-any.whl.metadata (27 kB)
Collecting ffmpy (from gradio)
  Downloading ffmpy-0.5.0-py3-none-any.whl.metadata (3.0 kB)
Collecting gradio-client==1.8.0 (from gradio)
  Downloading gradio_client-1.8.0-py3-none-any.whl.metadata (7.1 kB)
Collecting groovy~=0.1 (from gradio)
  Downloading groovy-0.1.2-py3-none-any.whl.metadata (6.1 kB)
Collecting pydub (from gradio)
  Downloading pydub-0.25.1-py2.py3-none-any.whl.metadata (1.4 kB)
Collecting python-multipart>=0.0.18 (from gradio)
  Downloading python_multipart-0.0.20-py3-none-any.whl.metadata (1.8 kB)
Collecting ruff>=0.9.3 (from gradio)
  Downloading ruff-0.11.5-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (25 kB)
Collecting safehttpx<0.2.0,>=0.1.6 (