In [1]:
# Import pandas
import pandas as pd
# Import the required dependencies from sklearn
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.svm import LinearSVC

# Widen pandas' column display (up to 200 characters) so long text messages are not truncated.
pd.set_option('display.max_colwidth', 200)

# Import Gradio
import gradio as gr

In [2]:
def sms_classification(sms_text_df):
    """
    Fit a TF-IDF + LinearSVC pipeline that labels SMS messages.

    Parameters
    ----------
    sms_text_df : DataFrame
        Must provide a 'text_message' column (the features) and a 'label'
        column (the target).

    Returns
    -------
    Pipeline
        The pipeline after fitting on the training portion of the data.

    Raises
    ------
    ValueError
        If either required column is absent.

    Side effects: prints the training-set class counts and the accuracy
    measured on the held-out portion.
    """
    required_columns = ('text_message', 'label')
    if not all(name in sms_text_df.columns for name in required_columns):
        raise ValueError("DataFrame must contain 'text_message' and 'label' columns.")

    messages, labels = sms_text_df['text_message'], sms_text_df['label']

    # Hold out 33% for evaluation; stratifying on the labels keeps the class
    # proportions the same in both portions, and the fixed seed makes the split repeatable.
    split_parts = train_test_split(
        messages, labels, test_size=0.33, random_state=42, stratify=labels
    )
    train_messages, test_messages, train_labels, test_labels = split_parts

    # Report how many examples of each class ended up in the training portion.
    print("Class distribution in training set:")
    print(train_labels.value_counts())

    pipeline = Pipeline(steps=[
        ('tfidf', TfidfVectorizer()),
        ('clf', LinearSVC()),
    ])
    pipeline.fit(train_messages, train_labels)

    held_out_accuracy = pipeline.score(test_messages, test_labels)
    print(f"Model Accuracy: {held_out_accuracy:.2f}")
    return pipeline

In [3]:
# Read the SMS spam collection CSV into a DataFrame.
# Point `data_path` at your own copy of the file if it lives somewhere else.
data_path = 'Resources/SMSSpamCollection.csv'
sms_text_df = pd.read_csv(filepath_or_buffer=data_path)

# Print the first five rows as a quick check that the file was parsed as expected.
print(sms_text_df.head(n=5))


  label  \
0   ham   
1   ham   
2  spam   
3   ham   
4   ham   

                                                                                                                                                  text_message  
0                                              Go until jurong point, crazy.. Available only in bugis n great world la e buffet... Cine there got amore wat...  
1                                                                                                                                Ok lar... Joking wif u oni...  
2  Free entry in 2 a wkly comp to win FA Cup final tkts 21st May 2005. Text FA to 87121 to receive entry question(std txt rate)T&C's apply 08452810075over18's  
3                                                                                                            U dun say so early hor... U c already then say...  
4                                                                                                Nah I don't think he goes to us

In [4]:
# Train the classifier on the loaded messages and keep the fitted pipeline in "text_clf".
text_clf = sms_classification(sms_text_df=sms_text_df)

Class distribution in training set:
ham     3233
spam     500
Name: label, dtype: int64
Model Accuracy: 0.99


In [5]:
# Step 2: SMS Prediction Function
def sms_prediction(text, model):
    """
    Predict if the SMS is spam or not using the trained model.

    The predicted label is treated as spam when it is either the string
    "spam" (case-insensitive; "1" is accepted too) or a non-string value
    equal to 1. This makes the function work both with a model trained on
    'ham'/'spam' string labels and with one trained on 0/1 labels.

    Parameters
    ----------
    text : str
        The SMS message to classify.
    model : object
        Anything exposing ``predict(list_of_texts)`` that returns a sequence
        of labels, one per input text.

    Returns
    -------
    str
        A sentence stating whether the message is spam, or an
        "An error occurred: ..." message if prediction raised.
    """
    try:
        prediction = model.predict([text])[0]
        if isinstance(prediction, str):
            # String labels such as 'ham' / 'spam'.
            is_spam = prediction.strip().lower() in ("spam", "1")
        else:
            # Numeric labels where 1 marks spam.
            is_spam = prediction == 1
        verdict = "spam" if is_spam else "not spam"
        return f'The text message: "{text}", is {verdict}.'
    except Exception as e:
        # Deliberate best-effort: the caller shows this string to the user instead of crashing.
        return f"An error occurred: {str(e)}"


In [8]:
# Small hand-labelled sample given as (message, label) pairs: 1 for spam, 0 for not spam.
# The rows are shuffled with a fixed seed and the index is renumbered afterwards.
# NOTE(review): this cell rebinds `sms_text_df` and `text_clf`, so a model stored in
# `text_clf` by an earlier cell is replaced by one fitted on these eight rows only.
sms_text_df = (
    pd.DataFrame(
        [
            ('Win a free lottery now!', 1),
            ('Hey, are you coming to the meeting?', 0),
            ('Claim your free prize!', 1),
            ('Let’s grab coffee this afternoon.', 0),
            ('Congratulations, you have been selected!', 1),
            ('Can you send me the report by 5 PM?', 0),
            ('Exclusive offer just for you!', 1),
            ('Meeting postponed to tomorrow at 10 AM.', 0),
        ],
        columns=['text_message', 'label'],
    )
    .sample(frac=1, random_state=42)
    .reset_index(drop=True)
)

# Fit the classification pipeline on the shuffled sample.
text_clf = sms_classification(sms_text_df)



Class distribution in training set:
1    3
0    2
Name: label, dtype: int64
Model Accuracy: 0.67


In [7]:
# Step 4: Gradio App
def sms_app(text):
    """Gradio callback: classify `text` with the notebook-level `text_clf` model."""
    return sms_prediction(text=text, model=text_clf)

# Wire the callback into a text-in / text-out Gradio interface.
sms_interface = gr.Interface(
    title="SMS Spam Classifier",
    description="Classify SMS messages as Spam or Not Spam using a trained machine learning model.",
    fn=sms_app,
    inputs=gr.Textbox(placeholder="Type your SMS here...", label="Enter SMS Text"),
    outputs=gr.Textbox(label="Classification Result"),
)

# Step 5: Launch the App
# share=True requests a temporary public URL in addition to the local one.
if __name__ == "__main__":
    sms_interface.launch(share=True)


Running on local URL:  http://127.0.0.1:7860
Running on public URL: https://232734b10a50b63fa5.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)


## Test the following text messages. 

---

1. You are a lucky winner of $5000!
2. You won 2 free tickets to the Super Bowl.
3. You won 2 free tickets to the Super Bowl text us to claim your prize.
4. Thanks for registering. Text 4343 to receive free updates on medicare.