In [1]:
# Import pandas
import pandas as pd
# Import the required dependencies from sklearn
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.svm import LinearSVC

# Widen the column display so whole text messages are visible when previewing the data.
# The fully qualified option name is used on purpose: abbreviated keys are resolved by
# pattern matching and can become ambiguous if pandas adds similarly named options.
pd.set_option('display.max_colwidth', 200)

# Import Gradio
import gradio as gr

In [2]:
def sms_classification(sms_text_df, test_size=0.33, random_state=42):
    """
    Train a TF-IDF + linear SVM pipeline on a DataFrame of SMS messages.

    Parameters:
    - sms_text_df (pandas.DataFrame): Data with a 'text_message' column (the SMS
      text) and a 'label' column (the class of each message).
    - test_size (float): Share of rows held out by `train_test_split`; the
      pipeline is fitted on the remaining rows only. Defaults to 0.33.
    - random_state (int): Seed passed to both the split and `LinearSVC` so that
      repeated runs yield the same fitted model. Defaults to 42.

    Returns:
    - sklearn.pipeline.Pipeline: The fitted pipeline with steps 'tfidf' and 'clf'.

    Raises:
    - KeyError: If 'text_message' or 'label' is missing from `sms_text_df`.
    """
    # Fail early with a message that names every missing column.
    required_columns = ('text_message', 'label')
    missing = [column for column in required_columns if column not in sms_text_df.columns]
    if missing:
        raise KeyError(f"sms_text_df is missing required column(s): {missing}")

    features = sms_text_df['text_message']
    target = sms_text_df['label']

    # Only the training portion is used for fitting; the held-out rows are discarded.
    X_train, _, y_train, _ = train_test_split(
        features, target, test_size=test_size, random_state=random_state
    )

    text_clf = Pipeline([
        ('tfidf', TfidfVectorizer()),
        # Seed the classifier as well, so the fitted model is reproducible.
        ('clf', LinearSVC(random_state=random_state)),
    ])
    text_clf.fit(X_train, y_train)
    return text_clf


In [3]:
# Create a function called `sms_prediction` that takes in the SMS text and predicts whether the text is "not spam" or "spam".
# The function should return the SMS message, and say whether the text is "not spam" or "spam".
def sms_prediction(text, model=None):
    """
    Predict the spam/ham classification of a given text message.

    Parameters:
    - text (str): The text message to be classified.
    - model: Optional fitted classifier exposing `predict`. When omitted, the
      module-level `text_clf` is used; it is looked up at call time, so it only
      needs to exist before the first prediction is requested.

    Returns:
    - str: A message indicating whether the text message is classified as spam
      or not, or a prompt to enter a message when `text` is empty.

    Raises:
    - NameError: If `model` is omitted and `text_clf` has not been defined yet.
    """
    # Nothing to classify: skip the model instead of predicting on blank input.
    if text is None or not str(text).strip():
        return 'Please enter a text message to classify.'

    if model is None:
        model = text_clf

    # `predict` expects a collection of documents, so wrap the single message in a list
    # and take the only prediction back out.
    prediction = model.predict([text])[0]

    # A "ham" prediction means the message is not spam; anything else is reported as spam.
    if prediction == 'ham':
        return f'The text message: "{text}", is not spam.'
    return f'The text message: "{text}", is spam.'


In [4]:
# Read the SMS spam collection CSV into a DataFrame.
sms_text_df = pd.read_csv('./resources/SMSSpamCollection.csv')

# Show a preview of the leading rows, followed by how many messages carry each label.
print(
    sms_text_df.head(),
    "Value counts for 'label':",
    sms_text_df['label'].value_counts(),
    sep="\n",
)


  label  \
0   ham   
1   ham   
2  spam   
3   ham   
4   ham   

                                                                                                                                                  text_message  
0                                              Go until jurong point, crazy.. Available only in bugis n great world la e buffet... Cine there got amore wat...  
1                                                                                                                                Ok lar... Joking wif u oni...  
2  Free entry in 2 a wkly comp to win FA Cup final tkts 21st May 2005. Text FA to 87121 to receive entry question(std txt rate)T&C's apply 08452810075over18's  
3                                                                                                            U dun say so early hor... U c already then say...  
4                                                                                                Nah I don't think he goes to us

In [5]:
# Fit the classification pipeline on the loaded messages and keep it in `text_clf`,
# the name that `sms_prediction` reads when it makes a prediction.
text_clf = sms_classification(sms_text_df=sms_text_df)



In [6]:
# Build the SMS spam-detection app: one labeled textbox for the incoming message
# and one labeled textbox for the resulting prediction.
sms_input = gr.Textbox(
    lines=2,
    placeholder="Enter SMS text here...",
    label="Input SMS Text",
)
prediction_output = gr.Textbox(label="Prediction")

# Wire the prediction function to the two textboxes.
iface = gr.Interface(
    fn=sms_prediction,
    inputs=sms_input,
    outputs=prediction_output,
    title="SMS Spam Detector",
    description="Enter an SMS text message to determine whether it is spam or not.",
)

# Start the app
iface.launch()

Running on local URL:  http://127.0.0.1:7860

To create a public link, set `share=True` in `launch()`.




## Test the following text messages. 

---

1. You are a lucky winner of $5000!
2. You won 2 free tickets to the Super Bowl.
3. You won 2 free tickets to the Super Bowl text us to claim your prize.
4. Thanks for registering. Text 4343 to receive free updates on medicare.