In [1]:
# Import pandas
import pandas as pd
# Import the required dependencies from sklearn
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.svm import LinearSVC

# Widen the column display so long text messages are shown in DataFrame output.
# The fully qualified option name is used: pandas resolves abbreviated names by
# pattern match, which can break if a future option with a similar name is added.
pd.set_option('display.max_colwidth', 200)

# Import Gradio
import gradio as gr

In [3]:
# Load the SMS spam collection into a DataFrame and preview the first rows.
# (The column-width display option is already configured in the first cell,
# so it is not repeated here.)
sms_text_df = pd.read_csv('Resources/SMSSpamCollection.csv')
sms_text_df.head()

Unnamed: 0,label,text_message
0,ham,"Go until jurong point, crazy.. Available only in bugis n great world la e buffet... Cine there got amore wat..."
1,ham,Ok lar... Joking wif u oni...
2,spam,Free entry in 2 a wkly comp to win FA Cup final tkts 21st May 2005. Text FA to 87121 to receive entry question(std txt rate)T&C's apply 08452810075over18's
3,ham,U dun say so early hor... U c already then say...
4,ham,"Nah I don't think he goes to usf, he lives around here though"


In [5]:
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.svm import LinearSVC
from sklearn.pipeline import Pipeline
import pandas as pd

def sms_classification(sms_text_df, test_size=0.33, random_state=42):
    """
    Fit a TF-IDF + Linear Support Vector Classification pipeline on SMS data.

    Parameters:
    - sms_text_df (pd.DataFrame): DataFrame containing 'text_message' and 'label' columns for SMS classification.
    - test_size (float or int): Portion of the rows held out by train_test_split and
      not used for fitting. Defaults to 0.33.
    - random_state (int): Seed passed to both train_test_split and LinearSVC so that
      repeated runs fit the same model. Defaults to 42.

    Returns:
    - text_clf (Pipeline): Fitted pipeline model for SMS classification.

    Raises:
    - KeyError: If 'text_message' or 'label' is missing from sms_text_df.

    The data is split into training and held-out portions, and the pipeline is
    fitted on the training portion only. The held-out portion is not evaluated
    inside this function; it is simply excluded from fitting.
    The fitted pipeline is returned to make future predictions.
    """
    # Set the features variable to the text message column.
    X = sms_text_df['text_message']

    # Set the target variable to the "label" column.
    y = sms_text_df['label']

    # Split the data; only the training portion is needed here, so the
    # held-out features and labels are discarded.
    X_train, _, y_train, _ = train_test_split(
        X, y, test_size=test_size, random_state=random_state
    )

    # Build a pipeline that vectorizes raw text with TF-IDF and feeds the
    # resulting features to a linear SVM classifier.
    text_clf = Pipeline([
        ('tfidf', TfidfVectorizer()),                      # TF-IDF Vectorizer
        ('clf', LinearSVC(random_state=random_state))      # Linear Support Vector Classifier (seeded for reproducibility)
    ])

    # Fit the pipeline on the raw training messages and their labels.
    text_clf.fit(X_train, y_train)

    # Return the fitted pipeline model
    return text_clf


In [7]:
# Preview the DataFrame that was loaded near the top of the notebook before training on it.
# The CSV is not read again here: nothing has modified `sms_text_df` since it was loaded.
sms_text_df.head()

Unnamed: 0,label,text_message
0,ham,"Go until jurong point, crazy.. Available only in bugis n great world la e buffet... Cine there got amore wat..."
1,ham,Ok lar... Joking wif u oni...
2,spam,Free entry in 2 a wkly comp to win FA Cup final tkts 21st May 2005. Text FA to 87121 to receive entry question(std txt rate)T&C's apply 08452810075over18's
3,ham,U dun say so early hor... U c already then say...
4,ham,"Nah I don't think he goes to usf, he lives around here though"


In [13]:
# Call the sms_classification function with the DataFrame and set the result to the "text_clf" variable
text_clf = sms_classification(sms_text_df)
# End the cell with the bare name so Jupyter shows the pipeline's rich repr rather than print()'s plain text.
text_clf

Pipeline(steps=[('tfidf', TfidfVectorizer()), ('clf', LinearSVC())])


In [15]:
def sms_prediction(text, model):
    """
    Classify a single text message and report the outcome as a sentence.

    Parameters:
    - text (str): The text message to be classified.
    - model (Pipeline): A fitted model whose predict() accepts a list of messages
      and returns one label per message.

    Returns:
    - str: A sentence quoting the message and stating whether it is spam or not spam.

    A predicted label of "ham" is reported as not spam; any other label is
    reported as spam.
    """
    # predict() works on a batch, so wrap the message in a list and take the only label back out.
    labels = model.predict([text])
    predicted_label = labels[0]

    # Guard clause: anything that is not "ham" is reported as spam.
    if predicted_label != "ham":
        return f'The text message: "{text}", is spam.'
    return f'The text message: "{text}", is not spam.'


In [17]:
import gradio as gr

# Pre-trained SMS classification model (assume you have trained it using the sms_classification function)
import pandas as pd

# Serve the pipeline that was already fitted on the full SMS dataset (`text_clf`).
# Building a three-row example frame here and retraining on it would overwrite
# `sms_text_df` and leave the app classifying with a model that has seen only a
# couple of messages.
model = text_clf

# Prediction function
def sms_prediction(text, model=None):
    """
    Classify one text message for the Gradio app and describe the result.

    Parameters:
    - text (str): The message typed into the app.
    - model (optional): A fitted classifier exposing predict(). When omitted, the
      notebook-level `model` variable is used. Accepting it explicitly keeps this
      definition compatible with the two-argument sms_prediction(text, model)
      defined earlier in the notebook, which this definition replaces.

    Returns:
    - str: A prompt to enter a message when the input is empty or whitespace only;
      otherwise a sentence stating whether the message is spam or not spam.
    """
    # An empty submission carries no words to classify, so ask for input instead
    # of reporting a prediction for it.
    if text is None or not text.strip():
        return "Please enter a text message to classify."

    if model is None:
        # The parameter shadows the notebook-level `model`, so look it up explicitly
        # at call time (same late binding as a plain global reference).
        model = globals()["model"]

    prediction = model.predict([text])[0]
    if prediction == "ham":
        return f'The text message: "{text}", is not spam.'
    return f'The text message: "{text}", is spam.'

# Gradio app
def sms_app():
    """
    Assemble the Gradio Blocks interface for the SMS spam classifier.

    The interface holds a heading, a textbox for the message, a non-interactive
    textbox for the result, and a "Classify" button wired to sms_prediction.

    Returns:
    - The assembled gr.Blocks object; launching it is left to the caller.
    """
    with gr.Blocks() as demo:
        # Heading shown at the top of the app.
        gr.Markdown("### SMS Spam Classification App")

        # Where the user types the message to classify.
        message_box = gr.Textbox(
            label="Enter SMS Message",
            placeholder="Type your message here...",
        )

        # Where the classification sentence is displayed; not editable by the user.
        result_box = gr.Textbox(label="Prediction", interactive=False)

        classify_button = gr.Button("Classify")

        # On click, pass the message to sms_prediction and show its return value.
        classify_button.click(
            fn=sms_prediction,
            inputs=message_box,
            outputs=result_box,
        )

    return demo

# Build the interface, then start serving it.
sms_interface = sms_app()
sms_interface.launch()

* Running on local URL:  http://127.0.0.1:7860

To create a public link, set `share=True` in `launch()`.




## Test the following text messages. 

---

1. You are a lucky winner of $5000!
2. You won 2 free tickets to the Super Bowl.
3. You won 2 free tickets to the Super Bowl text us to claim your prize.
4. Thanks for registering. Text 4343 to receive free updates on medicare.

In [None]:
# Classify the four test messages listed above with the model that backs the app.
# Labels recorded in the original notebook, in order: ham, ham, spam, spam.
# (Bare `ham` / `spam` names in a code cell would raise NameError on Run All.)
test_messages = [
    "You are a lucky winner of $5000!",
    "You won 2 free tickets to the Super Bowl.",
    "You won 2 free tickets to the Super Bowl text us to claim your prize.",
    "Thanks for registering. Text 4343 to receive free updates on medicare.",
]
list(model.predict(test_messages))