<a href="https://colab.research.google.com/github/arraakularavind/AspireNex/blob/main/sms_spam.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [29]:
#!pip install scikit-learn nltk
#!pip install scikit-learn
#!pip install voila ipywidgets
#!pip install scikit-learn imbalanced-learn
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from imblearn.over_sampling import SMOTE
from sklearn.preprocessing import LabelEncoder
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.metrics import classification_report,accuracy_score
import nltk
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
import string
import re
import ipywidgets as widgets
from IPython.display import display,clear_output
from google.colab import drive

# Fetch the NLTK corpora used by preprocess_text: the stop-word lists and the
# WordNet data that backs WordNetLemmatizer.
for corpus_name in ("stopwords", "wordnet"):
  nltk.download(corpus_name)


# Translation table that deletes every ASCII punctuation character.
_PUNCTUATION_TABLE = str.maketrans("", "", string.punctuation)
# Matches a run of one or more digits.
_DIGIT_RUN = re.compile(r"\d+")
# Filled on first use so the NLTK resources are loaded once instead of once
# per message (preprocess_text is applied to every row of the dataset).
_NLP_CACHE = {}


def _nlp_resources():
  """Return the cached ``(stop_words, lemmatizer)`` pair, loading it on first use."""
  if not _NLP_CACHE:
    _NLP_CACHE["stop_words"] = frozenset(stopwords.words("english"))
    _NLP_CACHE["lemmatizer"] = WordNetLemmatizer()
  return _NLP_CACHE["stop_words"], _NLP_CACHE["lemmatizer"]


def preprocess_text(text):
  """Normalize a raw message into a space-separated string of lemmas.

  Steps, in order:
    1. lower-case the text;
    2. delete ASCII punctuation (characters are removed, not replaced);
    3. replace each run of digits with a single space;
    4. split on whitespace and drop English stop words;
    5. lemmatize each remaining word with WordNetLemmatizer.

  Args:
    text: the message to clean; must be a ``str``.

  Returns:
    The cleaned words joined by single spaces ("" if nothing survives).
  """
  stop_words, lemmatizer = _nlp_resources()
  text = text.lower().translate(_PUNCTUATION_TABLE)
  text = _DIGIT_RUN.sub(" ", text)
  return " ".join(
      lemmatizer.lemmatize(word) for word in text.split() if word not in stop_words
  )


# Mount Google Drive so the dataset stored there can be read from Colab.
drive.mount("/content/drive", force_remount=True)

# The CSV is decoded as latin1.
data = pd.read_csv("/content/drive/MyDrive/Colab Notebooks/spam.csv", encoding="latin1")

# Keep only the label and text columns. The explicit copy makes `data` an
# independent frame, so the rename, in-place dropna and column assignments
# below do not act on a selection of the loaded frame (avoids pandas'
# SettingWithCopyWarning).
data = data[["v1", "v2"]].copy()
data.columns = ["Label", "message"]
data.dropna(inplace=True)

data["message"] = data["message"].apply(preprocess_text)
le = LabelEncoder()  # to convert spam-->1 and ham-->0
data["Label"] = le.fit_transform(data["Label"])

# Stratified 80/20 split. The fixed seed (same value as the SMOTE step) makes
# the split, and therefore the reported metrics, reproducible across runs.
x_train, x_test, y_train, y_test = train_test_split(
    data["message"],
    data["Label"],
    test_size=0.2,
    stratify=data["Label"],
    random_state=42,
)

# Vectorize the text with TF-IDF. The vocabulary (capped at 3000 features) is
# fitted on the training split only and then reused to transform the test split.
tfidf = TfidfVectorizer(max_features=3000)
x_train_tfidf = tfidf.fit_transform(x_train)
x_test_tfidf = tfidf.transform(x_test)

# Oversample the minority class with SMOTE. Only the training features are
# resampled; the test split is left untouched.
smote = SMOTE(sampling_strategy="minority", random_state=42)
x_train_tfidf_r, y_train_r = smote.fit_resample(x_train_tfidf, y_train)

def _fit_and_report(model, title):
  """Fit ``model`` on the resampled training data and print its test metrics.

  Reads the module-level ``x_train_tfidf_r``, ``y_train_r``, ``x_test_tfidf``
  and ``y_test``.

  Args:
    model: an unfitted estimator exposing ``fit`` and ``predict``.
    title: header line printed above the metrics.

  Returns:
    ``(predictions, accuracy, report)`` for the test split, where ``report``
    is the text produced by ``classification_report``.
  """
  model.fit(x_train_tfidf_r, y_train_r)
  predictions = model.predict(x_test_tfidf)
  accuracy = accuracy_score(y_test, predictions)
  report = classification_report(y_test, predictions)

  # Print the metrics under the header instead of leaving the header alone.
  print(f"{title}\n")
  print(f"Accuracy: {accuracy:.4f}")
  print(report)
  return predictions, accuracy, report


# Naive Bayes
nb = MultinomialNB()
y_pred_nb, nb_accuracy, nb_report = _fit_and_report(nb, "Naive Bayes Report")

# Logistic Regression
lr_model = LogisticRegression(max_iter=1000)
y_pred_lr, lr_accuracy, lr_report = _fit_and_report(lr_model, "LR Report")

# SVM
svm_model = SVC()
y_pred_svm, svm_accuracy, svm_report = _fit_and_report(svm_model, "SVM Report")


def predict(models,message):
  """Label ``message`` as "spam" or "ham" using every model in ``models``.

  The message is cleaned with ``preprocess_text`` and vectorized with the
  module-level fitted ``tfidf``. Every model in ``models.values()`` is asked
  for a prediction; the result is "spam" if at least one of them predicts
  label 1, otherwise "ham".

  Args:
    models: mapping whose values are fitted classifiers exposing ``predict``.
    message: raw message text.

  Returns:
    The string "spam" or "ham".
  """
  cleaned = preprocess_text(message)
  features = tfidf.transform([cleaned])

  # Collect every model's vote before deciding, so all models are evaluated.
  votes = []
  for model in models.values():
    votes.append(model.predict(features)[0])

  if 1 in votes:
    return "spam"
  return "ham"


# Free-text box where the user types the message to classify.
text_input = widgets.Text(
    value="",
    placeholder="Type your message Here",
    description="Message",
    disabled=False,  # was misspelled `disable`, which is not a widget trait
)
# Drop-down used to choose which trained model's results to show.
model_selector = widgets.Dropdown(
    options=["Naive Bayes", "Logistic Regression", "SVM"],
    value="Naive Bayes",
    description="Model",
    disabled=False,
)

# Area that receives everything printed by the button callback.
output = widgets.Output()
# Lookup tables keyed by the drop-down option names.
models = {"Naive Bayes": nb, "Logistic Regression": lr_model, "SVM": svm_model}
reports = {"Naive Bayes": nb_report, "Logistic Regression": lr_report, "SVM": svm_report}

def on_button_click(button):
  """Handle a click on the Predict button.

  Clears the output area, classifies the text currently in ``text_input``
  with the model chosen in ``model_selector``, and prints the prediction
  followed by that model's stored classification report.

  Args:
    button: the widget that fired the event (unused).
  """
  with output:
    output.clear_output()
    model_name = model_selector.value
    message = text_input.value

    # Nothing to classify: ask for input instead of predicting on empty text.
    if not message.strip():
      print("Please enter a message to classify.")
      return

    # Pass only the selected model so the prediction comes from the same
    # model whose report is shown below, not from all models at once.
    prediction = predict({model_name: models[model_name]}, message)
    report = reports[model_name]

    print(f"Prediction:{prediction}\n")
    print(f"\nClassification Report for {model_name}:\n")

    print(report)

# Button that triggers a prediction when clicked.
button = widgets.Button(description="Predict", disabled=False, tooltip="Click to Predict", icon="check")

# Clear whatever the cell has printed so far before showing the UI.
clear_output()
button.on_click(on_button_click)

# Stack the controls vertically: each one is followed by an HTML line break,
# and the output area comes last.
stacked = []
for control in (text_input, model_selector, button):
  stacked.extend((control, widgets.HTML("<br>")))
stacked.append(output)
display(*stacked)



Text(value='', description='Message', placeholder='Type your message Here')

HTML(value='<br>')

Dropdown(description='Model', options=('Naive Bayes', 'Logistic Regression', 'SVM'), value='Naive Bayes')

HTML(value='<br>')

Button(description='Predict', icon='check', style=ButtonStyle(), tooltip='Click to Predict')

HTML(value='<br>')

Output()