# SPAM SMS DETECTION
## Build an AI model that can classify SMS messages as spam or legitimate. Use techniques like TF-IDF or word embeddings with classifiers like Naive Bayes, Logistic Regression, or Support Vector Machines to identify spam messages.

In [25]:
# importing the necessary libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from nltk.corpus import stopwords
from nltk.stem import PorterStemmer
from sklearn.pipeline import Pipeline

In [26]:
# Read the SMS spam dataset into a DataFrame, decoding the file as latin-1.
# NOTE(review): the path is absolute and specific to one machine — consider a
# relative or configurable data directory so the notebook runs elsewhere.
df = pd.read_csv(
    filepath_or_buffer="C:\\Users\\91831\\Downloads\\spam.csv",
    encoding="latin-1",
)

In [27]:
# Text-cleaning resources shared by preprocess_text:
#   * stemmer    - Porter stemmer applied to every word that is kept
#   * stop_words - NLTK's English stop words, held in a set for fast membership tests
# NOTE(review): stopwords.words() needs the NLTK "stopwords" corpus to be available
# locally (e.g. via nltk.download("stopwords")) — confirm before a fresh run.
stemmer = PorterStemmer()
stop_words = {*stopwords.words("english")}

In [28]:
def preprocess_text(text):
    """Drop English stop words from a message and stem the remaining words.

    The message is split on whitespace only, so punctuation stays attached to
    its neighbouring word. A word is discarded when its lowercase form is in
    the module-level ``stop_words`` set; every other word is passed through
    the module-level ``stemmer``.

    Parameters
    ----------
    text : str or None
        The raw message. ``None`` and float NaN (what pandas yields for an
        empty CSV cell) are treated as an empty message instead of raising
        ``AttributeError``. Any other non-string value is converted with
        ``str()`` before processing.

    Returns
    -------
    str
        The kept, stemmed words joined by single spaces; ``""`` when the
        input is missing, empty, or contains only whitespace.
    """
    # Missing values: None, or NaN (the only float that is not equal to itself).
    if text is None or (isinstance(text, float) and text != text):
        return ""
    if not isinstance(text, str):
        text = str(text)

    kept_stems = (
        stemmer.stem(token)
        for token in text.split()
        if token.lower() not in stop_words
    )
    return " ".join(kept_stems)

In [29]:
# Replace every raw message with its cleaned form (stop words dropped, words stemmed).
# The column is overwritten in place, so re-running this cell would preprocess
# already-preprocessed text a second time.
df['SMS'] = df['SMS'].map(preprocess_text)

In [30]:
# Split the dataset: 20% of the messages are held out for testing.
# The fixed random_state keeps the split the same from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    df['SMS'],
    df['Label'],
    random_state=42,
    test_size=0.2,
)

In [31]:
# Two-stage text classifier, fitted on the training split in one go:
#   'tfidf'      - turns each message string into TF-IDF features
#   'classifier' - multinomial Naive Bayes trained on those features
# Pipeline.fit returns the fitted pipeline itself, so the result can be bound directly.
model = Pipeline(
    steps=[
        ('tfidf', TfidfVectorizer()),
        ('classifier', MultinomialNB()),
    ]
).fit(X_train, y_train)

# Predicted label for every message in the held-out test split
predictions = model.predict(X_test)

In [32]:
# Inspect the held-out messages; they were already run through preprocess_text
# before the train/test split.
X_test

3245    funni fact nobodi teach volcano 2 erupt, tsuna...
944     sent score sopha secondari applic schools. thi...
1044    know someon know fanci you. call 09058097218 f...
2484     promis get soon can. text morn let know made ok.
812     congratul ur award either å£500 cd gift vouche...
2973                       i'll text carlo let know, hang
2991                     k.i did't see you.:)k:)wher now?
2942                  message..no responce..what happend?
230     get gandhipuram walk cross cut road. right sid...
1181                                    flippin shit yet?
1912    real tho sucks. can't even cook whole electr o...
1992    free tone hope enjoy new content. text stop 61...
5435                               i'm wif buy tix lar...
4805                         call u finish come n pick u.
401                                     dear chechi. talk
1859                          what' up. want come online?
1344                              somewher fredericksburg
2952    urgent

In [33]:
# Labels predicted by the fitted pipeline for X_test.
predictions

array(['Not spam', 'Not spam', 'Not spam', 'Not spam', 'spam', 'Not spam',
       'Not spam', 'Not spam', 'Not spam', 'Not spam', 'Not spam', 'spam',
       'Not spam', 'Not spam', 'Not spam', 'Not spam', 'Not spam', 'spam',
       'Not spam', 'Not spam', 'Not spam', 'Not spam', 'Not spam',
       'Not spam', 'spam', 'Not spam', 'Not spam', 'Not spam', 'Not spam',
       'Not spam', 'Not spam', 'Not spam', 'Not spam', 'Not spam',
       'Not spam', 'Not spam', 'Not spam', 'Not spam', 'Not spam',
       'Not spam', 'Not spam', 'Not spam', 'spam', 'Not spam', 'Not spam',
       'Not spam', 'Not spam', 'Not spam', 'Not spam', 'Not spam', 'spam',
       'Not spam', 'spam', 'Not spam', 'Not spam', 'Not spam', 'Not spam',
       'Not spam', 'Not spam', 'Not spam', 'Not spam', 'spam', 'Not spam',
       'Not spam', 'Not spam', 'Not spam', 'Not spam', 'Not spam',
       'Not spam', 'spam', 'Not spam', 'Not spam', 'Not spam', 'Not spam',
       'Not spam', 'spam', 'Not spam', 'Not spam', 'Not s

In [34]:
# True labels of the test split, for comparison with the predictions.
y_test

3245    Not spam
944     Not spam
1044        spam
2484    Not spam
812         spam
2973    Not spam
2991    Not spam
2942    Not spam
230     Not spam
1181    Not spam
1912    Not spam
1992        spam
5435    Not spam
4805    Not spam
401     Not spam
1859    Not spam
1344    Not spam
2952        spam
501     Not spam
3337    Not spam
1945    Not spam
3142    Not spam
2422    Not spam
381     Not spam
5567        spam
4937    Not spam
79      Not spam
5240    Not spam
2554    Not spam
5345    Not spam
4379    Not spam
4789    Not spam
683         spam
5519    Not spam
4315    Not spam
393     Not spam
5541    Not spam
4546    Not spam
3599    Not spam
2225    Not spam
881     Not spam
4625    Not spam
3997        spam
5015    Not spam
23      Not spam
4479    Not spam
1215    Not spam
1961    Not spam
5339    Not spam
848     Not spam
4294        spam
2664    Not spam
3407        spam
351     Not spam
3492    Not spam
3103    Not spam
3315    Not spam
1881    Not spam
3504    Not sp

In [35]:
# Overall accuracy of the model on the held-out test set.
# accuracy_score is already imported in the imports cell at the top of the
# notebook, so it is not re-imported here. The arguments follow scikit-learn's
# (y_true, y_pred) order, matching the confusion_matrix and
# classification_report calls later in the notebook.
accuracy_score(y_test, predictions)

0.9668161434977578

In [36]:
# Show outputs in full instead of truncated:
# lift the pandas limits on rows, columns and display width ...
import numpy as np
for option in ('display.max_rows', 'display.max_columns', 'display.width'):
    pd.set_option(option, None)
# ... and stop NumPy from summarising long arrays with "...".
np.set_printoptions(threshold=np.inf)

In [37]:
# Per-class evaluation on the test split: the confusion matrix plus
# precision / recall / f1 for each label.
conf_matrix = confusion_matrix(y_true=y_test, y_pred=predictions)
classification_rep = classification_report(y_true=y_test, y_pred=predictions)

# Each heading is printed on its own line, followed by the corresponding result.
print("Confusion Matrix:", conf_matrix, sep="\n")
print("Classification Report:", classification_rep, sep="\n")

Confusion Matrix:
[[965   0]
 [ 37 113]]
Classification Report:
              precision    recall  f1-score   support

    Not spam       0.96      1.00      0.98       965
        spam       1.00      0.75      0.86       150

    accuracy                           0.97      1115
   macro avg       0.98      0.88      0.92      1115
weighted avg       0.97      0.97      0.96      1115



In [39]:
# Ask for a message interactively (this cell blocks until something is entered).
user_input = input("Enter an SMS message: ")

# Clean the message the same way the training data was cleaned, then classify it.
# model.predict expects a collection of documents, hence the one-element list.
cleaned_input = preprocess_text(user_input)
user_prediction = model.predict([cleaned_input])

# Report the predicted label for the single message.
print(f"Prediction: {user_prediction[0]}")

Enter an SMS message: URGENT! You have won a 1 week FREE membership in our å£100,000 Prize Jackpot! Txt the word: CLAIM to No: 81010 T&C www.dbuk.net LCCLTD POBOX 4403LDNW1A7RW18
Prediction: spam
