# Importing libraries

In [1]:
import pandas as pd
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import pickle

# Read the dataset

In [2]:
# Location of the raw SMS spam CSV.
# NOTE(review): this is an absolute path on one machine; the notebook will not
# run elsewhere until it is pointed at a local copy of the file.
csv_path = r'C:/Users/Welcome/Desktop/Data Science Internship/CSV FILES/spam.csv'

# The file is decoded as latin-1 rather than pandas' default UTF-8.
df = pd.read_csv(csv_path, encoding="latin-1")
df

Unnamed: 0,class,message,Unnamed: 2,Unnamed: 3,Unnamed: 4
0,ham,"Go until jurong point, crazy.. Available only ...",,,
1,ham,Ok lar... Joking wif u oni...,,,
2,spam,Free entry in 2 a wkly comp to win FA Cup fina...,,,
3,ham,U dun say so early hor... U c already then say...,,,
4,ham,"Nah I don't think he goes to usf, he lives aro...",,,
...,...,...,...,...,...
5567,spam,This is the 2nd time we have tried 2 contact u...,,,
5568,ham,Will Ì_ b going to esplanade fr home?,,,
5569,ham,"Pity, * was in mood for that. So...any other s...",,,
5570,ham,The guy did some bitching but I acted like i'd...,,,


# Drop the columns with null values

In [3]:
# Remove the three unnamed columns, keeping only the label and message columns.
# Rebinding `df` (instead of inplace=True) and ignoring already-missing columns
# makes this cell safe to re-run: the in-place version raised KeyError on a
# second execution because the columns had already been dropped.
df = df.drop(columns=['Unnamed: 2', 'Unnamed: 3', 'Unnamed: 4'], errors='ignore')


# Rename the remaining columns

In [4]:
# Give the two remaining columns the names used by the rest of the notebook.
# The assignment is positional: first column -> 'label', second -> 'message'.
column_names = ['label', 'message']
df.columns = column_names
df

Unnamed: 0,label,message
0,ham,"Go until jurong point, crazy.. Available only ..."
1,ham,Ok lar... Joking wif u oni...
2,spam,Free entry in 2 a wkly comp to win FA Cup fina...
3,ham,U dun say so early hor... U c already then say...
4,ham,"Nah I don't think he goes to usf, he lives aro..."
...,...,...
5567,spam,This is the 2nd time we have tried 2 contact u...
5568,ham,Will Ì_ b going to esplanade fr home?
5569,ham,"Pity, * was in mood for that. So...any other s..."
5570,ham,The guy did some bitching but I acted like i'd...


# Map 'ham' to 0 and 'spam' to 1

In [5]:
# Encode the text labels as integers: 'ham' -> 0, 'spam' -> 1.
label_codes = {'ham': 0, 'spam': 1}

# Series.map converts every value that is not a key of the dict into NaN, so
# re-running this cell on an already-encoded column would silently replace all
# labels with NaN. Only apply the mapping while the column is not yet encoded.
already_encoded = df['label'].isin(list(label_codes.values())).all()
if not already_encoded:
    df['label'] = df['label'].map(label_codes)
df

Unnamed: 0,label,message
0,0,"Go until jurong point, crazy.. Available only ..."
1,0,Ok lar... Joking wif u oni...
2,1,Free entry in 2 a wkly comp to win FA Cup fina...
3,0,U dun say so early hor... U c already then say...
4,0,"Nah I don't think he goes to usf, he lives aro..."
...,...,...
5567,1,This is the 2nd time we have tried 2 contact u...
5568,0,Will Ì_ b going to esplanade fr home?
5569,0,"Pity, * was in mood for that. So...any other s..."
5570,0,The guy did some bitching but I acted like i'd...


# Extract features and labels

In [6]:
# Features are the message text; the target is the label column.
x, y = df['message'], df['label']

# Convert the text data into numerical data

In [7]:
# Bag-of-words encoder: learns a vocabulary from the messages and turns each
# message into a vector of token counts.
cv = CountVectorizer()

# Read the text from df['message'] rather than from `x`: this cell rebinds `x`
# to the count matrix, so vectorizing `x` itself only worked on the first run
# (a re-run would pass the matrix, not text, to the vectorizer).
# NOTE(review): the vocabulary is fit on all messages before the train/test
# split in the next cell, so test-set tokens influence the feature space.
# Consider splitting first and fitting the vectorizer on the training part only.
x = cv.fit_transform(df['message'])

# Split the data into training and testing sets

In [8]:
# Hold out 33% of the rows for testing; the fixed random_state keeps the split
# the same on every run.
split_parts = train_test_split(x, y, test_size=0.33, random_state=42)
x_train, x_test, y_train, y_test = split_parts

# Train the Naive Bayes Classifier

In [9]:
# Build and fit the multinomial Naive Bayes classifier on the training split.
# fit() returns the estimator itself, so it can be bound directly.
clf = MultinomialNB().fit(x_train, y_train)
clf

# Evaluate the model

In [10]:
# Predict on the held-out split and report the fraction of correct labels.
y_pred = clf.predict(x_test)
accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy}')

Accuracy: 0.9793365959760739


# Save the trained model and count vectorizer for later use

In [11]:
# Persist both fitted objects: the classifier and the vectorizer whose
# vocabulary it was trained on. The Streamlit app loads these two files.
artifacts = (
    ('nlp_model.pkl', clf),
    ('transform.pkl', cv),
)
for target_name, fitted_object in artifacts:
    with open(target_name, 'wb') as sink:
        pickle.dump(fitted_object, sink)
print("model saved.")
    

model saved.


# Deploy the model on Streamlit

In [12]:
# Source of the Streamlit app, kept as a string so the notebook can write it
# out as a standalone script. The app loads 'nlp_model.pkl' and 'transform.pkl'
# by relative name, so it must be run from the directory that contains them.
streamlit_code = """
import streamlit as st
import pickle

# Load the pre-trained model and CountVectorizer
model_filename = 'nlp_model.pkl'
cv_filename = 'transform.pkl'

# Context managers close the pickle files once loading is done.
# Only load pickles produced by the training notebook: unpickling an
# untrusted file can execute arbitrary code.
with open(model_filename, 'rb') as model_file:
    clf = pickle.load(model_file)
with open(cv_filename, 'rb') as cv_file:
    cv = pickle.load(cv_file)

# Title of the web app
st.title('Spam Detector')

# Text input for the message
message = st.text_area('Enter a message:')

# Predict button
if st.button('Predict'):
    # Treat whitespace-only input the same as an empty message
    if message.strip():
        # The classifier was trained on the vectorizer's sparse output,
        # so the matrix is passed through without densifying it.
        vect = cv.transform([message])
        my_prediction = clf.predict(vect)

        # Display the prediction result
        if my_prediction[0] == 1:
            st.write('The message is *spam*.')
        else:
            st.write('The message is *not spam*.')
    else:
        st.write('Please enter a message to predict.')
"""
# Path of the generated Streamlit script
file_path = 'spamapp.py'

# Write the script with an explicit encoding so the result does not depend on
# the platform's default text encoding.
with open(file_path, 'w', encoding='utf-8') as app_file:
    app_file.write(streamlit_code)

print(f"File '{file_path}' has been saved.")

File 'spamapp.py' has been saved.
