# Spam Message Detection

## Import Libraries

In [105]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline

## Load Data

In [106]:
# Read the SMS dataset from the working directory into a DataFrame.
# The preview below shows the label in column `v1` and the message text in `v2`.
message = pd.read_csv(filepath_or_buffer="spam.csv")

In [107]:
# Expose the raw columns under descriptive names: `v1` -> `type`, `v2` -> `message`.
for new_name, raw_name in (("type", "v1"), ("message", "v2")):
    message[new_name] = message[raw_name]

# `mess` is a second name for the same DataFrame object, not a copy.
mess = message

In [52]:
# Preview the first five rows (the default row count for `head`).
mess.head(n=5)

Unnamed: 0,v1,v2,Unnamed: 2,Unnamed: 3,Unnamed: 4,type,message
0,ham,"Go until jurong point, crazy.. Available only ...",,,,ham,"Go until jurong point, crazy.. Available only ..."
1,ham,Ok lar... Joking wif u oni...,,,,ham,Ok lar... Joking wif u oni...
2,spam,Free entry in 2 a wkly comp to win FA Cup fina...,,,,spam,Free entry in 2 a wkly comp to win FA Cup fina...
3,ham,U dun say so early hor... U c already then say...,,,,ham,U dun say so early hor... U c already then say...
4,ham,"Nah I don't think he goes to usf, he lives aro...",,,,ham,"Nah I don't think he goes to usf, he lives aro..."


In [108]:
# Deleting unwanted columns.
# Build `mess` as a new frame instead of dropping in place: `mess` and `message`
# name the same object, so an in-place drop also strips the raw columns from
# `message`, and re-running this cell (or the renaming cell) then fails with a
# KeyError. Deriving from `message` keeps the raw frame intact and makes this
# cell safe to re-run.
mess = message.drop(columns=["v1", "v2", "Unnamed: 2", "Unnamed: 3", "Unnamed: 4"])

In [109]:
# Check that only the renamed columns remain.
mess.head(n=2)

Unnamed: 0,type,message
0,ham,"Go until jurong point, crazy.. Available only ..."
1,ham,Ok lar... Joking wif u oni...


In [110]:
# Encode the class labels numerically: spam -> 0, ham -> 1.
label_codes = {"spam": 0, "ham": 1}
for label, code in label_codes.items():
    mess.loc[mess['type'] == label, 'type'] = code

In [111]:
# Confirm the labels are now 0/1.
mess.head(n=5)

Unnamed: 0,type,message
0,1,"Go until jurong point, crazy.. Available only ..."
1,1,Ok lar... Joking wif u oni...
2,0,Free entry in 2 a wkly comp to win FA Cup fina...
3,1,U dun say so early hor... U c already then say...
4,1,"Nah I don't think he goes to usf, he lives aro..."


## Splitting Data

In [62]:
from sklearn.model_selection import train_test_split

In [63]:
# Hold out 33% of the messages for testing; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    mess['message'],
    mess['type'],
    test_size=0.33,
    random_state=42,
)

In [113]:
# Peek at a few training messages (the index shows their original row numbers).
X_train.head(n=5)

3235    Aight text me when you're back at mu and I'll ...
945     I cant wait to see you! How were the photos we...
5319                         Kothi print out marandratha.
5528    Its just the effect of irritation. Just ignore it
247                       Kallis wont bat in 2nd innings.
Name: message, dtype: object

## Feature Extraction

In [64]:
from sklearn.feature_extraction.text import TfidfVectorizer

In [66]:
# Turn the raw message text into TF-IDF feature vectors.
#   min_df=1              keep every term that occurs in at least one message
#   stop_words='english'  drop common English stop words (not the default, which is None)
#   lowercase=True        convert the text to lowercase before tokenizing
# The vectorizer is fitted on the training messages only; the test messages are
# transformed with that already-fitted vectorizer.
vector = TfidfVectorizer(
    lowercase=True,
    stop_words='english',
    min_df=1,
)
X_train_final = vector.fit_transform(X_train)
X_test_final = vector.transform(X_test)

In [114]:
# Cast both label Series to an integer dtype before fitting the classifier.
y_train_final, y_test_final = (labels.astype('int') for labels in (y_train, y_test))

## ML Model (SVM)

In [115]:
from sklearn.svm import LinearSVC

In [116]:
# Fit a linear support-vector classifier with default hyperparameters on the
# TF-IDF training features. `fit` returns the estimator itself, so the chained
# call leaves the fitted model in `model`.
model = LinearSVC().fit(X_train_final, y_train_final)
model  # display the fitted estimator as the cell output

LinearSVC()

## Evaluation

# The reporting cell below calls `accuracy_score` and reads `modelpred_train`,
# but no cell imports the former or defines the latter, so a fresh
# Restart & Run All stops with a NameError. Bring both into scope here.
from sklearn.metrics import accuracy_score

# Predicted labels for the training features (used for the train-set accuracy)
# and for the held-out test features.
modelpred_train = model.predict(X_train_final)
modelpred = model.predict(X_test_final)

In [119]:
# Report the fraction of correctly classified messages on each split.
train_accuracy = accuracy_score(y_train_final, modelpred_train)
print("Linear SVC Accurary For Train Set: ", train_accuracy)
test_accuracy = accuracy_score(y_test_final, modelpred)
print("Linear SVC Accuracy For Test Set: ", test_accuracy)

Linear SVC Accurary For Train Set:  1.0
Linear SVC Accuracy For Test Set:  0.9809679173463839


In [120]:
# Classify new, unseen messages with the fitted vectorizer and model.
user = ['Hello Bro, i was sayin that i am free today so if you as free too can go for a movie this eve']
user_feature = vector.transform(user)
predictions = model.predict(user_feature)

# `predictions` holds one label per entry in `user`. Testing the whole array in
# a single `if` only works when it has exactly one element (NumPy raises
# ValueError for longer arrays), so report each message's label individually.
# Labels follow the encoding used above: 1 = ham, 0 = spam.
for label in predictions:
    if label == 1:
        print("Ham Mail")
    else:
        print("Spam Mail")

Ham Mail


# Thank You 