# SMS Spam Classifier

In [1]:
import numpy as np
import pandas as pd

In [2]:
df= pd.read_csv('./spam.csv', encoding = "ISO-8859-1")

In [3]:
df.sample(5)

Unnamed: 0,v1,v2,Unnamed: 2,Unnamed: 3,Unnamed: 4
904,ham,"We're all getting worried over here, derek and...",,,
3990,ham,I am on the way to ur home,,,
2199,ham,Not sure I have the stomach for it ...,,,
2795,ham,Tell your friends what you plan to do on Valen...,,,
2816,spam,Gr8 Poly tones 4 ALL mobs direct 2u rply with ...,,,


In [4]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5572 entries, 0 to 5571
Data columns (total 5 columns):
 #   Column      Non-Null Count  Dtype 
---  ------      --------------  ----- 
 0   v1          5572 non-null   object
 1   v2          5572 non-null   object
 2   Unnamed: 2  50 non-null     object
 3   Unnamed: 3  12 non-null     object
 4   Unnamed: 4  6 non-null      object
dtypes: object(5)
memory usage: 217.8+ KB


In [5]:
df.shape

(5572, 5)

In [None]:
# Project Flow

# 1. Data Cleaning
# 2. EDA
# 3. Text Preprocessing
# 4. Model Building
# 5. Evaluation
# 6. Improvement
# 7. Website
# 8. Deploy

## 1. Data Cleaning

In [6]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5572 entries, 0 to 5571
Data columns (total 5 columns):
 #   Column      Non-Null Count  Dtype 
---  ------      --------------  ----- 
 0   v1          5572 non-null   object
 1   v2          5572 non-null   object
 2   Unnamed: 2  50 non-null     object
 3   Unnamed: 3  12 non-null     object
 4   Unnamed: 4  6 non-null      object
dtypes: object(5)
memory usage: 217.8+ KB


In [8]:
# dropping the last 3 columns
df.drop(['Unnamed: 2','Unnamed: 3','Unnamed: 4'],axis=1,inplace=True)

In [9]:
df.sample(5)

Unnamed: 0,v1,v2
3565,ham,Its ok..come to my home it vl nice to meet and...
4411,ham,If i said anything wrong sorry de:-)
525,ham,Hi i won't b ard 4 christmas. But do enjoy n m...
5044,ham,We have sent JD for Customer Service cum Accou...
5023,ham,Where you. What happen


In [13]:
# renaming the columns
df.rename(columns={'v1': 'target', 'v2': 'text'}, inplace= True)

In [14]:
df.head()

Unnamed: 0,target,text
0,ham,"Go until jurong point, crazy.. Available only ..."
1,ham,Ok lar... Joking wif u oni...
2,spam,Free entry in 2 a wkly comp to win FA Cup fina...
3,ham,U dun say so early hor... U c already then say...
4,ham,"Nah I don't think he goes to usf, he lives aro..."


In [11]:
from sklearn.preprocessing import LabelEncoder
encoder= LabelEncoder()

In [16]:
# encoding target column
df['target']= encoder.fit_transform(df['target'])

In [17]:
df.head()

Unnamed: 0,target,text
0,0,"Go until jurong point, crazy.. Available only ..."
1,0,Ok lar... Joking wif u oni...
2,1,Free entry in 2 a wkly comp to win FA Cup fina...
3,0,U dun say so early hor... U c already then say...
4,0,"Nah I don't think he goes to usf, he lives aro..."


In [20]:
# missing values
df.isnull().sum()

target    0
text      0
dtype: int64

In [22]:
# duplicate values
df.duplicated().sum()

403

In [24]:
# remove duplicate values
df= df.drop_duplicates(keep='first')

In [25]:
df.duplicated().sum()

0

In [27]:
df.shape

(5169, 2)

## 2. EDA

In [28]:
df.head()

Unnamed: 0,target,text
0,0,"Go until jurong point, crazy.. Available only ..."
1,0,Ok lar... Joking wif u oni...
2,1,Free entry in 2 a wkly comp to win FA Cup fina...
3,0,U dun say so early hor... U c already then say...
4,0,"Nah I don't think he goes to usf, he lives aro..."


In [29]:
df.value_counts()

target  text                                                                                                                              
0        &lt;#&gt;  in mca. But not conform.                                                                                                  1
        Thats cool. i liked your photos. You are very sexy!                                                                                   1
        That's good, because I need drugs                                                                                                     1
        That's fine, have him give me a call if he knows what he wants or has any questions                                                   1
        That's fine, I'll bitch at you about it later then                                                                                    1
                                                                                                                                             