Importing Libraries

In [179]:
import pandas as pd
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
from sklearn.naive_bayes import GaussianNB
from sklearn.naive_bayes import MultinomialNB
from sklearn.tree import DecisionTreeClassifier

Loading dataset

In [180]:
import pandas as pd

# Read the spam/ham CSV into a DataFrame; later cells refer to it as `data`.
data = pd.read_csv(filepath_or_buffer="/content/spam_ham_dataset.csv")

Preprocessing

In [181]:
# Show the column labels of the loaded frame before any preprocessing.
data.columns

Index(['Unnamed: 0', 'label', 'text', 'label_num'], dtype='object')

In [182]:
# Drop unnecessary columns
data.drop(['Unnamed: 0', 'label_num'], axis=1, inplace=True)
# Convert labels to binary values in label column
data['label'] = data['label'].map({'ham': 0, 'spam': 1})

In [183]:
# Show the column labels again to check which columns remain after the drop above.
data.columns

Index(['label', 'text'], dtype='object')

Split the dataset into training and testing sets

In [184]:
# Hold out 30% of the rows for evaluation; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    data['text'],
    data['label'],
    test_size=0.3,
    random_state=0,
)

Vectorizing the text column so it can be used for model training

In [185]:
# Bag-of-words token counts with default CountVectorizer settings.
vectorizer = CountVectorizer()

# The vocabulary is fitted on the training split only ...
X_train_vect = vectorizer.fit_transform(raw_documents=X_train)
# ... and the test split is encoded with that same, already-fitted vocabulary.
X_test_vect = vectorizer.transform(raw_documents=X_test)

**Training Models:**
1. Gaussian Naive Bayes
2. Multinomial Naive Bayes
3. J48 Decision Tree

In [213]:
GNB = GaussianNB() #Gaussian Naive Bayes
MNB = MultinomialNB() #Multinomial Naive Bayes
# NOTE(review): scikit-learn's DecisionTreeClassifier is a CART-style tree; it is used
# here as a stand-in for J48 (Weka's C4.5), not as an exact equivalent.
# random_state is fixed because the tree randomly permutes features when searching for
# splits, so an unseeded tree can differ (and score differently) from run to run.
J48 = DecisionTreeClassifier(random_state=0) #J48 Decision Tree

In [214]:
# Display name -> estimator instance; the results loop iterates over this mapping.
models = {
    "Gaussian Naive Bayes": GNB,
    "Multinomial Naive Bayes": MNB,
    "J48 Decision Tree": J48,
}

Function to compute the following for a given model:




1. Predictions

2. Accuracy

3. Confusion Matrix















In [208]:
def model_results(model, train_features=None, train_labels=None, test_features=None, test_labels=None):
  """Fit ``model``, print its test accuracy and confusion matrix, and return both.

  Args:
    model: Estimator exposing ``fit(X, y)`` and ``predict(X)``.
    train_features: Training feature matrix. Defaults to the notebook-level
      ``X_train_vect``.
    train_labels: Training labels. Defaults to the notebook-level ``y_train``.
    test_features: Evaluation feature matrix. Defaults to the notebook-level
      ``X_test_vect``.
    test_labels: Evaluation labels. Defaults to the notebook-level ``y_test``.

  Returns:
    A tuple ``(accuracy, cm)`` holding the value from ``accuracy_score`` and the
    matrix from ``confusion_matrix``, so callers can collect and compare results
    instead of reading them back from printed output.
  """
  # Fall back to the notebook's split/vectorized data so existing
  # ``model_results(model)`` calls keep working unchanged.
  if train_features is None:
    train_features = X_train_vect
  if train_labels is None:
    train_labels = y_train
  if test_features is None:
    test_features = X_test_vect
  if test_labels is None:
    test_labels = y_test

  def _dense(matrix):
    # Sparse matrices are densified, as the original code did for every model;
    # inputs that are already dense (no ``toarray``) pass through untouched.
    return matrix.toarray() if hasattr(matrix, "toarray") else matrix

  model.fit(_dense(train_features), train_labels)
  y_pred = model.predict(_dense(test_features))
  accuracy = accuracy_score(test_labels, y_pred)
  print('Accuracy:', accuracy)
  cm = confusion_matrix(test_labels, y_pred)
  print("Confusion Matrix: \n", cm)
  return accuracy, cm

Loop over the models and print each one's results

In [212]:
# Evaluate every registered model in turn, printing a separator after each report.
for name, estimator in models.items():
  print(name, "Results:")
  model_results(estimator)
  print("----"*10)

Gaussian Naive Bayes Results:
Accuracy: 0.958118556701031
Confusion Matrix: 
 [[1107   21]
 [  44  380]]
----------------------------------------
Multinomial Naive Bayes Results:
Accuracy: 0.9774484536082474
Confusion Matrix: 
 [[1115   13]
 [  22  402]]
----------------------------------------
J48 Decision Tree Results:
Accuracy: 0.9368556701030928
Confusion Matrix: 
 [[1080   48]
 [  50  374]]
----------------------------------------
