## Naive_Bayes_All Models

In [25]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, classification_report,confusion_matrix
from sklearn.naive_bayes import GaussianNB,MultinomialNB,BernoulliNB
from sklearn.naive_bayes import CategoricalNB,ComplementNB
import warnings
warnings.filterwarnings('ignore')


In [2]:
# Load the ads dataset from the working directory and preview the first rows.
data = pd.read_csv(filepath_or_buffer="Social_Network_Ads.csv")
data.head()

Unnamed: 0,User ID,Gender,Age,EstimatedSalary,Purchased
0,15624510,Male,19,19000,0
1,15810944,Male,35,20000,0
2,15668575,Female,26,43000,0
3,15603246,Female,27,57000,0
4,15804002,Male,19,76000,0


In [3]:
# Report the dataset dimensions: row count first, then column count.
print("No of rows :", len(data.index))
print("No of columns", len(data.columns))


No of rows : 400
No of columns 5


In [7]:
# Drop the 'User ID' column, which is not wanted as a feature.
# This cell rebinds `data` to its own output, so a plain drop would raise
# KeyError the second time the cell is run (the column is already gone).
# errors='ignore' makes the cell safe to re-run.
data = data.drop(columns=['User ID'], errors='ignore')
data.head(5)

Unnamed: 0,Gender,Age,EstimatedSalary,Purchased
0,Male,19,19000,0
1,Male,35,20000,0
2,Female,26,43000,0
3,Female,27,57000,0
4,Male,19,76000,0


In [9]:
# 'Gender' is the one non-numeric column: one-hot encode it as integers and
# drop the first level so only a single indicator column is added.
df = pd.get_dummies(data=data, dtype=int, drop_first=True)
df.head()

Unnamed: 0,Age,EstimatedSalary,Purchased,Gender_Male
0,19,19000,0,1
1,35,20000,0,1
2,26,43000,0,0
3,27,57000,0,0
4,19,76000,0,1


In [11]:
# Separate the target ('Purchased') from the predictor columns.
dependent = df['Purchased']
independent = df.drop('Purchased', axis=1)

In [13]:
# Count how many rows fall into each class of the 'Purchased' target.
dependent.value_counts()

Purchased
0    257
1    143
Name: count, dtype: int64

In [15]:
# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    independent,
    dependent,
    random_state=42,
    test_size=0.2,
)

# Feature scaling is intentionally not applied: standardization produces
# negative values, and MultinomialNB, CategoricalNB and ComplementNB all
# reject negative feature values.


In [27]:
# Fit every Naive Bayes variant on the same train/test split, print its
# accuracy and classification report, and remember the most accurate one.
#
# NOTE(review): the winner is chosen by test-set accuracy, so the reported
# "best" accuracy is optimistic. Use cross-validation on the training split
# if an unbiased estimate is needed.

# CategoricalNB treats each distinct integer as a category index and sizes its
# probability tables from the largest value seen during fit. A larger value
# that appears only in X_test would then raise IndexError at predict time.
# Sizing the tables from both splits prevents that, and changes nothing when
# the training split already contains every column's maximum.
category_counts = pd.concat([X_train, X_test]).max().to_numpy().astype(int) + 1

# Models dictionary
models = {
    "Gaussian_NB": GaussianNB(),
    "Multinomial_NB": MultinomialNB(),
    # NOTE(review): BernoulliNB binarizes features at 0.0 by default, which
    # presumably collapses Age and EstimatedSalary to a constant 1 here --
    # confirm, and consider setting `binarize` if this model matters.
    "Bernoulli_NB": BernoulliNB(),
    "CategoricalNB": CategoricalNB(min_categories=category_counts),
    "Complement_NB": ComplementNB()
}

best_model_name = None
best_model = None
best_accuracy = 0

print("Model Evaluation:\n")
for name, model in models.items():
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    acc = accuracy_score(y_test, y_pred)
    # A model may never predict one of the classes; report 0 for the affected
    # metrics explicitly instead of relying on a globally silenced warning.
    clf_acc = classification_report(y_test, y_pred, zero_division=0)
    print(f"{name} Accuracy: {acc:.4f}")
    print("classification_report :\n",clf_acc)

    # Save best model (strict '>' keeps the earliest model on ties)
    if acc > best_accuracy:
        best_accuracy = acc
        best_model_name = name
        best_model = model

print(f"\n Best Model: {best_model_name} with Accuracy: {best_accuracy:.4f}")


Model Evaluation:

Gaussian_NB Accuracy: 0.9375
classification_report :
               precision    recall  f1-score   support

           0       0.94      0.96      0.95        52
           1       0.93      0.89      0.91        28

    accuracy                           0.94        80
   macro avg       0.93      0.93      0.93        80
weighted avg       0.94      0.94      0.94        80

Multinomial_NB Accuracy: 0.6000
classification_report :
               precision    recall  f1-score   support

           0       0.68      0.73      0.70        52
           1       0.42      0.36      0.38        28

    accuracy                           0.60        80
   macro avg       0.55      0.54      0.54        80
weighted avg       0.59      0.60      0.59        80

Bernoulli_NB Accuracy: 0.6500
classification_report :
               precision    recall  f1-score   support

           0       0.65      1.00      0.79        52
           1       0.00      0.00      0.00        2