In [None]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score,precision_score,f1_score,recall_score
def read_data(file_path):
    """Load a CSV file into a pandas DataFrame.

    Args:
        file_path: Location of the CSV file; forwarded unchanged to
            ``pd.read_csv``.

    Returns:
        The DataFrame produced by ``pd.read_csv`` using its default options.
    """
    frame = pd.read_csv(file_path)
    return frame



In [None]:
class NaiveBayes():
    """Categorical Naive Bayes classifier built from raw frequency counts.

    Every feature is treated as categorical: each distinct value seen during
    training gets its own conditional probability per class. No smoothing is
    applied, so a value that was never observed in training (or never observed
    together with a given class) contributes a probability of zero.

    Attributes:
        class_prob: Maps class label -> prior probability P(class).
        features_prob: Maps class label -> feature name -> feature value ->
            conditional probability P(value | class).
    """

    def __init__(self):
        self.class_prob={}
        self.features_prob={}


    def fit(self,X_train,Y_train):
        """Estimate class priors and per-feature conditional probabilities.

        Args:
            X_train: DataFrame of training features, one row per sample.
            Y_train: 1-D sequence of class labels (Series, array or list) in
                the same row order as ``X_train``.
        """
        # Work with a positional array so labels pair with rows by order,
        # independent of any pandas index carried by Y_train.
        labels = np.asarray(Y_train)
        classes,counts=np.unique(labels,return_counts=True)
        total_samples=len(labels)

        # Rebuild both tables from scratch so a refit never keeps stale classes.
        self.class_prob={c: count/total_samples for c,count in zip(classes,counts)}
        self.features_prob={}

        # Pair each class with its own count; `counts` is ordered like
        # `classes`, so it must not be indexed by the label value itself.
        for c,class_count in zip(classes,counts):
            in_class = labels == c  # loop-invariant across features
            self.features_prob[c]={}
            for feature in X_train.columns:
                column = X_train[feature]
                value_probs = {}
                for value in column.unique():
                    # Missing comparisons (NA) count as "not equal".
                    is_value = (column == value).to_numpy(dtype=bool, na_value=False)
                    value_probs[value] = np.sum(is_value & in_class) / class_count
                self.features_prob[c][feature] = value_probs


    def predict(self,X_test):
        """Predict the most probable class for every row of ``X_test``.

        Args:
            X_test: DataFrame whose columns were all present in the frame
                passed to ``fit``.

        Returns:
            A list with exactly one predicted class label per row, in row
            order. When every class scores zero for a row (e.g. an unseen
            feature value), the first fitted class is returned because ties
            are resolved in favour of the earliest class.
        """
        predictions=[]
        for _,row in X_test.iterrows():
            max_prob=-1
            predicted_class=None
            for c in self.class_prob:
                prob = self.class_prob[c]
                for feature, value in row.items():
                    # A value never seen in training has probability zero.
                    prob *= self.features_prob[c][feature].get(value, 0)
                if prob > max_prob:
                    max_prob = prob
                    predicted_class = c
            # Record the winner once per row, after all classes were scored.
            predictions.append(predicted_class)
        return predictions


In [None]:
# Load the dataset; the columns at positions 1-3 are used as features and the
# "Purchased" column as the target.
data = read_data("Social_Network_Ads(1).csv")
X = data.iloc[:, 1:4]
y = data["Purchased"]

# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable.
X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=0,
)

# Fit the classifier on the training split, then run it on the held-out features.
model = NaiveBayes()
model.fit(X_train, Y_train)
Y_pred = model.predict(X_test)