In [1]:
# Use the explicit %pip magic: it targets the running kernel's environment and
# does not rely on automagic being enabled (or on `pip` not being shadowed).
%pip install numpy

Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 24.3.1 -> 25.0.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [12]:
import numpy as np
from collections import defaultdict


# MODEL
class NaiveBayes:
    """Binary Naive Bayes classifier for categorical features.

    Labels equal to ``1`` form the positive ("YES") class; every other label
    value is counted as the negative ("NO") class.  No smoothing is applied,
    so a feature value that was never observed together with a class gives
    that class a likelihood of zero.

    Attributes:
        data: 2-D array of shape ``(records, feature)`` holding the training rows.
        label: 1-D array with one class label per training row.
        records: Number of training rows.
        feature: Number of feature columns.
        probability: Filled by ``fit``.  ``probability[i][value]`` maps a class
            key (``1`` for positive, ``0`` for negative) to the number of
            training rows in which column ``i`` equals ``value`` for that
            class.  A class with no such rows has no key.
        train: ``True`` once ``fit`` has completed.
    """

    def __init__(self, data, label):
        """Store the training set and validate its shape.

        Args:
            data: 2-D array-like of feature values, one row per record.
            label: 1-D array-like of class labels, one per record.

        Raises:
            ValueError: If ``data`` is not 2-D or ``label`` does not contain
                exactly one entry per row of ``data``.
        """
        # Coerce so that plain nested lists work with the column slicing in fit().
        self.data, self.label = np.asarray(data), np.asarray(label)
        if self.data.ndim != 2:
            raise ValueError("data must be a 2-D array of shape (records, features)")
        self.records, self.feature = self.data.shape
        if self.label.ndim != 1 or len(self.label) != self.records:
            raise ValueError("label must be 1-D with one entry per row of data")
        self.probability, self.train = {}, False

    def fit(self):
        """Count how often each feature value occurs with each class.

        Raises:
            ValueError: If the training set has no rows.
        """
        if self.records == 0:
            raise ValueError("Cannot fit on an empty training set")

        is_positive = self.label == 1
        counts = {}
        for i in range(self.feature):
            per_value = {}
            for value, positive in zip(self.data[:, i], is_positive):
                class_counts = per_value.setdefault(value, {})
                # Always key by 1/0 so predict() can look the counts up,
                # whatever raw value a negative label happens to have.
                key = 1 if positive else 0
                class_counts[key] = class_counts.get(key, 0) + 1
            counts[i] = per_value

        self.probability = counts
        self.train = True

    @staticmethod
    def _likelihood(count, class_total):
        """Return count / class_total, or 0.0 for a class with no training rows."""
        return count / class_total if class_total else 0.0

    def predict(self, new_data):
        """Classify each row of ``new_data`` as ``"YES"`` or ``"NO"``.

        Args:
            new_data: Iterable of rows, each with one value per trained feature.

        Returns:
            A tuple ``(result, priors)`` where ``result`` is a list with one
            ``"YES"``/``"NO"`` string per row (ties go to ``"NO"``) and
            ``priors`` is ``{"yes": P(yes), "no": P(no)}``.

        Raises:
            RuntimeError: If ``fit`` has not been called.
            ValueError: If a row does not have exactly ``feature`` values.
            KeyError: If a row contains a value never seen for that feature
                during training.
        """
        if not self.train:
            raise RuntimeError("The model is not trained")

        # Count the classes explicitly instead of unpacking np.unique(), which
        # breaks when only one class is present in the labels.
        yes = int(np.count_nonzero(self.label == 1))
        no = self.records - yes
        P_yes = np.float64(yes / self.records)
        P_no = np.float64(no / self.records)

        result = []
        for row in new_data:
            if len(row) != self.feature:
                raise ValueError(
                    f"expected {self.feature} feature values per row, got {len(row)}"
                )

            new_yes, new_no = 1, 1
            for i, value in enumerate(row):
                seen_values = self.probability[i]
                if value not in seen_values:
                    raise KeyError(
                        f"value {value!r} was never seen for feature {i} during training"
                    )
                class_counts = seen_values[value]
                # A class with no count for this value contributes a zero
                # likelihood; skipping it would silently treat it as 1.
                new_yes *= self._likelihood(class_counts.get(1, 0), yes)
                new_no *= self._likelihood(class_counts.get(0, 0), no)

            new_yes *= P_yes
            new_no *= P_no

            result.append("YES" if new_yes > new_no else "NO")

        return result, {"yes": P_yes, "no": P_no}
            

# DATASET
# Human-readable category names. These lists are not referenced by any other
# code in this cell; presumably the integer codes in `data` / `label` below
# index into them (column 0 -> age, column 1 -> gender, column 2 ->
# payment_method, label -> future_customer) — TODO confirm.
age = ["old", "young", "mid-age"]
gender = ["male", "female"]
payment_method = ["cash", "credit-card", "cheque"]
future_customer = ["no", "yes"]


# Training features: 14 rows, 3 integer-coded categorical columns.
data = np.array([
    [0, 0, 1],
    [1, 0, 2],
    [1, 1, 1],
    [1, 1, 1],
    [1, 0, 1],
    [1, 1, 2],
    [1, 1, 1],
    [2, 0, 1],
    [0, 1, 1],
    [1, 0, 1],
    [1, 0, 1],
    [2, 1, 0],
    [1, 0, 0],
    [1, 0, 1]
])


# One class label per row of `data`: 1 is the positive class, 0 the negative
# class (ten 1s and four 0s).
label = np.array([1,1,1,0,1,0,1,1,0,1,1,0,1,1])


# Train on the full dataset, then classify a single sample.
nb = NaiveBayes(data, label)
nb.fit()
# One row to classify; it has the same values as the first training row.
new_data = np.array([[0, 0, 1]])

# predict() returns a tuple: (list of "YES"/"NO" strings, dict of class priors).
result = nb.predict(new_data)

print(result)        

# Number of training rows.
print(len(data))

# Bare string literal: as the last expression of the cell its value is echoed
# as the cell's output. It looks like a scratch sketch of the nested layout of
# nb.probability (feature index -> value -> label -> count) — TODO confirm or remove.
'''
{0: {0: {0: , 1: } }, 1, 2}
'''

(['YES'], {'yes': np.float64(0.7142857142857143), 'no': np.float64(0.2857142857142857)})
14


'\n{0: {0: {0: , 1: } }, 1, 2}\n'