In [10]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
import warnings

In [2]:
# Location of the golf CSV, relative to the notebook's working directory.
GOLF_CSV_PATH = "golf dataset/golf_df.csv"
data = pd.read_csv(GOLF_CSV_PATH)

In [3]:
# Show the loaded frame (as the cell's last expression it gets the rich
# DataFrame rendering) to check its columns and values before modelling.
data

Unnamed: 0,Outlook,Temperature,Humidity,Windy,Play
0,sunny,hot,high,False,no
1,sunny,hot,high,True,no
2,overcast,hot,high,False,yes
3,rainy,mild,high,False,yes
4,rainy,cool,normal,False,yes
5,rainy,cool,normal,True,no
6,overcast,cool,normal,True,yes
7,sunny,mild,high,False,no
8,sunny,cool,normal,False,yes
9,rainy,mild,normal,False,yes


In [4]:
class NaiveBayes:
    """Naive Bayes classifier for categorical features held in a DataFrame.

    Every column of the training frame is treated as a categorical
    attribute.  ``fit`` estimates the class priors ``P(C)`` and the
    per-attribute conditionals ``P(value | C)`` from counts; ``predict``
    returns the class that maximises ``P(C) * prod_i P(value_i | C)``.

    Parameters
    ----------
    output_types : iterable
        The class labels to score, in tie-breaking order (on equal scores
        the earliest label wins).
    alpha : float, default 1.0
        Additive (Laplace) smoothing applied to the conditional estimates:
        ``(count(value, C) + alpha) / (count(C) + alpha * n_values)``.
        Smoothing stops one unseen (value, class) pair from zeroing out a
        whole class.  ``alpha=0`` gives the plain unsmoothed frequencies.

    Attributes set by ``fit``
    -------------------------
    attributes : pandas.Index
        Feature column names, in training-frame order.
    class_prob : dict
        ``{class: P(class)}``; 0 for a class that has no training rows.
    attribute_prob : dict
        ``{attribute: {value: {class: P(value | class)}}}``.
    unseen_prob : dict
        ``{attribute: {class: p}}`` used for values that never occurred
        in the training data.
    """

    def __init__(self, output_types, alpha=1.0):
        if alpha < 0:
            raise ValueError("alpha must be non-negative")
        self.output_types = output_types
        self.alpha = alpha

    @staticmethod
    def _align_labels(x, y):
        """Return ``y`` as a Series that shares ``x``'s index.

        A Series is aligned on index labels (what a column assignment
        would do); any other sequence is matched to the rows by position.
        """
        if isinstance(y, pd.Series):
            return y if y.index.equals(x.index) else y.reindex(x.index)
        return pd.Series(list(y), index=x.index)

    def fit(self, x, y):
        """Estimate priors and conditionals from the training data.

        Parameters
        ----------
        x : pandas.DataFrame
            One categorical attribute per column.  The frame is only
            read; it is never modified.
        y : pandas.Series or sequence
            Class label of each row of ``x``.

        Raises
        ------
        ValueError
            If the training set is empty or ``x`` and ``y`` differ in
            length.
        """
        n_rows = len(y)
        if n_rows == 0:
            raise ValueError("cannot fit on an empty training set")
        if len(x) != n_rows:
            raise ValueError("x and y must have the same number of rows")

        labels = self._align_labels(x, y)

        self.attributes = x.columns
        self.class_prob = {}
        self.attribute_prob = {}
        self.unseen_prob = {}

        # Count each class once.  A plain dict lets a class that is missing
        # from this training split fall back to a count of 0 instead of
        # raising KeyError.
        class_counts = labels.value_counts().to_dict()
        is_class = {}
        for C_i in self.output_types:
            self.class_prob[C_i] = class_counts.get(C_i, 0) / n_rows
            is_class[C_i] = (labels == C_i).to_numpy()

        for attribute_i in self.attributes:
            column = x[attribute_i]
            values = column.unique()

            # Smoothed denominator per class; it depends on the attribute
            # only through its number of distinct values.
            denominators = {
                C_i: class_counts.get(C_i, 0) + self.alpha * len(values)
                for C_i in self.output_types
            }

            a_i = {}
            for attribute_i_type in values:
                has_value = (column == attribute_i_type).to_numpy()
                a_i_type = {}
                for C_i in self.output_types:
                    joint = int((has_value & is_class[C_i]).sum())
                    denominator = denominators[C_i]
                    # A zero denominator only happens for alpha == 0 and a
                    # class without training rows; its likelihood is 0.
                    a_i_type[C_i] = (
                        (joint + self.alpha) / denominator if denominator > 0 else 0.0
                    )
                a_i[attribute_i_type] = a_i_type

            self.attribute_prob[attribute_i] = a_i
            self.unseen_prob[attribute_i] = {
                C_i: (self.alpha / denominators[C_i] if denominators[C_i] > 0 else 0.0)
                for C_i in self.output_types
            }

    def _predict_row(self, row):
        """Return the most probable class for one row (Series or mapping)."""
        output_prob = {}

        for C_i in self.output_types:
            P_X_given_C_i = 1

            for attribute in self.attributes:
                by_class = self.attribute_prob[attribute].get(row[attribute])
                if by_class is None:
                    # Value never seen during training: use the smoothed
                    # zero-count estimate rather than failing.
                    P_X_given_C_i *= self.unseen_prob[attribute][C_i]
                else:
                    P_X_given_C_i *= by_class[C_i]

            output_prob[C_i] = P_X_given_C_i * self.class_prob[C_i]

        # max() keeps the first maximal key, so ties resolve in
        # output_types order.
        return max(output_prob, key=output_prob.get)

    def predict(self, x):
        """Predict the class of a single sample.

        Parameters
        ----------
        x : pandas.DataFrame or pandas.Series
            A DataFrame (only its first row is classified) or a single
            row given as a Series indexed by attribute name.

        Returns
        -------
        The label from ``output_types`` with the highest score.
        """
        row = x.iloc[0] if isinstance(x, pd.DataFrame) else x
        return self._predict_row(row)

    def accuracy(self, x, y):
        """Return the fraction of rows of ``x`` classified as in ``y``.

        Parameters
        ----------
        x : pandas.DataFrame
            Samples to classify, one per row.
        y : pandas.Series or sequence
            True label of each row of ``x``.

        Raises
        ------
        ValueError
            If ``x`` is empty or ``x`` and ``y`` differ in length.
        """
        n_rows = len(x)
        if n_rows == 0:
            raise ValueError("cannot compute accuracy on an empty set")
        if len(y) != n_rows:
            raise ValueError("x and y must have the same number of rows")

        labels = self._align_labels(x, y)
        n_correct = sum(
            1
            for position in range(n_rows)
            if self._predict_row(x.iloc[position]) == labels.iloc[position]
        )
        return n_correct / n_rows

In [33]:
# Split the frame into the feature columns and the "Play" target.
x = data.drop(columns=["Play"])
y = data["Play"]

# Seed the shuffle so the split, and every result computed from it, is the
# same on each Restart & Run All.
SPLIT_SEED = 42
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.2, random_state=SPLIT_SEED
)

print("train size:", len(x_train),"\ntest size: ",len(x_test))

train size: 11 
test size:  3


In [34]:
# The distinct target labels, in order of first appearance, are the classes
# the model will score.
class_labels = y.unique()
model = NaiveBayes(class_labels)

In [35]:
# Give fit() its own copy of the training frame: nothing it does to its
# input can then reach x_train or trigger pandas' copy warnings, so there is
# no need for a session-wide warnings filter that would also hide every
# unrelated warning raised later in the notebook.
model.fit(x_train.copy(), y_train)

In [36]:
# Classify the held-out rows one at a time and print each prediction next to
# the true label.
for row_pos, actual in enumerate(y_test):
    single_row = x_test.iloc[row_pos:row_pos + 1, :]
    predicted = model.predict(single_row)
    print("real output: ", actual, "  predicted output: ", predicted)

real output:  yes   predicted output:  yes
real output:  yes   predicted output:  yes
real output:  no   predicted output:  no
