In [9]:
import pandas as pd
import numpy as np
from collections import Counter

In [10]:
# Load the training table from a CSV file in the working directory.
# The NaiveBayes class defined in this notebook treats the LAST column of this
# frame as the class label and every preceding column as a feature.
dataset = pd.read_csv('allelectronics.csv')

In [11]:
# Bare last expression: lets the notebook render the loaded frame for inspection.
dataset

Unnamed: 0,age,income,student,credit_rating,Class
0,youth,high,no,fair,no
1,youth,high,no,excellent,no
2,middle_aged,high,no,fair,yes
3,senior,medium,no,fair,yes
4,senior,low,yes,fair,yes
5,senior,low,yes,excellent,no
6,middle_aged,low,yes,excellent,yes
7,youth,medium,no,fair,no
8,youth,low,yes,fair,yes
9,senior,medium,yes,fair,yes


In [12]:
class NaiveBayes:
    """Naive Bayes classifier for categorical features held in a DataFrame.

    The last column of ``dataset`` is used as the class label; every other
    column is used as a feature. Probabilities are plain relative frequencies
    (no smoothing), so a feature value never observed together with a class
    contributes a factor of 0 for that class.

    Usage: construct with the training frame, call ``train()`` once, then call
    ``predict()`` with a ``{attribute: value}`` dictionary.
    """

    def __init__(self, dataset):
        """Store the training frame and derive the label/feature column names.

        Parameters
        ----------
        dataset : pandas.DataFrame
            Training data; the last column is the class label.
        """
        self.__dataset = dataset
        columns = list(self.__dataset.keys())
        self.__classAttribute = columns[-1]
        self.__attributes = columns[:-1]
        # dict.fromkeys de-duplicates while keeping first-appearance order.
        # A set would order string labels by hash, which varies between
        # interpreter runs and would make tie-breaking in predict() random.
        self.__classes = list(dict.fromkeys(self.__dataset[self.__classAttribute]))
        # All learned state is declared here so that predict() can detect an
        # untrained model instead of failing on a missing attribute.
        self.__classCounts = dict()
        self.__classProbabilites = dict()
        self.__featureProbabilites = dict()

    def train(self):
        """Estimate the class priors and per-class feature likelihoods."""
        self.__classCounts = dict(Counter(self.__dataset[self.__classAttribute]))
        self.__featureProbabilites = {}
        self.__initClassProbabilities()
        self.__initFeatureProbabilites()

    def __initClassProbabilities(self):
        """Compute P(class) as class count divided by the number of rows."""
        # Fall back to counting here if train() has not populated the counts,
        # so this helper still works when invoked on its own.
        counts = self.__classCounts or dict(Counter(self.__dataset[self.__classAttribute]))
        totalNumberOfTuples = sum(counts.values())
        self.__classProbabilites = {
            key: self.__getProbability(count, totalNumberOfTuples)
            for key, count in counts.items()
        }

    def __initFeatureProbabilites(self):
        """Compute P(attribute == value | class) for every observed combination.

        Returns the nested mapping ``{attribute: {value: {class: probability}}}``,
        which is also stored on the instance.
        """
        for attribute in self.__attributes:
            data = {}
            for attributeValue in self.__getAttributeValues(attribute):
                data[attributeValue] = {
                    classValue: self.__getProbability(
                        self.__getCounts(
                            (attribute, attributeValue),
                            (self.__classAttribute, classValue),
                        ),
                        self.__classCounts[classValue],
                    )
                    for classValue in self.__classes
                }
            self.__featureProbabilites[attribute] = data
        return self.__featureProbabilites

    def __getAttributeValues(self, attribute):
        """Return the distinct values of ``attribute`` in first-appearance order."""
        return list(dict.fromkeys(self.__dataset[attribute]))

    def __getCounts(self, tuple1, tuple2):
        """Count rows where both ``(column, value)`` conditions hold.

        Each argument is a ``(column_name, value)`` pair.
        """
        # Count against the frame this instance was built with. Indexing a
        # module-level ``dataset`` here would tie the class to notebook state.
        matches = (
            (self.__dataset[tuple1[0]] == tuple1[1])
            & (self.__dataset[tuple2[0]] == tuple2[1])
        )
        return int(matches.sum())

    def __getProbability(self, n, N):
        """Return the relative frequency ``n / N``."""
        return n / N

    def __getClassProbabilities(self):
        """Return the learned ``{class: prior}`` mapping."""
        return self.__classProbabilites

    def __getFeatureProbabilities(self):
        """Return the learned ``{attribute: {value: {class: p}}}`` mapping."""
        return self.__featureProbabilites

    def predict(self, featureDictionary):
        """Return the class with the highest (unnormalised) posterior score.

        Parameters
        ----------
        featureDictionary : dict
            Maps attribute name to the observed value. Attributes that are not
            listed are simply left out of the product.

        Returns
        -------
        The class label with the largest score; on a tie, the class that
        appears first in the training data wins.

        Raises
        ------
        RuntimeError
            If called before ``train()``.
        KeyError
            If an attribute or value was not present in the training data.
        """
        if not self.__classProbabilites:
            raise RuntimeError("NaiveBayes.predict() called before train()")

        # Validate once up front so the error names the offending pair.
        for key, value in featureDictionary.items():
            if key not in self.__featureProbabilites:
                raise KeyError("unknown attribute: {!r}".format(key))
            if value not in self.__featureProbabilites[key]:
                raise KeyError(
                    "value {!r} was never observed for attribute {!r}".format(value, key)
                )

        probabilitesOfClasses = []
        for classValue in self.__classes:
            probability = 1
            for key, value in featureDictionary.items():
                probability *= self.__featureProbabilites[key][value][classValue]
            probability *= self.__classProbabilites[classValue]
            probabilitesOfClasses.append(probability)

        # np.argmax returns the first maximum, so ties follow self.__classes order.
        return self.__classes[int(np.argmax(probabilitesOfClasses))]
    
    
        
        
    
    
    
        
        
        
        

In [13]:
# Build the classifier from the loaded frame, then estimate its probabilities.
# train() must run before predict(): the probability tables are filled there.
classifier = NaiveBayes(dataset)
classifier.train()


In [14]:
# Query tuple to classify: one value per feature column of the training frame.
feature = dict(
    age='youth',
    income='medium',
    student='yes',
    credit_rating='fair',
)
classifier.predict(feature)

'yes'

In [15]:
# Load a second table from the working directory.
# NOTE(review): the column labels shown for this frame (171, 63, 1) look like
# data values rather than names — the file may have no header row, in which
# case its first record is being consumed as the header. Confirm whether
# header=None (plus explicit names) is needed.
latest = pd.read_csv('latest.csv')

Unnamed: 0,171,63,1
0,170,58,1
1,172,61,1
2,152,62,2
3,153,58,1
4,148,73,2
5,159,69,2
6,178,46,0
7,179,80,2
8,178,68,1
9,169,50,0
