In [30]:
import math

In [31]:
class NaiveBayesClassifier:
    """Naive Bayes classifier for categorical features, using plain Python containers.

    ``train`` estimates the class priors P(c) and the conditional probabilities
    P(feature_i = value | c) as relative frequencies of the training data.
    ``predict`` adds up log-probabilities per class and returns the class with
    the highest score.

    Attributes:
        class_p: dict mapping class label -> prior probability P(c),
            e.g. {'no': ..., 'yes': ...}. Keys are in first-seen order.
        feature_p: dict mapping class label ->
            {(feature_index, value): P(value | class)}.
        unseen_prob: fallback probability used by ``predict`` when a
            (feature_index, value) pair never occurred with a class in training.
    """

    def __init__(self, unseen_prob=1e-6):
        """Create an untrained classifier.

        Args:
            unseen_prob: probability substituted for a feature value that was
                never observed together with a class. It only exists to keep
                ``math.log`` away from 0; it is a fixed floor, NOT Laplace
                (add-one) smoothing, so the per-feature probabilities of a
                class are not re-normalised. Must lie in (0, 1].

        Raises:
            ValueError: if ``unseen_prob`` is outside (0, 1].
        """
        if not 0 < unseen_prob <= 1:
            raise ValueError("unseen_prob must be in the interval (0, 1]")
        self.unseen_prob = unseen_prob

        #probability variables, filled in by train()
        self.class_p = {}
        self.feature_p = {}

    def train(self, X, y):
        """Estimate priors and conditional probabilities from labelled samples.

        Any previously learned probabilities are discarded.

        Args:
            X: iterable of samples; each sample is an indexable sequence of
                hashable feature values. The number of features is taken from
                the first sample.
            y: iterable of hashable class labels, one per sample in ``X``.

        Raises:
            ValueError: if ``X`` and ``y`` do not have the same length.
        """
        #materialise once so iterators, tuples and arrays work as well as lists
        samples = list(X)
        labels = list(y)
        if len(samples) != len(labels):
            raise ValueError(
                "X and y must have the same length (got %d and %d)"
                % (len(samples), len(labels))
            )

        self.class_p = {}
        self.feature_p = {}

        total_samples = len(labels)
        if total_samples == 0:
            return  #nothing to learn; the model stays empty

        total_features = len(samples[0])  #same for every class, so computed once

        #group the samples by class in a single pass.
        #a dict keeps the classes in first-seen order, which makes the
        #tie-breaking in predict() reproducible (iterating a set of strings
        #can change order from one interpreter run to the next).
        samples_by_class = {}
        for sample, label in zip(samples, labels):
            samples_by_class.setdefault(label, []).append(sample)

        for c, c_samples in samples_by_class.items():
            c_count = len(c_samples)  #no of samples in the current class
            self.class_p[c] = c_count / total_samples  #prior P(c)

            #count how often each (feature, value) pair occurs within this class
            value_counts = {}
            for sample in c_samples:
                for feature in range(total_features):
                    key = (feature, sample[feature])
                    value_counts[key] = value_counts.get(key, 0) + 1

            #convert counts to probabilities, e.g. P(overcast|yes), P(sunny|yes)...
            self.feature_p[c] = {
                key: count / c_count for key, count in value_counts.items()
            }

    def predict(self, x):
        """Return the most probable class label for one sample.

        Args:
            x: iterable of feature values, in the same feature order that was
                used for training.

        Returns:
            The class label with the highest log-posterior score. If several
            classes tie, the one that appeared first in the training labels wins.

        Raises:
            ValueError: if the classifier has not been trained on any data.
        """
        if not self.class_p:
            raise ValueError("classifier has not been trained; call train() first")

        x = list(x)  #x is walked once per class, so a one-shot iterator must be copied

        results = {}
        for c, prior in self.class_p.items():
            #sum logs instead of multiplying probabilities to avoid underflow
            log_prob = math.log(prior)
            likelihoods = self.feature_p[c]
            for i, val in enumerate(x):  #i is the feature index, val is the feature value
                #a value never seen with this class falls back to unseen_prob
                #so that log(0) is never evaluated
                prob = likelihoods.get((i, val), self.unseen_prob)  #P(feature=val | class=c)
                log_prob += math.log(prob)
            results[c] = log_prob  #looks like {'yes': ..., 'no': ...}

        #return the key whose score is largest; max() keeps the first key on ties
        return max(results, key=results.get)

In [32]:
#toy training data, one row per observation: [weather, temperature]
X = [
    ["sunny", "hot"],
    ["sunny", "hot"],
    ["overcast", "hot"],
    ["rainy", "mild"],
    ["rainy", "cool"],
    ["rainy", "cool"],
    ["overcast", "cool"],
    ["sunny", "mild"],
    ["sunny", "cool"],
    ["rainy", "mild"],
]

#class label for each row of X, in the same order
y = ["no", "no", "yes", "yes", "yes", "no", "yes", "no", "yes", "yes"]

#fit the classifier on the toy data
nb = NaiveBayesClassifier()
nb.train(X, y)

#classify one new observation and show the result
test = ["sunny", "cool"]
print("Predicted class:", nb.predict(test))

Predicted class: no
