In [1]:
from math import exp, sqrt

from numpy import append, array, prod
from scipy.stats import norm
from sklearn import naive_bayes
from sklearn.datasets import load_iris

In [2]:
def predict_proba_custom(M, x):
    """Compute Gaussian naive Bayes posteriors P(c | x) by hand.

    Parameters
    ----------
    M : fitted Gaussian naive Bayes model
        Must expose ``theta_`` (per-class feature means), ``var_`` or the
        legacy ``sigma_`` (per-class feature variances), ``class_prior_``
        and ``class_count_``.
    x : sequence of float
        A single attribute vector, one value per feature.

    Returns
    -------
    numpy.ndarray
        One posterior probability per class, in the model's class order.
    """
    # Read every parameter from the model that was passed in, never from a
    # notebook-level global, so the function works for any fitted model.
    theta_c = M.theta_
    # scikit-learn renamed `sigma_` to `var_`; prefer the new name and fall
    # back to the old one so both old and new releases are supported.
    variance_c = M.var_ if hasattr(M, "var_") else M.sigma_

    # get class probabilities from model
    class_probabilities = M.class_prior_

    # log P(x | c) for each class. By the independence assumption
    # P(x | c) is the product of the per-attribute densities P(x_i | c);
    # summing log-densities instead avoids underflowing to 0.
    log_likelihoods = []
    for class_num in range(len(M.class_count_)):
        # per-class expectations and variances
        theta_c_i = theta_c[class_num]
        variance_c_i = variance_c[class_num]

        log_likelihoods.append(sum(
            norm.logpdf(x_i, loc=theta_c_i[i], scale=sqrt(variance_c_i[i]))
            for i, x_i in enumerate(x)
        ))

    # Shifting all log-likelihoods by their maximum rescales every numerator
    # by the same factor, which cancels in the normalization below but keeps
    # the largest term at exp(0) == 1.
    shift = max(log_likelihoods)

    # numerator of P(c | x): prior_c * P(x | c) (up to the common factor)
    numerators = [
        prior_c * exp(log_lik - shift)
        for prior_c, log_lik in zip(class_probabilities, log_likelihoods)
    ]

    # normalizing term K
    K = sum(numerators)

    return array([numerator / K for numerator in numerators])


In [3]:
# Load the Iris dataset and split it into the feature matrix and labels.
iris = load_iris()
X, target = iris['data'], iris['target']

# Fit a Gaussian naive Bayes classifier on the full Iris dataset
# (`fit` returns the estimator itself, so the calls can be chained).
clf_NB = naive_bayes.GaussianNB().fit(X, target)

# Query point: one value per Iris feature.
x_0 = [5, 3, 2, .8]
proba = predict_proba_custom(clf_NB, x_0)

# Report each posterior, numbering the classes from 1.
for class_num, prob in enumerate(proba):
    print("Probability Class {}: {:f}".format(class_num + 1, prob))

Probability Class 1: 0.385446
Probability Class 2: 0.614554
Probability Class 3: 0.000000
