In [1]:
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
import numpy as np

In [2]:
# Fetch the Iris dataset. X is a private copy of the feature matrix; y holds
# the species name (not the integer code) of every sample.
iris = load_iris()
X = np.array(iris.data, copy=True)
y = iris.target_names[iris.target]
# Number of quantile bins used when the features are discretized
n_quantiles = 3

In [3]:
def quantize(x, n_intervals=3):
    """Map every value of ``x`` to the index of the quantile bin it belongs to.

    The bin edges are the ``n_intervals + 1`` evenly spaced percentiles of
    ``x`` (from the 0th to the 100th). The bin of a value is the number of
    edges strictly below it, floored at 1 so that the minimum of ``x`` falls
    in the first bin.

    Parameters
    ----------
    x : 1-D array-like of numbers
        Values to discretize.
    n_intervals : int, default 3
        Number of quantile bins.

    Returns
    -------
    numpy.ndarray
        One integer bin index (at least 1) per element of ``x``.
    """
    values = np.asarray(x)
    edges = np.percentile(values, np.linspace(0, 100, num=n_intervals + 1))
    # Compare every value against every edge at once: entry i is the number
    # of edges strictly below values[i]
    edges_below = (values[:, np.newaxis] > edges).sum(axis=1)
    return np.maximum(edges_below, 1)

In [4]:
# Discretize features using simple quantile encoding, since we are
# implementing a multinomial version of the naive Bayes.
# X is rebuilt from the untouched iris.data instead of being overwritten in
# place, so re-running this cell never quantizes already-quantized values.
X = np.empty(iris.data.shape, dtype=int)
for feature in range(iris.data.shape[1]):
    X[:, feature] = quantize(iris.data[:, feature], n_quantiles)

In [13]:
# Split data into training and testing.
# The split is seeded so that a fresh "Restart & Run All" reproduces the same
# partition, and therefore the same downstream probabilities.
RANDOM_STATE = 42
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.33, random_state=RANDOM_STATE)
print("Num. train instances:", len(X_train))
print("Num. test instances:", len(X_test))
print("Num. features:", X.shape[1])
# Peek at one discretized sample and one label from each partition
print(X_train[0])
print(X_test[0])
print(y_train[0])
print(y_train.shape)
print(y_test[0])

('Num. train instances:', 100)
('Num. test instances:', 50)
('Num. features:', 4)
[2 2 2 2]
[2 1 2 2]
versicolor
(100,)
versicolor


In [6]:
# Problem dimensions: n training instances, d features and C distinct classes
classes = np.unique(y)
n, d = X_train.shape
C = len(classes)

# Additive (Laplace) smoothing constant
alpha = 1

In [7]:
# We store the smoothed class probabilities in this dictionary, keyed by
# class label:
#   P(y = c) = (count(y_train == c) + alpha) / (n + C*alpha)
class_probs = {}

# The denominator is the same for every class. It is made a float explicitly:
# without `from __future__ import division`, Python 2 floors int / int and
# every probability would collapse to 0.
prior_denominator = float(n + C * alpha)
for c in classes:
    class_probs[c] = ((y_train == c).sum() + alpha) / prior_denominator

In [9]:
# First, we get the set of values each feature takes in the training data:
possible_values = [set(X_train[:, feature]) for feature in range(d)]
print(possible_values)
# We will store the conditional probabilities in this dictionary.
# The key of the dictionary is a pair (feature, class) such that
#   feature_probs[j, c]
# is a dictionary mapping each possible value x of the j-th feature to its
# smoothed probability given class c, i.e.
#   P(X_{i,j} = x | y_i = c) can be obtained from: feature_probs[j, c][x]
feature_probs = {(j, c): {v: 0 for v in possible_values[j]}
                 for c in classes for j in range(d)}

# Now, we compute the above probabilities, for each feature, given each class:
#   (count of x within class c + alpha) / (size of class c + n_values*alpha)
for j in range(d):
    for c in classes:
        # This gives us the j-th feature of instances in class c
        in_class_c = X_train[y_train == c, j]
        # Independent of x, so computed once per (feature, class). It is a
        # float on purpose: under Python 2 an int / int division would floor
        # every probability to 0.
        denominator = float(len(in_class_c) + len(possible_values[j]) * alpha)
        for x in possible_values[j]:
            numerator = (in_class_c == x).sum() + alpha
            feature_probs[j, c][x] = numerator / denominator

[set([1, 2, 3]), set([1, 2, 3]), set([1, 2, 3]), set([1, 2, 3])]
