In [None]:
import numpy as np

# Gaussian naive Bayes classifier (known per-class variances, MAP estimate of the class means):
class GaussianNaiveBayes:
  """Gaussian naive Bayes classifier with known variances and MAP class means.

  Every feature is modelled as an independent Gaussian per class. The
  variances are supplied by the caller (``vars``); the class means are
  estimated from the data and shrunk towards a Gaussian prior
  ``N(prior_mean[c], prior_var[c])`` (maximum-a-posteriori estimate).

  Attributes:
    N: number of training samples.
    num_classes: ``max(labels) + 1``.
    prior_value: array of shape (num_classes,) with the class frequencies.
    vars: the variances passed to the constructor, indexed by class.
    means: array of shape (num_classes, num_features) with the MAP means.
  """

  def __init__(self, dataset, labels, vars, prior_var=(1.0, 1.0), prior_mean=(0.0, 0.0)):
    """Fit the class priors and the MAP class means.

    Args:
      dataset: array of shape (num_samples, num_features).
      labels: class index of every sample, starting at 0. Any shape with
        num_samples elements is accepted; it is flattened.
      vars: likelihood variances, indexable by class index. Each entry is
        broadcast against the feature axis (scalar or per-feature values).
      prior_var: variance of the Gaussian prior on each class mean, indexable
        by class index. The default only covers two classes.
      prior_mean: mean of the Gaussian prior on each class mean, indexable by
        class index. The default only covers two classes.
    """
    dataset = np.asarray(dataset)
    labels = np.asarray(labels).reshape(-1)
    self.N = dataset.shape[0]
    self.prior_var = prior_var
    self.prior_mean = prior_mean
    self.num_classes = int(np.amax(labels) + 1)
    self.prior_value = self.get_prior_value(labels)
    self.vars = vars
    self.means = self.get_likelihood_value(dataset, labels)

  def get_prior_value(self, labels):
    """Return the prior probability (relative frequency) of every class.

    Args:
      labels: class index of every training sample.

    Returns:
      float64 array of shape (num_classes,): count of each class / ``self.N``.
    """
    labels = np.asarray(labels).reshape(-1)
    counts = np.array(
        [np.count_nonzero(labels == c) for c in range(self.num_classes)],
        dtype=np.float64)
    return counts / self.N

  def _joint_log_likelihood(self, x):
    """Return log p(class) + log p(x | class), shape (num_classes, num_samples)."""
    # A 1-D input is treated as a single sample.
    x = np.atleast_2d(np.asarray(x, dtype=np.float64))
    # A class without training samples has prior 0; log(0) = -inf is the
    # intended score for it, so the divide-by-zero warning is silenced.
    with np.errstate(divide="ignore"):
      log_prior = np.log(self.prior_value)
    scores = []
    for i in range(self.num_classes):
      var = np.asarray(self.vars[i], dtype=np.float64)
      # Gaussian log-density per feature; the normaliser is
      # log(sqrt(2*pi*var)) = 0.5 * log(2*pi*var).
      log_pdf = (-0.5 * (x - self.means[i, :]) ** 2 / var
                 - 0.5 * np.log(2.0 * np.pi * var))
      scores.append(log_prior[i] + np.sum(log_pdf, axis=1))
    return np.array(scores)

  def predict(self, x):
    """Predict the most probable class of every sample.

    Args:
      x: array of shape (num_samples, num_features), or a single sample of
        shape (num_features,).

    Returns:
      Integer array of shape (num_samples,) with the predicted class indices.
    """
    return np.argmax(self._joint_log_likelihood(x), axis=0)

  def predict_proba(self, x):
    """Return the posterior class probabilities of every sample.

    Args:
      x: array of shape (num_samples, num_features), or a single sample of
        shape (num_features,).

    Returns:
      Array of shape (num_classes, num_samples); every column sums to 1.
    """
    scores = self._joint_log_likelihood(x)
    # Normalise in log space (shift by the column maximum) so that products of
    # many small densities do not underflow to 0 / 0.
    scores = scores - np.max(scores, axis=0, keepdims=True)
    weights = np.exp(scores)
    return weights / np.sum(weights, axis=0, keepdims=True)

  def evaluate(self, x, y):
    """Return the number of correctly classified samples.

    Args:
      x: array of shape (num_samples, num_features).
      y: true class index of every sample; it is flattened before comparison.

    Returns:
      Count of samples for which the predicted class equals ``y``.
    """
    y = np.asarray(y).reshape(-1)
    return np.sum(np.equal(self.predict(x), y))

  def get_likelihood_value(self, dataset, labels):
    """Return the MAP estimate of the mean of every class.

    For class c with N_c samples, sample mean m, likelihood variance v,
    prior variance s and prior mean m0:

      mean_map = N_c*s / (N_c*s + v) * m  +  v / (N_c*s + v) * m0

    A class without samples gets its prior mean.

    Args:
      dataset: array of shape (num_samples, num_features).
      labels: class index of every sample.

    Returns:
      Array of shape (num_classes, num_features).
    """
    dataset = np.asarray(dataset, dtype=np.float64)
    labels = np.asarray(labels).reshape(-1)
    means = []
    for c in range(self.num_classes):
      members = dataset[labels == c]
      n_c = members.shape[0]
      total = members.sum(axis=0)
      # With no samples the data term has zero weight; keep the (zero) sum
      # instead of dividing by zero.
      sample_mean = total / n_c if n_c else total
      var = np.asarray(self.vars[c], dtype=np.float64)
      prior_var = np.asarray(self.prior_var[c], dtype=np.float64)
      prior_mean = np.asarray(self.prior_mean[c], dtype=np.float64)
      denom = n_c * prior_var + var
      mean_map = (n_c * prior_var) / denom * sample_mean + var / denom * prior_mean
      means.append(mean_map)
    return np.array(means)

  def get_mean_map(self):
    """Return the MAP class means, shape (num_classes, num_features)."""
    return self.means