In [None]:
# https://deeplearningcourses.com/c/deep-learning-gans-and-variational-autoencoders
# https://www.udemy.com/deep-learning-gans-and-variational-autoencoders
from __future__ import print_function, division
from builtins import range, input
# Note: you may need to update your version of future
# sudo pip install -U future

import util
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import multivariate_normal as mvn


def clamp_sample(x):
  """Limit every value of `x` to the closed interval [0, 1].

  Values above 1 become 1, values below 0 become 0, everything else is
  passed through unchanged.
  """
  return np.clip(x, 0, 1)


class BayesClassifier:
  """Generative model that fits one full-covariance Gaussian per class.

  After `fit`, the instance holds:
    K         -- number of distinct labels seen in Y
    gaussians -- list of K dicts, each {'m': mean vector, 'c': covariance}
    p_y       -- length-K array of class priors (class counts / total)
  """

  def fit(self, X, Y):
    """Estimate the per-class mean, covariance and prior from (X, Y).

    X is indexed with the boolean mask `Y == k`, so both are expected to
    be NumPy arrays with one row of X per entry of Y. Labels are assumed
    to be the integers 0...K-1.
    """
    self.K = len(set(Y))

    counts = np.zeros(self.K)
    self.gaussians = []
    for label in range(self.K):
      members = X[Y == label]
      counts[label] = len(members)
      # np.cov treats rows as variables, hence the transpose
      self.gaussians.append({
        'm': members.mean(axis=0),
        'c': np.cov(members.T),
      })

    # turn raw class counts into the prior p(y)
    self.p_y = counts / counts.sum()

  def sample_given_y(self, y):
    """Draw one sample from the Gaussian of class `y`, clamped to [0, 1]."""
    params = self.gaussians[y]
    draw = mvn.rvs(mean=params['m'], cov=params['c'])
    return clamp_sample(draw)

  def sample(self):
    """Pick a class according to p(y), then draw a clamped sample from it."""
    label = np.random.choice(self.K, p=self.p_y)
    draw = self.sample_given_y(label)
    # sample_given_y already clamps; this second clamp leaves values as-is
    return clamp_sample(draw)


if __name__ == '__main__':
  # Demo: fit the classifier on the data returned by util.get_mnist(), then
  # plot generated samples. The reshape(28, 28) calls below require each
  # sample / mean vector to have exactly 784 entries.
  X, Y = util.get_mnist()
  clf = BayesClassifier()
  clf.fit(X, Y)

  # one figure per class: a generated sample next to the learned mean image
  for k in range(clf.K):
    drawn = clf.sample_given_y(k).reshape(28, 28)
    class_mean = clf.gaussians[k]['m'].reshape(28, 28)

    panels = ((drawn, "Sample"), (class_mean, "Mean"))
    for position, (image, caption) in enumerate(panels, start=1):
      plt.subplot(1, 2, position)
      plt.imshow(image, cmap='gray')
      plt.title(caption)
    plt.show()

  # finally, a sample whose class is itself drawn from p(y)
  random_image = clf.sample().reshape(28, 28)
  plt.imshow(random_image, cmap='gray')
  plt.title("Random Sample from Random Class")
  plt.show()


[bayes-classifier-and-naive-bayes-tutorial-using](https://lazyprogrammer.me/bayes-classifier-and-naive-bayes-tutorial-using/)

$
c^* = argmax_{c}{ P(c | x) } =argmax_{c}{ \frac{ P(x | c)P(c) }{ P(x) } }
$

You will notice that P(x) is constant for all values of c in P(c|x).

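So when I take the argmax over $\frac{P(x | c)P(c)}{P(x)}$,
I can ignore $P(x)$, because dividing every candidate by the same positive constant does not change which $c$ gives the maximum.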

Using this information, we can simplify our problem so that, in order to choose “which digit” given an image, all we need to do is calculate this argmax (notice P(x) is removed):


$
c^* = argmax_{c}{ P(x | c)P(c) }
$

A reasonable first-guess for modeling continuous data is the multivariate Gaussian or the multivariate Normal.

$
P(x | c) = \frac{1}{\sqrt{ (2\pi)^D |\Sigma| }} \exp\left({ -\frac{1}{2}(x - \mu)^T \Sigma^{-1} (x - \mu) }\right)
$

The log-likelihood can be represented as:

$
\log P(x | c) = -\frac{D}{2}\ln(2\pi) - \frac{1}{2}\ln|\Sigma| - \frac{1}{2}(x - \mu)^T \Sigma^{-1} (x - \mu)
$

Earlier, we wanted the argmax over P(x|c)P(c). Since log(AB)=log(A)+log(B), then using log probabilities, we can choose the digit class using:

$
c^* = argmax_{c} {\left( logP(x | c) + logP(c) \right)}
$