In [None]:
%matplotlib inline
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from scipy.stats import norm
from sklearn.naive_bayes import GaussianNB
matplotlib.style.use('ggplot')

In [None]:
# Sample from a 2D Gaussian and evaluate its density at each sample
def gauss_pdf(mean, cov, x):
    """Evaluate the multivariate Gaussian density N(mean, cov) at a single point.

    Parameters
    ----------
    mean : array-like, shape (d,)
        Mean vector of the distribution.
    cov : array-like, shape (d, d)
        Covariance matrix; it must be invertible.
    x : array-like, shape (d,)
        Point at which the density is evaluated.

    Returns
    -------
    float
        The density value at ``x``.

    Raises
    ------
    numpy.linalg.LinAlgError
        If ``cov`` is singular.
    """
    # Convert up front so plain Python lists work for every argument
    # (list - list is not defined) and np.matrix is not needed.
    mean = np.asarray(mean, dtype=float)
    cov = np.asarray(cov, dtype=float)
    diff = np.asarray(x, dtype=float) - mean

    # Quadratic form diff^T cov^{-1} diff, computed by solving a linear system
    # rather than forming the explicit inverse.
    quad = np.dot(diff, np.linalg.solve(cov, diff))

    # Normalisation constant (2*pi)^(d/2) * |cov|^(1/2).
    norm_const = (2 * np.pi) ** (len(mean) / 2.0) * np.linalg.det(cov) ** 0.5
    return float(np.exp(-quad / 2.0) / norm_const)

# Draw N samples from a correlated 2D Gaussian.
N = 1000
mean = [1, 1]
cov = [[1, -.25], [-.25, 1]]
x = np.random.multivariate_normal(mean, cov, N)

# Density value at every sample (one gauss_pdf call per row of x).
p = np.array([gauss_pdf(mean, cov, sample) for sample in x])

# Scatter the samples with the density value on the third axis.
fig = plt.figure()
# Figure.gca(projection=...) is no longer supported by Matplotlib;
# add_subplot is the supported way to create 3D axes.
ax = fig.add_subplot(111, projection='3d')
# The label gives ax.legend() an entry to show; without it the legend is empty.
ax.scatter(x[:, 0], x[:, 1], p, label='$p(x)$')
ax.set_xlabel('$x_1$')
ax.set_ylabel('$x_2$')
ax.legend()

plt.show()

In [None]:
def mahal(mean, cov, x):
    """Return ``-0.5 * (x - mean)^T cov^{-1} (x - mean)`` for a single point.

    Despite the name, this is minus one half of the *squared* Mahalanobis
    distance (the exponent of the Gaussian density), not the distance itself.

    Parameters
    ----------
    mean : array-like, shape (d,)
        Mean vector.
    cov : array-like, shape (d, d)
        Covariance matrix; it must be invertible.
    x : array-like, shape (d,)
        Point to evaluate.

    Returns
    -------
    float
        The (non-positive for a positive-definite ``cov``) exponent value.

    Raises
    ------
    numpy.linalg.LinAlgError
        If ``cov`` is singular.
    """
    # np.asarray lets plain lists be passed for every argument and avoids np.matrix.
    diff = np.asarray(x, dtype=float) - np.asarray(mean, dtype=float)
    # Solve cov @ z = diff instead of inverting cov explicitly.
    weighted = np.linalg.solve(np.asarray(cov, dtype=float), diff)
    return float(-np.dot(diff, weighted) / 2.0)


# Parameters of the Gaussian sampled for the 2D histogram. They are named here
# so nothing below depends on `mean`, `cov` or `N` left over from an earlier cell.
hist_mean = [-1, -1]
hist_cov = [[2, -1], [-1, 2]]
x = np.random.multivariate_normal(hist_mean, hist_cov, 50000)

# Gaussian exponent (-0.5 * squared Mahalanobis distance) of every sample under
# the distribution it was drawn from; per row this is the quantity
# mahal(hist_mean, hist_cov, row) returns.
# NOTE(review): the previous loop evaluated gauss_pdf with the earlier cell's
# mean/cov on only the first N samples, and `m` is not used by the visible
# cells -- confirm this is the intended quantity.
centered = x - hist_mean
m = -0.5 * np.sum(np.dot(centered, np.linalg.inv(hist_cov)) * centered, axis=1)

# hist2d takes the two coordinates as separate 1D arrays.
x = x.T
d = plt.hist2d(x[0], x[1], bins = 75)
plt.xlabel('$x_1$')
plt.ylabel('$x_2$')



In [None]:
# Evaluation grid for the one-dimensional, two-class example.
x = np.linspace(-3, 5, num=500)

# Class priors p(w1) and p(w2).
pw1, pw2 = .6, .4

# Class-conditional densities p(x|w1), p(x|w2): normal pdfs with
# (loc, scale) = (3, .8) and (1, .6) evaluated on the grid.
pxw1 = norm.pdf(x, loc=3, scale=.8)
pxw2 = norm.pdf(x, loc=1, scale=.6)

# Evidence p(x) as the prior-weighted sum of the class-conditionals.
px = pw1*pxw1 + pw2*pxw2

# Posteriors p(w1|x), p(w2|x) by Bayes' rule.
pwx1 = pw1*pxw1/px
pwx2 = pw2*pxw2/px

# Figure 1: class-conditional densities (solid) and posteriors (dashed).
# The labels are raw strings so the LaTeX backslash in \omega is not parsed
# as an (invalid) Python string escape; the label text itself is unchanged.
plt.figure()
plt.plot(x, pxw1, 'b-', lw=5, alpha=0.6, label=r'$p(x|\omega_1)$')
plt.plot(x, pxw2, 'r-', lw=5, alpha=0.6, label=r'$p(x|\omega_2)$')
plt.plot(x, pwx1, 'b--', lw=5, alpha=0.6, label=r'$p(\omega_1|x)$')
plt.plot(x, pwx2, 'r--', lw=5, alpha=0.6, label=r'$p(\omega_2|x)$')
plt.legend()
plt.xlabel('$x$')
plt.ylabel('conditional probability')


# Figure 2: class-conditional densities with the two error regions shaded.
# LaTeX strings are raw so \omega, \int and \mathcal are not parsed as
# (invalid) Python string escapes; the rendered text is unchanged.
plt.figure()
plt.plot(x, pxw1, 'b-', lw=5, alpha=0.6, label=r'$p(x|\omega_1)$')
plt.plot(x, pxw2, 'r-', lw=5, alpha=0.6, label=r'$p(x|\omega_2)$')
plt.legend()

# Where class 1 has the larger density, shade the area under the class-2 curve
# (red), and vice versa (blue).
# NOTE(review): the regions compare and fill the class-conditional densities,
# while the annotations below describe integrals of p(x|w)p(w); with unequal
# priors these are not the same -- confirm which is intended.
plt.fill_between(x, 0, pxw2, where=pxw1 > pxw2, facecolor='red', alpha=0.5)
plt.fill_between(x, 0, pxw1, where=pxw2 > pxw1, facecolor='blue', alpha=0.5)
plt.text(-2.9, .4, r'$p_2 = \int_{\mathcal{R}_2}p(x|\omega_1)p(\omega_1)dx$', fontsize=15, color='b')
plt.text(-2.9, .55, r'$p_1 = \int_{\mathcal{R}_1}p(x|\omega_2)p(\omega_2)dx$', fontsize=15, color='r')
plt.text(-2.9, .2, '$p_{err} = p_1+p_2$', fontsize=15)

# Draw the arrow on this figure's axes. `ax` still refers to the 3D axes of the
# earlier scatter plot, so `ax.arrow` would not draw on this figure.
plt.arrow(1.9, 0.5, 2.1, 0.05, head_width=0.05, head_length=0.05, fc='k', ec='k')

plt.xlabel('$x$')
plt.ylabel('conditional probability')

In [None]:
# Display the values of pxw1 (the norm.pdf(x, 3, .8) evaluations on the grid x).
# NOTE(review): this shows the whole 500-element array; looks like a leftover
# inspection cell -- confirm it is still wanted.
pxw1

In [None]:
# Training set: every column except the last goes to X, the last column to y.
data = np.genfromtxt('../data/optical_train.csv', delimiter=',')
X, y = data[:, :-1], data[:, -1]

# Test set, split the same way (`data` is rebound to the test table).
data = np.genfromtxt('../data/optical_test.csv', delimiter=',')
Xt, yt = data[:, :-1], data[:, -1]

# Fit Gaussian naive Bayes on the training split and predict the test split.
gnb = GaussianNB()
ypred = gnb.fit(X, y).predict(Xt)

# Percentage of test predictions that differ from the true labels.
n_wrong = np.sum(1.*(ypred != yt))
print("Error: ", 100*n_wrong/len(ypred))

In [None]:
from sklearn import datasets
from sklearn.naive_bayes import GaussianNB

# Fit Gaussian naive Bayes on the iris data and predict the same samples it
# was fitted on, so the count below is a training-set error.
iris = datasets.load_iris()
gnb = GaussianNB()
gnb.fit(iris.data, iris.target)
y_pred = gnb.predict(iris.data)

n_points = iris.data.shape[0]
n_mislabeled = (iris.target != y_pred).sum()
print("Number of mislabeled points out of a total %d points : %d" % (n_points, n_mislabeled))
