# IE7275 - Bayesian Classification

### We demonstrate how to apply Bayes Classifier for Classification of Iris data

By Yilin Yin and Chun-An Chou

In [1]:
from sklearn import datasets
import numpy as np
import pandas as pd
from numpy.linalg import eig
from scipy.stats import multivariate_normal
from sklearn.naive_bayes import GaussianNB

In [2]:
# load the iris dataset from the cloud database
iris = datasets.load_iris()
data1 = pd.DataFrame(data= np.c_[iris['data'], iris['target']],
                     columns= iris['feature_names'] + ['target'])

In [3]:
cols = [2,3]
data1.drop(data1.columns[cols],axis=1,inplace=True)

data1.target.value_counts()
data1.target = data1.target.replace(2,1)
data1.target.value_counts()

1.0    100
0.0     50
Name: target, dtype: int64

In [4]:
# prior prob
pp_0 = data1.target.value_counts()[0]/len(data1.target)
pp_1 = data1.target.value_counts()[1]/len(data1.target)

In [5]:
# Likelihood (Assume data are normally distributed) 

# sample mean
d0 = data1[data1.target == 0]
d1 = data1[data1.target == 1]
mu0 = [np.mean(d0.iloc[:,0]),np.mean(d0.iloc[:,1])]
mu1 = [np.mean(d1.iloc[:,0]),np.mean(d1.iloc[:,1])]

# sample covariance
cov1 = d1.cov()
cov1 = cov1.iloc[0:2,0:2]

cov0 = d0.cov()
cov0 = cov0.iloc[0:2,0:2]

In [6]:
#classification
x =[6.75,4.25]
x0 = multivariate_normal.pdf(x,mu0, np.array(cov0))

x1 = multivariate_normal.pdf(x,mu1, np.array(cov1))

post0 = x0 * pp_0
post1 = x1 * pp_1

if post1 > post0:
    print('class of [6.75,4.25] is 1')
else:
    print('class of [6.75,4.25] is 0')


class of [6.75,4.25] is 1


### We demonstrate how to apply Bayes Classifier for Classification of Iris discretized data

In [35]:
info_percent = Evalues/sum(Evalues)
print(info_percent)

[0.92461872 0.05306648 0.01710261 0.00521218]


### We confirm the total information in data are the same before and after PCA

In [7]:
# Data discritization

data2 = pd.DataFrame(data= np.c_[iris['data'], iris['target']],
                     columns= iris['feature_names'] + ['target'])
data2.columns[0]

conditions1 = [
    (data2['sepal length (cm)'] <= 5.2),
    (data2['sepal length (cm)'] > 5.2) & (data2['sepal length (cm)'] <= 6.1),
    (data2['sepal length (cm)'] > 6.1) & (data2['sepal length (cm)'] <= 7.0),
    (data2['sepal length (cm)'] > 7)
    ]

values1 = ['Very Short', 'Short', 'Long', 'Very Long']

data2['sepal_length'] = np.select(conditions1, values1)

conditions2 = [
    (data2['sepal width (cm)'] <= 2.8),
    (data2['sepal width (cm)'] > 2.8) & (data2['sepal width (cm)'] <= 3.6),
    (data2['sepal width (cm)'] > 3.6)
    ]

values2 = ['Short', 'Medium', 'Long']

data2['sepal_width'] = np.select(conditions2, values2)


In [8]:
# Naive Bayes Classification

x = ['Long','Long']

N_d0 = data2[data2.target == 0]
N_d1 = data2[data2.target == 1]

#conditional prob for length 0
if len(N_d0[N_d0.sepal_length == x[0]]) != 0:
    p_l0 = (len(N_d0[N_d0.sepal_length == x[0]])) / len(N_d0.sepal_length)
else:
    p_l0 = (len(N_d0[N_d0.sepal_length == x[0]])+1)/(len(N_d0.sepal_length)+len(values1))


#conditional prob for width 0
if len(N_d0[N_d0.sepal_length == x[1]]) != 0:
    p_w0 = (len(N_d0[N_d0.sepal_width == x[1]])) / len(N_d0.sepal_width)
else:
    p_w0 = (len(N_d0[N_d0.sepal_width == x[1]])+1)/(len(N_d0.sepal_width)+len(values2))


#conditional prob for length 1
if len(N_d1[N_d1.sepal_length == x[0]]) != 0:
    p_l1 = (len(N_d1[N_d1.sepal_length == x[0]])) / len(N_d1.sepal_length)
else:
    p_l1 = (len(N_d1[N_d1.sepal_length == x[0]])+1)/(len(N_d1.sepal_length)+len(values1))


#conditional prob for width 1
if len(N_d1[N_d1.sepal_length == x[1]]) != 0:
    p_w1 = (len(N_d1[N_d1.sepal_width == x[1]])) / len(N_d1.sepal_width)
else:
    p_w1 = (len(N_d1[N_d1.sepal_width == x[1]])+1)/(len(N_d1.sepal_width)+len(values2))


#post prob

post0 = p_l0*p_w0*pp_0
post1 = p_l1*p_w1*pp_1

if post1 > post0:
    print('class of [6.75,4.25] is 1')
else:
    print('class of [6.75,4.25] is 0')


class of [6.75,4.25] is 0


### We verify the Bayes classifier results with the Python library

In [11]:
x = [[6.75, 4.25]]
gnb = GaussianNB()
X_train = np.array(data1.iloc[:,0:2])
y_train =  np.array(data1.target)
y_pred = gnb.fit(X_train, y_train).predict(x)

print('class of [6.75,4.25] is'+ str(y_pred))


class of [6.75,4.25] is[1.]
