# 1. Import Libraries

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

# 2. Define Functions

### 2.1 สร้าง Matrix สำหรับเก็บค่าเฉลี่ย และ ส่วนเบี่ยงเบนมาตรฐาน

In [2]:
def NB_create_MeanStd_Storage(Feature_Name, All_Class):
    """Build two empty, labelled tables for per-class feature statistics.

    Each table is an object-dtype array of shape
    ``(len(Feature_Name) + 1, len(All_Class) + 1)``. Row 0 holds the class
    labels (from column 1 onwards), column 0 holds the feature names (from
    row 1 onwards), and every other cell, including the corner ``[0, 0]``,
    starts out as the integer 0.

    Parameters
    ----------
    Feature_Name : sequence
        Feature labels, one per table row.
    All_Class : sequence
        Class labels, one per table column.

    Returns
    -------
    tuple of numpy.ndarray
        ``(mean, std)``: two separate arrays carrying identical labels.
    """
    table_shape = (len(Feature_Name) + 1, len(All_Class) + 1)
    mean = np.zeros(table_shape, dtype='object')

    # Header row: one class label per column, starting at column 1.
    for col, class_label in enumerate(All_Class, start=1):
        mean[0, col] = class_label

    # Header column: one feature name per row, starting at row 1.
    for row, feature_label in enumerate(Feature_Name, start=1):
        mean[row, 0] = feature_label

    # The std table starts with exactly the same layout as the mean table.
    std = mean.copy()
    return mean, std

### 2.2 เรียนรู้

In [3]:
def NBCD_fit(X_Train, Y_Train, Feature_Name, All_Class):
    """Estimate the per-class mean and standard deviation of every feature.

    Parameters
    ----------
    X_Train : numpy.ndarray
        2-D array of training features; column ``r`` corresponds to
        ``Feature_Name[r]``.
    Y_Train : numpy.ndarray
        2-D array whose first column holds the class label of each row.
    Feature_Name : sequence
        Feature labels, used for the row headers and the feature count.
    All_Class : sequence
        Class labels to fit, one table column each.

    Returns
    -------
    tuple of numpy.ndarray
        ``(mean, std)`` tables as laid out by ``NB_create_MeanStd_Storage``:
        cell ``[r + 1, c + 1]`` holds the statistic of feature ``r`` over
        the training rows labelled ``All_Class[c]``. ``std`` is computed
        with ``ndarray.std()`` at its default settings.
    """
    mean, std = NB_create_MeanStd_Storage(Feature_Name, All_Class)
    labels = Y_Train[:, 0]
    table_rows = range(1, len(Feature_Name) + 1)

    for col, _class in enumerate(All_Class, start=1):
        # Training rows that belong to the current class.
        members = X_Train[np.flatnonzero(labels == _class)]
        for row in table_rows:
            # Statistics are reduced one column at a time to a scalar.
            # NOTE(review): X_Train may be object-dtype when sliced from a
            # mixed table; keep this per-column form unless that is ruled out.
            feature_values = members[:, row - 1]
            mean[row, col] = feature_values.mean()
            std[row, col] = feature_values.std()
    return mean, std

### 2.3 พยากรณ์

In [4]:
def NBCD_predict(X_Test, mean, std, All_Class, Count_All_Class, top = 1):
    """Rank classes for each test row with a Gaussian naive Bayes model.

    For every row the score of class ``c`` is
    ``log P(c) + sum_d log N(x_d | mean[d, c], std[d, c])``. Scoring in log
    space gives the same ranking as multiplying the raw densities, but it
    does not underflow to 0.0 for every class when there are many features
    or very small densities.

    Parameters
    ----------
    X_Test : array-like, shape (n_samples, n_feature)
        Numeric feature values to classify.
    mean, std : array-like
        Tables laid out as produced by ``NB_create_MeanStd_Storage``: row 0
        and column 0 hold labels, and cell ``[d + 1, c + 1]`` holds the
        statistic of feature ``d`` for class ``c``. ``std`` entries are
        expected to be strictly positive.
    All_Class : array-like, shape (n_class,)
        Class labels, in the same order as the table columns.
    Count_All_Class : array-like, shape (n_class,)
        Training-set count of each class, used to form the class priors.
    top : int, optional
        Number of best-ranked classes to return per row (default 1).

    Returns
    -------
    numpy.ndarray, shape (n_samples, top)
        Class labels ordered from most to least probable.
    """
    classes = np.asarray(All_Class)
    counts = np.asarray(Count_All_Class, dtype=float)
    X = np.asarray(X_Test, dtype=float)
    n_feature = X.shape[1]
    n_class = len(classes)

    # Drop the label row/column and work on plain float matrices
    # of shape (n_feature, n_class).
    mu = np.asarray(mean)[1:n_feature + 1, 1:n_class + 1].astype(float)
    sigma = np.asarray(std)[1:n_feature + 1, 1:n_class + 1].astype(float)

    # A class with zero count gets log-prior -inf, i.e. it is ranked last.
    with np.errstate(divide='ignore'):
        log_prior = np.log(counts / counts.sum())

    # Broadcast to (n_samples, n_feature, n_class) and evaluate the
    # log of the normal density for every (sample, feature, class) triple.
    z = (X[:, :, np.newaxis] - mu[np.newaxis, :, :]) / sigma[np.newaxis, :, :]
    log_pdf = -0.5 * z ** 2 - np.log(sigma)[np.newaxis, :, :] - 0.5 * np.log(2 * np.pi)

    # Naive Bayes: features are treated as independent, so log-densities add.
    log_posterior = log_prior[np.newaxis, :] + log_pdf.sum(axis=1)

    # Ascending argsort reversed -> best class first, as in the original code.
    ranking = np.argsort(log_posterior, axis=1)[:, ::-1]
    return classes[ranking][:, :top]

In [5]:
def normal_pdf(x, mean, std):
    """Evaluate the normal (Gaussian) probability density at ``x``.

    Computes ``exp(-((x - mean) / std) ** 2 / 2) / (std * sqrt(2 * pi))``.

    Parameters
    ----------
    x : float or numpy.ndarray
        Point(s) at which the density is evaluated.
    mean : float
        Mean of the distribution.
    std : float
        Standard deviation of the distribution.

    Returns
    -------
    float or numpy.ndarray
        Density value(s) at ``x``.
    """
    z_score = (x - mean) / std
    normaliser = std * np.sqrt(2 * np.pi)
    return np.e ** (-(z_score ** 2) / 2) / normaliser

In [6]:
def find_error_classification(Y, Yhat):
    """Return the misclassification rate in percent.

    Parameters
    ----------
    Y : array-like
        True labels; the first dimension is the number of samples ``N``.
    Yhat : array-like
        Predicted labels, with the same number of elements as ``Y``.

    Returns
    -------
    float
        ``100 / N`` times the number of positions where ``Y`` and ``Yhat``
        differ.

    Raises
    ------
    ZeroDivisionError
        If ``Y`` has no samples.
    """
    Y = np.asarray(Y)
    Yhat = np.asarray(Yhat)
    N = Y.shape[0]
    # A column vector (N, 1) compared with a flat vector (N,) would broadcast
    # to an N x N matrix and inflate the count; compare element by element
    # instead whenever both hold the same number of labels.
    if Y.shape != Yhat.shape and Y.size == Yhat.size:
        Y = Y.ravel()
        Yhat = Yhat.ravel()
    error = (100/N)*(Y != Yhat).sum()
    return error

# 3. Read Data & Prepare Data

In [7]:
# Load spreadsheet columns A through C of the BMI workbook into a DataFrame.
Data = pd.read_excel('BMI_Dataset_Classification.xlsx', usecols = 'A:C')

In [8]:
Data

Unnamed: 0,Weight,Height,Target
0,66.17,185.21,normal
1,71.27,168.91,fat lv3
2,73.17,179.83,normal
3,81.74,171.76,fat lv2
4,92.97,172.89,fat lv3
...,...,...,...
995,63.24,141.45,fat lv3
996,47.55,166.64,thin
997,68.10,168.52,fat lv1
998,74.39,169.24,fat lv2


In [9]:
# Work on the underlying NumPy array rather than the DataFrame.
DataMatrix = Data.values

In [10]:
DataMatrix.shape

(1000, 3)

In [11]:
# Number of feature columns: every column except the last one.
D = DataMatrix.shape[1] - 1

In [12]:
# First D columns are the features; the remaining column is the label.
# Y is kept 2-D (one column) because of the `D:` slice.
X = DataMatrix[:, :D]
Y = DataMatrix[:, D:]

In [13]:
# Row bounds of the training slice: from the first row up to (but excluding)
# the last 200 rows, which are held out for testing.
start_train = 0
end_train = -200

In [14]:
# Training set: rows start_train .. end_train (end excluded).
X_Train = X[start_train:end_train, :]
Y_Train = Y[start_train:end_train, :]

# Test set: everything from end_train to the last row (no shuffling).
X_Test = X[end_train:, :]
Y_Test = Y[end_train:, :]

# 4. Create Model

In [15]:
# Feature names are all column headers except the last (label) column.
Feature_Name = np.array(Data.columns[:-1])
# Distinct class labels in the training set and how often each occurs.
All_Class, Count_All_Class = np.unique(Y_Train[:, :], return_counts = True)
print(All_Class)
print(Count_All_Class)

['fat lv1' 'fat lv2' 'fat lv3' 'normal' 'thin']
[164 196 139 242  59]


In [16]:
# Fit the model: per-class mean and standard deviation of every feature.
mean, std = NBCD_fit(X_Train, Y_Train, Feature_Name, All_Class)
print(std)

[[0 'fat lv1' 'fat lv2' 'fat lv3' 'normal' 'thin']
 ['Weight' 5.340695335062391 6.088534201208335 9.166414128420524
  5.806620964509083 5.53040465939328]
 ['Height' 6.110198417305286 6.607857019751706 7.113091345193596
  6.869481716401708 6.579074983288824]]


# 5. Making Prediction

In [17]:
Y_Test

array([['fat lv1'],
       ['fat lv2'],
       ['normal'],
       ['fat lv3'],
       ['fat lv2'],
       ['fat lv3'],
       ['thin'],
       ['fat lv2'],
       ['normal'],
       ['fat lv2'],
       ['fat lv2'],
       ['fat lv2'],
       ['fat lv2'],
       ['fat lv2'],
       ['normal'],
       ['fat lv2'],
       ['thin'],
       ['normal'],
       ['fat lv2'],
       ['normal'],
       ['fat lv1'],
       ['fat lv2'],
       ['fat lv1'],
       ['fat lv1'],
       ['normal'],
       ['thin'],
       ['normal'],
       ['thin'],
       ['fat lv2'],
       ['thin'],
       ['normal'],
       ['fat lv2'],
       ['fat lv1'],
       ['fat lv2'],
       ['normal'],
       ['fat lv2'],
       ['normal'],
       ['normal'],
       ['fat lv2'],
       ['fat lv3'],
       ['fat lv1'],
       ['normal'],
       ['fat lv2'],
       ['fat lv3'],
       ['thin'],
       ['normal'],
       ['fat lv2'],
       ['fat lv3'],
       ['normal'],
       ['fat lv3'],
       ['fat lv2'],
       ['fat

In [18]:
# Predict the most probable class (default top = 1) for every test row.
Yhat_Test = NBCD_predict(X_Test, mean, std, All_Class, Count_All_Class) # likelihoods come from the normal pdf
print(Yhat_Test)

[['fat lv1']
 ['fat lv2']
 ['fat lv1']
 ['fat lv3']
 ['fat lv2']
 ['fat lv3']
 ['normal']
 ['fat lv1']
 ['normal']
 ['fat lv2']
 ['fat lv2']
 ['fat lv2']
 ['fat lv2']
 ['fat lv3']
 ['normal']
 ['fat lv2']
 ['thin']
 ['normal']
 ['fat lv2']
 ['normal']
 ['normal']
 ['fat lv2']
 ['fat lv1']
 ['fat lv1']
 ['normal']
 ['thin']
 ['normal']
 ['normal']
 ['fat lv2']
 ['normal']
 ['normal']
 ['fat lv2']
 ['fat lv2']
 ['fat lv2']
 ['normal']
 ['fat lv2']
 ['normal']
 ['normal']
 ['fat lv2']
 ['normal']
 ['fat lv2']
 ['normal']
 ['fat lv2']
 ['fat lv3']
 ['normal']
 ['normal']
 ['fat lv3']
 ['fat lv3']
 ['normal']
 ['fat lv3']
 ['fat lv2']
 ['fat lv2']
 ['fat lv2']
 ['fat lv3']
 ['fat lv2']
 ['normal']
 ['normal']
 ['fat lv2']
 ['normal']
 ['fat lv1']
 ['normal']
 ['normal']
 ['normal']
 ['fat lv2']
 ['fat lv1']
 ['fat lv2']
 ['normal']
 ['normal']
 ['fat lv2']
 ['normal']
 ['fat lv1']
 ['fat lv2']
 ['normal']
 ['normal']
 ['fat lv1']
 ['fat lv1']
 ['fat lv1']
 ['normal']
 ['fat lv2']
 ['normal'

In [19]:
# Misclassification rate on the held-out test rows, in percent.
error_Test = find_error_classification(Y_Test, Yhat_Test)

In [20]:
error_Test

28.5