### Gaussian Distribution

In [4]:
import numpy as np 
import math

In [5]:
# First set of sample values; report its arithmetic mean.
a = np.array([1.4, 1.0, 1.3, 1.9, 2.0, 1.8])
print(a.mean())

1.5666666666666667


In [6]:
# Variance of `a` with NumPy's default normalisation (divide by N).
print(np.var(a))

0.1288888888888889


In [7]:
# Second set of sample values; report its mean and variance.
b = np.array([3.0, 3.8, 4.1, 3.9, 4.2, 3.4])
print(b.mean())
print(np.var(b))

3.733333333333333
0.1722222222222222


In [8]:
def gaussian(x, mean, variance):
    """
        Evaluate the Gaussian density with the given mean and variance at x.

        :param x: Input value
        :param mean: Mean of the distribution
        :param variance: Variance of the distribution
        :return: Value of the Gaussian function at x
    """
    spread = 2 * variance
    squared_deviation = (x - mean) ** 2
    # Normalising factor 1 / sqrt(2 * pi * variance).
    normalizer = 1 / np.sqrt(np.pi * spread)
    return np.exp(-squared_deviation / spread) * normalizer

In [9]:
# Density of x = 3.4 under the Gaussian fitted to `a`, weighted by 0.5.
mean_a = float(a.mean())
variance_a = float(a.var())
g1 = gaussian(3.4, mean_a, variance_a) * 0.5
print(g1)

1.2080820590230545e-06


In [10]:
# Density of x = 3.4 under the Gaussian fitted to `b`, weighted by 0.5.
mean_b = float(b.mean())
variance_b = float(b.var())
g2 = gaussian(3.4, mean_b, variance_b) * 0.5
print(g2)

0.34812922367906424


In [11]:
# Normalised share of g1 in the combined score g1 + g2.
total_score = g1 + g2
print(g1 / total_score)

3.4701995252515143e-06


In [12]:
# Normalised share of g2 in the combined score g1 + g2.
total_score = g1 + g2
print(g2 / total_score)

0.9999965298004748


### Create dataset

In [21]:
import numpy as np

# One observation per row: Outlook, Temperature, Humidity, Wind, class label.
data = np.array([
    ['Sunny', 'Hot', 'High', 'Weak', 'no'],
    ['Sunny', 'Hot', 'High', 'Strong', 'no'],
    ['Overcast', 'Hot', 'High', 'Weak', 'yes'],
    ['Rain', 'Mild', 'High', 'Weak', 'yes'],
    ['Rain', 'Cool', 'Normal', 'Weak', 'yes'],
    ['Rain', 'Cool', 'Normal', 'Strong', 'no'],
    ['Overcast', 'Cool', 'Normal', 'Strong', 'yes'],
    ['Overcast', 'Mild', 'High', 'Weak', 'no'],
    ['Sunny', 'Cool', 'Normal', 'Weak', 'yes'],
    ['Rain', 'Mild', 'Normal', 'Weak', 'yes'],
])
print(data)

[['Sunny' 'Hot' 'High' 'Weak' 'no']
 ['Sunny' 'Hot' 'High' 'Strong' 'no']
 ['Overcast' 'Hot' 'High' 'Weak' 'yes']
 ['Rain' 'Mild' 'High' 'Weak' 'yes']
 ['Rain' 'Cool' 'Normal' 'Weak' 'yes']
 ['Rain' 'Cool' 'Normal' 'Strong' 'no']
 ['Overcast' 'Cool' 'Normal' 'Strong' 'yes']
 ['Overcast' 'Mild' 'High' 'Weak' 'no']
 ['Sunny' 'Cool' 'Normal' 'Weak' 'yes']
 ['Rain' 'Mild' 'Normal' 'Weak' 'yes']]


In [14]:
# np.where with a single argument returns a tuple of index arrays;
# here it holds the row indices whose label (column 4) is 'yes'.
is_yes = data[:, 4] == 'yes'
yes = np.where(is_yes)
num_yes = len(data[yes])
print(num_yes)

# Row indices whose first column is "Sunny".
is_sunny = data[:, 0] == "Sunny"
sunny = np.where(is_sunny)

6


In [15]:
# Show the rows selected by each index tuple.
print(data[yes])
print(data[sunny])

[['Overcast' 'Hot' 'High' 'Weak' 'yes']
 ['Rain' 'Mild' 'High' 'Weak' 'yes']
 ['Rain' 'Cool' 'Normal' 'Weak' 'yes']
 ['Overcast' 'Cool' 'Normal' 'Strong' 'yes']
 ['Sunny' 'Cool' 'Normal' 'Weak' 'yes']
 ['Rain' 'Mild' 'Normal' 'Weak' 'yes']]
[['Sunny' 'Hot' 'High' 'Weak' 'no']
 ['Sunny' 'Hot' 'High' 'Strong' 'no']
 ['Sunny' 'Cool' 'Normal' 'Weak' 'yes']]


In [16]:
# Search the whole table: the result is (row_indices, column_indices).
sunny = np.where(data == 'Sunny')
row_indices = sunny[0]
# Row index of the second 'Sunny' match.
print(row_indices[1])

1


### Compute Prior Probability 

* Find the probability of class `yes` in the data set
* Find the probability of class `no` in the data set

In [39]:
def compute_prior_probability(data: np.array) -> tuple:
    """
        Compute the prior probability of each class.

        The last column of ``data`` is read as the class label and only the
        labels 'yes' and 'no' are counted.

        :param data: 2-D array whose last column holds the class label
        :return: Tuple ``(P(no), P(yes))``
    """
    labels = data[:, -1]

    # Number of rows carrying each label.
    counts = {label: np.sum(labels == label) for label in ('no', 'yes')}
    total = counts['yes'] + counts['no']

    return (counts['no'] / total, counts['yes'] / total)

In [41]:
# Priors are returned in the order (P(no), P(yes)).
prior_probability = compute_prior_probability(data)
print(prior_probability)

(0.4, 0.6)


In [43]:
# Unpack the (no, yes) pair once and report each prior.
prior_no, prior_yes = compute_prior_probability(data)
print(f'prior probablity no: {prior_no}')
print(f'prior probablity yes: {prior_yes}')

prior probablity no: 0.4
prior probablity yes: 0.6


### Compute Likelihood

#### Compute Conditional Probability

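* Calculate the class-conditional likelihood, which under the Naive Bayes independence assumption factorises per feature: $$P(x_1, x_2, \ldots, x_n \mid \text{class} = c) = \prod_{i=1}^{n} P(x_i \mid \text{class} = c)$$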

In [18]:
def statistic(train_data, column=0, feature='Sunny', conditional='yes'):
    """
        Estimate P(feature | class) by relative frequency.

        :param train_data: 2-D array of samples; the last column is the class label
        :param column: Index of the feature column to inspect
        :param feature: Feature value to count in that column
        :param conditional: Class label to condition on
        :return: Fraction of the rows labelled ``conditional`` whose entry in
                 ``column`` equals ``feature``; 0.0 when no row has that label
    """
    # Read the label from the last column rather than a hard-coded index 4,
    # so tables with any number of feature columns are handled.
    class_rows = train_data[train_data[:, -1] == conditional]
    num_conditional = class_rows.shape[0]

    if num_conditional == 0:
        # No sample of this class: avoid 0/0 (nan plus a runtime warning).
        return 0.0

    num_feature_given_conditional = np.sum(class_rows[:, column] == feature)
    return num_feature_given_conditional / num_conditional

In [23]:
def compute_conditional_probability(train_data) -> tuple:
    """
        Build one conditional-probability table P(value | class) per feature.

        Every column except the last one is treated as a categorical feature.

        :param train_data: 2-D array of samples; the last column is the class label
        :return: Tuple ``(conditional_probability, list_x_name)``.
                 ``list_x_name[i]`` holds the unique values of feature column
                 ``i`` as returned by ``np.unique``.
                 ``conditional_probability`` is an object array whose entry
                 ``i`` is a float array of shape ``(2, len(list_x_name[i]))``:
                 row 0 is conditioned on 'no', row 1 on 'yes', and column ``j``
                 corresponds to ``list_x_name[i][j]``.
    """
    class_labels = ('no', 'yes')  # row order of every probability table
    num_features = train_data.shape[1] - 1

    list_x_name = [np.unique(train_data[:, column]) for column in range(num_features)]

    # Object array: each feature's table may have a different number of columns.
    conditional_probability = np.empty((num_features,), dtype=object)
    for column, values in enumerate(list_x_name):
        table = np.zeros((len(class_labels), len(values)), dtype=float)
        for row, label in enumerate(class_labels):
            for j, value in enumerate(values):
                table[row, j] = statistic(train_data, column, value, conditional=label)
        conditional_probability[column] = table

    return np.array(conditional_probability), list_x_name

In [45]:
conditional_probability, list_features = compute_conditional_probability(data)
# Separate statements: `print(a), print(b)` builds a tuple, which the
# notebook then echoes as a stray `(None, None)` cell result.
print(conditional_probability)
print(list_features)

[array([[0.25      , 0.25      , 0.5       ],
        [0.33333333, 0.5       , 0.16666667]])
 array([[0.25      , 0.5       , 0.25      ],
        [0.5       , 0.16666667, 0.33333333]])
 array([[0.75      , 0.25      ],
        [0.33333333, 0.66666667]]) array([[0.5       , 0.5       ],
                                          [0.16666667, 0.83333333]])]
[array(['Overcast', 'Rain', 'Sunny'], dtype='<U8'), array(['Cool', 'Hot', 'Mild'], dtype='<U8'), array(['High', 'Normal'], dtype='<U8'), array(['Strong', 'Weak'], dtype='<U8')]


(None, None)

### Add Padding text in array 

In [17]:
nested_list = [
    ['Overcast', 'Rain', 'Sunny'],
    ['Cool', 'Hot', 'Mild'],
    ['High', 'Normal'],
    ['Strong', 'Weak'],
]

# The longest row decides the width of the rectangular array.
max_length = len(max(nested_list, key=len))

# Right-pad every shorter row with single-space strings.
padded_list = []
for sublist in nested_list:
    missing = max_length - len(sublist)
    padded_list.append(sublist + [' '] * missing)

numpy_array = np.array(padded_list, dtype='<U8')

# Print the NumPy array
print(numpy_array)

[['Overcast' 'Rain' 'Sunny']
 ['Cool' 'Hot' 'Mild']
 ['High' 'Normal' ' ']
 ['Strong' 'Weak' ' ']]


### Get index from value 

In [30]:
def get_index_from_value(list_features: np.ndarray,
                         feature_name: str):
    """
        Return the position of the first entry equal to ``feature_name``.

        :param list_features: 1-D array (or plain sequence) of feature values
        :param feature_name: Value to look up
        :return: Index of the first match, or -1 when the value is absent
    """
    # np.asarray makes a plain Python list compare element-wise instead of
    # collapsing to a single False; np.atleast_1d keeps np.nonzero away from
    # a 0-d comparison result.
    is_match = np.atleast_1d(np.asarray(list_features) == feature_name)
    matches = np.nonzero(is_match)[0]  # evaluated once instead of twice
    return matches[0] if len(matches) >= 1 else -1

In [46]:
feature = 'Sunny'
# Look the value up among the unique values of the first feature column.
outlook_values = list_features[0]
feature_index = get_index_from_value(outlook_values, feature)
print(f'Index of {feature}: {feature_index}')

Index of Sunny: 2


### Prediction 

In [49]:

def prediction_play_tennis (list_x_name , prior_probability , conditional_probability ,x = ["Sunny","Cool","High","Strong"]):
    """
        Classify one sample with Naive Bayes and print the score of each class.

        The score of a class is its prior multiplied by the conditional
        probability of every feature value of ``x`` given that class. The
        scores are not normalised to sum to one.

        :param list_x_name: Per-feature arrays of known values; entry ``i``
                            indexes the columns of ``conditional_probability[i]``
        :param prior_probability: Pair ``(P(no), P(yes))``
        :param conditional_probability: Per-feature tables; row 0 is class
                                        'no', row 1 is class 'yes'
        :param x: Feature values of the sample, one per entry of
                  ``list_x_name`` (extra trailing entries are ignored).
                  The default list is never mutated.
        :return: 0 when the 'no' score is strictly larger, otherwise 1
        :raises ValueError: If a value of ``x`` is not among the known values
                            of its feature
    """
    p0 = prior_probability[0]
    p1 = prior_probability[1]

    for i, known_values in enumerate(list_x_name):
        # Argument order is (known values, value to find).
        index = get_index_from_value(known_values, x[i])
        if index < 0:
            # -1 would otherwise silently select the last category's probability.
            raise ValueError(f'Unknown value {x[i]!r} for feature column {i}')

        table = conditional_probability[i]
        p0 = p0 * table[0, index]
        p1 = p1 * table[1, index]

    print(f'Probability play tennis = no  when event X hapened: {p0}')
    print(f'Probability play tennis = yes when event X hapened: {p1}')

    return 0 if p0 > p1 else 1

In [50]:
# The call returns the predicted class label (0 or 1) for the default sample.
probability_play_tennis = prediction_play_tennis(
    list_x_name=list_features,
    prior_probability=prior_probability,
    conditional_probability=conditional_probability,
)
print(f'Class:0( Play Tennis = No) | 1(Play tennis  = Yes): {probability_play_tennis}')

Probability play tennis = no  when event X hapened: 0.018750000000000003
Probability play tennis = yes when event X hapened: 0.002777777777777777
Class:0( Play Tennis = No) | 1(Play tennis  = Yes): 0
