In [19]:
import pandas as pd
import numpy as np
from IPython.display import display
import matplotlib.pyplot as plt
import math

In [3]:
# Column labels: the class label comes first, followed by the three
# numeric measurements.
columns = ['Person', 'height (feet)', 'Weight (lbs)', 'foot size (inches)']

# One record per person, laid out in the same order as `columns`.
data_set = [
    [label, *measurements]
    for label, measurements in (
        ('male', (6, 180, 12)),
        ('male', (5.92, 190, 11)),
        ('male', (5.58, 170, 12)),
        ('male', (5.92, 165, 10)),
        ('female', (5, 100, 6)),
        ('female', (5.5, 150, 8)),
        ('female', (5.42, 130, 7)),
        ('female', (5.75, 150, 9)),
    )
]

df = pd.DataFrame(data=data_set, columns=columns)

In [47]:
class Gauss_clf:
    """Gaussian naive Bayes classifier for numeric features.

    Every feature is modelled, independently per class, by a normal
    distribution whose mean and (population, ddof=0) standard deviation are
    estimated from the training rows of that class.

    Attributes set by ``fit``:
        classes: sorted array of the distinct labels found in ``y``.
        class_freq: dict mapping each label to its prior (share of rows).
        means: dict mapping each label to a 1-D array of per-feature means.
        std: dict mapping each label to a 1-D array of per-feature standard
            deviations (floored at ``_MIN_STD``).

    Attribute set by ``predict_proba``:
        class_prob: the dict returned by the most recent call.
    """

    # A feature that is constant within a class has a standard deviation of
    # zero, which makes the density a division by zero. Flooring the fitted
    # value keeps every score finite.
    _MIN_STD = 1e-9

    def separate_by_classes(self, x, y):
        """Split the rows of ``x`` by label and record the class priors.

        Args:
            x: array-like of shape (n_samples, n_features).
            y: array-like of n_samples labels (flattened to 1-D).

        Returns:
            dict mapping each label to the 2-D array of the rows of ``x``
            carrying that label, shape (n_rows_in_class, n_features).

        Side effects:
            Sets ``self.classes`` and ``self.class_freq``.
        """
        x = np.asarray(x, dtype=float)
        y = np.asarray(y).ravel()

        self.classes, counts = np.unique(y, return_counts=True)
        # Divide every count by the one fixed total. Normalising entry by
        # entry against a sum that already contains normalised values skews
        # every prior after the first.
        total = counts.sum()
        self.class_freq = {
            cls: count / total for cls, count in zip(self.classes, counts)
        }
        # A boolean mask keeps each subset 2-D (rows x features).
        return {cls: x[y == cls] for cls in self.classes}

    def fit(self, X, y):
        """Estimate the per-class priors, feature means and feature stds.

        Args:
            X: array-like of shape (n_samples, n_features).
            y: array-like of n_samples labels.
        """
        separated_x = self.separate_by_classes(X, y)
        self.means = {}
        self.std = {}

        for class_type in self.classes:
            rows = separated_x[class_type]
            self.means[class_type] = rows.mean(axis=0)
            self.std[class_type] = np.maximum(rows.std(axis=0), self._MIN_STD)

    def calculate_probability(self, x, mean, stdev):
        """Return the normal density N(mean, stdev**2) evaluated at ``x``."""
        exponent = math.exp(-((x - mean) ** 2 / (2 * stdev ** 2)))
        return (1 / (math.sqrt(2 * math.pi) * stdev)) * exponent

    def _joint_log_likelihood(self, x):
        """Return {label: log P(label) + sum_i log p(x_i | label)} for one sample."""
        x = np.asarray(x, dtype=float)
        scores = {}
        for cls in self.classes:
            mean = self.means[cls]
            std = self.std[cls]
            if x.shape != np.shape(mean):
                raise ValueError(
                    f"expected a sample with shape {np.shape(mean)}, got {x.shape}"
                )
            # The log-density is computed directly rather than as
            # log(exp(...)), so a sample far from the mean yields a large
            # negative number instead of log(0).
            z = (x - mean) / std
            log_density = -0.5 * z ** 2 - np.log(std) - 0.5 * math.log(2 * math.pi)
            scores[cls] = math.log(self.class_freq[cls]) + float(np.sum(log_density))
        return scores

    def predict_proba(self, x):
        """Score one sample against every class.

        Args:
            x: 1-D array-like holding one value per feature.

        Returns:
            dict mapping each label to P(label) * prod_i p(x_i | label).
            These are unnormalised joint scores: they are not divided by the
            evidence and therefore do not sum to 1.

        Side effects:
            Stores the returned dict in ``self.class_prob``.
        """
        log_scores = self._joint_log_likelihood(x)
        self.class_prob = {cls: math.exp(score) for cls, score in log_scores.items()}
        return self.class_prob

    def predict(self, X):
        """Predict the most likely label for each sample.

        Args:
            X: iterable of samples, each a 1-D array-like with one value per
                feature.

        Returns:
            list with one label per sample; on a tie the first label in
            ``self.classes`` order wins. An entry is None only when the
            classifier knows no classes.
        """
        pred = []
        for x in X:
            # Rank on log scores so that scores too small to represent as
            # probabilities still produce a winner.
            log_scores = self._joint_log_likelihood(x)
            pred.append(max(log_scores, key=log_scores.get, default=None))
        return pred
        

In [74]:
# Fit the classifier: every column after 'Person' is a feature and
# 'Person' itself is the label.
gnb = Gauss_clf()
gnb.fit(df.loc[:, columns[1:]].values, df['Person'].values)

# One unseen sample, ordered like the feature columns.
test = np.array([[5.3, 150.2, 11]])
pred_gender = gnb.predict(test)

# Unpack the sample as plain Python floats for the report below.
height, weight, foot_size = test[0].tolist()
print(f"""\
[Given Parameters]

Height: {height} feet,
Weight: {weight} lbs,
Foot Size: {foot_size} inches

\t-------

[Predicted]

Person Gender: {pred_gender[0]}
""")

[Given Parameters]

Height: 5.3 feet,
Weight: 150.2 lbs,
Foot Size: 11.0 inches

	-------

[Predicted]

Person Gender: female

