In [1]:
import numpy as np
import pandas as pd

In [2]:
# Toy employee dataset: three categorical features plus the "Status" label.
# Built directly as a DataFrame, one list of 11 values per column.
employee = pd.DataFrame(
    {
        "Department": [
            "sales", "sales", "sales",
            "systems", "systems", "systems", "systems",
            "marketing", "marketing",
            "secretary", "secretary",
        ],
        "Age": [
            "31...35", "26...30", "31...35",
            "21...25", "31...35", "26...30", "41...45",
            "36...40", "31...35",
            "46...50", "26...30",
        ],
        "Salary": [
            "46K-50K", "26K-30K", "31K-35K",
            "46K-50K", "66K-70K", "46K-50K", "66K-70K",
            "46K-50K", "41K-45K",
            "36K-40K", "26K-30K",
        ],
        "Status": [
            "senior", "junior", "junior",
            "junior", "senior", "junior", "senior",
            "senior", "junior",
            "senior", "junior",
        ],
    }
)

# Display the resulting table
employee

Unnamed: 0,Department,Age,Salary,Status
0,sales,31...35,46K-50K,senior
1,sales,26...30,26K-30K,junior
2,sales,31...35,31K-35K,junior
3,systems,21...25,46K-50K,junior
4,systems,31...35,66K-70K,senior
5,systems,26...30,46K-50K,junior
6,systems,41...45,66K-70K,senior
7,marketing,36...40,46K-50K,senior
8,marketing,31...35,41K-45K,junior
9,secretary,46...50,36K-40K,senior


In [3]:
# Naive Bayes Train Function
def NBTrain(data, laplace = 0, target = "Status"):
    """Estimate categorical Naive Bayes parameters from a DataFrame.

    Parameters
    ----------
    data : pandas.DataFrame
        Training table. Every column other than ``target`` is treated as a
        categorical feature.
    laplace : int or float, default 0
        Additive (Laplace) smoothing constant added to every count.
    target : str, default "Status"
        Name of the column holding the class label.

    Returns
    -------
    prior_P : numpy.ndarray
        Prior probability of each class, ordered like
        ``data[target].unique()`` (order of first appearance).
    cond_P : dict[str, pandas.DataFrame]
        For each feature column, a table of P(feature value | class) with
        one row per class (same order as ``prior_P``) and one column per
        distinct feature value (order of first appearance).
    """
    labels = data[target]
    y_classes = labels.unique()
    y_classes_len = len(y_classes)

    # Calculate Prior Probabilities
    # The smoothing term in the denominator must scale with the number of
    # classes (not a fixed 2), otherwise the priors do not sum to 1.
    class_counts = np.array([(labels == y).sum() for y in y_classes], dtype=float)
    prior_P = (class_counts + laplace) / (len(labels) + y_classes_len * laplace)

    # Calculate Conditional Probabilities
    cond_P = {}
    for column in data.columns:
        if column == target:
            continue

        values = data[column]
        # unique() keeps first-appearance order, so the table layout is
        # reproducible across kernel restarts (set() ordering is not).
        x_classes = list(values.unique())
        x_classes_len = len(x_classes)

        counts = np.zeros((y_classes_len, x_classes_len))
        for a, y in enumerate(y_classes):
            in_class = labels == y
            for b, x in enumerate(x_classes):
                counts[a, b] = ((values == x) & in_class).sum()

        # Each row is normalised by its own class size plus one smoothing
        # unit per distinct feature value.
        totals = class_counts + x_classes_len * laplace
        x_cond_P = (counts + laplace) / totals[:, np.newaxis]

        cond_P[column] = pd.DataFrame(x_cond_P, columns=x_classes, index=y_classes)

    return prior_P, cond_P

In [4]:
# Fit the Naive Bayes tables on the employee data using add-one (Laplace) smoothing
prior_P, cond_P = NBTrain(data=employee, laplace=1)

In [5]:
# Class priors P(Status), one entry per class in order of first appearance in the data
prior_P

array([0.46153846, 0.53846154])

In [6]:
# P(Department | Status): one row per Status class, one column per department value
cond_P["Department"]

Unnamed: 0,marketing,systems,sales,secretary
senior,0.222222,0.333333,0.222222,0.222222
junior,0.2,0.3,0.3,0.2


In [7]:
# P(Age | Status): one row per Status class, one column per age bracket
cond_P["Age"]

Unnamed: 0,31...35,41...45,26...30,46...50,21...25,36...40
senior,0.272727,0.181818,0.090909,0.181818,0.090909,0.181818
junior,0.25,0.083333,0.333333,0.083333,0.166667,0.083333


In [8]:
# P(Salary | Status): one row per Status class, one column per salary band
cond_P["Salary"]

Unnamed: 0,66K-70K,41K-45K,46K-50K,36K-40K,31K-35K,26K-30K
senior,0.272727,0.090909,0.272727,0.181818,0.090909,0.090909
junior,0.083333,0.166667,0.25,0.083333,0.166667,0.25


In [9]:
# Prediction Function
def NBPrediction(pred, priors = None, cond_probs = None):
    """Classify one employee with the trained Naive Bayes tables.

    Parameters
    ----------
    pred : sequence of three str
        ``[department, age, salary]`` feature values of the employee.
    priors : array-like, optional
        Class priors as returned by ``NBTrain``. Defaults to the
        notebook-level ``prior_P``.
    cond_probs : dict[str, pandas.DataFrame], optional
        Conditional probability tables as returned by ``NBTrain``, keyed by
        "Department", "Age" and "Salary". Defaults to the notebook-level
        ``cond_P``.

    Returns
    -------
    tuple
        ``(label, score)`` where ``label`` is the class with the largest
        unnormalised posterior (prior times the three conditionals) and
        ``score`` is that value.

    Raises
    ------
    ValueError
        If ``pred`` does not contain exactly three values.
    KeyError
        If a feature value is not a column of its conditional table.
    """
    if priors is None:
        priors = prior_P
    if cond_probs is None:
        cond_probs = cond_P

    department, age, salary = pred
    observed = {"Department": department, "Age": age, "Salary": salary}

    # Take the class labels from the table index instead of assuming that
    # position 0 is "senior" and position 1 is "junior"; priors[i] belongs
    # to the i-th row label of every conditional table.
    class_labels = cond_probs["Department"].index

    P = {}
    for position, label in enumerate(class_labels):
        score = priors[position]
        for feature, value in observed.items():
            # Explicit label-based lookup; integer keys on a string-labelled
            # Series rely on a deprecated positional fallback.
            score = score * cond_probs[feature].loc[label, value]
        P[label] = score

    ans = max(P, key=P.get)

    return ans, P[ans]

In [10]:
# Classify three sample employees, each given as [department, age, salary]
a, b, c = (
    NBPrediction(sample)
    for sample in (
        ["marketing", "31...35", "46K-50K"],
        ["sales", "31...35", "66K-70K"],
        ["systems", "26...30", "46K-50K"],
    )
)

# One (label, score) tuple per line
print(a, b, c, sep="\n")

('senior', 0.007628734901462173)
('senior', 0.007628734901462173)
('junior', 0.013461538461538459)
