In [116]:
import numpy as np 
import pandas as pd

In [117]:
data = pd.read_csv('caesarian.csv')
target = data.columns[-1]
print(target)

# P(y|X) = P(X|y) * P(y) / P(X)

# naive bayes classifier for multiple instances of features
def prior_probability(df, y):              # P(y)
      
    rows = len(df)
    values = df[y].value_counts()
    prior = []

    for i in values:
        prob = i/rows
        prior.append(prob)
        
    return prior     

def training(df, className):
    feature_prob_given_true = []
    feature_prob_given_false = []

    features = df.columns[1:-1]                 # load all features except class label
    class_count = df[className].value_counts()  # count of each class label (eg. yes = 100, no = 200)
    class_values = df[className].unique()       # unique class labels (Yes/No)

    for feature in features:
        distinct_values = df[feature].unique()
        f_ls = []
        t_ls = []
        for value in distinct_values:
            true_count = 0
            false_count = 0

            for i, row in df.iterrows():
                if row[className] == class_values[0] and row[feature] == value:
                    true_count += 1
                elif row[className] == class_values[1] and row[feature] == value:
                    false_count += 1
            
            prob_true = true_count / class_count[class_values[0]]
            prob_false = false_count / class_count[class_values[1]]

            
            t_ls.append({value: prob_true})
            f_ls.append({value: prob_false})
            
        feature_prob_given_true.append(t_ls)
        feature_prob_given_false.append(f_ls)
        
    return feature_prob_given_true, feature_prob_given_false

def naive_bayes_classifer(df, X_test, Y_test): 
     
    class_values = df[Y_test].unique()
    mle_given_True, mle_given_false = training(df, Y_test)   

    # Testing Starts...

    descision = []
    for each_test_case in X_test:
        posterior_T = 1
        posterior_F = 1
       
        for index, feature in enumerate(each_test_case, 0):
            
            feature_values = mle_given_True[index]
            posterior_T *= [i[feature] for i in feature_values if feature in i][0]
            
            feature_values = mle_given_false[index]
            posterior_F *= [i[feature] for i in feature_values if feature in i][0]

        posterior_T *= prior_probability(data, Y_test)[0]
        posterior_F *= prior_probability(data, Y_test)[1]
    
        descision.append(class_values[0] if posterior_T > posterior_F else class_values[1])
        
    return descision

 Caesarian


In [118]:
from sklearn.model_selection import train_test_split
train, test = train_test_split(data, test_size=.2)

X_test = test.iloc[:,1:-1].values
Y_test = test.iloc[:,-1].values
Y_pred = naive_bayes_classifer(data, X_test=X_test, Y_test=target)
print("Predicted: ",Y_pred, "\nActual: ", Y_test)

Predicted:  [1, 1, 0, 1, 0, 0, 1, 1, 1, 0, 1, 0, 0, 1, 1, 0] 
Actual:  [1 1 1 1 1 1 1 1 1 0 1 0 0 1 1 0]


In [119]:
# manual calculation on Accuracy score and Confusion matrix

TP = 0  # Prediction (+) , Actual(+) 
FP = 0  # Prediction (+) , Actual(-) 
TN = 0  # Prediction (-) , Actual(-) 
FN = 0  # Prediction (-) , Actual(+)

for i in range(len(Y_pred)):
    if Y_pred[i] == 1 and Y_test[i] == 1:
        TP+=1
    elif Y_pred[i] == 1 and Y_test[i] == 0:
        FP+=1
    elif Y_pred[i] == 0 and Y_test[i] == 0:
        TN+=1
    elif Y_pred[i] == 0 and Y_test[i] == 1:
        FN+=1
        
print("TP: ", TP, " TN: ", TN, " FP: ", FP, " FN: ", FN)
accuracy = (TP+TN)/(TP+TN+FP+FN)
CF = np.zeros(shape=(2,2),dtype=int)
CF[0,0] = TN
CF[0,1] = FP
CF[1,0] = FN
CF[1,1] = TP
print(CF)
print("Accuracy Score: ", accuracy*100, "%")

TP:  9  TN:  4  FP:  0  FN:  3
[[4 0]
 [3 9]]
Accuracy Score:  81.25 %


In [120]:
# Library Based Model Train and Accuracy, Confusion Matrix Calculation

from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import confusion_matrix, f1_score , accuracy_score

model = GaussianNB()
model.fit(train.iloc[:,1:-1], train.iloc[:,-1])
Y_pred2 = model.predict(test.iloc[:,1:-1])
# print(Y_pred)

print("Confusion Matrix: \n", confusion_matrix(Y_test, Y_pred2))
print("Accuracy: ",accuracy_score(Y_test, Y_pred2)*100, "%")

Confusion Matrix: 
 [[4 0]
 [3 9]]
Accuracy:  81.25 %
