In [1]:
#importing libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import random

%matplotlib inline

# Reading data

In [2]:
# Load the training and testing splits from their CSV files
train = pd.read_csv('datasets/train_AER_credit_card_data.csv')
test = pd.read_csv('datasets/test_AER_credit_card_data.csv')

# Separate predictors from the target for each split:
# the features are 'owner', 'selfemp' and 'dependents'; the label is 'card'
train_x = train[['owner', 'selfemp', 'dependents']]
test_x = test[['owner', 'selfemp', 'dependents']]
train_y = train['card']
test_y = test['card']

In [3]:
class My_NaieveBayes:
    """Naive Bayes classifier for categorical features with m-estimate smoothing.

    Each conditional probability is estimated as

        P(value | class) = (n_c + m * p) / (n + m)

    where ``n_c`` is the number of training rows of the class that have the
    value, ``n`` is the number of training rows of the class, ``m`` is the
    number of distinct values the feature takes in the training data and
    ``p = 1 / m`` (a uniform prior over the feature's values).

    Attributes:
        train: training features and target concatenated column-wise.
        class_col_name: name of the target column (``train_Y.name``).
        columns: list of feature column names.
        output_classes: sorted list of the distinct target labels.
        m: dict ``{feature name: number of distinct feature values}``.
        p: dict ``{feature name: 1 / number of distinct feature values}``.
    """

    def __init__(self,train_X,train_Y):
        """Store the training data and precompute everything prediction needs.

        Args:
            train_X: DataFrame of categorical feature columns.
            train_Y: Series of class labels; its ``name`` is used as the
                target column name.
        """
        # features and target side by side (aligned on the index by concat)
        self.train =pd.concat([train_X,train_Y],axis=1)

        # target col name
        self.class_col_name = train_Y.name

        # get all column names
        self.columns = list(train_X.columns)

        # unique output classes in target
        self.output_classes =list(np.unique(train_Y))

        # m-estimate parameters, one entry per feature column
        self.m = {} # dictionary of {'col_name': (number of unique feature values)}
        self.p = {} # dictionary of {'col_name': 1/(number of unique feature values)}
        for col in self.columns:
            n_values = len(np.unique(self.train[col]))
            self.p[col] = 1/n_values
            self.m[col] = n_values

        # Per-class statistics depend only on the training data, so they are
        # computed once here instead of re-filtering the frame on every
        # prediction.
        self._class_size = {}    # {class label: number of training rows}
        self._log_prior = {}     # {class label: log(P(class))}
        self._value_counts = {}  # {class label: {col: {feature value: count}}}
        n_rows = len(self.train)
        target = self.train[self.class_col_name]
        for class_label in self.output_classes:
            class_df = self.train[target == class_label]
            self._class_size[class_label] = len(class_df)
            # a class with no rows gets log(0) = -inf, i.e. it can never win
            with np.errstate(divide='ignore'):
                self._log_prior[class_label] = np.log(len(class_df)/n_rows)
            self._value_counts[class_label] = {
                col: class_df[col].value_counts().to_dict()
                for col in self.columns
            }

    def predit(self,test):
        """Return the most probable class label for a single instance.

        Scores are accumulated as sums of log-probabilities rather than as a
        product of probabilities, so they do not underflow to zero when there
        are many features.

        Args:
            test: one row, indexable by feature column name (e.g. a Series).

        Returns:
            The element of ``self.output_classes`` with the highest posterior
            score; on a tie, the first such class.
        """
        # log posterior score (up to a constant) for each output class
        log_scores = []

        for class_label in self.output_classes:
            n_class = self._class_size[class_label]
            counts = self._value_counts[class_label]

            # start from the log prior and add each feature's log likelihood
            score = self._log_prior[class_label]
            for col in self.columns:
                # values never seen with this class count as zero matches
                Nc = counts[col].get(test[col], 0) + (self.p[col]*self.m[col])
                N = n_class + self.m[col]
                score = score + np.log(Nc/N)

            log_scores.append(score)

        # return class with max score
        return self.output_classes[np.argmax(log_scores)]

    def predict(self,test):
        """Correctly spelled entry point; also accepts a whole DataFrame.

        Args:
            test: a single row (handled exactly like ``predit``) or a
                DataFrame of rows.

        Returns:
            A single label for a single row, or a list with one label per
            row for a DataFrame.
        """
        if isinstance(test, pd.DataFrame):
            return [self.predit(row) for _, row in test.iterrows()]
        return self.predit(test)
        

In [4]:
# Build the Naive Bayes model from the training features and labels
model = My_NaieveBayes(train_X=train_x, train_Y=train_y)


In [5]:
# Classify every row of the test set, one row at a time
P = [model.predit(test_x.iloc[row_idx]) for row_idx in range(len(test_x))]

# Put the features, the true labels and the predicted labels side by side
pred = pd.concat([test_x, test_y], axis=1).assign(prediction=np.array(P))

# Persist the predictions as CSV, then preview the first rows
pred.to_csv('predictions.csv')
pred.head()


Unnamed: 0,owner,selfemp,dependents,card,prediction
0,yes,no,0,positive,positive
1,yes,no,0,positive,positive
2,no,no,0,positive,positive
3,yes,no,2,positive,positive
4,no,no,0,positive,positive


# Calculating accuracy, sensitivity and specificity

In [6]:
# Confusion-matrix cells, with 'positive' as the positive class
TP = TN = FP = FN = 0
for predicted, actual in zip(P, test_y):
    if predicted == 'positive':
        if actual == 'positive':    # predicted positive, actually positive
            TP += 1
            continue
        if actual == 'negative':    # predicted positive, actually negative
            FP += 1
            continue
    elif predicted == 'negative' and actual == 'positive':  # missed positive
        FN += 1
        continue
    # every remaining combination is counted as a true negative
    TN += 1

# accuracy    = correct predictions / all predictions
# sensitivity = true positive rate
# specificity = true negative rate
total = TN + TP + FN + FP
accuracy = (TN + TP) / total
sensitivity = TP / (TP + FN)
specificity = TN / (TN + FP)

print('accuracy: ', accuracy, 'sensitivity :',sensitivity, 'specificity :',specificity)

accuracy:  0.7746212121212122 sensitivity : 0.9975550122249389 specificity : 0.008403361344537815


In [7]:
# Show the raw confusion-matrix counts in the order (TN, TP, FN, FP)
(TN, TP, FN, FP)

(1, 408, 1, 118)