In [1]:
import numpy as np
import pandas as pd
import math

In [2]:
# Load the training records; the path is relative to the notebook's working directory.
data = pd.read_csv("Data.csv")
# Bare expression on the last line so the notebook renders the frame as the cell output.
data

Unnamed: 0,Refund,Marital Status,Taxable Income,Evade
0,Yes,Single,125,No
1,No,Married,100,No
2,No,Single,70,No
3,Yes,Married,120,No
4,No,Divorced,95,Yes
5,No,Married,60,No
6,Yes,Divorced,220,No
7,No,Single,85,Yes
8,No,Married,75,No
9,No,Single,90,Yes


In [3]:
def Calculate_Prior_Prob(data, target):
    """Estimate the class priors P(target = 'Yes') and P(target = 'No').

    Parameters
    ----------
    data : pandas.DataFrame
        Training records.
    target : str
        Name of the label column; rows are matched against the literal
        strings 'Yes' and 'No'.

    Returns
    -------
    tuple of float
        ``(prob_yes, prob_no)``: each class's row count divided by the total
        number of rows in ``data``.

    Notes
    -----
    As a side effect, the first rows of each class subset are printed.
    An empty ``data`` frame results in a ``ZeroDivisionError``.
    """
    labels = data[target]
    positives = data[labels == 'Yes']
    negatives = data[labels == 'No']

    # Preview both partitions, each followed by a blank separator line.
    for subset in (positives, negatives):
        print(subset.head(), '\n')

    n_rows = len(data)
    return (len(positives) / n_rows, len(negatives) / n_rows)

In [4]:
def Discrete_Attribute_Prob(data, target, disc_attr, alpha=0, ndigits=2):
    """Estimate P(disc_attr = v | class) for every observed value v.

    Parameters
    ----------
    data : pandas.DataFrame
        Training records.
    target : str
        Name of the label column; rows are matched against the literal
        strings 'Yes' and 'No'.
    disc_attr : str
        Name of the categorical attribute column.
    alpha : float, optional
        Additive (Laplace) smoothing constant. With the default ``0`` the
        plain relative frequency is returned; ``alpha=1`` prevents a value
        that never occurs in a class from producing a zero likelihood that
        would cancel the whole product downstream.
    ndigits : int or None, optional
        Number of decimals the likelihoods are rounded to (default ``2``).
        Pass ``None`` to keep full precision.

    Returns
    -------
    tuple of dict
        ``(attr_yes, attr_no)``: each maps every unique value of
        ``disc_attr`` in ``data`` to its likelihood within that class.

    Notes
    -----
    If a class has no rows and ``alpha`` is 0, the likelihood is undefined;
    ``0.0`` is reported for every value instead of raising
    ``ZeroDivisionError``.
    """
    yes = data[data[target] == 'Yes']
    no = data[data[target] == 'No']

    # Unique values are taken from the whole data set so that both tables
    # share the same keys, even for values absent from one class.
    unique_val = data[disc_attr].unique()
    n_values = len(unique_val)

    def likelihood_table(subset):
        # Denominator of the (optionally smoothed) relative frequency.
        denominator = len(subset) + alpha * n_values
        column = subset[disc_attr]
        table = {}
        for value in unique_val:
            count = int((column == value).sum())
            # An empty class without smoothing has no defined likelihood.
            prob = (count + alpha) / denominator if denominator else 0.0
            table[value] = prob if ndigits is None else round(prob, ndigits)
        return table

    return (likelihood_table(yes), likelihood_table(no))

In [5]:
def Continuous_Attribute_Prob(data, target, cont_attr, val):
    """Gaussian class-conditional density of ``val`` for a continuous attribute.

    For each class ('Yes' / 'No' in the ``target`` column) the mean and the
    sample variance of ``cont_attr`` are estimated from the rows of that
    class, and the normal density at ``val`` is evaluated:

        exp(-(val - mean)^2 / (2 * var)) / sqrt(2 * pi * var)

    Parameters
    ----------
    data : pandas.DataFrame
        Training records.
    target : str
        Name of the label column; rows are matched against the literal
        strings 'Yes' and 'No'.
    cont_attr : str
        Name of the numeric attribute column.
    val : float
        Attribute value at which the densities are evaluated.

    Returns
    -------
    tuple of float
        ``(attr_yes, attr_no)``: the two densities, each rounded to 5 decimals.

    Notes
    -----
    The statistics are computed on the ``cont_attr`` column only. Calling
    ``mean()`` / ``var()`` on the whole frame fails on recent pandas releases
    when the frame also contains string columns. The mean and variance are
    used at full precision; only the final density is rounded.
    A class whose values all coincide (zero variance) raises
    ``ZeroDivisionError``.
    """
    def gaussian_density(values):
        mean = float(values.mean())
        # pandas' var() uses ddof=1 by default, i.e. the sample variance.
        variance = float(values.var())
        exponent = -((val - mean) ** 2) / (2 * variance)
        density = math.exp(exponent) / math.sqrt(2 * math.pi * variance)
        return round(density, 5)

    labels = data[target]
    yes_values = data.loc[labels == 'Yes', cont_attr]
    no_values = data.loc[labels == 'No', cont_attr]

    return (gaussian_density(yes_values), gaussian_density(no_values))

In [6]:
# Class priors for the 'Evade' label (the helper also prints a preview of each class).
prob_yes, prob_no = Calculate_Prior_Prob(data, 'Evade')
print(
    f'Prior Probability of Yes (Evade): {prob_yes}',
    f'Prior Probability of No (Evade): {prob_no}',
    sep='\n',
)

# Per-class likelihood tables for the two categorical attributes.
prob_yes_1, prob_no_1 = Discrete_Attribute_Prob(data, 'Evade', 'Refund')
prob_yes_2, prob_no_2 = Discrete_Attribute_Prob(data, 'Evade', 'Marital Status')

print("\nProbability for Refund")
print("Probability of Yes: ", prob_yes_1)
print("Probability of No: ", prob_no_1)

print("\nProbability for Marital Status")
print("Probability of Yes: ", prob_yes_2)
print("Probability of No: ", prob_no_2)

  Refund Marital Status  Taxable Income Evade
4     No       Divorced              95   Yes
7     No         Single              85   Yes
9     No         Single              90   Yes 

  Refund Marital Status  Taxable Income Evade
0    Yes         Single             125    No
1     No        Married             100    No
2     No         Single              70    No
3    Yes        Married             120    No
5     No        Married              60    No 

Prior Probability of Yes (Evade): 0.3
Prior Probability of No (Evade): 0.7

Probability for Refund
Probability of Yes:  {'Yes': 0.0, 'No': 1.0}
Probability of No:  {'Yes': 0.43, 'No': 0.57}

Probability for Marital Status
Probability of Yes:  {'Single': 0.67, 'Married': 0.0, 'Divorced': 0.33}
Probability of No:  {'Single': 0.29, 'Married': 0.57, 'Divorced': 0.14}


In [7]:
# Gaussian likelihoods of a taxable income of 120 under each class.
prob_yes_3, prob_no_3 = Continuous_Attribute_Prob(data, 'Evade', 'Taxable Income', 120)

# Naive Bayes score per class: prior x P(Refund=No | class) x P(Married | class) x income density.
result_yes = round(prob_yes * prob_yes_1['No'] * prob_yes_2['Married'] * prob_yes_3, 5)
result_no = round(prob_no * prob_no_1['No'] * prob_no_2['Married'] * prob_no_3, 5)

print("Predict Label for P(No, Married, 120K)")
print("\nPredicting Yes", result_yes)
print("\nPredicting No", result_no)

# The class with the larger score wins; ties fall to 'No Evade'.
print('\nLabel for P(No, Married, 120K) =',
      'Yes Evade' if result_yes > result_no else 'No Evade')

Predict Label for P(No, Married, 120K)

Predicting Yes 0.0

Predicting No 0.00164

Label for P(No, Married, 120K) = No Evade


In [8]:
# Gaussian likelihoods of a taxable income of 90 under each class.
prob_yes_3, prob_no_3 = Continuous_Attribute_Prob(data, 'Evade', 'Taxable Income', 90)

# Naive Bayes score per class: prior x P(Refund=Yes | class) x P(Divorced | class) x income density.
result_yes = round(prob_yes * prob_yes_1['Yes'] * prob_yes_2['Divorced'] * prob_yes_3, 5)
result_no = round(prob_no * prob_no_1['Yes'] * prob_no_2['Divorced'] * prob_no_3, 5)

print("Predict Label for P(Yes, Divorced, 90K)")
print("\nPredicting Yes", result_yes)
print("\nPredicting No", result_no)

# The class with the larger score wins; ties fall to 'No Evade'.
print('\nLabel for P(Yes, Divorced, 90K) =',
      'Yes Evade' if result_yes > result_no else 'No Evade')

Predict Label for P(Yes, Divorced, 90K)

Predicting Yes 0.0

Predicting No 0.00029

Label for P(Yes, Divorced, 90K) = No Evade


In [9]:
# Gaussian likelihoods of a taxable income of 60 under each class.
prob_yes_3, prob_no_3 = Continuous_Attribute_Prob(data, 'Evade', 'Taxable Income', 60)

# Naive Bayes score per class: prior x P(Refund=No | class) x P(Married | class) x income density.
result_yes = round(prob_yes * prob_yes_1['No'] * prob_yes_2['Married'] * prob_yes_3, 5)
result_no = round(prob_no * prob_no_1['No'] * prob_no_2['Married'] * prob_no_3, 5)

print("Predict Label for P(No, Married, 60K)")
print("\nPredicting Yes", result_yes)
print("\nPredicting No", result_no)

# The class with the larger score wins; ties fall to 'No Evade'.
print('\nLabel for P(No, Married, 60K) =',
      'Yes Evade' if result_yes > result_no else 'No Evade')

Predict Label for P(No, Married, 60K)

Predicting Yes 0.0

Predicting No 0.00109

Label for P(No, Married, 60K) = No Evade
