In [81]:
import pandas as pd
import numpy as np

In [82]:
# Load the dataset (semicolon-separated CSV).
# The path is a raw string: '\B' is not a recognised escape sequence, so the
# plain literal triggers an invalid-escape warning on current Python versions.
# The resulting path is unchanged.
df = pd.read_csv(r'D:\Bank_dataset.csv', sep=';')

In [83]:
# The prompt asks for a percentage, but the value is later passed to
# DataFrame.sample(frac=...), which expects a fraction. Accept both spellings:
# values above 1 (e.g. "80") are read as percentages, values up to 1 (e.g.
# "0.8") are kept as fractions, exactly as before.
_training_share = float(input("Enter the percentage of the data to use for training: "))
percentage = _training_share / 100 if _training_share > 1 else _training_share

# Both the training and the test set must be non-empty for the accuracy
# computations further down to be defined.
if not 0 < percentage < 1:
    raise ValueError(
        "Training share must be strictly between 0 and 100 percent, "
        f"got {_training_share!r}"
    )

In [84]:
# Split the dataset into training and testing sets based on the specified percentage.
# A fixed random_state makes the split, and therefore every accuracy reported
# from it, reproducible across kernel restarts.
train_df = df.sample(frac=percentage, random_state=42)
test_df = df.drop(train_df.index)

# drop() removes rows by index label, so the two parts only partition df when
# its index labels are unique; fail loudly if that ever stops being true.
assert len(train_df) + len(test_df) == len(df), "train/test split does not partition df"

In [85]:
# Names of the dataset columns handed to both classifiers as predictors.
features = [
    'age',
    'job',
    'marital',
    'education',
    'housing',
]

In [86]:
# Naive Bayes classifier and its helper
def _class_likelihoods(class_rows, features):
    """Estimate P(value | class) for every feature from the rows of one class.

    Returns ``{feature: {value: relative frequency}}``. Only values that
    actually occur in ``class_rows`` get an entry; the caller supplies the
    fallback probability for everything else.
    """
    n_rows = len(class_rows)
    if n_rows == 0:
        # No examples of this class in the training data: nothing to estimate.
        return {feature: {} for feature in features}
    return {
        feature: (class_rows[feature].value_counts() / n_rows).to_dict()
        for feature in features
    }


def bayesian_classifier(train_df, test_df, features, unseen_prob=0.000001):
    """Train a categorical naive Bayes model and return its test accuracy.

    Class priors and per-feature likelihoods are estimated from ``train_df``
    by relative frequency; every feature value is treated as a discrete
    category. Each row of ``test_df`` is labelled ``'yes'`` when the product
    of prior and likelihoods for ``'yes'`` is strictly larger than the one
    for ``'no'``, and ``'no'`` otherwise (ties go to ``'no'``).

    Parameters
    ----------
    train_df, test_df : pandas.DataFrame
        Must contain the ``features`` columns and a ``'y'`` column whose
        labels are ``'yes'`` / ``'no'``.
    features : iterable of str
        Column names used as predictors.
    unseen_prob : float, optional
        Probability used for any (value, class) pair with no training
        examples. This covers values absent from the training data and values
        that were seen, but never together with that class, so a single
        missing combination no longer forces the class score to exactly zero.

    Returns
    -------
    float
        Fraction of ``test_df`` rows whose predicted label equals ``'y'``.

    Raises
    ------
    ValueError
        If ``train_df`` or ``test_df`` has no rows.
    """
    if len(train_df) == 0:
        raise ValueError("train_df is empty; cannot estimate class priors")
    if len(test_df) == 0:
        raise ValueError("test_df is empty; accuracy is undefined")

    features = list(features)

    # Filter each class once instead of once per (feature, value) pair.
    yes_rows = train_df[train_df['y'] == 'yes']
    no_rows = train_df[train_df['y'] == 'no']

    prior_yes = len(yes_rows) / len(train_df)
    prior_no = len(no_rows) / len(train_df)

    likelihood_yes = _class_likelihoods(yes_rows, features)
    likelihood_no = _class_likelihoods(no_rows, features)

    # Accumulate the per-row class scores one feature column at a time.
    n_test = len(test_df)
    evidence_yes = [prior_yes] * n_test
    evidence_no = [prior_no] * n_test
    for feature in features:
        table_yes = likelihood_yes[feature]
        table_no = likelihood_no[feature]
        for position, value in enumerate(test_df[feature]):
            evidence_yes[position] *= table_yes.get(value, unseen_prob)
            evidence_no[position] *= table_no.get(value, unseen_prob)

    predictions = [
        'yes' if score_yes > score_no else 'no'
        for score_yes, score_no in zip(evidence_yes, evidence_no)
    ]

    n_correct = sum(
        predicted == actual for predicted, actual in zip(predictions, test_df['y'])
    )
    return n_correct / n_test

In [87]:
# Decision tree: node structure
class Node:
    """One node of a binary decision tree.

    A node whose ``prediction`` is not ``None`` acts as a leaf. Any other node
    is an internal node that tests ``feature`` and delegates to ``left`` or
    ``right``.
    """

    def __init__(self, feature=None, prediction=None, left=None, right=None):
        """Store the split feature, the leaf prediction and the two children."""
        self.feature, self.prediction = feature, prediction
        self.left, self.right = left, right

    def predict(self, data):
        """Return the label predicted for one sample.

        ``data`` is indexed with the node's feature name. A value equal to 0
        follows the left child; every other value follows the right child.
        """
        if self.prediction is not None:
            return self.prediction
        branch = self.left if data[self.feature] == 0 else self.right
        return branch.predict(data)

def _majority_leaf(data):
    """Return a leaf Node predicting the most frequent ``y`` label in ``data``."""
    return Node(prediction=data['y'].value_counts().idxmax())


def decision_tree_classifier(data, features):
    """Recursively build a binary decision tree and return its root Node.

    At each level the feature with the largest positive information gain is
    chosen and the rows are split into those where it equals 0 (left) and
    those where it equals 1 (right). Rows holding any other value are in
    neither partition.

    NOTE: a feature that never takes the value 0 or 1 yields two empty
    partitions, so if it is selected the result is a single majority-class
    leaf. ``Node.predict`` sends every non-zero value to the right child,
    whereas training only places rows equal to 1 there.

    Parameters
    ----------
    data : pandas.DataFrame
        Training rows with the ``features`` columns and a ``'y'`` label column.
    features : iterable of str
        Candidate split features. They are tried in iteration order and the
        first one with the highest gain wins, so pass a list for a
        deterministic tree.

    Returns
    -------
    Node
        Root of the fitted tree.

    Raises
    ------
    ValueError
        If ``data`` has no rows.
    """
    if len(data) == 0:
        raise ValueError("cannot build a decision tree from an empty dataset")

    labels = data['y']

    # Pure node: every row carries the same label.
    if len(labels.unique()) == 1:
        return Node(prediction=labels.iloc[0])

    # Nothing left to split on.
    if len(features) == 0:
        return _majority_leaf(data)

    best_feature = None
    best_gain = 0
    for feature in features:
        gain = information_gain(data, feature)
        if gain > best_gain:
            best_feature, best_gain = feature, gain

    # No feature improves on the parent: stop here instead of indexing the
    # frame with None.
    if best_feature is None:
        return _majority_leaf(data)

    left_data = data[data[best_feature] == 0]
    right_data = data[data[best_feature] == 1]

    # A split that leaves one side empty cannot be recursed into.
    if len(left_data) == 0 or len(right_data) == 0:
        return _majority_leaf(data)

    # Keep the caller's feature order so tie-breaking stays stable in subtrees.
    remaining = [feature for feature in features if feature != best_feature]

    return Node(
        feature=best_feature,
        left=decision_tree_classifier(left_data, remaining),
        right=decision_tree_classifier(right_data, remaining),
    )

def information_gain(data, feature):
    """Return the entropy reduction from splitting ``data`` on ``feature``.

    The split is binary: rows where ``feature`` equals 0 form the left
    partition and rows where it equals 1 form the right partition. Rows with
    any other value are in neither partition, so the two weights need not sum
    to 1.

    Parameters
    ----------
    data : pandas.DataFrame
        Rows with a ``feature`` column and a ``'y'`` label column.
    feature : str
        Name of the column to evaluate.

    Returns
    -------
    float
        Parent entropy minus the size-weighted entropies of the two
        partitions; 0 when ``data`` has no rows.
    """
    if len(data) == 0:
        # Same convention as entropy(): an empty sample has no information to
        # gain, and the weights below would otherwise divide by zero.
        return 0

    entropy_parent = entropy(data)

    left_data = data[data[feature] == 0]
    right_data = data[data[feature] == 1]

    weight_left = len(left_data) / len(data)
    weight_right = len(right_data) / len(data)

    return entropy_parent - weight_left * entropy(left_data) - weight_right * entropy(right_data)

def entropy(data):
    """Return the binary entropy, in bits, of the ``'y'`` labels in ``data``.

    Only the labels ``'yes'`` and ``'no'`` are counted. The result is 0 for an
    empty frame and whenever one of the two labels does not occur.
    """
    n_rows = len(data)
    if n_rows == 0:
        return 0

    labels = data['y']
    p_yes = int((labels == 'yes').sum()) / n_rows
    p_no = int((labels == 'no').sum()) / n_rows

    # log2(0) is undefined; a sample missing either label is treated as pure.
    if p_yes == 0 or p_no == 0:
        return 0

    return -p_yes * np.log2(p_yes) - p_no * np.log2(p_no)

In [88]:
# Compute the accuracy of the Bayesian classifier
bayesian_accuracy = bayesian_classifier(train_df, test_df, features)
print(f'Bayesian classifier accuracy: {bayesian_accuracy:.2f}')

# Compute the accuracy of the decision tree classifier.
# `features` is passed as-is rather than rebound to a set, so the global keeps
# its list type and the candidate features are tried in a fixed order.
decision_tree = decision_tree_classifier(train_df, features)
tree_predictions = test_df.apply(decision_tree.predict, axis=1)
decision_tree_accuracy = sum(tree_predictions == test_df['y']) / len(test_df)
print(f'Decision tree classifier accuracy: {decision_tree_accuracy:.2f}')

# Report the winner; equal accuracies are a tie, not a decision-tree win.
if bayesian_accuracy > decision_tree_accuracy:
    print('The Bayesian classifier is more accurate.')
elif bayesian_accuracy < decision_tree_accuracy:
    print('The decision tree classifier is more accurate.')
else:
    print('Both classifiers are equally accurate.')

Bayesian classifier accuracy: 0.90
Decision tree classifier accuracy: 0.89
The Bayesian classifier is more accurate.
