In [1]:
import tkinter as tk
from tkinter import ttk
import pandas as pd

In [2]:
def custom_discretize_numerical_column_uniform(column, num_bins=5):
    min_val = column.min()
    max_val = column.max()
    bin_width = (max_val - min_val) / num_bins

    # Assign bin labels based on uniform intervals
    discretized_column = ((column - min_val) / bin_width).astype(int)

    return discretized_column

def discretize_dataframe_uniform(df,threhold=10, num_bins=5):

    for col in df.columns:
        if df[col].dtype == 'object' or len(df[col].unique()) < threhold:
            # Column is categorical or has too many unique values, skip discretization
            continue
        else:
            # Column is numerical and suitable for discretization
            df[col + '_bins'] = custom_discretize_numerical_column_uniform(df[col], num_bins=num_bins)
            df.drop(columns=[col], inplace=True)

    return df

In [3]:
class NiveBayesClassifier():
    
    def __init__(self, data, label):
        self.data=data
        self.target=label
        self.labels= self.data[label].unique().tolist()
        self.cols= data.columns.tolist()
        v_lab={}
        self.tables={}
        self.label_c={}
        '''
        label_c is a dictionary count of each label in the target 
        tables are dictionaries that holds all the naive bayes tables 
        self.labels is the uniqu val of the target columns 

        '''
    
    #count the target label only 
    def count_labels(self):
        sum=0
        for label in self.labels:
            count = (self.data[self.target] == label).sum()
            self.label_c[str(label)]=count
            sum+=count    
        self.label_c["total"]=sum

        
    #construct table for a column in the data Frame 
    def cons_table(self,col):
        table={}
        uniq_val = self.data[col].unique().tolist()
        for cat in uniq_val:
            sub_table={}
            for label in self.labels:
                val=((self.data[self.target] == label ) & (self.data[col] == cat )).sum()
                sub_table[str(label)]= round(val/self.label_c[str(label)],3)
            table[str(cat)]=sub_table
        return table

    def cons_table_target(self):
        table={}
        for label in self.labels:
            table[str(label)]= self.label_c[str(label)]/self.label_c["total"]
        return table
    
    #for construction of every table including the target table         
    def con_tables(self):
        #unique of the label columns
        #loop over each label
        for col in self.cols:
            if(col==self.target):
                self.tables[col]=self.cons_table_target()

            else:
                self.tables[col]=self.cons_table(col)

    def naive(self):
        self.count_labels()
        self.con_tables()
        print(self.tables)
        print(self.label_c)

    
    def test(self,test):
        correct=0
        for index,row in test.iterrows():
            max=0
            max_label="none"
            for lab in self.labels :
                sum=1
                for col in self.cols:#age
                    if(col!=self.target):#age != depression 
                        cat =str(row[col])#high
                        lb=str(lab)
                        val=self.tables[str(col)][cat][lb]
                        sum*= val
                    else:
                        sum*=self.tables[str(col)][str(lab)]
                
                print("row number : "+ str(index)+ " for label : "+str(lab) + " = "+str(sum))
                #selects the output label
                if(sum>max):
                    max=sum
                    max_label = lab 
            print(max_label)
            if(max_label==row[self.target]):
                correct+=1
        
        return correct/len(test)

In [11]:
def on_submit():
    data = pd.read_csv("diabetes.csv")
    df=discretize_dataframe_uniform(data)
    training_size = int(training_size_entry.get())
    test_size_1 = int(test_size_entry.get())
    
    total_rows = len(df)
    train_size = int(0.75 * total_rows)  # 75% for training
    test_size = total_rows - train_size  # Remaining 25% for testing

    # Split the DataFrame into training and testing sets
    df_train = df.iloc[:train_size]   # First 75% for training
    df_test = df.iloc[train_size:]   
    
    nb=NiveBayesClassifier(df_train[:training_size],"diabetes")
    nb.naive()
    #print(df_test)
    accuracy=20
    accuracy = nb.test(df_test[:test_size_1])
    result_label.config(text=f"Accuracy: {accuracy:.2f}%")

# Create main application window
root = tk.Tk()
root.title("Accuracy Calculator")

# Create input widgets
training_label = ttk.Label(root, text="Training Set Size:")
training_label.grid(row=0, column=0, padx=10, pady=5, sticky="e")
training_size_entry = ttk.Entry(root, width=10)
training_size_entry.grid(row=0, column=1, padx=10, pady=5)

test_label = ttk.Label(root, text="Test Set Size:")
test_label.grid(row=1, column=0, padx=10, pady=5, sticky="e")
test_size_entry = ttk.Entry(root, width=10)
test_size_entry.grid(row=1, column=1, padx=10, pady=5)

submit_button = ttk.Button(root, text="Calculate Accuracy", command=on_submit)
submit_button.grid(row=2, column=0, columnspan=2, pady=10)

result_label = ttk.Label(root, text="", font=("Helvetica", 12, "bold"))
result_label.grid(row=3, column=0, columnspan=2, pady=10)

# Start the main event loop
root.mainloop()


{'gender': {'Female': {'0': 0.593, '1': 0.537}, 'Male': {'0': 0.407, '1': 0.463}}, 'hypertension': {'0': {'0': 0.943, '1': 0.829}, '1': {'0': 0.057, '1': 0.171}}, 'heart_disease': {'1': {'0': 0.028, '1': 0.073}, '0': {'0': 0.972, '1': 0.927}}, 'smoking_history': {'never': {'0': 0.377, '1': 0.439}, 'No Info': {'0': 0.383, '1': 0.098}, 'current': {'0': 0.061, '1': 0.171}, 'former': {'0': 0.092, '1': 0.171}, 'ever': {'0': 0.039, '1': 0.024}, 'not current': {'0': 0.048, '1': 0.098}}, 'diabetes': {'0': 0.918, '1': 0.082}, 'age_bins': {'5': {'0': 0.054, '1': 0.146}, '3': {'0': 0.2, '1': 0.341}, '1': {'0': 0.233, '1': 0.024}, '2': {'0': 0.227, '1': 0.195}, '4': {'0': 0.113, '1': 0.293}, '0': {'0': 0.172, '1': 0.0}}, 'bmi_bins': {'0': {'0': 0.481, '1': 0.293}, '1': {'0': 0.503, '1': 0.659}, '2': {'0': 0.015, '1': 0.024}, '3': {'0': 0.0, '1': 0.024}}, 'HbA1c_level_bins': {'2': {'0': 0.623, '1': 0.463}, '1': {'0': 0.181, '1': 0.0}, '0': {'0': 0.196, '1': 0.0}, '5': {'0': 0.0, '1': 0.146}, '3': {