In [1]:
import numpy as np
import pandas as pd

# Visualization libraries
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split

from pgmpy.models import BayesianNetwork
from pgmpy.estimators import HillClimbSearch, BicScore, MaximumLikelihoodEstimator
from pgmpy.inference import VariableElimination

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Load the diabetes feature table from a CSV path relative to the working directory.
DATASET_CSV = "diabetes_data_new_features.csv"
df = pd.read_csv(DATASET_CSV)

In [3]:
# Bare last expression: the notebook renders the loaded frame as this cell's output.
df

Unnamed: 0,Gender,Polyuria,Polydipsia,sudden weight loss,weakness,Polyphagia,visual blurring,Itching,Irritability,muscle stiffness,Obesity,class,Age < 39,Age 40-60,Age >= 60
0,1,0,1,0,1,0,0,1,0,1,1,1,0,1,0
1,1,0,0,0,1,0,1,0,0,0,0,1,0,1,0
2,1,1,0,0,1,1,0,1,0,1,0,1,0,1,0
3,1,0,0,1,1,1,0,1,0,0,0,1,0,1,0
4,1,1,1,1,1,1,1,1,1,1,1,1,0,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
515,0,1,1,1,0,1,0,1,0,0,0,1,0,0,0
516,0,1,1,1,1,1,0,1,1,0,0,1,0,1,0
517,0,1,1,1,1,1,1,0,0,1,1,1,0,1,0
518,0,0,0,0,1,0,1,1,0,0,0,0,1,0,0


In [5]:
# Optional experiment (currently disabled): oversample the minority class with SMOTE.
# To try it, uncomment this cell together with the two commented-out resampling
# cells that follow the train/test split; the `imbalanced-learn` package must be installed.
# from imblearn.over_sampling import SMOTE

# smote = SMOTE(sampling_strategy='minority')

In [4]:
# Hold out a fifth of the rows for evaluation; the fixed seed makes the split repeatable.
TEST_FRACTION = 0.2
SPLIT_SEED = 42
train_data, test_data = train_test_split(
    df,
    test_size=TEST_FRACTION,
    random_state=SPLIT_SEED,
)

In [8]:
# Disabled SMOTE step: would oversample the training split only (features vs. the
# 'class' label), leaving test_data untouched.
# X_resampled, y_resampled = smote.fit_resample(train_data.drop(columns=['class']), train_data['class'])

In [9]:
# Disabled SMOTE step: would recombine the resampled features and labels into train_data.
# train_data = pd.concat([X_resampled, y_resampled], axis=1)

In [5]:
# Learn the network structure from the training split only: a hill-climbing
# search over candidate DAGs, with BIC as the scoring function.
bic_score = BicScore(train_data)
hc = HillClimbSearch(train_data)
best_model_structure = hc.estimate(scoring_method=bic_score)

# List every directed edge of the learned structure as a (parent, child) tuple.
print("Adjusted Model Structure:")
for parent, child in best_model_structure.edges():
    print((parent, child))

  0%|          | 26/1000000 [00:00<5:10:57, 53.60it/s]

Adjusted Model Structure:
('Polyuria', 'class')
('Polyuria', 'Polydipsia')
('Polydipsia', 'weakness')
('Polydipsia', 'Age < 39')
('Polyphagia', 'Polyuria')
('Polyphagia', 'Age < 39')
('visual blurring', 'Itching')
('visual blurring', 'Age >= 60')
('visual blurring', 'Polydipsia')
('Itching', 'weakness')
('Itching', 'Age < 39')
('muscle stiffness', 'visual blurring')
('muscle stiffness', 'Polyphagia')
('class', 'Polydipsia')
('class', 'sudden weight loss')
('class', 'Gender')
('class', 'visual blurring')
('class', 'Irritability')
('class', 'Itching')
('Age < 39', 'Age 40-60')
('Age < 39', 'Age >= 60')
('Age < 39', 'Obesity')
('Age 40-60', 'sudden weight loss')
('Age >= 60', 'Age 40-60')
('Age >= 60', 'Irritability')





In [6]:
# Build the Bayesian network from the learned structure and estimate its CPDs.
#
# Constructing the network from the edge list alone silently drops any variable
# that the structure search left without edges. Such a column would be unknown
# to the model and rejected as evidence during inference. Adding the learned
# node set explicitly keeps every variable in the network; this is a no-op for
# nodes that already appear in an edge.
model = BayesianNetwork(best_model_structure.edges())
model.add_nodes_from(best_model_structure.nodes())

# Conditional probability tables are fitted by maximum likelihood on the
# training split only, so the test split stays unseen.
model.fit(train_data, estimator=MaximumLikelihoodEstimator)

# Exact-inference engine over the fitted network.
inference = VariableElimination(model)

In [7]:
# Evaluate the fitted network on the held-out rows: for each test row, ask for
# the most probable 'class' given the other observed columns (MAP query) and
# compare it with the true label.

# Only variables that exist in the network are valid evidence; a column that
# is not a node of the model would make every query raise.
model_nodes = set(model.nodes())

predictions = []   # (predicted, actual) pairs for rows that were scored
failed_rows = []   # (row index, exception) pairs for rows inference rejected

for row_index, row in test_data.iterrows():
    evidence = row.to_dict()
    actual_outcome = evidence.pop('class', None)
    evidence = {
        variable: state
        for variable, state in evidence.items()
        if variable in model_nodes
    }
    try:
        # show_progress=False: otherwise each query emits its own progress
        # bars and the output floods with one pair of bars per test row.
        predicted_outcome = inference.map_query(
            variables=['class'],
            evidence=evidence,
            show_progress=False,
        )['class']
    except Exception as e:
        # A single bad row (e.g. a state never seen in training) should not
        # abort the whole evaluation; record it and keep going so the
        # accuracy is not computed on an arbitrary prefix of the test set.
        failed_rows.append((row_index, e))
        continue
    predictions.append((predicted_outcome, actual_outcome))

# Report failures explicitly so a partial evaluation is never mistaken for a full one.
for row_index, error in failed_rows:
    print("Error processing row:", row_index)
    print("Exception:", error)
if failed_rows:
    print(f"Skipped {len(failed_rows)} of {len(test_data)} test rows due to inference errors.")

if predictions:
    correct_predictions = sum(1 for pred, actual in predictions if pred == actual)
    accuracy = correct_predictions / len(predictions)
    print(f'Accuracy: {accuracy:.2f}')
else:
    print("No predictions made.")


Finding Elimination Order: : : 0it [00:00, ?it/s]
0it [00:00, ?it/s]
Finding Elimination Order: : : 0it [00:00, ?it/s]
0it [00:00, ?it/s]
Finding Elimination Order: : : 0it [00:00, ?it/s]
0it [00:00, ?it/s]
Finding Elimination Order: : : 0it [00:00, ?it/s]
0it [00:00, ?it/s]
Finding Elimination Order: : : 0it [00:00, ?it/s]
0it [00:00, ?it/s]
Finding Elimination Order: : : 0it [00:00, ?it/s]
0it [00:00, ?it/s]
Finding Elimination Order: : : 0it [00:00, ?it/s]
0it [00:00, ?it/s]
Finding Elimination Order: : : 0it [00:00, ?it/s]
0it [00:00, ?it/s]
Finding Elimination Order: : : 0it [00:00, ?it/s]
0it [00:00, ?it/s]
Finding Elimination Order: : : 0it [00:00, ?it/s]
0it [00:00, ?it/s]
Finding Elimination Order: : : 0it [00:00, ?it/s]
0it [00:00, ?it/s]
Finding Elimination Order: : : 0it [00:00, ?it/s]
0it [00:00, ?it/s]
Finding Elimination Order: : : 0it [00:00, ?it/s]
0it [00:00, ?it/s]
Finding Elimination Order: : : 0it [00:00, ?it/s]
0it [00:00, ?it/s]
Finding Elimination Order: : : 0it

Accuracy: 0.93



