In [1]:
import pandas as pd
import matplotlib.pyplot as plt

In [2]:
import networkx

In [3]:
from pgmpy.models import DiscreteBayesianNetwork

In [4]:
df=pd.read_csv("C:\\Users\\Amoghaya\\Desktop\\Datasets\\Heart_Disease.csv")

In [5]:
df=df.drop(columns=['id','dataset'])

In [6]:
df['age']=pd.cut(df['age'],bins=[0,40,55,100],labels=[0,1,2])
df['trestbps']=pd.cut(df['trestbps'],bins=[80,120,140,200],labels=[0,1,2])
df['oldpeak']=pd.cut(df['oldpeak'],bins=[-1,1,3,10],labels=[0,1,2])
df['chol']=pd.cut(df['chol'],bins=[0,201,402,603],labels=[0,1,2])
df['thalch']=pd.cut(df['thalch'],bins=[60,107,154,202],labels=[0,1,2])

In [7]:
df['cp']=pd.Categorical(df['cp']).codes
df['sex']=pd.Categorical(df['sex']).codes
df['restecg']=pd.Categorical(df['restecg']).codes
df['fbs']=pd.Categorical(df['fbs']).codes
df['thal']=pd.Categorical(df['thal']).codes
df['exang']=pd.Categorical(df['exang']).codes
df['slope']=pd.Categorical(df['slope']).codes
df['ca']=pd.Categorical(df['ca']).codes

In [8]:
df = df.fillna(df.mode().iloc[0])  # Safely fills NaNs
df = df.astype(int)  # Now works since all values are numeric

In [9]:
model=DiscreteBayesianNetwork([
    ('Age','HeartDisease'),
    ('Cholestrol','HeartDisease'),
    ('ChestPain','HeartDisease')
])

In [10]:
from pgmpy.estimators import HillClimbSearch

In [11]:
from pgmpy.estimators import BDeu

In [12]:
est=HillClimbSearch(df)
best_model=est.estimate(scoring_method=BDeu(df),max_iter=10000)

INFO:pgmpy: Datatype (N=numerical, C=Categorical Unordered, O=Categorical Ordered) inferred from data: 
 {'age': 'N', 'sex': 'N', 'cp': 'N', 'trestbps': 'N', 'chol': 'N', 'fbs': 'N', 'restecg': 'N', 'thalch': 'N', 'exang': 'N', 'oldpeak': 'N', 'slope': 'N', 'ca': 'N', 'thal': 'N', 'num': 'N'}
INFO:pgmpy: Datatype (N=numerical, C=Categorical Unordered, O=Categorical Ordered) inferred from data: 
 {'age': 'N', 'sex': 'N', 'cp': 'N', 'trestbps': 'N', 'chol': 'N', 'fbs': 'N', 'restecg': 'N', 'thalch': 'N', 'exang': 'N', 'oldpeak': 'N', 'slope': 'N', 'ca': 'N', 'thal': 'N', 'num': 'N'}
INFO:pgmpy: Datatype (N=numerical, C=Categorical Unordered, O=Categorical Ordered) inferred from data: 
 {'age': 'N', 'sex': 'N', 'cp': 'N', 'trestbps': 'N', 'chol': 'N', 'fbs': 'N', 'restecg': 'N', 'thalch': 'N', 'exang': 'N', 'oldpeak': 'N', 'slope': 'N', 'ca': 'N', 'thal': 'N', 'num': 'N'}


  0%|          | 0/10000 [00:00<?, ?it/s]

In [13]:
print(best_model.edges())

[('age', 'trestbps'), ('sex', 'thal'), ('cp', 'num'), ('fbs', 'ca'), ('fbs', 'chol'), ('exang', 'cp'), ('exang', 'thalch'), ('exang', 'trestbps'), ('exang', 'num'), ('exang', 'oldpeak'), ('oldpeak', 'cp'), ('slope', 'oldpeak'), ('slope', 'ca'), ('slope', 'exang'), ('ca', 'thal'), ('ca', 'restecg'), ('ca', 'thalch'), ('num', 'sex'), ('num', 'age'), ('num', 'fbs')]


In [14]:
from pgmpy.estimators import MaximumLikelihoodEstimator

In [15]:
print(df.columns.tolist())

['age', 'sex', 'cp', 'trestbps', 'chol', 'fbs', 'restecg', 'thalch', 'exang', 'oldpeak', 'slope', 'ca', 'thal', 'num']


In [16]:
print(model.nodes)

['Age', 'HeartDisease', 'Cholestrol', 'ChestPain']


In [23]:
df_renamed = df.rename(columns={
    'age': 'Age',
    'chol': 'Cholestrol',
    'cp': 'ChestPain',
    'num': 'HeartDisease'
})


In [24]:
model.fit(df_renamed, estimator=MaximumLikelihoodEstimator)

INFO:pgmpy: Datatype (N=numerical, C=Categorical Unordered, O=Categorical Ordered) inferred from data: 
 {'Age': 'N', 'sex': 'N', 'ChestPain': 'N', 'trestbps': 'N', 'Cholestrol': 'N', 'fbs': 'N', 'restecg': 'N', 'thalch': 'N', 'exang': 'N', 'oldpeak': 'N', 'slope': 'N', 'ca': 'N', 'thal': 'N', 'HeartDisease': 'N'}


<pgmpy.models.DiscreteBayesianNetwork.DiscreteBayesianNetwork at 0x151a4d0fd90>

In [27]:
from pgmpy.inference import VariableElimination


age_input = int(input("Enter Age category (0: <40, 1: 40-55, 2: >55): "))
chol_input = int(input("Enter Cholestrol category (0: Low, 1: Medium, 2: High): "))
cp_input = int(input("Enter ChestPain category (0: typical angina, 1: atypical angina, 2: non-anginal, 3: asymptomatic): "))


inference = VariableElimination(model)


result = inference.query(
    variables=['HeartDisease'],
    evidence={'Age': age_input, 'Cholestrol': chol_input, 'ChestPain': cp_input}
)


for state, prob in zip(result.state_names['HeartDisease'], result.values):
    print(f"Probability of HeartDisease level {state}: {prob:.2%}")


Enter Age category (0: <40, 1: 40-55, 2: >55): 1
Enter Cholestrol category (0: Low, 1: Medium, 2: High): 2
Enter ChestPain category (0: typical angina, 1: atypical angina, 2: non-anginal, 3: asymptomatic): 2
Probability of HeartDisease level 0: 0.00%
Probability of HeartDisease level 1: 100.00%
Probability of HeartDisease level 2: 0.00%
Probability of HeartDisease level 3: 0.00%
Probability of HeartDisease level 4: 0.00%
