In [48]:
# Cell 2 â€” Imports + safe pgmpy import (handles different pgmpy versions)
import os
import numpy as np
import pandas as pd

# Try imports for pgmpy in a robust way (different versions have slightly different structure)
try:
    from pgmpy.models import BayesianModel
    from pgmpy.estimators import MaximumLikelihoodEstimator
    from pgmpy.inference import VariableElimination
    print("Imported pgmpy from pgmpy.models (usual import).")
except Exception as e1:
    try:
        # alternative location in some versions
        from pgmpy.models.BayesianModel import BayesianModel
        from pgmpy.estimators import MaximumLikelihoodEstimator
        from pgmpy.inference import VariableElimination
        print("Imported pgmpy from pgmpy.models.BayesianModel (alternative).")
    except Exception as e2:
        print("WARNING: Could not import BayesianModel using standard paths.")
        print("Errors:", e1, "\n", e2)
        raise


Imported pgmpy from pgmpy.models (usual import).


In [47]:
# Cell 1 â€” Install required packages (run once)
# If you're in a restricted environment or already installed packages, this will be quick.
# If pip shows an update notice, ignore for now â€” the packages below are what we need.

!pip install --quiet pandas numpy pgmpy==0.1.23 scipy



[notice] A new release of pip is available: 25.2 -> 25.3
[notice] To update, run: python.exe -m pip install --upgrade pip


In [49]:
# Cell 3 â€” Load dataset (tries multiple fallbacks)
# Preferred filename: 'heart.csv' (same as in the lab manual code)
dataset_path = "heart.csv"

def load_heart_dataset(path=dataset_path):
    # Attempt 1: local csv
    if os.path.exists(path):
        print(f"Loading local file: {path}")
        df = pd.read_csv(path)
        return df

    # Attempt 2: common column name variations from popular heart datasets
    # Try a few common default names (only if internet is allowed; otherwise skip)
    print(f"'{path}' not found locally. Creating a small fallback sample (toy) dataset.")
    # Create a tiny fallback sample based on examples in the lab manual for demonstration
    sample = [
        # age, sex, cp, trestbps, chol, fbs, restecg, thalach, exang, oldpeak, slope, ca, thal, heartdisease
        [63, 1, 1, 145, 233, 1, 2, 150, 0, 2.3, 3, 0, 6, 0],
        [67, 1, 4, 160, 286, 0, 2, 108, 1, 1.5, 2, 3, 3, 2],
        [67, 1, 4, 120, 229, 0, 2, 129, 1, 2.6, 2, 2, 7, 1],
        [41, 0, 2, 130, 204, 0, 2, 172, 0, 1.4, 1, 0, 3, 0],
        [62, 0, 4, 140, 268, 0, 2, 160, 0, 3.6, 3, 2, 3, 3],
        [60, 1, 4, 130, 206, 0, 2, 132, 1, 2.4, 2, 2, 7, 4],
    ]
    cols = ["age","sex","cp","trestbps","chol","fbs","restecg","thalach","exang","oldpeak","slope","ca","thal","heartdisease"]
    df = pd.DataFrame(sample, columns=cols)
    # Save fallback so user can inspect
    df.to_csv("heart_sample_fallback.csv", index=False)
    print("Saved fallback sample to 'heart_sample_fallback.csv'.")
    return df

heart = load_heart_dataset()
print("Dataset shape:", heart.shape)
heart.head()



Loading local file: heart.csv
Dataset shape: (303, 14)


Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,target
0,63,1,3,145,233,1,0,150,0,2.3,0,0,1,1
1,37,1,2,130,250,0,1,187,0,3.5,0,0,2,1
2,41,0,1,130,204,0,0,172,0,1.4,2,0,2,1
3,56,1,1,120,236,0,1,178,0,0.8,2,0,2,1
4,57,0,0,120,354,0,1,163,1,0.6,2,0,2,1


In [50]:
# Cell 4 â€” Inspect columns, attempt to harmonize names used in different datasets
print("Columns:", heart.columns.tolist())

# Many heart disease CSVs use 'target' instead of 'heartdisease'. Normalize to 'heartdisease'.
if "target" in heart.columns and "heartdisease" not in heart.columns:
    heart = heart.rename(columns={"target": "heartdisease"})
    print("Renamed 'target' -> 'heartdisease'.")

# If heartdisease is present, ensure discrete integer values
if "heartdisease" in heart.columns:
    # Some datasets encode heart disease as 0/1 or 0-4; keep as-is but ensure integer
    heart["heartdisease"] = heart["heartdisease"].astype(int)
print("Unique values for heartdisease:", heart["heartdisease"].unique())


Columns: ['age', 'sex', 'cp', 'trestbps', 'chol', 'fbs', 'restecg', 'thalach', 'exang', 'oldpeak', 'slope', 'ca', 'thal', 'target']
Renamed 'target' -> 'heartdisease'.
Unique values for heartdisease: [1 0]


In [51]:

# Cell 4 â€” Inspect columns, attempt to harmonize names used in different datasets
print("Columns:", heart.columns.tolist())

# Many heart disease CSVs use 'target' instead of 'heartdisease'. Normalize to 'heartdisease'.
if "target" in heart.columns and "heartdisease" not in heart.columns:
    heart = heart.rename(columns={"target": "heartdisease"})
    print("Renamed 'target' -> 'heartdisease'.")

# If heartdisease is present, ensure discrete integer values
if "heartdisease" in heart.columns:
    # Some datasets encode heart disease as 0/1 or 0-4; keep as-is but ensure integer
    heart["heartdisease"] = heart["heartdisease"].astype(int)
print("Unique values for heartdisease:", heart["heartdisease"].unique())


Columns: ['age', 'sex', 'cp', 'trestbps', 'chol', 'fbs', 'restecg', 'thalach', 'exang', 'oldpeak', 'slope', 'ca', 'thal', 'heartdisease']
Unique values for heartdisease: [1 0]


In [52]:
# Cell 5 â€” Preprocessing & Discretization (PGMs here will use discrete CPDs)
# We'll discretize continuous numeric attributes (simple equal-frequency bins).
# Choose which columns to discretize based on presence in the dataframe.

# Identify candidate continuous columns
cont_cols = []
for col in ["age","trestbps","chol","thalach","oldpeak"]:
    if col in heart.columns:
        cont_cols.append(col)

print("Continuous columns to discretize:", cont_cols)

# Discretize into 3 bins (low/medium/high) â€” adjust bins as needed.
from sklearn.preprocessing import KBinsDiscretizer

if cont_cols:
    kb = KBinsDiscretizer(n_bins=3, encode="ordinal", strategy="quantile")
    heart_disc = heart.copy()
    heart_disc[cont_cols] = kb.fit_transform(heart[cont_cols])
    # Convert to int categories
    for c in cont_cols:
        heart_disc[c] = heart_disc[c].astype(int).astype(str)  # string categories are fine
else:
    heart_disc = heart.copy()

# Also ensure other categorical columns are strings (so pgmpy treats them as discrete states)
cat_cols = []
for col in ["sex","cp","fbs","restecg","exang","slope","ca","thal","heartdisease"]:
    if col in heart_disc.columns:
        cat_cols.append(col)
        heart_disc[col] = heart_disc[col].astype(int).astype(str)

print("Categorical columns used:", cat_cols)
heart_disc.head()


Continuous columns to discretize: ['age', 'trestbps', 'chol', 'thalach', 'oldpeak']
Categorical columns used: ['sex', 'cp', 'fbs', 'restecg', 'exang', 'slope', 'ca', 'thal', 'heartdisease']


Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,heartdisease
0,2,1,3,2,1,1,0,1,0,2,0,0,1,1
1,0,1,2,1,1,0,1,2,0,2,0,0,2,1
2,0,0,1,1,0,0,0,2,0,2,2,0,2,1
3,1,1,1,0,1,0,1,2,0,1,2,0,2,1
4,1,0,0,0,2,0,1,2,1,1,2,0,2,1


In [53]:
# Cell 5 â€” Preprocessing & Discretization (PGMs here will use discrete CPDs)
# We'll discretize continuous numeric attributes (simple equal-frequency bins).
# Choose which columns to discretize based on presence in the dataframe.

# Identify candidate continuous columns
cont_cols = []
for col in ["age","trestbps","chol","thalach","oldpeak"]:
    if col in heart.columns:
        cont_cols.append(col)

print("Continuous columns to discretize:", cont_cols)

# Discretize into 3 bins (low/medium/high) â€” adjust bins as needed.
from sklearn.preprocessing import KBinsDiscretizer

if cont_cols:
    kb = KBinsDiscretizer(n_bins=3, encode="ordinal", strategy="quantile")
    heart_disc = heart.copy()
    heart_disc[cont_cols] = kb.fit_transform(heart[cont_cols])
    # Convert to int categories
    for c in cont_cols:
        heart_disc[c] = heart_disc[c].astype(int).astype(str)  # string categories are fine
else:
    heart_disc = heart.copy()

# Also ensure other categorical columns are strings (so pgmpy treats them as discrete states)
cat_cols = []
for col in ["sex","cp","fbs","restecg","exang","slope","ca","thal","heartdisease"]:
    if col in heart_disc.columns:
        cat_cols.append(col)
        heart_disc[col] = heart_disc[col].astype(int).astype(str)

print("Categorical columns used:", cat_cols)
heart_disc.head()


Continuous columns to discretize: ['age', 'trestbps', 'chol', 'thalach', 'oldpeak']
Categorical columns used: ['sex', 'cp', 'fbs', 'restecg', 'exang', 'slope', 'ca', 'thal', 'heartdisease']


Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,heartdisease
0,2,1,3,2,1,1,0,1,0,2,0,0,1,1
1,0,1,2,1,1,0,1,2,0,2,0,0,2,1
2,0,0,1,1,0,0,0,2,0,2,2,0,2,1
3,1,1,1,0,1,0,1,2,0,1,2,0,2,1
4,1,0,0,0,2,0,1,2,1,1,2,0,2,1


In [56]:
# Cell 6 â€” Define Bayesian model structure (edges)
# Using the structure suggested in the lab manual:
# ('age','heartdisease'), ('sex','heartdisease'), ('exang','heartdisease'), ('cp','heartdisease'),
# ('heartdisease','restecg'), ('heartdisease','chol')

edges = [
    ('age','heartdisease'),
    ('sex','heartdisease'),
    ('exang','heartdisease'),
    ('cp','heartdisease'),
    ('heartdisease','restecg'),
    ('heartdisease','chol'),
]

model = BayesianModel(edges)
print("BayesianModel created with edges:")
print(model.edges())




ImportError: BayesianNetwork has been deprecated. Please use DiscreteBayesianNetwork instead.

In [55]:
# Correct imports for pgmpy version â‰¥ 0.1.24 (most stable for Python 3.11+)
from pgmpy.models.BayesianModel import BayesianModel
from pgmpy.estimators import MaximumLikelihoodEstimator
from pgmpy.inference import VariableElimination


In [58]:
## Practical 3 â€” Bayesian Network on Heart Disease Dataset (Fixed for new pgmpy versions)

# Step 1: Import libraries
import pandas as pd
from pgmpy.models import DiscreteBayesianNetwork
from pgmpy.estimators import MaximumLikelihoodEstimator
from pgmpy.inference import VariableElimination

# Step 2: Load dataset
heart_data = pd.read_csv("heart.csv")
print("âœ… Dataset loaded successfully!")
print(heart_data.head())

# (If your target column is named 'target', rename it)
if 'target' in heart_data.columns:
    heart_data.rename(columns={'target': 'heartdisease'}, inplace=True)

# Step 3: Define Bayesian Network structure
model = DiscreteBayesianNetwork([
    ('age', 'heartdisease'),
    ('sex', 'heartdisease'),
    ('exang', 'heartdisease'),
    ('cp', 'heartdisease'),
    ('heartdisease', 'restecg'),
    ('heartdisease', 'chol')
])

# Step 4: Fit the model using Maximum Likelihood Estimation
model.fit(heart_data, estimator=MaximumLikelihoodEstimator)
print("\nâœ… Model training completed successfully!")

# Step 5: Perform inference
inference = VariableElimination(model)

# Query 1
print("\nðŸ”¹ P(heartdisease | restecg = 1):")
print(inference.query(variables=['heartdisease'], evidence={'restecg': 1}))

# Query 2
print("\nðŸ”¹ P(heartdisease | cp = 2):")
print(inference.query(variables=['heartdisease'], evidence={'cp': 2}))


INFO:pgmpy: Datatype (N=numerical, C=Categorical Unordered, O=Categorical Ordered) inferred from data: 
 {'age': 'N', 'sex': 'N', 'cp': 'N', 'trestbps': 'N', 'chol': 'N', 'fbs': 'N', 'restecg': 'N', 'thalach': 'N', 'exang': 'N', 'oldpeak': 'N', 'slope': 'N', 'ca': 'N', 'thal': 'N', 'heartdisease': 'N'}


âœ… Dataset loaded successfully!
   age  sex  cp  trestbps  chol  fbs  restecg  thalach  exang  oldpeak  slope  \
0   63    1   3       145   233    1        0      150      0      2.3      0   
1   37    1   2       130   250    0        1      187      0      3.5      0   
2   41    0   1       130   204    0        0      172      0      1.4      2   
3   56    1   1       120   236    0        1      178      0      0.8      2   
4   57    0   0       120   354    0        1      163      1      0.6      2   

   ca  thal  target  
0   0     1       1  
1   0     2       1  
2   0     2       1  
3   0     2       1  
4   0     2       1  

âœ… Model training completed successfully!

ðŸ”¹ P(heartdisease | restecg = 1):
+-----------------+---------------------+
| heartdisease    |   phi(heartdisease) |
| heartdisease(0) |              0.4242 |
+-----------------+---------------------+
| heartdisease(1) |              0.5758 |
+-----------------+---------------------+

ðŸ”¹ P(heartdi