In [None]:
import pandas as pd
import numpy as np
from pgmpy.estimators import HillClimbSearch, BicScore
from pgmpy.models import BayesianNetwork
from pgmpy.estimators import MaximumLikelihoodEstimator
from pgmpy.inference import VariableElimination
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report

In [None]:

# Load the cleaned ECG feature table into a DataFrame.
# NOTE(review): this is an absolute, machine-specific path; it must be edited
# before the notebook can run anywhere else.
file_path = "/Users/muthuraj/Desktop/git hub/CM-Project--Heart-Risk-Prediction-using-ECG/ECG_Cardiac_Features_Cleaned.csv"  # Replace with the correct path
data = pd.read_csv(file_path)

# Columns named as labels; they are excluded from the feature list below.
label_columns = ["Label_HYP", "Label_MI", "Label_NORM", "Label_STTC"]

# Every numeric column that is not one of the label columns is reported as a
# continuous feature. Index.difference attempts to sort its result by default,
# so the list comes out in sorted order rather than file order.
# NOTE(review): numeric identifier columns, if present, are picked up by this
# dtype-based selection as well - confirm that is intended.
numeric_columns = data.select_dtypes(include=[np.number]).columns
continuous_features = numeric_columns.difference(label_columns).tolist()

print("Continuous Features:", continuous_features, sep="\n")


In [None]:
# Reload the raw table so that re-running this cell always starts from the
# original values: the discretization loop below overwrites feature columns of
# `data` in place.
# NOTE(review): absolute, machine-specific path; edit before running elsewhere.
file_path = "/Users/muthuraj/Desktop/git hub/CM-Project--Heart-Risk-Prediction-using-ECG/ECG_Cardiac_Features_Cleaned.csv"  # Replace with the correct path
data = pd.read_csv(file_path)


print("Available columns in the dataset:")
print(data.columns)

continuous_features = ['HRV_MeanNN', 'HRV_RMSSD', 'HRV_SDNN', 'HRV_pNN50', 'Heart Rate', 'PR_duration', 'Patient_ID', 'QRS_duration', 'QT Interval Mean', 'QT Interval SD', 'RR Interval Max', 'RR Interval Mean', 'RR Interval Min', 'RR Interval RMSSD', 'RR Interval SD', 'ST Segment Amplitude Mean', 'ST Segment Amplitude SD', 'ST Segment Duration Mean', 'ST Segment Duration SD', 'ecg_id']

# Identifier columns are numeric but are not measurements: binning them into
# Low/Normal/High is meaningless, and letting them into structure learning can
# only add spurious edges. They stay untouched in `data` and are excluded from
# the frame handed to the Bayesian network below.
id_columns = ['Patient_ID', 'ecg_id']
features_to_discretize = [f for f in continuous_features if f not in id_columns]

# Labels to apply depending on how many quantile bins survive. With
# duplicates="drop", heavily tied data can collapse the three requested bins
# to two (or one), so the label list must follow the real bin count.
bin_labels_by_count = {
    3: ["Low", "Normal", "High"],
    2: ["Low", "High"],
}

for feature in features_to_discretize:
    if feature not in data.columns:
        print(f"Feature {feature} not found in the dataset.")
        continue

    unique_values = data[feature].nunique()
    if unique_values <= 3:
        print(f"Skipping {feature}: Insufficient unique values for discretization.")
        continue

    try:
        # labels=False returns integer bin codes and retbins=True returns the
        # edges actually used, so the number of surviving bins is known
        # before any labels are attached.
        codes, edges = pd.qcut(
            data[feature],
            q=3,
            labels=False,
            retbins=True,
            duplicates="drop",
        )
    except Exception as e:
        print(f"Error discretizing {feature}: {e}")
        continue

    bin_labels = bin_labels_by_count.get(len(edges) - 1)
    if bin_labels is None:
        # All quantile edges coincided; there is nothing to separate.
        print(f"Skipping {feature}: quantile bins collapsed to a single bin.")
        continue

    # Map codes to names and keep an ordered categorical dtype, which is what
    # qcut itself produces when given labels. Missing values stay missing.
    data[feature] = codes.map(dict(enumerate(bin_labels))).astype(
        pd.CategoricalDtype(categories=bin_labels, ordered=True)
    )

label_columns = ["Label"]

for label in label_columns:
    if label in data.columns:
        # Cast to str so the label values become plain string states.
        data[label] = data[label].astype(str)
    else:
        print(f"Label column {label} not found in the dataset. Skipping conversion.")

print("Preprocessed Data Sample:")
print(data.head())

data.to_csv("preprocessed_dataset.csv", index=False)

print("Constructing Bayesian Network...")
try:
    # Learn on everything except the identifier columns.
    model_data = data.drop(columns=id_columns, errors="ignore")

    # Hill-climbing structure search scored with BIC.
    hc = HillClimbSearch(model_data)
    best_model = hc.estimate(scoring_method=BicScore(model_data))
    print("Learned Network Structure:")
    print(best_model.edges())

    # Rebuilding from the edge list keeps only variables that take part in at
    # least one edge; isolated variables are not nodes of `model`.
    model = BayesianNetwork(best_model.edges())
    model.fit(model_data, estimator=MaximumLikelihoodEstimator)

    infer = VariableElimination(model)
    sample_evidence = {"Heart Rate": "Normal", "HRV_SDNN": "Low"}  # Example evidence

    # Only variables that are nodes of the fitted network can be queried or
    # conditioned on, so filter both lists instead of letting inference fail.
    model_nodes = set(model.nodes())
    query_variables = [v for v in label_columns if v in model_nodes]
    usable_evidence = {k: v for k, v in sample_evidence.items() if k in model_nodes}
    ignored_evidence = sorted(set(sample_evidence) - set(usable_evidence))
    if ignored_evidence:
        print(f"Evidence variables not in the learned network (ignored): {ignored_evidence}")

    if query_variables:
        predictions = infer.map_query(variables=query_variables, evidence=usable_evidence)
        print("Predictions based on evidence:", predictions)
    else:
        predictions = {}
        print(f"None of {label_columns} is connected in the learned network; nothing to predict.")

except Exception as e:
    # Best-effort cell: report the failure rather than abort the notebook.
    # Names assigned inside the try (e.g. best_model) may be undefined afterwards.
    print(f"Error constructing Bayesian Network: {e}")

In [None]:
# Bootstrap edge stability: re-learn the structure on resampled training data
# and count how often each directed edge is recovered.
# NOTE(review): `train_data`, `bayes_graph` and `plt` are not defined in the
# cells visible here; they are assumed to come from other cells - confirm.
bootstrap_iterations = 100
edge_strengths = {}
for iteration in range(bootstrap_iterations):
    # Seed each resample with its iteration index so a rerun reproduces the
    # same bootstrap samples.
    sampled_data = train_data.sample(frac=0.8, replace=True, random_state=iteration)
    # Same call form as the main structure-learning cell: the scoring method
    # is passed to estimate(), so BIC is the score that is actually used.
    hc_sampled = HillClimbSearch(sampled_data)
    model_sampled = hc_sampled.estimate(scoring_method=BicScore(sampled_data))
    for edge in model_sampled.edges():
        edge_strengths[edge] = edge_strengths.get(edge, 0) + 1

# Store each edge's recovery frequency (0..1) as its 'weight'.
for (source, target), strength in edge_strengths.items():
    try:
        bayes_graph[source][target]['weight'] = strength / bootstrap_iterations
    except KeyError:
        # Edge was found only in resamples and is absent from bayes_graph;
        # there is nothing to annotate, so skip it instead of aborting.
        continue


# Fit conditional probability tables on the learned structure and set up
# exact inference. BayesianNetwork is the class this notebook imports.
inference_model = BayesianNetwork(best_model.edges())
inference_model.fit(train_data)
inference = VariableElimination(inference_model)


# Posterior distribution of one label given another observed label.
evidence = {'Label_CD': 1}
event_of_interest = 'Label_MI'
result = inference.query(variables=[event_of_interest], evidence=evidence)

# The query result holds its probabilities in an array (`values`) and the
# matching state names in `state_names`; the array has no `.index`.
# States are converted to str so the x axis is treated as categorical.
states = [str(state) for state in result.state_names[event_of_interest]]
probabilities = result.values

plt.bar(states, probabilities, color="coral")
plt.title(f"Inference Results for {event_of_interest}")
plt.xlabel("States")
plt.ylabel("Probability")
plt.show()