In [1]:
import pandas as pd
import numpy as np
from pgmpy.estimators import HillClimbSearch, BicScore
from pgmpy.estimators import PC
from pgmpy.models import BayesianNetwork
import networkx as nx
import matplotlib.pyplot as plt

In [2]:
def learn_structure(data_path):
    """
    Learn the structure of a Bayesian Network from data
    
    Parameters:
    -----------
    data_path : str
        Path to the CSV file containing the dataset
    method : str
        Either 'hill_climb' or 'pc' for different structure learning approaches
        
    Returns:
    --------
    model : BayesianNetwork
        Learned Bayesian Network structure
    """
    # Read the data
    data = pd.read_csv(data_path)
        
    # Initialize PC algorithm
    pc = PC(data=data)
    
    # Learn the skeleton
    skeleton = pc.estimate(variant='stable', max_cond_vars=3)
    
    # Create a BayesianNetwork object
    model = BayesianNetwork(skeleton.edges())
    
    return model

In [3]:
def visualize_network(model, save_path=None):
    """
    Visualize the learned Bayesian Network structure
    
    Parameters:
    -----------
    model : BayesianNetwork
        The learned Bayesian Network
    save_path : str, optional
        Path to save the visualization
    """
    # Create a new figure
    plt.figure(figsize=(12, 8))
    
    # Draw the network
    pos = nx.spring_layout(model)
    nx.draw(
        model,
        pos=pos,
        with_labels=True,
        node_color='lightblue',
        node_size=1500,
        font_size=10,
        font_weight='bold',
        arrows=True,
        edge_color='gray'
    )
    
    if save_path:
        plt.savefig(save_path)
    plt.show()

In [None]:
# Example usage
data_path = 'cdc_diabetes_health_indicators_train1.csv'

# Learn structure using Hill Climbing
print("Learning network structure using PC...")
hc_model = learn_structure(data_path)
print("\nLearned edges (PC):")
print(hc_model.edges())

# Visualize the network
print("\nVisualizing network structure...")
visualize_network(hc_model, save_path='network_structure_pc.png')

Learning network structure using Hill Climbing...


  0%|          | 0/3 [00:00<?, ?it/s]

INFO:pgmpy:Reached maximum number of allowed conditional variables. Exiting


NetworkXError: Input is not a valid edge list