# BN Generation Parameter Sweep

This notebook sweeps over DAG/BN generation parameters outlined in `notebooks/graph_generation/ideas.md` and materializes multiple discrete BN variants per DAG.

It varies:
- n (number of variables)
- target treewidth
- variable arity (fixed or range)
- CPT skewness (Dirichlet alpha)
- determinism fraction (mostly 0%)

Outputs:
- CSV with per-variant metadata
- On-screen CPT previews for a small sample



In [38]:
import sys
from pathlib import Path
import re
from os import getenv

# Ensure src is importable
repo_root = Path(".").resolve().parents[1]
sys.path.append(str(repo_root / 'src'))

from graph_generation import generate_dag_with_treewidth
from bn_generation import generate_variants_for_dag
from cpd_utils import cpd_to_ascii_table
from discrete.discrete_inference import format_probability_query, query_probability
from llm_calling import run_llm_call
from yaml_utils import load_yaml

import pandas as pd
import numpy as np
from openai import OpenAI

In [39]:
# LLM Configuration and Helper Functions
# GLOBAL TOGGLE: Set to False to disable all LLM calls
ENABLE_LLM_CALLS = False

MODEL = "deepseek/deepseek-chat-v3.1:free"
MODEL = "openai/gpt-5"
MODEL = "openai/o3-mini-high"  

# Initialize OpenAI client (only if LLM calls are enabled)
if ENABLE_LLM_CALLS:
    client = OpenAI(
        base_url="https://openrouter.ai/api/v1",
        api_key=getenv("OPENROUTER_API_KEY")
    )
else:
    client = None
    print("LLM calls are DISABLED. Set ENABLE_LLM_CALLS = True to enable.")

# Load prompts
prompt_path = repo_root / "notebooks" / "discrete" / "prompts.yaml"
prompts = load_yaml(prompt_path)
# Import the functions from llm_calling instead of defining them here
from llm_calling import extract_numeric_answer, create_probability_prompt, run_llm_call 


LLM calls are DISABLED. Set ENABLE_LLM_CALLS = True to enable.


In [40]:
# Parameter grids (edit as needed)
#ns = [7, 11, 15]
#ns = [25]
ns = [9]
#treewidths = [2, 3, 4]
#treewidths = [5]
treewidths = [3]
arity_specs = [
    #{"type": "fixed", "fixed": 2},
    {"type": "range", "min": 2, "max": 3},
]
#dirichlet_alphas = [0.5, 1.0]
dirichlet_alphas = [1.0]
#determinism_fracs = [0.0, 0.1]  # mostly 0%; includes a nonzero test
determinism_fracs = [0.0]  # mostly 0%; includes a nonzero test
#naming_strategies = ['simple', 'confusing', 'semantic']  # Add naming strategy variation
#naming_strategies = ['simple', 'confusing']  # Add naming strategy variation
naming_strategies = ['confusing']  # Add naming strategy variation
variants_per_combo = 4
base_seed = 42

rows = []
preview_samples = []

In [41]:
def arity_to_str(spec):
    if spec["type"] == "fixed":
        return f"fixed:{spec['fixed']}"
    return f"range:{spec['min']}-{spec['max']}"

sample_counter = 0
all_bayesian_networks = []  # Store all BNs and their metadata

for n in ns:
    for tw in treewidths:
        for naming in naming_strategies:
            dag, achieved_tw, _ = generate_dag_with_treewidth(n, tw, node_naming=naming, seed=base_seed + sample_counter)
            for arity in arity_specs:
                for alpha in dirichlet_alphas:
                    for det in determinism_fracs:
                        cfgs = []
                        for i in range(variants_per_combo):
                            cfgs.append({
                                "arity_strategy": arity,
                                "dirichlet_alpha": alpha,
                                "determinism_fraction": det,
                            })
                        variants = generate_variants_for_dag(dag, cfgs, base_seed=base_seed + sample_counter)
                        for idx, (bn, meta) in enumerate(variants):
                            # Store BN and its metadata for later access
                            all_bayesian_networks.append({
                                "bn": bn,
                                "meta": {
                                    "n": n,
                                    "target_tw": tw,
                                    "achieved_tw": achieved_tw,
                                    "naming": naming,
                                    "arity": arity_to_str(arity),
                                    "alpha": meta["dirichlet_alpha"],
                                    "determinism": meta["determinism_fraction"],
                                    "seed": meta["seed"],
                                    "variant_index": idx,
                                    "num_edges": bn.number_of_edges(),
                                    "num_nodes": bn.number_of_nodes(),
                                }
                            })
                            rows.append({
                                "n": n,
                                "target_tw": tw,
                                "achieved_tw": achieved_tw,
                                "naming": naming,
                                "arity": arity_to_str(arity),
                                "alpha": meta["dirichlet_alpha"],
                                "determinism": meta["determinism_fraction"],
                                "seed": meta["seed"],
                                "variant_index": idx,
                                "num_edges": bn.number_of_edges(),
                                "num_nodes": bn.number_of_nodes(),
                            })
                            if sample_counter < 3:  # collect a few previews
                                preview_samples.append(bn)
                        sample_counter += 1



In [42]:
df = pd.DataFrame(rows)
display(df.head())
print(f"Total variants: {len(df)}")

# Save CSV next to notebook
out_csv = repo_root / 'notebooks' / 'graph_generation' / 'bn_generation_sweep.csv'
df.to_csv(out_csv, index=False)
print('Saved to', out_csv)

Unnamed: 0,n,target_tw,achieved_tw,naming,arity,alpha,determinism,seed,variant_index,num_edges,num_nodes
0,9,3,3,confusing,range:2-3,1.0,0.0,42,0,12,9
1,9,3,3,confusing,range:2-3,1.0,0.0,10015,1,12,9
2,9,3,3,confusing,range:2-3,1.0,0.0,19988,2,12,9
3,9,3,3,confusing,range:2-3,1.0,0.0,29961,3,12,9


Total variants: 4
Saved to /home/bmihaljevic/repos/code-projects/aily/probabilistic-reasoning-llms/notebooks/graph_generation/bn_generation_sweep.csv


In [43]:
# For each Bayesian network, generate n queries using generate_queries, run them, and collect results
from query_generation import generate_queries
from pgmpy.inference import VariableElimination

# Store all queries for later recovery: a list of lists (per BN)
all_bn_queries = []

query_rows = []

for idx, bn_dict in enumerate(all_bayesian_networks):
    bn = bn_dict["bn"]
    # Use a different seed per BN for query generation for reproducibility
    query_seed = 1000 + idx
    # Generate 5 queries for this BN
    queries = generate_queries(
        bn,
        num_queries=12,
        query_node_counts=(1, 2),
        #query_node_counts=[2],
        evidence_counts=(0, 1, 2),
        #evidence_counts=(2),
        #distance_buckets=[(1, 1), (2, 3), (1, 3)],
        distance_buckets=[(2, 3)],
        seed=query_seed,
    )
    all_bn_queries.append(queries)
    # Get the BN's properties from the main df
    bn_row = df.iloc[idx].to_dict()
    for qidx, query in enumerate(queries):
        # Prepare inference
        infer = VariableElimination(bn)
        # Query variables and their states
        query_vars = [v for v, _ in query.targets]
        query_states = [s for _, s in query.targets]
        # Evidence dict: variable -> state
        evidence = query.evidence if query.evidence else None

        # Compute exact probability
        try:
            # pgmpy: query returns a factor, we need to index into the right assignment
            result = infer.query(variables=query_vars, evidence=evidence, show_progress=False)
            # result is a DiscreteFactor, get the probability for the assignment
            # The order of query_vars matches the order of query_states
            assignment = dict(zip(query_vars, query_states))
            prob = result.get_value(**assignment)
        except Exception as e:
            prob = None

        # Collect all info for the table, merging BN and query properties (no LLM here)
        row = dict(bn_row)  # copy BN properties
        row.update({
            "bn_index": idx,
            "query_index": qidx,
            "query_vars": str(query_vars),
            "query_states": str(query_states),
            "evidence": str(query.evidence),
            "distance": query.meta.get("min_target_evidence_distance"),
            "num_evidence": query.meta.get("num_evidence_nodes"),
            "probability": prob,
        })
        query_rows.append(row)

# Convert to DataFrame and display
full_df = pd.DataFrame(query_rows)
print(f"Total queries: {len(full_df)}")







Total queries: 48


In [44]:
# Optionally, save the full query+BN dataframe
#out_query_csv = repo_root / 'notebooks' / 'graph_generation' / 'bn_generation_sweep_queries.csv'
from datetime import datetime
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
model_filename = MODEL.replace("/", "_")
out_query_csv = repo_root / 'results' / 'discrete' / f'bn_queries_{model_filename}_{timestamp}.csv'
# Ensure the directory exists
out_query_csv.parent.mkdir(parents=True, exist_ok=True)
full_df.to_csv(out_query_csv, index=False)
print('Saved query+BN results to', out_query_csv)

Saved query+BN results to /home/bmihaljevic/repos/code-projects/aily/probabilistic-reasoning-llms/results/discrete/bn_queries_openai_o3-mini-high_20250930_125714.csv


In [45]:
# Inspect a single row: draw BN, call LLM, compare\n
from bn_query_sweep import inspect_row_and_call_llm, call_llm_for_query
from pathlib import Path

if ENABLE_LLM_CALLS:
    # Choose a row index from full_df
    #row_index = 330
    row_index = 8

    result = inspect_row_and_call_llm(
        full_df=full_df,
        all_bayesian_networks=all_bayesian_networks,
        row_index=row_index,
        openai_client=client,
        model=MODEL,
        prompts_path=prompt_path,
        draw_kwargs={"figsize": (6, 4)},
    )
    print(result)

In [46]:
# Optional: Call LLM on a subset AFTER query generation
# Provide indices of rows in full_df for which to call the LLM.
# Import _parse_field from bn_query_sweep instead of defining it locally
from bn_query_sweep import _parse_field

# Ensure LLM columns exist
if 'llm_probability' not in full_df.columns:
    full_df['llm_probability'] = None
if 'llm_response' not in full_df.columns:
    full_df['llm_response'] = None

if ENABLE_LLM_CALLS:
    # Select which rows to send to LLM (example below commented out)
    #selected_indices = list(full_df.sample(n=40, random_state=0).index)
    selected_indices = list(full_df.index)

    for ridx in selected_indices:
        row = full_df.iloc[ridx]
        bn = all_bayesian_networks[int(row['bn_index'])]['bn']
        query_vars = _parse_field(row['query_vars']) or []
        query_states = _parse_field(row['query_states']) or []
        evidence = _parse_field(row['evidence']) or None
        print(f"Processing BN {int(row['bn_index'])}/{len(all_bayesian_networks)}, Query {int(row['query_index'])}...")
        llm_prob, llm_response = call_llm_for_query(bn, query_vars, query_states, evidence)
        full_df.at[ridx, 'llm_probability'] = llm_prob
        full_df.at[ridx, 'llm_response'] = llm_response
else:
    print("LLM calls are disabled. Skipping batch LLM processing.")
    print(f"Would have processed {len(full_df)} queries if LLM calls were enabled.")

LLM calls are disabled. Skipping batch LLM processing.
Would have processed 48 queries if LLM calls were enabled.


In [47]:
out_llm_csv = out_query_csv.with_name(out_query_csv.stem + "_with_llm.csv")
# Save the DataFrame with LLM responses to a CSV file
#out_llm_csv = out_query_csv.replace(".csv", "_with_llm.csv")
full_df.to_csv(out_llm_csv, index=False)
print("Saved DataFrame with LLM responses to", out_llm_csv)

Saved DataFrame with LLM responses to /home/bmihaljevic/repos/code-projects/aily/probabilistic-reasoning-llms/results/discrete/bn_queries_openai_o3-mini-high_20250930_125714_with_llm.csv


In [48]:
# Print rows with non-empty llm_probability
llm_rows = full_df[full_df['llm_probability'].notna()]
print(f"Found {len(llm_rows)} rows with LLM probability values:")
print("=" * 80)
display(llm_rows)


Found 0 rows with LLM probability values:


Unnamed: 0,n,target_tw,achieved_tw,naming,arity,alpha,determinism,seed,variant_index,num_edges,...,bn_index,query_index,query_vars,query_states,evidence,distance,num_evidence,probability,llm_probability,llm_response


In [49]:
# Display LLM performance statistics
print("LLM Performance Analysis:")
print("=" * 50)

# Count successful LLM responses
successful_llm = full_df['llm_probability'].notna().sum()
total_queries = len(full_df)
print(f"Successful LLM responses: {successful_llm}/{total_queries} ({successful_llm/total_queries*100:.1f}%)")

# Save enhanced results with LLM data
enhanced_csv = repo_root / 'notebooks' / 'graph_generation' / 'bn_generation_sweep_queries_with_llm.csv'
full_df.to_csv(enhanced_csv, index=False)
print(f'Saved enhanced results with LLM data to {enhanced_csv}')

LLM Performance Analysis:
Successful LLM responses: 0/48 (0.0%)
Saved enhanced results with LLM data to /home/bmihaljevic/repos/code-projects/aily/probabilistic-reasoning-llms/notebooks/graph_generation/bn_generation_sweep_queries_with_llm.csv


In [50]:
# Calculate accuracy metrics for successful responses
if successful_llm > 0:
    # Filter to only successful LLM responses
    successful_df = full_df[full_df['llm_probability'].notna() & full_df['probability'].notna()]
    
    if len(successful_df) > 0:
        # Calculate absolute errors
        successful_df = successful_df.copy()
        successful_df['abs_error'] = abs(successful_df['llm_probability'] - successful_df['probability'])
        successful_df['rel_error'] = successful_df['abs_error'] / successful_df['probability']
        
        print(f"\nAccuracy Metrics (for {len(successful_df)} successful responses):")
        print(f"Mean Absolute Error: {successful_df['abs_error'].mean():.6f}")
        print(f"Mean Relative Error: {successful_df['rel_error'].mean():.6f}")
        print(f"Max Absolute Error: {successful_df['abs_error'].max():.6f}")
        print(f"Max Relative Error: {successful_df['rel_error'].max():.6f}")
        
        # Show some examples
        print(f"\nFirst 5 successful responses:")
        display(successful_df[['query_vars', 'query_states', 'evidence', 'probability', 'llm_probability', 'abs_error']].head())
    else:
        print("No successful LLM responses with exact inference results to compare.")
else:
    print("No successful LLM responses.")

No successful LLM responses.


In [51]:
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np

# Set up the plotting style
plt.style.use('default')
sns.set_palette("husl")

# Filter to only successful LLM responses with exact inference results
plot_df = full_df[full_df['llm_probability'].notna() & full_df['probability'].notna()].copy()
plot_df['abs_error'] = abs(plot_df['llm_probability'] - plot_df['probability'])

if len(plot_df) > 0:
    # Identify all BN and query property columns (exclude result columns)
    exclude_cols = {'bn_index', 'query_vars', 'query_states', 'evidence', 'probability', 
                   'llm_probability', 'llm_response', 'abs_error', 'rel_error', 'target_tw', 'n', 'seed', 'variant_index',
                   'alpha', 'determinism', 'arity', 'query_index', 
                   'achieved_tw', 'num_nodes'}
    
    # Get all columns that are BN or query properties
    property_cols = [col for col in full_df.columns if col not in exclude_cols]
    
    # Calculate number of subplots needed
    n_props = len(property_cols)
    n_cols = min(4, n_props)  # Max 4 columns
    n_rows = (n_props + n_cols - 1) // n_cols  # Ceiling division
    
    # Create figure with subplots for all properties
    fig, axes = plt.subplots(n_rows, n_cols, figsize=(4*n_cols, 3*n_rows))
    fig.suptitle('Absolute Error by BN and Query Properties', fontsize=16, fontweight='bold')
    
    # Flatten axes for easier indexing
    if n_props == 1:
        axes = [axes]
    elif n_rows == 1:
        axes = axes.flatten()
    else:
        axes = axes.flatten()
    
    # Create boxplots for each property
    for i, prop in enumerate(property_cols):
        ax = axes[i]
        
        # Get unique values for this property
        unique_vals = sorted(plot_df[prop].unique())
        
        # Create boxplot data
        box_data = []
        labels = []
        
        for val in unique_vals:
            subset = plot_df[plot_df[prop] == val]['abs_error']
            if len(subset) > 0:  # Only include if there's data
                box_data.append(subset.values)
                labels.append(str(val))
        
        if box_data:  # Only plot if we have data
            ax.boxplot(box_data, labels=labels)
            ax.set_title(f'Absolute Error by {prop.replace("_", " ").title()}')
            ax.set_xlabel(prop.replace("_", " ").title())
            ax.set_ylabel('Absolute Error')
            ax.grid(True, alpha=0.3)
            
            # Rotate x-axis labels if there are many unique values
            if len(labels) > 5:
                ax.tick_params(axis='x', rotation=45)
    
    # Hide unused subplots
    for i in range(n_props, len(axes)):
        axes[i].set_visible(False)
    
    plt.tight_layout()
    plt.show()
    
    # Print summary statistics for each property
    print("\nSummary Statistics by Property:")
    print("=" * 50)
    
    for prop in property_cols:
        print(f"\n{prop.upper()}:")
        prop_stats = plot_df.groupby(prop)['abs_error'].agg(['count', 'mean', 'std', 'min', 'max'])
        print(prop_stats.round(6))
        
else:
    print("No successful LLM responses with exact inference results available for plotting.")


No successful LLM responses with exact inference results available for plotting.


In [52]:
# Re-define the function to ensure it's properly loaded with the verbose parameter
def compute_query_complexity(full_df, all_bayesian_networks, row_index, verbose=True):
    """
    Compute the complexity metrics for a specific query from a row in full_df.
    Now properly takes the query into account by only eliminating non-query variables.
    
    Parameters:
    - full_df: DataFrame containing query information
    - all_bayesian_networks: List of BN dictionaries with 'bn' and 'meta' keys
    - row_index: Index of the row in full_df to analyze
    - verbose: If True, print detailed progress information
    
    Returns:
    - dict: Complexity metrics including induced width, total cost, max factor size, etc.
    """
    from pgmpy.inference.EliminationOrder import WeightedMinFill
    from pgmpy.inference import VariableElimination
    from bn_query_sweep import _parse_field
    
    # Get the row data
    row = full_df.iloc[row_index]
    bn_index = int(row['bn_index'])
    
    # Get the Bayesian network
    bn = all_bayesian_networks[bn_index]['bn']
    
    # Parse query information
    query_vars = _parse_field(row['query_vars']) or []
    query_states = _parse_field(row['query_states']) or []
    evidence = _parse_field(row['evidence']) or {}
    
    if verbose:
        print(f"Computing complexity for query: P({query_vars}={query_states} | {evidence})")
        print(f"BN: {bn_index}, Query: {int(row['query_index'])}")
    
    # Ensure the model is valid
    bn.check_model()
    
    # Get cardinalities
    card = bn.get_cardinality()
    if verbose:
        print(f"Variable cardinalities: {card}")
    
    # QUERY-SPECIFIC COMPLEXITY COMPUTATION
    # Identify which variables need to be eliminated vs kept for the query
    all_vars = set(bn.nodes())
    query_vars_set = set(query_vars)
    evidence_vars_set = set(evidence.keys()) if evidence else set()
    
    # Variables that must be kept until the end (query variables)
    keep_vars = query_vars_set
    
    # Variables that can be eliminated (all others)
    eliminate_vars = all_vars - keep_vars
    
    if verbose:
        print(f"Variables to keep (query): {sorted(keep_vars)}")
        print(f"Variables to eliminate: {sorted(eliminate_vars)}")
        print(f"Evidence variables: {sorted(evidence_vars_set)}")
    
    # Handle evidence by reducing cardinalities
    # Evidence variables are instantiated, so they don't contribute to factor sizes
    effective_card = card.copy()
    for evar in evidence_vars_set:
        effective_card[evar] = 1  # Evidence variables are fixed, so cardinality = 1
    
    if verbose:
        print(f"Effective cardinalities (after evidence): {dict(effective_card)}")
    
    # Create elimination orderer and get optimal order for variables to eliminate
    if eliminate_vars:
        orderer = WeightedMinFill(bn)
        elim_order = orderer.get_elimination_order(nodes=list(eliminate_vars))
    else:
        elim_order = []  # No variables to eliminate
    
    if verbose:
        print(f"Elimination order (variables to eliminate): {elim_order}")
        if elim_order:
            complete_elim_order = elim_order + list(keep_vars)
            print(f"Complete elimination order: {complete_elim_order}")
    
    # Calculate induced width for the elimination order
    if elim_order:
        # For induced width calculation, we need to create a complete elimination order
        # that includes all variables, with query variables at the end
        complete_elim_order = elim_order + list(keep_vars)
        ve = VariableElimination(bn)
        induced_width = ve.induced_width(complete_elim_order)
    else:
        complete_elim_order = list(keep_vars)  # Only query variables
        induced_width = 0  # No elimination needed
    
    if verbose:
        print(f"Induced width: {induced_width}")
    
    # Simulate variable elimination to compute cost metrics
    cost = 0
    max_factor_size = 0
    moral = bn.to_markov_model()  # moralized undirected graph
    
    # Track factor sizes for each elimination step
    factor_sizes = []
    
    for step, x in enumerate(elim_order):
        nbrs = list(moral.neighbors(x))
        
        # Size of the intermediate factor created when eliminating x
        # Use effective cardinalities (evidence variables have cardinality 1)
        size = 1
        for v in nbrs + [x]:
            size *= effective_card[v]
        
        cost += size
        max_factor_size = max(max_factor_size, size)
        factor_sizes.append(size)
        
        if verbose:
            print(f"Step {step+1}: Eliminating {x}, neighbors: {nbrs}, factor size: {size}")
        
        # Connect neighbors (fill-in) and remove x
        for i in range(len(nbrs)):
            for j in range(i+1, len(nbrs)):
                moral.add_edge(nbrs[i], nbrs[j])
        moral.remove_node(x)
    
    # Calculate final factor size (the remaining query variables)
    if keep_vars:
        # The final factor contains all remaining query variables
        final_factor_size = 1
        for v in keep_vars:
            final_factor_size *= effective_card[v]
        cost += final_factor_size
        max_factor_size = max(max_factor_size, final_factor_size)
        if verbose:
            print(f"Final factor (query variables): {sorted(keep_vars)}, size: {final_factor_size}")
    
    # Calculate additional metrics
    num_vars = len(bn.nodes())
    num_edges = bn.number_of_edges()
    
    # Query-specific metrics
    num_query_vars = len(query_vars)
    num_evidence_vars = len(evidence) if evidence else 0
    num_eliminated_vars = len(elim_order)
    
    # Complexity metrics
    complexity_metrics = {
        'row_index': row_index,
        'bn_index': bn_index,
        'query_index': int(row['query_index']),
        'query_vars': query_vars,
        'query_states': query_states,
        'evidence': evidence,
        'num_vars': num_vars,
        'num_edges': num_edges,
        'num_query_vars': num_query_vars,
        'num_evidence_vars': num_evidence_vars,
        'num_eliminated_vars': num_eliminated_vars,
        'elimination_order': elim_order,
        'complete_elimination_order': complete_elim_order,
        'induced_width': induced_width,
        'total_cost': cost,
        'max_factor_size': max_factor_size,
        'avg_factor_size': cost / len(elim_order) if elim_order else 0,
        'factor_sizes': factor_sizes,
        'log_total_cost': np.log2(cost) if cost > 0 else 0,
        'log_max_factor_size': np.log2(max_factor_size) if max_factor_size > 0 else 0,
        'keep_vars': sorted(keep_vars),
        'eliminate_vars': sorted(eliminate_vars),
    }
    
    if verbose:
        print(f"\nQuery-Specific Complexity Summary:")
        print(f"  Variables eliminated: {num_eliminated_vars}/{num_vars}")
        print(f"  Query variables kept: {sorted(keep_vars)}")
        print(f"  Induced width: {induced_width}")
        print(f"  Total factor work: {cost:,}")
        print(f"  Max intermediate factor size: {max_factor_size:,}")
        print(f"  Average factor size: {cost / len(elim_order) if elim_order else 0:.1f}")
        print(f"  Log2(total cost): {np.log2(cost):.2f}")
        print(f"  Log2(max factor size): {np.log2(max_factor_size):.2f}")
    
    return complexity_metrics

print("Function re-defined successfully!")


Function re-defined successfully!


In [53]:
# Test the updated query-specific complexity computation function
# Choose a row index from full_df to analyze
test_row_index = 0  # Change this to any valid row index

print("=" * 80)
print(f"TESTING QUERY-SPECIFIC COMPLEXITY COMPUTATION FOR ROW {test_row_index}")
print("=" * 80)

# Compute complexity for the selected row
complexity_result = compute_query_complexity(full_df, all_bayesian_networks, test_row_index, verbose=True)

print("\n" + "=" * 80)
print("DETAILED COMPLEXITY METRICS:")
print("=" * 80)

# Display the results in a nice format
for key, value in complexity_result.items():
    if key not in ['elimination_order', 'complete_elimination_order', 'factor_sizes', 'query_vars', 'query_states', 'evidence', 'keep_vars', 'eliminate_vars']:
        print(f"{key:25}: {value}")
    elif key in ['elimination_order', 'complete_elimination_order']:
        print(f"{key:25}: {value[:5]}... (showing first 5 of {len(value)})")
    elif key == 'factor_sizes':
        print(f"{key:25}: {value[:5]}... (showing first 5 of {len(value)})")
    else:
        print(f"{key:25}: {value}")

print("\n" + "=" * 80)
print("FACTOR SIZE PROGRESSION:")
print("=" * 80)
for i, size in enumerate(complexity_result['factor_sizes']):
    print(f"Step {i+1:2d}: {size:8,} entries")

print("\n" + "=" * 80)
print("QUERY-SPECIFIC ANALYSIS:")
print("=" * 80)
print(f"Query variables kept: {complexity_result['keep_vars']}")
print(f"Variables eliminated: {complexity_result['eliminate_vars']}")
print(f"Variables eliminated: {complexity_result['num_eliminated_vars']}/{complexity_result['num_vars']} ({complexity_result['num_eliminated_vars']/complexity_result['num_vars']*100:.1f}%)")


TESTING QUERY-SPECIFIC COMPLEXITY COMPUTATION FOR ROW 0
Computing complexity for query: P(['K_kx9xcv']=['s2'] | {'O_cguird': 's1'})
BN: 0, Query: 0
Variable cardinalities: defaultdict(<class 'int'>, {'G_2ohusw': np.int64(2), 'O_gh8nq9': np.int64(3), 'K_kx9xcv': np.int64(3), 'U_bx3bu6': np.int64(3), 'Y_nizbt1': np.int64(2), 'Z_7j9n4o': np.int64(2), 'L_vy01po': np.int64(2), 'O_cguird': np.int64(3), 'R_dbfd2r': np.int64(2)})
Variables to keep (query): ['K_kx9xcv']
Variables to eliminate: ['G_2ohusw', 'L_vy01po', 'O_cguird', 'O_gh8nq9', 'R_dbfd2r', 'U_bx3bu6', 'Y_nizbt1', 'Z_7j9n4o']
Evidence variables: ['O_cguird']
Effective cardinalities (after evidence): {'G_2ohusw': np.int64(2), 'O_gh8nq9': np.int64(3), 'K_kx9xcv': np.int64(3), 'U_bx3bu6': np.int64(3), 'Y_nizbt1': np.int64(2), 'Z_7j9n4o': np.int64(2), 'L_vy01po': np.int64(2), 'O_cguird': 1, 'R_dbfd2r': np.int64(2)}


  0%|          | 0/8 [00:00<?, ?it/s]

Elimination order (variables to eliminate): ['Y_nizbt1', 'O_gh8nq9', 'O_cguird', 'G_2ohusw', 'Z_7j9n4o', 'U_bx3bu6', 'L_vy01po', 'R_dbfd2r']
Complete elimination order: ['Y_nizbt1', 'O_gh8nq9', 'O_cguird', 'G_2ohusw', 'Z_7j9n4o', 'U_bx3bu6', 'L_vy01po', 'R_dbfd2r', 'K_kx9xcv']
Induced width: 3
Step 1: Eliminating Y_nizbt1, neighbors: ['K_kx9xcv'], factor size: 6
Step 2: Eliminating O_gh8nq9, neighbors: ['G_2ohusw'], factor size: 6
Step 3: Eliminating O_cguird, neighbors: ['L_vy01po', 'G_2ohusw'], factor size: 4
Step 4: Eliminating G_2ohusw, neighbors: ['L_vy01po', 'K_kx9xcv'], factor size: 12
Step 5: Eliminating Z_7j9n4o, neighbors: ['U_bx3bu6', 'K_kx9xcv'], factor size: 18
Step 6: Eliminating U_bx3bu6, neighbors: ['L_vy01po', 'R_dbfd2r', 'K_kx9xcv'], factor size: 36
Step 7: Eliminating L_vy01po, neighbors: ['R_dbfd2r', 'K_kx9xcv'], factor size: 12
Step 8: Eliminating R_dbfd2r, neighbors: ['K_kx9xcv'], factor size: 6
Final factor (query variables): ['K_kx9xcv'], size: 3

Query-Specific

In [54]:
def compute_all_query_complexities(full_df, all_bayesian_networks, verbose=False):
    """
    Compute complexity metrics for all queries in full_df.
    
    Parameters:
    - full_df: DataFrame containing query information
    - all_bayesian_networks: List of BN dictionaries with 'bn' and 'meta' keys
    - verbose: If True, print progress information
    
    Returns:
    - pd.DataFrame: DataFrame with complexity metrics for each query
    """
    complexity_results = []
    
    for idx in range(len(full_df)):
        if verbose:
            print(f"Processing query {idx+1}/{len(full_df)}...")
        
        try:
            result = compute_query_complexity(full_df, all_bayesian_networks, idx, verbose=False)
            complexity_results.append(result)
        except Exception as e:
            print(f"Error processing row {idx}: {e}")
            # Add a row with error information
            complexity_results.append({
                'row_index': idx,
                'error': str(e),
                'induced_width': None,
                'total_cost': None,
                'max_factor_size': None,
            })
    
    # Convert to DataFrame
    complexity_df = pd.DataFrame(complexity_results)
    
    if verbose:
        print(f"\nComputed complexity for {len(complexity_results)} queries")
        if 'error' in complexity_df.columns:
            successful = len(complexity_df[complexity_df['error'].isna()])
            failed = len(complexity_df[complexity_df['error'].notna()])
        else:
            successful = len(complexity_df)
            failed = 0
        print(f"Successful computations: {successful}")
        print(f"Failed computations: {failed}")
    
    return complexity_df

# Example usage for a subset of queries (uncomment to run)
complexity_df = compute_all_query_complexities(full_df, all_bayesian_networks, verbose=True)
print("\nComplexity DataFrame:")
display(complexity_df.head())


Processing query 1/48...


  0%|          | 0/8 [00:00<?, ?it/s]

Processing query 2/48...


  0%|          | 0/8 [00:00<?, ?it/s]

Processing query 3/48...


  0%|          | 0/8 [00:00<?, ?it/s]

Processing query 4/48...


  0%|          | 0/8 [00:00<?, ?it/s]

Processing query 5/48...


  0%|          | 0/7 [00:00<?, ?it/s]

Processing query 6/48...


  0%|          | 0/7 [00:00<?, ?it/s]

Processing query 7/48...


  0%|          | 0/7 [00:00<?, ?it/s]

Processing query 8/48...


  0%|          | 0/8 [00:00<?, ?it/s]

Processing query 9/48...


  0%|          | 0/7 [00:00<?, ?it/s]

Processing query 10/48...


  0%|          | 0/7 [00:00<?, ?it/s]

Processing query 11/48...


  0%|          | 0/8 [00:00<?, ?it/s]

Processing query 12/48...


  0%|          | 0/7 [00:00<?, ?it/s]

Processing query 13/48...


  0%|          | 0/7 [00:00<?, ?it/s]

Processing query 14/48...


  0%|          | 0/8 [00:00<?, ?it/s]

Processing query 15/48...


  0%|          | 0/8 [00:00<?, ?it/s]

Processing query 16/48...


  0%|          | 0/8 [00:00<?, ?it/s]

Processing query 17/48...


  0%|          | 0/7 [00:00<?, ?it/s]

Processing query 18/48...


  0%|          | 0/8 [00:00<?, ?it/s]

Processing query 19/48...


  0%|          | 0/7 [00:00<?, ?it/s]

Processing query 20/48...


  0%|          | 0/7 [00:00<?, ?it/s]

Processing query 21/48...


  0%|          | 0/7 [00:00<?, ?it/s]

Processing query 22/48...


  0%|          | 0/7 [00:00<?, ?it/s]

Processing query 23/48...


  0%|          | 0/8 [00:00<?, ?it/s]

Processing query 24/48...


  0%|          | 0/7 [00:00<?, ?it/s]

Processing query 25/48...


  0%|          | 0/7 [00:00<?, ?it/s]

Processing query 26/48...


  0%|          | 0/8 [00:00<?, ?it/s]

Processing query 27/48...


  0%|          | 0/7 [00:00<?, ?it/s]

Processing query 28/48...


  0%|          | 0/8 [00:00<?, ?it/s]

Processing query 29/48...


  0%|          | 0/8 [00:00<?, ?it/s]

Processing query 30/48...


  0%|          | 0/7 [00:00<?, ?it/s]

Processing query 31/48...


  0%|          | 0/8 [00:00<?, ?it/s]

Processing query 32/48...


  0%|          | 0/8 [00:00<?, ?it/s]

Processing query 33/48...


  0%|          | 0/8 [00:00<?, ?it/s]

Processing query 34/48...


  0%|          | 0/7 [00:00<?, ?it/s]

Processing query 35/48...


  0%|          | 0/7 [00:00<?, ?it/s]

Processing query 36/48...


  0%|          | 0/8 [00:00<?, ?it/s]

Processing query 37/48...


  0%|          | 0/8 [00:00<?, ?it/s]

Processing query 38/48...


  0%|          | 0/8 [00:00<?, ?it/s]

Processing query 39/48...


  0%|          | 0/8 [00:00<?, ?it/s]

Processing query 40/48...


  0%|          | 0/8 [00:00<?, ?it/s]

Processing query 41/48...


  0%|          | 0/8 [00:00<?, ?it/s]

Processing query 42/48...


  0%|          | 0/8 [00:00<?, ?it/s]

Processing query 43/48...


  0%|          | 0/8 [00:00<?, ?it/s]

Processing query 44/48...


  0%|          | 0/7 [00:00<?, ?it/s]

Processing query 45/48...


  0%|          | 0/8 [00:00<?, ?it/s]

Processing query 46/48...


  0%|          | 0/7 [00:00<?, ?it/s]

Processing query 47/48...


  0%|          | 0/7 [00:00<?, ?it/s]

Processing query 48/48...


  0%|          | 0/8 [00:00<?, ?it/s]


Computed complexity for 48 queries
Successful computations: 48
Failed computations: 0

Complexity DataFrame:


Unnamed: 0,row_index,bn_index,query_index,query_vars,query_states,evidence,num_vars,num_edges,num_query_vars,num_evidence_vars,...,complete_elimination_order,induced_width,total_cost,max_factor_size,avg_factor_size,factor_sizes,log_total_cost,log_max_factor_size,keep_vars,eliminate_vars
0,0,0,0,[K_kx9xcv],[s2],{'O_cguird': 's1'},9,12,1,1,...,"[Y_nizbt1, O_gh8nq9, O_cguird, G_2ohusw, Z_7j9...",3,103,36,12.875,"[6, 6, 4, 12, 18, 36, 12, 6]",6.686501,5.169925,[K_kx9xcv],"[G_2ohusw, L_vy01po, O_cguird, O_gh8nq9, R_dbf..."
1,1,0,1,[Z_7j9n4o],[s1],{},9,12,1,0,...,"[Y_nizbt1, O_gh8nq9, O_cguird, G_2ohusw, L_vy0...",3,116,36,14.5,"[6, 6, 12, 12, 36, 18, 18, 6]",6.857981,5.169925,[Z_7j9n4o],"[G_2ohusw, K_kx9xcv, L_vy01po, O_cguird, O_gh8..."
2,2,0,2,[Z_7j9n4o],[s0],{},9,12,1,0,...,"[Y_nizbt1, O_gh8nq9, O_cguird, G_2ohusw, L_vy0...",3,116,36,14.5,"[6, 6, 12, 12, 36, 18, 18, 6]",6.857981,5.169925,[Z_7j9n4o],"[G_2ohusw, K_kx9xcv, L_vy01po, O_cguird, O_gh8..."
3,3,0,3,[O_cguird],[s2],"{'R_dbfd2r': 's1', 'K_kx9xcv': 's2'}",9,12,1,2,...,"[Y_nizbt1, O_gh8nq9, Z_7j9n4o, U_bx3bu6, R_dbf...",3,47,12,5.875,"[2, 6, 6, 6, 2, 4, 12, 6]",5.554589,3.584963,[O_cguird],"[G_2ohusw, K_kx9xcv, L_vy01po, O_gh8nq9, R_dbf..."
4,4,0,4,"[O_gh8nq9, O_cguird]","[s2, s0]",{},9,12,2,0,...,"[Y_nizbt1, Z_7j9n4o, U_bx3bu6, R_dbfd2r, K_kx9...",3,123,36,17.571429,"[6, 18, 36, 12, 12, 12, 18]",6.942515,5.169925,"[O_cguird, O_gh8nq9]","[G_2ohusw, K_kx9xcv, L_vy01po, R_dbfd2r, U_bx3..."


In [55]:
# Single row inspection with LLM toggle
from bn_query_sweep import inspect_row_and_call_llm, call_llm_for_query
from pathlib import Path

# Choose a row index from full_df
#row_index = 330
row_index = 8

if ENABLE_LLM_CALLS:
    result = inspect_row_and_call_llm(
        full_df=full_df,
        all_bayesian_networks=all_bayesian_networks,
        row_index=row_index,
        openai_client=client,
        model=MODEL,
        prompts_path=prompt_path,
        draw_kwargs={"figsize": (6, 4)},
    )
    print(result)
else:
    print("LLM calls are disabled. Skipping single row inspection with LLM.")
    # Still show the query details without LLM call
    row = full_df.iloc[row_index]
    print(f"Query: P({row['query_vars']}={row['query_states']} | {row['evidence']})")
    print(f"Exact probability: {row['probability']}")


LLM calls are disabled. Skipping single row inspection with LLM.
Query: P([np.str_('R_dbfd2r'), np.str_('G_2ohusw')]=['s0', 's1'] | {np.str_('Y_nizbt1'): 's1'})
Exact probability: 0.2072200625966507
