# How good are LLMs at probabilistic inference? (continuous variables)

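The idea is that we pass the LLM the conditional probability distributions (CPDs) of a Bayesian network and ask it for the probability of a specific case, e.g. that a variable falls in a given interval given some evidence.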

We also ask it to estimate the mean and standard deviation (std) of a queried variable given the evidence.

We then compare the LLM result with that of a Bayesian network.

In [None]:
import sys
from pathlib import Path
from os import getenv

from openai import OpenAI
from pgmpy.models import LinearGaussianBayesianNetwork

# Set the base path
base_path = Path("../../")  # Two levels up from the current working directory

# Add the src/ directory to sys.path using base_path.
# Guarded so that re-running this cell does not keep appending the same entry.
src_dir = str((base_path / "src").resolve())
if src_dir not in sys.path:
    sys.path.append(src_dir)


from bn_utils import draw_bayesian_network
from inference_continuous import query_lgbn, format_continuous_query
from yaml_utils import load_yaml
from llm_calling import run_llm_call

## Load Bayesian network

In [None]:
def create_cancer_model(use_random=False):
    """
    Build the five-node "cancer" Linear Gaussian Bayesian Network and print a summary.

    The structure is fixed (Pollution -> Cancer <- Smoker, Cancer -> Xray,
    Cancer -> Dyspnoea); only the CPD parameters depend on ``use_random``.

    Args:
        use_random (bool): If True, use random parameters. If False, use predefined parameters.

    Returns:
        model: LinearGaussianBayesianNetwork with CPDs set
        cpds_dict: Dictionary mapping variable names to their CPDs

    Raises:
        AssertionError: If ``model.check_model()`` returns a falsy value.
    """

    # Create the model structure
    model = LinearGaussianBayesianNetwork([
        ("Pollution", "Cancer"),
        ("Smoker", "Cancer"),
        ("Cancer", "Xray"),
        ("Cancer", "Dyspnoea"),
    ])

    if use_random:
        # Use random parameters
        model.get_random_cpds(inplace=True)
        print("Created model with random parameters")
    else:
        # Use specific predefined parameters.
        # Imported here because only this branch needs the class.
        from pgmpy.factors.continuous import LinearGaussianCPD

        # In each coefficient list the intercept comes first, followed by one
        # coefficient per ``evidence`` entry in the same order (this is how the
        # formulas in the comments below line up with the arguments).

        # P(Pollution) = N(0.305; 1.04)
        pollution_cpd = LinearGaussianCPD('Pollution', [0.305], 1.04)

        # P(Smoker) = N(1.446; 0.102)
        smoker_cpd = LinearGaussianCPD('Smoker', [1.446], 0.102)

        # P(Cancer | Pollution, Smoker) = N(0.678*Pollution + -0.586*Smoker + 0.244; 0.909)
        cancer_cpd = LinearGaussianCPD('Cancer', [0.244, 0.678, -0.586], 0.909,
                                      evidence=['Pollution', 'Smoker'])

        # P(Xray | Cancer) = N(-0.623*Cancer + -0.458; 0.135)
        xray_cpd = LinearGaussianCPD('Xray', [-0.458, -0.623], 0.135,
                                    evidence=['Cancer'])

        # P(Dyspnoea | Cancer) = N(1.218*Cancer + -0.503; 0.271)
        dyspnoea_cpd = LinearGaussianCPD('Dyspnoea', [-0.503, 1.218], 0.271,
                                        evidence=['Cancer'])

        # Add CPDs to model
        model.add_cpds(pollution_cpd, smoker_cpd, cancer_cpd, xray_cpd, dyspnoea_cpd)
        print("Created model with predefined parameters")

    # Validate with an explicit raise instead of `assert`: under `python -O`
    # an assert statement (including the check_model() call inside it) is
    # removed, which would silently skip validation. AssertionError is kept so
    # the failure type is the same as before.
    if not model.check_model():
        raise AssertionError("Model validation failed")

    # Fetch the CPDs once and reuse them for both the lookup dict and the summary
    cpds = model.get_cpds()
    cpds_dict = {cpd.variable: cpd for cpd in cpds}

    # One line per CPD. This string is local to the function; notebook cells
    # that need the same text have to rebuild it from model.get_cpds().
    cpds_as_string = "\n".join(str(cpd) for cpd in cpds)

    # Display model info
    print(f"\nNodes in the model: {model.nodes()}")
    print(f"Edges in the model: {model.edges()}")
    print("CPDs in the model:")
    print(cpds_as_string)

    return model, cpds_dict

# Create model with predefined parameters
model, cpds_dict = create_cancer_model(use_random=False)

# Or create with random parameters:
# model, cpds_dict = create_cancer_model(use_random=True)

# Text rendering of every CPD, interpolated into the LLM prompts further down.
# create_cancer_model() builds the same string only as a local for printing,
# so without this assignment the prompt cells fail with a NameError.
cpds_as_string = "\n".join(str(cpd) for cpd in model.get_cpds())

In [None]:
# Pass the model built above to the project helper imported from bn_utils
# (presumably renders the graph structure — see bn_utils for details).
draw_bayesian_network(model)

## Prepare prompt

In [None]:
# Read the YAML file holding the prompt templates; the "system_prompt" and
# "prompt_base" entries are looked up from `prompts` when the messages are built.
prompt_path = base_path.joinpath("notebooks", "continuous", "prompts.yaml")
prompts = load_yaml(prompt_path)

In [None]:
# Example 1: posterior estimation — query Cancer given observed Xray and Smoker values
variable = 'Cancer'
evidence = {'Xray': -1.0, 'Smoker': 2.0}

# Turn the query into text and drop it, together with the CPD listing,
# into the base prompt template.
query_str = format_continuous_query(variable, evidence)
prompt_str = prompts["prompt_base"].format(query=query_str, cpts=cpds_as_string)

# Chat-style conversation: system instructions first, then the user prompt.
messages = [
    dict(role="system", content=prompts["system_prompt"]),
    dict(role="user", content=prompt_str),
]

print("Query:", query_str)

In [None]:
# print(prompts["system_prompt"])
# print(messages[1]["content"])

## Run exact inference

In [None]:
# Exact posterior from the Bayesian network for the Example 1 query;
# the 'mean' and 'std' entries of the result are reported to four decimals.
result = query_lgbn(model, variable, evidence)
ground_truth_line = f"Ground truth - Mean: {result['mean']:.4f}, Std: {result['std']:.4f}"
print(ground_truth_line)

## Initialize LLM

In [None]:
# Model identifier passed as `model=` to run_llm_call in the cells below.
# Exactly one assignment should be active; the alternatives are kept commented
# out so they can be swapped in.
# MODEL = "gpt-4o-mini" # To test the baseline performance of LLMs
# MODEL = "deepseek/deepseek-r1" 
# MODEL = "anthropic/claude-3.7-sonnet:thinking"
# MODEL = "google/gemini-2.5-pro-preview"
MODEL = "openai/o3-mini-high"

In [None]:
# Initialize OpenAI client against the OpenRouter endpoint.
# Fail fast with a clear message when the key is missing instead of handing
# `api_key=None` to the client.
# NOTE(review): the openai client is documented to fall back to the
# OPENAI_API_KEY environment variable when api_key is None, which would send
# that key to OpenRouter — confirm against the installed openai version.
if not getenv("OPENROUTER_API_KEY"):
    raise RuntimeError(
        "OPENROUTER_API_KEY is not set; export it before running this notebook."
    )

client = OpenAI(
    base_url="https://openrouter.ai/api/v1",
    api_key=getenv("OPENROUTER_API_KEY"),
)

## Run LLM

In [None]:
# Send the Example 1 conversation to the selected model. run_llm_call returns
# the reply plus a message list, which replaces `messages` here.
response, messages = run_llm_call(
    model=MODEL,
    messages=messages,
    openai_client=client,
)

print("LLM Response:", response, sep="\n")

## Example 2: Probability Calculation Query

In [None]:
# Example 2: interval probability — P(0 < Pollution < 1 | Dyspnoea = 0.5)
variable2 = 'Pollution'
evidence2 = {'Dyspnoea': 0.5}
prob_range = (0, 1)

# Same prompt template as Example 1, this time with an interval attached to the query.
query_str2 = format_continuous_query(variable2, evidence2, prob_range)
prompt_str2 = prompts["prompt_base"].format(query=query_str2, cpts=cpds_as_string)

# Chat-style conversation: system instructions first, then the user prompt.
messages2 = [
    dict(role="system", content=prompts["system_prompt"]),
    dict(role="user", content=prompt_str2),
]

print("Query:", query_str2)

In [None]:
# Exact interval probability from the Bayesian network for the Example 2 query;
# the 'probability' entry of the result is reported to four decimals.
result2 = query_lgbn(model, variable2, evidence2, prob_range)
probability_line = f"Ground truth - Probability: {result2['probability']:.4f}"
print(probability_line)

In [None]:
# Send the Example 2 conversation to the selected model. run_llm_call returns
# the reply plus a message list, which replaces `messages2` here.
response2, messages2 = run_llm_call(
    model=MODEL,
    messages=messages2,
    openai_client=client,
)

print("LLM Response for probability calculation:", response2, sep="\n")