# LLM calls




In [None]:
import sys
from pathlib import Path
import re
from os import getenv
import pandas as pd
import numpy as np
from openai import OpenAI
import pickle

# Make the project's `src` directory importable from this notebook.
# The repository root is taken to be two directory levels above the current
# working directory (presumably the notebook's own folder — confirm when
# running from elsewhere).
notebook_dir = Path(".").resolve()
repo_root = notebook_dir.parents[1]
sys.path.append(str(repo_root.joinpath('src')))

from llm_calling import extract_numeric_answer, create_probability_prompt, run_llm_call 
from yaml_utils import load_yaml

# Restore the pickled collection of Bayesian networks from the working directory.
# NOTE: unpickling can execute arbitrary code — only load a bns.pkl that was
# produced by a trusted run of this project.
bns_pickle_path = Path("bns.pkl")
with bns_pickle_path.open("rb") as pickle_file:
    all_bayesian_networks = pickle.load(pickle_file)

# Metadata table stored alongside the pickled networks.
df = pd.read_csv("bns_metadata.csv")

# Query table; later cells read its rows and add the LLM result columns to it.
full_df = pd.read_csv("queries.csv")
print("Loaded DataFrame from 'queries.csv'")
query_preview = full_df.head()
print(query_preview)

In [None]:
# Model identifier stamped onto every result row by the LLM cell.
# Other identifiers previously tried here: "openai/gpt-5",
# "deepseek/deepseek-chat-v3.1:free". Only one assignment is kept so the
# active choice is unambiguous.
MODEL = "openai/o3-mini-high"

# Fail fast when the OpenRouter key is missing. Passing api_key=None makes the
# OpenAI client fall back to the OPENAI_API_KEY environment variable, which
# would send that unrelated credential to openrouter.ai (or fail later with a
# confusing error when neither variable is set).
openrouter_api_key = getenv("OPENROUTER_API_KEY")
if not openrouter_api_key:
    raise RuntimeError(
        "OPENROUTER_API_KEY is not set; export it before running this notebook."
    )

# OpenAI-compatible client pointed at the OpenRouter endpoint.
client = OpenAI(
    base_url="https://openrouter.ai/api/v1",
    api_key=openrouter_api_key,
)

# Read the prompt definitions from notebooks/discrete/prompts.yaml under the
# repository root.
prompt_path = repo_root.joinpath("notebooks", "discrete", "prompts.yaml")
prompts = load_yaml(prompt_path)

In [None]:
# Call the LLM for a chosen subset of the already-generated queries in full_df
# and store each answer back on the corresponding row.
# `_parse_field` and `call_llm_for_query` are project helpers imported from
# experiments.bn_query_sweep (their implementation is not visible here).
from experiments.bn_query_sweep import _parse_field, call_llm_for_query

# Create every result column up front so the frame has the same schema whether
# or not any row ends up being processed.
for result_column in ('llm_probability', 'llm_response', 'model'):
    if result_column not in full_df.columns:
        full_df[result_column] = None

# Index *labels* of the rows to send to the LLM. For a cheaper trial run, use a
# sample instead, e.g. the index of full_df.sample(n=15, random_state=0).
selected_indices = list(full_df.index)

total_bns = len(all_bayesian_networks)
for ridx in selected_indices:
    # Read by label (.loc), matching the label-based .at writes below; a
    # positional .iloc lookup would pick the wrong row (or raise) as soon as
    # full_df has a non-default index, e.g. after filtering.
    row = full_df.loc[ridx]
    bn_index = int(row['bn_index'])
    bn = all_bayesian_networks[bn_index]['bn']

    # Falsy parse results are normalised: empty lists for the query fields,
    # None for the evidence.
    query_vars = _parse_field(row['query_vars']) or []
    query_states = _parse_field(row['query_states']) or []
    evidence = _parse_field(row['evidence']) or None

    print(f"Processing BN {bn_index}/{total_bns}, Query {int(row['query_index'])}...")
    llm_prob, llm_response = call_llm_for_query(bn, query_vars, query_states, evidence)

    full_df.at[ridx, 'llm_probability'] = llm_prob
    full_df.at[ridx, 'llm_response'] = llm_response
    # NOTE(review): MODEL is not passed to call_llm_for_query, so this records
    # the notebook-level setting — confirm the helper actually uses that model.
    full_df.at[ridx, 'model'] = MODEL

In [None]:
import datetime
import os
from pathlib import Path

# Stamp every row with today's date (local time, YYYY-MM-DD) and persist the
# whole frame to llm_responses.csv, creating the file or appending to it.
date_now = datetime.datetime.now().strftime('%Y-%m-%d')
full_df['date'] = date_now

csv_path = Path("llm_responses.csv")
# A missing or zero-byte file has no header row yet, so write one.
needs_header = not csv_path.exists() or csv_path.stat().st_size == 0
if needs_header:
    full_df.to_csv(csv_path, index=False)
    print("DataFrame with LLM responses written to", csv_path)
else:
    # Appending with header=False is purely positional: if the existing file
    # has a different column order the values would land under the wrong
    # headers. Align to the file's header and refuse to append when the column
    # sets differ rather than silently corrupting the results file.
    existing_columns = list(pd.read_csv(csv_path, nrows=0).columns)
    current_columns = list(full_df.columns)
    same_columns = (
        len(existing_columns) == len(current_columns)
        and set(existing_columns) == set(current_columns)
    )
    if not same_columns:
        raise ValueError(
            f"Cannot append to {csv_path}: existing columns {existing_columns} "
            f"do not match current columns {current_columns}"
        )
    full_df.to_csv(
        csv_path,
        mode='a',
        header=False,
        index=False,
        columns=existing_columns,
    )
    print("DataFrame with LLM responses appended to", csv_path)
