# Set up

## Import libraries

In [1]:
import argparse
import glob
import importlib
import json
import os
import statistics
import sys
import time
from concurrent.futures import ProcessPoolExecutor
from functools import partial

import numpy as np
import pandas as pd
from openai import OpenAI
from pydantic import ValidationError
from sklearn.metrics import (accuracy_score, f1_score,
                             precision_score, recall_score)
from tqdm import tqdm

# Ensure project root is on sys.path (required for papermill fresh kernels)
current_dir = os.getcwd()
if current_dir not in sys.path:
    sys.path.insert(0, current_dir)

import llm_calls.deepseek_evaluator as etb
from llm_calls.deepseek_evaluator import EntailmentEvaluator
from llm_calls.prompts import *

## Declare paths 

# Execute Entailment API calls 

In [2]:
# Check API key and environment
print("Validating environment setup...")

# Check for OpenAI/DeepSeek API key
api_key = os.environ.get('OPENAI_API_KEY') or os.environ.get('DEEPSEEK_API_KEY')
if api_key:
    print(f"✓ API key found (length: {len(api_key)} chars)")
else:
    print("⚠ WARNING: No API key found in environment variables")
    print("  Looking for OPENAI_API_KEY or DEEPSEEK_API_KEY")
    print("  LLM API calls will likely fail without proper credentials")

# Check if deepseek_evaluator module is importable
try:
    import llm_calls.deepseek_evaluator as etb
    print(f"✓ deepseek_evaluator module loaded from: {etb.__file__}")
except Exception as e:
    print(f"✗ ERROR: Cannot import deepseek_evaluator: {e}")
    raise

# Check OpenAI library
try:
    from openai import OpenAI
    print(f"✓ OpenAI library available")
except Exception as e:
    print(f"⚠ WARNING: OpenAI library issue: {e}")

print("\nEnvironment validation complete.\n")


Validating environment setup...
✓ API key found (length: 35 chars)
✓ deepseek_evaluator module loaded from: c:\Users\aesteva\Documents\GitHub\fea_project\llm_calls\deepseek_evaluator.py
✓ OpenAI library available

Environment validation complete.



In [3]:
llm_model = "deepseek-reasoner"
input_file = "fea_iterations/loop_data/df_to_llm_iter_0.csv"
args_file = "ArgLevel_ClauseIds_df.xlsx"
prompt = "test_prompt_tot_json2"
output = "labeled_pairs/Results_DS_BtoS_iteration_1.csv"
previous_input_file = "Results_DS_BtoS_iteration_0.csv"

In [4]:
# Parameters
llm_model = "deepseek-reasoner"
input_file = "fea_iterations\\loop_data/df_to_llm_iter_0.csv"
args_file = "ArgLevel_ClauseIds_df.xlsx"
prompt = "test_prompt_tot_json2"
output = "labeled_pairs/Results_DS_BtoS_iteration_1_one_way"
previous_input_file = ""


In [5]:
# Validate input files exist
print(f"\n{'='*60}")
print("INPUT FILE VALIDATION")
print(f"{'='*60}")

print(f"Checking input_file: {input_file}")
if os.path.exists(input_file):
    df_input = pd.read_csv(input_file)
    print(f"✓ Input file found: {len(df_input)} rows")
    print(f"  Columns: {list(df_input.columns)}")
else:
    raise FileNotFoundError(f"Input file not found: {input_file}")

print(f"\nChecking args_file: {args_file}")
if os.path.exists(args_file):
    if args_file.endswith('.csv'):
        df_args = pd.read_csv(args_file)
    elif args_file.endswith('.xlsx'):
        df_args = pd.read_excel(args_file)
    else:
        raise ValueError(f"Unsupported file format: {args_file}")
    print(f"✓ Args file found: {len(df_args)} rows")
else:
    raise FileNotFoundError(f"Args file not found: {args_file}")

if previous_input_file:
    print(f"\nChecking previous_input_file: {previous_input_file}")
    if os.path.exists(previous_input_file):
        df_prev = pd.read_csv(previous_input_file)
        print(f"✓ Previous input file found: {len(df_prev)} rows")
    else:
        print(f"⚠ WARNING: Previous input file not found: {previous_input_file}")
        print(f"  Continuing without merging previous results")

# Create output directory if needed
output_dir = os.path.dirname(output)
if output_dir and not os.path.exists(output_dir):
    os.makedirs(output_dir, exist_ok=True)
    print(f"\n✓ Created output directory: {output_dir}")

print(f"{'='*60}\n")



INPUT FILE VALIDATION
Checking input_file: fea_iterations\loop_data/df_to_llm_iter_0.csv
✓ Input file found: 50 rows
  Columns: ['sentence_id_2', 'sentence_id_1', 'sentence_text_2', 'argument_id_2', 'sentence_text_1', 'argument_id_1', 'score']

Checking args_file: ArgLevel_ClauseIds_df.xlsx


✓ Args file found: 6181 rows



In [6]:
# Strip .csv extension since evaluator adds it automatically
output_arg = output.replace('.csv', '') if output.endswith('.csv') else output

sys.argv = [
    "deepseek_evaluator.py",
    "--model", llm_model,
    "--file", input_file,
    "--external", args_file,
    "--prompt", prompt,
    "--output", output_arg
]

print(f"\n{'='*60}")
print("EXECUTING LLM API CALLS")
print(f"{'='*60}")
print(f"Model: {llm_model}")
print(f"Input: {input_file}")
print(f"Output (arg): {output_arg}")
print(f"Expected file: {output}")
print(f"{'='*60}\n")

try:
    etb.main()
    print(f"\n✓ LLM API calls completed")
except Exception as e:
    print(f"\n✗ ERROR during LLM API execution: {e}")
    print(f"Error type: {type(e).__name__}")
    import traceback
    traceback.print_exc()
    raise

# Check for output file - evaluator adds .csv extension
if not os.path.exists(output):
    # Check if double extension was created
    double_ext = output + '.csv' if not output.endswith('.csv') else output.replace('.csv', '') + '.csv.csv'
    if os.path.exists(double_ext):
        print(f"⚠ WARNING: Output created with double extension: {double_ext}")
        print(f"  Renaming to: {output}")
        os.rename(double_ext, output)
    else:
        # List files in output directory for debugging
        output_dir = os.path.dirname(output) or '.'
        print(f"⚠ Files in {output_dir}:")
        for f in os.listdir(output_dir):
            print(f"  - {f}")
        raise FileNotFoundError(f"Output file was not created: {output}")

print(f"\n{'='*60}")
print("VALIDATING OUTPUT")
print(f"{'='*60}")

df_new = pd.read_csv(output)
print(f"✓ Output file loaded: {len(df_new)} rows")

# Check for required columns
required_cols = ['sentence_id_1', 'sentence_id_2']
missing_cols = [col for col in required_cols if col not in df_new.columns]
if missing_cols:
    print(f"⚠ WARNING: Missing expected columns: {missing_cols}")
    print(f"  Available columns: {list(df_new.columns)}")

# Check for errors in the data
if 'ERROR' in df_new.columns:
    error_count = df_new['ERROR'].notna().sum()
    if error_count > 0:
        print(f"⚠ WARNING: {error_count}/{len(df_new)} rows contain errors")
        print(f"  First error: {df_new[df_new['ERROR'].notna()]['ERROR'].iloc[0]}")

# Check for empty/null critical fields
null_counts = df_new[required_cols].isnull().sum()
if null_counts.any():
    print(f"⚠ WARNING: Null values found in critical columns:")
    for col, count in null_counts[null_counts > 0].items():
        print(f"  {col}: {count}/{len(df_new)} rows")

print(f"✓ Output validation complete\n")

# Merge with previous results
if previous_input_file and os.path.exists(previous_input_file):
    print(f"{'='*60}")
    print("MERGING WITH PREVIOUS RESULTS")
    print(f"{'='*60}")
    
    df_previous = pd.read_csv(previous_input_file)
    print(f"✓ Loaded previous results: {len(df_previous)} rows")
    
    df_merged = pd.concat([df_previous, df_new], ignore_index=True)
    print(f"✓ Merged: {len(df_previous)} previous + {len(df_new)} new = {len(df_merged)} total")
    
    # Validate merged data
    if len(df_merged) != len(df_previous) + len(df_new):
        print(f"⚠ WARNING: Merged row count doesn't match expected sum")
    
    df_merged.to_csv(output, index=False)
    print(f"✓ Saved merged results to {output}")
    print(f"{'='*60}\n")
else:
    print(f"\n✓ No previous input file to merge")
    print(f"✓ Output saved to {output}\n")



EXECUTING LLM API CALLS
Model: deepseek-reasoner
Input: fea_iterations\loop_data/df_to_llm_iter_0.csv
Output (arg): labeled_pairs/Results_DS_BtoS_iteration_1_one_way
Expected file: labeled_pairs/Results_DS_BtoS_iteration_1_one_way

Loading data from fea_iterations\loop_data/df_to_llm_iter_0.csv...
Loading data from ArgLevel_ClauseIds_df.xlsx...


Loaded 50 sentence pairs


Using model: deepseek-reasoner
Using prompt type: test_prompt_tot_json2
Running batch evaluation...


  0%|          | 0/50 [00:00<?, ?it/s]

 18%|█▊        | 9/50 [00:00<00:00, 83.57it/s]

 36%|███▌      | 18/50 [00:00<00:00, 70.27it/s]

 52%|█████▏    | 26/50 [00:00<00:00, 63.36it/s]

 66%|██████▌   | 33/50 [00:00<00:00, 51.74it/s]

 78%|███████▊  | 39/50 [00:00<00:00, 42.66it/s]

 88%|████████▊ | 44/50 [00:00<00:00, 37.24it/s]

 96%|█████████▌| 48/50 [00:01<00:00, 35.98it/s]

100%|██████████| 50/50 [00:01<00:00, 43.88it/s]




[DEBUG] content length: 1110, reasoning_content length: 5554
[DEBUG] content preview: {
  "sentence_id_1": "B0272003p",
  "sentence_id_2": "B0717005p",
  "answers": "YES, YES, YES",
  "reasoning": "1. In Argument 1, Sidney explicitly states that when magistrates act contrary to the welfare of the people, they forfeit legitimacy, and the nation has the right to resist, which inherentl
[DEBUG] reasoning preview: First, I need to assess whether Statement 1 entails Statement 2. Statement 1 is: "When magistrates act contrary to the principle of the welfare of the people, the magistrates forfeit their legitimacy." Statement 2 is: "In such a case, the people are justified in no longer recognizing the magistrate 
Saving progress at batch 1...


Saving progress at batch 2...
Deleting previous file: labeled_pairs/Results_DS_BtoS_iteration_1_one_way_progress_batch_1.csv


Saving progress at batch 3...
Deleting previous file: labeled_pairs/Results_DS_BtoS_iteration_1_one_way_progress_batch_2.csv


Saving progress at batch 4...
Deleting previous file: labeled_pairs/Results_DS_BtoS_iteration_1_one_way_progress_batch_3.csv


Saving progress at batch 5...
Deleting previous file: labeled_pairs/Results_DS_BtoS_iteration_1_one_way_progress_batch_4.csv


Saving progress at batch 6...
Deleting previous file: labeled_pairs/Results_DS_BtoS_iteration_1_one_way_progress_batch_5.csv


Saving progress at batch 7...
Deleting previous file: labeled_pairs/Results_DS_BtoS_iteration_1_one_way_progress_batch_6.csv


Saving progress at batch 8...
Deleting previous file: labeled_pairs/Results_DS_BtoS_iteration_1_one_way_progress_batch_7.csv


Saving progress at batch 9...
Deleting previous file: labeled_pairs/Results_DS_BtoS_iteration_1_one_way_progress_batch_8.csv


Saving progress at batch 10...
Deleting previous file: labeled_pairs/Results_DS_BtoS_iteration_1_one_way_progress_batch_9.csv
Saving progress at batch 11...
Deleting previous file: labeled_pairs/Results_DS_BtoS_iteration_1_one_way_progress_batch_10.csv


Saving progress at batch 12...
Deleting previous file: labeled_pairs/Results_DS_BtoS_iteration_1_one_way_progress_batch_11.csv


Saving progress at batch 13...
Deleting previous file: labeled_pairs/Results_DS_BtoS_iteration_1_one_way_progress_batch_12.csv


Saving progress at batch 14...
Deleting previous file: labeled_pairs/Results_DS_BtoS_iteration_1_one_way_progress_batch_13.csv


Saving progress at batch 15...
Deleting previous file: labeled_pairs/Results_DS_BtoS_iteration_1_one_way_progress_batch_14.csv


Saving progress at batch 16...
Deleting previous file: labeled_pairs/Results_DS_BtoS_iteration_1_one_way_progress_batch_15.csv


Saving progress at batch 17...
Deleting previous file: labeled_pairs/Results_DS_BtoS_iteration_1_one_way_progress_batch_16.csv


Saving progress at batch 18...
Deleting previous file: labeled_pairs/Results_DS_BtoS_iteration_1_one_way_progress_batch_17.csv


Saving progress at batch 19...
Deleting previous file: labeled_pairs/Results_DS_BtoS_iteration_1_one_way_progress_batch_18.csv


Saving progress at batch 20...
Deleting previous file: labeled_pairs/Results_DS_BtoS_iteration_1_one_way_progress_batch_19.csv


Saving progress at batch 21...
Deleting previous file: labeled_pairs/Results_DS_BtoS_iteration_1_one_way_progress_batch_20.csv


Saving progress at batch 22...
Deleting previous file: labeled_pairs/Results_DS_BtoS_iteration_1_one_way_progress_batch_21.csv


Saving progress at batch 23...
Deleting previous file: labeled_pairs/Results_DS_BtoS_iteration_1_one_way_progress_batch_22.csv


Saving progress at batch 24...
Deleting previous file: labeled_pairs/Results_DS_BtoS_iteration_1_one_way_progress_batch_23.csv


Saving progress at batch 25...
Deleting previous file: labeled_pairs/Results_DS_BtoS_iteration_1_one_way_progress_batch_24.csv
Saving progress at batch 26...
Deleting previous file: labeled_pairs/Results_DS_BtoS_iteration_1_one_way_progress_batch_25.csv


Saving progress at batch 27...
Deleting previous file: labeled_pairs/Results_DS_BtoS_iteration_1_one_way_progress_batch_26.csv


Saving progress at batch 28...
Deleting previous file: labeled_pairs/Results_DS_BtoS_iteration_1_one_way_progress_batch_27.csv


Saving progress at batch 29...
Deleting previous file: labeled_pairs/Results_DS_BtoS_iteration_1_one_way_progress_batch_28.csv


Saving progress at batch 30...
Deleting previous file: labeled_pairs/Results_DS_BtoS_iteration_1_one_way_progress_batch_29.csv


Saving progress at batch 31...
Deleting previous file: labeled_pairs/Results_DS_BtoS_iteration_1_one_way_progress_batch_30.csv


Saving progress at batch 32...
Deleting previous file: labeled_pairs/Results_DS_BtoS_iteration_1_one_way_progress_batch_31.csv


Saving progress at batch 33...
Deleting previous file: labeled_pairs/Results_DS_BtoS_iteration_1_one_way_progress_batch_32.csv


Saving progress at batch 34...
Deleting previous file: labeled_pairs/Results_DS_BtoS_iteration_1_one_way_progress_batch_33.csv


Saving progress at batch 35...
Deleting previous file: labeled_pairs/Results_DS_BtoS_iteration_1_one_way_progress_batch_34.csv


Saving progress at batch 36...
Deleting previous file: labeled_pairs/Results_DS_BtoS_iteration_1_one_way_progress_batch_35.csv


Saving progress at batch 37...
Deleting previous file: labeled_pairs/Results_DS_BtoS_iteration_1_one_way_progress_batch_36.csv
Saving progress at batch 38...
Deleting previous file: labeled_pairs/Results_DS_BtoS_iteration_1_one_way_progress_batch_37.csv


Saving progress at batch 39...
Deleting previous file: labeled_pairs/Results_DS_BtoS_iteration_1_one_way_progress_batch_38.csv


Saving progress at batch 40...
Deleting previous file: labeled_pairs/Results_DS_BtoS_iteration_1_one_way_progress_batch_39.csv
Saving progress at batch 41...
Deleting previous file: labeled_pairs/Results_DS_BtoS_iteration_1_one_way_progress_batch_40.csv


Saving progress at batch 42...
Deleting previous file: labeled_pairs/Results_DS_BtoS_iteration_1_one_way_progress_batch_41.csv


Saving progress at batch 43...
Deleting previous file: labeled_pairs/Results_DS_BtoS_iteration_1_one_way_progress_batch_42.csv
Saving progress at batch 44...
Deleting previous file: labeled_pairs/Results_DS_BtoS_iteration_1_one_way_progress_batch_43.csv


Saving progress at batch 45...
Deleting previous file: labeled_pairs/Results_DS_BtoS_iteration_1_one_way_progress_batch_44.csv


Saving progress at batch 46...
Deleting previous file: labeled_pairs/Results_DS_BtoS_iteration_1_one_way_progress_batch_45.csv


Saving progress at batch 47...
Deleting previous file: labeled_pairs/Results_DS_BtoS_iteration_1_one_way_progress_batch_46.csv


Saving progress at batch 48...
Deleting previous file: labeled_pairs/Results_DS_BtoS_iteration_1_one_way_progress_batch_47.csv


Saving progress at batch 49...
Deleting previous file: labeled_pairs/Results_DS_BtoS_iteration_1_one_way_progress_batch_48.csv


Saving progress at batch 50...
Deleting previous file: labeled_pairs/Results_DS_BtoS_iteration_1_one_way_progress_batch_49.csv
Saved results to labeled_pairs/Results_DS_BtoS_iteration_1_one_way.csv

✓ LLM API calls completed
  Renaming to: labeled_pairs/Results_DS_BtoS_iteration_1_one_way

VALIDATING OUTPUT
✓ Output file loaded: 50 rows
✓ Output validation complete


✓ No previous input file to merge
✓ Output saved to labeled_pairs/Results_DS_BtoS_iteration_1_one_way



In [7]:
# Strip .csv extension since evaluator adds it automatically
output_arg = output.replace('.csv', '') if output.endswith('.csv') else output

sys.argv = [
    "deepseek_evaluator.py",
    "--model", llm_model,
    "--file", input_file,
    "--external", args_file,
    "--prompt", prompt,
    "--output", output_arg
]

print(f"\n{'='*60}")
print("EXECUTING LLM API CALLS")
print(f"{'='*60}")
print(f"Model: {llm_model}")
print(f"Input: {input_file}")
print(f"Output (arg): {output_arg}")
print(f"Expected file: {output}")
print(f"{'='*60}\n")

try:
    etb.main()
    print(f"\n✓ LLM API calls completed")
except Exception as e:
    print(f"\n✗ ERROR during LLM API execution: {e}")
    print(f"Error type: {type(e).__name__}")
    import traceback
    traceback.print_exc()
    raise

# Resolve actual output path — evaluator adds .csv to the base name.
# Do NOT rename/move files — just find the correct path.
if os.path.exists(output):
    output_actual = output
elif os.path.exists(f"{output}.csv"):
    output_actual = f"{output}.csv"
    print(f"✓ Output found at {output_actual}")
elif output.endswith('.csv') and os.path.exists(output.replace('.csv', '') + '.csv'):
    output_actual = output
else:
    output_dir_check = os.path.dirname(output) or '.'
    print(f"⚠ Files in {output_dir_check}:")
    for f in os.listdir(output_dir_check):
        print(f"  - {f}")
    raise FileNotFoundError(f"Output file was not created: {output}")

print(f"\n{'='*60}")
print("VALIDATING OUTPUT")
print(f"{'='*60}")


EXECUTING LLM API CALLS
Model: deepseek-reasoner
Input: fea_iterations\loop_data/df_to_llm_iter_0.csv
Output (arg): labeled_pairs/Results_DS_BtoS_iteration_1_one_way
Expected file: labeled_pairs/Results_DS_BtoS_iteration_1_one_way

Loading data from fea_iterations\loop_data/df_to_llm_iter_0.csv...
Loading data from ArgLevel_ClauseIds_df.xlsx...


Loaded 50 sentence pairs


Using model: deepseek-reasoner
Using prompt type: test_prompt_tot_json2
Running batch evaluation...


  0%|          | 0/50 [00:00<?, ?it/s]

 22%|██▏       | 11/50 [00:00<00:00, 106.40it/s]

 44%|████▍     | 22/50 [00:00<00:00, 92.91it/s] 

 66%|██████▌   | 33/50 [00:00<00:00, 96.43it/s]

 88%|████████▊ | 44/50 [00:00<00:00, 99.45it/s]

100%|██████████| 50/50 [00:00<00:00, 98.35it/s]




[DEBUG] content length: 847, reasoning_content length: 6518
[DEBUG] content preview: {
  "sentence_id_1": "B0846005p",
  "sentence_id_2": "S0000774006p",
  "answers": "YES YES YES",
  "reasoning": "First, conceptually, if legitimate governance is essential for true peace, then maintaining its integrity during tensions is crucial to prevent conflict escalation. Second, contextually f
[DEBUG] reasoning preview: First, I need to assess whether Statement 1 entails Statement 2. Statement 1 is: "A legitimate and orderly governance is essential for true peace." Statement 2 is: "Maintaining the integrity of governance is important during a time of escalating tensions and potential civil conflict."

Entailment me
Saving progress at batch 1...


Saving progress at batch 2...
Deleting previous file: labeled_pairs/Results_DS_BtoS_iteration_1_one_way_progress_batch_1.csv


Saving progress at batch 3...
Deleting previous file: labeled_pairs/Results_DS_BtoS_iteration_1_one_way_progress_batch_2.csv


Saving progress at batch 4...
Deleting previous file: labeled_pairs/Results_DS_BtoS_iteration_1_one_way_progress_batch_3.csv
Saving progress at batch 5...
Deleting previous file: labeled_pairs/Results_DS_BtoS_iteration_1_one_way_progress_batch_4.csv


Saving progress at batch 6...
Deleting previous file: labeled_pairs/Results_DS_BtoS_iteration_1_one_way_progress_batch_5.csv


Saving progress at batch 7...
Deleting previous file: labeled_pairs/Results_DS_BtoS_iteration_1_one_way_progress_batch_6.csv


Saving progress at batch 8...
Deleting previous file: labeled_pairs/Results_DS_BtoS_iteration_1_one_way_progress_batch_7.csv
Saving progress at batch 9...
Deleting previous file: labeled_pairs/Results_DS_BtoS_iteration_1_one_way_progress_batch_8.csv


Saving progress at batch 10...
Deleting previous file: labeled_pairs/Results_DS_BtoS_iteration_1_one_way_progress_batch_9.csv


Saving progress at batch 11...
Deleting previous file: labeled_pairs/Results_DS_BtoS_iteration_1_one_way_progress_batch_10.csv


Saving progress at batch 12...
Deleting previous file: labeled_pairs/Results_DS_BtoS_iteration_1_one_way_progress_batch_11.csv
Saving progress at batch 13...
Deleting previous file: labeled_pairs/Results_DS_BtoS_iteration_1_one_way_progress_batch_12.csv


Saving progress at batch 14...
Deleting previous file: labeled_pairs/Results_DS_BtoS_iteration_1_one_way_progress_batch_13.csv


Saving progress at batch 15...
Deleting previous file: labeled_pairs/Results_DS_BtoS_iteration_1_one_way_progress_batch_14.csv


Saving progress at batch 16...
Deleting previous file: labeled_pairs/Results_DS_BtoS_iteration_1_one_way_progress_batch_15.csv
Saving progress at batch 17...
Deleting previous file: labeled_pairs/Results_DS_BtoS_iteration_1_one_way_progress_batch_16.csv


Saving progress at batch 18...
Deleting previous file: labeled_pairs/Results_DS_BtoS_iteration_1_one_way_progress_batch_17.csv
Saving progress at batch 19...
Deleting previous file: labeled_pairs/Results_DS_BtoS_iteration_1_one_way_progress_batch_18.csv


Saving progress at batch 20...
Deleting previous file: labeled_pairs/Results_DS_BtoS_iteration_1_one_way_progress_batch_19.csv
Saving progress at batch 21...
Deleting previous file: labeled_pairs/Results_DS_BtoS_iteration_1_one_way_progress_batch_20.csv


Saving progress at batch 22...
Deleting previous file: labeled_pairs/Results_DS_BtoS_iteration_1_one_way_progress_batch_21.csv


Saving progress at batch 23...
Deleting previous file: labeled_pairs/Results_DS_BtoS_iteration_1_one_way_progress_batch_22.csv


Saving progress at batch 24...
Deleting previous file: labeled_pairs/Results_DS_BtoS_iteration_1_one_way_progress_batch_23.csv


Saving progress at batch 25...
Deleting previous file: labeled_pairs/Results_DS_BtoS_iteration_1_one_way_progress_batch_24.csv


Saving progress at batch 26...
Deleting previous file: labeled_pairs/Results_DS_BtoS_iteration_1_one_way_progress_batch_25.csv


Saving progress at batch 27...
Deleting previous file: labeled_pairs/Results_DS_BtoS_iteration_1_one_way_progress_batch_26.csv


Saving progress at batch 28...
Deleting previous file: labeled_pairs/Results_DS_BtoS_iteration_1_one_way_progress_batch_27.csv


Saving progress at batch 29...
Deleting previous file: labeled_pairs/Results_DS_BtoS_iteration_1_one_way_progress_batch_28.csv


Saving progress at batch 30...
Deleting previous file: labeled_pairs/Results_DS_BtoS_iteration_1_one_way_progress_batch_29.csv
Saving progress at batch 31...
Deleting previous file: labeled_pairs/Results_DS_BtoS_iteration_1_one_way_progress_batch_30.csv


Saving progress at batch 32...
Deleting previous file: labeled_pairs/Results_DS_BtoS_iteration_1_one_way_progress_batch_31.csv


Saving progress at batch 33...
Deleting previous file: labeled_pairs/Results_DS_BtoS_iteration_1_one_way_progress_batch_32.csv
Saving progress at batch 34...
Deleting previous file: labeled_pairs/Results_DS_BtoS_iteration_1_one_way_progress_batch_33.csv


Saving progress at batch 35...
Deleting previous file: labeled_pairs/Results_DS_BtoS_iteration_1_one_way_progress_batch_34.csv


Saving progress at batch 36...
Deleting previous file: labeled_pairs/Results_DS_BtoS_iteration_1_one_way_progress_batch_35.csv


Saving progress at batch 37...
Deleting previous file: labeled_pairs/Results_DS_BtoS_iteration_1_one_way_progress_batch_36.csv


Saving progress at batch 38...
Deleting previous file: labeled_pairs/Results_DS_BtoS_iteration_1_one_way_progress_batch_37.csv
Saving progress at batch 39...
Deleting previous file: labeled_pairs/Results_DS_BtoS_iteration_1_one_way_progress_batch_38.csv


Saving progress at batch 40...
Deleting previous file: labeled_pairs/Results_DS_BtoS_iteration_1_one_way_progress_batch_39.csv


Saving progress at batch 41...
Deleting previous file: labeled_pairs/Results_DS_BtoS_iteration_1_one_way_progress_batch_40.csv
Saving progress at batch 42...
Deleting previous file: labeled_pairs/Results_DS_BtoS_iteration_1_one_way_progress_batch_41.csv


Saving progress at batch 43...
Deleting previous file: labeled_pairs/Results_DS_BtoS_iteration_1_one_way_progress_batch_42.csv


Saving progress at batch 44...
Deleting previous file: labeled_pairs/Results_DS_BtoS_iteration_1_one_way_progress_batch_43.csv


Saving progress at batch 45...
Deleting previous file: labeled_pairs/Results_DS_BtoS_iteration_1_one_way_progress_batch_44.csv


Saving progress at batch 46...
Deleting previous file: labeled_pairs/Results_DS_BtoS_iteration_1_one_way_progress_batch_45.csv


Saving progress at batch 47...
Deleting previous file: labeled_pairs/Results_DS_BtoS_iteration_1_one_way_progress_batch_46.csv


Saving progress at batch 48...
Deleting previous file: labeled_pairs/Results_DS_BtoS_iteration_1_one_way_progress_batch_47.csv


Saving progress at batch 49...
Deleting previous file: labeled_pairs/Results_DS_BtoS_iteration_1_one_way_progress_batch_48.csv


Saving progress at batch 50...
Deleting previous file: labeled_pairs/Results_DS_BtoS_iteration_1_one_way_progress_batch_49.csv
Saved results to labeled_pairs/Results_DS_BtoS_iteration_1_one_way.csv

✓ LLM API calls completed

VALIDATING OUTPUT


In [8]:
# Summary Report
print(f"\n{'='*60}")
print("EXECUTION SUMMARY")
print(f"{'='*60}")

df_final = pd.read_csv(output_actual)
print(f"Total rows in final output: {len(df_final)}")
print(f"Output file: {output_actual}")

if 'llm_conclusion_12' in df_final.columns:
    conclusion_counts = df_final['llm_conclusion_12'].value_counts()
    print(f"\nLLM Conclusions:")
    for conclusion, count in conclusion_counts.items():
        print(f"  {conclusion}: {count} ({count/len(df_final)*100:.1f}%)")
    
    # Check for failed calls
    failed = df_final['llm_conclusion_12'].isnull().sum()
    if failed > 0:
        print(f"\n⚠ Failed API calls: {failed}/{len(df_final)} ({failed/len(df_final)*100:.1f}%)")
else:
    print("\n⚠ Column 'llm_conclusion_12' not found in output")

print(f"\n{'='*60}")
print("✓ Pipeline execution complete")
print(f"{'='*60}\n")


EXECUTION SUMMARY
Total rows in final output: 50
Output file: labeled_pairs/Results_DS_BtoS_iteration_1_one_way

LLM Conclusions:
  NO: 28 (56.0%)
  YES: 22 (44.0%)

✓ Pipeline execution complete

