In [None]:
import pandas as pd
import numpy as np

In [None]:
# Workbook locations on the local Windows machine. Raw strings keep the
# backslashes literal without doubling them.
INPUT_EXCEL_FILE1 = r"C:\Users\vmanathunga\Documents\Worker_comp\research_ready_facts_AI.xlsx"
OUTPUT_EXCEL_FILE1 = r"C:\Users\vmanathunga\Documents\Worker_comp\research_ready_facts_anthropic_COT_claude_3.0_haiku.xlsx"

In [None]:
!pip install anthropic

In [None]:
!pip install python-dotenv

In [None]:
import anthropic
# Placeholder credential; it is passed to the Anthropic client constructor
# later in this notebook.
# NOTE(review): substitute a real key locally only - do not commit one to
# source control.
anthropic_api_key = "XXXXXXXXXXXXXXXXXX"

In [None]:
import pandas as pd
import time
import os
from dotenv import load_dotenv # For loading API key from .env file

# --- Configuration ---
# Everything the rest of the notebook reads as a tunable lives in this section:
# the API client, the models to query, file locations, the input column name,
# and the pacing delays.

# Load variables from a local .env file (if one exists) into the process
# environment so the API key does not have to live in the notebook.
load_dotenv()

# 1. Anthropic API Key Setup
# Prefer ANTHROPIC_API_KEY from the environment / .env file; fall back to the
# `anthropic_api_key` variable defined in an earlier cell so existing setups
# keep working. Previously the environment was loaded but never consulted.
client = anthropic.Anthropic(
    api_key=os.getenv("ANTHROPIC_API_KEY") or anthropic_api_key
)

# 2. Specify the Anthropic Model(s)
# Each entry is passed verbatim as the `model` argument of the Messages API
# call and is also used to build the name of the decision column, so every
# model listed here gets its own output column.
MODEL_NAME_LIST = ["claude-3-haiku-20240307"]

# 3. Excel File Paths
# Aliases of the paths defined in an earlier cell; change them there.
INPUT_EXCEL_FILE = INPUT_EXCEL_FILE1
OUTPUT_EXCEL_FILE = OUTPUT_EXCEL_FILE1

# 4. Column Names (adjust if different in your Excel)
# Must match the workbook header exactly, including its spelling.
FACTS_COLUMN = 'Annonymized_Facts'
# The decision column name is generated dynamically from the model name and
# run number, e.g. "claude_3_haiku_20240307_AI_Decision1".

# 5. Time Delays (in seconds)
# Delay AFTER processing each row (in the main loop); 0 disables it.
MAIN_LOOP_DELAY_SECONDS = 0
# Delay BEFORE each API call (inside the function); raise it if rate limits
# are being hit.
API_CALL_DELAY_SECONDS = 1

# --- Function to get decision from Anthropic ---
def get_anthropic_decision(facts_text, model_name): # Renamed function
    """
    Ask an Anthropic Claude model for a win/loss prediction on one case.

    The facts are wrapped in a fixed prompt and sent through the module-level
    ``client``. The call is preceded by a pause of ``API_CALL_DELAY_SECONDS``
    when that setting is positive. Every failure is reported with ``print``
    and mapped to -1; nothing is raised to the caller.

    Args:
        facts_text (str): Case facts to evaluate. Non-string, empty, or
            whitespace-only values are rejected without calling the API.
        model_name (str): Model identifier passed to the Messages API.

    Returns:
        int: 1 when the reply is exactly '1' (plaintiff win), 0 when it is
             exactly '0' (plaintiff loss), -1 for invalid input, an API
             error, an unexpected response shape, or any other reply.
    """
    # Guard clause: nothing usable to send.
    if not facts_text or not isinstance(facts_text, str) or not facts_text.strip():
        print("Warning: Empty or invalid facts text provided.")
        return -1

    # System Prompt for Anthropic
    system_prompt = """
    You are a legal expert in workers compensation claim disputes, who are reading the facts of a case. Internally, go step-by-step through each fact to determine whether the plaintiff likely won or lost, carefully considering each relevant detail and legal principle. Form your conclusion based solely on the facts presented. However, do not reveal your reasoning or any internal thoughts in your final answer.
    Respond ONLY with the number '1' if the plaintiff likely won.
    Respond ONLY with the number '0' if the plaintiff likely lost.
    Do NOT provide any explanation, commentary, or any text other than '1' or '0'.
    """

    user_message_content = f"""
    Facts:
    ---
    {facts_text}
    ---
    Decision (1 for win, 0 for loss):"""

    # Short excerpt of the facts, used only in warning/error output.
    preview = facts_text[:100]
    # The only two replies that count as a decision.
    verdicts = {'1': 1, '0': 0}

    try:
        # Pace requests before hitting the API.
        if API_CALL_DELAY_SECONDS > 0:
            print(f"Waiting for {API_CALL_DELAY_SECONDS} second(s) before API call...")
            time.sleep(API_CALL_DELAY_SECONDS)

        print(f"Making API call to {model_name}...")
        reply = client.messages.create(
            model=model_name,
            system=system_prompt,  # Anthropic takes the system prompt as its own argument
            messages=[{"role": "user", "content": user_message_content}],
            temperature=0,         # For deterministic results
            max_tokens=5,          # One token is expected; small buffer on top
        )
        print("API call complete.")

        # The reply text is read from the first content block.
        blocks = reply.content
        if not (blocks and isinstance(blocks, list)):
            print(f"Warning: Received an unexpected response structure for facts: '{preview}...'")
            print(f"Full response object: {reply}") # Log the full response for debugging
            return -1

        answer = blocks[0].text.strip()
        verdict = verdicts.get(answer)
        if verdict is None:
            print(f"Warning: Could not parse decision from response: '{answer}' for facts: '{preview}...'")
            return -1
        return verdict
    except anthropic.APIConnectionError as err:
        print(f"Anthropic API Connection Error: {err.__class__.__name__}: {err}")
        return -1
    except anthropic.RateLimitError as err:
        print(f"Anthropic Rate Limit Error: {err.__class__.__name__}: {err}. Consider increasing API_CALL_DELAY_SECONDS.")
        return -1
    except anthropic.APIStatusError as err:
        print(f"Anthropic API Status Error: {err.__class__.__name__}: status_code={err.status_code}, response={err.response}")
        return -1
    except Exception as err:
        print(f"An unexpected error occurred during Anthropic API call for facts: '{preview}...'. Error: {err.__class__.__name__}: {err}")
        return -1

# --- Main Processing Logic ---
# Reads the input workbook, asks every configured model for a decision on each
# row's facts, stores the answers in a per-model/per-run column, and writes the
# whole workbook to the output path after each run.

print(f"\nReading Excel file: {INPUT_EXCEL_FILE}")
try:
    df = pd.read_excel(INPUT_EXCEL_FILE)
    print(f"Successfully read {len(df)} rows.")
except FileNotFoundError:
    print(f"Error: Input file not found at {INPUT_EXCEL_FILE}")
    # Raise SystemExit rather than calling the interactive exit() helper: the
    # raise stops execution at this point in both a script and a notebook cell
    # and reports a non-zero status, so nothing below runs with `df` undefined.
    raise SystemExit(1) from None
except Exception as e:
    print(f"Error reading Excel file: {e}")
    raise SystemExit(1) from None

# Check for input column
if FACTS_COLUMN not in df.columns:
    print(f"Error: Column '{FACTS_COLUMN}' not found in the Excel file.")
    raise SystemExit(1)

# --- Main processing loop for each model and each run ---
for selected_model in MODEL_NAME_LIST:
    print(f"\n===== Starting processing for Model: {selected_model} =====")
    # range(1, 2) performs a single run (run_number = 1).
    # For multiple runs widen the range, e.g. range(1, 6) for 5 runs.
    for run_number in range(1, 2):
        # Build the decision column name from the model and run number,
        # replacing '/' and '-' in the model identifier with underscores.
        safe_model_name_part = selected_model.replace('/', '_').replace('-', '_')
        current_decision_column = f"{safe_model_name_part}_AI_Decision{run_number}"

        print(f"\n--- Model: {selected_model}, Run: {run_number} ---")
        print(f"Outputting decisions to column: {current_decision_column}")

        # Make sure the decision column exists and holds plain integers.
        # -1 is the placeholder for "no decision / error".
        if current_decision_column not in df.columns:
            print(f"Initializing column: {current_decision_column}")
            df[current_decision_column] = -1
        else:
            print(f"Column '{current_decision_column}' already exists. Ensuring it's integer type.")
            # Blank cells come back from Excel as NaN, which cannot be cast to
            # int directly; map them to the placeholder first.
            df[current_decision_column] = df[current_decision_column].fillna(-1).astype(int)

        print(f"\nProcessing {len(df)} rows using Anthropic model: {selected_model} (Run {run_number})...")

        # --- Iterate and call Anthropic API for each row ---
        total_rows = len(df)
        # `position` is a 1-based counter used for progress output and the
        # last-row check; `index` is the row label used to write the result,
        # so the loop does not depend on the index being 0..n-1.
        for position, (index, row) in enumerate(df.iterrows(), start=1):
            print(f"\n--- Processing row {position} of {total_rows} (Model: {selected_model}, Run: {run_number}) ---")
            facts = str(row[FACTS_COLUMN]) if pd.notna(row[FACTS_COLUMN]) else ""

            # Get the decision from Anthropic, passing the currently selected model
            decision = get_anthropic_decision(facts, selected_model)

            # Update the DataFrame in the dynamically named column
            df.loc[index, current_decision_column] = decision
            print(f"Row {position}: Facts processed for column '{current_decision_column}'. Predicted Decision: {decision}")

            # Optional delay AFTER each row, skipped after the last row of the run
            if position < total_rows and MAIN_LOOP_DELAY_SECONDS > 0:
                print(f"Waiting for {MAIN_LOOP_DELAY_SECONDS} second(s) before next row...")
                time.sleep(MAIN_LOOP_DELAY_SECONDS)

        print(f"\n--- Finished Run {run_number} for Model: {selected_model} ---")

        # --- Save the DataFrame at the end of each run ---
        # A failed save is reported but does not stop the remaining runs.
        print(f"Saving results to {OUTPUT_EXCEL_FILE} after Run {run_number} for Model {selected_model}...")
        try:
            df.to_excel(OUTPUT_EXCEL_FILE, index=False)
            print(f"Successfully saved results to: {OUTPUT_EXCEL_FILE}")
        except Exception as e:
            print(f"Error saving results to Excel file after Run {run_number} for Model {selected_model}: {e}")

    print(f"\n===== Finished all runs for Model: {selected_model} =====")

print("\nAll processing complete.")
print("\nScript finished.")