In [None]:
!pip install --upgrade openai

In [None]:
!pip install openai pandas openpyxl python-dotenv

In [None]:
!pip install python-dotenv

In [None]:
# OpenAI API key used by this notebook; it is assigned to `openai.api_key` in the
# configuration section further down.
# NOTE(review): "XXXXXXXX" is presumably a redacted placeholder - substitute a real
# key locally and avoid committing it to source control.
openai_api_key ="XXXXXXXX"

In [None]:
# Absolute Windows paths (backslashes escaped) of the workbook that is read and of
# the workbook the results are written to. They are aliased to INPUT_EXCEL_FILE /
# OUTPUT_EXCEL_FILE in the configuration section.
INPUT_EXCEL_FILE1 ="C:\\Users\\vmanathunga\\Documents\\research_ready_facts_AI.xlsx"
OUTPUT_EXCEL_FILE1 ="C:\\Users\\vmanathunga\\Documents\\research_ready_facts_openai_simp_O4mini.xlsx"

In [None]:
import openai
import pandas as pd
import time
import os
from dotenv import load_dotenv # For loading API key from .env file

# --- Configuration ---

# Load environment variables from a .env file, if one exists.
# NOTE(review): nothing in this section reads the loaded variables; the API key
# below comes from the `openai_api_key` variable set in an earlier cell.
load_dotenv()

# 1. OpenAI API Key Setup
# Sets the module-level key used by the `openai.chat.completions` call.
openai.api_key = openai_api_key

# 2. Specify the OpenAI Model(s)
# Every identifier in this list is processed in turn by the main loop.
# Identifiers mentioned by the original author: "gpt-3.5-turbo-0125",
# "gpt-4.1-mini", "o4-mini".
MODEL_NAME_LIST = ["o4-mini"]

# 3. Excel File Paths
INPUT_EXCEL_FILE = INPUT_EXCEL_FILE1  # Your input file
# Workbook the results are written to after each run.
OUTPUT_EXCEL_FILE = OUTPUT_EXCEL_FILE1

# 4. Column Names (Adjust if different in your Excel)
# Column holding the case facts sent to the model (spelling must match the sheet).
FACTS_COLUMN = 'Annonymized_Facts'
# NOTE(review): not referenced by the code visible in this file; the main loop
# builds its own "<model>_AI_Decision<run>" column names.
DECISION_COLUMN = "AI_Decision"

# 5. Time Delays (in seconds)
# Delay AFTER processing each row (in the main loop); 0 disables it.
MAIN_LOOP_DELAY_SECONDS = 0
# Delay BEFORE each API call (inside get_openai_decision)
API_CALL_DELAY_SECONDS = 1   # Adjust as needed, especially if hitting rate limits

# --- Function to get decision from OpenAI ---
def get_openai_decision(facts_text, MODEL_NAME):
    """
    Ask an OpenAI chat model whether the plaintiff won (1) or lost (0).

    Sleeps for ``API_CALL_DELAY_SECONDS`` before the request, sends the facts
    together with a fixed system prompt, and parses the model's reply.

    Args:
        facts_text (str): The text from the facts column for one case.
        MODEL_NAME (str): Model identifier passed to the Chat Completions
            endpoint (for example "o4-mini").

    Returns:
        int: 1 for predicted plaintiff win, 0 for predicted plaintiff loss,
             -1 if the input was empty/invalid, the API call failed, or the
             reply could not be parsed.

    Raises:
        SystemExit: If OpenAI rejects the API key; continuing would only
            repeat the same failure for every remaining row.
    """
    # Guard: anything that is not a non-blank string is rejected without an API call.
    if not isinstance(facts_text, str) or not facts_text.strip():
        print("Warning: Empty or invalid facts text provided.")
        return -1

    # Simple Prompt
    system_prompt = """
    Analyze the following legal case facts. Based solely on these facts, predict whether the plaintiff likely won or lost the case.
    Respond ONLY with the number '1' if the plaintiff likely won.
    Respond ONLY with the number '0' if the plaintiff likely lost.
    Do NOT provide any explanation, commentary, or any text other than '1' or '0'.
    """

    user_prompt = f"""
    Facts:
    ---
    {facts_text}
    ---
    Decision (1 for win, 0 for loss):"""

    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_prompt}
    ]

    try:
        # Throttle before every request to stay under the rate limit.
        print(f"Waiting for {API_CALL_DELAY_SECONDS} second(s) before API call...")
        time.sleep(API_CALL_DELAY_SECONDS)

        # Call the OpenAI Chat Completions endpoint
        print(f"Making API call to {MODEL_NAME}...")
        response = openai.chat.completions.create(
            model=MODEL_NAME,
            messages=messages,
            # NOTE(review): 1 is the API default and means sampled (NOT
            # deterministic) output; presumably kept because the configured
            # reasoning model rejects other values - confirm before changing.
            temperature=1,
        )
        print("API call complete.")

        # The message content can be absent (None); report that explicitly
        # instead of letting `.strip()` fail with an AttributeError.
        raw_content = response.choices[0].message.content
        if raw_content is None:
            print(f"Warning: Model returned no text content for facts: '{facts_text[:100]}...'")
            return -1

        decision_text = raw_content.strip()
        # Accept trivially wrapped answers such as "'1'", "`0`" or "1." -
        # the prompt itself shows the digits in quotes.
        normalized = decision_text.strip("'\"`. \t\r\n")

        if normalized == '1':
            return 1
        if normalized == '0':
            return 0

        print(f"Warning: Could not parse decision from response: '{decision_text}' for facts: '{facts_text[:100]}...'")
        return -1

    except openai.RateLimitError as e:
        print(f"OpenAI API rate limit exceeded: {e}. Consider increasing API_CALL_DELAY_SECONDS.")
        return -1
    except openai.AuthenticationError as e:
        print(f"OpenAI Authentication Error: {e}. Check your API key.")
        # No point in continuing if auth fails. Raise SystemExit directly:
        # the `exit()` helper is injected by `site` for interactive use and
        # is not intended for programs.
        raise SystemExit(1) from e
    except openai.APIError as e:
        print(f"OpenAI API returned an API Error: {e}")
        return -1
    except Exception as e:
        # Last-resort boundary so one bad row does not abort the whole batch.
        print(f"An unexpected error occurred during OpenAI API call for facts: '{facts_text[:100]}...'. Error: {e}")
        return -1

# --- Main Processing Logic ---

# Load the input workbook into `df`; abort the run if it cannot be read.
print(f"\nReading Excel file: {INPUT_EXCEL_FILE}")
try:
    df = pd.read_excel(INPUT_EXCEL_FILE)
except FileNotFoundError as e:
    print(f"Error: Input file not found at {INPUT_EXCEL_FILE}")
    # Raise SystemExit directly rather than calling the interactive `exit()`
    # helper, so execution reliably stops here with a non-zero status.
    raise SystemExit(1) from e
except Exception as e:
    print(f"Error reading Excel file: {e}")
    raise SystemExit(1) from e
else:
    print(f"Successfully read {len(df)} rows.")

# The facts column is required; without it there is nothing to send to the model.
if FACTS_COLUMN not in df.columns:
    print(f"Error: Column '{FACTS_COLUMN}' not found in the Excel file.")
    raise SystemExit(1)

# --- Main loop: for every model, make several independent passes over all rows ---

# Number of passes per model; each pass writes to its own decision column.
NUM_RUNS = 3

for selected_model in MODEL_NAME_LIST:
    print(f"\n===== Starting processing for Model: {selected_model} =====")
    for run_number in range(1, NUM_RUNS + 1):  # runs 1..NUM_RUNS inclusive
        # Column name is derived from model and run number,
        # e.g. "o4-mini_AI_Decision1", "o4-mini_AI_Decision2", ...
        current_decision_column = f"{selected_model}_AI_Decision{run_number}"

        print(f"\n--- Model: {selected_model}, Run: {run_number} ---")
        print(f"Outputting decisions to column: {current_decision_column}")

        # Ensure the decision column for this model/run exists; -1 marks "no decision yet".
        if current_decision_column not in df.columns:
            print(f"Initializing column: {current_decision_column}")
            df[current_decision_column] = -1
        else:
            print(f"Column '{current_decision_column}' already exists. Values will be updated/overwritten for this run.")

        print(f"\nProcessing {len(df)} rows using OpenAI model: {selected_model} (Run {run_number})...")

        # --- Iterate and call the OpenAI API once per row ---
        total_rows = len(df)
        # `row_number` is the 1-based position used for progress output and the
        # last-row check; `index` is the DataFrame label used for the write-back,
        # so the loop does not depend on the index being 0..n-1.
        for row_number, (index, row) in enumerate(df.iterrows(), start=1):
            print(f"\n--- Processing row {row_number} of {total_rows} (Model: {selected_model}, Run: {run_number}) ---")
            raw_facts = row[FACTS_COLUMN]
            # Missing cells become "" so get_openai_decision returns -1 without an API call.
            facts = str(raw_facts) if pd.notna(raw_facts) else ""

            # Get the decision from OpenAI, passing the currently selected model
            decision = get_openai_decision(facts, selected_model)

            # Store the decision in this run's column
            df.loc[index, current_decision_column] = decision
            print(f"Row {row_number}: Facts processed for column '{current_decision_column}'. Predicted Decision: {decision}")

            # Optional delay between rows; skipped after the last row of the run.
            if row_number < total_rows and MAIN_LOOP_DELAY_SECONDS > 0:
                print(f"Waiting for {MAIN_LOOP_DELAY_SECONDS} second(s) before next row...")
                time.sleep(MAIN_LOOP_DELAY_SECONDS)

        print(f"\n--- Finished Run {run_number} for Model: {selected_model} ---")

        # Save after every run so completed runs survive a later failure.
        print(f"Saving results to {OUTPUT_EXCEL_FILE} after Run {run_number} for Model {selected_model}...")
        try:
            df.to_excel(OUTPUT_EXCEL_FILE, index=False)
            print(f"Successfully saved results to: {OUTPUT_EXCEL_FILE}")
        except Exception as e:
            # Best effort: report the failure and keep going; the next run saves again.
            print(f"Error saving results to Excel file after Run {run_number} for Model {selected_model}: {e}")

    print(f"\n===== Finished all runs for Model: {selected_model} =====")

# All models and all of their runs are done; results were already saved per run.
print("\nAll processing complete.")

print("\nScript finished.")