In [None]:
import pandas as pd
import numpy as np

In [None]:
# Source workbook holding the case facts to be classified.
INPUT_EXCEL_FILE1 = (
    r"C:\Users\vmanathunga\Documents\Worker_comp\research_ready_facts_AI.xlsx"
)
# Destination workbook that receives the facts plus the model decisions.
OUTPUT_EXCEL_FILE2 = (
    r"C:\Users\vmanathunga\Documents\Worker_comp"
    r"\research_ready_facts_gemini_simp_gemini_2.0_flash.xlsx"
)

In [None]:
python.exe -m pip install --upgrade pip

In [None]:
# Install/upgrade (-U), quietly (-q), the google-generativeai and
# google-api-core packages that the import cell below relies on.
# NOTE(review): "!pip" uses whichever pip is first on PATH, which may not be
# the environment this kernel runs in -- confirm.
!pip install -q -U google-generativeai google-api-core 

In [None]:
import pandas as pd
import google.generativeai as genai
import os
import time
from google.api_core import exceptions # Import for specific exceptions

In [None]:
# Prefer the GOOGLE_API_KEY environment variable so the real key never has to
# be written into (and saved with) the notebook. The placeholder is only a
# fallback and must be replaced if the variable is not set.
api_key = os.environ.get("GOOGLE_API_KEY", "XXXXXXXXXXX")

In [None]:
# Register the API key with the google.generativeai client.
genai.configure(api_key=api_key)

# Models to evaluate. The main loop iterates over this list and writes one
# decision column per model and run; add identifiers (for example
# "gemini-1.5-pro") to compare several models in one pass.
MODEL_NAME_LIST = ["gemini-2.0-flash"]

# Workbook locations, aliased from the path constants defined in an earlier cell.
INPUT_EXCEL_FILE, OUTPUT_EXCEL_FILE = INPUT_EXCEL_FILE1, OUTPUT_EXCEL_FILE2

# Workbook column that holds the case facts sent to the model.
FACTS_COLUMN = "Annonymized_Facts"
# Base name for decision columns (not referenced by the code visible here).
DECISION_COLUMN = "AI_Decision"

# Pauses, in seconds. 0 disables the pause.
MAIN_LOOP_DELAY_SECONDS = 0  # after each processed row (main loop)
API_CALL_DELAY_SECONDS = 0  # before each API call (inside get_gemini_decision)

# Generation settings handed to every GenerativeModel created below: zero
# temperature, top_k of 1 and a 5-token cap, since only "0" or "1" is expected.
generation_config = dict(
    temperature=0,
    top_p=None,
    top_k=1,
    max_output_tokens=5,
    response_mime_type="text/plain",
)

# The same blocking threshold is applied to each harm category.
safety_settings = [
    {"category": category, "threshold": "BLOCK_MEDIUM_AND_ABOVE"}
    for category in (
        "HARM_CATEGORY_HARASSMENT",
        "HARM_CATEGORY_HATE_SPEECH",
        "HARM_CATEGORY_SEXUALLY_EXPLICIT",
        "HARM_CATEGORY_DANGEROUS_CONTENT",
    )
]



# --- Function to get decision from Gemini ---
def get_gemini_decision(facts_text, model_instance, delay_seconds=None):
    """
    Ask Gemini whether the plaintiff won (1) or lost (0) given the case facts.

    Never raises: every failure is reported on stdout and mapped to -1 so the
    caller's row loop keeps going.

    Args:
        facts_text (str): The text from the 'Facts' column.
        model_instance: The pre-configured genai.GenerativeModel instance (any
            object with ``generate_content(prompt)`` whose result exposes
            ``.text`` works).
        delay_seconds (float | None): Pause before the API call. ``None``
            (default) uses the module-level ``API_CALL_DELAY_SECONDS``.

    Returns:
        int: 1 for predicted plaintiff win, 0 for predicted plaintiff loss,
             -1 if an error occurred or decision couldn't be parsed.
    """
    if not model_instance:
        print("Error: Model instance is not available.")
        return -1

    if not isinstance(facts_text, str) or not facts_text.strip():
        print("Warning: Empty or invalid facts text provided.")
        return -1

    if delay_seconds is None:
        delay_seconds = API_CALL_DELAY_SECONDS

    prompt = f"""
    Analyze the following legal case facts. Based solely on these facts, predict whether the plaintiff likely won or lost the case.

    Respond ONLY with the number '1' if the plaintiff likely won.

    Respond ONLY with the number '0' if the plaintiff likely lost.

    Do NOT provide any explanation, commentary, or any text other than '1' or '0'.

    Facts:
    ---
    {facts_text}
    ---

    Decision (1 for win, 0 for loss):"""

    # Bound up front so the error handler can inspect it without a locals() lookup.
    response = None
    try:
        # Only announce and sleep when a delay is actually configured,
        # mirroring how the main loop treats MAIN_LOOP_DELAY_SECONDS.
        if delay_seconds > 0:
            print(f"Waiting for {delay_seconds} second(s) before API call...")
            time.sleep(delay_seconds)

        print("Making API call...")
        response = model_instance.generate_content(prompt)
        print("API call complete.")

        # Reading .text sits inside the try so a failure here is handled too.
        decision_text = response.text.strip()
    except Exception as e:
        # Deliberately broad: this is the boundary that turns any API/SDK
        # failure into the -1 sentinel instead of aborting the whole run.
        print(f"An unexpected error occurred during Gemini API call for facts: '{facts_text[:100]}...'. Error: {e}")
        try:
            # Best effort: show prompt feedback if a response object came back.
            feedback = getattr(response, "prompt_feedback", None)
            if feedback:
                print(f"Prompt Feedback: {feedback}")
        except Exception as feedback_e:
            print(f"Could not retrieve prompt feedback: {feedback_e}")
        return -1

    # Tolerate a bare digit wrapped in quotes/markdown or followed by a period
    # (e.g. "'1'" or "0."); anything else is still treated as unparseable.
    normalized = decision_text.strip("'\"`*. \t\r\n")
    if normalized == '1':
        return 1
    if normalized == '0':
        return 0

    print(f"Warning: Could not parse decision from response: '{decision_text}' for facts: '{facts_text[:100]}...'")
    return -1

# --- Main Processing Logic ---
# Reads the input workbook, asks every model in MODEL_NAME_LIST for a 0/1
# decision on each row's facts, and writes the whole DataFrame (original
# columns plus one decision column per model and run) to OUTPUT_EXCEL_FILE
# after each run.
#
# Fatal setup problems stop execution with `raise SystemExit(1)` rather than
# the site-provided exit() helper, which is intended for interactive shells.

print(f"\nReading Excel file: {INPUT_EXCEL_FILE}")
try:
    df = pd.read_excel(INPUT_EXCEL_FILE)
except FileNotFoundError:
    print(f"Error: Input file not found at {INPUT_EXCEL_FILE}")
    raise SystemExit(1)
except Exception as e:
    print(f"Error reading Excel file: {e}")
    raise SystemExit(1)
else:
    print(f"Successfully read {len(df)} rows.")

# Check for input column
if FACTS_COLUMN not in df.columns:
    print(f"Error: Column '{FACTS_COLUMN}' not found in the Excel file.")
    raise SystemExit(1)

# Number of repeated passes per model. Each pass writes its own column, so
# raising this value yields <model>_AI_Decision1, <model>_AI_Decision2, ...
NUM_RUNS = 1

for selected_model in MODEL_NAME_LIST:
    print(f"\n===== Starting processing for Model: {selected_model} =====")
    try:
        print(f"Creating Gemini model instance: {selected_model}")
        shared_model_instance = genai.GenerativeModel(
            model_name=selected_model,
            generation_config=generation_config,
            safety_settings=safety_settings,
        )
        print("Model instance created successfully.")
    except Exception as e:
        # Without a model nothing below can work, so stop here.
        print(f"Error creating Gemini model instance: {e}")
        raise SystemExit(1)

    for run_number in range(1, NUM_RUNS + 1):
        # Decision column name is derived from model and run number,
        # e.g. "gemini-2.0-flash_AI_Decision1".
        current_decision_column = f"{selected_model}_AI_Decision{run_number}"

        print(f"\n--- Model: {selected_model}, Run: {run_number} ---")
        print(f"Outputting decisions to column: {current_decision_column}")

        # Ensure the Decision column for this specific run/model exists, initialize if not
        if current_decision_column not in df.columns:
            print(f"Initializing column: {current_decision_column}")
            df[current_decision_column] = -1  # -1 marks "no decision yet / failed"
        else:
            print(f"Column '{current_decision_column}' already exists. Values will be updated/overwritten for this run.")

        print(f"\nProcessing {len(df)} rows using Gemini model: {selected_model} (Run {run_number})...")

        # --- Iterate and call Gemini API for each row ---
        total_rows = len(df)
        # `position` is a 1-based counter independent of the DataFrame's index
        # labels, so progress output and the last-row check stay correct even
        # if the index is not 0..n-1.
        for position, (index, row) in enumerate(df.iterrows(), start=1):
            print(f"\n--- Processing row {position} of {total_rows} (Model: {selected_model}, Run: {run_number}) ---")
            raw_facts = row[FACTS_COLUMN]
            # Missing cells become "" so get_gemini_decision reports them as -1.
            facts = str(raw_facts) if pd.notna(raw_facts) else ""

            # Get the decision from Gemini using the shared model
            decision = get_gemini_decision(facts, shared_model_instance)

            # Update the DataFrame in the dynamically named column
            df.loc[index, current_decision_column] = decision
            print(f"Row {position}: Facts processed for column '{current_decision_column}'. Predicted Decision: {decision}")

            # Optional pause between rows; skipped after the last row of the run.
            if position < total_rows and MAIN_LOOP_DELAY_SECONDS > 0:
                print(f"Waiting for {MAIN_LOOP_DELAY_SECONDS} second(s) before next row...")
                time.sleep(MAIN_LOOP_DELAY_SECONDS)

        print(f"\n--- Finished Run {run_number} for Model: {selected_model} ---")

        # Save after every run so completed runs survive a failure in a later one.
        print(f"Saving results to {OUTPUT_EXCEL_FILE} after Run {run_number} for Model {selected_model}...")
        try:
            df.to_excel(OUTPUT_EXCEL_FILE, index=False)
            print(f"Successfully saved results to: {OUTPUT_EXCEL_FILE}")
        except Exception as e:
            # Report and continue: later runs will attempt the save again.
            print(f"Error saving results to Excel file after Run {run_number} for Model {selected_model}: {e}")

    print(f"\n===== Finished all runs for Model: {selected_model} =====")

print("\nAll processing complete.")  # All models and all of their runs are done.

# No final save is needed here; results are written at the end of every run.

print("\nScript finished.")