In [None]:
import os
import pandas as pd
from modules.postprocessing import *
from typing import Dict, Union, Tuple

# Define the function to process predictions
def process_predictions(data: pd.DataFrame) -> pd.DataFrame:
    """Re-evaluate each row's predictions and rebuild the metric columns.

    Every column whose name contains "precision", "recall", "f1" or
    "accuracy" is dropped. Each row's ``predictions`` and ``ground_truth``
    cells are then parsed and passed to ``evaluate_response_with_metrics``
    together with ``defined_schema`` and ``comparison_methods`` (these three
    names are not defined in this block; presumably they come from the
    ``modules.postprocessing`` star import -- confirm).

    Args:
        data: Frame with ``predictions`` and ``ground_truth`` columns whose
            cells are strings holding Python literals. The frame itself is
            not modified.

    Returns:
        A new frame holding the remaining input columns, the
        ``mapped_pred``, ``cleaned_pred`` and ``cleaned_gt`` columns, and one
        column per remaining key of the evaluation results.
    """
    metric_markers = ("precision", "recall", "f1", "accuracy")
    result_keys = ("mapped_pred", "cleaned_pred", "cleaned_gt")

    # Drop existing metrics-related columns; drop() already returns a new
    # frame, so the caller's data is left untouched.
    stale_columns = [
        col for col in data.columns
        if any(marker in col for marker in metric_markers)
    ]
    updated_data = data.drop(columns=stale_columns)

    # Per-row results, collected in row order
    extracted = {key: [] for key in result_keys}
    new_metrics = []

    # Only these two columns are read, so iterate them directly
    for raw_pred, raw_gt in zip(data["predictions"], data["ground_truth"]):
        # NOTE(review): eval() executes arbitrary code found in the CSV cells.
        # If the cells only ever hold plain literals, ast.literal_eval would
        # be the safe replacement -- confirm before switching.
        predictions = eval(raw_pred)  # Convert string to dictionary
        ground_truth = eval(raw_gt)  # Convert string to dictionary

        # Evaluate predictions
        evaluated_results = evaluate_response_with_metrics(
            predictions, ground_truth, defined_schema, comparison_methods
        )

        # Split the mapped/cleaned values from the remaining metric entries
        for key in result_keys:
            extracted[key].append(evaluated_results.get(key, {}))
        new_metrics.append(
            {k: v for k, v in evaluated_results.items() if k not in result_keys}
        )

    # Update the dataframe with new results
    for key in result_keys:
        updated_data[key] = extracted[key]

    # concat(axis=1) aligns on index labels, so the metrics frame must share
    # the input's index; a default 0..n-1 index would misalign rows whenever
    # the input index is anything else.
    metrics_df = pd.DataFrame(new_metrics, index=updated_data.index)
    updated_data = pd.concat([updated_data, metrics_df], axis=1)

    return updated_data

# Source folder with the experiment CSVs and destination for the re-evaluated copies
input_dir = "llama3_70b_outputs/reg/experiments"
output_dir = "llama3_70b_outputs/reg/experiments2"

# Make sure the destination folder is present before anything is written
os.makedirs(output_dir, exist_ok=True)

# Walk the entries of the source folder one by one
for file_name in os.listdir(input_dir):
    # Anything that is not a CSV file is ignored
    if not file_name.endswith(".csv"):
        continue

    input_path = os.path.join(input_dir, file_name)
    output_path = os.path.join(output_dir, file_name)

    print(f"Processing file: {file_name}")

    # Load the file, recompute its results, and write them under the same name
    data = pd.read_csv(input_path)
    processed_data = process_predictions(data)
    processed_data.to_csv(output_path, index=False)
    print(f"Processed file saved to: {output_path}")

print("Processing complete.")


Processing file: experiment_56_results.csv

initial_pred: {'file_date': '2008-09-30', 'foreign_principle_name': 'Japan Fisheries Association', 'registrant_name': 'Garvey Schubert Barer', 'registration_num': '3047', 'signer_name': 'Harold G. Bailey, Jr.', 'signer_title': ''}

mapped_pred: {'file_date': '2008-09-30', 'foreign_principle_name': 'Japan Fisheries Association', 'registrant_name': 'Garvey Schubert Barer', 'registration_num': '3047', 'signer_name': 'Harold G. Bailey, Jr.', 'signer_title': ''}

cleaned_pred: {'file_date': '2008-09-30', 'foreign_principle_name': 'Japan Fisheries Association', 'registrant_name': 'Garvey Schubert Barer', 'registration_num': '3047', 'signer_name': 'Harold G. Bailey, Jr.', 'signer_title': ''} 

ground_truth_value: {'file_date': ['September 30, 2008'], 'foreign_principle_name': '', 'registrant_name': ['Garvey Schubert Barer'], 'registration_num': ['3047'], 'signer_name': ['Harold G. Bailey, Jr.'], 'signer_title': ''} 
\cleaned_gt_value: {'file_date': 

IOPub data rate exceeded.
The Jupyter server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--ServerApp.iopub_data_rate_limit`.

Current values:
ServerApp.iopub_data_rate_limit=1000000.0 (bytes/sec)
ServerApp.rate_limit_window=3.0 (secs)




Comparison result for pred_value: 1983-03-30 and gt_value: ['york\nZa\n1983\nday of\nApril'] is => False

Comparison result for pred_value: Bermuda Department of Tourism and gt_value: ['Bermuda Department of Tourism'] is => True

Comparison result for pred_value: 430 and gt_value: ['430'] is => True

Comparison result for pred_value: Ronald N. Bassett and gt_value: ['(Ronald N. Bassett )'] is => False

Comparison result for pred_value: General Manager, North America and gt_value: ['General Manager, North America'] is => True

 SCHEMA METRIC:  defaultdict(<function calculate_schema_metrics.<locals>.<lambda> at 0x7fe6e97f8d60>, {'file_date': {'tp': 0, 'fp': 1, 'fn': 0}, 'registrant_name': {'tp': 1, 'fp': 0, 'fn': 0}, 'registration_num': {'tp': 1, 'fp': 0, 'fn': 0}, 'signer_name': {'tp': 0, 'fp': 1, 'fn': 0}, 'signer_title': {'tp': 1, 'fp': 0, 'fn': 0}})

Comparison result for pred_value: 1983-03-30 and gt_value: ['york\nZa\n1983\nday of\nApril'] is => False

Comparison result for pred_v