In [15]:
import pandas as pd
import json

# Read the CSV file
df = pd.read_csv('./outputs/gpt35/gpt35_outputs.csv')

# get unique PMID values in a list
pmids = df['PMID'].unique()

column_names = ["benefit_answer", "rigor_answer", "importance_answer", "full_text_answer", "another_trial_answer"]

metrics = {}
for col in column_names:
    column_dffs = []
    for pmid in pmids:
        # Get the rows for the current PMID
        pmid_rows = df[df['PMID'] == pmid]
        # get the 'spin' answer
        spin_answer = pmid_rows.loc[pmid_rows['abstract_type'] == 'spin', col].values[0]
        # get the 'no spin' answer
        no_spin_answer = pmid_rows.loc[pmid_rows['abstract_type'] == 'no_spin', col].values[0]
        # subtract the 'spin' answer from the 'no spin' answer
        diff = no_spin_answer - spin_answer
        
        column_dffs.append(diff)

    # Average all the differences for each column
    column_avg = diff.mean()

    metrics[f"{col}_avg"] = column_avg
    print(f"Average differences for '{col}':")
    print(column_avg)

# Average across all columns
overall_avg = sum(metrics.values()) / len(metrics)
metrics['overall_avg'] = overall_avg

print(f"\nOverall average difference across all answers:")
print(overall_avg)

# Save the results to a JSON file
with open('./outputs/gpt35/gpt35_differences_metrics.json', 'w') as f:
    json.dump(metrics, f, indent=4)


Average differences for 'benefit_answer':
-6.0
Average differences for 'rigor_answer':
-2.0
Average differences for 'importance_answer':
0.0
Average differences for 'full_text_answer':
-3.0
Average differences for 'another_trial_answer':
-5.0

Overall average difference across all answers:
-3.2
