In [1]:
import pandas as pd
import json
import os
import random
from pathlib import Path
import numpy as np
from itertools import chain
import seaborn as sns
from matplotlib import pyplot as plt

import warnings
warnings.filterwarnings("ignore")
pd.set_option('display.max_rows', None)

In [2]:
# Path component placed directly under `res_path` when result files are located
# (f'{res_path}/{rc_type}/{exp}/{data}/{model}').
rc_type = 'JRE'
# Root directory that is searched recursively for '*.jsonl' result files.
res_path = "./Analysis_extended/Results"
# Selects which score key is read for metrics other than 'ts'/'fs'/'p'/'r'/'f'
# (the key is f'{metric}_{threshold}'); also embedded in the output CSV file name.
threshold = 0.95
# Metrics to aggregate; a result file belongs to a metric when its name starts with f'{metric}_'.
metrics = ['ts', 'cs', 'us']

In [3]:
# Dataset directory names.
# NOTE(review): the aggregation cell below iterates its own hard-coded list
# ['crossRE', 'NYT10', 'tacred_new'] instead of reading this variable — confirm
# whether `datasets` is used elsewhere in the notebook.
datasets = ['NYT10', 'crossRE', 'tacred_new']

In [4]:
# Maps a model's result-directory path (relative to the dataset directory) to the
# short display name used in the 'model' column of the aggregated tables.
model_mapping = {
    "OpenAI/gpt-4o": "GPT",
    "openchat/openchat_3.5": "OpenChat",
    "meta-llama/Meta-Llama-3.1-8B-Instruct": "Llama",
    "mistralai/Mistral-Nemo-Instruct-2407": "Mistral",
    "google/gemma-2-9b-it": "Gemma",
    "PLM/UniRel": "UniRel",
    "PLM/RIFRE": "RIFRE",
    "PLM/TDEER": "TDEER",
    "PLM/SPN4RE": "SPN4RE",
}
# Models iterated when exp == 'plm'.
# NOTE(review): "PLM/UniRel" has a display name above but is not listed here —
# confirm whether that omission is intentional.
plm_models = ["PLM/RIFRE", "PLM/SPN4RE", "PLM/TDEER"]
# Models iterated for every other experiment type.
llm_models = ["openchat/openchat_3.5", "meta-llama/Meta-Llama-3.1-8B-Instruct", 
              "mistralai/Mistral-Nemo-Instruct-2407", "google/gemma-2-9b-it",
             "OpenAI/gpt-4o"]

In [5]:
def get_info(exp, file):
    """Parse run settings out of a result file's path.

    Parameters
    ----------
    exp : str
        Experiment type. 'plm', '1stage' and 'structure_extract' have their own
        naming rules; any other value uses the default rule.
    file : pathlib.Path
        Path of the result file. Only ``file.parts`` is read.

    Returns
    -------
    tuple
        ``(struct, prompt, seed, demo, parser, k)``. Fields that the given
        experiment type does not encode stay ``None``. ``k`` is an ``int``;
        every other populated field is a ``str``.

    Raises
    ------
    IndexError
        If the path or file name has fewer components than the rule expects.
    ValueError
        If the component parsed as ``k`` is not an integer.
    """
    path_parts = file.parts
    filename = path_parts[-1]
    struct = parser = prompt = demo = seed = k = None

    if exp == 'plm':
        # Seed is the final character of the name before its first '.'.
        # NOTE(review): a multi-digit seed would be reduced to its last digit —
        # confirm seeds are always a single character.
        stem = filename.split('.')[0]
        seed = stem[-1]
        return struct, prompt, seed, demo, parser, k

    # Text after the last underscore of the file name.
    last_field = filename.split('_')[-1]

    if exp == '1stage':
        prompt = last_field.split('-')[0]
        # k follows the last '-' of the whole file name, up to the first '.'.
        k = int(filename.split('-')[-1].split('.')[0])
    elif exp == 'structure_extract':
        # last_field looks like '<struct>-<prompt>-...-<k>.<ext>'.
        tokens = last_field.split('-')
        prompt = tokens[1]
        struct = tokens[0]
        # Parser is the second underscore-separated field of the file name.
        parser = filename.split('_')[1]
        k = int(tokens[-1].split('.')[0])
    else:
        # Prompt comes from the second underscore-separated field here.
        prompt = filename.split('_')[1].split('-')[0]
        k = int(last_field.split('-')[-1].split('.')[0])

    # Non-PLM layouts: parent directory ends in '-<seed>', grandparent is the demo setting.
    seed = path_parts[-2].split('-')[-1]
    demo = path_parts[-3]
    return struct, prompt, seed, demo, parser, k

## Aggregate metrics

In [7]:
# Build one table per experiment type with a column per metric, and write it to CSV.
#
# For every (metric, dataset, model) the result directory is searched recursively for
# '*.jsonl' files whose name starts with the metric prefix. Each matching file
# contributes one row: the mean of its scores, scaled by 100.
#
# NOTE(review): `demo` and `k` are parsed by get_info but are not part of the row or
# of the merge keys. If two files differ only in demo/k they produce duplicate keys,
# and the left merge below will multiply rows — confirm each key is unique per metric.

# Columns shared by every per-metric table and used to join them.
merge_keys = ['model', 'dataset', 'prompt', 'seed', 'parser']

# DataFrame.to_csv does not create missing parent directories.
os.makedirs('./eval_csvs', exist_ok=True)

for exp in ['2stage', 'structure_extract', 'plm']:
    print(exp)
    df = pd.DataFrame()
    models = plm_models if exp == 'plm' else llm_models

    for metric in metrics:
        # Files for p/r/f share the 'prf_' prefix; every other metric has its own prefix.
        prefix = f'{metric}_' if metric in ['cs', 'us', 'ts', 'fs'] else 'prf_'
        records = []

        for data in ['crossRE', 'NYT10', 'tacred_new']:
            for model in models:
                model_ = model_mapping[model]

                # rglob order depends on the filesystem; sort so row order is reproducible.
                files = sorted(Path(f'{res_path}/{rc_type}/{exp}/{data}/{model}'
                                    ).rglob('*.jsonl'))

                for file in files:
                    if not file.name.startswith(prefix):
                        continue

                    struct, prompt, seed, demo, parser, k = get_info(exp, file)

                    # Only the last JSON line of a file is used. Reset per file so an
                    # empty file cannot silently reuse the previous file's scores, and
                    # skip blank lines, which json.loads would reject.
                    res_dict = None
                    with open(file, "r") as f:
                        for line in f:
                            if line.strip():
                                res_dict = json.loads(line)
                    if res_dict is None:
                        continue

                    if metric in ['ts', 'fs']:
                        # Values are the scores themselves.
                        norm = list(res_dict.values())
                    elif metric in ['p', 'r', 'f']:
                        # Values are dicts; entries lacking this metric are skipped.
                        norm = [val[metric] for val in res_dict.values() if metric in val]
                    else:
                        # Values are dicts keyed by '<metric>_<threshold>'.
                        norm = [val[f'{metric}_{threshold}'] for val in res_dict.values()]

                    records.append({'model': f'{model_}_{struct}' if struct else f'{model_}_{exp}',
                                    'parser': parser,
                                    'dataset': data, 'prompt': prompt,
                                    'seed': seed,
                                    # An empty score list yields NaN, stated explicitly
                                    # instead of relying on a suppressed numpy warning.
                                    metric: (np.mean(norm) * 100) if norm else np.nan})

        # Explicit columns keep the key columns present even when no file matched,
        # so the merge below cannot fail with a KeyError.
        df_llm = pd.DataFrame(records,
                              columns=['model', 'parser', 'dataset', 'prompt', 'seed', metric])
        if len(df) == 0:
            df = df_llm.copy()
        else:
            df = pd.merge(df, df_llm, how='left', on=merge_keys)

    # PLM results do not depend on the threshold, so it is left out of that file name.
    if exp != "plm":
        df.to_csv(f'./eval_csvs/zero_metric_{exp}_{threshold}.csv', index=False)
    else:
        df.to_csv(f'./eval_csvs/zero_metric_{exp}.csv', index=False)
                    

2stage
structure_extract
