In [16]:
import os
import re

import numpy as np
import pandas as pd
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score

In [19]:
def metric(file, response_column, label_regex, label_mapping, average='micro'):
    """Score label predictions stored in a CSV file and save the metrics next to it.

    The CSV is read with its first column as the index. A label is pulled out
    of each response with ``label_regex``, translated to a numeric class via
    ``label_mapping`` and compared against the ``actual_label`` column.

    Rows whose response yields no usable label (no regex match, a non-string
    response such as an empty cell, or a label that ``label_mapping`` does not
    map to a number) are dropped before scoring, so the reported metrics
    describe only the parseable subset of the file.

    Parameters
    ----------
    file : str or os.PathLike
        Path of the CSV to evaluate. It must contain ``response_column`` and
        an ``actual_label`` column.
    response_column : str
        Name of the column holding the raw response text.
    label_regex : str
        Regular expression whose first capture group is the predicted label.
    label_mapping : dict
        Maps extracted label strings to numeric classes. Labels mapped to NaN
        (or absent from the mapping) are treated as unparseable.
    average : str, optional
        Averaging mode forwarded to ``precision_score``, ``recall_score`` and
        ``f1_score``. Defaults to ``'micro'``. Note that for single-label
        multiclass data micro-averaged precision, recall and F1 are all equal
        to accuracy; pass ``'macro'`` or ``'weighted'`` for per-class
        sensitive numbers.

    Returns
    -------
    pandas.DataFrame
        One row with the columns ``accuracy``, ``precision``, ``recall`` and
        ``f1_score``. The same frame is written to ``<file stem>_metrics.csv``.
    """
    data = pd.read_csv(file, index_col=0)

    def extract_label(text):
        # Empty CSV cells come back from read_csv as NaN (a float), and
        # re.search raises TypeError on non-strings, so treat any non-string
        # response as unparseable instead of crashing.
        if not isinstance(text, str):
            return 'None'
        match = re.search(label_regex, text)
        return match.group(1) if match else 'None'

    data['extracted_label'] = data[response_column].apply(extract_label)

    # Labels without a numeric mapping become NaN and are filtered out below.
    data['extracted_label_numeric'] = data['extracted_label'].map(label_mapping)
    filtered_data = data.dropna(subset=['extracted_label_numeric'])

    y_true = filtered_data['actual_label']
    y_pred = filtered_data['extracted_label_numeric']

    metrics = pd.DataFrame({
        "accuracy": [accuracy_score(y_true, y_pred)],
        "precision": [precision_score(y_true, y_pred, average=average)],
        "recall": [recall_score(y_true, y_pred, average=average)],
        "f1_score": [f1_score(y_true, y_pred, average=average)],
    })

    # Build the output name from the file stem rather than str.replace:
    # replace() left the path unchanged when it had no '.csv' (so the metrics
    # overwrote the input file) and rewrote '.csv' inside directory names.
    root, _ = os.path.splitext(os.fspath(file))
    output_file_path = root + '_metrics.csv'
    metrics.to_csv(output_file_path, index=False)

    return metrics


In [21]:
# FPB run: label strings -> numeric classes; 'None' marks an unparseable response.
fpb_label_mapping = {
    'POSITIVE': 2,
    'NEGATIVE': 0,
    'NEUTRAL': 1,
    'None': np.nan,
}
metric(
    file='fpb_llama_2_7b_0408.csv',
    response_column='llm_responses',
    label_regex=r'Label: (POSITIVE|NEGATIVE|NEUTRAL)',
    label_mapping=fpb_label_mapping,
)

# FOMC runs: the test and train result files share one mapping and one regex.
fomc_label_mapping = {
    'DOVISH': 0,
    'HAWKISH': 1,
    'NEUTRAL': 2,
    'None': np.nan,
}
metric(
    file='fomc_results_test_llama_2_7b.csv',
    response_column='response',
    label_regex=r'Label: (DOVISH|HAWKISH|NEUTRAL)',
    label_mapping=fomc_label_mapping,
)
# Kept as the final expression so the notebook displays this call's metrics.
metric(
    file='fomc_train_results_llama_2_7b.csv',
    response_column='response',
    label_regex=r'Label: (DOVISH|HAWKISH|NEUTRAL)',
    label_mapping=fomc_label_mapping,
)


Unnamed: 0,accuracy,precision,recall,f1_score
0,0.509975,0.509975,0.509975,0.509975
