In [None]:
import sys
sys.path.append('/home/redacted/dev/moral-summarization')

import os
import pandas as pd
import numpy as np
from moral_summarization.utils import load_json, load_yaml
from moral_summarization.metrics import evaluate_on_df

# Root folders of the two hyperparameter-tuning runs compared in this notebook;
# each one is handed to get_results() further down.
# NOTE(review): the folder names suggest one run per LR scheduler type
# (cosine vs. linear) — confirm against the stored configs.
# NOTE(review): these are absolute, machine-specific paths; adjust them before
# running the notebook on another machine.
cosine = '/home/redacted/dev/hf-models/hyperparameter_tuning/hyper_token_article_cosine'
linear = '/home/redacted/dev/hf-models/hyperparameter_tuning/hyper_token_article_linear'

In [None]:
def string_to_numpy_array(string_data):
    """Evaluate a stringified Python/NumPy expression and return its value.

    The expression is evaluated with ``eval`` in a namespace that exposes only
    ``np`` and has ``__builtins__`` set to ``None``, so bare builtin names are
    not resolvable from inside the expression.

    SECURITY NOTE: blanking ``__builtins__`` narrows what an expression can
    reach but is not a sandbox; only feed this strings from files you trust.

    Args:
        string_data: Source text of a single Python expression,
            e.g. ``"np.array([0, 1])"`` or ``"[0, 1]"``.

    Returns:
        Whatever the expression evaluates to.
    """
    # Only `np` is visible to the evaluated expression.
    namespace = {"__builtins__": None}
    namespace["np"] = np
    return eval(string_data, namespace)

def _macro_prf(metrics):
    """Return ``(precision, recall, f1)`` from a flat or nested metrics dict."""
    f1_key = 'f1'
    if 'precision' not in metrics:
        # Nested layout: the scores live under 'macro avg' and F1 is 'f1-score'.
        metrics = metrics['macro avg']
        f1_key = 'f1-score'
    return metrics['precision'], metrics['recall'], metrics[f1_key]


def _mean_or_nan(values):
    """Average ``values``; return NaN for an empty list without NumPy's warning."""
    return np.mean(values) if len(values) else np.nan


def get_results(hyperparameters_folder, idx_sum=1, process_raw_evaluations=True):
    """Aggregate the per-file metrics of every hyperparameter combination.

    ``hyperparameters_folder`` is expected to contain one sub-directory per
    combination, named with an integer. Inside each sub-directory:

    * every ``*.yaml`` file is loaded with ``load_yaml`` as the combination's
      config (if there are several, the last one in sorted order wins);
    * when ``process_raw_evaluations is False`` every ``*.json`` file is read
      with ``load_json`` as a ready-made metrics dict;
    * otherwise every ``*.csv`` file is read, its ``predictions`` and
      ``labels`` columns are parsed with ``string_to_numpy_array`` (which
      uses ``eval`` — only point this at trusted files) and the frame is
      scored with ``evaluate_on_df(..., 'token_classification')``.

    Precision, recall and F1 are averaged over all metric files of a
    combination.

    Entries that are not directories, or whose name is not an integer
    (e.g. ``.ipynb_checkpoints``), are skipped. Combinations are processed in
    ascending numeric order and files in sorted order so the output does not
    depend on the arbitrary order of ``os.listdir``.

    Args:
        hyperparameters_folder: Path of the folder holding the combination
            sub-directories.
        idx_sum: Offset added to each combination's integer folder name to
            build its row label / config key.
        process_raw_evaluations: ``False`` (checked by identity) to read
            ``*.json`` metrics; any other value to recompute the metrics from
            ``*.csv`` files.

    Returns:
        tuple: ``(results, configs)`` where ``results`` is a DataFrame indexed
        by ``idx_sum + combination`` with columns ``scheduler``, ``precision``,
        ``recall``, ``f1`` and ``accuracy``, and ``configs`` maps the same
        labels to the loaded config. ``accuracy`` is always NaN because no
        accuracy value is collected. A combination without metric files gets
        NaN scores, and one without a ``*.yaml`` file gets a missing scheduler
        and no entry in ``configs``.
    """
    columns = ['scheduler', 'precision', 'recall', 'f1', 'accuracy']

    # Keep only integer-named sub-directories.
    combinations = []
    for name in os.listdir(hyperparameters_folder):
        folder = os.path.join(hyperparameters_folder, name)
        if not os.path.isdir(folder):
            continue
        try:
            combinations.append((int(name), folder))
        except ValueError:
            continue

    rows = {}
    configs = {}

    for combination_id, folder in sorted(combinations):
        row_id = idx_sum + combination_id
        # Reset per folder so a missing yaml never reuses the previous config.
        config = None
        precision, recall, f1 = [], [], []

        for file in sorted(os.listdir(folder)):
            path = os.path.join(folder, file)

            if file.endswith('.yaml'):
                config = load_yaml(path)
                configs[row_id] = config
                continue

            # Identity check kept on purpose: only the literal False selects
            # the JSON branch, exactly as before.
            if process_raw_evaluations is False:
                if not file.endswith('.json'):
                    continue
                metrics_fold = load_json(path)
            else:
                if not file.endswith('.csv'):
                    continue
                fold_results = pd.read_csv(path)
                for column in ['predictions', 'labels']:
                    fold_results[column] = fold_results[column].apply(string_to_numpy_array)
                metrics_fold = evaluate_on_df(fold_results, 'token_classification')

            fold_precision, fold_recall, fold_f1 = _macro_prf(metrics_fold)
            precision.append(fold_precision)
            recall.append(fold_recall)
            f1.append(fold_f1)

        scheduler = None if config is None else config['training']['lr_scheduler_type']
        rows[row_id] = [
            scheduler,
            _mean_or_nan(precision),
            _mean_or_nan(recall),
            _mean_or_nan(f1),
            np.nan,  # accuracy is not collected
        ]

    # Build the frame once instead of enlarging it row by row.
    results = pd.DataFrame(list(rows.values()), index=list(rows.keys()), columns=columns)

    return results, configs

def print_best_results(results):
    """Print the row with the highest F1, precision and recall, in that order.

    For each metric, the row whose value in that column is largest (as picked
    by ``idxmax``) is printed under a headline; consecutive reports are
    separated by a line of ``=`` characters. Accuracy is not reported.

    Args:
        results: DataFrame with at least ``f1``, ``precision`` and ``recall``
            columns.
    """
    separator = '========================='
    reports = (
        ('f1', "max F1:\n"),
        ('precision', "max precision:\n"),
        ('recall', "max recall:\n"),
    )
    for position, (metric, headline) in enumerate(reports):
        if position:
            # Separator goes between reports, not before the first one.
            print(separator)
        best_label = results[metric].idxmax()
        print(headline, results.loc[best_label])
In [None]:
# Collect one results table (and the matching configs) per tuning run.
# idx_sum is added to every combination's numeric folder name to form its row
# label; presumably the different offsets (1 vs. 20) keep the two runs' labels
# from colliding — verify that the resulting index ranges do not overlap.
results_linear, configs_linear = get_results(linear, idx_sum=1)
results_cosine, configs_cosine = get_results(cosine, idx_sum=20)

# Stack both runs into a single table (linear rows first).
results = pd.concat([results_linear, results_cosine])

In [None]:
# Print the best row by F1, precision and recall across both runs.
print_best_results(results)