# Extract Best Results & Hyperparameters

### Definitions

In [1]:
import os
import pandas as pd
import warnings
warnings.filterwarnings("ignore")

# Metric used to rank runs for tasks that are not scored by plain accuracy.
_TASK_METRICS = {"stsb": "pearson", "cola": "mcc"}

# Result/bookkeeping columns that are not hyperparameters.
_NON_HYPERPARAM_COLUMNS = (
    "pearson",
    "spearman",
    "accuracy",
    "mcc",
    "f1",
    "loss",
    "training time",
    "training energy",
    "device",
)


def extract_hyperparameters(results_base, embedding, task):
    """Return the hyperparameters of the best-scoring run for a task.

    Every regular file in ``<results_base>/<embedding>/<task>`` is read as a
    CSV and the rows are stacked. The row with the highest value of the
    task's metric is selected: "pearson" for stsb, "mcc" for cola and
    "accuracy" for anything else (the task name is matched
    case-insensitively). A short summary (embedding, task, best score) is
    printed.

    Args:
        results_base: Base directory holding the results files.
        embedding: Name of the embedding sub-directory.
        task: Name of the task sub-directory; also selects the metric.

    Returns:
        A dict mapping each remaining column name to a one-element list with
        the best row's value. Score, loss, timing, energy and device columns
        are removed, and "num_classes" (when present) is cast to ``int``.

    Raises:
        ValueError: If the directory yields no result rows, or no row has a
            comparable (non-NaN) value for the metric.
        KeyError: If the metric column is missing from the results.
    """
    results_dir = os.path.join(results_base, embedding, task)

    frames = []
    # os.listdir() returns entries in arbitrary order; sorting makes the
    # choice between equally good rows reproducible across runs/machines.
    for filename in sorted(os.listdir(results_dir)):
        filepath = os.path.join(results_dir, filename)
        # Skip sub-directories (e.g. Jupyter's .ipynb_checkpoints), which
        # cannot be opened as CSV files.
        if not os.path.isfile(filepath):
            continue
        frame = pd.read_csv(filepath)
        # Header-only files contribute no rows.
        if not frame.empty:
            frames.append(frame)

    if not frames:
        raise ValueError(f"no result rows found in {results_dir!r}")

    # One concat with a fresh index: avoids quadratic re-copying and the
    # duplicate index labels produced by stacking per-file indices.
    results_df = pd.concat(frames, ignore_index=True)

    metric = _TASK_METRICS.get(task.lower(), "accuracy")
    if metric not in results_df.columns:
        raise KeyError(f"metric column {metric!r} not found in {results_dir!r}")

    best = results_df[metric].max()
    best_rows = results_df[results_df[metric] == best]
    if best_rows.empty:
        # Happens when every metric value is NaN (NaN never compares equal).
        raise ValueError(f"no valid {metric!r} values found in {results_dir!r}")

    # Keep a one-row DataFrame (not a Series) so each column keeps its dtype.
    best_row = best_rows.iloc[[0]]
    hyperparam_grid = {
        column: [next(iter(values.values()))]
        for column, values in best_row.to_dict().items()
    }

    print(f"embedding: {embedding}")
    print(f"task: {task}")
    print(f"{metric} score: {hyperparam_grid[metric][0]}")
    print()

    for key in _NON_HYPERPARAM_COLUMNS:
        hyperparam_grid.pop(key, None)

    if "num_classes" in hyperparam_grid:
        hyperparam_grid["num_classes"][0] = int(hyperparam_grid["num_classes"][0])

    return hyperparam_grid

### Get the best-scoring row and return its hyperparameters as a dict

In [2]:
# Select which results to inspect.
results_base = "./results/"  # base directory for results .csv files
task = "cola"
embedding = "cls"

# Last expression of the cell, so the returned dict is displayed.
extract_hyperparameters(
    results_base=results_base,
    embedding=embedding,
    task=task,
)

embedding: cls
task: cola
mcc score: 0.4106901466550892



{'num_epochs': [50],
 'batch_size': [32],
 'learning_rate': [0.01],
 'category': ['BC'],
 'norm': [False],
 'input_size': [768],
 'layer_size': [768],
 'num_layers': [1],
 'weight_decay': [0.0001],
 'patience': [3],
 'min_delta': [0]}