In [1]:
import sys
import json
from pathlib import Path

import numpy as np
import pandas as pd

In [2]:
# Root folder scanned for runtime results. The loading cell below treats each
# sub-directory as a dataset and each file inside it as one algorithm's results.
# The path is relative, so it is resolved against the kernel's working directory.
ROOT_DIR = Path("../runtimes/")

In [3]:
def parse_runtimes(filepath):
    """Load the runtimes stored in ``filepath``, picking a parser by suffix.

    Parameters
    ----------
    filepath : pathlib.Path
        File to read; only its ``.suffix`` decides which parser is used.

    Returns
    -------
    Whatever the selected parser returns (``parse_runtimes_csv`` for ``.csv``
    files, ``parse_runtimes_json`` for ``.json`` files).

    Raises
    ------
    ValueError
        If the suffix is neither ``.csv`` nor ``.json``.
    """
    suffix = filepath.suffix

    # Reject unsupported files up front so the happy path below stays flat.
    if suffix not in (".csv", ".json"):
        raise ValueError(f"Invalid filepath suffix for '{filepath}'")

    if suffix == ".csv":
        return parse_runtimes_csv(filepath)
    return parse_runtimes_json(filepath)
        
def parse_runtimes_csv(filepath):
    """Read runtimes from a header-less CSV file.

    The first column is consumed as the row index; every remaining value is
    returned in a single flat (1-D) NumPy array.

    Parameters
    ----------
    filepath : path-like
        Location of the CSV file, passed straight to ``pd.read_csv``.

    Returns
    -------
    numpy.ndarray
        Flattened copy of the non-index values.
    """
    frame = pd.read_csv(
        filepath,
        header=None,   # the file has no header row
        index_col=0,   # first column is an index, not a measurement
    )
    values = frame.to_numpy()
    return values.flatten()

def parse_runtimes_json(filepath):
    """Read runtimes from a JSON results file.

    The file must contain a JSON object. Only the value stored under its
    first key is used; that value is iterated and the ``"time-taken"`` field
    of every element is collected.

    Parameters
    ----------
    filepath : path-like
        Location of the JSON file.

    Returns
    -------
    numpy.ndarray
        The ``"time-taken"`` values, in file order.

    Raises
    ------
    ValueError
        If the JSON object is empty (nothing to unpack).
    KeyError
        If an element has no ``"time-taken"`` field.
    """
    with open(filepath, "r") as file_in:
        payload = json.load(file_in)

    # Keep the first entry's value only; any further entries are ignored.
    trials, *_ = payload.values()

    durations = []
    for trial in trials:
        durations.append(trial["time-taken"])
    return np.array(durations)

In [4]:
# Nested mapping: dataset name -> algorithm name -> value returned by
# parse_runtimes for that algorithm's results file.
runtimes = {}

# Expected layout under ROOT_DIR: one sub-directory per dataset, each holding
# one results file per algorithm.
for subdir in ROOT_DIR.iterdir():
    if not subdir.is_dir():
        # Report the entry that is actually being skipped. (`filepath` is only
        # bound by the inner loop, so it is undefined or stale at this point.)
        print(f"Skipping {subdir}, expected a directory.", file=sys.stderr)
        continue

    dataset_name = subdir.name
    runtimes[dataset_name] = {}

    for filepath in subdir.iterdir():
        if not filepath.is_file():
            print(f"Skipping {filepath}, expected a regular file.", file=sys.stderr)
            continue

        # The algorithm name is the file name up to its first dot. Using split
        # keeps a dot-less name intact, whereas slicing with str.find would
        # chop off its last character (find returns -1 when there is no dot).
        algo_name = filepath.name.split(".", 1)[0]

        runtimes[dataset_name][algo_name] = parse_runtimes(filepath)

In [5]:
def agg(lst):
    """Aggregate one cell of the runtimes table into a single number.

    Parameters
    ----------
    lst : list, numpy.ndarray, or anything else
        A collection of runtimes, or a placeholder for a missing entry.

    Returns
    -------
    The sum of ``lst`` when it is a list or NumPy array; ``nan`` for any
    other input (e.g. a missing dataset/algorithm combination).

    Notes
    -----
    The sum is used rather than the average because EG only uses 10 trials
    but each trial trains 10 models, so summing is the fairer comparison.
    """
    if not isinstance(lst, (list, np.ndarray)):
        return float("nan")
    return np.sum(lst)

In [15]:
# Render floats with one decimal place in every DataFrame displayed from here on
# (this is a global pandas display option, not local to this cell).
pd.options.display.float_format = "{:.1f}".format

# Build a table with one column per dataset and one row per algorithm, then
# reduce every cell to a single number with `agg`.
# A column-wise Series.map replaces DataFrame.applymap, which is deprecated
# since pandas 2.1; Series.map applies `agg` element by element on both old
# and new pandas versions, and still passes missing cells to `agg`.
summed_runtimes = pd.DataFrame(runtimes).apply(lambda column: column.map(agg))
summed_runtimes / 3600 # Show in hours

Unnamed: 0,AOF,ACSIncome-Adult
LightGBM-cluster,1.7,11.0
Fairlearn-GS-10x10,21.4,43.8
FairGBM,3.5,9.9
FairGBM-cluster,5.7,18.2
Fairlearn-GS,10.3,37.1
LightGBM,1.4,4.6
Fairlearn-EG,,99.4


In [16]:
# Express every algorithm's total runtime as a multiple of vanilla LightGBM's
# runtime on the same dataset: the "LightGBM" row is matched against the
# columns, so that row itself comes out as 1.0.
summed_runtimes.div(summed_runtimes.loc["LightGBM"], axis="columns")

Unnamed: 0,AOF,ACSIncome-Adult
LightGBM-cluster,1.2,2.4
Fairlearn-GS-10x10,14.7,9.6
FairGBM,2.4,2.1
FairGBM-cluster,4.0,4.0
Fairlearn-GS,7.1,8.1
LightGBM,1.0,1.0
Fairlearn-EG,,21.7
