In [299]:
import json
import os
import sys
from copy import deepcopy
from pathlib import Path

import pandas as pd

# Make the repository root importable so that `import lib` below resolves.
# NOTE(review): '..' is resolved against the current working directory, so this
# assumes the notebook is run from a direct sub-directory of the repository.
REPO_DIR = os.path.abspath('..')  # path to the root of the repository
sys.path.append(REPO_DIR)
# Set before importing `lib`; presumably `lib` reads PROJECT_DIR at import time
# (TODO confirm against lib's source).
os.environ["PROJECT_DIR"] = REPO_DIR
import lib

# Display DataFrames in full: no truncation of rows or columns.
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)

In [300]:
# Dataset identifiers. load_record() reads them from the first component of a
# run's path under the output directory and uses them as DATASET_NAMES keys.
EQ, JDT, LC = 'EQ', 'JDT', 'LC'
APACHE, SAFE, ZXING = 'apache', 'safe', 'zxing'
ALL_DATASETS = [EQ, JDT, LC, APACHE, SAFE, ZXING]

# Name shown in the report for each dataset (currently the identifier itself).
DATASET_NAMES = {dataset: dataset for dataset in ALL_DATASETS}

# Datasets whose info file declares a regression task.
REGRESSION_DATASETS = [
    dataset
    for dataset in ALL_DATASETS
    if lib.load_dataset_info(dataset)['task_type'] == lib.REGRESSION
]

# Descriptive (non-score) columns attached to every report row.
DETAILS = ['task_type', 'n_objects', 'n_features']
# Data splits for which scores may be present in a run's metrics.
PARTS = ['test', 'val', 'train']


def format_scores(df, precision):
    """Return `df` with regression scores sign-flipped and floats rounded.

    For rows whose ``task_type`` equals ``lib.REGRESSION``, every present
    ``<part>_best`` / ``<part>_score`` cell (for each part in ``PARTS``) is
    multiplied by -1 (presumably because regression scores are stored negated;
    confirm against the training code). Afterwards every float cell of every
    row is rounded to `precision` decimal places.

    Args:
        df: DataFrame with at least a ``task_type`` column.
        precision: number of decimal places passed to ``round``.

    Returns:
        The result of applying the per-row formatting with ``df.apply``.
    """
    score_keys = [f'{part}_{kind}' for part in PARTS for kind in ('best', 'score')]

    def _format_row(row):
        # Step 1: undo the sign convention for regression rows.
        if row['task_type'] == lib.REGRESSION:
            for key in score_keys:
                if key in row:
                    row[key] = row[key] * -1
        # Step 2: round floats; iterate over a snapshot because cells are
        # reassigned while looping.
        for name, value in list(row.items()):
            if isinstance(value, float):
                row[name] = round(value, precision)
        return row

    return df.apply(_format_row, axis=1)


def load_record(output):
    """Build one report row from a finished run directory.

    Args:
        output: path (str or Path) of a run directory. It is expected to
            contain ``stats.json`` and to be located under
            ``lib.env.OUTPUT_DIR`` as ``<dataset>/<algorithm>/<experiment>/<run>``.

    Returns:
        A dict with the keys ``dataset``, ``task_type``, ``n_objects``,
        ``n_features``, ``algorithm``, ``s`` (the run sub-path) and one
        ``<part>_score`` entry for every part of ``PARTS`` found in the
        metrics. Returns None when the directory or its ``stats.json`` is
        missing, or when the stats contain no ``metrics`` entry.

    Raises:
        ValueError: if `output` is not under ``lib.env.OUTPUT_DIR`` or has
            fewer than four path components relative to it.
        KeyError: if the dataset component is not a key of ``DATASET_NAMES``.
    """
    output = Path(output)
    if not output.exists():
        return None
    path = output / 'stats.json'
    if not path.exists():
        print(f'WARNING! This path does not exist: {path}')
        return None
    stats = lib.load_json(path)
    metrics = stats.get('metrics')
    if metrics is None:
        return None

    # Dataset details come from the dataset recorded in the stats file.
    info = lib.load_dataset_info(Path(stats['dataset']).name)

    # Split the path with pathlib instead of a hard-coded '\\' separator, so
    # the notebook works on POSIX as well as on Windows.
    parts = output.relative_to(lib.env.OUTPUT_DIR).parts
    if len(parts) < 4:
        raise ValueError(
            f'Expected <dataset>/<algorithm>/<experiment>/<run> under the output directory, got: {output}'
        )
    dataset, algorithm_name, experiment = parts[:3]
    # Everything below the experiment directory identifies the run.
    suffix = str(Path(*parts[3:]))

    r = {
        'dataset': DATASET_NAMES[dataset],
        'task_type': info['task_type'],
        'n_objects': info['size'],
        'n_features': info['n_num_features'] + info['n_cat_features'],
        'algorithm': algorithm_name + f' | {experiment}',
        's': suffix
    }
    for x in PARTS:
        if x in metrics:
            r[f'{x}_score'] = metrics[x]['PD']
    return r


def sort(df, by):
    """Order rows by ``n_objects`` and then by the given column(s).

    ``n_objects`` is always sorted ascending. Each column in `by` is sorted
    descending when its name contains ``'score'`` and ascending otherwise.

    Args:
        df: DataFrame containing ``n_objects`` and the columns named in `by`.
        by: a column name or a list of column names.

    Returns:
        The sorted DataFrame with a fresh 0..n-1 index.
    """
    columns = [by] if isinstance(by, str) else by
    directions = [True]
    for column in columns:
        directions.append('score' not in column)
    ordered = df.sort_values(['n_objects'] + columns, ascending=directions)
    return ordered.reset_index(drop=True)


def make_df(outputs_and_names):
    """Load every run into one DataFrame, one row per run.

    Args:
        outputs_and_names: iterable of ``(output, algorithm_name)`` pairs.
            `output` is passed to ``load_record``; a non-None
            `algorithm_name` overrides the record's ``algorithm`` value.

    Returns:
        DataFrame of the loaded records with missing values replaced by 0.0,
        sorted via ``sort(..., 'val_score')``. Runs for which ``load_record``
        returns nothing are skipped.
    """
    records = []
    for output, algorithm_name in outputs_and_names:
        record = load_record(output)
        if record:
            if algorithm_name is not None:
                record['algorithm'] = algorithm_name
            records.append(record)
    table = pd.DataFrame(records).fillna(0.0)
    return sort(table, 'val_score').reset_index(drop=True)


def collect_outputs(experiment_dir, filter_info=None):
    """List the run sub-directories of an experiment directory.

    Args:
        experiment_dir: a ``Path``, used as is; any other value is joined onto
            ``lib.env.OUTPUT_DIR``.
        filter_info: selects which sub-directories are kept:
            * ``None``: keep all of them;
            * int ``n``: keep those named ``'0'`` ... ``str(n - 1)``;
            * list of str: keep those whose name is in the list;
            * callable: keep those for which ``filter_info(path)`` is truthy.

    Returns:
        List of ``Path`` objects for the matching sub-directories, in the
        order produced by ``iterdir``; an empty list if the experiment
        directory does not exist.
    """
    if isinstance(filter_info, int):
        filter_info = [str(index) for index in range(filter_info)]

    if isinstance(filter_info, list):
        assert all(isinstance(name, str) for name in filter_info)

        def keep(path):
            return path.name in filter_info
    elif callable(filter_info):
        keep = filter_info
    else:
        assert filter_info is None

        def keep(path):
            return True

    if not isinstance(experiment_dir, Path):
        experiment_dir = lib.env.OUTPUT_DIR / experiment_dir
    if not experiment_dir.exists():
        return []
    # Only directories are candidates; the user filter is applied afterwards.
    return [entry for entry in experiment_dir.iterdir() if entry.is_dir() and keep(entry)]


def aggregate(df):
    """Collapse per-run rows into one row per (dataset, algorithm).

    For each group the dataset details are taken from the first row, and the
    mean and standard deviation of ``test_score`` and ``val_score`` are
    computed (and of ``train_score`` when that column exists). ``count`` is
    the number of non-null ``test_score`` values in the group.

    Args:
        df: per-run DataFrame as produced by ``make_df``.

    Returns:
        DataFrame with ``dataset`` and ``algorithm`` as regular columns and
        missing values (e.g. the std of a single-run group) replaced by 0.0.
    """
    named = {column: (column, 'first') for column in ('task_type', 'n_objects', 'n_features')}
    for part in ('test', 'val'):
        named[f'{part}_score'] = (f'{part}_score', 'mean')
        named[f'{part}_std'] = (f'{part}_score', 'std')
    named['count'] = ('test_score', 'count')
    # Train metrics are optional and are appended after `count`.
    if 'train_score' in df.columns:
        named['train_score'] = ('train_score', 'mean')
        named['train_std'] = ('train_score', 'std')

    grouped = df.groupby(['dataset', 'algorithm']).agg(**named)
    grouped['count'] = grouped['count'].astype(int)
    return grouped.reset_index().fillna(0.0)


def build_report(outputs_and_names):
    """Build the final score table for a set of runs.

    The runs are loaded (``make_df``), averaged per (dataset, algorithm)
    (``aggregate``), sorted by ``test_score`` (``sort``) and formatted with
    4 decimal places (``format_scores``).

    Args:
        outputs_and_names: iterable of ``(output, algorithm_name)`` pairs, as
            accepted by ``make_df``.

    Returns:
        DataFrame indexed by dataset, the ``DETAILS`` columns and algorithm,
        holding the ``test_score``, ``val_score`` and, when available,
        ``train_score`` columns.
    """
    df = make_df(outputs_and_names)
    df = aggregate(df)
    df = sort(df, 'test_score')
    df = format_scores(df, 4)
    df = df.set_index(['dataset'] + DETAILS + ['algorithm'])
    # aggregate() only emits train_score when the runs provide train metrics,
    # so select the columns that actually exist instead of raising KeyError.
    # The '<part>_std' and 'count' columns are also available in `df` if a
    # more detailed report is needed.
    score_columns = [
        column
        for column in ('test_score', 'val_score', 'train_score')
        if column in df.columns
    ]
    return df[score_columns]

## Default configurations (GBDT and FT-Transformer)

In [301]:
# NOTE: this cell is disabled (entirely commented out) and kept for reference.
# When enabled it would report the default-configuration runs of FT-Transformer,
# CatBoost and XGBoost, plus their '<experiment>_ensemble' outputs, for all datasets.
# all_datasets = set(deepcopy(ALL_DATASETS))
# n_seeds = 15
# ensemble_names = ['0_4', '5_9', '10_14']
# outputs_and_names = []
# for experiment, algorithm_name, datasets in [
#     ('ft_transformer/default', 'FT-Transformer', all_datasets),
#     ('catboost/default', 'CatBoost', all_datasets),
#     ('xgboost/default', 'XGBoost', all_datasets),
# ]:
#     for dataset in datasets:
#         for output in collect_outputs(dataset + '/' + experiment, n_seeds):
#             outputs_and_names.append((output, algorithm_name))
#         for output in collect_outputs(dataset + '/' + experiment + '_ensemble', ensemble_names):
#             outputs_and_names.append((output, '(e) ' + algorithm_name))
# build_report(outputs_and_names)

## All Neural Networks

In [302]:
# Report FT-Transformer with default and with tuned hyperparameters on every dataset.
n_seeds = 15  # collect_outputs keeps the run directories named '0' ... '14'
outputs_and_names = []
for experiment, algorithm_name, datasets in [
    ('ft_transformer/default', 'FT-Transformer | default', ALL_DATASETS),
    # tuned_reproduced: the configuration tuned on the new datasets.
    ('ft_transformer/tuned_reproduced', 'FT-Transformer | tuned', ALL_DATASETS),
]:
    for dataset in datasets:
        # Experiments without an output directory simply contribute no rows.
        for output in collect_outputs(dataset + '/' + experiment, n_seeds):
            outputs_and_names.append((output, algorithm_name))
build_report(outputs_and_names)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,test_score,val_score,train_score
dataset,task_type,n_objects,n_features,algorithm,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
LC,binclass,20640,61,FT-Transformer | default,0.9002,0.8846,0.8371
JDT,binclass,20640,61,FT-Transformer | default,0.8572,0.8645,0.9281
zxing,binclass,98050,26,FT-Transformer | default,0.732,0.786,0.7733
apache,binclass,98050,26,FT-Transformer | default,0.697,0.6889,0.7755
EQ,binclass,98050,61,FT-Transformer | default,0.5207,0.754,0.7866
safe,binclass,98050,26,FT-Transformer | default,0.4,0.6933,0.7971


## Main Neural Networks and GBDT

In [303]:
# NOTE: this cell is disabled (entirely commented out) and kept for reference.
# It refers to YAHOO, CALIFORNIA, ADULT and HIGGS, which are not defined in the
# visible part of this notebook, so it cannot be re-enabled as is.
# all_datasets = set(deepcopy(ALL_DATASETS))
# n_seeds = 15
# ensemble_names = ['0_4', '5_9', '10_14']
# outputs_and_names = []
# for experiment, algorithm_name, datasets in [
#     ('mlp/tuned', 'MLP', all_datasets),
#     ('resnet/tuned', 'ResNet', all_datasets),
#     ('ft_transformer/tuned', 'FT-Transformer', all_datasets - {YAHOO}),
#     ('ft_transformer/default', 'FT-Transformer | default', all_datasets),
#     ('catboost/tuned', 'CatBoost', all_datasets),
#     ('xgboost/tuned', 'XGBoost', all_datasets),
#     ('lightgbm_/tuned', 'LightGBM', {CALIFORNIA, ADULT, HIGGS}),
# ]:
#     for dataset in datasets:
#         for output in collect_outputs(dataset + '/' + experiment, n_seeds):
#             outputs_and_names.append((output, algorithm_name))
#         for output in collect_outputs(dataset + '/' + experiment + '_ensemble', ensemble_names):
#             outputs_and_names.append((output, '(e) ' + algorithm_name))
# build_report(outputs_and_names)

## Ablation Study

In [304]:
# NOTE: this cell is disabled (entirely commented out) and kept for reference.
# The dataset constants it uses (CALIFORNIA, HELENA, JANNIS, HIGGS, ALOI, YEAR,
# COVTYPE, MICROSOFT) are not defined in the visible part of this notebook.
# all_datasets = {CALIFORNIA, HELENA, JANNIS, HIGGS, ALOI, YEAR, COVTYPE, MICROSOFT}
# n_seeds = 15
# outputs_and_names = []
# for experiment, algorithm_name, datasets in [
#     ('autoint/tuned', 'AutoInt', all_datasets),
#     ('ft_transformer/tuned_nobias', 'FT-Transformer | nobias', all_datasets),
#     ('ft_transformer/tuned', 'FT-Transformer', all_datasets),
# ]:
#     for dataset in datasets:
#         for output in collect_outputs(dataset + '/' + experiment, n_seeds):
#             outputs_and_names.append((output, algorithm_name))
# build_report(outputs_and_names)