# Determining the best parameters for each estimator and dataset

In [7]:
import os

import numpy as np
import pandas as pd

from config import dataset_map, data_dir

In [8]:
# Reverse lookup for dataset_map: each value of dataset_map points back to its key.
inverse_map = dict(zip(dataset_map.values(), dataset_map.keys()))

In [9]:
# Estimator labels. A label ending in 'c' is scored with the 'auc' column,
# any other label with the 'r2' column.
labels = ['dtc', 'dtr', 'rfc', 'rfr']

for label in labels:
    score = 'auc' if label.endswith('c') else 'r2'

    # Stack the two model-selection result files for this estimator.
    data = pd.concat([
        pd.read_csv(os.path.join(data_dir, f'model_selection_{label}_{suffix}.csv'))
        for suffix in ('md', 'msl')
    ])

    # Mean score for every (dataset, estimator, parameter set) combination.
    grouped = (
        data.groupby(['name', 'estimator', 'params'])
        .agg({score: 'mean'})
        .reset_index(drop=False)
    )

    # Keep the highest-scoring row per dataset name.
    # * na_position='first' keeps NaN scores from sorting to the end and
    #   being picked as the "best" row when a real score is available.
    # * Sorting on ['name', score] is stable, so ties resolve to the last
    #   row in `grouped` order, and the result stays ordered by name.
    # * This avoids groupby(...).apply(...) operating on the grouping
    #   column, which newer pandas versions deprecate.
    best_params = (
        grouped.sort_values(['name', score], na_position='first')
        .drop_duplicates(subset='name', keep='last')
        .reset_index(drop=True)
    )

    # Translate names through inverse_map; names without an entry are kept.
    best_params['name'] = best_params['name'].apply(lambda x: inverse_map.get(x, x))
    best_params = best_params[['name', 'params', score]]
    best_params.to_csv(
        os.path.join(data_dir, f'params_{label}.csv'),
        index=False
    )