In [1]:
import ast
import json

import numpy as np
import pandas as pd
import seaborn as sns
from IPython.display import display, HTML

In [2]:
def format_dataset(results, is_highlight = True, color = "navy"):
    """Turn one raw result record into a per-encoder score table.

    Parameters
    ----------
    results : dict
        One record of the results file. Every value that is itself a dict is
        taken as the metrics of one encoder; its key must be the string form
        of a Python literal sequence (tuple or list) whose first element is
        the encoder name. The value stored under ``'model_name'`` (if any)
        becomes the ``name`` attribute of the table. Other entries are ignored.
    is_highlight : bool
        When true, return a ``Styler`` with a background gradient instead of
        the plain DataFrame.
    color : str
        Base colour handed to ``sns.light_palette`` for the gradient.

    Returns
    -------
    pandas.DataFrame or pandas Styler
        Rows are encoder names, columns are ``train_score``, ``val_score``,
        ``test_score`` and ``time``.

    Raises
    ------
    ValueError
        If an encoder key is not a literal sequence with at least one element.
    KeyError
        If one of the four expected metric columns is missing.
    """
    # Default so that a record without 'model_name' no longer hits an unbound local.
    name = None
    formatted_result = {}
    for key, item in results.items():
        if isinstance(item, dict):
            # The keys come from a file on disk: parse them as literals only.
            # eval() would execute whatever expression the file contains.
            try:
                enc_name = ast.literal_eval(key)[0]
            except (ValueError, SyntaxError, TypeError, IndexError, KeyError) as exc:
                raise ValueError(f'Cannot extract an encoder name from key {key!r}') from exc
            formatted_result[enc_name] = item
        elif key == 'model_name':
            name = item

    table = pd.DataFrame.from_dict(formatted_result).T
    table = table[['train_score', 'val_score', 'test_score', 'time']]
    # Plain Python attribute on the frame; it is not carried over to the Styler below.
    table.name = name

    if is_highlight:
        cm = sns.light_palette(color, as_cmap=True)
        return table.style.background_gradient(cmap=cm)

    return table

def open_dataset_validation(dataset_name, validation_type, results_dir = './results'):
    """Load the stored results for one dataset / validation-type pair.

    Reads ``{results_dir}/{dataset_name}_{validation_type}.json`` and returns
    the decoded JSON content.

    Parameters
    ----------
    dataset_name : str
        First part of the file name.
    validation_type : str
        Second part of the file name.
    results_dir : str
        Directory holding the result files. Defaults to ``'./results'``, the
        location that used to be hard-coded.

    Raises
    ------
    FileNotFoundError
        If the file does not exist.
    json.JSONDecodeError
        If the file is not valid JSON.
    """
    path = f'{results_dir}/{dataset_name}_{validation_type}.json'
    # Explicit encoding: the platform default is not UTF-8 everywhere.
    with open(path, "r", encoding="utf-8") as read_file:
        return json.load(read_file)

def print_highlighted_data(data):
    """Display every record in ``data`` as a gradient-highlighted table.

    Each record is passed to ``format_dataset`` with highlighting enabled and
    the resulting Styler is handed to ``display``.
    """
    for result in data:
        # Display the Styler itself (rendered through its HTML repr).
        # Styler.render() is deprecated and removed in pandas 2.0.
        display(format_dataset(result, is_highlight=True))
        
def get_list_of_datasets(data):
    """Return the plain (unstyled) table for every record in ``data``.

    Each record is converted by ``format_dataset`` with ``is_highlight=False``;
    the tables are returned as a list in input order.
    """
    return [format_dataset(record, is_highlight=False) for record in data]

def highlight_min_col(x):
    """Style callback: red background for entries equal to one of the three smallest values of ``x``.

    ``x`` is a pandas Series; the result is a list of CSS strings, one per
    entry, with the empty string meaning "no style".
    """
    three_smallest = x.nsmallest(3).values
    styles = []
    for value in x:
        if value in three_smallest:
            styles.append('background-color: red')
        else:
            styles.append('')
    return styles

def highlight_min_row(x):
    """Style callback: green background for every entry equal to the minimum of ``x``.

    ``x`` is a pandas Series; the result is a list of CSS strings, one per
    entry, with the empty string meaning "no style".
    """
    lowest = x.min()
    styles = []
    for value in x:
        if value == lowest:
            styles.append('background-color: green')
        else:
            styles.append('')
    return styles

def get_df_results(results):
    """Pivot per-model tables into one table per metric.

    Parameters
    ----------
    results : iterable
        Tables as returned by ``format_dataset(..., is_highlight=False)``:
        each must have a ``name`` attribute and the columns ``train_score``,
        ``val_score``, ``test_score`` and ``time``.

    Returns
    -------
    tuple of pandas.DataFrame
        ``(train_df, val_df, test_df, time)``. In each frame the rows are the
        row labels of the input tables and there is one column per input
        table, labelled with that table's ``name``. Row labels missing from
        some input show up as NaN. Each frame carries a ``name`` attribute
        (``'train'``, ``'val'``, ``'test'``, ``'time'``).
    """
    results = list(results)  # may be iterated several times below
    names = [result.name for result in results]

    def _gather(metric, label):
        # One concat per metric instead of DataFrame.append in a loop:
        # append was removed in pandas 2.0 and copied the frame on every call.
        columns = [result[metric] for result in results]
        frame = pd.concat(columns, axis=1) if columns else pd.DataFrame()
        frame.columns = names
        frame.name = label
        return frame

    return (
        _gather('train_score', 'train'),
        _gather('val_score', 'val'),
        _gather('test_score', 'test'),
        _gather('time', 'time'),
    )

In [5]:
dataset_name = 'mimic'
validation_types = ['None', 'Single', 'Double']
for validation_type in validation_types:
    data = open_dataset_validation(dataset_name, validation_type)
    results = get_list_of_datasets(data)
    train_df, val_df, test_df, time = get_df_results(results)

    # Only the test scores are shown; add train_df, val_df or time to this list to inspect them as well.
    for df in [test_df]:
        print(f'Dataset : {dataset_name}')
        print(f'\t Validation type: {validation_type}')
        print(f'\t Result name : {df.name}')
        # Row-wise pass colours the minimum of each row, column-wise pass the three smallest values of each column.
        highlight_data = df.style.apply(highlight_min_row, axis = 1).apply(highlight_min_col, axis = 0)
        # Display the Styler itself: Styler.render() is deprecated and removed in pandas 2.0.
        display(highlight_data)

Dataset : mimic
	 Validation type: None
	 Result name : test


Unnamed: 0,LinearRegression,KNeighborsRegressor,DecisionTreeRegressor,RandomForestRegressor,MLPRegressor,LGBMRegressor
BackwardDifferenceEncoder,83.886,114.397,253.053,113.014,116.766,85.3485
FrequencyEncoder,79.5718,115.378,253.572,85.255,116.483,74.9897
HelmertEncoder,77.3658,114.397,221.148,86.3063,79.4962,76.0341
JamesSteinEncoder,83.7425,114.397,398.701,111.25,122.172,87.6164
LeaveOneOutEncoder,81.3309,114.397,118.17,117.894,83.5761,117.53
MEstimateEncoder,83.3314,114.397,398.701,111.25,85.9935,87.6164
OneHotEncoder,83.886,114.397,229.358,95.6498,116.733,84.8071
OrdinalEncoder,100.181,114.397,268.801,85.0247,85.2655,85.7505
SumEncoder,77.3658,114.397,245.434,86.7759,89.2989,74.8041
TargetEncoder,83.3319,114.397,398.701,111.25,101.237,87.6164


Dataset : mimic
	 Validation type: Single
	 Result name : test


Unnamed: 0,LinearRegression,KNeighborsRegressor,DecisionTreeRegressor,RandomForestRegressor,MLPRegressor,LGBMRegressor
BackwardDifferenceEncoder,77.3658,114.397,239.393,84.494,117.735,75.9018
CatBoostEncoder,80.7028,114.397,219.27,85.4648,104.588,73.4174
FrequencyEncoder,79.5648,115.323,253.588,85.2524,116.338,74.9897
HelmertEncoder,77.3658,114.397,228.985,86.3876,115.005,74.2031
JamesSteinEncoder,81.6168,114.397,222.607,84.8551,191.12,76.5161
LeaveOneOutEncoder,81.2863,114.397,118.238,118.009,111.164,117.643
MEstimateEncoder,81.2353,114.397,210.099,86.7404,75.8812,74.9903
OneHotEncoder,77.3658,114.397,247.186,86.0236,99.3873,74.8041
OrdinalEncoder,80.2765,114.397,229.016,84.8798,109.886,75.3688
SumEncoder,77.3658,114.397,244.34,86.3527,206.386,74.8041


Dataset : mimic
	 Validation type: Double
	 Result name : test


Unnamed: 0,LinearRegression,KNeighborsRegressor,DecisionTreeRegressor,RandomForestRegressor,MLPRegressor,LGBMRegressor
CatBoostEncoder,81.2803,114.397,227.465,79.5997,80.3438,71.7553
FrequencyEncoder,81.1614,114.976,419.356,116.292,118.662,88.2224
JamesSteinEncoder,81.6414,114.397,206.091,80.2682,78.4088,73.2844
LeaveOneOutEncoder,81.2807,114.397,244.362,79.6565,107.507,72.6443
MEstimateEncoder,81.2803,114.397,227.465,79.5997,80.3438,71.7553
TargetEncoder,81.2815,114.397,244.362,79.6565,92.6553,72.6443


In [146]:







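# NOTE(review): the house_prices tables below look like stale output from an earlier version of this cell — re-run to confirm.
# test_df.style.apply(highlight_min_row, axis=1).apply(highlight_min_col, axis=0)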

Dataset : house_prices
	 Validation type: Single
	 Result name : train


Unnamed: 0,KNeighborsRegressor,DecisionTreeRegressor,RandomForestRegressor,MLPRegressor,LGBMRegressor,SVR,AdaBoostRegressor
BackwardDifferenceEncoder,0.0698911,0,0.00397884,0.0366886,0.00459648,0.166677,0.0339338
CatBoostEncoder,0.0558191,0,0.0042334,0.0481837,0.00420618,0.166653,0.032213
FrequencyEncoder,0.0669292,0,0.00402262,0.026801,0.00481723,0.166675,0.0339498
HelmertEncoder,0.0698579,0,0.00396447,0.030001,0.00357356,0.166677,0.0341295
JamesSteinEncoder,0.0469473,0,0.00344396,0.0417544,0.0030972,0.166658,0.0307422
LeaveOneOutEncoder,0.0521106,0,0.000228235,0.0443297,0.000580742,0.166638,0.00530579
MEstimateEncoder,0.048328,0,0.00334015,0.0373125,0.00360847,0.166634,0.0299087
OneHotEncoder,0.0698847,0,0.00397178,0.0780116,0.00381129,0.166677,0.0340964
OrdinalEncoder,0.0698633,0,0.00399597,0.0380163,0.00465964,0.166676,0.0339258
SumEncoder,0.0698851,0,0.00398328,0.0381957,0.00381129,0.166677,0.0341927


Dataset : house_prices
	 Validation type: Single
	 Result name : val


Unnamed: 0,KNeighborsRegressor,DecisionTreeRegressor,RandomForestRegressor,MLPRegressor,LGBMRegressor,SVR,AdaBoostRegressor
BackwardDifferenceEncoder,0.0736589,0.0528957,0.0236363,0.0389601,0.0200767,0.166954,0.0419402
CatBoostEncoder,0.0553715,0.0527743,0.0229887,0.0482353,0.0198193,0.166924,0.0396724
FrequencyEncoder,0.0704532,0.0507646,0.0234918,0.0330683,0.0202369,0.166952,0.0419089
HelmertEncoder,0.0735644,0.0510621,0.0235961,0.0356947,0.0200109,0.166954,0.0419609
JamesSteinEncoder,0.0641137,0.0480119,0.0246253,0.0566337,0.0196924,0.166943,0.0402996
LeaveOneOutEncoder,0.0552495,0.178205,0.178397,0.0455081,0.170847,0.166915,0.174958
MEstimateEncoder,0.0579163,0.0459335,0.0264569,0.0489296,0.0205974,0.166916,0.0413809
OneHotEncoder,0.0736589,0.0541392,0.0232352,0.0408721,0.0197069,0.166954,0.0420093
OrdinalEncoder,0.0735756,0.0494642,0.0237194,0.0399411,0.0199424,0.166953,0.0417387
SumEncoder,0.0736589,0.0508964,0.02328,0.0553005,0.0197069,0.166954,0.0421817


Dataset : house_prices
	 Validation type: Single
	 Result name : test


Unnamed: 0,KNeighborsRegressor,DecisionTreeRegressor,RandomForestRegressor,MLPRegressor,LGBMRegressor,SVR,AdaBoostRegressor
BackwardDifferenceEncoder,0.0729202,0.04677,0.0243411,0.045785,0.0210224,0.149369,0.04135
CatBoostEncoder,0.0535605,0.0506492,0.0247717,0.0489266,0.0204287,0.149328,0.0393927
FrequencyEncoder,0.0703431,0.0468669,0.0246244,0.0265697,0.0216477,0.149368,0.0409683
HelmertEncoder,0.0729664,0.0505473,0.0243798,0.0434114,0.0210309,0.149369,0.041543
JamesSteinEncoder,0.0571214,0.0482484,0.024912,0.0504659,0.0204431,0.149346,0.0394676
LeaveOneOutEncoder,0.0516922,0.166893,0.165372,0.0462052,0.160612,0.149321,0.156522
MEstimateEncoder,0.0526401,0.0475068,0.0258751,0.0458202,0.0224189,0.149322,0.0410047
OneHotEncoder,0.0729202,0.0555752,0.0243982,0.0522688,0.0207668,0.149369,0.0409807
OrdinalEncoder,0.0729166,0.0447567,0.024378,0.0448615,0.0196282,0.149369,0.0414165
SumEncoder,0.0729202,0.0481971,0.0244444,0.0462918,0.0207668,0.149369,0.0412467


Dataset : house_prices
	 Validation type: Single
	 Result name : time


Unnamed: 0,KNeighborsRegressor,DecisionTreeRegressor,RandomForestRegressor,MLPRegressor,LGBMRegressor,SVR,AdaBoostRegressor
BackwardDifferenceEncoder,6.51439,5.47828,68.0527,7.12081,16.6206,10.0095,33.0546
CatBoostEncoder,6.58195,7.10489,46.2847,7.58976,18.9099,7.23842,26.12
FrequencyEncoder,1.12754,0.914856,20.7785,2.36158,6.97312,1.52788,10.9197
HelmertEncoder,6.44689,5.12202,70.1607,7.66557,18.4339,9.99706,34.0235
JamesSteinEncoder,3.47101,3.3154,24.2308,4.1284,11.883,3.81143,13.1922
LeaveOneOutEncoder,5.3656,4.89368,35.4384,7.01593,25.0393,5.86416,23.3514
MEstimateEncoder,3.12297,2.69437,23.036,4.32106,9.23875,3.41279,13.0444
OneHotEncoder,6.9082,7.62481,44.0574,8.13025,13.0524,10.7406,30.3503
OrdinalEncoder,1.80792,1.84739,23.1399,3.41265,9.46746,2.35076,26.5689
SumEncoder,6.48602,5.08755,44.9899,7.61569,14.7264,9.96199,28.8735
