# AMLB shorter runs visualizations

In [1]:
import itertools
from pathlib import Path

import matplotlib.pyplot as plt
import pandas as pd
import Orange

from visualization_early import FRAMEWORK_TO_COLOR
from data_processing import get_print_friendly_name, impute_missing_results, calculate_ranks, is_old

In [2]:
PROJECT_ROOT = Path(".").absolute().parent
DATA_DIRECTORY = PROJECT_ROOT / "data"
FIGURE_DIRECTORY = PROJECT_ROOT / "figures" / "CD"
FIGURE_DIRECTORY.mkdir(parents=True, exist_ok=True)
FIGURE_DIRECTORY_BY_TIME = PROJECT_ROOT / "figures" / "CD_by_time"
FIGURE_DIRECTORY_BY_TIME.mkdir(parents=True, exist_ok=True)

In [3]:
results = pd.read_csv(DATA_DIRECTORY / "amlb_all.csv", dtype={"info": str})
results = results[~results["framework"].isin(["autosklearn2"])]
results["framework"] = results["framework"].apply(get_print_friendly_name)

In [4]:
results.framework.unique()

array(['AutoGluon(HQIL)_10min', 'AutoGluon(HQIL)_30min',
       'AutoGluon(HQIL)_5min', 'AutoGluon(HQIL)_60min',
       'AutoGluon(HQ)_10min', 'AutoGluon(HQ)_30min', 'AutoGluon(HQ)_5min',
       'AutoGluon(HQ)_60min', 'AutoGluon(B)_10min', 'AutoGluon(B)_30min',
       'AutoGluon(B)_5min', 'AutoGluon(B)_60min', 'FEDOT_10min',
       'FEDOT_30min', 'FEDOT_5min', 'FEDOT_60min', 'GAMA(B)_10min',
       'GAMA(B)_30min', 'GAMA(B)_5min', 'GAMA(B)_60min',
       'H2OAutoML_10min', 'H2OAutoML_30min', 'H2OAutoML_5min',
       'H2OAutoML_60min', 'NaiveAutoML_10min', 'NaiveAutoML_30min',
       'NaiveAutoML_5min', 'NaiveAutoML_60min', 'RF_60min', 'TPOT_10min',
       'TPOT_30min', 'TPOT_5min', 'TPOT_60min', 'TRF_60min',
       'autosklearn2_10min', 'autosklearn2_30min', 'autosklearn2_5min',
       'autosklearn2_60min', 'autosklearn_10min', 'autosklearn_30min',
       'autosklearn_5min', 'autosklearn_60min', 'CP_60min', 'flaml_10min',
       'flaml_30min', 'flaml_5min', 'flaml_60min', 'lightautoml_

In [5]:
from IPython.display import display

with pd.option_context("display.max_rows", None):
    display(results.groupby(by=["constraint", "framework"]).size())

constraint              framework            
Mixed Time Constraints  AutoGluon(B)_10min       1040
                        AutoGluon(B)_30min       1040
                        AutoGluon(B)_5min        1040
                        AutoGluon(B)_60min       1040
                        AutoGluon(HQ)_10min      1040
                        AutoGluon(HQ)_30min      1040
                        AutoGluon(HQ)_5min       1040
                        AutoGluon(HQ)_60min      1040
                        AutoGluon(HQIL)_10min    1040
                        AutoGluon(HQIL)_30min    1040
                        AutoGluon(HQIL)_5min     1040
                        AutoGluon(HQIL)_60min    1040
                        CP_60min                 1040
                        FEDOT_10min              1040
                        FEDOT_30min              1040
                        FEDOT_5min               1040
                        FEDOT_60min              1040
                        GAMA(B)_10mi

In [6]:
# Define a function to replace the prefix
def replace_prefix(framework, prefix):
    if framework.startswith(prefix):
        return framework[len(prefix):]
    return framework

In [8]:
name_mapping = {
    'constantpredictor_60min': 'CP_60min',
    'RandomForest_60min': 'RF_60min',
    'TunedRandomForest_60min': 'TRF_60min'
}

### All frameworks by time

In [9]:
strings_by_time = ['_5min', '_10min', '_30min', '_60min']
extra_frameworks = ['RF_60min', 'TRF_60min', 'CP_60min']

In [10]:
correlation_dictionary = dict()
for name_time in strings_by_time:
    print("This is the time constraint", name_time)
    this_dataframe = results[results['framework'].str.contains(name_time) | results['framework'].isin(extra_frameworks)]
    this_dataframe['framework'] = this_dataframe['framework'].apply(lambda x: name_mapping.get(x, x))
    data = this_dataframe
    data = impute_missing_results(data, with_results_from="CP_60min")
    data['framework'] = data['framework'].apply(lambda x: x.replace(name_time, ''))
    data['framework'] = data['framework'].apply(lambda x: x.replace("_60min", ''))
    correlation_dictionary[name_time.replace("_", "")] = calculate_ranks(data)

This is the time constraint _5min


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  this_dataframe['framework'] = this_dataframe['framework'].apply(lambda x: name_mapping.get(x, x))


This is the time constraint _10min
This is the time constraint _30min
This is the time constraint _60min


### Correlation matrix

In [11]:
import seaborn as sns
# Create a DataFrame
df_corr = pd.DataFrame(correlation_dictionary)

# Optionally, transpose the DataFrame if you want the times as columns
correlation_table = df_corr.corr()

In [12]:
df_corr

Unnamed: 0,5min,10min,30min,60min
AutoGluon(B),2.918269,2.740385,2.884615,2.663462
AutoGluon(HQ),4.966346,4.576923,4.451923,4.009615
AutoGluon(HQIL),6.019231,5.158654,5.610577,5.615385
CP,15.591346,15.634615,15.658654,15.365385
FEDOT,11.918269,11.230769,11.322115,12.793269
GAMA(B),11.076923,10.144231,10.158654,8.769231
H2OAutoML,7.038462,6.596154,6.951923,7.105769
MLJAR(B),7.471154,6.923077,7.629808,6.918269
NaiveAutoML,11.721154,12.043269,12.557692,10.990385
RF,10.740385,10.923077,11.634615,11.355769


In [13]:
# Define a function to color the lowest value in each column
def color_min(s):
    is_min = s == s.min()
    return ['background-color: yellow' if v else '' for v in is_min]

# Apply the function to the DataFrame
styled_df = df_corr.style.apply(color_min, axis=0).format(precision=2)

# Display the styled DataFrame
styled_df

Unnamed: 0,5min,10min,30min,60min
AutoGluon(B),2.92,2.74,2.88,2.66
AutoGluon(HQ),4.97,4.58,4.45,4.01
AutoGluon(HQIL),6.02,5.16,5.61,5.62
CP,15.59,15.63,15.66,15.37
FEDOT,11.92,11.23,11.32,12.79
GAMA(B),11.08,10.14,10.16,8.77
H2OAutoML,7.04,6.6,6.95,7.11
MLJAR(B),7.47,6.92,7.63,6.92
NaiveAutoML,11.72,12.04,12.56,10.99
RF,10.74,10.92,11.63,11.36


#### Color table by ranking

In [14]:
# Define the new column order
new_column_order = ['60min', '30min', '10min', '5min']

# Reindex the DataFrame to change the column order
df_corr = df_corr.reindex(columns=new_column_order)

# Display the updated DataFrame
df_corr_color = df_corr.copy()

# Drop the specified rows
df_corr_color = df_corr_color.drop(index=['CP', 'RF', 'TRF'])

# Calculate the ranking changes relative to 60min
df_corr_color['30min'] = (df_corr_color['30min'] - df_corr_color['60min']).round(3)
df_corr_color['10min'] = (df_corr_color['10min'] - df_corr_color['60min']).round(3)
df_corr_color['5min'] = (df_corr_color['5min'] - df_corr_color['60min']).round(3)

# Create the new DataFrame with the desired format
df_new = df_corr_color[['60min', '30min', '10min', '5min']]
df_new

Unnamed: 0,60min,30min,10min,5min
AutoGluon(B),2.663462,0.221,0.077,0.255
AutoGluon(HQ),4.009615,0.442,0.567,0.957
AutoGluon(HQIL),5.615385,-0.005,-0.457,0.404
FEDOT,12.793269,-1.471,-1.562,-0.875
GAMA(B),8.769231,1.389,1.375,2.308
H2OAutoML,7.105769,-0.154,-0.51,-0.067
MLJAR(B),6.918269,0.712,0.005,0.553
NaiveAutoML,10.990385,1.567,1.053,0.731
TPOT,10.365385,-0.538,-0.572,-0.562
autosklearn,7.644231,0.123,2.132,0.569


In [17]:
# Define a function to color the cells
def color_cells(val):
    if val > 0:
        color = '#f79f8c'
    elif val < 0:
        color = '#8cf7ab'
    else:
        color = 'white'
    return f'background-color: {color}'

# Apply the function to the DataFrame
styled_df = df_new.style.applymap(color_cells, subset=['30min', '10min', '5min']).format(precision=2)

# Display the styled DataFrame
styled_df

Unnamed: 0,60min,30min,10min,5min
AutoGluon(B),2.66,0.22,0.08,0.26
AutoGluon(HQ),4.01,0.44,0.57,0.96
AutoGluon(HQIL),5.62,-0.01,-0.46,0.4
FEDOT,12.79,-1.47,-1.56,-0.88
GAMA(B),8.77,1.39,1.38,2.31
H2OAutoML,7.11,-0.15,-0.51,-0.07
MLJAR(B),6.92,0.71,0.01,0.55
NaiveAutoML,10.99,1.57,1.05,0.73
TPOT,10.37,-0.54,-0.57,-0.56
autosklearn,7.64,0.12,2.13,0.57


In [18]:
# Transpose the DataFrame
df_new_transposed = df_new.transpose()

# Apply the function to the transposed DataFrame, excluding the '60min' row
styled_df_transposed = df_new_transposed.style.applymap(color_cells, subset=pd.IndexSlice[['30min', '10min', '5min'], :]).format(precision=2)

# Display the styled transposed DataFrame
styled_df_transposed

Unnamed: 0,AutoGluon(B),AutoGluon(HQ),AutoGluon(HQIL),FEDOT,GAMA(B),H2OAutoML,MLJAR(B),NaiveAutoML,TPOT,autosklearn,autosklearn2,flaml,lightautoml
60min,2.66,4.01,5.62,12.79,8.77,7.11,6.92,10.99,10.37,7.64,6.55,6.26,6.24
30min,0.22,0.44,-0.01,-1.47,1.39,-0.15,0.71,1.57,-0.54,0.12,0.04,-0.14,-0.17
10min,0.08,0.57,-0.46,-1.56,1.38,-0.51,0.01,1.05,-0.57,2.13,1.85,-0.15,-0.23
5min,0.26,0.96,0.4,-0.88,2.31,-0.07,0.55,0.73,-0.56,0.57,0.22,-0.26,-0.38


In [19]:
# Export the styled DataFrame to LaTeX
latex_code = styled_df.to_latex(hrules=True)

# Save the LaTeX code to a file
with open('styled_df.tex', 'w') as f:
    f.write(latex_code)

# Display the LaTeX code
print(latex_code)

\begin{tabular}{lrrrr}
\toprule
{} & {60min} & {30min} & {10min} & {5min} \\
\midrule
AutoGluon(B) & 2.66 & \background-color#f79f8c 0.22 & \background-color#f79f8c 0.08 & \background-color#f79f8c 0.26 \\
AutoGluon(HQ) & 4.01 & \background-color#f79f8c 0.44 & \background-color#f79f8c 0.57 & \background-color#f79f8c 0.96 \\
AutoGluon(HQIL) & 5.62 & \background-color#8cf7ab -0.01 & \background-color#8cf7ab -0.46 & \background-color#f79f8c 0.40 \\
FEDOT & 12.79 & \background-color#8cf7ab -1.47 & \background-color#8cf7ab -1.56 & \background-color#8cf7ab -0.88 \\
GAMA(B) & 8.77 & \background-color#f79f8c 1.39 & \background-color#f79f8c 1.38 & \background-color#f79f8c 2.31 \\
H2OAutoML & 7.11 & \background-color#8cf7ab -0.15 & \background-color#8cf7ab -0.51 & \background-color#8cf7ab -0.07 \\
MLJAR(B) & 6.92 & \background-color#f79f8c 0.71 & \background-color#f79f8c 0.01 & \background-color#f79f8c 0.55 \\
NaiveAutoML & 10.99 & \background-color#f79f8c 1.57 & \background-color#f79f8c 1.05 & \