In [1]:
import warnings
from pathlib import Path

import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import LinearRegression
from sklearn.neural_network import MLPRegressor
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from uci_datasets import Dataset

warnings.filterwarnings("ignore")

from constants import (
    ESTIMATED_ERROR_COL,
    PREDICTIONS_COL,
    SEED,
    TARGET_COL,
    TRUE_ERROR_COL,
)
from evaluate_models import create_df_with_results, evaluate_one_instance, round_value_in_table
from processing import get_model_f_output_and_compute_losses, get_trained_model_f_and_processed_data
from visualisation_functions import plot_conditional_loss

%load_ext autoreload
%autoreload 2

In [2]:
# Hyper-parameter sets for the estimators used in this notebook

# Keyword arguments unpacked into RandomForestRegressor
random_forest_params = dict(
    criterion='squared_error',
    max_depth=None,
    random_state=SEED,
)

# Keyword arguments unpacked into MLPRegressor (one hidden layer of 64 units)
mlp_params = dict(
    hidden_layer_sizes=(64,),
    activation='relu',
    solver='adam',
    batch_size=256,
    learning_rate_init=0.0005,
    max_iter=800,
    random_state=SEED,
)

# Identical to mlp_params except for the architecture (two hidden layers of 64 units)
mlp_params2 = {**mlp_params, 'hidden_layer_sizes': (64, 64)}

In [3]:
# Constants and parameters for the evaluation

# Cost values; each one is passed to evaluate_one_instance in turn
cost_list = [
    0.2,
    0.5,
    1,
    2,
]

# Keys read from the result of evaluate_one_instance; one csv file is written per metric
metrics_list = [
    'RwR_loss',
    'human_rate',
    'mae',
    'rmse',
    'bias',
    'wape',
]

# Dataset identifiers handed to uci_datasets.Dataset
data_names = [
    'concrete',
    'wine',
    'airfoil',
    'energy',
    'housing',
    'solar',
    'forest',
    'parkinsons',
]

In [None]:
# Define model pairs for comparison (each pair is [model_f, model_L]).
# Every candidate model is described once by a zero-argument factory; the pairs are the
# full cross product of the candidates, keyed as '<name of f>+<name of L>'.
model_factories = {
    'LR': lambda: Pipeline([('scaler', StandardScaler()), ('lin_reg', LinearRegression())]),
    'RF': lambda: RandomForestRegressor(**random_forest_params),
    'MLP': lambda: Pipeline([('scaler', StandardScaler()), ('mlp', MLPRegressor(**mlp_params))]),
    'MLP2': lambda: Pipeline([('scaler', StandardScaler()), ('mlp', MLPRegressor(**mlp_params2))]),
}

# A factory is called separately for each slot, so no two slots share an estimator object
model_pairs = {
    f'{name_f}+{name_L}': [make_model_f(), make_model_L()]
    for name_f, make_model_f in model_factories.items()
    for name_L, make_model_L in model_factories.items()
}

# Make sure the csv destination exists before the long evaluation starts,
# otherwise to_csv would fail only after all pairs of the first dataset were evaluated
Path('output_folder').mkdir(parents=True, exist_ok=True)

# Iterate over each dataset name in data_names
for data_name in data_names:
    # Load the dataset once per dataset instead of once per (model pair, cost) combination
    data = Dataset(data_name)
    results_all_pairs = dict()

    # Iterate over each model pair in model_pairs
    for model_name, (model_f, model_L) in model_pairs.items():
        # One list per metric; values are appended in the order of cost_list
        results_one_pair = {metrics: [] for metrics in metrics_list}

        # Iterate over each cost value in cost_list
        for cost in cost_list:
            print(f"==== Dataset: {data_name}, model_name: {model_name}, cost: {cost}")

            results_one_cost = evaluate_one_instance(data, model_f, model_L, cost)
            for metrics in metrics_list:
                results_one_pair[metrics].append(results_one_cost[metrics])

        results_all_pairs[model_name] = results_one_pair

    # Create a DataFrame from the results and save to csv files (one file per metric)
    df_output = create_df_with_results(results_all_pairs, cost_list)
    for metrics in metrics_list:
        df_output[metrics].to_csv(f'output_folder/{metrics}_{data_name}.csv')

In [5]:
# Merge the per-dataset csv files of every metric into a single '<metric>_all.csv' file
metrics_keeping_cost = ('RwR_loss', 'human_rate')

for metrics in metrics_list:
    per_dataset_frames = []

    # Load each dataset's csv for this metric, order its columns alphabetically
    # and tag the rows with the dataset name in a leading 'Dataset' column
    for data_name in data_names:
        frame = pd.read_csv(f'output_folder/{metrics}_{data_name}.csv')
        frame = frame.reindex(columns=sorted(frame.columns))
        frame.insert(0, 'Dataset', data_name)
        per_dataset_frames.append(frame)

    combined = pd.concat(per_dataset_frames, ignore_index=True)

    # Format every column after the first two with round_value_in_table
    for value_column in combined.columns[2:]:
        combined[value_column] = combined[value_column].apply(round_value_in_table)

    # For all other metrics the 'Cost' column is removed and identical rows are collapsed
    if metrics not in metrics_keeping_cost:
        combined = combined.drop(columns='Cost').drop_duplicates()

    combined.to_csv(f'output_folder/{metrics}_all.csv')

# 1. RwR Loss

In [None]:
# Load the combined RwR-loss table and remove any parenthesised text from the model-pair columns
pair_columns = list(model_pairs)
df_output = pd.read_csv('output_folder/RwR_loss_all.csv', index_col='Unnamed: 0')
df_output[pair_columns] = df_output[pair_columns].apply(
    lambda col: col.str.replace(r'\s*\(.*?\)', '', regex=True)
)
df_output

# 2. Human Rate

In [None]:
# Load the combined human-rate table and remove any parenthesised text from the model-pair columns
pair_columns = list(model_pairs)
df_output = pd.read_csv('output_folder/human_rate_all.csv', index_col='Unnamed: 0')
df_output[pair_columns] = df_output[pair_columns].apply(
    lambda col: col.str.replace(r'\s*\(.*?\)', '', regex=True)
)
df_output

# 3. MAE

In [None]:
# Load the combined MAE table, renumber its rows from zero,
# and remove any parenthesised text from the model-pair columns
pair_columns = list(model_pairs)
df_output = pd.read_csv('output_folder/mae_all.csv', index_col='Unnamed: 0')
df_output = df_output.reset_index(drop=True)
df_output[pair_columns] = df_output[pair_columns].apply(
    lambda col: col.str.replace(r'\s*\(.*?\)', '', regex=True)
)
df_output

# 4. RMSE

In [None]:
# Load the combined RMSE table, renumber its rows from zero,
# and remove any parenthesised text from the model-pair columns
pair_columns = list(model_pairs)
df_output = pd.read_csv('output_folder/rmse_all.csv', index_col='Unnamed: 0')
df_output = df_output.reset_index(drop=True)
df_output[pair_columns] = df_output[pair_columns].apply(
    lambda col: col.str.replace(r'\s*\(.*?\)', '', regex=True)
)
df_output

# 5. Bias

In [None]:
# Load the combined bias table. Like the MAE/RMSE/WAPE tables, this file was written after
# duplicate rows were dropped, so the stored index has gaps; renumber the rows from zero
# to display it consistently with those tables.
df_output = pd.read_csv('output_folder/bias_all.csv', index_col='Unnamed: 0').reset_index(drop=True)
# Remove any parenthesised text from the model-pair columns
df_output[list(model_pairs.keys())] = df_output[list(model_pairs.keys())].apply(lambda col: col.str.replace(r'\s*\(.*?\)', '', regex=True))
df_output

# 6. WAPE

In [None]:
# Load the combined WAPE table, renumber its rows from zero,
# and remove any parenthesised text from the model-pair columns
pair_columns = list(model_pairs)
df_output = pd.read_csv('output_folder/wape_all.csv', index_col='Unnamed: 0')
df_output = df_output.reset_index(drop=True)
df_output[pair_columns] = df_output[pair_columns].apply(
    lambda col: col.str.replace(r'\s*\(.*?\)', '', regex=True)
)
df_output

# 7. Visualization Example

In [None]:
# Example configuration for the plot: the 'energy' dataset,
# an MLP pipeline as model 'f' and a random forest as model 'L'
data_name = 'energy'
data = Dataset(data_name)

model_f = Pipeline(steps=[
    ('scaler', StandardScaler()),
    ('mlp', MLPRegressor(**mlp_params)),
])
model_L = RandomForestRegressor(**random_forest_params)

In [9]:
# Step 1: fit model 'f' on split 0 of the dataset and get back the processed train/test data
model_f_trained, X_train, X_test, Y_train, Y_test = get_trained_model_f_and_processed_data(
    model_f=model_f,
    data=data,
    split_index=0,
)

# Step 2: get the predictions of the trained model 'f' together with the
# regression targets (its losses) on which model 'L' is trained and tested
output_pred_train, output_pred_test, target_train, target_test = get_model_f_output_and_compute_losses(
    model_f_trained=model_f_trained,
    X_train=X_train, Y_train=Y_train,
    X_test=X_test, Y_test=Y_test,
)

# Step 3: fit model 'L' on the training losses and estimate the losses for the test data
model_L_trained = model_L.fit(X_train, target_train)
target_pred_test = model_L_trained.predict(X_test)

In [10]:
# Collect the actual targets, the predictions of model 'f', the errors estimated by model 'L'
# and the true errors in one dataframe, ordered by the target value with a fresh 0..n-1 index.
# NOTE(review): the estimated error is the square root of model 'L' output, whereas the true
# error column stores target_test unchanged - confirm both columns are on the same scale.
estimated_error_test = np.sqrt(target_pred_test)

predictions_and_errors_test = (
    pd.DataFrame({
        TARGET_COL: Y_test,
        PREDICTIONS_COL: output_pred_test,
        ESTIMATED_ERROR_COL: estimated_error_test,
        TRUE_ERROR_COL: target_test,
    })
    .sort_values(by=TARGET_COL)
    .reset_index(drop=True)
)

In [None]:
# Draw the targets, predictions and estimated errors; the title shows the dataset name
# with its first letter upper-cased
dataset_label = data_name[0].upper() + data_name[1:]
plot_title = f'Dataset: {dataset_label}, Regressor: MLP, Calibrator: RF'
plot_conditional_loss(predictions_and_errors_test, plot_title)