In [3]:
import pandas as pd
import joblib
import plotly.colors as colors
import plotly.graph_objects as go
from plotly.subplots import make_subplots

In [4]:
# Load the four persisted grid-search objects (model family x resampling
# strategy). Paths are relative to the notebook's working directory.
# NOTE: joblib.load unpickles its input, so only point it at trusted files.
grid_RF_OS, grid_RF_US, grid_HGB_OS, grid_HGB_US = (
    joblib.load(dump_path)
    for dump_path in (
        './RandomForest/RandomForest_GridSearch_RandomOverSampling.joblib',
        './RandomForest/RandomForest_GridSearch_RandomUnderSampling.joblib',
        './GradientBoosting/GradientBoosting_GridSearch_RandomOverSampling.joblib',
        './GradientBoosting/GradientBoosting_GridSearch_RandomUnderSampling.joblib',
    )
)

In [5]:
# Hyper-parameter grids keyed by "<model>_<sampling>". Both sampling variants
# of a model share the same grid values; every entry gets its own fresh lists
# so that mutating one grid never affects another.
# The first key of each grid (n_estimators / max_iter) is looked up by
# position by the plotting cell, so the key order matters.
param_grids = {
    f"{model_family}_{sampling}": {
        param: list(candidates) for param, candidates in base_grid.items()
    }
    for model_family, base_grid in (
        (
            "RandomForest",
            {
                "n_estimators": [50, 100, 150],
                "max_depth": [3, 6, 9],
                "max_leaf_nodes": [3, 6, 9],
            },
        ),
        (
            "HistGradientBoosting",
            {
                "max_iter": [50, 100, 150],
                "learning_rate": [0.5, 0.1, 0.01],
                "max_depth": [3, 9, None],
            },
        ),
    )
    for sampling in ("OverSampling", "UnderSampling")
}

In [6]:
# For every fitted grid search, keep only the cross-validation rows where one
# hyper-parameter is pinned to a single value, and tag them with a model label.
# Each tuple is: (model label, grid-search object, pinned column, pinned value).
# The order here fixes the order of `results`, which the plotting cell uses to
# pick trace colours.
selections = (
    ('RandomForest_OverSampling', grid_RF_OS, "param_classifier__max_leaf_nodes", 3),
    ('HistGradientBoosting_OverSampling', grid_HGB_OS, "param_classifier__learning_rate", 0.1),
    ('RandomForest_UnderSampling', grid_RF_US, "param_classifier__max_leaf_nodes", 3),
    ('HistGradientBoosting_UnderSampling', grid_HGB_US, "param_classifier__learning_rate", 0.1),
)

results = []
for model_label, grid_search, pinned_column, pinned_value in selections:
    all_runs = pd.DataFrame(grid_search.cv_results_)
    kept_runs = all_runs[all_runs[pinned_column] == pinned_value]
    results.append({"model": model_label, "cv_results": kept_runs})



In [11]:
# Speed/score trade-off figure: a 3x2 grid of scatter plots.
#   rows    -> one per max_depth slice (3, then 6-or-unset, then 9)
#   columns -> mean fit time (left) and mean score time (right) on x,
#              mean test score on y
# One trace per (model, row, column); traces of the same model share a colour
# and a legend group, and only the top-left trace creates the legend entry.
fig = make_subplots(
    rows=3,
    cols=2,
    shared_xaxes=True,
    shared_yaxes=True,
    x_title="Train time (s)                                                                Predict time (s)",
    # NOTE(review): the title says "R2" but the tuned step is named
    # "classifier"; the scorer is not visible in this notebook -- confirm
    # which metric `mean_test_score` actually holds and adjust the label.
    y_title="Test R2 score - higher is better",
    subplot_titles=["Train time vs score", "Predict time vs score"],
)
model_names = [result["model"] for result in results]
# Repeat the qualitative palette so there is always one colour per model.
colors_list = colors.qualitative.Plotly * (
    len(model_names) // len(colors.qualitative.Plotly) + 1
)

# (max_depth value, also include rows where max_depth is missing/None).
# The middle row pairs max_depth == 6 with the rows that have no max_depth.
depth_rows = [(3, False), (6, True), (9, False)]
# (x column, matching std column) for the left and right subplot columns.
time_columns = [
    ("mean_fit_time", "std_fit_time"),
    ("mean_score_time", "std_score_time"),
]

for idx, result in enumerate(results):
    # round() returns a new frame, so the frames stored in `results` are not
    # modified by the column assignments below.
    cv_results = result["cv_results"].round(3)
    model_name = result["model"]
    # First hyper-parameter of the model's grid, copied to a plain-named
    # column together with the model label. These two columns are not read by
    # the traces built in this cell.
    param_name = list(param_grids[model_name].keys())[0]
    cv_results[param_name] = cv_results["param_classifier__" + param_name]
    cv_results["model"] = model_name

    depth_values = cv_results["param_classifier__max_depth"]

    for row, (depth, include_missing) in enumerate(depth_rows, start=1):
        depth_mask = depth_values == depth
        if include_missing:
            depth_mask = depth_mask | depth_values.isna()
        depth_slice = cv_results[depth_mask]

        for col, (mean_column, std_column) in enumerate(time_columns, start=1):
            trace = go.Scatter(
                x=depth_slice[mean_column],
                y=depth_slice["mean_test_score"],
                # type="data" makes Plotly read the bar lengths from `array`.
                # With type="percent" the `array` is ignored and a constant
                # percentage (`value`, default 10) is drawn instead.
                error_x=dict(type="data", array=depth_slice[std_column]),
                error_y=dict(type="data", array=depth_slice["std_test_score"]),
                marker=dict(color=colors_list[idx]),
                legendgroup=model_name,
                # Named on every trace so hover labels show the model rather
                # than an auto-generated "trace N".
                name=model_name,
                # A single legend entry per model: the top-left trace.
                showlegend=(row == 1 and col == 1),
            )
            fig.add_trace(trace, row=row, col=col)


fig.update_layout(
    legend=dict(x=1, y=0.01, traceorder="normal", borderwidth=1),
    title=dict(x=0.5, text="Speed-score trade-off of tree-based ensembles"),
    height=500,
)


fig.show()