<a href="https://colab.research.google.com/github/cisimon7/Machine-Learning-with-plotly/blob/main/Cross_Validation_and_Grid_Search_Visualization.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from sklearn.linear_model import LassoCV
from sklearn.model_selection import GridSearchCV
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.tree import DecisionTreeRegressor

<b>Visualize Regularization across cross-validation folds</b>

In [1]:
# Number of cross-validation folds; also the number of per-fold traces drawn below
N_FOLD: int = 6

In [5]:
# Load the gapminder data and split it into features / target

df = px.data.gapminder()

# Target: life expectancy
y = df['lifeExp']

# Features: everything except the target and the numeric ISO code,
# with the categorical columns one-hot encoded
X = pd.get_dummies(
    df.drop(columns=['lifeExp', 'iso_num']),
    columns=['country', 'continent', 'iso_alpha'],
)

In [6]:
# Train model to predict life expectancy.
#
# `LassoCV(normalize=True)` was deprecated in scikit-learn 1.0 and removed in
# 1.2, so passing it raises a TypeError on current releases. The feature
# scaling is therefore done by an explicit pipeline step instead.
# NOTE(review): the alpha grid is on a different numeric scale than with the
# removed option, so selected alpha values are not directly comparable.
lasso_pipeline = make_pipeline(
    StandardScaler(with_mean=False),
    LassoCV(cv=N_FOLD),
)
lasso_pipeline.fit(X, y)

# Later cells read `alphas_`, `mse_path_` and `alpha_` from `model`, so keep
# that name bound to the fitted LassoCV step of the pipeline.
model = lasso_pipeline[-1]

# mse_path_ is indexed [alpha, fold] below; averaging over the last axis
# gives the mean MSE across folds for each alpha.
mean_alphas = model.mse_path_.mean(axis=-1)

In [7]:
# Hover text shared by every trace in the figure
HOVER_TEMPLATE = "alpha: %{x} <br>MSE: %{y}"

# One dashed, semi-transparent MSE curve per cross-validation fold
fold_traces = [
    go.Scatter(
        name=f"Fold: {fold+1}",
        x=model.alphas_,
        y=model.mse_path_[:, fold],
        line=dict(dash='dash'),
        opacity=.5,
        hovertemplate=HOVER_TEMPLATE,
    )
    for fold in range(N_FOLD)
]

# Thick black curve: MSE averaged over the folds
mean_trace = go.Scatter(
    name='Mean',
    x=model.alphas_,
    y=mean_alphas,
    line=dict(color='black', width=3),
    hovertemplate=HOVER_TEMPLATE,
)

fig = go.Figure(fold_traces)
fig.add_traces(mean_trace)

# Dashed vertical marker at the fitted model's alpha_; yref='paper' with
# y0=0 / y1=1 makes it span the full height of the plotting area
fig.add_shape(
    type="line",
    x0=model.alpha_, x1=model.alpha_,
    y0=0, y1=1, yref='paper',
    line=dict(dash='dash'),
)

fig.update_layout(
    xaxis_type="log",
    xaxis_title='alpha',
    yaxis_title="Mean Square Error (MSE)",
)
fig.show()

<b>Grid Search Visualization using px.density_heatmap and px.box</b>

In [2]:
# Number of cross-validation folds used by the grid search
N_FOLD = 6

# Load and shuffle the dataframe (fixed seed so the row order is reproducible)
df = px.data.iris().sample(frac=1, random_state=0)

# Predict petal width from the two sepal measurements
y = df['petal_width']
X = df[['sepal_width', 'sepal_length']]

In [18]:
# Define and fit the grid.
#
# The criterion names 'mse' and 'mae' were deprecated in scikit-learn 1.0 and
# removed in 1.2; their replacements are 'squared_error' and 'absolute_error'.
# A fixed random_state makes the fitted trees (and therefore the scores
# plotted later) reproducible across Restart & Run All.
model = DecisionTreeRegressor(random_state=0)
param_grid = {
    'criterion': ['squared_error', 'friedman_mse', 'absolute_error'],
    'max_depth': range(2, 5)
}
grid = GridSearchCV(model, param_grid, cv=N_FOLD)
grid.fit(X, y)

# One row per parameter combination, with per-split and aggregate scores
grid_df = pd.DataFrame(grid.cv_results_)

In [4]:
# Reshape the wide grid-search results into the long layout accepted by
# plotly.express: one row per (parameter combination, CV split)
split_score_cols = [f'split{i}_test_score' for i in range(N_FOLD)]
kept_cols = ['mean_test_score', 'mean_fit_time', 'criterion', 'max_depth']

melted = (
    grid_df
    # 'param_criterion' -> 'criterion', 'param_max_depth' -> 'max_depth'
    .rename(columns=lambda col: col.replace('param_', ''))
    .melt(
        id_vars=kept_cols,
        value_vars=split_score_cols,
        var_name="cv_split",
        value_name="r_squared"
    )
    # Format the variable names for simplicity: 'split3_test_score' -> '3'
    .assign(
        cv_split=lambda long_df: (
            long_df['cv_split']
            .str.replace('_test_score', '')
            .str.replace('split', '')
        )
    )
)

In [5]:
# Each figure is produced by a single plotly.express call

# Heatmap of r_squared over (max_depth, criterion), one facet per CV split
fig_hmap = px.density_heatmap(
    melted,
    x="max_depth", y='criterion', z="r_squared",
    histfunc="sum",
    facet_col="cv_split", facet_col_wrap=3,
    hover_data=['mean_fit_time'],
    labels={'mean_test_score': "mean_r_squared"},
    title='Grid search results on individual fold',
)

# Box plot of r_squared per max_depth, coloured by criterion,
# with every individual split score drawn as a point
fig_box = px.box(
    melted,
    x='max_depth', y='r_squared',
    color="criterion",
    points='all',
    hover_name='cv_split',
    hover_data=['mean_fit_time'],
    labels={'mean_test_score': "mean_r_squared"},
    title='Grid search results ',
)

# Display
for figure in (fig_hmap, fig_box):
    figure.show()