<a href="https://colab.research.google.com/github/iterative/dvclive/blob/main/examples/DVCLive-scikit-learn.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# DVCLive and scikit-learn: Regression Example

## Setup

In [None]:
!pip install dvclive scikit-learn

In [None]:
# Create a Git repository in the current working directory (-q: quiet output).
!git init -q
# Set a repo-local commit identity so the `git commit` below has an author
# even when no global Git identity is configured.
!git config --local user.email "you@example.com"
!git config --local user.name "Your Name"
# Initialise DVC inside the Git repository (-q: quiet output).
!dvc init -q
# Record the initial state as the first commit.
# NOTE(review): relies on `dvc init` having staged its files — confirm.
!git commit -m "DVC init"

In [None]:
from sklearn.datasets import load_diabetes
from sklearn.model_selection import train_test_split

# `load_boston` was removed in scikit-learn 1.2, so importing it fails on
# current releases. `load_diabetes` is a regression dataset that ships with
# scikit-learn, so no download is required.
X, y = load_diabetes(return_X_y=True)

# Fixed seed so the train/test partition is the same on every run.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

## Tracking experiments with DVCLive

In [None]:
from dvclive import Live

from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error

# One DVCLive-tracked run per forest size.
for n_estimators in (10, 50, 100):
    with Live() as live:
        live.log_param("n_estimators", n_estimators)

        # Seed the forest so repeated runs produce the same metrics.
        model = RandomForestRegressor(n_estimators=n_estimators, random_state=42)
        model.fit(X_train, y_train)

        # plot=False: these are single scalar values per run, so they are
        # recorded as summary metrics only and not as plot series.
        y_train_pred = model.predict(X_train)
        live.log_metric(
            "train/mse", mean_squared_error(y_train, y_train_pred), plot=False
        )

        y_test_pred = model.predict(X_test)
        live.log_metric(
            "test/mse", mean_squared_error(y_test, y_test_pred), plot=False
        )

        # Both metrics above are excluded from plots, so log an explicit
        # actual-vs-predicted scatter; otherwise the run writes no plot data
        # for `dvc plots diff` to render.
        live.log_plot(
            "test_predictions",
            [
                {"actual": float(actual), "predicted": float(predicted)}
                for actual, predicted in zip(y_test, y_test_pred)
            ],
            x="actual",
            y="predicted",
            template="scatter",
            title="Test set: actual vs. predicted",
        )

## Comparing results

In [None]:
import dvc.api
import pandas as pd

# Columns to keep from the experiments table, in display order.
columns = ["Experiment", "train.mse", "test.mse", "n_estimators"]

# Build the table from `dvc.api.exp_show()`, drop every row with a missing
# value in any selected column, and renumber the remaining rows from zero.
df = (
    pd.DataFrame(dvc.api.exp_show(), columns=columns)
    .dropna()
    .reset_index(drop=True)
)
df

In [None]:
# Pass the output of `dvc exp list --names-only` as the revisions for
# `dvc plots diff` to compare.
!dvc plots diff $(dvc exp list --names-only)

In [None]:
from IPython.display import HTML

# Embed the HTML report from the local `dvc_plots` directory in the cell output.
plots_report = './dvc_plots/index.html'
HTML(filename=plots_report)