# General ALE Plotting Examples with Synthetic Data

## PDPBox is used to generate partial dependence plots (PDPs) for comparison

In [None]:
from multiprocessing import cpu_count

import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestRegressor

from alepython import ale_plot
from pdpbox import info_plots, pdp

### Generate the synthetic data and train our model

In [None]:
# Fix the global NumPy seed so every random draw in this cell is reproducible.
np.random.seed(1)
n_samples = int(1e4)

# Four independent feature columns drawn with np.random.random.
X = pd.DataFrame(np.random.random((n_samples, 4)), columns=["a", "b", "d", "e"])

# Introduce some correlations into the data: "c" is "a" plus scaled random noise,
# slotted in between "b" and "d" so the column order is a, b, c, d, e.
X.insert(2, "c", X["a"] + 0.8 * np.random.random(n_samples))

# Target: linear in a, b and c, plus a small noise term ...
noise = 0.2 * np.random.random(n_samples)
y = X["a"] + X["b"] - X["c"] + noise

# ... with an a-b interaction: rows where b > 0.5 additionally lose 3 * a.
b_is_high = X["b"] > 0.5
y.loc[b_is_high] -= 3 * X.loc[b_is_high, "a"]

# Constant offset applied to every target value.
y = y + 10

# Apply a transformation to one of the columns to illustrate quantile-based axis labelling.
# This must come after `y` is built, because `y` depends on the untransformed "a".
X["a"] = X["a"] ** 3

# A small random forest (10 trees, depth <= 10, at least 4 samples per leaf)
# with a fixed random_state so the fit is reproducible.
model = RandomForestRegressor(
    n_estimators=10,
    max_depth=10,
    min_samples_leaf=4,
    random_state=1,
    # -1 is scikit-learn's documented "use all processors" value; it leaves the
    # worker count to scikit-learn/joblib instead of hard-coding the host's
    # logical CPU count.
    n_jobs=-1,
)
# Fit on the synthetic features/target; as the last expression of the cell,
# the fitted estimator is also displayed.
model.fit(X, y)

### Note the high correlation between 'a' and 'c'

In [None]:
# Pairwise correlation matrix of the feature columns; "pearson" is the
# default method of DataFrame.corr, spelled out here for the reader.
X.corr(method="pearson")

In [None]:
# "a" was cubed when the data was generated, so its cube root recovers the
# original draw; bin that against "b".
plt.hexbin(X["a"] ** (1 / 3), X["b"], gridsize=20)
# Wrap the label in $...$ so matplotlib renders it as mathtext rather than
# printing the raw TeX source.
plt.xlabel("$a^{1/3}$")
plt.ylabel("b")
# Colour bar for the hexbin counts; assign to `_` to suppress the cell output repr.
_ = plt.colorbar()

In [None]:
# Same view as for "b", but against "c", which was constructed from the
# original (un-cubed) "a" plus noise.
plt.hexbin(X["a"] ** (1 / 3), X["c"], gridsize=20)
# Wrap the label in $...$ so matplotlib renders it as mathtext rather than
# printing the raw TeX source.
plt.xlabel("$a^{1/3}$")
plt.ylabel("c")
# Colour bar for the hexbin counts; assign to `_` to suppress the cell output repr.
_ = plt.colorbar()

In [None]:
# Re-seed right before the call so the Monte Carlo replicas are repeatable.
# NOTE(review): assumes ale_plot samples through NumPy's global RNG - confirm.
np.random.seed(1)

# Options for the single-feature ALE plot of "a".
ale_options = dict(
    bins=20,
    monte_carlo=True,
    monte_carlo_rep=200,
    monte_carlo_ratio=10,
    center=True,
    quantile_axis=True,
    verbose=True,
)
fig, axes = ale_plot(model, X, ("a",), **ale_options)

# Rotate the x tick labels of the ALE axis by 40 degrees.
axes["ale"].xaxis.set_tick_params(rotation=40)

In [None]:
# Re-seed right before the call so the Monte Carlo replicas are repeatable.
# NOTE(review): assumes ale_plot samples through NumPy's global RNG - confirm.
np.random.seed(1)

# Options for the single-feature ALE plot of "b"; note bins=6 here, where the
# plots for "a" and "c" use 20.
ale_options = dict(
    bins=6,
    monte_carlo=True,
    monte_carlo_rep=200,
    monte_carlo_ratio=10,
    center=True,
    quantile_axis=True,
    verbose=True,
)
fig, axes = ale_plot(model, X, ("b",), **ale_options)

# Rotate the x tick labels of the ALE axis by 40 degrees.
axes["ale"].xaxis.set_tick_params(rotation=40)

In [None]:
# Re-seed right before the call so the Monte Carlo replicas are repeatable.
# NOTE(review): assumes ale_plot samples through NumPy's global RNG - confirm.
np.random.seed(1)

# Options for the single-feature ALE plot of "c"; plot_quantiles=True is
# enabled here, and the returned axes then include a "quantiles_x" entry.
ale_options = dict(
    bins=20,
    monte_carlo=True,
    monte_carlo_rep=200,
    monte_carlo_ratio=10,
    center=True,
    plot_quantiles=True,
    quantile_axis=True,
    verbose=True,
)
fig, axes = ale_plot(model, X, ("c",), **ale_options)

# Rotate the x tick labels of both the ALE axis and the quantile axis by 45 degrees.
for axis_key in ("ale", "quantiles_x"):
    axes[axis_key].xaxis.set_tick_params(rotation=45)

In [None]:
# PDP of feature "a" for comparison with the ALE plot of the same feature.
# Step 1: compute the isolated partial dependence on 20 grid points.
pdp_isolate_out = pdp.pdp_isolate(
    model=model,
    dataset=X,
    model_features=X.columns,
    feature="a",
    num_grid_points=20,
)

# Step 2: draw it, centred, including individual lines, on a quantile x axis.
pdp_plot_options = dict(
    plot_lines=True,
    frac_to_plot=200,
    x_quantile=True,
    center=True,
    figsize=(7, 5),
)
fig, axes = pdp.pdp_plot(pdp_isolate_out, "a", **pdp_plot_options)

# Rotate the x tick labels of the PDP axis by 45 degrees.
axes["pdp_ax"].xaxis.set_tick_params(rotation=45)

In [None]:
# PDP of feature "b" for comparison with the ALE plot of the same feature.
# Step 1: compute the isolated partial dependence on 20 grid points.
pdp_isolate_out = pdp.pdp_isolate(
    model=model,
    dataset=X,
    model_features=X.columns,
    feature="b",
    num_grid_points=20,
)

# Step 2: draw it, centred, including individual lines, on a quantile x axis.
pdp_plot_options = dict(
    plot_lines=True,
    frac_to_plot=200,
    x_quantile=True,
    center=True,
    figsize=(7, 5),
)
fig, axes = pdp.pdp_plot(pdp_isolate_out, "b", **pdp_plot_options)

# Rotate the x tick labels of the PDP axis by 45 degrees.
axes["pdp_ax"].xaxis.set_tick_params(rotation=45)

In [None]:
# PDP of feature "c" for comparison with the ALE plot of the same feature.
# Step 1: compute the isolated partial dependence on 20 grid points.
pdp_isolate_out = pdp.pdp_isolate(
    model=model,
    dataset=X,
    model_features=X.columns,
    feature="c",
    num_grid_points=20,
)

# Step 2: draw it, centred, including individual lines, on a quantile x axis.
pdp_plot_options = dict(
    plot_lines=True,
    frac_to_plot=200,
    x_quantile=True,
    center=True,
    figsize=(7, 5),
)
fig, axes = pdp.pdp_plot(pdp_isolate_out, "c", **pdp_plot_options)

# Rotate the x tick labels of the PDP axis by 45 degrees.
axes["pdp_ax"].xaxis.set_tick_params(rotation=45)

In [None]:
# Scatter of the target against feature "a", overlaid with two subsets selected on "b".
# Every series uses bare circular markers with no connecting line.
marker_style = dict(marker="o", linestyle="")

# All samples.
plt.plot(X["a"], y, label="raw", **marker_style)

# Samples with b below 0.5, i.e. those that did not receive the extra
# -3 * a term when the target was generated.
b_low = X["b"] < 0.5
plt.plot(X["a"][b_low], y[b_low], c="C1", alpha=0.3, label="b < 0.5", **marker_style)

# A narrow band of b just below the 0.5 threshold.
b_band = b_low & (X["b"] > 0.4)
plt.plot(
    X["a"][b_band],
    y[b_band],
    c="C2",
    alpha=0.3,
    label="0.4 < b < 0.5",
    **marker_style,
)

# Label the axes so the figure can be read on its own.
plt.xlabel("a")
plt.ylabel("y")
_ = plt.legend(loc="best")

In [None]:
# Scatter of the target against feature "b" (markers only, no connecting line).
plt.plot(X["b"], y, marker="o", linestyle="")
# Label the axes so this figure can be told apart from the other feature scatters.
plt.xlabel("b")
_ = plt.ylabel("y")

In [None]:
# Scatter of the target against feature "c" (markers only, no connecting line).
plt.plot(X["c"], y, marker="o", linestyle="")
# Label the axes so this figure can be told apart from the other feature scatters.
plt.xlabel("c")
_ = plt.ylabel("y")

In [None]:
# ALE plot for the feature pair ("a", "b") with ale_plot's default plot kind.
fig, axes = ale_plot(
    model,
    X,
    ("a", "b"),
    bins=20,
    quantile_axis=True,
)
# Rotate the x tick labels of the ALE axis by 45 degrees.
axes["ale"].xaxis.set_tick_params(rotation=45)

In [None]:
# ALE plot for the feature pair ("a", "b") again, now with
# include_first_order=True and the "grid" plot kind.
grid_options = {"kind": "grid"}
fig, axes = ale_plot(
    model,
    X,
    ("a", "b"),
    bins=20,
    quantile_axis=True,
    include_first_order=True,
    plot_kwargs=grid_options,
)
# Rotate the x tick labels of the ALE axis by 45 degrees.
axes["ale"].xaxis.set_tick_params(rotation=45)

In [None]:
# ALE plot for the correlated feature pair ("a", "c") with ale_plot's default plot kind.
fig, axes = ale_plot(
    model,
    X,
    ("a", "c"),
    bins=20,
    quantile_axis=True,
)
# Rotate the x tick labels of the ALE axis by 45 degrees.
axes["ale"].xaxis.set_tick_params(rotation=45)

In [None]:
# ALE plot for ("a", "c") with the "grid" plot kind and indicate_empty=0.
# NOTE(review): indicate_empty presumably controls how bins without samples
# are marked - confirm against the alepython documentation.
grid_options = {"kind": "grid", "indicate_empty": 0}
fig, axes = ale_plot(
    model,
    X,
    ("a", "c"),
    bins=20,
    quantile_axis=True,
    plot_kwargs=grid_options,
)
# Rotate the x tick labels of the ALE axis by 45 degrees.
axes["ale"].xaxis.set_tick_params(rotation=45)

In [None]:
# ALE plot for ("a", "c") with the "grid" plot kind, indicate_empty=0.4 and
# plot_quantiles=True; the returned axes then include a "quantiles_x" entry.
# NOTE(review): indicate_empty presumably controls how bins without samples
# are marked - confirm against the alepython documentation.
grid_options = {"kind": "grid", "indicate_empty": 0.4}
fig, axes = ale_plot(
    model,
    X,
    ("a", "c"),
    bins=20,
    quantile_axis=True,
    plot_quantiles=True,
    plot_kwargs=grid_options,
)
# Rotate the x tick labels of both the ALE axis and the quantile axis by 45 degrees.
for axis_key in ("ale", "quantiles_x"):
    axes[axis_key].xaxis.set_tick_params(rotation=45)

In [None]:
# ALE plot for ("a", "c") with include_first_order=True, drawn with the
# "contourf" plot kind.
contourf_options = {"kind": "contourf"}
fig, axes = ale_plot(
    model,
    X,
    ("a", "c"),
    bins=20,
    quantile_axis=True,
    include_first_order=True,
    plot_kwargs=contourf_options,
)
# Rotate the x tick labels of the ALE axis by 45 degrees.
axes["ale"].xaxis.set_tick_params(rotation=45)

In [None]:
# ALE plot for ("a", "c") with include_first_order=True, drawn with the
# "gridcontour" plot kind.
gridcontour_options = {"kind": "gridcontour"}
fig, axes = ale_plot(
    model,
    X,
    ("a", "c"),
    bins=20,
    quantile_axis=True,
    include_first_order=True,
    plot_kwargs=gridcontour_options,
)
# Rotate the x tick labels of the ALE axis by 45 degrees.
axes["ale"].xaxis.set_tick_params(rotation=45)

In [None]:
# Compute the PDPBox interaction output for the feature pair ("a", "b"),
# using 15 grid points per feature. Kept in its own cell, separate from the
# plotting call that consumes `pdp_interact_out`.
interact_features = ["a", "b"]
pdp_interact_out = pdp.pdp_interact(
    model=model,
    dataset=X,
    model_features=X.columns,
    features=interact_features,
    num_grid_points=[15, 15],
)

In [None]:
# Plot the previously computed ("a", "b") interaction on quantile axes, with
# plot_pdp=False. The result is bound to `_` to suppress the cell's output repr.
_ = pdp.pdp_interact_plot(
    pdp_interact_out,
    ["a", "b"],
    x_quantile=True,
    plot_pdp=False,
)