# General ALE Plotting Examples - Dummy Data - PDPBox Comparison

In [None]:
from multiprocessing import cpu_count

import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestRegressor

from alepython import ale_plot
from pdpbox import info_plots, pdp

# Use a resolution of 100 dpi for every figure created in this notebook.
mpl.rcParams["figure.dpi"] = 100

### Generate the dummy data

In [None]:
# Synthetic regression problem: five uniform random feature columns, of which
# only "a", "b" and "c" contribute to the target.
np.random.seed(1)
n_samples = 10_000
feature_names = ["a", "b", "c", "d", "e"]
X = pd.DataFrame(
    np.random.random((n_samples, len(feature_names))), columns=feature_names
)

# Additive part of the target plus random noise scaled by 0.2.
noise = 0.2 * np.random.random(X.shape[0])
y = X["a"] + X["b"] - X["c"] + noise

# Interaction between "a" and "b": rows with b > 0.5 additionally lose 3 * a.
b_is_high = X["b"] > 0.5
y.loc[b_is_high] -= 3 * X.loc[b_is_high, "a"]

# The target was built from the original "a"; the model is only shown a ** 3.
X["a"] = X["a"] ** 3

forest_params = dict(
    n_estimators=50,
    max_depth=6,
    min_samples_leaf=4,
    random_state=1,
    n_jobs=cpu_count(),
)
model = RandomForestRegressor(**forest_params)
model.fit(X, y)

In [None]:
# Pairwise Pearson correlation between the feature columns (after "a" was cubed).
X.corr(method="pearson")

In [None]:
# ALE plot for the single feature "a".
# The global NumPy RNG is reseeded first - presumably so that the Monte Carlo
# replicas drawn by ale_plot are repeatable (confirm against alepython).
np.random.seed(1)
ale_kwargs = dict(
    bins=20,
    monte_carlo=True,
    monte_carlo_rep=5,
    monte_carlo_ratio=20,
    center=True,
    quantile_axis=True,
)
fig, ax = ale_plot(model, X, ("a",), **ale_kwargs)
# Tilt the x tick labels by 35 degrees.
ax.xaxis.set_tick_params(rotation=35)

In [None]:
# ALE plot for the single feature "b", using 200 Monte Carlo replicas.
# The global NumPy RNG is reseeded first - presumably so that the Monte Carlo
# replicas drawn by ale_plot are repeatable (confirm against alepython).
np.random.seed(1)
ale_kwargs = dict(
    bins=20,
    monte_carlo=True,
    monte_carlo_rep=200,
    monte_carlo_ratio=10,
    center=True,
    quantile_axis=True,
)
fig, ax = ale_plot(model, X, ("b",), **ale_kwargs)
# Tilt the x tick labels by 35 degrees.
ax.xaxis.set_tick_params(rotation=35)

In [None]:
# ALE plot for the single feature "c", using 200 Monte Carlo replicas and with
# verbose=True passed through to ale_plot.
# The global NumPy RNG is reseeded first - presumably so that the Monte Carlo
# replicas drawn by ale_plot are repeatable (confirm against alepython).
np.random.seed(1)
ale_kwargs = dict(
    bins=20,
    monte_carlo=True,
    monte_carlo_rep=200,
    monte_carlo_ratio=10,
    center=True,
    quantile_axis=True,
    verbose=True,
)
fig, ax = ale_plot(model, X, ("c",), **ale_kwargs)
# Tilt the x tick labels by 35 degrees.
ax.xaxis.set_tick_params(rotation=35)

In [None]:
# PDPBox counterpart for feature "a": compute the isolated partial dependence
# on 10 grid points, then plot it.
isolate_kwargs = dict(
    model=model,
    dataset=X,
    model_features=X.columns,
    feature="a",
    num_grid_points=10,
)
pdp_isolate_out = pdp.pdp_isolate(**isolate_kwargs)

plot_kwargs = dict(
    plot_lines=True,
    frac_to_plot=20,
    x_quantile=True,
    center=True,
    figsize=(7, 5),
)
# Bind the return value to "_" so the notebook does not echo it.
_ = pdp.pdp_plot(pdp_isolate_out, "a", **plot_kwargs)

In [None]:
# PDPBox counterpart for feature "b": compute the isolated partial dependence
# on 10 grid points, then plot it.
isolate_kwargs = dict(
    model=model,
    dataset=X,
    model_features=X.columns,
    feature="b",
    num_grid_points=10,
)
pdp_isolate_out = pdp.pdp_isolate(**isolate_kwargs)

plot_kwargs = dict(
    plot_lines=True,
    frac_to_plot=20,
    x_quantile=True,
    center=True,
    figsize=(7, 5),
)
# Bind the return value to "_" so the notebook does not echo it.
_ = pdp.pdp_plot(pdp_isolate_out, "b", **plot_kwargs)

In [None]:
# PDPBox counterpart for feature "c": compute the isolated partial dependence
# on 20 grid points, then plot it.
isolate_kwargs = dict(
    model=model,
    dataset=X,
    model_features=X.columns,
    feature="c",
    num_grid_points=20,
)
pdp_isolate_out = pdp.pdp_isolate(**isolate_kwargs)

plot_kwargs = dict(
    plot_lines=True,
    frac_to_plot=20,
    x_quantile=True,
    center=True,
    figsize=(7, 5),
)
# Bind the return value to "_" so the notebook does not echo it.
_ = pdp.pdp_plot(pdp_isolate_out, "c", **plot_kwargs)

In [None]:
# Scatter the target against feature "a": all rows first, then two overlays
# restricted to progressively narrower ranges of "b".
markers_only = dict(marker="o", linestyle="")
b_below_half = X["b"] < 0.5
mask = b_below_half & (X["b"] > 0.4)

plt.plot(X["a"], y, label="raw", **markers_only)
plt.plot(
    X.loc[b_below_half, "a"],
    y[b_below_half],
    c="C1",
    alpha=0.3,
    label="b < 0.5",
    **markers_only,
)
plt.plot(
    X.loc[mask, "a"],
    y[mask],
    c="C2",
    alpha=0.3,
    label="0.4 < b < 0.5",
    **markers_only,
)
plt.legend(loc="best")

In [None]:
# Raw target values against feature "b", drawn as unconnected circle markers.
plt.plot(X["b"], y, "o")

In [None]:
# Raw target values against feature "c", drawn as unconnected circle markers.
plt.plot(X["c"], y, "o")

In [None]:
# ALE plot for the feature pair ("a", "b") with 10 bins.
ale_kwargs = dict(
    bins=10,
    monte_carlo=True,
    monte_carlo_rep=5,
    monte_carlo_ratio=20,
    center=True,
    quantile_axis=True,
)
fig, ax = ale_plot(model, X, ("a", "b"), **ale_kwargs)
# Tilt the x tick labels by 20 degrees.
ax.xaxis.set_tick_params(rotation=20)

In [None]:
# ALE plot for the feature pair ("a", "b") with 10 bins, this time passing
# pure_second_order=False to ale_plot.
ale_kwargs = dict(
    bins=10,
    monte_carlo=True,
    monte_carlo_rep=5,
    monte_carlo_ratio=20,
    center=True,
    quantile_axis=True,
    pure_second_order=False,
)
fig, ax = ale_plot(model, X, ("a", "b"), **ale_kwargs)
# Tilt the x tick labels by 20 degrees.
ax.xaxis.set_tick_params(rotation=20)

In [None]:
# PDPBox counterpart for the ("a", "b") pair: two-feature partial dependence
# with 20 grid points requested for each feature.
interaction_features = ["a", "b"]
grid_points = [20, 20]
pdp_interact_out = pdp.pdp_interact(
    model=model,
    dataset=X,
    model_features=X.columns,
    features=interaction_features,
    num_grid_points=grid_points,
)

In [None]:
# Plot the two-feature partial dependence computed for "a" and "b".
# Bind the return value to "_" so the notebook does not echo it.
interaction_features = ["a", "b"]
_ = pdp.pdp_interact_plot(
    pdp_interact_out,
    interaction_features,
    x_quantile=True,
    plot_pdp=False,
)