In [None]:
from itertools import combinations

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestRegressor

from alepython import ale_plot

In [None]:
# Build a reproducible synthetic regression problem and fit the model that the
# ALE plots below will explain.
rng = np.random.default_rng(0)
n_samples = 10_000
X = pd.DataFrame(rng.random((n_samples, 2)), columns=["a", "b"])

# Make 'c' a noisy linear mix of 'a' and 'b' so that it is correlated with both
# of them ('a' and 'b' themselves are drawn independently).
X["c"] = 0.4 * X["a"] + 0.6 * X["b"] + rng.random(n_samples)
X = X[["a", "b", "c"]]  # Pin the column order explicitly.

# Construct the output variable: linear in every feature plus uniform noise.
y = 0.5 * X["a"] + 0.1 * X["b"] - 0.3 * X["c"] + 0.2 * rng.random(n_samples)

# Introduce an interaction effect between 'b' and 'c': 'c' is subtracted a
# second time, but only on rows where 'b' exceeds 0.5. The same boolean mask is
# applied to both sides so the update does not rely on implicit index alignment
# between a masked and an unmasked Series.
b_is_high = X["b"] > 0.5
y.loc[b_is_high] -= X.loc[b_is_high, "c"]

model = RandomForestRegressor(
    n_estimators=100,
    max_depth=10,
    min_samples_leaf=4,
    random_state=1,  # Fixed seed so the fitted forest is reproducible.
    n_jobs=-1,
)
model.fit(X, y)

### Get first-order ALE plots and data

In [None]:
# Options shared by every first-order `ale_plot` call. The figure and the
# random generator are created per feature inside the loop instead.
# NOTE(review): `monte_carlo_ratio` is presumably a sampling fraction, in which
# case 20 looks unusually large - confirm against the installed alepython version.
first_order_kwargs = dict(
    bins=20,
    monte_carlo=True,
    monte_carlo_rep=200,
    monte_carlo_ratio=20,
    verbose=True,
    plot_quantiles=True,
    center=True,
    quantile_axis=True,
    return_data=True,
)

# One entry per column of X, in column order; later cells rely on that order.
first_order_data = []
for feature in X.columns:
    fig, axes, data = ale_plot(
        model,
        X,
        feature,
        fig=plt.figure(),  # A new figure for each feature.
        rng=np.random.default_rng(0),  # Identically seeded generator per feature.
        **first_order_kwargs,
    )
    first_order_data.append(data)

    # Rotate the tick labels on both x-axes of the plot.
    for axis_key in ("ale", "quantiles_x"):
        axes[axis_key].xaxis.set_tick_params(rotation=45)

### Get second-order ALE plots and data

In [None]:
# Options shared by every second-order `ale_plot` call; a new figure is still
# created per feature pair inside the loop.
second_order_kwargs = dict(
    bins=20,
    plot_quantiles=True,
    quantile_axis=True,
    return_data=True,
    n_jobs=-1,
)

# One entry per unordered pair of columns, in `combinations` order; the
# importance cell further down iterates the pairs in the same order.
second_order_data = []
for features in combinations(X.columns, 2):
    fig, axes, data = ale_plot(
        model,
        X,
        features,
        fig=plt.figure(),
        **second_order_kwargs,
    )
    second_order_data.append(data)

    # Rotate the tick labels on both x-axes of the plot.
    for axis_key in ("ale", "quantiles_x"):
        axes[axis_key].xaxis.set_tick_params(rotation=45)

### Importances based on the vertical extent of the first-order ALE plots

In [None]:
# Importance of a feature = peak-to-peak range (max - min) of its first-order
# ALE values. Each stored entry is unpacked as a (quantiles, ale) pair, matched
# to its feature by position in X.columns.
first_order_imps = pd.Series(
    {
        feature: np.ptp(ale_values)
        for feature, (_quantiles, ale_values) in zip(X.columns, first_order_data)
    },
    name="1st Order Importance",
)
first_order_imps

### Importances based on the amplitude of the second-order ALE plots

In [None]:
# Importance of a feature pair = peak-to-peak range (max - min) of its
# second-order ALE values. Each stored entry is unpacked as a
# (quantiles, ale, samples) triple, matched to its pair by iterating
# `combinations` in the same order as when the data was collected.
second_order_imps = pd.Series(
    {
        pair: np.ptp(ale_values)
        for pair, (_quantiles, ale_values, _samples) in zip(
            combinations(X.columns, 2), second_order_data
        )
    },
    name="2nd Order Importance",
)
second_order_imps

### Evaluate the feature correlations

In [None]:
# Pairwise correlation matrix of the features. The method is spelled out
# explicitly; "pearson" is also what pandas uses when no method is given.
X.corr(method="pearson")