In [None]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from interpret.glassbox import ExplainableBoostingRegressor
from interpret import show

In [None]:
# ---------------------------------------------------------
# 1) Load the raw data
# ---------------------------------------------------------
df = pd.read_csv("Datasets/drivingData.csv")
print(df.head())

# ---------------------------------------------------------
# 2) Random sampling of up to 10,000 rows
#    - For reproducibility, set a random seed (can be anything)
#    - DataFrame.sample draws without replacement here and raises
#      ValueError when n exceeds the number of rows, so the request
#      is capped at len(df); a smaller file is then used in full.
# ---------------------------------------------------------
random_seed = 42
num_samples = 10_000
sample_size = min(num_samples, len(df))
sampled_df = df.sample(n=sample_size, random_state=random_seed)

# ---------------------------------------------------------
# 3) Separate the features from the target column ("RSRP")
# ---------------------------------------------------------
X_df = sampled_df.drop(columns=["RSRP"])
y_df = sampled_df["RSRP"]

# ---------------------------------------------------------
# 4) Train / validation / test split
#    - The test split is carved out first; valRatio is then applied to
#      what remains, so with 0.2 / 0.2 the validation set is roughly
#      16% of the sampled rows (train ~64%, test ~20%).
# ---------------------------------------------------------
testRatio = 0.2
valRatio = 0.2
X_train_test, X_test, y_train_test, y_test = train_test_split(
    X_df, y_df, test_size=testRatio, random_state=random_seed
)
X_train, X_val, y_train, y_val = train_test_split(
    X_train_test, y_train_test, test_size=valRatio, random_state=random_seed
)

# ---------------------------------------------------------
# 5) Normalize target
#    - Min-max scaling whose offset and scale come from the training
#      target only; the same transform is applied to the validation and
#      test targets, so their values may fall outside [0, 1].
# ---------------------------------------------------------
normalize = True
if normalize:
    min_train = y_train.min()
    max_train = y_train.max()
    # Fall back to 1.0 for a constant training target to avoid dividing by zero.
    denom = max_train - min_train if max_train != min_train else 1.0

    y_train = (y_train - min_train) / denom
    y_val   = (y_val - min_train)   / denom
    y_test  = (y_test - min_train)  / denom

In [None]:

# ---------------------------------------------------------
# 6) Train an EBM with parameters analogous to GAMI-Net
#    (see explanation below)
# ---------------------------------------------------------
# The hyperparameters are gathered in a single mapping so they can be
# inspected (or logged) in one place before the estimator is constructed.
ebm_params = {
    # Reproducibility: same seed as the sampling / splitting above
    "random_state": random_seed,
    # Boosting budget -- plays the role of "epochs"
    "max_rounds": 1500,
    # Step size applied at every boosting iteration
    "learning_rate": 0.001,
    # How many pairwise interaction terms to include
    "interactions": 20,
    # Early-stopping controls
    "early_stopping_rounds": 50,
    "early_stopping_tolerance": 1e-4,
    # Number of outer bags (tunable)
    "outer_bags": 4,
    # Tree complexity
    "max_leaves": 40,        # analogous to 'numNode'
    "min_samples_leaf": 5,   # analogous to 'tolerance' for leaf size
    # Bin count, set explicitly rather than relying on the library default
    "max_bins": 256,
    # Portion of the training data the EBM holds out to validate internally
    "validation_size": 0.2,
}
model = ExplainableBoostingRegressor(**ebm_params)


In [None]:

def _predict_and_report_rmse(estimator, split_name, features, target):
    """Predict on one split, print its RMSE, and return both.

    Args:
        estimator: Fitted model exposing ``predict``.
        split_name: Label placed in front of "RMSE" in the printed line.
        features: Feature table for the split.
        target: True target values for the split.

    Returns:
        Tuple ``(predictions, rmse)`` for the split.
    """
    predictions = estimator.predict(features)
    squared_error = (predictions - target) ** 2
    rmse = np.sqrt(np.mean(squared_error))
    print(f"{split_name} RMSE: {rmse:.4f}")
    return predictions, rmse


print("Training EBM...")
model.fit(X_train, y_train)
print("EBM training complete.")

# Score each split in turn (train, then validation, then test); the
# per-split predictions and scores stay available under their own names.
y_train_pred, train_rmse = _predict_and_report_rmse(model, "Train", X_train, y_train)
y_val_pred, val_rmse = _predict_and_report_rmse(model, "Val", X_val, y_val)
y_test_pred, test_rmse = _predict_and_report_rmse(model, "Test", X_test, y_test)



In [None]:

# ---------------------------------------------------------
# 7) Global explanation
#    - Ask the fitted model for its global explanation object and
#      render it with interpret's `show`.
# ---------------------------------------------------------
global_exp = model.explain_global()
show(global_exp)

# Keep the explanation's data payload for later use; it is not consumed
# within the visible part of this cell.
# NOTE(review): data() is called without a key -- presumably this returns
# the overall summary rather than a single term; confirm against the
# interpret documentation.
exp_data = global_exp.data()