#### Outcome Analysis: Resilience

In [1]:
from modeva import DataSet
from modeva import TestSuite
from modeva.models import MoElasticNet, MoMoERegressor, MoXGBRegressor

#### Data Loading and Preparation

In [2]:
# Create an empty Modeva DataSet and load the data named "BikeSharing" into it
ds = DataSet()
ds.load(name="BikeSharing")
# Last expression of the cell: display the loaded data table
ds.data

✓ Auth code found in local storage.
Authenticating Modeva...
✓ License is active and valid.
✓ Authenticated successfully!


Unnamed: 0,season,yr,mnth,hr,holiday,weekday,workingday,weathersit,temp,atemp,hum,windspeed,cnt
0,1,0,1,0,0,6,0,1,0.24,0.2879,0.81,0.0000,16
1,1,0,1,1,0,6,0,1,0.22,0.2727,0.80,0.0000,40
2,1,0,1,2,0,6,0,1,0.22,0.2727,0.80,0.0000,32
3,1,0,1,3,0,6,0,1,0.24,0.2879,0.75,0.0000,13
4,1,0,1,4,0,6,0,1,0.24,0.2879,0.75,0.0000,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...
17374,1,1,12,19,0,1,1,2,0.26,0.2576,0.60,0.1642,119
17375,1,1,12,20,0,1,1,2,0.26,0.2576,0.60,0.1642,89
17376,1,1,12,21,0,1,1,1,0.26,0.2576,0.60,0.1642,90
17377,1,1,12,22,0,1,1,1,0.26,0.2727,0.56,0.1343,61


In [3]:
# Randomly split the data, using the method's default arguments
ds.set_random_split()
# Use the "cnt" column as the prediction target
ds.set_target("cnt")
# Apply the "log1p" scaling method to the target column.
# NOTE(review): downstream error metrics are then presumably reported on the
# transformed scale rather than in raw counts -- confirm
ds.scale_numerical(features=("cnt",), method="log1p")
# Restrict modelling to these eight columns; season, yr, workingday and atemp
# from the loaded table are not included
ds.set_active_features(["mnth", "hr", "holiday", "weekday", "weathersit", "temp", "hum", "windspeed"])
# Run preprocessing with the settings configured above
ds.preprocess()
# Display the resulting feature names as a sanity check
ds.feature_names

['mnth', 'hr', 'holiday', 'weekday', 'weathersit', 'temp', 'hum', 'windspeed']

#### Build GLM, XGBoost (Depth-2 and 5), MoE of XGBoost Depth-2

In [9]:
# Interpretable baseline: XGBoost regressor restricted to depth-2 trees
model_xgb2 = MoXGBRegressor(name="XGB_model_2", max_depth=2)
# Fit on the training split (inputs: ds.train_x, target: ds.train_y)
model_xgb2.fit(ds.train_x, ds.train_y)
# Pair the dataset with the fitted model in a TestSuite, then compute and
# display the accuracy table
ts_xgb2 = TestSuite(ds, model_xgb2)
results_xgb2 = ts_xgb2.diagnose_accuracy_table()
results_xgb2.table

Unnamed: 0,MSE,MAE,R2
train,0.259076,0.390789,0.87091
test,0.271801,0.397787,0.865674
GAP,0.012725,0.006998,-0.005236


#### Residual Analysis

In [11]:
# Residual clustering on the test split (random-forest proximity), followed by
# a plot of per-cluster performance
results_RF = ts_xgb2.diagnose_residual_cluster(
    dataset="test",                # which split to analyse
    response_type="abs_residual",  # response type passed to the diagnostic
    metric="MSE",                  # performance metric
    n_clusters=10,                 # number of clusters
    cluster_method="pam",          # clustering method
    sample_size=2000,              # sample size
    rf_n_estimators=100,           # number of trees in the random forest
    rf_max_depth=5,                # maximum depth of each tree
)
results_RF.plot("cluster_performance", figsize=(6, 4))

#### Resilience assessment using Worst-Cluster (K-means) drift scenario 

In [18]:

# Resilience assessment using the Worst-Cluster scenario (10 clusters, MSE metric)
results = ts_xgb2.diagnose_resilience(method="worst-cluster", n_clusters=10, metric="MSE")
results.plot(figsize=(6,4))

#### Resilience assessment using Worst-Sample drift scenario 

In [21]:
# Resilience assessment using the Worst-Sample scenario (MSE metric).
# NOTE: `results` is rebound here; in top-to-bottom order the later
# data_drift_test cell reads `results.value[0.1]` from this cell's output.
results = ts_xgb2.diagnose_resilience(method="worst-sample", metric="MSE")
results.plot(figsize=(6,4))

##### Characterizing the worst drift condition

In [36]:
# Characterize the drift behind the resilience result currently held in
# `results`: the "data_info" entry stored under key 0.1 is unpacked as keyword
# arguments for the data-drift test, scored with PSI (psi_method="uniform",
# 10 bins), and the summary figure is plotted.
# NOTE(review): `results` is rebound by several cells; in top-to-bottom order
# it holds the Worst-Sample result at this point -- re-run that cell first if
# executing out of order.
data_results = ds.data_drift_test(
    **results.value[0.1]["data_info"],
    distance_metric="PSI",
    psi_method="uniform",
    psi_bins=10,
)
data_results.plot("summary", figsize=(6, 4))

In [35]:
# List the figure names available on the drift-test result (usable with data_results.plot)
data_results.get_figure_names()

['summary',
 ('density', 'mnth'),
 ('density', 'hr'),
 ('density', 'holiday'),
 ('density', 'weekday'),
 ('density', 'weathersit'),
 ('density', 'temp'),
 ('density', 'hum'),
 ('density', 'windspeed'),
 ('density', 'cnt')]

##### Plotting most important variables

In [27]:
# Density figure for "cnt" (the target column), selected by its figure name
data_results.plot(name=('density', 'cnt'), figsize=(6,4))

In [26]:
# Density figure for the "hr" feature, selected by its figure name
data_results.plot(name=('density', 'hr'), figsize=(6,4))

#### Resilience assessment using Outer-Sample drift scenario

In [20]:

# Resilience assessment using the Outer-Sample scenario (MSE metric).
# NOTE: this rebinds `results`, replacing the Worst-Sample result that the
# data_drift_test cell reads; re-run the Worst-Sample cell before re-running
# that drift cell.
results = ts_xgb2.diagnose_resilience(method="outer-sample", metric="MSE")
results.plot(figsize=(6,4))