### Mixture of Experts (MoE) Example

#### Load and prepare data

In [1]:
import numpy as np
import pandas as pd
from modeva import DataSet
# Short, readable names for the columns of the credit-default file, in file order
column_names = [
    "employment", "income", "dti", "score", "amount",
    "tenure", "emp_length", "delinquencies",
    "savings", "utilization", "default",
]
# Read the CSV into a pandas dataframe and apply the names positionally
df = pd.read_csv("credit_default.csv")
df.columns = column_names

In [2]:
# Wrap the dataframe in a Modeva DataSet; the models and test suites in
# this notebook read their data from this object
ds = DataSet()
ds.load_dataframe(data=df)

✓ Auth code found in local storage.
Authenticating Modeva...
✓ License is active and valid.
✓ Authenticated successfully!


In [3]:
## Preprocess the data
# Encode categorical columns with the "ordinal" method
ds.encode_categorical(method="ordinal")
# Standardize every column currently listed in ds.feature_names_numerical.
# NOTE(review): this list is read before set_target() is called -- confirm
# that the target column is not scaled along with the features.
ds.scale_numerical(features=tuple(ds.feature_names_numerical), method="standardize") # standardize numerical features
# Declare "default" as the prediction target
ds.set_target('default')
# presumably applies the encoding/scaling steps configured above -- TODO confirm against the Modeva docs
ds.preprocess()
## Split data into training and testing sets randomly
# NOTE(review): no seed is passed here; confirm whether the split is reproducible across runs.
ds.set_random_split() 

#### Fit a monotonic XGBoost model

In [5]:
from modeva.models import MoXGBClassifier  # Modeva's XGBoost classifier

# Depth-2 boosted trees with one monotonic constraint per feature
# (XGBoost convention: 1 = increasing, -1 = decreasing, 0 = unconstrained)
model_xgb2_mono = MoXGBClassifier(
    name="XGB_model",
    max_depth=2,
    n_estimators=700,
    learning_rate=0.01,
    monotone_constraints="(0, -1, 1, -1, 0, 0, -1, 1, -1, 1)",
)
# Fit on the training split held by the dataset (inputs: ds.train_x, target: ds.train_y)
model_xgb2_mono.fit(ds.train_x, ds.train_y)

In [7]:
from modeva import TestSuite

# Bundle the dataset with the fitted model so diagnostics can be run on the pair
ts_xgb2_mono = TestSuite(ds, model_xgb2_mono)
# Accuracy metrics of the monotonic XGBoost model
results_xgb2_mono = ts_xgb2_mono.diagnose_accuracy_table()
# NOTE(review): the recorded output reports F1 = 0.0 on both train and test,
# i.e. no true positives -- worth investigating before relying on this baseline.
results_xgb2_mono.table

Unnamed: 0,AUC,ACC,F1,LogLoss,Brier
train,0.805599,0.783625,0.0,0.404629,0.135442
test,0.800143,0.7905,0.0,0.40015,0.134097
GAP,-0.005456,0.006875,0.0,-0.004479,-0.001345


In [9]:
# Plot the "confusion_matrix" / "test" figure from the accuracy diagnostics of the XGBoost model
results_xgb2_mono.plot(name=("confusion_matrix", "test"))

#### Fit a MoE with all features for clustering

In [22]:
from modeva.models import MoMoEClassifier

# Mixture-of-experts classifier with three clusters and the same monotonic
# constraints as the XGBoost model; no cluster_features argument is passed here
model_moe = MoMoEClassifier(
    name="MOE_raw",
    max_depth=2,
    n_estimators=200,
    n_clusters=3,
    learning_rate=0.05,
    monotone_constraints=(0, -1, 1, -1, 0, 0, -1, 1, -1, 1),
)
model_moe.fit(ds.train_x, ds.train_y)

In [23]:
# Bundle the dataset with the fitted MoE model
ts_moe = TestSuite(ds, model_moe)
# Accuracy metrics of the MoE model
results_moe = ts_moe.diagnose_accuracy_table()
results_moe.table

Unnamed: 0,AUC,ACC,F1,LogLoss,Brier
train,0.899992,0.854875,0.576432,0.324193,0.102397
test,0.876102,0.846,0.538922,0.341422,0.109051
GAP,-0.02389,-0.008875,-0.03751,0.017229,0.006654


In [24]:
# Plot the "confusion_matrix" / "test" figure from the accuracy diagnostics of the MoE model
results_moe.plot(name=("confusion_matrix", "test"))

#### Understanding Clusters

In [25]:
from modeva import TestSuite

# Cluster analysis of the fitted MoE model; `results.value` is consumed by
# the PSI computation further down, so keep this name bound until then
ts = TestSuite(ds, model_moe)
results = ts.interpret_moe_cluster_analysis()
results.plot()

##### Extract the features with high PSI 

In [29]:
# Compute the per-variable PSI drift score for every MoE cluster.
# Each entry of results.value carries a "data_info" mapping that is forwarded
# to ds.data_drift_test as keyword arguments.
psi_frames = []  # one single-row frame per cluster, concatenated once below
for cluster_id, item in results.value.items():
    data_results = ds.data_drift_test(**item["data_info"],
                                      distance_metric="PSI",
                                      psi_method="uniform",
                                      psi_bins=10)
    psi_frames.append(pd.DataFrame(data_results.value["Distance_Scores"], index=[cluster_id]))
# A single concat avoids re-copying the accumulated frame on every iteration
# and avoids concatenating onto an empty DataFrame; with no clusters the
# result stays an empty frame, as before.
psi_df = pd.concat(psi_frames).T if psi_frames else pd.DataFrame()  # rows: variables, columns: cluster ids
psi_df

Unnamed: 0,0,1,2
utilization,5.601684,1.518487,0.377562
score,4.243836,0.926484,0.401078
dti,3.014038,0.554979,0.091168
income,0.95681,0.268286,0.102121
savings,0.948118,0.220502,0.090339
employment,0.887031,0.195703,0.064573
delinquencies,0.669345,0.187783,0.013501
emp_length,0.47481,0.426879,4.730833
default,0.043692,0.017439,0.000126
amount,0.029832,0.031991,0.054106


#### Fit a new MoE with reduced cluster features

In [30]:
top_k = 3  # number of top-ranked variables used to define the clusters
# Rank variables by their mean PSI across clusters. Only model input features
# are eligible: psi_df also contains a row for the target, which must never be
# selected as a clustering feature.
feature_psi = psi_df.loc[psi_df.index.isin(ds.feature_names)]
cluster_features = (
    feature_psi.mean(axis=1)
    .sort_values(ascending=False)
    .iloc[:top_k]
    .index.tolist()
)
cluster_features

['utilization', 'emp_length', 'score']

In [31]:
# Second MoE: two clusters, with clustering restricted to the selected
# cluster_features (named relative to ds.feature_names); learning_rate is not set here
model_moe_2 = MoMoEClassifier(
    name="MOE_3_cluster_features",
    max_depth=2,
    n_estimators=100,
    n_clusters=2,
    feature_names=ds.feature_names,
    cluster_features=cluster_features,
    monotone_constraints=(0, -1, 1, -1, 0, 0, -1, 1, -1, 1),
)
model_moe_2.fit(ds.train_x, ds.train_y)

#### Compare performance

In [32]:
# Side-by-side accuracy metrics for the two MoE models
tsc = TestSuite(ds, models=[model_moe, model_moe_2])
# NOTE: this rebinds `results`, which previously held the cluster-analysis result
results = tsc.compare_accuracy_table()
results.table

Unnamed: 0_level_0,MOE_raw,MOE_raw,MOE_raw,MOE_raw,MOE_raw,MOE_3_cluster_features,MOE_3_cluster_features,MOE_3_cluster_features,MOE_3_cluster_features,MOE_3_cluster_features
Unnamed: 0_level_1,AUC,ACC,F1,LogLoss,Brier,AUC,ACC,F1,LogLoss,Brier
train,0.899992,0.854875,0.576432,0.324193,0.102397,0.903774,0.861125,0.6165,0.313462,0.098793
test,0.876102,0.846,0.538922,0.341422,0.109051,0.869045,0.8495,0.566906,0.346901,0.111543
GAP,-0.02389,-0.008875,-0.03751,0.017229,0.006654,-0.03473,-0.011625,-0.049593,0.033438,0.012751


#### Retrieve the expert model objects
##### The experts (model.estimators_) and the gating models (model.pr_estimators_) are each stored in a dict keyed by cluster id

In [38]:
# Dict of expert models, keyed by cluster id as a string ('0', '1', ...)
model_moe_2.estimators_

{'0': MoXGBClassifier(base_score=None, booster=None, callbacks=None,
                 colsample_bylevel=None, colsample_bynode=None,
                 colsample_bytree=None, device=None, early_stopping_rounds=None,
                 enable_categorical=False, eval_metric=None, feature_types=None,
                 gamma=None, grow_policy=None, importance_type=None,
                 interaction_constraints=None, learning_rate=None, max_bin=None,
                 max_cat_threshold=None, max_cat_to_onehot=None,
                 max_delta_step=None, max_depth=2, max_leaves=None,
                 min_child_weight=None, missing=nan,
                 monotone_constraints=(0, -1, 1, -1, 0, 0, -1, 1, -1, 1),
                 multi_strategy=None, n_estimators=100, n_jobs=None,
                 num_parallel_tree=None, objective='binary:logistic', ...),
 '1': MoXGBClassifier(base_score=None, booster=None, callbacks=None,
                 colsample_bylevel=None, colsample_bynode=None,
                 

#### Retrieve the gating model object

In [36]:
# Dict of gating models, keyed by cluster id as a string; the recorded output
# shows a CalibratedClassifierCV wrapping a MoXGBClassifier for cluster '0'
model_moe_2.pr_estimators_

{'0': CalibratedClassifierCV(cv=3,
                        estimator=MoXGBClassifier(base_score=None, booster=None,
                                                  callbacks=None,
                                                  colsample_bylevel=None,
                                                  colsample_bynode=None,
                                                  colsample_bytree=None,
                                                  device=None,
                                                  early_stopping_rounds=None,
                                                  enable_categorical=False,
                                                  eval_metric=None,
                                                  feature_types=None, gamma=None,
                                                  grow_policy=None,
                                                  importance_type=None,
                                                  interaction_constraints=None,
           

#### Usage example
##### Example: computing the final log-odds of an MoE classifier

In [39]:
def get_logodds(model, X):
    """Blend the expert scores of a fitted MoE classifier using its gating models.

    For every cluster id in ``model.unique_labels_`` the gating model
    ``model.pr_estimators_[str(id)]`` scores the cluster-feature columns of
    ``X`` (last column of ``predict_proba``) and the expert
    ``model.estimators_[str(id)]`` scores all columns via ``decision_function``
    (treated here as the expert's log-odds). The result is the gating-weighted
    average of the expert scores.

    Parameters
    ----------
    model : fitted MoE classifier
        Must expose ``cluster_features_index_``, ``unique_labels_``,
        ``pr_estimators_`` and ``estimators_``.
    X : array-like of shape (n_samples, n_features)
        Anything ``np.asarray`` can turn into a 2-D array (ndarray, DataFrame,
        nested list).

    Returns
    -------
    numpy.ndarray of shape (n_samples,)
        Rows whose gating weights sum to zero come out as NaN (0 / 0).
    """
    # Coerce up front so the positional column indexing below also works for
    # DataFrames and nested lists; this is a no-op for ndarray input.
    X = np.asarray(X)
    X_cluster = X[:, model.cluster_features_index_]

    weighted_sum = np.zeros((X.shape[0],))
    weight_total = np.zeros((X.shape[0],))
    for cluster_id in model.unique_labels_:
        key = str(cluster_id)  # both estimator dicts use string keys
        # gating: weight assigned to this cluster
        gate_weight = model.pr_estimators_[key].predict_proba(X_cluster)[:, -1]
        # expert: raw decision score of this cluster's model
        expert_score = model.estimators_[key].decision_function(X)
        # mix
        weighted_sum += gate_weight * expert_score
        weight_total += gate_weight
    # Normalize by the total gating weight so the weights effectively sum to one
    return weighted_sum / weight_total

# Blended log-odds of the reduced-feature MoE on the training inputs
logodds = get_logodds(model_moe_2, ds.train_x)
logodds

array([ 0.96724962, -4.80232567,  1.79297159, ..., -3.56607748,
       -5.52288524, -4.16895773])