### GBLT and Neural-Tree Examples

#### Load and prepare data

In [69]:
import numpy as np
import pandas as pd
from modeva import DataSet
# Read the credit-default CSV into a pandas dataframe, then replace its
# header with the eleven column names used throughout this notebook.
# The assignment is positional, so names are listed in file column order.
df = pd.read_csv("credit_default.csv")
df.columns = [
    "employment",
    "income",
    "dti",
    "score",
    "amount",
    "tenure",
    "emp_length",
    "delinquencies",
    "savings",
    "utilization",
    "default",
]

In [70]:
# Create the Modeva DataSet holder and hand it the dataframe `df`.
# `ds` is the shared dataset object used by every model and test suite below.
ds = DataSet()
ds.load_dataframe(data = df) 

In [71]:
# Preprocessing steps: ordinal encoding for categorical features and
# standardization for every feature currently listed as numerical.
ds.encode_categorical(method="ordinal")
ds.scale_numerical(
    features=tuple(ds.feature_names_numerical),
    method="standardize",
)
# NOTE(review): the target is declared only after the scaling step is set up;
# confirm `feature_names_numerical` does not include "default" at that point.
ds.set_target("default")
ds.preprocess()

# Random train/test split, using the DataSet's default split settings
ds.set_random_split()

#### GBLT Training

In [84]:
from modeva.models import MoGLMTreeBoostClassifier

# Number of trees in boosting; the neural-tree models below reuse this value
N_TREES = 4

# Classification model named "GBLT" with max_depth=1 and N_TREES estimators
model_gblt = MoGLMTreeBoostClassifier(
    name="GBLT",
    max_depth=1,
    n_estimators=N_TREES,
)
# Fit on the training split: inputs ds.train_x, target ds.train_y
model_gblt.fit(ds.train_x, ds.train_y)

In [85]:
# Create a test suite that bundles the dataset with the fitted GBLT model
from modeva import TestSuite
ts = TestSuite(ds, model_gblt) # bundle of dataset and model, stored in `ts`
# Compute the accuracy diagnostics (the output below shows AUC, ACC, F1,
# LogLoss and Brier for train, test, and their gap)
result = ts.diagnose_accuracy_table()
# Leave the table as the last expression so the notebook renders it
result.table

Unnamed: 0,AUC,ACC,F1,LogLoss,Brier
train,0.9153,0.8742,0.6597,0.3113,0.0936
test,0.9169,0.8755,0.6631,0.313,0.0926
GAP,0.0016,0.0012,0.0034,0.0017,-0.0009


In [86]:
# Run the test suite's `interpret_ei` analysis for the GBLT model and plot it,
# limiting the chart to 10 bars.
# NOTE(review): presumably "ei" stands for effect importance — confirm against the Modeva docs.
results = ts.interpret_ei()
results.plot(n_bars=10)

In [91]:
# Effect analysis of the GBLT model restricted to the single feature "score",
# followed by its plot.
result = ts.interpret_effects(features="score")
result.plot()

#### Neural Tree Training

In [87]:
from modeva.models import MoNeuralTreeClassifier

# Classification model named "NeuralTree", using the same N_TREES estimator
# count as the GBLT model; all other settings are left at library defaults.
model_neut = MoNeuralTreeClassifier(
    name="NeuralTree",
    n_estimators=N_TREES,
)
# Fit on the training split: inputs ds.train_x, target ds.train_y
model_neut.fit(ds.train_x, ds.train_y)

In [93]:
# Bundle the dataset with the fitted NeuralTree model
ts_neut = TestSuite(ds, model_neut) # bundle of dataset and model, stored in `ts_neut`
# Compute the accuracy diagnostics for this model
result = ts_neut.diagnose_accuracy_table()
# Leave the table as the last expression so the notebook renders it
result.table

Unnamed: 0,AUC,ACC,F1,LogLoss,Brier
train,0.9156,0.8738,0.6558,0.3108,0.0935
test,0.917,0.8765,0.6639,0.3129,0.0926
GAP,0.0014,0.0027,0.0082,0.0021,-0.0008


In [94]:
# Effect analysis of the NeuralTree model for the single feature "score",
# matching the GBLT plot of the same feature.
result = ts_neut.interpret_effects(features="score")
result.plot()

#### Imposing Monotonicity Constraints

In [104]:
# Neural-tree classifier with monotonicity lists given by feature name
# (feature names are taken from ds.feature_names):
#   mono_increasing_list -> utilization, dti, delinquencies
#   mono_decreasing_list -> score, income, savings, emp_length
# NOTE(review): nn_max_epochs=10 is set only for this model, while the
# unconstrained "NeuralTree" model uses the library default — confirm that
# the side-by-side comparison is meant to differ in this setting too.
model_neut_mono = MoNeuralTreeClassifier(
    name="NeuralTree-mono",
    n_estimators=N_TREES,
    nn_max_epochs=10,
    feature_names=ds.feature_names,
    mono_increasing_list=("utilization", "dti", "delinquencies"),
    mono_decreasing_list=("score", "income", "savings", "emp_length"),
)
# Fit on the training split: inputs ds.train_x, target ds.train_y
model_neut_mono.fit(ds.train_x, ds.train_y)

In [105]:
# Bundle the dataset with the fitted monotone NeuralTree model
ts_neut_mono = TestSuite(ds, model_neut_mono) # bundle of dataset and model, stored in `ts_neut_mono`
# Compute the accuracy diagnostics for this model
result = ts_neut_mono.diagnose_accuracy_table()
# Leave the table as the last expression so the notebook renders it
result.table

Unnamed: 0,AUC,ACC,F1,LogLoss,Brier
train,0.9141,0.8735,0.6579,0.3125,0.094
test,0.9153,0.876,0.6667,0.3147,0.0932
GAP,0.0012,0.0025,0.0088,0.0022,-0.0008


In [106]:
# Effect analysis of the monotone NeuralTree model for "score", which this
# notebook places in mono_decreasing_list when building the model.
result = ts_neut_mono.interpret_effects(features="score")
result.plot()

In [107]:
# Put all three fitted models into one test suite (passed via `models=`) and
# tabulate their accuracy metrics; the output groups the columns by model name.
tsc = TestSuite(ds, models=[model_gblt, model_neut, model_neut_mono])
results = tsc.compare_accuracy_table()
results.table

Unnamed: 0_level_0,GBLT,GBLT,GBLT,GBLT,GBLT,NeuralTree,NeuralTree,NeuralTree,NeuralTree,NeuralTree,NeuralTree-mono,NeuralTree-mono,NeuralTree-mono,NeuralTree-mono,NeuralTree-mono
Unnamed: 0_level_1,AUC,ACC,F1,LogLoss,Brier,AUC,ACC,F1,LogLoss,Brier,AUC,ACC,F1,LogLoss,Brier
train,0.9153,0.8742,0.6597,0.3113,0.0936,0.9156,0.8738,0.6558,0.3108,0.0935,0.9141,0.8735,0.6579,0.3125,0.094
test,0.9169,0.8755,0.6631,0.313,0.0926,0.917,0.8765,0.6639,0.3129,0.0926,0.9153,0.876,0.6667,0.3147,0.0932
GAP,0.0016,0.0012,0.0034,0.0017,-0.0009,0.0014,0.0027,0.0082,0.0021,-0.0008,0.0012,0.0025,0.0088,0.0022,-0.0008


In [108]:
# Rebuild the same three-model test suite, run the residual-cluster comparison
# with dataset="test", and plot the "cluster_performance" view of the result.
tsc = TestSuite(ds, models=[model_gblt, model_neut, model_neut_mono])
results = tsc.compare_residual_cluster(dataset="test")
results.plot("cluster_performance")