In [None]:
# | default_exp _experiments.compas

# COMPAS

## Running in Google Colab

You can run this experiment in Google Colab by clicking the button below:

<a href="https://colab.research.google.com/github/airtai/monotonic-nn/blob/main/nbs/experiments/Compas.ipynb" target="_blank">
  <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open in Colab" />
</a>


In [None]:
# | hide

# `display` is imported explicitly so the cell does not rely on the name being
# injected into the namespace by the IPython shell.
from IPython.display import Markdown, display, display_markdown

# Detect Google Colab by probing for its runtime-only package. Only a failed
# import means "not running in Colab"; a bare `except:` would also swallow
# KeyboardInterrupt/SystemExit and hide unrelated errors.
try:
    import google.colab

    in_colab = True
except ImportError:
    in_colab = False

# Show a short orientation note to readers who opened the notebook in Colab.
if in_colab:
    display(
        Markdown(
            """
### If you see this message, you are running in Google Colab
Along with this interactive tutorial the content of this notebook is organized and formatted for documentation purpuoses. 

You can ignore the '# | hide', '# | notest' and '# | echo: false' comments, they are not important for the tutorial.
    """
        )
    )

## Dataset

COMPAS [1] is a dataset containing the criminal records of 6,172 individuals
arrested in Florida. The task is to predict whether the individual will commit a crime again
in 2 years. The probability predicted by the system will be used as a risk score. As mentioned in [2], 13 attributes are used for prediction. The risk score should be monotonically increasing w.r.t. four attributes: the number of prior adult convictions, the number of juvenile felonies, the number of juvenile misdemeanors, and the number of other convictions. The `monotonicity_indicator` values corresponding to these features are set to 1.

References: 

1. J. Angwin, J. Larson, S. Mattu, and L. Kirchner. Machine bias: There’s software used across the country to predict future criminals. And it’s biased against blacks. ProPublica, 2016.

2. Xingchao Liu, Xing Han, Na Zhang, and Qiang Liu. Certified monotonic neural networks. Advances in Neural Information Processing Systems, 33:15427–15438, 2020


In [None]:
# Monotonicity flag for each of the 13 input features, in column order.
# The four criminal-history counts are flagged 1 (risk score must be
# monotonically increasing in them); every other feature is flagged 0
# (presumably "no constraint" -- confirm against the MonoDense documentation).
monotonicity_indicator = {
    **dict.fromkeys(
        ["priors_count", "juv_fel_count", "juv_misd_count", "juv_other_count"],
        1,
    ),
    **dict.fromkeys(
        [
            "age",
            "race_0",
            "race_1",
            "race_2",
            "race_3",
            "race_4",
            "race_5",
            "sex_0",
            "sex_1",
        ],
        0,
    ),
}

In [None]:
# | hide

# NOTE(review): this cell repeats the Colab-detection cell at the top of the
# notebook; one of the two can likely be removed.

# `display` is imported explicitly so the cell does not rely on the name being
# injected into the namespace by the IPython shell.
from IPython.display import Markdown, display, display_markdown

# Detect Google Colab by probing for its runtime-only package. Only a failed
# import means "not running in Colab"; a bare `except:` would also swallow
# KeyboardInterrupt/SystemExit and hide unrelated errors.
try:
    import google.colab

    in_colab = True
except ImportError:
    in_colab = False

# Show a short orientation note to readers who opened the notebook in Colab.
if in_colab:
    display(
        Markdown(
            """
### If you see this message, you are running in Google Colab
Along with this interactive tutorial the content of this notebook is organized and formatted for documentation purpuoses. 

You can ignore the '# | hide', '# | notest' and '# | echo: false' comments, they are not important for the tutorial.
    """
        )
    )

In [None]:
# | hide

if in_colab:
    !pip install monotonic-nn

In [None]:
# | hide

if in_colab:
    !pip install "monotonic-nn[experiments]"

In [None]:
# | include: false

from airt.keras.experiments import (
    create_tuner_stats,
    find_hyperparameters,
    get_train_n_test_data,
)

In [None]:
# | include: false
import shutil
from os import environ

import tensorflow as tf

In [None]:
# | include: false

# Ask TensorFlow to grow GPU memory on demand instead of reserving it up front.
environ["TF_FORCE_GPU_ALLOW_GROWTH"] = "true"

gpus = tf.config.list_physical_devices("GPU")
if gpus:
    # Restrict TensorFlow to a single GPU. The second device is preferred when
    # it exists (unchanged behaviour on multi-GPU hosts); on single-GPU
    # machines such as Colab fall back to the only device instead of failing
    # with an uncaught IndexError on `gpus[1]`.
    selected_gpu = gpus[1] if len(gpus) > 1 else gpus[0]
    try:
        tf.config.set_visible_devices(selected_gpu, "GPU")
        logical_gpus = tf.config.list_logical_devices("GPU")
        print(len(gpus), "Physical GPUs,", len(logical_gpus), "Logical GPU")
    except RuntimeError as e:
        # Visible devices must be set before GPUs have been initialized
        print(e)

3 Physical GPUs, 1 Logical GPU


These are a few examples of the dataset:

In [None]:
# | echo: false

# Load the COMPAS train/test frames and preview the first five training rows,
# transposed so that every feature is shown on its own row.
train_df, test_df = get_train_n_test_data(dataset_name="compas")
display(train_df.head(n=5).transpose().style)

Unnamed: 0,0,1,2,3,4
priors_count,0.368421,0.0,0.026316,0.394737,0.052632
juv_fel_count,0.0,0.0,0.0,0.0,0.0
juv_misd_count,0.0,0.0,0.0,0.0,0.0
juv_other_count,0.0,0.0,0.0,0.0,0.0
age,0.230769,0.051282,0.179487,0.230769,0.102564
race_0,1.0,1.0,0.0,1.0,1.0
race_1,0.0,0.0,1.0,0.0,0.0
race_2,0.0,0.0,0.0,0.0,0.0
race_3,0.0,0.0,0.0,0.0,0.0
race_4,0.0,0.0,0.0,0.0,0.0


## Hyperparameter search

The choice of the batch size and the maximum number of epochs depends on the dataset size. For this dataset, we use the following values:

In [None]:
# Batch size and maximum number of epochs chosen for this dataset; both are
# passed to `find_hyperparameters` and `create_tuner_stats` further down.
batch_size = 8
max_epochs = 50

We use the Type-2 architecture built using the `MonoDense` layer with the following hyperparameter ranges:

In [None]:
def hp_params_f(hp):
    """Declare the hyperparameter search space on the tuner's `hp` object.

    Every hyperparameter is registered through `hp.Int`, `hp.Choice` or
    `hp.Float`, in the order listed below.

    Returns:
        A dict mapping each hyperparameter name to the value returned by the
        corresponding `hp.*` call.
    """
    units = hp.Int("units", min_value=16, max_value=32, step=1)
    # min_value == max_value, so the depth is pinned to two layers.
    n_layers = hp.Int("n_layers", min_value=2, max_value=2)
    # Single-choice list: the activation is not actually searched over.
    activation = hp.Choice("activation", values=["elu"])
    learning_rate = hp.Float(
        "learning_rate", min_value=1e-4, max_value=1e-2, sampling="log"
    )
    weight_decay = hp.Float(
        "weight_decay", min_value=3e-2, max_value=0.3, sampling="log"
    )
    dropout = hp.Float("dropout", min_value=0.0, max_value=0.5, sampling="linear")
    decay_rate = hp.Float(
        "decay_rate", min_value=0.8, max_value=1.0, sampling="reverse_log"
    )

    return {
        "units": units,
        "n_layers": n_layers,
        "activation": activation,
        "learning_rate": learning_rate,
        "weight_decay": weight_decay,
        "dropout": dropout,
        "decay_rate": decay_rate,
    }

The following fixed parameters are used to build the Type-2 architecture for this dataset:

- `final_activation` is used to build the final layer for a regression problem (set to `None`) or for a classification problem (`"sigmoid"`),

- `loss` is used for training a regression (`"mse"`) or classification (`"binary_crossentropy"`) problem, and

- `metrics` denotes the metrics used to compare with previously published results: `"accuracy"` for classification and `"mse"` or `"rmse"` for regression.

Parameters `objective` and `direction` are used by the tuner such that `objective=f"val_{metrics}"` and `direction` is either `"min"` or `"max"`.

The parameter `max_trials` denotes the number of trials performed by the tuner, and `patience` is the number of epochs allowed to perform worse than the best one before stopping the current trial. The parameter `executions_per_trial` denotes the number of runs before calculating the results of a trial; it should be set to a value greater than 1 for small datasets that have high variance in results.

In [None]:
# Binary classification: sigmoid output trained with binary cross-entropy and
# compared on accuracy.
final_activation = "sigmoid"
loss = "binary_crossentropy"
metrics = "accuracy"

# The tuner optimises the validation value of the metric above.
objective = f"val_{metrics}"
direction = "max"

# Search budget: number of trials, runs per trial, and early-stopping patience.
max_trials = 50
executions_per_trial = 1
patience = 5

In [None]:
# | include: false
# | notest

# Run the hyperparameter search on COMPAS over the space declared in
# `hp_params_f`. Keyword arguments are grouped by role.
tuner = find_hyperparameters(
    "compas",
    # model definition
    monotonicity_indicator=monotonicity_indicator,
    hp_params_f=hp_params_f,
    final_activation=final_activation,
    # training setup
    loss=loss,
    metrics=metrics,
    batch_size=batch_size,
    max_epochs=max_epochs,
    patience=patience,
    # selection criterion and search budget
    objective=objective,
    direction=direction,
    max_trials=max_trials,
    executions_per_trial=executions_per_trial,
)

Trial 51 Complete [00h 00m 35s]
val_accuracy: 0.6890688538551331

Best val_accuracy So Far: 0.6995951533317566
Total elapsed time: 00h 00m 35s
INFO:tensorflow:Oracle triggered exit


In [None]:
# | include: false
# | notest

# Collect statistics for the tuner's results, using the same batch size and
# epoch limit as the search.
stats = create_tuner_stats(tuner, batch_size=batch_size, max_epochs=max_epochs)

Unnamed: 0,units,n_layers,activation,learning_rate,weight_decay,dropout,decay_rate,val_accuracy_mean,val_accuracy_std,val_accuracy_min,val_accuracy_max,params
0,26,2,elu,0.086301,0.147297,0.162063,0.927282,0.692955,0.00271,0.689069,0.695547,2237


Unnamed: 0,units,n_layers,activation,learning_rate,weight_decay,dropout,decay_rate,val_accuracy_mean,val_accuracy_std,val_accuracy_min,val_accuracy_max,params
1,22,1,elu,0.093473,0.149578,0.11241,0.852179,0.689393,0.002261,0.687449,0.693117,196
0,26,2,elu,0.086301,0.147297,0.162063,0.927282,0.692955,0.00271,0.689069,0.695547,2237


Unnamed: 0,units,n_layers,activation,learning_rate,weight_decay,dropout,decay_rate,val_accuracy_mean,val_accuracy_std,val_accuracy_min,val_accuracy_max,params
1,22,1,elu,0.093473,0.149578,0.11241,0.852179,0.689393,0.002261,0.687449,0.693117,196
0,26,2,elu,0.086301,0.147297,0.162063,0.927282,0.692955,0.00271,0.689069,0.695547,2237
2,27,2,elu,0.084685,0.137518,0.175917,0.899399,0.694413,0.003464,0.689879,0.698785,2317


Unnamed: 0,units,n_layers,activation,learning_rate,weight_decay,dropout,decay_rate,val_accuracy_mean,val_accuracy_std,val_accuracy_min,val_accuracy_max,params
1,22,1,elu,0.093473,0.149578,0.11241,0.852179,0.689393,0.002261,0.687449,0.693117,196
3,31,3,elu,0.018339,0.105921,0.48039,0.964135,0.692308,0.002217,0.689069,0.694737,4058
0,26,2,elu,0.086301,0.147297,0.162063,0.927282,0.692955,0.00271,0.689069,0.695547,2237
2,27,2,elu,0.084685,0.137518,0.175917,0.899399,0.694413,0.003464,0.689879,0.698785,2317


Unnamed: 0,units,n_layers,activation,learning_rate,weight_decay,dropout,decay_rate,val_accuracy_mean,val_accuracy_std,val_accuracy_min,val_accuracy_max,params
1,22,1,elu,0.093473,0.149578,0.11241,0.852179,0.689393,0.002261,0.687449,0.693117,196
3,31,3,elu,0.018339,0.105921,0.48039,0.964135,0.692308,0.002217,0.689069,0.694737,4058
0,26,2,elu,0.086301,0.147297,0.162063,0.927282,0.692955,0.00271,0.689069,0.695547,2237
4,28,3,elu,0.105227,0.120702,0.16027,0.872222,0.693603,0.000923,0.692308,0.694737,3599
2,27,2,elu,0.084685,0.137518,0.175917,0.899399,0.694413,0.003464,0.689879,0.698785,2317


Unnamed: 0,units,n_layers,activation,learning_rate,weight_decay,dropout,decay_rate,val_accuracy_mean,val_accuracy_std,val_accuracy_min,val_accuracy_max,params
1,22,1,elu,0.093473,0.149578,0.11241,0.852179,0.689393,0.002261,0.687449,0.693117,196
5,25,2,elu,0.069011,0.153525,0.180772,0.874505,0.692146,0.002649,0.689879,0.696356,2157
3,31,3,elu,0.018339,0.105921,0.48039,0.964135,0.692308,0.002217,0.689069,0.694737,4058
0,26,2,elu,0.086301,0.147297,0.162063,0.927282,0.692955,0.00271,0.689069,0.695547,2237
4,28,3,elu,0.105227,0.120702,0.16027,0.872222,0.693603,0.000923,0.692308,0.694737,3599
2,27,2,elu,0.084685,0.137518,0.175917,0.899399,0.694413,0.003464,0.689879,0.698785,2317


Unnamed: 0,units,n_layers,activation,learning_rate,weight_decay,dropout,decay_rate,val_accuracy_mean,val_accuracy_std,val_accuracy_min,val_accuracy_max,params
1,22,1,elu,0.093473,0.149578,0.11241,0.852179,0.689393,0.002261,0.687449,0.693117,196
6,23,2,elu,0.089831,0.140927,0.106579,0.824555,0.690688,0.001983,0.689069,0.693927,1672
5,25,2,elu,0.069011,0.153525,0.180772,0.874505,0.692146,0.002649,0.689879,0.696356,2157
3,31,3,elu,0.018339,0.105921,0.48039,0.964135,0.692308,0.002217,0.689069,0.694737,4058
0,26,2,elu,0.086301,0.147297,0.162063,0.927282,0.692955,0.00271,0.689069,0.695547,2237
4,28,3,elu,0.105227,0.120702,0.16027,0.872222,0.693603,0.000923,0.692308,0.694737,3599
2,27,2,elu,0.084685,0.137518,0.175917,0.899399,0.694413,0.003464,0.689879,0.698785,2317


Unnamed: 0,units,n_layers,activation,learning_rate,weight_decay,dropout,decay_rate,val_accuracy_mean,val_accuracy_std,val_accuracy_min,val_accuracy_max,params
1,22,1,elu,0.093473,0.149578,0.11241,0.852179,0.689393,0.002261,0.687449,0.693117,196
7,19,1,elu,0.16981,0.145653,0.175619,0.921521,0.689393,0.001086,0.688259,0.690688,157
6,23,2,elu,0.089831,0.140927,0.106579,0.824555,0.690688,0.001983,0.689069,0.693927,1672
5,25,2,elu,0.069011,0.153525,0.180772,0.874505,0.692146,0.002649,0.689879,0.696356,2157
3,31,3,elu,0.018339,0.105921,0.48039,0.964135,0.692308,0.002217,0.689069,0.694737,4058
0,26,2,elu,0.086301,0.147297,0.162063,0.927282,0.692955,0.00271,0.689069,0.695547,2237
4,28,3,elu,0.105227,0.120702,0.16027,0.872222,0.693603,0.000923,0.692308,0.694737,3599
2,27,2,elu,0.084685,0.137518,0.175917,0.899399,0.694413,0.003464,0.689879,0.698785,2317


Unnamed: 0,units,n_layers,activation,learning_rate,weight_decay,dropout,decay_rate,val_accuracy_mean,val_accuracy_std,val_accuracy_min,val_accuracy_max,params
1,22,1,elu,0.093473,0.149578,0.11241,0.852179,0.689393,0.002261,0.687449,0.693117,196
7,19,1,elu,0.16981,0.145653,0.175619,0.921521,0.689393,0.001086,0.688259,0.690688,157
6,23,2,elu,0.089831,0.140927,0.106579,0.824555,0.690688,0.001983,0.689069,0.693927,1672
8,26,2,elu,0.07877,0.151123,0.080289,0.866129,0.691012,0.001679,0.689879,0.693927,2237
5,25,2,elu,0.069011,0.153525,0.180772,0.874505,0.692146,0.002649,0.689879,0.696356,2157
3,31,3,elu,0.018339,0.105921,0.48039,0.964135,0.692308,0.002217,0.689069,0.694737,4058
0,26,2,elu,0.086301,0.147297,0.162063,0.927282,0.692955,0.00271,0.689069,0.695547,2237
4,28,3,elu,0.105227,0.120702,0.16027,0.872222,0.693603,0.000923,0.692308,0.694737,3599
2,27,2,elu,0.084685,0.137518,0.175917,0.899399,0.694413,0.003464,0.689879,0.698785,2317


Unnamed: 0,units,n_layers,activation,learning_rate,weight_decay,dropout,decay_rate,val_accuracy_mean,val_accuracy_std,val_accuracy_min,val_accuracy_max,params
1,22,1,elu,0.093473,0.149578,0.11241,0.852179,0.689393,0.002261,0.687449,0.693117,196
7,19,1,elu,0.16981,0.145653,0.175619,0.921521,0.689393,0.001086,0.688259,0.690688,157
6,23,2,elu,0.089831,0.140927,0.106579,0.824555,0.690688,0.001983,0.689069,0.693927,1672
8,26,2,elu,0.07877,0.151123,0.080289,0.866129,0.691012,0.001679,0.689879,0.693927,2237
9,27,4,elu,0.004705,0.174339,0.07236,0.791007,0.691174,0.000724,0.690688,0.692308,3829
5,25,2,elu,0.069011,0.153525,0.180772,0.874505,0.692146,0.002649,0.689879,0.696356,2157
3,31,3,elu,0.018339,0.105921,0.48039,0.964135,0.692308,0.002217,0.689069,0.694737,4058
0,26,2,elu,0.086301,0.147297,0.162063,0.927282,0.692955,0.00271,0.689069,0.695547,2237
4,28,3,elu,0.105227,0.120702,0.16027,0.872222,0.693603,0.000923,0.692308,0.694737,3599
2,27,2,elu,0.084685,0.137518,0.175917,0.899399,0.694413,0.003464,0.689879,0.698785,2317


The following table describes the best models and their hyperparameters found by the tuner:

In [None]:
# | echo: false
# | notest

# Keep the five best rows by mean objective value: ascending order when the
# objective is minimised, descending when it is maximised.
df = stats.sort_values(
    by=objective + "_mean",
    ascending=direction == "min",
).head(n=5)

# Renumber the rows 0..4 and show one column per model.
df.reset_index(drop=True).transpose().style

Unnamed: 0,0,1,2,3,4
units,27,28,26,31,25
n_layers,2,3,2,3,2
activation,elu,elu,elu,elu,elu
learning_rate,0.084685,0.105227,0.086301,0.018339,0.069011
weight_decay,0.137518,0.120702,0.147297,0.105921,0.153525
dropout,0.175917,0.160270,0.162063,0.480390,0.180772
decay_rate,0.899399,0.872222,0.927282,0.964135,0.874505
val_accuracy_mean,0.694413,0.693603,0.692955,0.692308,0.692146
val_accuracy_std,0.003464,0.000923,0.002710,0.002217,0.002649
val_accuracy_min,0.689879,0.692308,0.689069,0.689069,0.689879


In [None]:
# | include: false
# | notest

# Emit the best-models table as LaTeX source without the index column;
# printing shows the raw markup as plain text.
print(df.to_latex(index=False))

\begin{tabular}{rrlrrrrrrrrr}
\toprule
units & n_layers & activation & learning_rate & weight_decay & dropout & decay_rate & val_accuracy_mean & val_accuracy_std & val_accuracy_min & val_accuracy_max & params \\
\midrule
27 & 2 & elu & 0.084685 & 0.137518 & 0.175917 & 0.899399 & 0.694413 & 0.003464 & 0.689879 & 0.698785 & 2317 \\
28 & 3 & elu & 0.105227 & 0.120702 & 0.160270 & 0.872222 & 0.693603 & 0.000923 & 0.692308 & 0.694737 & 3599 \\
26 & 2 & elu & 0.086301 & 0.147297 & 0.162063 & 0.927282 & 0.692955 & 0.002710 & 0.689069 & 0.695547 & 2237 \\
31 & 3 & elu & 0.018339 & 0.105921 & 0.480390 & 0.964135 & 0.692308 & 0.002217 & 0.689069 & 0.694737 & 4058 \\
25 & 2 & elu & 0.069011 & 0.153525 & 0.180772 & 0.874505 & 0.692146 & 0.002649 & 0.689879 & 0.696356 & 2157 \\
\bottomrule
\end{tabular}



## The optimal model

These are the best hyperparameters found by previous runs of the tuner:

In [None]:
def final_hp_params_f(hp):
    """Pin every hyperparameter to the best value found by previous tuner runs.

    Each value is registered on `hp` through `hp.Fixed`, so no search takes
    place.

    Returns:
        A dict mapping each hyperparameter name to the value returned by
        `hp.Fixed` for it.
    """
    # NOTE(review): learning_rate lies outside the 1e-4..1e-2 range declared in
    # `hp_params_f`; presumably it comes from an earlier run with wider
    # ranges -- confirm.
    best_values = {
        "units": 27,
        "n_layers": 2,
        "activation": "elu",
        "learning_rate": 0.084685,
        "weight_decay": 0.137518,
        "dropout": 0.175917,
        "decay_rate": 0.899399,
    }
    return {name: hp.Fixed(name, value=best) for name, best in best_values.items()}

In [None]:
# | include: false
# | notest

# Remove the output directory of any earlier final run (presumably so the
# tuner starts fresh instead of resuming -- confirm).
shutil.rmtree("tuner_final/compas", ignore_errors=True)

# Single trial with the fixed hyperparameters from `final_hp_params_f`.
# NOTE(review): only one epoch is trained here, while `create_tuner_stats`
# below receives the full `max_epochs`; presumably the statistics step does the
# real training -- confirm this is intentional.
final_tuner = find_hyperparameters(
    "compas",
    # model definition
    monotonicity_indicator=monotonicity_indicator,
    hp_params_f=final_hp_params_f,
    final_activation=final_activation,
    # training setup
    loss=loss,
    metrics=metrics,
    batch_size=batch_size,
    max_epochs=1,
    patience=patience,
    # selection criterion and search budget
    objective=objective,
    direction=direction,
    max_trials=1,
    executions_per_trial=1,
    # output location
    dir_root="tuner_final",
)

Trial 1 Complete [00h 00m 11s]
val_accuracy: 0.5417004227638245

Best val_accuracy So Far: 0.5417004227638245
Total elapsed time: 00h 00m 11s
INFO:tensorflow:Oracle triggered exit


In [None]:
# | include: false
# | notest

# Collect statistics for the fixed-hyperparameter model, using the notebook's
# batch size and full epoch limit.
final_stats = create_tuner_stats(
    final_tuner, max_epochs=max_epochs, batch_size=batch_size
)

Unnamed: 0,units,n_layers,activation,learning_rate,weight_decay,dropout,decay_rate,val_accuracy_mean,val_accuracy_std,val_accuracy_min,val_accuracy_max,params
0,27,2,elu,0.084685,0.137518,0.175917,0.899399,0.69166,0.001056,0.690688,0.693117,2317


The final evaluation of the optimal model:

In [None]:
# | echo: false
# | notest

# Show the final statistics transposed: one row per field.
final_stats.transpose().style

Unnamed: 0,0
units,27
n_layers,2
activation,elu
learning_rate,0.084685
weight_decay,0.137518
dropout,0.175917
decay_rate,0.899399
val_accuracy_mean,0.691660
val_accuracy_std,0.001056
val_accuracy_min,0.690688
