In [1]:
%pip install aequitas

Collecting aequitas
  Downloading aequitas-0.42.0-py3-none-any.whl (2.2 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.2/2.2 MB[0m [31m12.0 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting ohio>=0.2.0 (from aequitas)
  Downloading ohio-0.5.0-py3-none-any.whl (26 kB)
Collecting Flask==0.12.2 (from aequitas)
  Downloading Flask-0.12.2-py2.py3-none-any.whl (83 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m83.0/83.0 kB[0m [31m6.3 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting Flask-Bootstrap==3.3.7.1 (from aequitas)
  Downloading Flask-Bootstrap-3.3.7.1.tar.gz (456 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m456.4/456.4 kB[0m [31m17.3 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting markdown2==2.3.5 (from aequitas)
  Downloading markdown2-2.3.5.zip (161 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m161.9/161.9 kB[0m [31m8.7 MB/s[0m eta [36m0:00:

In [2]:
import tensorflow as tf
from tensorflow import keras

import copy
import numpy as np       # Random number generation
import seaborn as sns    # Plotting library
import pandas as pd      # Read/write data
from aequitas.group import Group                # Fairness metrics
import matplotlib as mpl
import matplotlib.pyplot as plt     # Plotting method
from sklearn.preprocessing import LabelEncoder  # Categorical encoding for LGBM models
from sklearn import metrics                     # ROC metrics
from sklearn.ensemble import RandomForestClassifier

In [3]:
print("Num GPUs Available: ", len(tf.config.list_physical_devices('GPU')))

Num GPUs Available:  1


In [4]:
from google.colab import drive
drive.mount("/content/drive")

Mounted at /content/drive


In [None]:
!cp /content/drive/MyDrive/Colab\ Notebooks/ECE697/Project/random_search.py .

In [None]:
from random_search import RandomValueTrial, suggest_callable_hyperparams  # Random search wrapper methods

In [5]:
# Define path to datasets. Replace `base_path` with the appropriate value.
base_path = "/content/drive/MyDrive/Colab Notebooks/ECE697/Project/Income Data Variants 1m/"

datasets_paths = {
    "TypeI":   base_path + "income_07_type1.csv",
    "TypeII":  base_path + "income_07_type2.csv",
    "TypeIII": base_path + "income_07_type3.csv",
    "TypeIV":  base_path + "income_07_type4.csv",
    "TypeV":   base_path + "income_07_type5.csv",
    "TypeVI":  base_path + "income_07_type6.csv"
 }

In [6]:
# Read the datasets with pandas.
datasets = {key: pd.read_csv(path,usecols=range(1,33)) for key, path in datasets_paths.items()}

In [7]:
# Define the label field and categorical columns.
label = "fraud_bool"

categorical_features = [
    "payment_type",
    "employment_status",
    "housing_status",
    "source",
    "device_os",
]

In [8]:
# Create the train and test sets. Shuffle data with `sample` method.
# The split was done by month. The first 6 months as the train, the last 2 months as test.
train_dfs = {key: df[df["month"]<6].sample(frac=1, replace=False) for key, df in datasets.items()}
test_dfs = {key: df[df["month"]>=6].sample(frac=1, replace=False) for key, df in datasets.items()}

In [9]:
# Encode the categorical variables in the datasets to integers.
# This is expected by LGBM (or columns with the `categorical` data type).

for name in datasets.keys():  # For each dataset in the suite
    train = train_dfs[name]
    test = test_dfs[name]

    for feat in categorical_features:
        encoder = LabelEncoder()
        encoder.fit(train[feat])  # Fit an encoder to the train set.
        train[feat] = encoder.transform(train[feat])  # Transform train set.
        test[feat] = encoder.transform(test[feat])    # Transform test set.

In [41]:
METRICS = [
      keras.metrics.BinaryCrossentropy(name='cross entropy'),  # same as model's loss
      keras.metrics.MeanSquaredError(name='Brier score'),
      keras.metrics.TruePositives(name='tp'),
      keras.metrics.FalsePositives(name='fp'),
      keras.metrics.TrueNegatives(name='tn'),
      keras.metrics.FalseNegatives(name='fn'),
      keras.metrics.BinaryAccuracy(name='accuracy'),
      keras.metrics.Precision(name='precision'),
      keras.metrics.Recall(name='recall'),
      keras.metrics.AUC(name='auc'),
      keras.metrics.AUC(name='prc', curve='PR'), # precision-recall curve
]

def make_model(metrics=METRICS, output_bias=None, train_feat_shape=train_dfs["TypeI"].shape[-1]):
  if output_bias is not None:
    output_bias = tf.keras.initializers.Constant(output_bias)
  model = keras.Sequential([
      keras.layers.Dense(
          16, activation='tanh',
          input_shape=(train_feat_shape,)),
      keras.layers.Dense(16, activation='tanh'),
      #keras.layers.Dropout(0.3),
      keras.layers.Dense(16, activation='tanh'),
      #keras.layers.Dropout(0.5),
      keras.layers.Dense(1, activation='sigmoid',
                         bias_initializer=output_bias),
  ])

  model.compile(
      optimizer=keras.optimizers.Adam(learning_rate=1e-3),
      loss=keras.losses.BinaryCrossentropy(),
      metrics=metrics)

  return model

In [42]:
EPOCHS = 100
BATCH_SIZE = 2048

early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor='val_prc',
    verbose=1,
    patience=10,
    mode='max',
    restore_best_weights=True)

In [43]:
model = make_model()
model.summary()

Model: "sequential_12"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_24 (Dense)            (None, 16)                528       
                                                                 
 dense_25 (Dense)            (None, 16)                272       
                                                                 
 dense_26 (Dense)            (None, 16)                272       
                                                                 
 dense_27 (Dense)            (None, 1)                 17        
                                                                 
Total params: 1,089
Trainable params: 1,089
Non-trainable params: 0
_________________________________________________________________


In [44]:
def plot_cm(labels, predictions, threshold=0.5):
  cm = metrics.confusion_matrix(labels, predictions > threshold)
  plt.figure(figsize=(5,5))
  sns.heatmap(cm, annot=True, fmt="d")
  plt.title('Confusion matrix @{:.2f}'.format(threshold))
  plt.ylabel('Actual label')
  plt.xlabel('Predicted label')

  print('Legitimate Transactions Detected (True Negatives): ', cm[0][0])
  print('Legitimate Transactions Incorrectly Detected (False Positives): ', cm[0][1])
  print('Fraudulent Transactions Missed (False Negatives): ', cm[1][0])
  print('Fraudulent Transactions Detected (True Positives): ', cm[1][1])
  print('Total Fraudulent Transactions: ', np.sum(cm[1]))

In [45]:
# Cell with train loop.

# Define number of trials in Random search.
n_trials=100
# Random state for sampling seeds.
np.random.seed(42)
# Seeds for the random search sampling algorithm.
seeds = np.random.choice(list(range(1_000_000)), size=n_trials, replace=False)

EPOCHS = 200
BATCH_SIZE = 2048

early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor='val_prc',
    verbose=1,
    patience=10,
    mode='max',
    restore_best_weights=True)

# Variable to store the results.
runs = {}
tf_runs = {}
total_predictions = {}

#for trial in range(n_trials):
    #seed = seeds[trial]
    #trial = RandomValueTrial(seed=seed)
    # Hyperparameters for the random search trial.
    #test_hyperparams = suggest_callable_hyperparams(trial, hyperparam_space)
    #del test_hyperparams["classpath"] # Remove unnecessary key in hyperparaemters.

    # Update list of tested hyperparameters.
    #prev_hyperparams = runs.get("hyperparams", [])
    #prev_hyperparams.append(test_hyperparams)
    #runs["hyperparams"] = prev_hyperparams


    # two tabs separation
for dataset_name in datasets.keys():  # Run hyperparameters on all variants of datastes.
    neg, pos = np.bincount(train_dfs[dataset_name]['fraud_bool'])
    total = neg + pos
    print('\n\n\nExamples for {}:\n    Total: {}\n    Positive: {} ({:.2f}% of total)\n\n\n'.format(dataset_name, total, pos, 100 * pos / total))
    initial_bias = np.log([pos/neg])

    X_train = train_dfs[dataset_name].drop(columns=["fraud_bool"])
    y_train = train_dfs[dataset_name]["fraud_bool"]
    X_test = test_dfs[dataset_name].drop(columns=["fraud_bool"])
    y_test = test_dfs[dataset_name]["fraud_bool"]

    model = make_model(output_bias=initial_bias, train_feat_shape=X_train.shape[-1])
    # Fit model to training data.
    careful_bias_history = model.fit(
      X_train,
      y_train,
      batch_size=BATCH_SIZE,
      epochs=EPOCHS,
      verbose=0)
    # Obtain predictions in test data.
    predictions_direct = model.predict(X_test, batch_size=BATCH_SIZE)
    predictions = predictions_direct.tolist()
    predictions_evaluation = model.evaluate(X_test, y_test, batch_size=BATCH_SIZE, verbose=0)

    tf_metric_results = {}
    for name, value in zip(model.metrics_names, predictions_evaluation):
      tf_metric_results[name] = value
    tf_runs[dataset_name] = tf_metric_results

    total_predictions[dataset_name] = (y_test, predictions)

    """

    # Obtain ROC curve for the predictions.
    fprs, tprs, thresholds = metrics.roc_curve(y_test, predictions)
    # Obtain threshold and recall. We select 5% FPR as threshold.
    threshold = np.min(thresholds[fprs==max(fprs[fprs < 0.05])])
    recall = np.max(tprs[fprs==max(fprs[fprs < 0.05])])

    # Binarize predictions for Aequitas.
    preds_binary = (predictions > threshold).astype(int).tolist()

    # Create a dataframe with protected group column, predictions and labels.
    # Here, we select income < 0.7 as threshold.
    aequitas_df = pd.DataFrame(
        {
            "income": (X_test["income"] < 0.7).map({True: "Minority", False: "Majority"}),
             "score": preds_binary,
            "label_value": y_test.values

        }
    )

    # Obtain FPR results for different groups.
    g = Group()
    aequitas_results = g.get_crosstabs(aequitas_df, score_thresholds=None, attr_cols=["income"])[0]  #,score_col=["preds"], label_col=["y"]

    # Store the results for the trained model
    results = {}
    results["recall"] = recall
    results["recall Minority"] = aequitas_results[aequitas_results["attribute_value"] == "Minority"][["tpr"]].values[0][0]
    results["recall Majority"] = aequitas_results[aequitas_results["attribute_value"] == "Majority"][["tpr"]].values[0][0]
    results["fpr Minority"] = aequitas_results[aequitas_results["attribute_value"] == "Minority"][["fpr"]].values[0][0]
    results["fpr Majority"] = aequitas_results[aequitas_results["attribute_value"] == "Majority"][["fpr"]].values[0][0]

    # Store the results in the runs variable
    #prev_runs = runs.get(dataset_name, [])
    #prev_runs.append(results)
    runs[dataset_name] = results
    """




Examples for TypeI:
    Total: 794989
    Positive: 8151 (1.03% of total)






Examples for TypeII:
    Total: 794988
    Positive: 8150 (1.03% of total)






Examples for TypeIII:
    Total: 794989
    Positive: 8151 (1.03% of total)






Examples for TypeIV:
    Total: 794989
    Positive: 8151 (1.03% of total)






Examples for TypeV:
    Total: 794990
    Positive: 8152 (1.03% of total)






Examples for TypeVI:
    Total: 794990
    Positive: 8152 (1.03% of total)





In [38]:
#print(f"{len(X_test['income'])}, {len(preds_binary)}, {len(y_test.values)}")
#print(f"{type(X_test['income'])}, {type(preds_binary)}, {type(y_test.values)}")

In [46]:
tf_runs

{'TypeI': {'loss': 0.07438496500253677,
  'cross entropy': 0.07438496500253677,
  'Brier score': 0.013854006305336952,
  'tp': 0.0,
  'fp': 0.0,
  'tn': 202133.0,
  'fn': 2878.0,
  'accuracy': 0.9859617352485657,
  'precision': 0.0,
  'recall': 0.0,
  'auc': 0.5,
  'prc': 0.014038271270692348},
 'TypeII': {'loss': 0.07432568818330765,
  'cross entropy': 0.07432568818330765,
  'Brier score': 0.013852769508957863,
  'tp': 0.0,
  'fp': 0.0,
  'tn': 202133.0,
  'fn': 2878.0,
  'accuracy': 0.9859617352485657,
  'precision': 0.0,
  'recall': 0.0,
  'auc': 0.5,
  'prc': 0.014038271270692348},
 'TypeIII': {'loss': 0.07446231693029404,
  'cross entropy': 0.07446231693029404,
  'Brier score': 0.013855586759746075,
  'tp': 0.0,
  'fp': 0.0,
  'tn': 202133.0,
  'fn': 2878.0,
  'accuracy': 0.9859617352485657,
  'precision': 0.0,
  'recall': 0.0,
  'auc': 0.5,
  'prc': 0.014038271270692348},
 'TypeIV': {'loss': 0.07455934584140778,
  'cross entropy': 0.07455934584140778,
  'Brier score': 0.013857531

In [None]:
# Create a dataframe with the results for each model in each dataset.
rs_results = pd.DataFrame(runs)

In [None]:
# Helper method to obtain the metric values for a given model.
def get_results(results, variant, metric):
    col = results[variant]
    values = []
    for idx, val in col.iteritems():
        values.append(val[metric])
    return values

In [None]:
# Obtain the relevant metrics to plots from the dataframe.
variants = list(datasets_paths.keys())

plot_results = {"Variant": [], "Recall": [], "FPR Ratio": []}

print(rs_results.head(10))
print(type(rs_results))

for variant in variants:
    plot_results["Recall"] += get_results(rs_results, variant, "recall")
    # Obtain the FPR if both groups.
    for fpr_majority, fpr_minority in zip(get_results(rs_results, variant, "fpr Majority"), get_results(rs_results, variant, "fpr Minority")):
        # Calculate FPR ratio as higher fpr / lower fpr
        if fpr_majority > fpr_minority:
            plot_results["FPR Ratio"] += [fpr_minority/fpr_majority]
        else:
            plot_results["FPR Ratio"] += [fpr_majority/fpr_minority]
    plot_results["Variant"] += [variant] * len(get_results(rs_results, variant, "recall"))

# Create a dataframe for easier plots.
plot_results = pd.DataFrame(plot_results)

                 TypeI  TypeII  TypeIII  TypeIV  TypeV  TypeVI
recall             0.0     0.0      0.0     0.0    0.0     0.0
recall Minority    0.0     0.0      0.0     0.0    0.0     0.0
recall Majority    0.0     0.0      0.0     0.0    0.0     0.0
fpr Minority       0.0     0.0      0.0     0.0    0.0     0.0
fpr Majority       0.0     0.0      0.0     0.0    0.0     0.0
<class 'pandas.core.frame.DataFrame'>


  for idx, val in col.iteritems():


TypeError: ignored

In [None]:
# Create a plot with the full results of the random search algorithm.
sns.set()
sns.set_style("whitegrid", {"grid.linestyle": "--"})

sns.jointplot(data=plot_results, x="Recall", y="FPR Ratio", hue="Variant")
plt.ylim((0,1));
plt.xlim((0,1));

In [None]:
# Create the final plot. Highlight the top models:
top_model_base = plot_results.loc[plot_results['Variant'] == 'Base'].sort_values('Recall', ascending=False).index.values
top_model = copy.deepcopy(top_model_base)
top_model = np.r_[top_model, top_model_base + (100)]

plot_results['index'] = plot_results.index
plot_results['is_top'] = plot_results.apply(lambda x: 1 if x['index'] in top_model else 0, axis=1)

In [None]:
sns.set_style("whitegrid", {"grid.linestyle": "--", "grid.alpha":0.1})
DPI = 200
plt.rcParams['figure.dpi'] = DPI
plt.rcParams['figure.figsize'] = (10,5)

fig, (ax1, ax2) = plt.subplots(1, 2)

# LEFT PLOT
sns.scatterplot(ax=ax1, data=plot_results.loc[(~plot_results.index.isin(top_model)), :], x="Recall", y="FPR Ratio", hue="Variant", alpha=0.2)
sns.scatterplot(ax=ax1, data=plot_results.loc[plot_results.index.isin(top_model), :], x="Recall", y="FPR Ratio", hue="Variant", legend=False)
ax1.set(
    ylim=(0,1),
    xlim=(0,1)
)

# RIGHT PLOT
sns.scatterplot(ax=ax2, data=plot_results.loc[(~plot_results.index.isin(top_model)) & (plot_results["Variant"].isin(["Base", "Type II", "Type V", "Type IV"])), :], x="Recall", y="FPR Ratio", hue="Variant", alpha=0.2, palette=[sns.color_palette()[0], sns.color_palette()[2], sns.color_palette()[4], sns.color_palette()[5]], legend=False)
sns.scatterplot(ax=ax2, data=plot_results.loc[(plot_results.index.isin(top_model)) & (plot_results["Variant"].isin(["Base", "Type II", "Type V", "Type IV"])), :], x="Recall", y="FPR Ratio", hue="Variant", palette=[sns.color_palette()[0], sns.color_palette()[2], sns.color_palette()[4], sns.color_palette()[5]], legend=False)
ax2.set(
    ylim=(0,0.4),
    ylabel="",
    xticks=np.arange(0.2, 0.8, 0.1),
    yticks=np.arange(0, 0.5, 0.1),
    xlim=(0.2, 0.6),
)

rect = plt.Rectangle((0.2, 0.004), 0.4, 0.396, facecolor=(0.1, 0.1, 0.1, 0.05), edgecolor="grey", linestyle="-")
ax1.add_patch(rect)
handles, labels = ax1.get_legend_handles_labels()
handles = list(handles) + [rect]
labels = list(labels) + ["Plot on the right"]
ax1.legend(handles, labels, title="Variant")

sns.move_legend(
    ax1,
    loc="lower center",
    bbox_to_anchor=[1.08, -0.32],
    ncol=7
)

In [None]:
# Encode the categorical variables in the datasets to integers.

for name in datasets.keys():  # For each dataset in the suite
    train = train_dfs[name]
    test = test_dfs[name]

    for feat in categorical_features:
        encoder = LabelEncoder()
        encoder.fit(train[feat])  # Fit an encoder to the train set.
        train[feat] = encoder.transform(train[feat])  # Transform train set.
        test[feat] = encoder.transform(test[feat])    # Transform test set.

In [None]:
for dataset_name in datasets.keys():  # Run hyperparameters on all variants of datastes.
    X_train = train_dfs[dataset_name].drop(columns=["fraud_bool"])
    y_train = train_dfs[dataset_name]["fraud_bool"]
    X_test = test_dfs[dataset_name].drop(columns=["fraud_bool"])
    y_test = test_dfs[dataset_name]["fraud_bool"]

    # make model training call with X_train


    # test with X_test

    # output metrics
