### Updates
* Removed the `run_model` function
* Added code to calculate the confusion matrix
* analyze weights and biases

In [0]:
# Import General Libraries 
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import tensorflow as tf

# Deep Learning Libraries
from keras.callbacks import EarlyStopping
from keras.layers import Dense
from keras.models import Sequential
from keras.utils import to_categorical

# scikit-learn utilities
from sklearn import preprocessing
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import KFold
from sklearn.model_selection import train_test_split
from sklearn.utils import class_weight

In [0]:
# Parameters
# Of the values below, only `epochs` and `batch_size` are read by the training
# cell visible in this notebook (both are passed to `model.fit`).
# NOTE(review): `dpsgd`, `noise_multiplier`, `l2_norm_clip` and `microbatches`
# look like differentially-private SGD settings, but nothing shown here reads
# them -- confirm whether they are still needed.
dpsgd = True
# The visible model is compiled with optimizer='adam' (a string), so this
# value is not applied to it.
learning_rate = 0.15
noise_multiplier = 1.1
l2_norm_clip = 1.0
batch_size = 1  # one sample per gradient step in model.fit
epochs = 3  # number of passes over the training split in model.fit
microbatches = 1
model_dir = None

In [0]:
def data_processing(version):
    """Load the CSV and split it into a feature frame and one-hot labels.

    Reads 'nc_sc_ga_va_clean_v0.csv' from the current working directory.

    Args:
        version: Not used by the body -- the file name is hard-coded to the
            ``v0`` file whatever value is passed.
            NOTE(review): presumably meant to select the dataset version;
            confirm before wiring it into the file name.

    Returns:
        A ``(predictors, target)`` tuple. ``target`` is the
        'action_taken_name' column passed through ``to_categorical``.
        ``predictors`` is the remaining frame with its first column removed.
    """
    label_column = 'action_taken_name'
    frame = pd.read_csv('nc_sc_ga_va_clean_v0.csv')

    # One-hot encode the label column.
    target = to_categorical(frame[label_column])

    # Everything except the label is a candidate feature ...
    features = frame.drop([label_column], axis=1)
    # ... minus the first remaining column, which is dropped by position.
    # NOTE(review): presumably a saved row-index column -- confirm.
    predictors = features.drop(features.columns[0], axis=1)

    return predictors, target

In [22]:
# Load features and one-hot labels. The argument 7 is passed as `version`,
# which data_processing does not currently use.
predictors, target = data_processing(7)

n_cols = predictors.shape[1]

# Fully connected classifier: three ReLU hidden layers (100 -> 25 -> 20)
# followed by a 2-way softmax output.
model = tf.keras.Sequential()
model.add(tf.keras.layers.Dense(100, activation='relu', input_shape=(n_cols,)))
model.add(tf.keras.layers.Dense(25, activation='relu'))
model.add(tf.keras.layers.Dense(20, activation='relu'))
model.add(tf.keras.layers.Dense(2, activation='softmax'))

model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# Hold out 20% of the rows for testing; random_state fixes the split.
x_train, x_test, y_train, y_test = train_test_split(
    predictors,
    target,
    test_size=0.20,
    random_state=0,
)

model.fit(
    x_train,
    y_train,
    epochs=epochs,
    batch_size=batch_size,
    verbose=1,
)

print('\n# Evaluate on test data')
# `results` holds [loss, accuracy], matching the compiled loss and metrics.
results = model.evaluate(x_test, y_test, batch_size=128)
print('test loss, test acc:', results)

Epoch 1/3
Epoch 2/3
Epoch 3/3

# Evaluate on test data
test loss, test acc: [0.40055356973338935, 0.8291446]


In [23]:
# Confusion matrix of the trained model on the held-out test split.
# `confusion_matrix` is imported in the notebook's import cell.
y_true = y_test

# Collapse the one-hot labels to a single column of class indices. The rows
# come from to_categorical, so argmax recovers the encoded class -- the same
# decoding that is applied to the predictions below.
y_true_one_col = np.argmax(y_true, axis=1)

# Predicted class = index of the largest softmax output in each row.
y_pred = np.argmax(model.predict(x_test), axis=1)

# Rows are true classes, columns are predicted classes.
confusion_matrix(y_true_one_col, y_pred)

array([[  7061,  28624],
       [  3620, 149416]])

In [0]:
def calc_di(prot_var_name, unprot_var_name, features=None, classifier=None):
    """Ratio of predicted non-zero-class rates between two indicator groups.

    For each of the two indicator columns, the rows where the column equals 1
    are selected, the classifier is run on them, and the fraction of rows whose
    arg-max prediction is a non-zero class is computed. The function returns
    ``rate(prot_var_name) / rate(unprot_var_name)``.

    Args:
        prot_var_name: Name of the indicator column selecting the first group.
        unprot_var_name: Name of the indicator column selecting the second
            (reference) group.
        features: Frame holding the indicator columns and the model inputs.
            Defaults to the notebook-level ``x_test``.
        classifier: Object with a ``predict`` method returning one score per
            class for each row. Defaults to the notebook-level ``model``.

    Returns:
        The rate ratio as a float. If the reference group's rate is zero the
        result is ``inf`` (or ``nan`` when the first group's rate is also
        zero).

    Raises:
        ValueError: If either group contains no rows with indicator == 1.
    """
    # Fall back to the notebook globals so existing two-argument calls behave
    # as before.
    if features is None:
        features = x_test
    if classifier is None:
        classifier = model

    prot_mask = features[prot_var_name] == 1
    unprot_mask = features[unprot_var_name] == 1

    # Count group members with the same mask that selects the rows. This
    # avoids `value_counts()[1]`, which raises KeyError for an empty group and
    # is label/position ambiguous when the index is not integer-typed.
    prot_total = int(prot_mask.sum())
    unprot_total = int(unprot_mask.sum())
    if prot_total == 0 or unprot_total == 0:
        raise ValueError(
            'calc_di needs at least one row in each group; got '
            '{0} row(s) for {1!r} and {2} row(s) for {3!r}'.format(
                prot_total, prot_var_name, unprot_total, unprot_var_name))

    prot_pred = np.argmax(classifier.predict(features[prot_mask]), axis=1)
    unprot_pred = np.argmax(classifier.predict(features[unprot_mask]), axis=1)

    # count_nonzero counts rows predicted as any class other than class 0.
    prot_rate = np.count_nonzero(prot_pred) / prot_total
    unprot_rate = np.count_nonzero(unprot_pred) / unprot_total

    if unprot_rate == 0:
        # Same inf/nan outcome as dividing NumPy floats, without the warning.
        return float('inf') if prot_rate > 0 else float('nan')
    return prot_rate / unprot_rate

In [25]:
# Predicted-rate ratio: rows with applicant_race_name_1_0 == 1 relative to rows with applicant_race_name_1_1 == 1.
calc_di('applicant_race_name_1_0', 'applicant_race_name_1_1')

0.8601278228287698

In [26]:
# Predicted-rate ratio: 'Hispanic or Latino' rows relative to 'Not Hispanic or Latino' rows.
calc_di('applicant_ethnicity_name_Hispanic or Latino', 'applicant_ethnicity_name_Not Hispanic or Latino')

0.9982569173950081

In [27]:
# Predicted-rate ratio: 'Female' rows relative to 'Male' rows.
calc_di('applicant_sex_name_Female', 'applicant_sex_name_Male')

0.9598980633402103