In [1]:
#Install What-If Tool Widget and SHAP library
!pip install --upgrade --quiet witwidget shap

In [2]:
! pip install tensorflow



In [3]:
# Import necessary libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
import tensorflow as tf
tf.compat.v1.enable_eager_execution()
import shap
from witwidget.notebook.visualization import WitWidget, WitConfigBuilder




In [4]:
# Load the dataset
url = "https://project1cc.s3.amazonaws.com/cox-violent-parsed_filt.csv"
df = pd.read_csv(url)
df.head()

Unnamed: 0,id,name,first,last,sex,dob,age,age_cat,race,juv_fel_count,...,vr_charge_desc,type_of_assessment,decile_score.1,score_text,screening_date,v_type_of_assessment,v_decile_score,v_score_text,priors_count.1,event
0,1.0,miguel hernandez,miguel,hernandez,Male,18/04/1947,69,Greater than 45,Other,0,...,,Risk of Recidivism,1,Low,14/08/2013,Risk of Violence,1,Low,0,0
1,2.0,miguel hernandez,miguel,hernandez,Male,18/04/1947,69,Greater than 45,Other,0,...,,Risk of Recidivism,1,Low,14/08/2013,Risk of Violence,1,Low,0,0
2,3.0,michael ryan,michael,ryan,Male,06/02/1985,31,25 - 45,Caucasian,0,...,,Risk of Recidivism,5,Medium,31/12/2014,Risk of Violence,2,Low,0,0
3,4.0,kevon dixon,kevon,dixon,Male,22/01/1982,34,25 - 45,African-American,0,...,Felony Battery (Dom Strang),Risk of Recidivism,3,Low,27/01/2013,Risk of Violence,1,Low,0,1
4,5.0,ed philo,ed,philo,Male,14/05/1991,24,Less than 25,African-American,0,...,,Risk of Recidivism,4,Low,14/04/2013,Risk of Violence,3,Low,4,0


In [5]:
# Data preprocessing

# Remove entries with no indication of recidivism or no compass score
df = df[df['is_recid'] != -1]
df = df[df['decile_score'] != -1]

# Rename the recidivism column into recidivism_within_2_years
df['recidivism_within_2_years'] = df['is_recid']

# Hot encoding of the COMPASS Label (0 and 1)
df['COMPASS_determination'] = np.where(df['score_text'] == 'Low', 0, 1)

# Transform categorical values into numerical values
df = pd.get_dummies(df, columns=['sex', 'race'])

df.head()

Unnamed: 0,id,name,first,last,dob,age,age_cat,juv_fel_count,decile_score,juv_misd_count,...,recidivism_within_2_years,COMPASS_determination,sex_Female,sex_Male,race_African-American,race_Asian,race_Caucasian,race_Hispanic,race_Native American,race_Other
0,1.0,miguel hernandez,miguel,hernandez,18/04/1947,69,Greater than 45,0,1,0,...,0,0,0,1,0,0,0,0,0,1
1,2.0,miguel hernandez,miguel,hernandez,18/04/1947,69,Greater than 45,0,1,0,...,0,0,0,1,0,0,0,0,0,1
3,4.0,kevon dixon,kevon,dixon,22/01/1982,34,25 - 45,0,3,0,...,1,0,0,1,1,0,0,0,0,0
4,5.0,ed philo,ed,philo,14/05/1991,24,Less than 25,0,4,0,...,1,0,0,1,1,0,0,0,0,0
5,6.0,ed philo,ed,philo,14/05/1991,24,Less than 25,0,4,0,...,1,0,0,1,1,0,0,0,0,0


In [6]:
#Feature engineering

input_features = ['sex_Female', 'sex_Male', 'age', 'race_African-American', 'race_Caucasian', 'race_Hispanic',
                  'race_Native American', 'race_Other', 'priors_count', 'juv_fel_count', 'juv_misd_count', 'juv_other_count']

to_keep = input_features + ['recidivism_within_2_years', 'COMPASS_determination']

# Remove all columns that are not from the conditions above
to_remove = [col for col in df.columns if col not in to_keep]
df = df.drop(columns=to_remove)

input_columns = df.columns.tolist()

# Set the label to COMPASS_determination
labels = df['COMPASS_determination']
# labels = df[['recidivism_within_2_years', 'COMPASS_determination']]

df.head()

Unnamed: 0,age,juv_fel_count,juv_misd_count,juv_other_count,priors_count,recidivism_within_2_years,COMPASS_determination,sex_Female,sex_Male,race_African-American,race_Caucasian,race_Hispanic,race_Native American,race_Other
0,69,0,0,0,0,0,0,0,1,0,0,0,0,1
1,69,0,0,0,0,0,0,0,1,0,0,0,0,1
3,34,0,0,0,0,1,0,0,1,1,0,0,0,0
4,24,0,0,1,4,1,0,0,1,1,0,0,0,0
5,24,0,0,1,4,1,0,0,1,1,0,0,0,0


In [7]:
df_for_training = df.drop(columns=['COMPASS_determination', 'recidivism_within_2_years'])
train_size = int(len(df_for_training) * 0.8)

train_data = df_for_training[:train_size]
train_labels = labels[:train_size]

test_data_with_labels = df[train_size:]

In [9]:
# Create the model
input_size = len(train_data.iloc[0])

model = tf.keras.Sequential([
    tf.keras.layers.Dense(200, activation='relu', input_shape=(input_size,)),
    tf.keras.layers.Dense(50, activation='relu'),
    tf.keras.layers.Dense(25, activation='relu'),
    tf.keras.layers.Dense(1, activation='sigmoid')
])

# Compile the model
model.compile(optimizer='adam', loss='mean_squared_error')

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [22]:
model.summary()

In [23]:
# Train the model
model.fit(train_data.values, train_labels.values, epochs=4, batch_size=32, validation_split=0.1)

Epoch 1/4
[1m394/394[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 0.1672 - val_loss: 0.1669
Epoch 2/4
[1m394/394[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 0.1677 - val_loss: 0.1659
Epoch 3/4
[1m394/394[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - loss: 0.1672 - val_loss: 0.1665
Epoch 4/4
[1m394/394[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - loss: 0.1675 - val_loss: 0.1650


<keras.src.callbacks.history.History at 0x1de16604490>

In [24]:
# Create the shap container
import shap

# Create a SHAP explainer using the TensorFlow model and session
explainer = shap.DeepExplainer(model, train_data[:200])

AttributeError: 'tuple' object has no attribute 'as_list'

In [21]:
# Calculate SHAP values
shap_values = explainer.shap_values(train_data.values[:5])
shap_values

AttributeError: 'tuple' object has no attribute 'as_list'

In [None]:
# Plot SHAP summary plot
shap.summary_plot(shap_values, data=X)

In [None]:
# Create the WIT datapoint explorer and editor
config_builder = (WitConfigBuilder(test_examples.tolist(), feature_names=X.columns.tolist())
                  .set_ai_platform_model('your_project_id', 'your_model_name', 'your_model_version')
                  .set_target_feature('target_column')
                  .set_model_type('classification'))

# Display the What-If Tool widget
WitWidget(config_builder)