In [57]:
import aux_functions
from app_functions import attempt_predict
from transformers import TimeTransformer, BoolTransformer

import json
import joblib
import pickle
import requests
from time import sleep
import random

import warnings
warnings.filterwarnings("error")

import pandas as pd
pd.set_option('display.max_columns', 100)
import os
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
import seaborn as sns
import numpy as np
import datetime as dt

from sklearn.model_selection import train_test_split
from sklearn.pipeline import make_pipeline, Pipeline
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.model_selection import cross_val_score
# from category_encoders import OneHotEncoder
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler, OneHotEncoder, LabelEncoder, RobustScaler, OrdinalEncoder
from sklearn.compose import ColumnTransformer
from sklearn.model_selection import cross_val_score
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB

from sklearn.utils import resample
from sklearn.metrics import precision_score, recall_score, f1_score,  accuracy_score, roc_auc_score, make_scorer, confusion_matrix, roc_curve

# needed to use matplotlib inside jupyter notebook
%matplotlib inline 


# Requirement Validation Functions

In [58]:
def verify_success_rate(y_true, y_pred, min_success_rate=0.1):
    """
    Verify that the success rate (precision) on a test set reaches a minimum.

    The success rate is the precision of the positive class (label 1): the
    share of positive predictions that are actually positive.

    Parameters
    ----------
    y_true : array-like
        Ground-truth binary labels.
    y_pred : array-like
        Predicted binary labels, in the same order as ``y_true``.
    min_success_rate : float, default 0.1
        Smallest acceptable precision (inclusive).

    Returns
    -------
    tuple
        ``(is_satisfied, precision)`` where ``is_satisfied`` is truthy when
        ``precision >= min_success_rate``.
    """
    # zero_division=0: when the model predicts no positives, precision is
    # undefined; report it as 0 instead of letting sklearn emit a warning
    # (this notebook escalates warnings to errors).
    # pos_label=1 keeps the positive-class convention identical to the one
    # used by verify_no_discrimination.
    precision = precision_score(y_true, y_pred, pos_label=1, zero_division=0)
    is_satisfied = precision >= min_success_rate

    return is_satisfied, precision

In [59]:
def _precisions_by_class(X_test, y_true, y_pred, sensitive_column, sensitive_classes, min_samples, within=None):
    """Return ``{sensitive_class: precision}`` for every class with enough positive labels.

    ``y_true`` and ``y_pred`` are numpy arrays aligned positionally with the
    rows of ``X_test``; ``within`` is an optional boolean numpy mask that
    restricts the rows considered (e.g. to a single department).
    """
    precisions = {}
    for sensitive_class in sensitive_classes:
        mask = (X_test[sensitive_column] == sensitive_class).to_numpy()
        if within is not None:
            mask = mask & within
        # Precision only makes sense when the subgroup holds enough positive
        # true labels; smaller subgroups are left out of the comparison.
        if np.sum(y_true[mask]) > min_samples:
            # zero_division=0: precision defaults to 0 when the model
            # predicted no positive outcome for this subgroup.
            precisions[sensitive_class] = precision_score(
                y_true[mask], y_pred[mask], pos_label=1, zero_division=0
            )
    return precisions


def _precision_spread(precisions):
    """Return the gap between the highest and the lowest precision in the dict."""
    values = list(precisions.values())
    return max(values) - min(values)


def verify_no_discrimination(X_test, y_true, y_pred, sensitive_column='Officer-defined ethnicity', max_diff=0.05, min_samples=50, department_column='station'):
    """
    Verify that precision does not differ too much between the classes of a
    sensitive attribute, both inside every department and globally.

    Parameters
    ----------
    X_test : pandas.DataFrame
        Feature frame; must contain ``sensitive_column`` and ``department_column``.
    y_true : array-like
        Ground-truth binary labels, positionally aligned with ``X_test``.
    y_pred : array-like
        Predicted binary labels, positionally aligned with ``X_test``.
    sensitive_column : str
        Column holding the protected attribute.
    max_diff : float
        Largest tolerated gap between the best and the worst class precision.
    min_samples : int
        A class is only evaluated when its number of positive true labels
        (the sum of ``y_true`` over the subgroup) is strictly greater than this.
    department_column : str, default 'station'
        Column identifying the department.

    Returns
    -------
    tuple
        ``(is_satisfied, problematic_departments, good_departments, global_precisions)``.
        The two department lists hold ``(department, diff, precisions)`` tuples.
        Departments with fewer than two evaluable classes are skipped.
        ``is_satisfied`` is False when any department OR the global
        comparison exceeds ``max_diff``; ``problematic_departments`` can
        therefore be empty even though the requirement failed.
    """
    # Work on positional numpy arrays so that list / Series / ndarray inputs
    # all behave the same under boolean-mask indexing.
    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)

    departments = X_test[department_column].unique()
    sensitive_classes = X_test[sensitive_column].unique()

    is_satisfied = True
    problematic_departments = []
    good_departments = []
    for department in departments:
        in_department = (X_test[department_column] == department).to_numpy()
        precisions = _precisions_by_class(
            X_test, y_true, y_pred, sensitive_column, sensitive_classes, min_samples,
            within=in_department,
        )

        # A comparison needs at least two classes with a valid precision.
        if len(precisions) > 1:
            diff = _precision_spread(precisions)
            if diff > max_diff:
                is_satisfied = False
                problematic_departments.append((department, diff, precisions))
            else:
                good_departments.append((department, diff, precisions))

    global_precisions = _precisions_by_class(
        X_test, y_true, y_pred, sensitive_column, sensitive_classes, min_samples
    )

    # The global check must look at the global precisions, not at whatever
    # the last department iteration left behind.
    if len(global_precisions) > 1 and _precision_spread(global_precisions) > max_diff:
        is_satisfied = False

    return is_satisfied, problematic_departments, good_departments, global_precisions


# Get the data

In [60]:
# Raw training data, read relative to the notebook's working directory.
df_ = pd.read_csv("data/train.csv")

# Columns handed to the cleaning helper as its `drop_cols` argument.
drop_cols = [
    'Self-defined ethnicity',
    'Outcome',
    'Outcome linked to object of search',
    'Removal of more than just outer clothing',
]

df_clean = aux_functions.clean_data(df_, drop_cols)

# Hold out 30% of the cleaned rows; the fixed seed keeps the split reproducible.
df_train, df_test = train_test_split(df_clean, random_state=42, test_size=0.3)

df_test.head()

Unnamed: 0,observation_id,Type,Date,Part of a policing operation,Latitude,Longitude,Gender,Age range,Officer-defined ethnicity,Legislation,Object of search,station,target
66286,a43826b5-8688-4f5f-8d4e-79c42158f9df,Person search,2021-04-22T01:55:00+00:00,False,52.521888,-1.466936,Male,18-24,Black,Misuse of Drugs Act 1971 (section 23),Controlled drugs,warwickshire,0
729029,b3fc1379-1f09-45ad-8f94-7d5168d5a378,Person search,2021-09-21T09:30:42+00:00,,53.377962,-2.948176,Male,18-24,White,Misuse of Drugs Act 1971 (section 23),Controlled drugs,merseyside,0
775662,bcbd5fe7-8103-4125-8c8d-cfbadcf89544,Person and Vehicle search,2020-08-05T17:35:00+00:00,False,51.111656,0.514738,Male,10-17,White,Misuse of Drugs Act 1971 (section 23),Psychoactive substances,kent,0
289711,9225a076-e0bb-47f9-9ca5-8de3ade85263,Person search,2020-06-08T11:00:00+00:00,,,,Male,25-34,Black,Misuse of Drugs Act 1971 (section 23),Controlled drugs,btp,0
732874,f8d11725-a639-40af-8321-1cec72629d5c,Person search,2021-09-02T14:15:55+00:00,,54.574652,-1.238405,Male,10-17,White,Police and Criminal Evidence Act 1984 (section 1),Offensive weapons,cleveland,0


# Recover the Model

In [61]:
# All three artifacts are read from the current working directory.
with open("columns.json", 'r') as fh:
    columns = json.load(fh)

# NOTE(review): pickle.load and joblib.load can execute arbitrary code embedded
# in the file -- only load artifacts that come from a trusted source.
with open("dtypes.pickle", 'rb') as fh:
    dtypes = pickle.load(fh)

pipeline = joblib.load("pipeline.pickle")

# Predict

In [62]:
# Separate features from the label in each partition, then score both
# partitions with the recovered pipeline.

# --- train partition ---
X_train = df_train.drop(columns=["target"]).copy()
y_train = df_train["target"].copy()

# --- test partition ---
X_test = df_test.drop(columns=["target"]).copy()
y_test = df_test["target"].copy()

# hard predictions for both partitions
y_pred_train = pipeline.predict(X_train)
y_pred_test = pipeline.predict(X_test)

# predict_proba output, computed for the test partition only
y_proba_test = pipeline.predict_proba(X_test)

In [63]:
# Check the test-set precision against the 0.1 minimum success rate.
verify_success_rate(y_true=y_test, y_pred=y_pred_test, min_success_rate=0.1)

(True, 0.21932930639450116)

In [64]:
# Compare precision across officer-defined ethnicity groups, per station and globally.
is_satisfied, problematic_departments, good_deparments, global_precisions = verify_no_discrimination(
    X_test, y_test, y_pred_test,
    sensitive_column='Officer-defined ethnicity', max_diff=0.05, min_samples=5,
)

print("Race")

if is_satisfied:
    print("Requirement satisfied!")
else:
    print("Requirement failed")
    print("Global rates: {}".format(global_precisions))
    print("Num problematic departments: {}".format(len(problematic_departments)))
    print("Num good departments: {}".format(len(good_deparments)))
    # np.mean([]) emits a RuntimeWarning, which this notebook escalates to an
    # error through warnings.filterwarnings("error"); report NaN explicitly
    # when no single department is problematic.
    department_diffs = [diff for _, diff, _ in problematic_departments]
    print("avg diff:", np.mean(department_diffs) if department_diffs else float("nan"))

Race
Requirement failed
Global rates: {'Black': 0.2693548387096774, 'White': 0.2160289243748117, 'Mixed': 0.23863636363636365, 'Asian': 0.19912152269399708, 'Other': 0.13186813186813187}
Num problematic departments: 20
Num good departments: 14
avg diff: 0.335716954455294


In [65]:
# Compare precision across gender groups, per station and globally.
is_satisfied, problematic_departments, good_deparments, global_precisions = verify_no_discrimination(
    X_test, y_test, y_pred_test,
    sensitive_column='Gender', max_diff=0.05, min_samples=5,
)

print("Gender")

if is_satisfied:
    print("Requirement satisfied!")
else:
    print("Requirement failed")
    print("Global rates: {}".format(global_precisions))
    print("Num problematic departments: {}".format(len(problematic_departments)))
    print("Num good departments: {}".format(len(good_deparments)))
    # np.mean([]) emits a RuntimeWarning, which this notebook escalates to an
    # error through warnings.filterwarnings("error"); report NaN explicitly
    # when no single department is problematic.
    department_diffs = [diff for _, diff, _ in problematic_departments]
    print("avg diff:", np.mean(department_diffs) if department_diffs else float("nan"))

Gender
Requirement failed
Global rates: {'Male': 0.2231404958677686, 'Female': 0.18284424379232506, 'Other': 0.0}
Num problematic departments: 16
Num good departments: 19
avg diff: 0.15499673691575364


In [66]:
# Compare precision across age-range groups, per station and globally.
is_satisfied, problematic_departments, good_deparments, global_precisions = verify_no_discrimination(
    X_test, y_test, y_pred_test,
    sensitive_column='Age range', max_diff=0.05, min_samples=5,
)

print("Age")

if is_satisfied:
    print("Requirement satisfied!")
else:
    print("Requirement failed")
    print("Global rates: {}".format(global_precisions))
    print("Num problematic departments: {}".format(len(problematic_departments)))
    print("Num good departments: {}".format(len(good_deparments)))
    # np.mean([]) emits a RuntimeWarning, which this notebook escalates to an
    # error through warnings.filterwarnings("error"); report NaN explicitly
    # when no single department is problematic.
    department_diffs = [diff for _, diff, _ in problematic_departments]
    print("avg diff:", np.mean(department_diffs) if department_diffs else float("nan"))

Age
Requirement failed
Global rates: {'18-24': 0.23685698134539288, '10-17': 0.1574468085106383, '25-34': 0.241112828438949, 'over 34': 0.20525291828793774, 'under 10': 0.0}
Num problematic departments: 21
Num good departments: 15
avg diff: 0.26143752221350935
