In [3]:
import aux_functions
from transformers import TimeTransformer, BoolTransformer

import json
import joblib
import pickle
import requests

import pandas as pd
pd.set_option('display.max_columns', 100)
import os
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
import seaborn as sns
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.pipeline import make_pipeline, Pipeline
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.model_selection import cross_val_score
# from category_encoders import OneHotEncoder
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler, OneHotEncoder, LabelEncoder, RobustScaler, OrdinalEncoder
from sklearn.compose import ColumnTransformer
from sklearn.model_selection import cross_val_score
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB

from sklearn.utils import resample
from sklearn.metrics import precision_score, recall_score, f1_score,  accuracy_score, roc_auc_score, make_scorer, confusion_matrix, roc_curve

# needed to use matplotlib inside jupyter notebook
%matplotlib inline 

# Get the data

In [4]:
# Columns handed to aux_functions.clean_data together with the raw frame
# (presumably the columns it removes -- confirm in aux_functions).
drop_cols = [
    'Self-defined ethnicity',
    'Outcome',
    'Outcome linked to object of search',
    'Removal of more than just outer clothing',
]

# Raw training data, followed by its cleaned counterpart.
df_ = pd.read_csv("data/train.csv")
df_clean = aux_functions.clean_data(df_, drop_cols)

# 70/30 train/test split; the fixed random_state keeps the split repeatable.
df_train, df_test = train_test_split(
    df_clean,
    test_size=0.3,
    random_state=42,
)

# Preview the held-out rows.
df_test.head()

Unnamed: 0,observation_id,Type,Date,Part of a policing operation,Latitude,Longitude,Gender,Age range,Officer-defined ethnicity,Legislation,Object of search,station,target
438050,b7287b3c-fb25-42a5-afa3-e8320817eb6f,Person search,2021-07-30T16:00:00+00:00,False,53.797198,-1.789092,Male,25-34,Asian,Misuse of Drugs Act 1971 (section 23),Controlled drugs,west-yorkshire,0
8521,4f9c9bbb-8806-4a9f-a83d-a7d1c9957527,Person search,2020-12-20T01:26:43+00:00,,51.508963,-0.073894,Male,over 34,White,Misuse of Drugs Act 1971 (section 23),Controlled drugs,city-of-london,0
601361,1e57860f-9cb6-4afe-aabf-00b11c5012d2,Person search,2021-05-12T13:05:27+00:00,,53.397291,-3.034496,Male,over 34,White,Misuse of Drugs Act 1971 (section 23),Controlled drugs,merseyside,0
282181,8d166c18-7351-41e0-8be6-3f9fb574d8d9,Person search,2020-06-15T11:20:00+00:00,,53.041893,-2.97916,Male,over 34,White,Misuse of Drugs Act 1971 (section 23),Controlled drugs,north-wales,0
797640,3a2bdbf8-83f2-448b-bcf7-a2502632d5a8,Person search,2020-08-13T23:00:00+00:00,False,51.022386,-0.353517,Male,10-17,White,Police and Criminal Evidence Act 1984 (section 1),Offensive weapons,sussex,0


# Recover Model

In [2]:
# Load `columns`, `dtypes` and the pipeline object from files in the current
# working directory.
# NOTE: unpickling (pickle.load / joblib.load) can execute arbitrary code
# embedded in the file, so only load artifacts that come from a trusted source.
with open("columns.json", 'r') as columns_fh:
    columns = json.load(columns_fh)

with open("dtypes.pickle", 'rb') as dtypes_fh:
    dtypes = pickle.load(dtypes_fh)

pipeline_recovered = joblib.load("pipeline.pickle")

# Define the app's input-validation function

In [60]:
# Required request fields in validation order: (field name, exact Python type).
_REQUIRED_FIELDS = (
    ("observation_id", str),
    ("Type", str),
    ("Date", str),
    ("Part of a policing operation", bool),
    ("Latitude", float),
    ("Longitude", float),
    ("Gender", str),
    ("Age range", str),
    ("Officer-defined ethnicity", str),
    ("Legislation", str),
    ("Object of search", str),
    ("station", str),
)

# Error responses echo the offending value under the field's own name, except
# for "Type", whose error payload uses a lowercase "type" key. That key is kept
# so that consumers of the error payload see the same shape as before.
_ERROR_KEY_OVERRIDES = {"Type": "type"}


def attempt_predict(obs_dict):
    """Validate a raw observation and return it restricted to the known fields.

    Despite its name, this function performs no prediction: it only checks
    that every required field is present and has exactly the expected type.
    Fields are checked in the order listed in ``_REQUIRED_FIELDS`` and the
    first failure is returned immediately.

    Parameters
    ----------
    obs_dict : mapping
        Incoming request payload, indexed by field name.

    Returns
    -------
    dict
        On success, a new dict holding exactly the twelve required fields (any
        extra keys in ``obs_dict`` are dropped).
        On failure, a two-key dict ``{<field>: <value or None>, "error": <msg>}``
        where ``<value>`` is ``None`` when the field is missing, or the
        offending value when its type is wrong.

    Notes
    -----
    Type checks are exact (``type(value) is expected``), so an ``int`` is not
    accepted for ``Latitude``/``Longitude`` and ``None``/NaN is not accepted
    for ``Part of a policing operation``.
    """
    observation = {}

    for field, expected_type in _REQUIRED_FIELDS:
        error_key = _ERROR_KEY_OVERRIDES.get(field, field)

        try:
            value = obs_dict[field]
        except (LookupError, TypeError):
            # LookupError: key absent from a mapping.
            # TypeError: payload is not subscriptable by field name at all
            # (e.g. None or a list); reported as a missing field as well.
            return {
                error_key: None,
                "error": "{} field is missing from request".format(field),
            }

        # Exact type match on purpose: bool must not pass as a number, and
        # numbers must not pass as bool.
        if type(value) is not expected_type:
            return {
                error_key: value,
                "error": 'Provided "{}" field is not of the correct data type'.format(field),
            }

        observation[field] = value

    return observation

# Generate Observation

In [45]:
# Separate the held-out frame into features (everything except "target")
# and labels (the "target" column); both are independent copies.
X_test = df_test.drop(columns=["target"]).copy()
y_test = df_test["target"].copy()

In [46]:
# Turn the first test row into a plain {column: value} dict.
obs_dict = X_test.iloc[0].to_dict()

obs_dict

{'observation_id': 'b7287b3c-fb25-42a5-afa3-e8320817eb6f',
 'Type': 'Person search',
 'Date': '2021-07-30T16:00:00+00:00',
 'Part of a policing operation': False,
 'Latitude': 53.797198,
 'Longitude': -1.789092,
 'Gender': 'Male',
 'Age range': '25-34',
 'Officer-defined ethnicity': 'Asian',
 'Legislation': 'Misuse of Drugs Act 1971 (section 23)',
 'Object of search': 'Controlled drugs',
 'station': 'west-yorkshire'}

In [69]:
# Hand-written observation used to exercise attempt_predict.
obs_dict = {
    'observation_id': "b7287b3c-fb25-42a5-afa3-e8320817eb6f",
    'Type': 'Person search',
    'Date': '2021-07-30T16:00:00+00:00',
    'Part of a policing operation': False,
    'Latitude': 53.797198,
    'Longitude': -1.789092,
    'Gender': 'Male',
    'Age range': '25-34',
    'Officer-defined ethnicity': 'Asian',
    'Legislation': 'Misuse of Drugs Act 1971 (section 23)',
    'Object of search': 'Controlled drugs',
    'station': 'west-yorkshire',
}

attempt_predict(obs_dict)

{'observation_id': 'b7287b3c-fb25-42a5-afa3-e8320817eb6f',
 'Type': 'Person search',
 'Date': '2021-07-30T16:00:00+00:00',
 'Part of a policing operation': False,
 'Latitude': 53.797198,
 'Longitude': -1.789092,
 'Gender': 'Male',
 'Age range': '25-34',
 'Officer-defined ethnicity': 'Asian',
 'Legislation': 'Misuse of Drugs Act 1971 (section 23)',
 'Object of search': 'Controlled drugs',
 'station': 'west-yorkshire'}