In [1]:
import numpy as np
import pandas
import requests, requests_cache, urllib, io
import json, re
import collections, numbers
requests_cache.install_cache('data/cache')
BASE_URL = 'https://data.police.uk/api/'

In [None]:
# Fetch all available forces*dates, and cache them
availability_url = urllib.parse.urljoin(BASE_URL, 'crimes-street-dates')
availability = requests.get(availability_url).json()
for df in availability:
    date = df['date']
    print(date)
    for force in df['stop-and-search']:
        p = [('force',force), ('date',date)]
        url = urllib.parse.urljoin(BASE_URL, 'stops-force') + '?' + urllib.parse.urlencode(p)
        requests.get(url)

In [9]:
# Fetch a list of all forces * dates
availability_url = urllib.parse.urljoin(BASE_URL, 'crimes-street-dates')
availability = requests.get(availability_url).json()
availability = [(force, df['date']) for df in availability for force in df['stop-and-search']]

def get_dataframe(force, date):
    p = [('force',force), ('date',date)]
    url = urllib.parse.urljoin(BASE_URL, 'stops-force') + '?' + urllib.parse.urlencode(p)
    response = requests.get(url)
    df = response.json()
    df = pandas.json_normalize(df, sep='_')
    df.insert(0, 'month', date)
    df.insert(0, 'force', force)
    return df
df = [get_dataframe(*x) for x in availability]

In [10]:
police = pandas.concat(df, axis=0, ignore_index=True, sort=False)

In [11]:
police.to_csv('data/stop-and-search.csv', index=False)

## Basic tabulations

In [17]:
print(police.groupby('officer_defined_ethnicity').apply(len))
print('Missing values:', np.sum(pandas.isna(police['officer_defined_ethnicity'])))

officer_defined_ethnicity
Asian    151411
Black    288736
Mixed      2563
Other     34755
White    634456
dtype: int64
Missing values: 93172


In [18]:
print(police.groupby('outcome').apply(len))
print('Missing values:', np.sum(pandas.isna(police['outcome'])))

outcome
False                                                           117363
                                                                 24618
A no further action disposal                                    744401
Arrest                                                          138414
Article found - Detailed outcome unavailable                      2773
Caution (simple or conditional)                                   4658
Community resolution                                             57293
Local resolution                                                  4742
Offender cautioned                                                 791
Offender given penalty notice                                     2856
Penalty Notice for Disorder                                      15468
Summons / charged by post                                        17626
Suspect arrested                                                 31217
Suspect summonsed to court                                        301

In [29]:
# Let's separate outcome=False versus outcome=<any other>,
# and discard rows where outcome is left blank.
# (Pandas.groupby will also discard rows where the groupby columns are missing.)

police['y'] = np.where(police['outcome']==False, 'find', 'no-find')
ok = police['outcome'] != ''

police.loc[ok].groupby(['y','officer_defined_ethnicity']).apply(len).unstack()

officer_defined_ethnicity,Asian,Black,Mixed,Other,White
y,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
find,13718,30831,91,3305,59055
no-find,135019,256287,2428,31064,562717
