In [1]:
import numpy as np
import pandas as pd
import geopandas as gpd
from police_api import PoliceAPI
from matplotlib import pyplot as plt
%matplotlib inline
# Default size (width, height in inches) for every figure in this notebook.
plt.rcParams.update({'figure.figsize': (20, 10)})

In [9]:
# Load street crime data
df_street = pd.read_csv('crime_street_test.csv', index_col=0)
print(len(df_street))  # rows as loaded

# Keep only crimes that have a recorded outcome (rebinding instead of inplace
# so the cell reads top-down and can be re-run from the CSV).
df_street = df_street.dropna(subset=['Last outcome category'])
print(len(df_street))  # rows with an outcome

# Memory requirements: keep every 10th row. The explicit .copy() makes
# df_street an independent frame rather than a slice of the full table, so
# the column assignments in later cells do not operate on a slice (which
# raises SettingWithCopyWarning) and the full table can be released.
df_street = df_street.iloc[::10].copy()
print(len(df_street))  # rows after down-sampling
df_street.head()

6603858
5041108
504111


Unnamed: 0,Crime ID,Longitude,Latitude,Location,LSOA code,LSOA name,Crime type,Last outcome category,datetime,force
0,f6f9ddb405acc0d8174c8dde5ba502b796ee8c9d5762c9...,-2.575411,54.991255,On or near B6318,E01019225,Carlisle 002D,Public order,Investigation complete; no suspect identified,2018-07-01,northumbria
14,c723815ce62034a4fbc4c6c401f5bf8c3927f6f1c71075...,-1.747655,54.966718,On or near Stargate Lane,E01008248,Gateshead 001C,Violence and sexual offences,Investigation complete; no suspect identified,2018-07-01,northumbria
28,293eba9dc35e4c9e270d1effabf7686e82c6b88388e829...,-1.762749,54.976409,On or near Cross Drive,E01008250,Gateshead 001E,Other theft,Investigation complete; no suspect identified,2018-07-01,northumbria
39,cf2b2a4d8f75f059cbbf84f6f4ce7fe09194ac06602688...,-1.764579,54.957371,On or near Alma Terrace,E01008195,Gateshead 002A,Violence and sexual offences,Investigation complete; no suspect identified,2018-07-01,northumbria
55,301eb4918c4119326546ad41b8494fb3d292703d77d321...,-1.78359,54.964838,On or near Shaftoe Close,E01008199,Gateshead 002D,Violence and sexual offences,Investigation complete; no suspect identified,2018-07-01,northumbria


In [10]:
# Drop columns that are not used in the rest of the analysis.
# Rebinding (instead of inplace=True) together with errors='ignore' makes this
# cell safe to re-run: a second execution no longer raises KeyError because
# the columns are already gone.
df_street = df_street.drop(
    columns=['Crime ID', 'LSOA code', 'LSOA name', 'Location', 'force'],
    errors='ignore',
)


In [11]:
from shapely.geometry import Point

In [12]:
# 'Last outcome category' values grouped into three coarse outcome types.
resolved = [
    'Offender given community sentence',
    'Offender given a caution',
    'Offender sent to prison',
    'Local resolution',
    'Defendant found not guilty',
    'Offender given suspended prison sentence',
    'Offender given conditional discharge',
    'Offender given a drugs possession warning',
    'Court result unavailable',
    'Offender given penalty notice',
    'Offender otherwise dealt with',
    'Offender fined',
    'Suspect charged as part of another case',
    'Offender ordered to pay compensation',
    'Offender given absolute discharge',
    'Defendant sent to Crown Court',
    'Offender deprived of property',
]
notresolved = [
    'Unable to prosecute suspect',
    'Investigation complete; no suspect identified',
    'Court case unable to proceed',
    'Formal action is not in the public interest',
]
waiting = [
    'Under investigation',
    'Awaiting court outcome',
    'Status update unavailable',
    'Action to be taken by another organisation',
]


def outcome(c):
    """Classify a 'Last outcome category' value into a coarse outcome type.

    The lists `waiting`, `notresolved` and `resolved` are checked in that
    order; the first one containing `c` decides the label.

    Returns:
        'Awaiting resolution', 'No resolution' or 'Resolution', or None when
        `c` appears in none of the three lists.
    """
    groups = (
        ('Awaiting resolution', waiting),
        ('No resolution', notresolved),
        ('Resolution', resolved),
    )
    for label, categories in groups:
        if c in categories:
            return label
    return None


# Quick sanity check of the classifier.
print(outcome('Under investigation'))

# Label every crime with its coarse outcome type (element-wise via outcome()).
df_street['Type of outcome'] = df_street['Last outcome category'].map(outcome)

# 'Last outcome category' values that are labelled as a prosecution.
prosecuted = [
    'Offender given suspended prison sentence',
    'Awaiting court outcome',
    'Defendant found not guilty',
    'Offender sent to prison',
    'Court result unavailable',
    'Defendant sent to Crown Court',
    'Offender given conditional discharge',
]


def prosecution(c):
    """Label a 'Last outcome category' value by whether it is in `prosecuted`.

    Returns:
        'Prosecution' when `c` is listed in `prosecuted`, otherwise
        'No prosecution/Unknown'.
    """
    return 'Prosecution' if c in prosecuted else 'No prosecution/Unknown'
# Label every crime as prosecuted or not (element-wise via prosecution()).
df_street['Prosecution'] = df_street['Last outcome category'].map(prosecution)


Awaiting resolution


In [18]:
def _to_mercator_points(df):
    """Build a Mercator-projected GeoDataFrame of points from a DataFrame
    with Longitude and Latitude columns, leaving the input untouched."""
    # Work on a copy: callers pass filtered slices of a larger frame, and
    # adding a column to those in place would modify the caller's object and
    # can trigger pandas' SettingWithCopyWarning.
    frame = df.copy()
    frame['Coordinates'] = list(zip(frame.Longitude, frame.Latitude))
    frame['Coordinates'] = frame['Coordinates'].apply(Point)
    gdf = gpd.GeoDataFrame(frame, geometry='Coordinates')
    gdf.crs = {'init': 'epsg:4326'}  # tag the points as EPSG:4326 (lon/lat)
    return gdf.to_crs({'init': 'epsg:3395'})


def plot_scatterplot_forces(ax, gdf_map, df1, df2, df3):
    """Plot three scatterplots of point data on top of a base map.

    Args:
        ax: Matplotlib axes to draw on.
        gdf_map: GeoDataFrame with the base map (e.g. UK police forces);
            it is reprojected to EPSG:3395 and drawn in colour 'C0'.
        df1, df2, df3: DataFrames with `Longitude` and `Latitude` columns.
            They are drawn in colours 'C1', 'C2' and 'C3' respectively and
            are not modified.

    Returns:
        None. Everything is drawn onto `ax`.
    """
    # Project the base map to the same Mercator projection as the points.
    gdf_map = gdf_map.to_crs({'init': 'epsg:3395'})
    gdf_map.plot(color='C0', ax=ax)

    # Small, mostly transparent markers so dense areas remain readable.
    for color, df in zip(('C1', 'C2', 'C3'), (df1, df2, df3)):
        _to_mercator_points(df).plot(ax=ax, color=color, markersize=0.1, alpha=0.1)


# Load geojson file of entire UK
# The plotting cell below uses gdf_uk, so it must be loaded here; with this
# line commented out a fresh kernel run fails with NameError.
gdf_uk = gpd.read_file("uk.geojson")

## Example plot

In [None]:
# Plot the crime locations of each outcome type on top of the UK map.
fig, ax = plt.subplots(figsize=(20, 20))
plot_scatterplot_forces(
    ax,
    gdf_uk,
    # One coordinate frame per outcome type, in the order df1, df2, df3.
    *(
        df_street[df_street['Type of outcome'] == label][['Longitude', 'Latitude']]
        for label in ('Awaiting resolution', 'Resolution', 'No resolution')
    )
)
plt.axis('off')