In [397]:
import pandas as pd
import numpy as np
import chart_studio.plotly as py
import cufflinks as cf
import seaborn as sns
import plotly.express as px
import matplotlib as plt
%matplotlib inline
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot

init_notebook_mode(connected=True)
cf.go_offline()

In [406]:
df = pd.read_csv('2021-06-metropolitan-stop-and-search.csv')
df.columns

Index(['Type', 'Date', 'Part of a policing operation', 'Policing operation',
       'Latitude', 'Longitude', 'Gender', 'Age range',
       'Self-defined ethnicity', 'Officer-defined ethnicity', 'Legislation',
       'Object of search', 'Outcome', 'Outcome linked to object of search',
       'Removal of more than just outer clothing'],
      dtype='object')

In [245]:
def criminality(df):
    if df['Outcome'] == "A no further action disposal" or df['Outcome'] == "Caution (simple or conditional)":
        return 0
    else:
        return 1
    
df['criminality'] = df.apply(criminality, axis=1)

In [246]:
def ethnicity(df):
    et = df['Self-defined ethnicity']
    if "White" in et and (not 'Mixed' in et):
        return 'White'
    elif "Black" in et and (not 'Mixed' in et):
        return 'Black'
    elif "Asian" in et and (not 'Mixed' in et):
        return 'Asian'
    elif 'Mixed' in et:
        return 'Mixed'
    elif et == "Other ethnic group - Any other ethnic group":
        return 'Other'
    else:
        return 'Not stated'

df['Self-defined ethnicity'].fillna('Other ethnic group - Not stated', inplace=True)
df['ethnicity'] = df.apply(ethnicity, axis=1)

In [247]:
counts = df['ethnicity'].value_counts()
ethnicities = pd.DataFrame({'ethnicity':counts.index, 'count':counts.values})
ethnicities

Unnamed: 0,ethnicity,count
0,White,5215
1,Not stated,4324
2,Black,4067
3,Asian,2023
4,Mixed,580
5,Other,476


In [260]:
counts = df['ethnicity'].value_counts()
ethnicities = pd.DataFrame({'ethnicity':counts.index, 'total':counts.values})
df_nocrime = df[df['criminality'] == 0]
counts_nocrime = df_nocrime['ethnicity'].value_counts()
df_crime = df[df['criminality'] == 1]
counts_crime = df_crime['ethnicity'].value_counts()
ethnicities['crime'] = counts_crime.values
ethnicities['nocrime'] = counts_nocrime.values
ethnicities

Unnamed: 0,ethnicity,total,crime,nocrime
0,White,5215,1559,3656
1,Not stated,4324,1148,3496
2,Black,4067,828,2919
3,Asian,2023,586,1437
4,Mixed,580,197,383
5,Other,476,144,332


In [371]:
ethnicities['percentage_crime'] = ethnicities['crime'] / ethnicities['total'] 
ethnicities['percentage_nocrime'] = ethnicities['nocrime'] / ethnicities['total'] 
ethnicities

Unnamed: 0,ethnicity,total,crime,nocrime,percentage,percentage_crime,percentage_nocrime
0,White,5215,1559,3656,0.298945,0.298945,0.701055
1,Not stated,4324,1148,3496,0.265495,0.265495,0.808511
2,Black,4067,828,2919,0.20359,0.20359,0.717728
3,Asian,2023,586,1437,0.289669,0.289669,0.710331
4,Mixed,580,197,383,0.339655,0.339655,0.660345
5,Other,476,144,332,0.302521,0.302521,0.697479


In [267]:
fig = go.Figure()


total = px.bar(ethnicities, x="count", y="ethnicity", color='ethnicity', 
             color_discrete_sequence=px.colors.qualitative.T10)

crime = px.bar(ethnicities_crime, x="count", y="ethnicity", color='ethnicity', 
             color_discrete_sequence=px.colors.qualitative.T10)

nocrime = px.bar(ethnicities_crime, x="count", y="ethnicity", color='ethnicity', 
             color_discrete_sequence=px.colors.qualitative.T10)

fig.add_traces(
    list(crime.select_traces())
)
fig.add_traces(
    list(nocrime.select_traces())
)

fig.show()

In [292]:
# Create figure
fig = go.Figure()

# Add traces to figure
fig.add_trace(go.Bar(
    x=ethnicities['ethnicity'],
    y=ethnicities['nocrime'],
    name='Nocrime',
    marker_color='#4C78A8'
))

fig.add_trace(go.Bar(
    x=ethnicities['ethnicity'],
    y=ethnicities['crime'],
    name='Crime',
    marker_color='#E45756'
))

fig.update_layout(barmode='stack',
                  title = 'Stacked bar chart!')

# Set graph title, x-axis title, and y-axis title
fig.update_layout(
    title="Stop and searches by ethnicity and criminal outcome",
    xaxis_title="Ethnicity",
    yaxis_title="Number of People"
)

fig.show()

In [306]:
# Create figure
fig = go.Figure()

# Add traces to figure
fig.add_trace(go.Bar(
    x=ethnicities['ethnicity'],
    y=ethnicities['total'],
    name='Nocrime',
    marker_color='#4C78A8'
))

fig.add_trace(go.Bar(
    x=ethnicities['ethnicity'],
    y=ethnicities['crime'],
    name='Crime',
    marker_color='#E45756'
))


# Set graph title, x-axis title, and y-axis title
fig.update_layout(
    title="Stop and searches by ethnicity and criminal outcome",
    xaxis_title="Ethnicity",
    yaxis_title="Number of People"
)

fig.show()

In [377]:
trace1 = go.Bar(
    x=ethnicities['ethnicity'],
    y=ethnicities['crime'],
    #text=ethnicities['percentage_crime'].apply(lambda x: '{percent:.2%}'.format(percent=x)),
    name='Crime',
    marker_color="#E45756",
)

trace2 = go.Bar(
    x=ethnicities['ethnicity'],
    y=ethnicities['nocrime'],
    #text=ethnicities['percentage_nocrime'].apply(lambda x: '{percent:.2%}'.format(percent=x)),
    name='Non-crime',
    marker_color="#54A54B"
)

trace3 = go.Bar(
    x=ethnicities['ethnicity'],
    y=ethnicities['total'],
    name='Total',
    marker_color="#4C78A8"
)
layout = go.Layout(
    updatemenus=[
        dict(
            type="dropdown",
            active=0,
            xanchor="right",
            yanchor="top",
            direction="down",
            buttons=list([
                dict(label="Summary",
                     method="update",
                     args=[{"visible": [True, True, True]
                     },
                     {"title": "Summary of stop and searches by ethnicity",
                     }]),
                dict(label="Crime",
                     method="update",
                     args=[{"visible": [True, False, False]},
                           {"title": "Criminal officer judgment stop and searches by ethnicity"}]),
                dict(label="Non-crime",
                     method="update",
                     args=[{"visible": [False, True, False]},
                     {"title": "Non-criminal officer judgment top and searches with by ethnicity"}]),
                dict(label="Total",
                     method="update",
                     args=[{"visible": [False, False, True]
                     },
                     {"title": "Total stop and searches by ethnicity",
                     }])
            ]),
        )
    ])

fig = go.Figure(data=[trace1, trace2, trace3], layout=layout)

fig.update_layout(
    xaxis_title="Ethnicity",
    yaxis_title="Number of People",
    title="Summary of stop and searches by ethnicity"
)
fig.show()

In [423]:
import datetime 

df["Date"] = pd.to_datetime(df["Date"])
df["weekday"] = df["Date"].dt.day_name()
df['date'] = df['Date'].dt.date

2021-06-11    734
2021-06-05    679
2021-06-25    666
2021-06-24    662
2021-06-04    648
2021-06-19    634
2021-06-09    626
2021-06-03    623
2021-06-10    620
2021-06-23    614
2021-06-07    596
2021-06-17    584
2021-06-02    583
2021-06-22    582
2021-06-06    554
2021-06-12    552
2021-06-01    550
2021-06-18    546
2021-06-08    532
2021-06-16    531
2021-06-26    530
2021-06-30    516
2021-06-15    505
2021-06-28    494
2021-06-21    457
2021-06-20    438
2021-06-29    431
2021-06-14    426
2021-06-13    394
2021-06-27    337
2021-05-31     41
Name: date, dtype: int64

In [434]:
date_counts = df['date'].value_counts()
dates = pd.DataFrame({'date':date_counts.index, 'count':date_counts.values})
dates

Unnamed: 0,date,count
0,2021-06-11,734
1,2021-06-05,679
2,2021-06-25,666
3,2021-06-24,662
4,2021-06-04,648
5,2021-06-19,634
6,2021-06-09,626
7,2021-06-03,623
8,2021-06-10,620
9,2021-06-23,614


In [447]:
go.Scatter(x=dates['date'],  y=dates['count'])


Scatter({
    'x': array([datetime.date(2021, 6, 11), datetime.date(2021, 6, 5),
                datetime.date(2021, 6, 25), datetime.date(2021, 6, 24),
                datetime.date(2021, 6, 4), datetime.date(2021, 6, 19),
                datetime.date(2021, 6, 9), datetime.date(2021, 6, 3),
                datetime.date(2021, 6, 10), datetime.date(2021, 6, 23),
                datetime.date(2021, 6, 7), datetime.date(2021, 6, 17),
                datetime.date(2021, 6, 2), datetime.date(2021, 6, 22),
                datetime.date(2021, 6, 6), datetime.date(2021, 6, 12),
                datetime.date(2021, 6, 1), datetime.date(2021, 6, 18),
                datetime.date(2021, 6, 8), datetime.date(2021, 6, 16),
                datetime.date(2021, 6, 26), datetime.date(2021, 6, 30),
                datetime.date(2021, 6, 15), datetime.date(2021, 6, 28),
                datetime.date(2021, 6, 21), datetime.date(2021, 6, 20),
                datetime.date(2021, 6, 29), datetime.date(2021,