## Imports and Loading Data

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import re

In [2]:
df_sf_police = pd.read_csv("../data/police_data_sf.csv")
df_sj_police = pd.read_csv("../data/police_data_san_jose.csv")
df_oak_police = pd.read_csv("../data/police_data_oakland.csv")

  has_raised = await self.run_ast_nodes(code_ast.body, cell_name,
  has_raised = await self.run_ast_nodes(code_ast.body, cell_name,


In [3]:
data = {'San Francisco':df_sf_police,'San Jose':df_sj_police,'Oakland':df_oak_police}

In [4]:
for name,df in data.items():
    print (name,df.columns)

San Francisco Index(['raw_row_number', 'date', 'time', 'location', 'lat', 'lng', 'district',
       'subject_age', 'subject_race', 'subject_sex', 'type', 'arrest_made',
       'search_conducted', 'search_vehicle', 'search_basis', 'reason_for_stop',
       'raw_search_vehicle_description', 'raw_result_of_contact_description'],
      dtype='object')
San Jose Index(['raw_row_number', 'date', 'time', 'location', 'lat', 'lng',
       'subject_race', 'type', 'arrest_made', 'citation_issued', 'outcome',
       'contraband_found', 'search_conducted', 'reason_for_stop',
       'use_of_force_description', 'use_of_force_reason', 'raw_search',
       'raw_call_desc', 'raw_race', 'raw_event_desc'],
      dtype='object')
Oakland Index(['raw_row_number', 'date', 'time', 'location', 'lat', 'lng', 'beat',
       'subject_age', 'subject_race', 'subject_sex', 'officer_assignment',
       'contraband_found', 'contraband_drugs', 'contraband_weapons',
       'search_conducted', 'search_basis', 'reason_for_s

# Data Checks

In [5]:
for name,df in data.items():
    print(name,"\nThere are {missing_race} missing race fields in the report file\n".format(missing_race=df.subject_race.isna().sum()))
    try:
        print(name,"\nThere are {missing_gender} missing gender fields in the report file\n".format(missing_gender=df.subject_sex.isna().sum()))
    except:
        print(name,"does not contain subject_sex\n")

San Francisco 
There are 0 missing race fields in the report file

San Francisco 
There are 0 missing gender fields in the report file

San Jose 
There are 5485 missing race fields in the report file

San Jose does not contain subject_sex

Oakland 
There are 0 missing race fields in the report file

Oakland 
There are 90 missing gender fields in the report file



In [6]:
#Printing some of the values we might see in these fields
print(df_sf_police.type.value_counts())
print()
print(df_sf_police.subject_sex.value_counts())
print()
print(df_sf_police.subject_age.value_counts())

vehicular    905070
Name: type, dtype: int64

male      639219
female    265851
Name: subject_sex, dtype: int64

25.0     32958
30.0     30355
26.0     27836
27.0     27540
28.0     27339
         ...  
96.0         5
97.0         4
100.0        4
11.0         3
98.0         2
Name: subject_age, Length: 91, dtype: int64


In [7]:
for name,df in data.items():
    print(name,"\nThere are {search} missing search conducted fields in the report file\n".format(search=df.search_conducted.isna().sum()))
    print(name,"\nThere are {contraband} missing contraband found fields in the report file\n".format(contraband=df.contraband_found.isna().sum()))
    print(name,"\nThere are {arrest} missing arrest fields in the report file\n".format(arrest=df.outcome.isna().sum()))


San Francisco 
There are 0 missing search conducted fields in the report file

San Francisco 
There are 851689 missing contraband found fields in the report file

San Francisco 
There are 15681 missing arrest fields in the report file

San Jose 
There are 3568 missing search conducted fields in the report file

San Jose 
There are 111124 missing contraband found fields in the report file

San Jose 
There are 94745 missing arrest fields in the report file

Oakland 
There are 0 missing search conducted fields in the report file

Oakland 
There are 92250 missing contraband found fields in the report file

Oakland 
There are 34107 missing arrest fields in the report file



In [8]:
print(df_sf_police.search_conducted.value_counts())
print()
print(df_sf_police.contraband_found.value_counts())
print()
print(df_sf_police.arrest_made.value_counts())

False    851689
True      53381
Name: search_conducted, dtype: int64

False    45405
True      7976
Name: contraband_found, dtype: int64

False    893145
True      11925
Name: arrest_made, dtype: int64


## Search/Stop & Contraband/Search Explorations

Some explorations below using charts/tables that show the % of stops that result in searches and % of searches that result in contraband being found.

In [9]:

df_pivot=df_oak_police.pivot_table(values='raw_row_number',index='subject_race', columns=['search_conducted'], aggfunc='count').assign(searches_over_total_stops=lambda x: x[1]/x.sum(axis=1))
df_pivot.style.set_caption("Oakland, CA").set_table_styles([{
    'selector': 'caption',
    'props': [
        ('color', 'black'),
        ('font-size', '16px'),
        ('width', '500px')
    ],
}])

search_conducted,False,True,searches_over_total_stops
subject_race,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
asian/pacific islander,6795,1304,0.161008
black,48900,30025,0.380424
hispanic,19535,6722,0.256008
other,3792,706,0.156959
white,13228,2400,0.153571


In [10]:
df_pivot=df_sf_police.pivot_table(values='raw_row_number',index='subject_race', columns=['search_conducted'], aggfunc='count').assign(searches_over_total_stops=lambda x: x[1]/x.sum(axis=1))
df_pivot.style.set_caption("San Francisco, CA").set_table_styles([{
    'selector': 'caption',
    'props': [
        ('color', 'black'),
        ('font-size', '16px'),
        ('width', '500px')
    ],
}])

search_conducted,False,True,searches_over_total_stops
subject_race,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
asian/pacific islander,154823,2861,0.018144
black,128574,23622,0.155208
hispanic,104569,11445,0.098652
other,103112,3746,0.035056
white,360611,11707,0.031444


In [11]:
df_pivot=df_sj_police.pivot_table(values='raw_row_number',index='subject_race', columns=['contraband_found'], aggfunc='count').assign(contraband_found_over_total_searches=lambda x: x[1]/x.sum(axis=1))
df_pivot.style.set_caption("San Jose, CA").set_table_styles([{
    'selector': 'caption',
    'props': [
        ('color', 'black'),
        ('font-size', '16px'),
        ('width', '450px')
    ],
}])

contraband_found,False,True,contraband_found_over_total_searches
subject_race,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
asian/pacific islander,1767,388,0.180046
black,3577,821,0.186676
hispanic,22891,4266,0.157087
other,1216,358,0.227446
white,5139,1272,0.198409


In [12]:
df_pivot=df_sf_police.pivot_table(values='raw_row_number',index='subject_race', columns=['contraband_found'], aggfunc='count').assign(contraband_found_over_total_searches=lambda x: x[1]/x.sum(axis=1))
df_pivot.style.set_caption("San Francisco, CA").set_table_styles([{
    'selector': 'caption',
    'props': [
        ('color', 'black'),
        ('font-size', '16px'),
        ('width', '450px')
    ],
}])

contraband_found,False,True,contraband_found_over_total_searches
subject_race,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
asian/pacific islander,1828,1033,0.361063
black,21439,2183,0.092414
hispanic,10280,1165,0.101791
other,2985,761,0.20315
white,8873,2834,0.242077


In [13]:
df_pivot=df_oak_police.pivot_table(values='raw_row_number',index='subject_race', columns=['contraband_found'], aggfunc='count').assign(contraband_found_over_total_searches=lambda x: x[1]/x.sum(axis=1))
df_pivot.style.set_caption("Oakland, CA").set_table_styles([{
    'selector': 'caption',
    'props': [
        ('color', 'black'),
        ('font-size', '16px'),
        ('width', '450px')
    ],
}])

contraband_found,False,True,contraband_found_over_total_searches
subject_race,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
asian/pacific islander,1128,176,0.134969
black,25661,4364,0.145346
hispanic,5606,1116,0.166022
other,621,85,0.120397
white,1989,411,0.17125


## Combining interesting fields from 3 dataframes into one

In [14]:
all_df=df_sf_police[['search_conducted','subject_race','contraband_found','reason_for_stop']].copy()
all_df['city']='San Francisco'


sj=df_sj_police[['search_conducted','subject_race','reason_for_stop']].copy()
sj['city']='San Jose'
all_df=all_df.append(sj)

oak=df_oak_police[['search_conducted','subject_race','reason_for_stop']].copy()
oak['city']='Oakland'
all_df=all_df.append(oak)
all_df['count']=1


## Plotting

In [15]:
def compute_ratio(col):
    ratio= col.sum()/col.size
    return (ratio.round(3)*100).astype(float)

def format_mean(num):
    return np.mean(num)

In [18]:
#Updating NAs in the race data to display as category Missing
all_df.loc[pd.isna(all_df.subject_race),'subject_race']='Missing'


grouped_by_race_city=all_df.groupby([all_df.city,all_df.subject_race])


In [19]:
all_df.pivot_table(values='city',index=['subject_race'], columns=['search_conducted'], aggfunc='count').assign(searches_over_total_stops=lambda x: x[1]/x.sum(axis=1))


search_conducted,False,True,searches_over_total_stops
subject_race,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Missing,5462,15,0.002739
asian/pacific islander,175407,6320,0.034777
black,186485,58045,0.237374
hispanic,176358,45324,0.204455
other,114231,6026,0.050109
white,393552,20518,0.049552


In [20]:
g=grouped1.search_conducted.sum()/grouped1.search_conducted.size()
g.to_frame()

NameError: name 'grouped1' is not defined

## Reason for Stops

Reasons
- Muni Code Violation - Municipal Code Violations include regulations of conduct, weapons, noise, building, driving, business licenses, sidewalks, roadways, and any other problem area affecting local cities or counties.
- Vehicle Code Violation - same as Traffic Violation
- MPC = penal code - It is a body of rules that prevents acts that threaten, harm or otherwise endanger the safety and welfare of the public by imposing punishment on those who breach these laws.
- Moving Violation
- Consensual - The law requires an officer to have reasonable suspicion in order to stop or detain someone. Not all officers are honest, though. To get around the requirement of reasonable suspicion, a police officer will sometimes claim in the police report that the encounter with the citizen was consensual when in actuality it was a detainment. This is because if the encounter is consensual, the officer does not need to have reasonable suspicion to stop the person. Not in SF.
- BOL/APB/Watch Bulletin : dangerous or missing persons
- Assistance - only in SF, majority not arrested

In [None]:
#Reasons for stop in SF
test=df_sf_police.dropna(subset=['reason_for_stop'],axis=0)
test.reason_for_stop.value_counts()

In [None]:
#Reasons for stop in SJ
# Lots of and reasons
test=df_sj_police.dropna(subset=['reason_for_stop'],axis=0)
test.reason_for_stop.value_counts().head(20)

In [None]:
#Reasons for stop in Oakland
test=df_oak_police.dropna(subset=['reason_for_stop'],axis=0)
test.reason_for_stop.value_counts().head(20)

In [None]:
test=df_sf_police.dropna(subset=['reason_for_stop'],axis=0)
t=test[test.reason_for_stop.str.contains("BOLO")]
t.groupby(['arrest_made','subject_race']).count()

In [None]:
test=df_sj_police.dropna(subset=['reason_for_stop'],axis=0)
test[test.reason_for_stop.str.contains("\|")].search_conducted.value_counts()

In [None]:
#Since we saw that blacks are searched are much higher rates than whites in SF, what's the reason for the stop?
test=df_sf_police.dropna(subset=['reason_for_stop','subject_race'],axis=0)
test=test[test.subject_race=='black']
test[test.search_conducted].reason_for_stop.value_counts().head(20)

In [None]:
def get_reasons(df):
    
    return df['reason_for_stop'].value_counts().head(5)

group_reasons=df_sf_police.groupby("subject_race")
output=group_reasons.apply(get_reasons)
output.unstack().plot(kind='bar',stacked=True,figsize=(9, 6),title='Reasons for Stop by Racial Group')

In [None]:

grouped=all_df.groupby(['reason_for_stop','subject_race'])['count'].sum()
grouped.unstack()

## Search Basis
- Search Incident to Arrest - police can search body/nearby of person if lawfully arresting a suspected criminal
- Vehicle Inventory - An inventory search is the routine inventory of an impounded vehicle. The Supreme Court has ruled that evidence of a crime uncovered during such a search can be used for prosecution. A warrant is not required because the state, in legally impounding the vehicle, bears legal liability for the contents of the vehicle. 

In [None]:
print(df_sf_police.raw_search_vehicle_description.isna().sum(),"missing values for search description")
test=df_sf_police.dropna(subset=['raw_search_vehicle_description'],axis=0)
test.raw_search_vehicle_description.value_counts()

In [None]:
test=test[test.raw_search_vehicle_description!='No Search']
print(test[test.subject_race=='white'].raw_search_vehicle_description.value_counts())
test[test.subject_race=='black'].raw_search_vehicle_description.value_counts()

In [None]:

test['raw_search_vehicle_description'].str.split(',')

def remove(row):
    output=row.raw_search_vehicle_description.split('|')
    if output[0]!='No Search':
        return output[0].split(',')[0]
    elif output[1]!='No Search':
        return output[1].split(',')[0]
    else:
        return output[2].split(',')[0]

In [None]:
#Use apply  to remove the "|No Search" and the ", Result"
test['reason'] = test.apply(remove, axis = 1)
test.reason.value_counts()

In [None]:
def get_reasons(df):
    
    return df['reason'].value_counts()

group_reasons=test.groupby("subject_race")
output=group_reasons.apply(get_reasons)
output.unstack().plot(kind='bar',stacked=True,figsize=(9, 6),title='Reasons for Search by Racial Group in San Francisco')

Largest discrepancy btw white/black appears to be the probation/parole condition.
Could look specifically at search with/without consent rates?

In [None]:
filtered=test[test.reason.str.contains('Consent')]
filtered.groupby(['contraband_found','subject_race']).reason.value_counts()

In [None]:
filtered=test[test.reason.str.contains('without Consent')]
df_pivot=filtered.pivot_table(values='raw_row_number',index='subject_race', columns=['contraband_found'], aggfunc='count').assign(contraband_found_over_total_searches=lambda x: x[1]/x.sum(axis=1))
df_pivot.style.set_caption("Searches without consent in San Francisco, CA").set_table_styles([{
    'selector': 'caption',
    'props': [
        ('color', 'black'),
        ('font-size', '16px'),
        ('width', '450px')
    ],
}])

## Better Plotting for Search and Contraband Ratios

In [None]:
#There are no missing contraband_found fields if search was conducted in SF
print(df_sf_police[df_sf_police.search_conducted==True].contraband_found.isna().sum())

#There are no missing contraband_found fields if search was conducted in Oakland
print(df_oak_police[df_oak_police.search_conducted==True].contraband_found.isna().sum())

#There are no missing contraband_found fields if search was conducted in SJ
print(df_sj_police[df_sj_police.search_conducted==True].contraband_found.isna().sum())

#Missing search conducted in SF is 0
print(df_sf_police.search_conducted.isna().sum())

#There are no missing search conducted in Oakland
print(df_oak_police.search_conducted.isna().sum())

#There are 3568 missing  search conducted in SJ
print(df_sj_police.search_conducted.isna().sum())

#Investigating why there are 3568 missing search conducted
df_sj_police[df_sj_police.search_conducted.isna()].subject_race.value_counts()
#There are 3568 rows where search_conducted is null in the San Jose data. Most of the subject_races in this data where 
#the search_conducted field is null are other (2622), hispanic (474), white (217), black (129) and asian/pacific islander
#(118). This leaves 8 rows where race and search_conducted were null. We aren't able to infer whether search was
#conducted from other fields so we will leave blank but understand we may be missing some data from searches that were
#conducted but not reported.

print(df_sj_police.subject_race.isna().sum())
df_sj_police[df_sj_police.subject_race.isna()].search_conducted.value_counts()
#The San Jose data is also missing subject_race in 5485 rows. Only 15 of these had search_conducted. This is a very
#small number of the overall data so we are okay leaving this out of our analysis.


In [None]:
temp=df_sf_police.groupby(['subject_race','search_conducted'])['raw_row_number'].count().unstack()
temp=temp.reset_index()
temp=temp.assign(searches_over_total_stops=lambda x: x[1]/x.sum(axis=1))
temp=temp.assign(total=lambda x: x.sum(axis=1))

f, ax = plt.subplots(figsize = (10,8))
sns.set_color_codes('pastel')
sns.barplot(y = 'total', x = 'subject_race', data = temp,
            label = 'Search not conducted', color = 'b', edgecolor = 'w')
sns.set_color_codes('muted')
sns.barplot(y = True, x = 'subject_race', data = temp,
            label = 'Search conducted', color = 'b', edgecolor = 'w')
ax.legend(ncol = 1, loc = 'upper left')
ax.set_title("What percentage of stops result in searches in San Francisco?")
ax.set_xlabel("Subject Race")
ax.set_ylabel("Count")
sns.despine(left = True, bottom = True)

count=0
for p in ax.patches:
    if count < 5:
        ax.text(p.get_x() - 0.01, 
                p.get_height() * 1.02, 
               '{0:.2f}%'.format(temp.searches_over_total_stops[count]*100),   
                color='black', 
                rotation='horizontal', 
                size='large')
        count+=1
    
plt.show()

In [None]:
temp=df_sj_police.groupby(['subject_race','search_conducted'])['raw_row_number'].count().unstack()
temp=temp.reset_index()
temp=temp.assign(searches_over_total_stops=lambda x: x[1]/x.sum(axis=1))
temp=temp.assign(total=lambda x: x.sum(axis=1))

f, ax = plt.subplots(figsize = (10,8))
sns.set_color_codes('pastel')
sns.barplot(y = 'total', x = 'subject_race', data = temp,
            label = 'Search not conducted', color = 'b', edgecolor = 'w')
sns.set_color_codes('muted')
sns.barplot(y = True, x = 'subject_race', data = temp,
            label = 'Search conducted', color = 'b', edgecolor = 'w')
ax.legend(ncol = 1, loc = 'upper right')
ax.set_title("What percentage of stops result in searches in San Jose?")
ax.set_xlabel("Subject Race")
ax.set_ylabel("Count")
sns.despine(left = True, bottom = True)

count=0
for p in ax.patches:
    if count < 5:
        ax.text(p.get_x() - 0.01, 
                p.get_height() * 1.02, 
               '{0:.2f}%'.format(temp.searches_over_total_stops[count]*100),   
                color='black', 
                rotation='horizontal', 
                size='large')
        count+=1
    
plt.show()

In [None]:
temp=df_oak_police.groupby(['subject_race','search_conducted'])['raw_row_number'].count().unstack()
temp=temp.reset_index()
temp=temp.assign(searches_over_total_stops=lambda x: x[1]/x.sum(axis=1))
temp=temp.assign(total=lambda x: x.sum(axis=1))

f, ax = plt.subplots(figsize = (10,8))
sns.set_color_codes('pastel')
sns.barplot(y = 'total', x = 'subject_race', data = temp,
            label = 'Search not conducted', color = 'b', edgecolor = 'w')
sns.set_color_codes('muted')
sns.barplot(y = True, x = 'subject_race', data = temp,
            label = 'Search conducted', color = 'b', edgecolor = 'w')
ax.legend(ncol = 1, loc = 'upper right')
ax.set_title("What percentage of stops result in searches in Oakland?")
ax.set_xlabel("Subject Race")
ax.set_ylabel("Count")
sns.despine(left = True, bottom = True)

count=0
for p in ax.patches:
    if count < 5:
        ax.text(p.get_x() - 0.01, 
                p.get_height() * 1.02, 
               '{0:.2f}%'.format(temp.searches_over_total_stops[count]*100),   
                color='black', 
                rotation='horizontal', 
                size='large')
        count+=1
    
plt.show()

In [None]:
temp=df_sf_police.groupby(['subject_race','contraband_found'])['raw_row_number'].count().unstack()
temp=temp.reset_index()
temp=temp.assign(successful_searches=lambda x: x[1]/x.sum(axis=1))
temp=temp.assign(total=lambda x: x.sum(axis=1))

f, ax = plt.subplots(figsize = (10,8))
sns.set_color_codes('pastel')
sns.barplot(y = 'total', x = 'subject_race', data = temp,
            label = 'Contraband not found', color = 'b', edgecolor = 'w')
sns.set_color_codes('muted')
sns.barplot(y = True, x = 'subject_race', data = temp,
            label = 'Contraband found', color = 'b', edgecolor = 'w')
ax.legend(ncol = 1, loc = 'upper right')
ax.set_title("What percentage of searches find contraband in San Francisco?")
ax.set_xlabel("Subject Race")
ax.set_ylabel("Count")
sns.despine(left = True, bottom = True)

count=0
for p in ax.patches:
    if count < 5:
        ax.text(p.get_x() - 0.01, 
                p.get_height() * 1.02, 
               '{0:.2f}%'.format(temp.successful_searches[count]*100),  
                color='black', 
                rotation='horizontal', 
                size='large')
        count+=1
    
plt.show()

In [None]:
temp=df_oak_police.groupby(['subject_race','contraband_found'])['raw_row_number'].count().unstack()
temp=temp.reset_index()
temp=temp.assign(successful_searches=lambda x: x[1]/x.sum(axis=1))
temp=temp.assign(total=lambda x: x.sum(axis=1))

f, ax = plt.subplots(figsize = (10,8))
sns.set_color_codes('pastel')
sns.barplot(y = 'total', x = 'subject_race', data = temp,
            label = 'Contraband not found', color = 'b', edgecolor = 'w')
sns.set_color_codes('muted')
sns.barplot(y = True, x = 'subject_race', data = temp,
            label = 'Contraband found', color = 'b', edgecolor = 'w')
ax.legend(ncol = 1, loc = 'upper right')
ax.set_title("What percentage of searches find contraband in Oakland?")
ax.set_xlabel("Subject Race")
ax.set_ylabel("Count")
sns.despine(left = True, bottom = True)

count=0
for p in ax.patches:
    if count < 5:
        ax.text(p.get_x() - 0.01, 
                p.get_height() * 1.02, 
               '{0:.2f}%'.format(temp.successful_searches[count]*100),  
                color='black', 
                rotation='horizontal', 
                size='large')
        count+=1
    
plt.show()

In [None]:
temp=df_sj_police.groupby(['subject_race','contraband_found'])['raw_row_number'].count().unstack()
temp=temp.reset_index()
temp=temp.assign(successful_searches=lambda x: x[1]/x.sum(axis=1))
temp=temp.assign(total=lambda x: x.sum(axis=1))

f, ax = plt.subplots(figsize = (10,8))
sns.set_color_codes('pastel')
sns.barplot(y = 'total', x = 'subject_race', data = temp,
            label = 'Contraband not found', color = 'b', edgecolor = 'w')
sns.set_color_codes('muted')
sns.barplot(y = True, x = 'subject_race', data = temp,
            label = 'Contraband found', color = 'b', edgecolor = 'w')
ax.legend(ncol = 1, loc = 'upper right')
ax.set_title("What percentage of searches find contraband in San Jose?")
ax.set_xlabel("Subject Race")
ax.set_ylabel("Count")
sns.despine(left = True, bottom = True)

count=0
for p in ax.patches:
    if count < 5:
        ax.text(p.get_x() - 0.01, 
                p.get_height() * 1.02, 
               '{0:.2f}%'.format(temp.successful_searches[count]*100),  
                color='black', 
                rotation='horizontal', 
                size='large')
        count+=1
    
plt.show()

## Subject Gender

In [None]:
#Searches by gender are way less, somewhat expected that police dont suspect women as much
temp=df_sf_police.groupby(['subject_sex','search_conducted'])['raw_row_number'].count().unstack()
temp=temp.reset_index()
temp=temp.assign(searches_over_total_stops=lambda x: x[1]/x.sum(axis=1))
temp=temp.assign(total=lambda x: x.sum(axis=1))

f, ax = plt.subplots(figsize = (10,8))
sns.set_color_codes('pastel')
sns.barplot(y = 'total', x = 'subject_sex', data = temp,
            label = 'Search not conducted', color = 'b', edgecolor = 'w')
sns.set_color_codes('muted')
sns.barplot(y = True, x = 'subject_sex', data = temp,
            label = 'Search conducted', color = 'b', edgecolor = 'w')
ax.legend(ncol = 1, loc = 'upper left')
ax.set_title("What percentage of stops result in searches in San Francisco?")
ax.set_xlabel("Subject Race")
ax.set_ylabel("Count")
sns.despine(left = True, bottom = True)

count=0
for p in ax.patches:
    if count < 2:
        ax.text(p.get_x() - 0.01, 
                p.get_height() * 1.02, 
               '{0:.2f}%'.format(temp.searches_over_total_stops[count]*100),   
                color='black', 
                rotation='horizontal', 
                size='large')
        count+=1
    
plt.show()

In [None]:
#Racial differences in search rates still exist by gender
temp=df_sf_police[df_sf_police.subject_sex=='female'].groupby(['subject_race','search_conducted'])['raw_row_number'].count().unstack()
temp=temp.reset_index()
temp=temp.assign(searches_over_total_stops=lambda x: x[1]/x.sum(axis=1))
temp=temp.assign(total=lambda x: x.sum(axis=1))

f, ax = plt.subplots(figsize = (10,8))
sns.set_color_codes('pastel')
sns.barplot(y = 'total', x = 'subject_race', data = temp,
            label = 'Search not conducted', color = 'b', edgecolor = 'w')
sns.set_color_codes('muted')
sns.barplot(y = True, x = 'subject_race', data = temp,
            label = 'Search conducted', color = 'b', edgecolor = 'w')
ax.legend(ncol = 1, loc = 'upper left')
ax.set_title("What percentage of stops of females result in searches in San Francisco?")
ax.set_xlabel("Subject Race")
ax.set_ylabel("Count")
sns.despine(left = True, bottom = True)

count=0
for p in ax.patches:
    if count < 5:
        ax.text(p.get_x() - 0.01, 
                p.get_height() * 1.02, 
               '{0:.2f}%'.format(temp.searches_over_total_stops[count]*100),   
                color='black', 
                rotation='horizontal', 
                size='large')
        count+=1
    
plt.show()

In [None]:
#Racial differences in search rates still exist by gender
temp=df_sf_police[df_sf_police.subject_sex=='male'].groupby(['subject_race','search_conducted'])['raw_row_number'].count().unstack()
temp=temp.reset_index()
temp=temp.assign(searches_over_total_stops=lambda x: x[1]/x.sum(axis=1))
temp=temp.assign(total=lambda x: x.sum(axis=1))

f, ax = plt.subplots(figsize = (10,8))
sns.set_color_codes('pastel')
sns.barplot(y = 'total', x = 'subject_race', data = temp,
            label = 'Search not conducted', color = 'b', edgecolor = 'w')
sns.set_color_codes('muted')
sns.barplot(y = True, x = 'subject_race', data = temp,
            label = 'Search conducted', color = 'b', edgecolor = 'w')
ax.legend(ncol = 1, loc = 'upper left')
ax.set_title("What percentage of stops of males result in searches in San Francisco?")
ax.set_xlabel("Subject Race")
ax.set_ylabel("Count")
sns.despine(left = True, bottom = True)

count=0
for p in ax.patches:
    if count < 5:
        ax.text(p.get_x() - 0.01, 
                p.get_height() * 1.02, 
               '{0:.2f}%'.format(temp.searches_over_total_stops[count]*100),   
                color='black', 
                rotation='horizontal', 
                size='large')
        count+=1
    
plt.show()

In [None]:
#Racial differences in contraband rates with women
#Police appear to have higher bias against searching white and Asian women (hence high success rate)
temp=df_sf_police[df_sf_police.subject_sex=='female'].groupby(['subject_race','contraband_found'])['raw_row_number'].count().unstack()
temp=temp.reset_index()
temp=temp.assign(successful_searches=lambda x: x[1]/x.sum(axis=1))
temp=temp.assign(total=lambda x: x.sum(axis=1))

f, ax = plt.subplots(figsize = (10,8))
sns.set_color_codes('pastel')
sns.barplot(y = 'total', x = 'subject_race', data = temp,
            label = 'Contraband not found', color = 'b', edgecolor = 'w')
sns.set_color_codes('muted')
sns.barplot(y = True, x = 'subject_race', data = temp,
            label = 'Contraband found', color = 'b', edgecolor = 'w')
ax.legend(ncol = 1, loc = 'upper right')
ax.set_title("What percentages of females searched have contraband in San Francisco?")
ax.set_xlabel("Subject Race")
ax.set_ylabel("Count")
sns.despine(left = True, bottom = True)

count=0
for p in ax.patches:
    if count < 5:
        ax.text(p.get_x() - 0.01, 
                p.get_height() * 1.02, 
               '{0:.2f}%'.format(temp.successful_searches[count]*100),   
                color='black', 
                rotation='horizontal', 
                size='large')
        count+=1
    
plt.show()

In [None]:
#Racial differences in contraband rates with women
#Police appear to have higher bias against searching white and Asian women (hence high success rate)
temp=df_sf_police[df_sf_police.subject_sex=='male'].groupby(['subject_race','contraband_found'])['raw_row_number'].count().unstack()
temp=temp.reset_index()
temp=temp.assign(successful_searches=lambda x: x[1]/x.sum(axis=1))
temp=temp.assign(total=lambda x: x.sum(axis=1))

f, ax = plt.subplots(figsize = (10,8))
sns.set_color_codes('pastel')
sns.barplot(y = 'total', x = 'subject_race', data = temp,
            label = 'Contraband not found', color = 'b', edgecolor = 'w')
sns.set_color_codes('muted')
sns.barplot(y = True, x = 'subject_race', data = temp,
            label = 'Contraband found', color = 'b', edgecolor = 'w')
ax.legend(ncol = 1, loc = 'upper right')
ax.set_title("What percentages of males searched have contraband in San Francisco?")
ax.set_xlabel("Subject Race")
ax.set_ylabel("Count")
sns.despine(left = True, bottom = True)

count=0
for p in ax.patches:
    if count < 5:
        ax.text(p.get_x() - 0.01, 
                p.get_height() * 1.02, 
               '{0:.2f}%'.format(temp.successful_searches[count]*100),   
                color='black', 
                rotation='horizontal', 
                size='large')
        count+=1
    
plt.show()