In [1]:
import pandas as pd
import numpy as np

In [2]:
df1 = pd.read_excel('source/21852_P960944_Traffic_Stops_2022_YTD.xlsx', sheet_name='TRAFFIC STOPS 1')

In [3]:
df2 = pd.read_excel('source/21852_P960944_Traffic_Stops_2022_YTD.xlsx', sheet_name='TRAFFIC STOPS 2')

In [4]:
# read in july 3-15
df3 = pd.read_excel('source/21884_P962815_Traffic_Stop_2024_July.xlsx', sheet_name='TRAFFIC STOPS')

In [5]:
df_og = pd.concat([df1, df2], ignore_index=True)

In [6]:
df_og_july = pd.concat([df_og, df3], ignore_index=True)

In [7]:
# make a working copy
df = df_og.copy()

In [8]:
# make a copy thru july 15
df = df_og_july.copy()

In [9]:
# load arrests data
arr1 = pd.read_excel('source/21852_P960944_Traffic_Stops_2022_YTD.xlsx', sheet_name='TRAFFIC STOP ARREST')

In [10]:
arr2 = pd.read_excel('source/21884_P962815_Traffic_Stop_2024_July.xlsx', sheet_name='TRAFFIC STOP ARREST')

In [11]:
arr = pd.concat([arr1, arr2], ignore_index=True)

In [12]:
# Process data

In [13]:
df['DATE/TIME OF STOP'] = pd.to_datetime(df['DATE/TIME OF STOP'])

In [14]:
df['month_year'] = df['DATE/TIME OF STOP'].dt.strftime('%m/%Y')

In [15]:
df['day'] = df['DATE/TIME OF STOP'].dt.strftime('%Y-%m-%d')

In [16]:
df['year'] = df['DATE/TIME OF STOP'].dt.year

In [14]:
# create subsets

In [17]:
# first 6.5 months
conditions = [
    (df['day']>='2023-01-01') & (df['day']<='2023-07-15'),
    (df['day']>='2022-01-01') & (df['day']<='2022-07-15'),
    (df['day']>='2024-01-01') & (df['day']<='2024-07-15')]
choices = ['1st_half_2023', '1st_half_2022', '1st_half_2024']
df['period'] = np.select(conditions, choices, default='')

In [18]:
# subset for 9.5 months before and after snelling was confirmed
conditions = [
    (df['day']>='2023-10-01') & (df['day']<='2024-07-15'),
    (df['day']>='2022-10-01') & (df['day']<='2023-07-15')]
choices = ['after', 'before']
df['snelling'] = np.select(conditions, choices, default='')

In [19]:
# create district col

In [20]:
def get_district(beat):
    if len(beat) == 3:
        district = beat[0]
    else:
        district = beat[:2]
    return district

df['LOCATION BEAT'] = df['LOCATION BEAT'].astype(str)
df['district'] = df['LOCATION BEAT'].apply(get_district)

In [21]:
df.head()

Unnamed: 0,DATE,DATE/TIME OF STOP,STOP BLOCK-LEVEL ADDRESS,LOCATION BEAT,CPD UNIT NO,YOB,RACE,SEX,TICKET ISSUED Y/N,INITIAL STATUTE,...,DRV/PASS OTHER,DRV/PASS OTHER AMT,CONTACT CARD NO,REASON FOR STOP,month_year,day,year,period,snelling,district
0,22870227,2022-01-01 00:01:00,86XX S WOLCOTT AVE,614,6.0,1958,BLACK,M,N,"9-76-050(D) LIGHT, LICENSE PLATE",...,,,,,01/2022,2022-01-01,2022,1st_half_2022,,6
1,21771149,2022-01-01 00:03:00,56XX W BELMONT AVE,2514,16.0,1964,WHITE,M,N,9-36-020(B) OVERTAKING VEHICLE ON RIGHT-DRIVIN...,...,,,,,01/2022,2022-01-01,2022,1st_half_2022,,25
2,22882362,2022-01-01 00:09:00,2XX S WOOD ST,1225,213.0,1979,BLACK,M,N,9-76-050(B) HEADLIGHT TWO REQUIRED-MOTOR VEHICLE,...,,,,,01/2022,2022-01-01,2022,1st_half_2022,,12
3,21779781,2022-01-01 00:24:00,18XX W PETERSON AVE,2413,24.0,1983,BLACK,M,N,9-80-020 RED LIGHTS & FLASHING,...,,,,,01/2022,2022-01-01,2022,1st_half_2022,,24
4,23019248,2022-01-01 00:30:00,46XX W PALMER ST,2522,25.0,1991,HISPANIC,F,N,"9-76-050(D) LIGHT, LICENSE PLATE",...,,,,,01/2022,2022-01-01,2022,1st_half_2022,,25


In [22]:
crosswalk = df[['LOCATION BEAT', 'district']].copy()

crosswalk.drop_duplicates(subset=['LOCATION BEAT'], keep='first', inplace=True)

# export beat and distrct crosswalk
crosswalk.to_csv('output/beats_to_districts.csv')

In [23]:
arr['DATE/TIME'] = pd.to_datetime(arr['DATE/TIME'])

In [24]:
arr['month_year'] = arr['DATE/TIME'].dt.strftime('%m/%Y')

In [25]:
arr['day'] = arr['DATE/TIME'].dt.strftime('%Y-%m-%d')

In [26]:
# subset for 9 months before and after snelling was confirmed
conditions = [
    (arr['day']>='2023-10-01') & (arr['day']<='2024-07-15'),
    (arr['day']>='2022-10-01') & (arr['day']<='2023-07-15')]
choices = ['after', 'before']
arr['snelling'] = np.select(conditions, choices, default='')

In [27]:
# first 6.5 months
conditions = [
    (arr['day']>='2023-01-01') & (arr['day']<='2023-07-15'),
    (arr['day']>='2022-01-01') & (arr['day']<='2022-07-15'),
    (arr['day']>='2024-01-01') & (arr['day']<='2024-07-15')]
choices = ['1st_half_2023', '1st_half_2022', '1st_half_2024']
arr['period'] = np.select(conditions, choices, default='')

# Create reason for stop crosswalk

CPD provided the IDOT summary reason for stop (e.g., moving, equipment) for each statute in the updated July 2024 FOIA. I'll try to use these to map statutes in the first FOIA

In [32]:
reason_dict = dict(zip(df3['INITIAL STATUTE'],df3['REASON FOR STOP']))

In [33]:
df['reason_for_stop'] = df['INITIAL STATUTE'].map(reason_dict)

In [34]:
df.groupby('reason_for_stop', dropna=False).size()

reason_for_stop
EQUIPMENT    356420
LICENSE      517115
MOVING       355032
NaN           12785
dtype: int64

In [23]:
(12785/len(df))*100

1.0299254361373729

In [None]:
# mapped 99% of all stops by their reason!!

# Total stops

In [108]:
df.groupby('period', dropna=False).size()

period
                 469774
1st_half_2022    268993
1st_half_2023    316513
1st_half_2024    186072
dtype: int64

130k less stops than first half of 2023, a 41% decrease

In [109]:
186072-316513

-130441

In [110]:
-130441/316513

-0.4121189335035212

In [115]:
# 197 days in first half 2024
186072/197

944.5279187817259

83k fewer stops than first half of 2022, a more than 30% decrease

In [111]:
186072-268993

-82921

In [112]:
-82921/268993

-0.30826452732970744

177,342 stops in the first half of 2024 is still more than the total number of stops CPD made annually in 2010-2015 (according to IDOT).
<br>
<br>
sqlite> select year, count(*) from stops where AgencyName="CHICAGO POLICE"  group by year;
<br>
<table>
2004|242548
2005|202951
2006|202104
2007|202823
2008|201917
2009|193851
2010|163895
2011|140942
2012|116476
2013|100676
2014|87355
2015|85965
2016|187133
2017|285065
2018|489460
2019|598515
2020|327290
2021|377899
2022|511738
</table>

45% black, 34% hispanic, just 15% white.

In [118]:
# by race
r = df[df['period'] == '1st_half_2024'].groupby('RACE').size().reset_index(name='count')
r['pct'] = r['count']/r['count'].sum()
r

Unnamed: 0,RACE,count,pct
0,AMER INDIAN / ALASKAN NATIVE,1146,0.006159
1,ASIAN,8713,0.046827
2,BLACK,83794,0.450341
3,HAWAIIAN/PACIFIC ISLANDER,536,0.002881
4,HISPANIC,63363,0.340537
5,WHITE,28516,0.153256


In [121]:
0.450341/0.153256

2.9384885420472933

In [215]:
83794/28516

2.9384906719034927

In [122]:
0.340537/0.153256

2.2220141462650727

In [119]:
# by race
r = df[df['period'] == '1st_half_2023'].groupby('RACE').size().reset_index(name='count')
r['pct'] = r['count']/r['count'].sum()
r

Unnamed: 0,RACE,count,pct
0,AMER INDIAN / ALASKAN NATIVE,1667,0.005267
1,ASIAN,11455,0.036195
2,BLACK,167671,0.5298
3,HAWAIIAN/PACIFIC ISLANDER,1415,0.004471
4,HISPANIC,91419,0.288862
5,WHITE,42853,0.135405


In [124]:
0.529800/0.135405

3.9127063254680405

In [125]:
0.288862/0.135405

2.1333185628300284

In [120]:
# by race
r = df[df['period'] == '1st_half_2022'].groupby('RACE').size().reset_index(name='count')
r['pct'] = r['count']/r['count'].sum()
r

Unnamed: 0,RACE,count,pct
0,AMER INDIAN / ALASKAN NATIVE,1322,0.004915
1,ASIAN,8605,0.031992
2,BLACK,156542,0.582005
3,HAWAIIAN/PACIFIC ISLANDER,1019,0.003789
4,HISPANIC,66156,0.245961
5,WHITE,35326,0.131338


In [126]:
0.582005/0.131338

4.431352693051515

In [127]:
0.245961/0.131338

1.8727329485754314

In [171]:
# by race by year
r = df[df['year'] == 2024].groupby('RACE').size().reset_index(name='count')
r['pct'] = r['count']/r['count'].sum()
r

Unnamed: 0,RACE,count,pct
0,AMER INDIAN / ALASKAN NATIVE,1146,0.006159
1,ASIAN,8713,0.046827
2,BLACK,83794,0.450341
3,HAWAIIAN/PACIFIC ISLANDER,536,0.002881
4,HISPANIC,63363,0.340537
5,WHITE,28516,0.153256


In [208]:
df.groupby('year').size()

year
2022    516376
2023    538904
2024    186072
dtype: int64

# Pretextual stops

In [None]:
df[(df['TICKET ISSUED Y/N'] == 'N') & ]

In [26]:
# number of "pretextual stops"
df.loc[
    (df['TICKET ISSUED Y/N'] == 'N') &
    (df['reason_for_stop'] != 'MOVING')
].groupby('year').size()

year
2022    347766
2023    382903
2024    129033
dtype: int64

# Total stops by month

In [95]:
df.groupby('month_year').size().reset_index(name='count')

Unnamed: 0,month_year,count
0,01/2022,34125
1,01/2023,60379
2,01/2024,46628
3,02/2022,37665
4,02/2023,54203
5,02/2024,35144
6,03/2022,40166
7,03/2023,51886
8,03/2024,32445
9,04/2022,44027


Stops declined in 2024.

In [CPD stops by month](https://docs.google.com/spreadsheets/d/1npZSD9v7Fjfw0vAodHsmfOL6pgSq3V6fiIzNOjGhmx8/edit?usp=sharing)

In [87]:
df.groupby('snelling').size()

snelling
          507440
after     297085
before    429085
dtype: int64

In [88]:
297085-429085

-132000

In [89]:
-132000/429085

-0.30763135509281375

In the 8 months since Snelling has been in office, traffic stops have declined by 132,000, or 30%, since the previous Oct to Jun period.

# Stops by race by month

In [17]:
# 145 stops missing race
df.groupby('RACE', dropna=False).size()

RACE
AMER IND/ALASKAN NATIVE             12
AMER INDIAN / ALASKAN NATIVE      6732
ASIAN                            45545
BLACK                           650793
HAWAIIAN/PACIFIC ISLANDER         5165
HISPANIC                        356272
WHITE                           168946
NaN                                145
dtype: int64

In [19]:
pd.pivot_table(df,
              index='month_year',
              columns='RACE',
              values='DATE/TIME OF STOP', # every record has date/time
              aggfunc='count')

RACE,AMER IND/ALASKAN NATIVE,AMER INDIAN / ALASKAN NATIVE,ASIAN,BLACK,HAWAIIAN/PACIFIC ISLANDER,HISPANIC,WHITE
month_year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
01/2022,,225.0,1169.0,19263.0,114.0,8347.0,5003.0
01/2023,,332.0,2275.0,32175.0,229.0,17209.0,8154.0
01/2024,,283.0,2012.0,21072.0,123.0,16038.0,7099.0
02/2022,,186.0,1262.0,21866.0,91.0,9114.0,5142.0
02/2023,,265.0,2104.0,28898.0,294.0,15048.0,7588.0
02/2024,,201.0,1480.0,16076.0,96.0,11729.0,5561.0
03/2022,,194.0,1258.0,23637.0,169.0,9539.0,5367.0
03/2023,,257.0,1962.0,27587.0,280.0,14373.0,7418.0
03/2024,,208.0,1495.0,14406.0,92.0,11382.0,4861.0
04/2022,,195.0,1271.0,26241.0,206.0,10872.0,5236.0


Compare racial breakdown of stops in the first 6 months of 2024 with previous years

In [60]:
pd.pivot_table(df,
              index='RACE',
              columns='period',
              values='DATE/TIME OF STOP',
              aggfunc='count')

period,Unnamed: 1_level_0,1st_half_2022,1st_half_2023,1st_half_2024
RACE,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
AMER IND/ALASKAN NATIVE,12.0,,,
AMER INDIAN / ALASKAN NATIVE,2848.0,1245.0,1554.0,1085.0
ASIAN,18724.0,7824.0,10740.0,8257.0
BLACK,270476.0,143883.0,156562.0,79872.0
HAWAIIAN/PACIFIC ISLANDER,2395.0,932.0,1334.0,504.0
HISPANIC,150674.0,60549.0,84678.0,60371.0
WHITE,68980.0,32507.0,40210.0,27249.0


In [41]:
g = df24_1st.groupby('RACE').size().reset_index(name='stops')
g['pct'] = g['stops']/g['stops'].sum()
g

Unnamed: 0,RACE,stops,pct
0,AMER INDIAN / ALASKAN NATIVE,1085,0.006118
1,ASIAN,8257,0.046561
2,BLACK,79872,0.450394
3,HAWAIIAN/PACIFIC ISLANDER,504,0.002842
4,HISPANIC,60371,0.340429
5,WHITE,27249,0.153656


In [42]:
g = df23_1st.groupby('RACE').size().reset_index(name='stops')
g['pct'] = g['stops']/g['stops'].sum()
g

Unnamed: 0,RACE,stops,pct
0,AMER INDIAN / ALASKAN NATIVE,1554,0.005266
1,ASIAN,10740,0.036397
2,BLACK,156562,0.530578
3,HAWAIIAN/PACIFIC ISLANDER,1334,0.004521
4,HISPANIC,84678,0.286968
5,WHITE,40210,0.136269


In [43]:
g = df22_1st.groupby('RACE').size().reset_index(name='stops')
g['pct'] = g['stops']/g['stops'].sum()
g

Unnamed: 0,RACE,stops,pct
0,AMER INDIAN / ALASKAN NATIVE,1245,0.005042
1,ASIAN,7824,0.031684
2,BLACK,143883,0.582664
3,HAWAIIAN/PACIFIC ISLANDER,932,0.003774
4,HISPANIC,60549,0.245197
5,WHITE,32507,0.131639


The share of Black drivers stopped has declined by 13% from 58% in the first half of 2022 to 45% in the first half of 2024. Black drivers did not make up the majority of drivers stopped by CPD in the first half 2024. <br>

<b>TODO:</b> Is it the first time since 2004 that Black drivers did not make up the majority of CPD stops?

# Stops by beat

In [147]:
beat_pivot = pd.pivot_table(df,
              index='LOCATION BEAT',
              columns='period',
              values='DATE/TIME OF STOP',
              aggfunc='count')

In [148]:
beat_pivot.reset_index(inplace=True)

In [149]:
beat_pivot

period,LOCATION BEAT,Unnamed: 2,1st_half_2022,1st_half_2023,1st_half_2024
0,1011,2605.0,1939.0,1981.0,755.0
1,1012,1582.0,657.0,1094.0,474.0
2,1013,3430.0,1515.0,2388.0,1282.0
3,1014,2428.0,1170.0,1646.0,738.0
4,1021,2078.0,1347.0,1493.0,602.0
...,...,...,...,...,...
272,931,1065.0,718.0,853.0,295.0
273,932,742.0,465.0,432.0,268.0
274,933,638.0,462.0,536.0,183.0
275,934,830.0,448.0,496.0,305.0


In [150]:
len(df[df['period'] == '1st_half_2024']) - len(df[df['period'] == '1st_half_2023'])

-130441

In [151]:
(len(df[df['period'] == '1st_half_2024']) - len(df[df['period'] == '1st_half_2023']))/len(df[df['period'] == '1st_half_2023'])

-0.4121189335035212

In [152]:
# export for flourish
beat_pivot.to_csv('output/first_half_by_beat.csv', index=False)

[Flourish viz](https://public.flourish.studio/visualisation/18743732/) <br>
In [CPD Jan thru Jun stops by beat 2022-2024](https://docs.google.com/spreadsheets/d/1m4uKnwavTyTRuGq1hvJF04ZciVCPSVIqIZYl3D_xshs/edit?usp=sharing)

In [123]:
# snelling
beat_pivot_snelling = pd.pivot_table(df,
              index='LOCATION BEAT',
              columns='snelling',
              values='DATE/TIME OF STOP',
              aggfunc='count')

In [126]:
beat_pivot_snelling.reset_index(inplace=True)

In [128]:
# export for flourish
beat_pivot_snelling.to_csv('output/snelling_by_beat.csv', index=False)

In [80]:
df[df['LOCATION BEAT'] == 1413].groupby('RACE').size()

RACE
AMER INDIAN / ALASKAN NATIVE      11
ASIAN                             88
BLACK                            407
HAWAIIAN/PACIFIC ISLANDER          5
HISPANIC                        1834
WHITE                            697
dtype: int64

In [81]:
df[df['LOCATION BEAT'] == 1414].groupby('RACE').size()

RACE
AMER INDIAN / ALASKAN NATIVE      28
ASIAN                            125
BLACK                            647
HAWAIIAN/PACIFIC ISLANDER         15
HISPANIC                        1594
WHITE                            999
dtype: int64

In [24]:
# race by beat for first six months or post-snelling
# race stopped in districts in the first half of 2024
race_by_beat_2024 = pd.pivot_table(df[df['period'] == '1st_half_2024'],
              index='LOCATION BEAT',
              columns='RACE',
              values='DATE/TIME OF STOP',
              aggfunc='count',
              dropna=False)

In [25]:
race_by_beat_2024.reset_index(inplace=True)

In [26]:
race_by_beat_2024.to_csv('output/race_by_beat_2024.csv')

In [None]:
# race by beat for first six months of 2023 or pre-snelling

In [28]:
# beats in __ district by month
pd.pivot_table(df[df['district'] == '10'],
              index='month_year',
              columns='LOCATION BEAT',
              values='DATE/TIME OF STOP',
              aggfunc='count',
              dropna=False)

LOCATION BEAT,1011,1012,1013,1014,1021,1022,1023,1024,1031,1032,1033,1034
month_year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
01/2022,278,102,262,159,190,202,81,274,385,389,224,205
01/2023,475,241,509,379,346,506,331,784,706,728,591,587
01/2024,238,130,342,194,182,395,255,502,277,450,396,369
02/2022,299,101,219,154,211,164,82,231,197,275,177,220
02/2023,376,210,430,336,286,386,302,681,443,554,438,392
02/2024,142,106,307,142,100,201,145,309,235,271,281,204
03/2022,322,100,216,182,225,189,63,336,274,410,218,253
03/2023,264,141,360,230,173,285,286,629,393,503,366,321
03/2024,136,95,299,185,119,245,186,296,230,308,264,235
04/2022,359,125,250,200,218,203,126,355,280,472,235,326


In [154]:
df.groupby('LOCATION BEAT').size().reset_index(name='stops').sort_values('stops', ascending=False).head(10)

Unnamed: 0,LOCATION BEAT,stops
14,1112,16223
112,1834,14642
73,1533,13716
240,725,13173
68,1522,12542
128,2011,12291
9,1032,12065
7,1024,12029
110,1832,11766
109,1831,11597


# Stops by district

In [204]:
# snelling
pd.pivot_table(df,
              index='district',
              columns='snelling',
              values='DATE/TIME OF STOP',
              aggfunc='count')

snelling,Unnamed: 1_level_0,after,before
district,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1,20035.0,11896.0,20037.0
10,37044.0,23923.0,35531.0
11,43743.0,25756.0,43027.0
12,22797.0,15273.0,19502.0
14,17613.0,19951.0,17517.0
15,25905.0,10211.0,21238.0
16,13078.0,12731.0,12615.0
17,20172.0,16350.0,15219.0
18,26031.0,16360.0,28256.0
19,14419.0,9202.0,12664.0


In [205]:
# jan thru jun
pd.pivot_table(df,
              index='district',
              columns='period',
              values='DATE/TIME OF STOP',
              aggfunc='count')

period,Unnamed: 1_level_0,1st_half_2022,1st_half_2023,1st_half_2024
district,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1,18561.0,12167.0,13107.0,8133.0
10,39369.0,17956.0,26690.0,12483.0
11,47007.0,23341.0,30980.0,11198.0
12,20799.0,13483.0,13423.0,9867.0
14,20438.0,8787.0,12509.0,13347.0
15,20903.0,14948.0,15260.0,6243.0
16,12885.0,7109.0,9967.0,8463.0
17,19206.0,11411.0,11254.0,9870.0
18,26471.0,15091.0,18484.0,10601.0
19,12985.0,8566.0,8600.0,6134.0


Both tables in [CPD stops by District](https://docs.google.com/spreadsheets/d/1V_ogp_b80PvwNVOsgiw5SJD0V9xIHxQhjcUPzz9Es9k/edit?usp=sharing)

In [22]:
# race stopped in districts in the first half of 2024
pd.pivot_table(df[df['period'] == '1st_half_2024'],
              index='district',
              columns='RACE',
              values='DATE/TIME OF STOP',
              aggfunc='count',
              dropna=False)

RACE,AMER INDIAN / ALASKAN NATIVE,ASIAN,BLACK,HAWAIIAN/PACIFIC ISLANDER,HISPANIC,WHITE
district,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
1,98,511,3994,22,1823,1225
10,60,94,3901,39,7591,564
11,30,127,7034,15,3037,540
12,80,401,3785,30,3458,1429
14,102,554,2612,33,5812,3664
15,13,34,4567,4,1109,272
16,75,392,822,46,3573,2972
17,98,933,1096,34,4690,2584
18,121,736,3931,30,2174,3152
19,57,709,1520,30,1637,1809


In [135]:
# race stopped in districts in the first half of 2023
pd.pivot_table(df[df['period'] == '1st_half_2023'],
              index='district',
              columns='RACE',
              values='DATE/TIME OF STOP',
              aggfunc='count',
              dropna=False)

RACE,AMER INDIAN / ALASKAN NATIVE,ASIAN,BLACK,HAWAIIAN/PACIFIC ISLANDER,HISPANIC,WHITE
district,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
1,114,902,7480,57,2302,2252
10,80,169,9715,529,15032,1165
11,56,239,21645,38,7277,1725
12,91,531,5790,41,4694,2276
14,94,517,3071,20,5432,3375
15,17,80,12245,21,2304,593
16,107,454,1089,50,4463,3804
17,137,851,1488,39,5617,3122
18,269,1276,7310,131,3044,6450
19,142,834,2234,38,2130,3222


Both 2023 and 2024 are in this google spreadsheet [here](https://docs.google.com/spreadsheets/d/1Ic8YGpfLDXYdfMxGuJtnZin8mXB_HP-0COCZDgAe244/edit?usp=sharing)

In [29]:
# stops by district by month
# race stopped in districts in the first half of 2023
district_by_month = pd.pivot_table(df,
              index='month_year',
              columns='district',
              values='DATE/TIME OF STOP',
              aggfunc='count',
              dropna=False)

In [30]:
# export to google sheet
district_by_month.to_csv('output/district_by_month.csv')

In this [google sheet](https://docs.google.com/spreadsheets/d/1qf_4rkflM2Qfm58s5fu8_zKw414U7CuN3QzBTVGoHeE/edit?usp=sharing)

# Dexter Reed

He was killed in beat [1122](https://www.wbez.org/2024/04/26/dexter-reed-shooting-reignites-fear-about-traffic-stops) on Mar. 21, 2024

In [92]:
df[df['LOCATION BEAT'] == 1122].groupby('month_year').size().reset_index()

Unnamed: 0,month_year,0
0,01/2022,362
1,01/2023,515
2,01/2024,453
3,02/2022,395
4,02/2023,401
5,02/2024,153
6,03/2022,378
7,03/2023,421
8,03/2024,198
9,04/2022,368


# Plaintiffs in ACLU

In [191]:
# eric wilkins - 6/20/2022 - East 110th Street and South Michigan Avenue - beat 513
df.loc[
    (df['day'] == '2022-06-20') &
    (df['STOP BLOCK-LEVEL ADDRESS'].str.contains('110'))
]

Unnamed: 0,DATE,DATE/TIME OF STOP,STOP BLOCK-LEVEL ADDRESS,LOCATION BEAT,CPD UNIT NO,YOB,RACE,SEX,TICKET ISSUED Y/N,INITIAL STATUTE,...,DRV/PASS OTHER AMT,CONTACT CARD NO,REASON FOR STOP,month_year,day,year,period,snelling,district,reason_for_stop
229100,20337747,2022-06-20 18:00:00,110XX S VINCENNES AVE,2234,22.0,1977,BLACK,F,N,9-8-020(C)(1) DISOBEY RED CIRCULAR STEADY SIGN...,...,,,,06/2022,2022-06-20,2022,1st_half_2022,,22,MOVING
229191,23204230,2022-06-20 19:02:00,110XX S AVENUE J,433,4.0,1995,HISPANIC,F,N,9-24-010(B) STOP AT STOP SIGN,...,,,,06/2022,2022-06-20,2022,1st_half_2022,,4,MOVING
229553,22585346,2022-06-20 21:25:00,110XX S WESTERN AVE,2212,22.0,1999,BLACK,M,N,9-76-090(B) LIGHT HEADLIGHTS SUNSET/SUNUP,...,,,,06/2022,2022-06-20,2022,1st_half_2022,,22,EQUIPMENT


In [193]:
# eric wilkins - 4/18/2024 - South Wabash Avenue and East 104th Street - beat 512
df.loc[
    (df['day'] == '2024-04-18') &
    (df['STOP BLOCK-LEVEL ADDRESS'].str.contains('104'))
]

Unnamed: 0,DATE,DATE/TIME OF STOP,STOP BLOCK-LEVEL ADDRESS,LOCATION BEAT,CPD UNIT NO,YOB,RACE,SEX,TICKET ISSUED Y/N,INITIAL STATUTE,...,DRV/PASS OTHER AMT,CONTACT CARD NO,REASON FOR STOP,month_year,day,year,period,snelling,district,reason_for_stop
1184463,,2024-04-18 16:00:00,104XX S AVENUE M,432,4.0,1968,HISPANIC,M,N,9-76-050(A) HEADLIGHT REQUIRED-MOTORCYCLE,...,,26781430,,04/2024,2024-04-18,2024,1st_half_2024,after,4,EQUIPMENT


In [195]:
# Mahari bell - 4/4/2022 -r 201 South Western Avenue - beat 1225 - FOUND
df.loc[
    (df['day'] == '2022-04-04') &
    (df['STOP BLOCK-LEVEL ADDRESS'].str.contains('2XX S WESTERN'))
]

Unnamed: 0,DATE,DATE/TIME OF STOP,STOP BLOCK-LEVEL ADDRESS,LOCATION BEAT,CPD UNIT NO,YOB,RACE,SEX,TICKET ISSUED Y/N,INITIAL STATUTE,...,DRV/PASS OTHER AMT,CONTACT CARD NO,REASON FOR STOP,month_year,day,year,period,snelling,district,reason_for_stop
116983,21922852,2022-04-04 14:56:00,42XX S WESTERN AVE,921,8.0,1996,HISPANIC,M,N,9-8-020(C)(1) DISOBEY RED CIRCULAR STEADY SIGN...,...,,,,04/2022,2022-04-04,2022,1st_half_2022,,9,MOVING
117602,223399898,2022-04-04 20:21:00,2XX S WESTERN AVE,1225,213.0,1998,BLACK,M,N,"9-12-050(B) LANES, FAIL TO KEEP IN",...,,,,04/2022,2022-04-04,2022,1st_half_2022,,12,MOVING
117777,22907711,2022-04-04 21:42:00,62XX S WESTERN AVE,825,211.0,1978,HISPANIC,F,N,9-76-050(B) HEADLIGHT TWO REQUIRED-MOTOR VEHICLE,...,,,,04/2022,2022-04-04,2022,1st_half_2022,,8,EQUIPMENT


In [197]:
# Mahari bell - 5/9/2022 -20 East 35th Street - beat
df.loc[
    (df['day'] == '2022-05-09') &
    (df['STOP BLOCK-LEVEL ADDRESS'].str.contains('E 35TH'))
]

Unnamed: 0,DATE,DATE/TIME OF STOP,STOP BLOCK-LEVEL ADDRESS,LOCATION BEAT,CPD UNIT NO,YOB,RACE,SEX,TICKET ISSUED Y/N,INITIAL STATUTE,...,DRV/PASS OTHER AMT,CONTACT CARD NO,REASON FOR STOP,month_year,day,year,period,snelling,district,reason_for_stop
166270,23325071,2022-05-09 02:49:00,XX E 35TH ST,211,2.0,1998,BLACK,M,N,9-76-050(B) HEADLIGHT TWO REQUIRED-MOTOR VEHICLE,...,,,,05/2022,2022-05-09,2022,1st_half_2022,,2,EQUIPMENT
166618,22931693,2022-05-09 13:21:00,3XX E 35TH ST,212,2.0,1991,BLACK,M,N,9-76-160(B) DISPLAY ST REG-REAR MOTRCYCL/TRLR/...,...,,,,05/2022,2022-05-09,2022,1st_half_2022,,2,LICENSE


In [200]:
# Mahari bell - 5/29/2022 -233 west jackson boulevard - FOUND
# Mahari Bell - 1/17/2023 - 23 East Jackson - FOUND
df.loc[
    (df['day'] == '2023-01-17') &
    (df['STOP BLOCK-LEVEL ADDRESS'].str.contains('E JACKSON'))
]

Unnamed: 0,DATE,DATE/TIME OF STOP,STOP BLOCK-LEVEL ADDRESS,LOCATION BEAT,CPD UNIT NO,YOB,RACE,SEX,TICKET ISSUED Y/N,INITIAL STATUTE,...,DRV/PASS OTHER AMT,CONTACT CARD NO,REASON FOR STOP,month_year,day,year,period,snelling,district,reason_for_stop
550702,23484246,2023-01-17 19:02:00,XX E JACKSON BLVD,113,1.0,1999,ASIAN,M,N,9-16-050(B) NO TURN SIGNALS-VIOLATION,...,,,,01/2023,2023-01-17,2023,1st_half_2023,before,1,MOVING
550742,23459362,2023-01-17 19:21:00,XX E JACKSON BLVD,113,1.0,1998,BLACK,F,N,9-16-050(B) NO TURN SIGNALS-VIOLATION,...,,,,01/2023,2023-01-17,2023,1st_half_2023,before,1,MOVING


In [203]:
# Essence Jackson - 2/3/2022 - 7358 south lafayette - FOUND
# Essence Jackson - 11/22/2023 - 60th and halsted - FOUND
# Essence Jackson - 
df.loc[
    (df['day'] == '2023-11-22') &
    (df['STOP BLOCK-LEVEL ADDRESS'].str.contains('HALSTED'))
]

Unnamed: 0,DATE,DATE/TIME OF STOP,STOP BLOCK-LEVEL ADDRESS,LOCATION BEAT,CPD UNIT NO,YOB,RACE,SEX,TICKET ISSUED Y/N,INITIAL STATUTE,...,DRV/PASS OTHER AMT,CONTACT CARD NO,REASON FOR STOP,month_year,day,year,period,snelling,district,reason_for_stop
1007404,TV567376,2023-11-22 00:45:00,105XX S HALSTED ST,2233,22.0,1966,BLACK,M,Y,625 ILCS 5.0/3-702-A CANC/REVOKE/SUSPEND REGIS/IL,...,,,,11/2023,2023-11-22,2023,,after,22,LICENSE
1007450,TV566014,2023-11-22 01:44:00,91XX S HALSTED ST,2223,22.0,1999,BLACK,F,Y,9-8-020(C)(1) DISOBEY RED CIRCULAR STEADY SIGN...,...,,,,11/2023,2023-11-22,2023,,after,22,MOVING
1007469,26559695,2023-11-22 02:05:00,63XX S HALSTED ST,723,7.0,1954,BLACK,M,N,9-76-050(B) HEADLIGHT TWO REQUIRED-MOTOR VEHICLE,...,,,,11/2023,2023-11-22,2023,,after,7,EQUIPMENT
1007480,32601058,2023-11-22 02:19:00,54XX S HALSTED ST,935,9.0,1998,BLACK,F,N,"9-76-050(D) LIGHT, LICENSE PLATE",...,,,,11/2023,2023-11-22,2023,,after,9,EQUIPMENT
1007483,26559698,2023-11-22 02:20:00,65XX S HALSTED ST,723,7.0,1993,BLACK,M,N,9-76-050(B) HEADLIGHT TWO REQUIRED-MOTOR VEHICLE,...,,,,11/2023,2023-11-22,2023,,after,7,EQUIPMENT
1007501,TV567640,2023-11-22 02:54:00,88XX S HALSTED ST,2223,22.0,1973,BLACK,F,Y,625 ILCS 5.0/3-701-1 NO VALID REGISTRATION,...,,,,11/2023,2023-11-22,2023,,after,22,LICENSE
1007603,26568225,2023-11-22 08:12:00,72XX S HALSTED ST,732,7.0,1988,BLACK,F,N,625 ILCS 5.0/3-414 EXPIRATION OF REGISTRATION,...,,,,11/2023,2023-11-22,2023,,after,7,LICENSE
1007630,26572071,2023-11-22 08:38:00,70XX S HALSTED ST,732,7.0,1992,BLACK,M,N,625 ILCS 5.0/3-414 EXPIRATION OF REGISTRATION,...,,,,11/2023,2023-11-22,2023,,after,7,LICENSE
1007763,26572050,2023-11-22 10:42:00,60XX S HALSTED ST,712,7.0,1989,BLACK,M,N,625 ILCS 5.0/3-414 EXPIRATION OF REGISTRATION,...,,,,11/2023,2023-11-22,2023,,after,7,LICENSE
1007779,26560243,2023-11-22 10:54:00,58XX S HALSTED ST,712,7.0,1991,BLACK,M,N,625 ILCS 5.0/3-708 OPERATE MTR VEHICLE/REGIS/S...,...,,,,11/2023,2023-11-22,2023,,after,7,LICENSE


# Tickets

In [206]:
df.groupby('TICKET ISSUED Y/N', dropna=False).size()

TICKET ISSUED Y/N
N    1189313
Y      52039
dtype: int64

In [210]:
df[df['period'] == '1st_half_2024'].groupby('TICKET ISSUED Y/N', dropna=False).size()

TICKET ISSUED Y/N
N    172410
Y     13662
dtype: int64

In [212]:
# citywide ticketing rate in 2024
13662/len(df[df['period'] == '1st_half_2024'])

0.07342319102282988

In [209]:
pd.pivot_table(df[df['TICKET ISSUED Y/N'] == 'Y'],
              index='district',
              columns='period',
              values='DATE/TIME OF STOP',
              aggfunc='count')

period,Unnamed: 1_level_0,1st_half_2022,1st_half_2023,1st_half_2024
district,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1,286,341,118,108
10,168,138,156,261
11,1542,1077,1274,1347
12,937,698,609,618
14,510,125,591,367
15,257,314,160,239
16,475,259,241,607
17,171,110,87,99
18,2628,1126,1421,1710
19,1247,474,989,1262


# Contraband

In [157]:
contraband_pivot = pd.pivot_table(df[df['CONTRABAND_FOUND_I'] == 'Y'],
              index='district',
              columns='period',
              values='DATE/TIME OF STOP',
              aggfunc='count')

contraband_pivot

period,Unnamed: 1_level_0,1st_half_2022,1st_half_2023,1st_half_2024
district,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1,89,77,83,94
10,167,103,92,95
11,138,109,113,146
12,77,47,56,38
14,50,51,32,25
15,71,41,49,29
16,44,39,37,26
17,132,52,108,66
18,282,119,170,244
19,38,13,20,22


Contraband rate by district in [google sheet](https://docs.google.com/spreadsheets/d/1uhttBqWmptswU8p9Uzvza326T1Lm7vE3es9jueuY5f0/edit?usp=sharing)

In [156]:
# contraband by month

contraband_monthly = pd.pivot_table(df[df['CONTRABAND_FOUND_I'] == 'Y'],
              index='district',
              columns='period',
              values='DATE/TIME OF STOP',
              aggfunc='count', dropna=False)

contraband_monthly

period,Unnamed: 1_level_0,1st_half_2022,1st_half_2023,1st_half_2024
district,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1,89,77,83,94
10,167,103,92,95
11,138,109,113,146
12,77,47,56,38
14,50,51,32,25
15,71,41,49,29
16,44,39,37,26
17,132,52,108,66
18,282,119,170,244
19,38,13,20,22


# Arrests

In [159]:
arr.groupby('month_year').size().reset_index()

Unnamed: 0,month_year,0
0,01/2022,2851
1,01/2023,4412
2,01/2024,5442
3,02/2022,3933
4,02/2023,4408
5,02/2024,5260
6,03/2022,4300
7,03/2023,4004
8,03/2024,5672
9,04/2022,4144


They are up 20% since Snelling and up 16% since the last period in 2023 [google sheet](https://docs.google.com/spreadsheets/d/1K78RW7N3FdOeePNEjVUfVexg8VfLkA8AAAssxc4ELnM/edit?gid=0#gid=0)

In [165]:
g = arr.groupby('period').size().reset_index()
g

Unnamed: 0,period,0
0,,41450
1,1st_half_2022,26236
2,1st_half_2023,26415
3,1st_half_2024,30736


In [166]:
d = df.groupby('period').size().reset_index()
d

Unnamed: 0,period,0
0,,469774
1,1st_half_2022,268993
2,1st_half_2023,316513
3,1st_half_2024,186072


In [168]:
m = pd.merge(g,d,on='period')
m['pct arrests'] = m['0_x']/m['0_y']
m

Unnamed: 0,period,0_x,0_y,pct arrests
0,,41450,469774,0.088234
1,1st_half_2022,26236,268993,0.097534
2,1st_half_2023,26415,316513,0.083456
3,1st_half_2024,30736,186072,0.165183


In [59]:
g = pd.pivot_table(arr,
              index='STATUTE',
              columns='snelling',
              values='DATE/TIME',
              aggfunc='count')
g.sort_values('after', ascending=False).head(10)

snelling,Unnamed: 1_level_0,after,before
STATUTE,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
OPERATE UNINSURED MTR VEHICLE,3034.0,3382.0,2212.0
ISSUANCE OF WARRANT,4992.0,2738.0,3248.0
DRIVING/NEVER ISSUED LICENSE,791.0,1949.0,374.0
NO VALID REGISTRATION,1641.0,1877.0,1242.0
PCS - POSSESSION - POSS AMT CON SUB EXCEPT (A)(D),1643.0,1463.0,1188.0
DRIVING ON SUSPENDED LICENSE,2211.0,1358.0,1726.0
POSS TITLE/REGISTRATION NOT AUTHORIZED ON VEHICLE,834.0,1064.0,585.0
DRIVING ON REVOKED LICENSE,1306.0,990.0,1056.0
IVC - DRIVING UNDER INFLUENCE OF ALCOHOL,1149.0,928.0,784.0
RESISTING/OBSTRUCT/PC OFF/CORR EMP/FRFTR,582.0,656.0,422.0


In [60]:
g.sort_values('before', ascending=False).head(10)

snelling,Unnamed: 1_level_0,after,before
STATUTE,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
ISSUANCE OF WARRANT,4992.0,2738.0,3248.0
OPERATE UNINSURED MTR VEHICLE,3034.0,3382.0,2212.0
DRIVING ON SUSPENDED LICENSE,2211.0,1358.0,1726.0
NO VALID REGISTRATION,1641.0,1877.0,1242.0
PCS - POSSESSION - POSS AMT CON SUB EXCEPT (A)(D),1643.0,1463.0,1188.0
DRIVING ON REVOKED LICENSE,1306.0,990.0,1056.0
IVC - DRIVING UNDER INFLUENCE OF ALCOHOL,1149.0,928.0,784.0
UNLWFL POSS CANNABIS/DRIVER,818.0,626.0,696.0
POSS TITLE/REGISTRATION NOT AUTHORIZED ON VEHICLE,834.0,1064.0,585.0
AGG UUW/VEHICLE/LOADED/NO FCCA,898.0,493.0,579.0


# reasons

In [183]:
g = df[df['period'] == '1st_half_2024'].groupby('reason_for_stop',dropna=False).size().reset_index(name='stops')
g['pct'] = g['stops']/g['stops'].sum()
g

Unnamed: 0,reason_for_stop,stops,pct
0,EQUIPMENT,49078,0.263758
1,LICENSE,85150,0.457619
2,MOVING,50116,0.269337
3,,1728,0.009287


In [213]:
# license and equipement
49078 + 85150

134228

In [214]:
0.263758 + 0.457619

0.7213769999999999

A WBEZ analysis finds that in 2024, more than 70% drivers were stopped for license or equipment violations versus less than 30% for moving violations, like ignoring stop signs or going over the speed limit. 

In [185]:
# top moving violations
df[(df['period'] == '1st_half_2024') & (df['reason_for_stop'] == 'MOVING')].groupby('STATUTE',dropna=False).size().reset_index(name='stops').sort_values('stops', ascending=False).head(10)

Unnamed: 0,STATUTE,stops
109,9-24-010(B) STOP AT STOP SIGN,14115
158,9-8-020(C)(1) DISOBEY RED CIRCULAR STEADY SIGN...,4462
163,,2381
105,9-16-050(B) NO TURN SIGNALS-VIOLATION,2211
153,9-76-230 DRIVING WHILE USING CELLULAR PHONE PR...,2066
41,625 ILCS 5.0/11-601-B IVC - DRIVING 21-25 MPH ...,1514
80,625 ILCS 5.0/12-503-A OBSTRUCTED FRONT WINDSHIELD,1382
104,9-16-040(A) U-TURN 100FT INTERSECTION,1053
128,9-40-090 DIMMING HEADLIGHTS REQUIRED,882
92,"9-12-050(B) LANES, FAIL TO KEEP IN",832


In [184]:
g = df[df['period'] == '1st_half_2023'].groupby('reason_for_stop',dropna=False).size().reset_index(name='stops')
g['pct'] = g['stops']/g['stops'].sum()
g

Unnamed: 0,reason_for_stop,stops,pct
0,EQUIPMENT,91552,0.289252
1,LICENSE,133151,0.420681
2,MOVING,88989,0.281154
3,,2821,0.008913


In [186]:
g = df[df['period'] == '1st_half_2022'].groupby('reason_for_stop',dropna=False).size().reset_index(name='stops')
g['pct'] = g['stops']/g['stops'].sum()
g

Unnamed: 0,reason_for_stop,stops,pct
0,EQUIPMENT,85365,0.31735
1,LICENSE,96667,0.359366
2,MOVING,83016,0.308618
3,,3945,0.014666


In [128]:
r = df[df['period'] == '1st_half_2024'].groupby('STATUTE').size().reset_index(name='count').sort_values('count', ascending=False)
r['pct'] = r['count']/r['count'].sum()
r.head(10)

Unnamed: 0,STATUTE,count,pct
540,9-76-160(F) REGISTRATION PLATES,25428,0.14259
524,9-76-050(B) HEADLIGHT TWO REQUIRED-MOTOR VEHICLE,17827,0.099966
536,9-76-160(A) DISPLAY ST REG-FRONT/REAR,15983,0.089626
432,9-24-010(B) STOP AT STOP SIGN,14115,0.079151
537,9-76-160(B) DISPLAY ST REG-REAR MOTRCYCL/TRLR/...,13516,0.075792
525,"9-76-050(C) LIGHT, TAIL LIGHTS REQUIRED",11408,0.063971
332,625 ILCS 5.0/3-701-1 NO VALID REGISTRATION,9902,0.055526
325,625 ILCS 5.0/3-414 EXPIRATION OF REGISTRATION,5888,0.033017
556,9-8-020(C)(1) DISOBEY RED CIRCULAR STEADY SIGN...,4462,0.025021
547,9-76-210(A) BROKEN/INOPERABLE LAMPS,3535,0.019823


In [140]:
t = pd.pivot_table(df[df['period'] == '1st_half_2024'],
              index='STATUTE',
              columns='TICKET ISSUED Y/N',
              values='CONTACT CARD NO',
              aggfunc='count',dropna=False)

t.fillna(value=0, inplace=True)

t['total'] = t['N'] + t['Y']
t['no ticket pct of all stops'] = t['N']/186072
t.sort_values('total', ascending=False).head(20)

TICKET ISSUED Y/N,N,Y,total,no ticket pct of all stops
STATUTE,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
9-76-160(F) REGISTRATION PLATES,25302.0,126.0,25428.0,0.13598
9-76-050(B) HEADLIGHT TWO REQUIRED-MOTOR VEHICLE,17495.0,332.0,17827.0,0.094023
9-76-160(A) DISPLAY ST REG-FRONT/REAR,15634.0,349.0,15983.0,0.084021
9-24-010(B) STOP AT STOP SIGN,12899.0,1216.0,14115.0,0.069323
9-76-160(B) DISPLAY ST REG-REAR MOTRCYCL/TRLR/SEMI-TRLR,13085.0,431.0,13516.0,0.070322
"9-76-050(C) LIGHT, TAIL LIGHTS REQUIRED",11267.0,141.0,11408.0,0.060552
625 ILCS 5.0/3-701-1 NO VALID REGISTRATION,7815.0,2087.0,9902.0,0.042
625 ILCS 5.0/3-414 EXPIRATION OF REGISTRATION,5794.0,94.0,5888.0,0.031138
9-8-020(C)(1) DISOBEY RED CIRCULAR STEADY SIGNAL STOP,3856.0,606.0,4462.0,0.020723
9-76-210(A) BROKEN/INOPERABLE LAMPS,3408.0,127.0,3535.0,0.018315


In [141]:
t.sort_values('total', ascending=False).to_csv('output/statute_by_ticket.csv')

https://docs.google.com/spreadsheets/d/13aw1e8iPHuL6NpRhPF1Yq1f632ss4OORMXPa1ILeNcU/edit?usp=sharing

In [None]:
# at least half in 2024 were for registration and lights - not associated with immediate traffic or public safety concerns

In [32]:
r = df[df['period'] == '1st_half_2023'].groupby('INITIAL STATUTE').size().reset_index(name='count').sort_values('count', ascending=False)
r['pct'] = r['count']/r['count'].sum()
r.head(10)

Unnamed: 0,INITIAL STATUTE,count,pct
585,9-76-160(F) REGISTRATION PLATES,38454,0.130304
568,9-76-050(B) HEADLIGHT TWO REQUIRED-MOTOR VEHICLE,32111,0.10881
581,9-76-160(A) DISPLAY ST REG-FRONT/REAR,31598,0.107072
474,9-24-010(B) STOP AT STOP SIGN,31277,0.105984
569,"9-76-050(C) LIGHT, TAIL LIGHTS REQUIRED",20394,0.069106
582,9-76-160(B) DISPLAY ST REG-REAR MOTRCYCL/TRLR/...,17351,0.058795
370,625 ILCS 5.0/3-701-1 NO VALID REGISTRATION,13032,0.04416
363,625 ILCS 5.0/3-414 EXPIRATION OF REGISTRATION,8247,0.027946
592,9-76-210(A) BROKEN/INOPERABLE LAMPS,7795,0.026414
594,9-76-220(A) OBSTRUCTION DRIVER'S VIEW/TINTED W...,6957,0.023574


In [70]:
# reasons by district
d = pd.pivot_table(df[df['period'] == '1st_half_2024'],
              index='STATUTE',
              columns='district',
              values='DATE/TIME OF STOP',
              aggfunc='count')

d.to_csv('output/statute_by_district_2024.csv')

# Pretextual stops

In [36]:
# non moving, no ticket
df[(df['reason_for_stop'] != 'MOVING') & (df['TICKET ISSUED Y/N'] == 'N')].groupby('year').size().reset_index()

Unnamed: 0,year,0
0,2022,347766
1,2023,382903
2,2024,129033


In [37]:
# total
df.groupby('year').size().reset_index()

Unnamed: 0,year,0
0,2022,516376
1,2023,538904
2,2024,186072


In [38]:
# non moving, yes ticket
df[(df['reason_for_stop'] != 'MOVING') & (df['TICKET ISSUED Y/N'] == 'Y')].groupby('year').size().reset_index()

Unnamed: 0,year,0
0,2022,8845
1,2023,10850
2,2024,6923


In [42]:
# moving, no ticket
df[(df['reason_for_stop'] == 'MOVING') & (df['TICKET ISSUED Y/N'] == 'N')].groupby('year').size().reset_index()

Unnamed: 0,year,0
0,2022,150506
1,2023,135728
2,2024,43377


In [40]:
# moving, yes ticket
df[(df['reason_for_stop'] == 'MOVING') & (df['TICKET ISSUED Y/N'] == 'Y')].groupby('year').size().reset_index()

Unnamed: 0,year,0
0,2022,9259
1,2023,9423
2,2024,6739
