What code sections are most frequent?

Which ones most often lead to convictions?

Which ones have the most severe racial disparities?

In what localities (fips) are these disparities most severe?

In [1]:
import numpy as numpy
import pandas as pd

In [2]:
# Read the case-level extract into the notebook-wide `cases` frame.
cases = pd.read_csv("data100k.csv")
# Preview the first three records, transposed so each field is shown on its own row.
cases.iloc[:3].transpose()

Unnamed: 0,0,1,2
person_id,102090000000110,343221000000125,343221000000125
HearingDate,2019-02-28,2009-12-07,2011-01-20
CodeSection,A.46.2-862,B.46.2-301,A.46.2-707
codesection,covered elsewhere,covered elsewhere,covered elsewhere
ChargeType,Misdemeanor,Misdemeanor,Misdemeanor
chargetype,Misdemeanor,Misdemeanor,Misdemeanor
Class,1,1,3
DispositionCode,Guilty,Guilty,Guilty
disposition,Conviction,Conviction,Conviction
Plea,,,


In [3]:
# Frequency of each code section, most common first.
cases.CodeSection.value_counts()

CodeSection
A.46.2-862         26379
B.46.2-301         25967
46.2-300           17934
C.46.2-862         11728
18.2-250.1         10573
                   ...  
54.1-3805              1
27-97 9-22 901.        1
9999999999999          1
Z.18.2-71              1
4VAC20-610-30B         1
Name: count, Length: 4207, dtype: int64

In [4]:
# Frequency of each disposition code; used to decide which codes count as a conviction.
cases.DispositionCode.value_counts()

DispositionCode
Guilty                     156563
Nolle Prosequi              54680
Dismissed                   42520
Guilty In Absentia          31958
Not Guilty                   5807
Not Guilty/Acquitted         1623
Not True Bill                 250
No Indictment Presented       178
Dismissed/Other                19
Name: count, dtype: int64

In [5]:
# Flag each case as a conviction when its disposition is 'Guilty' or
# 'Guilty In Absentia'; every other disposition (including a missing one) is False.
# Series.isin performs the membership test in one vectorised pass instead of a
# Python-level loop over every row.
cases['conviction'] = cases['DispositionCode'].isin(['Guilty', 'Guilty In Absentia'])

In [6]:
# Preview the first three records again (transposed) after adding the `conviction` column.
cases.iloc[:3].transpose()

Unnamed: 0,0,1,2
person_id,102090000000110,343221000000125,343221000000125
HearingDate,2019-02-28,2009-12-07,2011-01-20
CodeSection,A.46.2-862,B.46.2-301,A.46.2-707
codesection,covered elsewhere,covered elsewhere,covered elsewhere
ChargeType,Misdemeanor,Misdemeanor,Misdemeanor
chargetype,Misdemeanor,Misdemeanor,Misdemeanor
Class,1,1,3
DispositionCode,Guilty,Guilty,Guilty
disposition,Conviction,Conviction,Conviction
Plea,,,


In [7]:
# Conviction rate per code section, highest first.
# `count` is the number of cases and `mean` is the share of them flagged as a conviction.
# Only code sections with more than 50 cases are kept.
convict_rate = (
    cases
    .groupby('CodeSection')['conviction']
    .agg(['count', 'mean'])
    .reset_index()
    .query("count > 50")
    .sort_values(by='mean', ascending=False)
)
convict_rate

Unnamed: 0,CodeSection,count,mean
1806,23-55,55,0.981818
1755,23-22.1(A),131,0.954198
2103,29-17(C),70,0.942857
3961,A.46.2-862,26379,0.929414
3992,B.18.2-266,1879,0.905269
...,...,...,...
1436,19.2-123,125,0.096000
1102,18.2-374.1:1(A),112,0.089286
1450,19.2-135,83,0.084337
1433,19.2-100,238,0.000000


In [8]:
# Which code sections have the most severe racial disparities?
# Start by inspecting the raw Race labels and how often each one occurs.
cases.Race.value_counts()

Race
White Caucasian(Non-Hispanic)                  114421
Black(Non-Hispanic)                             80173
White Caucasian (Non-Hispanic)                  41679
Black (Non-Hispanic)                            33254
Hispanic                                         9319
White                                            3527
Other(Includes Not Applicable.. Unknown)         3452
Asian Or Pacific Islander                        2787
Black                                            2200
MISSING                                          1022
Unknown (Includes Not Applicable.. Unknown)       785
Other (Includes Not Applicable.. Unknown)         615
American Indian                                   302
Unknown                                            54
Asian or Pacific Islander                           7
American Indian Or Alaskan Native                   1
Name: count, dtype: int64

In [9]:
# Collapse the spelling/spacing variants of each Race label into one canonical
# label, and fold the various "other"/"unknown"/"missing" labels into
# 'Other or Missing'.
replace_map = {
    'White Caucasian(Non-Hispanic)': 'White Caucasian (Non-Hispanic)',
    'Black(Non-Hispanic)': 'Black (Non-Hispanic)',
    'White Caucasian (Non-Hispanic)': 'White Caucasian (Non-Hispanic)',
    'Black (Non-Hispanic)': 'Black (Non-Hispanic)',
    'Hispanic': 'Hispanic',
    'White': 'White Caucasian (Non-Hispanic)',
    'Other(Includes Not Applicable.. Unknown)': 'Other or Missing',
    'Asian Or Pacific Islander': 'Asian or Pacific Islander',
    'Black': 'Black (Non-Hispanic)',
    'MISSING': 'Other or Missing',
    'Unknown (Includes Not Applicable.. Unknown)': 'Other or Missing',
    'Other (Includes Not Applicable.. Unknown)': 'Other or Missing',
    'American Indian': 'American Indian Or Alaskan Native',
    'Unknown': 'Other or Missing',
    'Asian or Pacific Islander': 'Asian or Pacific Islander',
    'American Indian Or Alaskan Native': 'American Indian Or Alaskan Native',
}

# Series.map yields NaN for any value that is not a key of the dict. The
# canonical label 'Other or Missing' is not a key, so a plain .map() would turn
# those rows into NaN if this cell were run a second time, and any label not
# listed above would be lost as well. Falling back to the existing value keeps
# the cell idempotent and leaves unlisted labels visible in the counts below.
cases['Race'] = cases['Race'].map(replace_map).fillna(cases['Race'])
cases['Race'].value_counts()

Race
White Caucasian (Non-Hispanic)       159627
Black (Non-Hispanic)                 115627
Hispanic                               9319
Other or Missing                       5928
Asian or Pacific Islander              2794
American Indian Or Alaskan Native       303
Name: count, dtype: int64

In [10]:
# Overall case count and conviction rate for each (normalised) Race label.
convict_rate_race = (
    cases
    .groupby(by='Race')
    .aggregate({'conviction': ['count', 'mean']})
)
convict_rate_race

Unnamed: 0_level_0,conviction,conviction
Unnamed: 0_level_1,count,mean
Race,Unnamed: 1_level_2,Unnamed: 2_level_2
American Indian Or Alaskan Native,303,0.785479
Asian or Pacific Islander,2794,0.662491
Black (Non-Hispanic),115627,0.632638
Hispanic,9319,0.830347
Other or Missing,5928,0.739879
White Caucasian (Non-Hispanic),159627,0.633715


In [11]:
# Case count and conviction rate for every (code section, race) pair, as a flat
# frame with columns CodeSection, Race, count and convictrate.
convict_rate_race_cs = (
    cases
    .groupby(['CodeSection', 'Race'])['conviction']
    .agg(['count', 'mean'])
    .reset_index()
    .rename(columns={'mean': 'convictrate'})
)
convict_rate_race_cs

Unnamed: 0,CodeSection,Race,count,convictrate
0,(74-4) 26-123,Black (Non-Hispanic),1,0.000000
1,01-2007,White Caucasian (Non-Hispanic),1,1.000000
2,1,Black (Non-Hispanic),5,0.600000
3,1,White Caucasian (Non-Hispanic),3,0.333333
4,1-12,Black (Non-Hispanic),62,0.435484
...,...,...,...,...
6635,Z.18.2-91,White Caucasian (Non-Hispanic),166,0.740964
6636,Z.18.2-91; 26,Black (Non-Hispanic),1,1.000000
6637,Z.18.2-92,Black (Non-Hispanic),1,0.000000
6638,Z.18.2-95,Black (Non-Hispanic),2,1.000000


In [12]:
# Keep only (code section, race) pairs backed by more than 30 cases, then drop the
# count column since only the rate is needed from here on.
# NOTE: this overwrites convict_rate_race_cs and removes `count`, so the cell that
# builds convict_rate_race_cs must be re-run before this one can be run again.
convict_rate_race_cs = (
    convict_rate_race_cs
    .loc[convict_rate_race_cs['count'] > 30]
    .drop(columns='count')
)
convict_rate_race_cs

Unnamed: 0,CodeSection,Race,convictrate
4,1-12,Black (Non-Hispanic),0.435484
75,10-42,White Caucasian (Non-Hispanic),0.395349
76,10-43,Black (Non-Hispanic),0.170732
78,10-43,White Caucasian (Non-Hispanic),0.353659
99,10-62,Black (Non-Hispanic),0.212121
...,...,...,...
6558,NO DMV,Black (Non-Hispanic),0.640000
6561,NO DMV,White Caucasian (Non-Hispanic),0.608911
6620,Z.18.2-47,Black (Non-Hispanic),0.363636
6633,Z.18.2-91,Black (Non-Hispanic),0.725191


In [13]:
# Reshape to one row per code section and one column per race, holding the
# conviction rate. Pairs that were filtered out or never occurred appear as NaN.
convict_rate_wide = convict_rate_race_cs.pivot_table(
    values='convictrate',
    index='CodeSection',
    columns='Race',
)
convict_rate_wide

Race,American Indian Or Alaskan Native,Asian or Pacific Islander,Black (Non-Hispanic),Hispanic,Other or Missing,White Caucasian (Non-Hispanic)
CodeSection,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
1-12,,,0.435484,,,
10-42,,,,,,0.395349
10-43,,,0.170732,,,0.353659
10-62,,,0.212121,,,0.228261
13-1-5,,,0.578125,,,0.658537
...,...,...,...,...,...,...
G.46.2-870,,,,,,0.593750
MISSING,,,0.589147,,,0.307692
NO DMV,,,0.640000,,,0.608911
Z.18.2-47,,,0.363636,,,


In [14]:
# Difference in conviction rate between the 'Black (Non-Hispanic)' and
# 'White Caucasian (Non-Hispanic)' columns; positive means the former is higher.
# Code sections missing either rate get NaN.
convict_rate_wide['black_white_diff'] = (
    convict_rate_wide['Black (Non-Hispanic)']
    .sub(convict_rate_wide['White Caucasian (Non-Hispanic)'])
)
# Largest positive gap first.
convict_rate_wide.sort_values(by='black_white_diff', ascending=False)

Race,American Indian Or Alaskan Native,Asian or Pacific Islander,Black (Non-Hispanic),Hispanic,Other or Missing,White Caucasian (Non-Hispanic),black_white_diff
CodeSection,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
MISSING,,,0.589147,,,0.307692,0.281455
23-10,,,0.448276,,,0.213592,0.234684
46.2-752,,,0.690647,,,0.492813,0.197834
19.2-128(B),,,0.660714,,,0.482143,0.178571
14.2-81,,,0.676190,,,0.500000,0.176190
...,...,...,...,...,...,...,...
D.18.2-266,,,,,,0.763889,
D.46.2-894,,,,,,0.678571,
G.18.2-266,,,,,,0.909091,
G.46.2-870,,,,,,0.593750,
