In [1]:
import pandas as pd
from sodapy import Socrata
import numpy as np

# Get information from Cook County Sentencing Data Database

In [2]:
# Open a Socrata client against the Cook County data catalog.
# The second positional argument is passed as None (no token supplied).
client = Socrata(
    "datacatalog.cookcountyil.gov",
    None,
)



In [3]:
# Retrieve the records of dataset "tg8v-tm6u"; `limit` caps how many rows
# are requested in this single call.
results = client.get(
    "tg8v-tm6u",
    limit=300_000,
)

In [4]:
# Build the raw DataFrame from the records returned by the API call.
original_data = pd.DataFrame.from_records(data=results)

# List every column name available in the raw data.
original_data.columns.tolist()

['case_id',
 'case_participant_id',
 'offense_category',
 'primary_charge',
 'charge_id',
 'charge_version_id',
 'disposition_charged_offense_title',
 'disposition_charged_chapter',
 'disposition_charged_act',
 'disposition_charged_section',
 'disposition_charged_class',
 'disposition_charged_aoic',
 'disposition_date',
 'charge_disposition',
 'sentence_phase',
 'sentence_date',
 'sentence_judge',
 'sentence_type',
 'current_sentence',
 'commitment_type',
 'court_name',
 'court_facility',
 'length_of_case_in_days',
 'age_at_incident',
 'gender',
 'race',
 'incident_begin_date',
 'arrest_date',
 'law_enforcement_agency',
 'received_date',
 'arraignment_date',
 'updated_offense_category',
 'charge_count',
 'commitment_term',
 'commitment_unit',
 'incident_end_date',
 'charge_disposition_reason',
 'incident_city',
 'unit']

# Cleaning Process

## Select only chosen columns

In [87]:
# Keep only the columns chosen for this analysis (one per line for readability).
data = original_data[[
    'case_id',
    'case_participant_id',
    'charge_id',
    'charge_version_id',
    'court_facility',
    'court_name',
    'age_at_incident',
    'gender',
    'race',
    'charge_disposition',
    'length_of_case_in_days',
    'current_sentence',
    'primary_charge',
    'sentence_date',
    'offense_category',
    'commitment_term',
    'commitment_unit',
    'sentence_type',
]]
data.head()

Unnamed: 0,case_id,case_participant_id,charge_id,charge_version_id,court_facility,court_name,age_at_incident,gender,race,charge_disposition,length_of_case_in_days,current_sentence,primary_charge,sentence_date,offense_category,commitment_term,commitment_unit,sentence_type
0,44670309710,218297158761,297139645442,83571817251,Markham Courthouse,District 6 - Markham,27,Male,Black,Nolle On Remand,619,True,False,6/2/1986 12:00:00 AM,PROMIS Conversion,,,Conversion
1,44670309710,218297158761,297176911341,94830742153,Markham Courthouse,District 6 - Markham,27,Male,Black,Nolle On Remand,619,True,False,6/2/1986 12:00:00 AM,PROMIS Conversion,,,Conversion
2,44670309710,218297158761,297177207102,94826043158,Markham Courthouse,District 6 - Markham,27,Male,Black,Nolle On Remand,619,True,False,6/2/1986 12:00:00 AM,PROMIS Conversion,,,Conversion
3,44670309710,218297158761,297178094385,70461958445,Markham Courthouse,District 6 - Markham,27,Male,Black,Plea Of Guilty,10982,True,False,10/16/2014 12:00:00 AM,PROMIS Conversion,30.0,Year(s),Prison
4,44670309710,218297158761,297178094385,70461958445,Markham Courthouse,District 6 - Markham,27,Male,Black,Plea Of Guilty,619,False,False,6/2/1986 12:00:00 AM,PROMIS Conversion,30.0,Year(s),Conversion


In [88]:
# Count the non-null entries in each column to see where values are missing.
data.count(axis=0)

case_id                   236124
case_participant_id       236124
charge_id                 236124
charge_version_id         236124
court_facility            234258
court_name                234741
age_at_incident           233071
gender                    235334
race                      234890
charge_disposition        236124
length_of_case_in_days    217350
current_sentence          236124
primary_charge            236124
sentence_date             236124
offense_category          236124
commitment_term           234516
commitment_unit           234516
sentence_type             236124
dtype: int64

## Drop NaN Values

In [89]:
# Discard every row that has a missing value in any of the selected columns,
# then re-count so all columns report the same number of rows.
data = data.dropna(axis=0, how='any')
data.count(axis=0)

case_id                   211576
case_participant_id       211576
charge_id                 211576
charge_version_id         211576
court_facility            211576
court_name                211576
age_at_incident           211576
gender                    211576
race                      211576
charge_disposition        211576
length_of_case_in_days    211576
current_sentence          211576
primary_charge            211576
sentence_date             211576
offense_category          211576
commitment_term           211576
commitment_unit           211576
sentence_type             211576
dtype: int64

## Filter data from 2000 through 2019 (inclusive)

In [90]:
# Inspect the dtype of every column; this shows which columns are still
# stored as generic objects (e.g. 'sentence_date') before the year is derived.
data.dtypes

case_id                   object
case_participant_id       object
charge_id                 object
charge_version_id         object
court_facility            object
court_name                object
age_at_incident           object
gender                    object
race                      object
charge_disposition        object
length_of_case_in_days    object
current_sentence            bool
primary_charge              bool
sentence_date             object
offense_category          object
commitment_term           object
commitment_unit           object
sentence_type             object
dtype: object

In [91]:
# Duplicate the sentence date into a new 'year' column so it can be reduced
# to a year in the next cells without altering 'sentence_date'.
#
# DataFrame.assign returns a new frame with the extra column appended, rather
# than writing the column into the frame produced by the earlier dropna();
# that in-place write is what pandas may flag with SettingWithCopyWarning.
hdate = data['sentence_date']
data = data.assign(year=hdate)

data.head()

Unnamed: 0,case_id,case_participant_id,charge_id,charge_version_id,court_facility,court_name,age_at_incident,gender,race,charge_disposition,length_of_case_in_days,current_sentence,primary_charge,sentence_date,offense_category,commitment_term,commitment_unit,sentence_type,year
3,44670309710,218297158761,297178094385,70461958445,Markham Courthouse,District 6 - Markham,27,Male,Black,Plea Of Guilty,10982,True,False,10/16/2014 12:00:00 AM,PROMIS Conversion,30,Year(s),Prison,10/16/2014 12:00:00 AM
4,44670309710,218297158761,297178094385,70461958445,Markham Courthouse,District 6 - Markham,27,Male,Black,Plea Of Guilty,619,False,False,6/2/1986 12:00:00 AM,PROMIS Conversion,30,Year(s),Conversion,6/2/1986 12:00:00 AM
5,44670309710,218297158761,297140236964,70609573999,Markham Courthouse,District 6 - Markham,27,Male,Black,Nolle On Remand,619,True,False,6/2/1986 12:00:00 AM,PROMIS Conversion,30,Year(s),Conversion,6/2/1986 12:00:00 AM
6,44670309710,218297158761,297178390146,80671262249,Markham Courthouse,District 6 - Markham,27,Male,Black,Plea Of Guilty,10982,True,False,10/16/2014 12:00:00 AM,PROMIS Conversion,30,Year(s),Prison,10/16/2014 12:00:00 AM
7,44670309710,218297158761,297178390146,80671262249,Markham Courthouse,District 6 - Markham,27,Male,Black,Plea Of Guilty,619,False,False,6/2/1986 12:00:00 AM,PROMIS Conversion,30,Year(s),Conversion,6/2/1986 12:00:00 AM


In [92]:
# Reduce 'year' from a full timestamp string such as '10/16/2014 12:00:00 AM'
# to just the four-digit year.
#
# The patterns are raw strings: '\/', '\s' and '\:' are not valid escape
# sequences in an ordinary string literal, so the non-raw form raises a
# DeprecationWarning/SyntaxWarning on recent Python versions. The regular
# expressions themselves are unchanged.
data['year'] = (
    data['year']
    # Strip the leading "month/day/" part.
    .replace(regex=r'[0-9]*\/[0-9]*\/', value='')
    # Strip the trailing " HH:MM:SS AM/PM" part.
    .replace(regex=r'\s[0-9]*\:[0-9]*\:[0-9]*\s[A-Z]*', value='')
)

In [93]:
# Convert the year strings to integers so they can be compared numerically.
data['year'] = data.year.astype(int)

In [94]:
# Re-check the dtypes to confirm that 'year' is now an integer column.
data.dtypes

case_id                   object
case_participant_id       object
charge_id                 object
charge_version_id         object
court_facility            object
court_name                object
age_at_incident           object
gender                    object
race                      object
charge_disposition        object
length_of_case_in_days    object
current_sentence            bool
primary_charge              bool
sentence_date             object
offense_category          object
commitment_term           object
commitment_unit           object
sentence_type             object
year                       int32
dtype: object

In [95]:
# Keep only sentences dated 2000 through 2019 (both bounds inclusive).
#
# .copy() makes the filtered frame independent of the frame it was sliced
# from, so the column assignments in the later cells do not write into a
# slice (which pandas reports as SettingWithCopyWarning).
data = data[data['year'].between(2000, 2019)].copy()

In [96]:
# Confirm the filter: the min and max of 'year' in the summary statistics
# should fall inside the bounds applied in the previous cell.
data.describe()

Unnamed: 0,year
count,211516.0
mean,2014.734895
std,2.43294
min,2000.0
25%,2013.0
50%,2015.0
75%,2017.0
max,2019.0


## Analyze Offense Category and consolidate into related categories

In [97]:
# Show every distinct offense category in alphabetical order.
sorted(set(data['offense_category']))

['Aggravated Assault Police Officer',
 'Aggravated Assault Police Officer Firearm',
 'Aggravated Battery',
 'Aggravated Battery Police Officer',
 'Aggravated Battery Police Officer Firearm',
 'Aggravated Battery With A Firearm',
 'Aggravated DUI',
 'Aggravated Discharge Firearm',
 'Aggravated Fleeing and Eluding',
 'Aggravated Identity Theft',
 'Aggravated Robbery',
 'Aggravated Robbery BB Gun',
 'Armed Robbery',
 'Armed Violence',
 'Arson',
 'Arson and Attempt Arson',
 'Attempt Armed Robbery',
 'Attempt Arson',
 'Attempt Homicide',
 'Attempt Sex Crimes',
 'Attempt Vehicular Hijacking',
 'Battery',
 'Benefit Recipient Fraud',
 'Bomb Threat',
 'Bribery',
 'Burglary',
 'Child Abduction',
 'Child Pornography',
 'Communicating With Witness',
 'Compelling Gang Membership',
 'Credit Card Cases',
 'Criminal Damage to Property',
 'Criminal Trespass To Residence',
 'DUI',
 'Deceptive Practice',
 'Disarming Police Officer',
 'Dog Fighting',
 'Domestic Battery',
 'Driving With Suspended Or Revoke

In [98]:
# Number of distinct offense categories before any consolidation.
data['offense_category'].unique().size

88

In [99]:
# Fold the attempt/reckless variants into the 'Homicide' category,
# then show how many distinct categories remain.
data['offense_category'] = data['offense_category'].replace(
    to_replace=[
        'Attempt Homicide',
        'Reckless Homicide',
    ],
    value='Homicide',
)

data['offense_category'].unique().size

86

In [100]:
# Group the police-related labels below under 'Offense Against Police Officers',
# then show how many distinct categories remain.
data['offense_category'] = data['offense_category'].replace(
    to_replace=[
        'Aggravated Assault Police Officer',
        'Aggravated Assault Police Officer Firearm',
        'Police Shooting',
        'Impersonating Police Officer',
    ],
    value='Offense Against Police Officers',
)

data['offense_category'].unique().size

83

In [101]:
# Fold every battery variant into the single 'Battery' category,
# then show how many distinct categories remain.
data['offense_category'] = data['offense_category'].replace(
    to_replace=[
        'Aggravated Battery',
        'Aggravated Battery Police Officer',
        'Aggravated Battery Police Officer Firearm',
        'Aggravated Battery With A Firearm',
        'Domestic Battery',
    ],
    value='Battery',
)

data['offense_category'].unique().size

78

In [102]:
# Group robbery, burglary and theft labels into one combined category,
# then show how many distinct categories remain.
# NOTE: the combined label keeps its existing spelling ('Burglery') because it
# is a data value that other cells may match on.
data['offense_category'] = data['offense_category'].replace(
    to_replace=[
        'Aggravated Identity Theft',
        'Aggravated Robbery',
        'Aggravated Robbery BB Gun',
        'Armed Robbery',
        'Attempt Armed Robbery',
        'Burglary',
        'Identity Theft',
        'Residential Burglary',
        'Retail Theft',
        'Robbery',
        'Theft',
        'Theft by Deception',
        'Possession Of Burglary Tools',
    ],
    value='Robbery/Burglery/Theft',
)

data['offense_category'].unique().size

66

In [103]:
# Fold the attempt variants into the 'Arson' category,
# then show how many distinct categories remain.
data['offense_category'] = data['offense_category'].replace(
    to_replace=[
        'Arson and Attempt Arson',
        'Attempt Arson',
    ],
    value='Arson',
)

data['offense_category'].unique().size

64

In [104]:
# Group weapon- and explosive-related labels under 'Firearms and Explosives',
# then show how many distinct categories remain.
data['offense_category'] = data['offense_category'].replace(
    to_replace=[
        'Aggravated Discharge Firearm',
        'Armed Violence',
        'Disarming Police Officer',
        'Gun Running',
        'Possession of Explosives',
        'UUW - Unlawful Use of Weapon',
        'Gun - Non UUW',
        'Bomb Threat',
        'Reckless Discharge of Firearm',
    ],
    value='Firearms and Explosives',
)

data['offense_category'].unique().size

56

In [105]:
# Group driving- and vehicle-related labels under 'Motor Vehicle Offenses',
# then show how many distinct categories remain.
data['offense_category'] = data['offense_category'].replace(
    to_replace=[
        'Aggravated DUI',
        'Attempt Vehicular Hijacking',
        'DUI',
        'Driving With Suspended Or Revoked License',
        'Major Accidents',
        'Possession of Stolen Motor Vehicle',
        'Vehicular Hijacking',
        'Vehicular Invasion',
    ],
    value='Motor Vehicle Offenses',
)

data['offense_category'].unique().size

49

In [106]:
# Group the court-process labels below under 'Judicial Process Violations',
# then show how many distinct categories remain.
data['offense_category'] = data['offense_category'].replace(
    to_replace=[
        'Communicating With Witness',
        'Escape - Failure to Return',
        'Obstructing Justice',
        'Perjury',
        'Tampering',
        'Violate Bail Bond',
        'Violation Order Of Protection',
    ],
    value='Judicial Process Violations',
)

data['offense_category'].unique().size

43

In [107]:
# Group the labels below under 'Sex Offenses',
# then show how many distinct categories remain.
data['offense_category'] = data['offense_category'].replace(
    to_replace=[
        'Attempt Sex Crimes',
        'Child Pornography',
        'Failure to Register as a Sex Offender',
        'Pandering',
        'Prostitution',
        'Sex Crimes',
        'Violation of Sex Offender Registration',
    ],
    value='Sex Offenses',
)

data['offense_category'].unique().size

37

In [108]:
# Group abduction, trafficking, kidnapping and restraint labels into one
# combined category, then show how many distinct categories remain.
data['offense_category'] = data['offense_category'].replace(
    to_replace=[
        'Child Abduction',
        'Human Trafficking',
        'Kidnapping',
        'Unlawful Restraint',
    ],
    value='Human Trafficking/Detention/Kidnapping',
)

data['offense_category'].unique().size

34

In [109]:
# Group the two labels below under 'Trespassing',
# then show how many distinct categories remain.
data['offense_category'] = data['offense_category'].replace(
    to_replace=[
        'Home Invasion',
        'Criminal Trespass To Residence',
    ],
    value='Trespassing',
)

data['offense_category'].unique().size

33

In [110]:
# Group fraud- and deception-related labels under 'Fraud/Deception',
# then show how many distinct categories remain.
data['offense_category'] = data['offense_category'].replace(
    to_replace=[
        'Benefit Recipient Fraud',
        'Credit Card Cases',
        'Deceptive Practice',
        'Forgery',
        'Fraud',
        'Fraudulent ID',
    ],
    value='Fraud/Deception',
)

data['offense_category'].unique().size

28

In [111]:
# Group the three labels below under 'Corruption',
# then show how many distinct categories remain.
data['offense_category'] = data['offense_category'].replace(
    to_replace=[
        'Intimidation',
        'Official Misconduct',
        'Bribery',
    ],
    value='Corruption',
)

data['offense_category'].unique().size

26

In [112]:
# Group the two penal-institution possession labels under
# 'Inside Penal Institutions', then show how many distinct categories remain.
data['offense_category'] = data['offense_category'].replace(
    to_replace=[
        'Possession of Contraband in Penal Institution',
        'Possession of Shank in Penal Institution',
    ],
    value='Inside Penal Institutions',
)

data['offense_category'].unique().size

25

In [113]:
# Move the remaining miscellaneous labels below into 'Other Offense',
# then show how many distinct categories remain.
data['offense_category'] = data['offense_category'].replace(
    to_replace=[
        'Dog Fighting',
        'Gambling',
        'Failure To Pay Child Support',
        'Compelling Gang Membership',
    ],
    value='Other Offense',
)

data['offense_category'].unique().size

21

In [114]:
# Number of rows in each consolidated offense category, largest first.
data['offense_category'].value_counts()

Narcotics                                 60250
Robbery/Burglery/Theft                    48208
Motor Vehicle Offenses                    29928
Firearms and Explosives                   25996
Battery                                   13544
Sex Offenses                               7510
Fraud/Deception                            4860
Other Offense                              3927
Homicide                                   3698
Judicial Process Violations                3434
PROMIS Conversion                          2232
Aggravated Fleeing and Eluding             2148
Criminal Damage to Property                1657
Trespassing                                1321
Offense Against Police Officers             882
Human Trafficking/Detention/Kidnapping      635
Arson                                       397
Corruption                                  345
Stalking                                    311
Inside Penal Institutions                   164
Hate Crimes                             