In [2]:
import pandas as pd
from sodapy import Socrata
import numpy as np
import datetime

# Get information from Cook County Sentencing Data Database

In [4]:
client = Socrata("datacatalog.cookcountyil.gov", None)



In [6]:
results = client.get("tg8v-tm6u", limit=300000)

In [44]:
data = pd.DataFrame.from_records(results)
list(data.columns.values)

['case_id',
 'case_participant_id',
 'offense_category',
 'primary_charge',
 'charge_id',
 'charge_version_id',
 'disposition_charged_offense_title',
 'disposition_charged_chapter',
 'disposition_charged_act',
 'disposition_charged_section',
 'disposition_charged_class',
 'disposition_charged_aoic',
 'disposition_date',
 'charge_disposition',
 'sentence_phase',
 'sentence_date',
 'sentence_judge',
 'sentence_type',
 'current_sentence',
 'commitment_type',
 'court_name',
 'court_facility',
 'length_of_case_in_days',
 'age_at_incident',
 'gender',
 'race',
 'incident_begin_date',
 'arrest_date',
 'law_enforcement_agency',
 'received_date',
 'arraignment_date',
 'updated_offense_category',
 'charge_count',
 'commitment_term',
 'commitment_unit',
 'incident_end_date',
 'charge_disposition_reason',
 'incident_city',
 'unit']

# Cleaning Process

## Select only chosen columns

In [45]:
data = data[['case_id','case_participant_id','charge_id','charge_version_id','court_facility','court_name','age_at_incident','gender','race','charge_disposition','length_of_case_in_days','current_sentence','primary_charge','sentence_date','offense_category','commitment_term','commitment_unit','sentence_type']]
data.head()

Unnamed: 0,case_id,case_participant_id,charge_id,charge_version_id,court_facility,court_name,age_at_incident,gender,race,charge_disposition,length_of_case_in_days,current_sentence,primary_charge,sentence_date,offense_category,commitment_term,commitment_unit,sentence_type
0,44670309710,218297158761,297139645442,83571817251,Markham Courthouse,District 6 - Markham,27,Male,Black,Nolle On Remand,619,True,False,6/2/1986 12:00:00 AM,PROMIS Conversion,,,Conversion
1,44670309710,218297158761,297176911341,94830742153,Markham Courthouse,District 6 - Markham,27,Male,Black,Nolle On Remand,619,True,False,6/2/1986 12:00:00 AM,PROMIS Conversion,,,Conversion
2,44670309710,218297158761,297177207102,94826043158,Markham Courthouse,District 6 - Markham,27,Male,Black,Nolle On Remand,619,True,False,6/2/1986 12:00:00 AM,PROMIS Conversion,,,Conversion
3,44670309710,218297158761,297178094385,70461958445,Markham Courthouse,District 6 - Markham,27,Male,Black,Plea Of Guilty,10982,True,False,10/16/2014 12:00:00 AM,PROMIS Conversion,30.0,Year(s),Prison
4,44670309710,218297158761,297178094385,70461958445,Markham Courthouse,District 6 - Markham,27,Male,Black,Plea Of Guilty,619,False,False,6/2/1986 12:00:00 AM,PROMIS Conversion,30.0,Year(s),Conversion


In [46]:
data.count()

case_id                   236124
case_participant_id       236124
charge_id                 236124
charge_version_id         236124
court_facility            234258
court_name                234741
age_at_incident           233071
gender                    235334
race                      234890
charge_disposition        236124
length_of_case_in_days    217350
current_sentence          236124
primary_charge            236124
sentence_date             236124
offense_category          236124
commitment_term           234516
commitment_unit           234516
sentence_type             236124
dtype: int64

## Drop NaN Values

In [47]:
data = data.dropna()
data.count()

case_id                   211576
case_participant_id       211576
charge_id                 211576
charge_version_id         211576
court_facility            211576
court_name                211576
age_at_incident           211576
gender                    211576
race                      211576
charge_disposition        211576
length_of_case_in_days    211576
current_sentence          211576
primary_charge            211576
sentence_date             211576
offense_category          211576
commitment_term           211576
commitment_unit           211576
sentence_type             211576
dtype: int64

## Filter data after 2000

In [48]:
data.dtypes

case_id                   object
case_participant_id       object
charge_id                 object
charge_version_id         object
court_facility            object
court_name                object
age_at_incident           object
gender                    object
race                      object
charge_disposition        object
length_of_case_in_days    object
current_sentence            bool
primary_charge              bool
sentence_date             object
offense_category          object
commitment_term           object
commitment_unit           object
sentence_type             object
dtype: object

In [49]:
hdate = data['sentence_date']
data['year'] = hdate
data

Unnamed: 0,case_id,case_participant_id,charge_id,charge_version_id,court_facility,court_name,age_at_incident,gender,race,charge_disposition,length_of_case_in_days,current_sentence,primary_charge,sentence_date,offense_category,commitment_term,commitment_unit,sentence_type,year
3,44670309710,218297158761,297178094385,70461958445,Markham Courthouse,District 6 - Markham,27,Male,Black,Plea Of Guilty,10982,True,False,10/16/2014 12:00:00 AM,PROMIS Conversion,30,Year(s),Prison,10/16/2014 12:00:00 AM
4,44670309710,218297158761,297178094385,70461958445,Markham Courthouse,District 6 - Markham,27,Male,Black,Plea Of Guilty,619,False,False,6/2/1986 12:00:00 AM,PROMIS Conversion,030,Year(s),Conversion,6/2/1986 12:00:00 AM
5,44670309710,218297158761,297140236964,70609573999,Markham Courthouse,District 6 - Markham,27,Male,Black,Nolle On Remand,619,True,False,6/2/1986 12:00:00 AM,PROMIS Conversion,030,Year(s),Conversion,6/2/1986 12:00:00 AM
6,44670309710,218297158761,297178390146,80671262249,Markham Courthouse,District 6 - Markham,27,Male,Black,Plea Of Guilty,10982,True,False,10/16/2014 12:00:00 AM,PROMIS Conversion,30,Year(s),Prison,10/16/2014 12:00:00 AM
7,44670309710,218297158761,297178390146,80671262249,Markham Courthouse,District 6 - Markham,27,Male,Black,Plea Of Guilty,619,False,False,6/2/1986 12:00:00 AM,PROMIS Conversion,030,Year(s),Conversion,6/2/1986 12:00:00 AM
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
236119,131050473145,1099057140562,2580450259959,588492395407,26TH Street,District 1 - Chicago,41,Male,Black,Plea Of Guilty,0,True,True,11/6/2019 12:00:00 AM,Other Offense,2,Year(s),Prison,11/6/2019 12:00:00 AM
236120,131061467485,1099200781568,2580777963261,588568720502,Markham Courthouse,District 6 - Markham,29,Male,Black,Plea Of Guilty,0,True,True,11/22/2019 12:00:00 AM,Narcotics,2,Days,Prison,11/22/2019 12:00:00 AM
236121,131099137335,1099668289072,2582334258185,588935174939,26TH Street,District 1 - Chicago,28,Male,Black,Plea Of Guilty,7,True,True,11/26/2019 12:00:00 AM,Aggravated Battery,2,Year(s),Inpatient Mental Health Services,11/26/2019 12:00:00 AM
236122,131109474643,1099813219916,2582081086681,588876168995,26TH Street,District 1 - Chicago,32,Male,Black,Plea Of Guilty,0,True,True,11/26/2019 12:00:00 AM,Aggravated DUI,1,Year(s),Prison,11/26/2019 12:00:00 AM


In [64]:
data['year'] = data['year'].replace(regex='[0-9]*\/[0-9]*\/', value='')
data['year'] = data['year'].replace(regex='\s[0-9]*\:[0-9]*\:[0-9]*\s[A-Z]*', value='')
data['year']

3         2014
4         1986
5         1986
6         2014
7         1986
          ... 
236119    2019
236120    2019
236121    2019
236122    2019
236123    2019
Name: year, Length: 211576, dtype: object

In [68]:
data['year'] = data['year'].astype(float)

In [69]:
data.dtypes

case_id                    object
case_participant_id        object
charge_id                  object
charge_version_id          object
court_facility             object
court_name                 object
age_at_incident            object
gender                     object
race                       object
charge_disposition         object
length_of_case_in_days     object
current_sentence             bool
primary_charge               bool
sentence_date              object
offense_category           object
commitment_term            object
commitment_unit            object
sentence_type              object
year                      float64
dtype: object

In [76]:
y2000 = data['year'] > 1999 | data['year'] < 2020
data_2000 = data[y2000]
data_2000.describe()

Unnamed: 0,year
count,211569.0
mean,2014.815469
std,8.11767
min,2000.0
25%,2013.0
50%,2015.0
75%,2017.0
max,2917.0


In [51]:
data['sentence_date'] = pd.to_datetime(data['sentence_date'])

OutOfBoundsDatetime: Out of bounds nanosecond timestamp: 2912-10-31 00:00:00

In [52]:
filter_year = 2000