In [1]:
import pandas as pd
from sodapy import Socrata
import numpy as np

# Get information from Cook County Sentencing Data Database

In [2]:
# Open a Socrata client for the Cook County data catalog.
# The second positional argument is left as None, exactly as before.
socrata_domain = "datacatalog.cookcountyil.gov"
client = Socrata(socrata_domain, None)



In [3]:
# Retrieve the records of dataset "tg8v-tm6u", capped at 300000 rows
dataset_id = "tg8v-tm6u"
row_limit = 300000
results = client.get(dataset_id, limit=row_limit)

In [4]:
# Build a DataFrame from the downloaded records
original_data = pd.DataFrame.from_records(results)

# List every column name that is available
original_data.columns.tolist()

['case_id',
 'case_participant_id',
 'received_date',
 'offense_category',
 'primary_charge',
 'charge_id',
 'charge_version_id',
 'disposition_charged_offense_title',
 'charge_count',
 'disposition_date',
 'disposition_charged_chapter',
 'disposition_charged_act',
 'disposition_charged_section',
 'disposition_charged_class',
 'disposition_charged_aoic',
 'charge_disposition',
 'sentence_judge',
 'court_name',
 'court_facility',
 'sentence_phase',
 'sentence_date',
 'sentence_type',
 'current_sentence',
 'commitment_type',
 'length_of_case_in_days',
 'age_at_incident',
 'race',
 'gender',
 'incident_begin_date',
 'law_enforcement_agency',
 'arrest_date',
 'felony_review_date',
 'felony_review_result',
 'arraignment_date',
 'updated_offense_category',
 'commitment_term',
 'commitment_unit',
 'incident_end_date',
 'charge_disposition_reason',
 'incident_city',
 'unit']

# Cleaning Process

## Select only chosen columns

In [5]:
# Columns kept for the rest of the analysis (order preserved)
selected_columns = [
    'case_id',
    'case_participant_id',
    'charge_id',
    'charge_version_id',
    'court_facility',
    'court_name',
    'age_at_incident',
    'gender',
    'race',
    'charge_disposition',
    'length_of_case_in_days',
    'current_sentence',
    'primary_charge',
    'sentence_date',
    'offense_category',
    'commitment_term',
    'commitment_unit',
    'sentence_type',
]
data = original_data[selected_columns]
data.head()

Unnamed: 0,case_id,case_participant_id,charge_id,charge_version_id,court_facility,court_name,age_at_incident,gender,race,charge_disposition,length_of_case_in_days,current_sentence,primary_charge,sentence_date,offense_category,commitment_term,commitment_unit,sentence_type
0,87302669130,221103049156,106130426295,71631463339,Markham Courthouse,District 6 - Markham,27,Male,Black,Nolle On Remand,619,True,False,6/2/1986 12:00:00 AM,PROMIS Conversion,,,Conversion
1,87302669130,221103049156,106130637571,60521205325,Markham Courthouse,District 6 - Markham,27,Male,Black,Nolle On Remand,619,True,False,6/2/1986 12:00:00 AM,PROMIS Conversion,30.0,Year(s),Conversion
2,87302669130,221103049156,106143736689,81281765233,Markham Courthouse,District 6 - Markham,27,Male,Black,Nolle On Remand,619,True,False,6/2/1986 12:00:00 AM,PROMIS Conversion,,,Conversion
3,87302669130,221103049156,106143842327,81277737609,Markham Courthouse,District 6 - Markham,27,Male,Black,Nolle On Remand,619,True,False,6/2/1986 12:00:00 AM,PROMIS Conversion,,,Conversion
4,87302669130,221103049156,106144159241,60394680398,Markham Courthouse,District 6 - Markham,27,Male,Black,Plea Of Guilty,10982,True,False,10/16/2014 12:00:00 AM,PROMIS Conversion,30.0,Year(s),Prison


In [6]:
# Count the non-null entries of each selected column to see where values are missing
data.count()

case_id                   241511
case_participant_id       241511
charge_id                 241511
charge_version_id         241511
court_facility            239630
court_name                240126
age_at_incident           232073
gender                    240700
race                      240247
charge_disposition        241511
length_of_case_in_days    222605
current_sentence          241511
primary_charge            241511
sentence_date             241511
offense_category          241511
commitment_term           239852
commitment_unit           239852
sentence_type             241511
dtype: int64

## Drop NaN Values

In [7]:
# Drop every row that has a missing value in any of the selected columns.
# The explicit .copy() makes `data` an independent frame, so the column
# assignments in later cells (e.g. data['year'] = ...) do not trigger
# SettingWithCopyWarning on pandas versions that track slice provenance.
data = data.dropna().copy()

# Every column should now report the same non-null count
data.count()

case_id                   210782
case_participant_id       210782
charge_id                 210782
charge_version_id         210782
court_facility            210782
court_name                210782
age_at_incident           210782
gender                    210782
race                      210782
charge_disposition        210782
length_of_case_in_days    210782
current_sentence          210782
primary_charge            210782
sentence_date             210782
offense_category          210782
commitment_term           210782
commitment_unit           210782
sentence_type             210782
dtype: int64

## Filter data from 2000 through 2019

In [8]:
# Inspect the dtype pandas assigned to each column
data.dtypes

case_id                   object
case_participant_id       object
charge_id                 object
charge_version_id         object
court_facility            object
court_name                object
age_at_incident           object
gender                    object
race                      object
charge_disposition        object
length_of_case_in_days    object
current_sentence            bool
primary_charge              bool
sentence_date             object
offense_category          object
commitment_term           object
commitment_unit           object
sentence_type             object
dtype: object

In [9]:
# Duplicate the sentence date into a working 'year' column;
# 'sentence_date' itself is left untouched
hdate = data.sentence_date
data['year'] = hdate
data.head(5)

Unnamed: 0,case_id,case_participant_id,charge_id,charge_version_id,court_facility,court_name,age_at_incident,gender,race,charge_disposition,length_of_case_in_days,current_sentence,primary_charge,sentence_date,offense_category,commitment_term,commitment_unit,sentence_type,year
1,87302669130,221103049156,106130637571,60521205325,Markham Courthouse,District 6 - Markham,27,Male,Black,Nolle On Remand,619,True,False,6/2/1986 12:00:00 AM,PROMIS Conversion,30,Year(s),Conversion,6/2/1986 12:00:00 AM
4,87302669130,221103049156,106144159241,60394680398,Markham Courthouse,District 6 - Markham,27,Male,Black,Plea Of Guilty,10982,True,False,10/16/2014 12:00:00 AM,PROMIS Conversion,30,Year(s),Prison,10/16/2014 12:00:00 AM
5,87302669130,221103049156,106144159241,60394680398,Markham Courthouse,District 6 - Markham,27,Male,Black,Plea Of Guilty,619,False,False,6/2/1986 12:00:00 AM,PROMIS Conversion,30,Year(s),Conversion,6/2/1986 12:00:00 AM
6,87302669130,221103049156,106144264879,69145326192,Markham Courthouse,District 6 - Markham,27,Male,Black,Plea Of Guilty,10982,True,False,10/16/2014 12:00:00 AM,PROMIS Conversion,30,Year(s),Prison,10/16/2014 12:00:00 AM
7,87302669130,221103049156,106144264879,69145326192,Markham Courthouse,District 6 - Markham,27,Male,Black,Plea Of Guilty,619,False,False,6/2/1986 12:00:00 AM,PROMIS Conversion,30,Year(s),Conversion,6/2/1986 12:00:00 AM


In [10]:
# Keep only the year: delete the leading 'M/D/' part and the trailing
# ' hh:mm:ss AM' part of each date string (e.g. '6/2/1986 12:00:00 AM' -> '1986').
# Raw strings are used so the regex backslashes are not parsed as (invalid)
# Python string escape sequences.
data['year'] = data['year'].replace(regex=[r'[0-9]*\/[0-9]*\/', r'\s[0-9]*\:[0-9]*\:[0-9]*\s[A-Z]*'], value='')

In [11]:
# Cast the year strings to integers
year_as_int = data['year'].astype(int)
data['year'] = year_as_int

In [12]:
# Re-check the dtypes after casting 'year' to an integer type
data.dtypes

case_id                   object
case_participant_id       object
charge_id                 object
charge_version_id         object
court_facility            object
court_name                object
age_at_incident           object
gender                    object
race                      object
charge_disposition        object
length_of_case_in_days    object
current_sentence            bool
primary_charge              bool
sentence_date             object
offense_category          object
commitment_term           object
commitment_unit           object
sentence_type             object
year                       int32
dtype: object

In [13]:
# Keep sentences dated 2000 through 2019 (both bounds inclusive).
# The explicit .copy() detaches the result from the pre-filter frame, so the
# column assignments made by the cleaning cells below operate on an
# independent DataFrame instead of a slice (avoids SettingWithCopyWarning).
data = data[(data.year > 1999) & (data.year < 2020)].copy()

In [14]:
# Confirm the filter: summary statistics (min/max in particular) of the numeric columns
data.describe()

Unnamed: 0,year
count,207508.0
mean,2014.735846
std,2.445846
min,2000.0
25%,2013.0
50%,2015.0
75%,2017.0
max,2019.0


## Analyze Offense Category and consolidate into related categories

In [15]:
# Frequency of every offense category before consolidation
data['offense_category'].value_counts()

Narcotics                       58271
UUW - Unlawful Use of Weapon    23511
Aggravated DUI                  12778
Retail Theft                    12360
Burglary                        10750
                                ...  
Tampering                           9
Failure To Pay Child Support        6
Possession of Explosives            3
Compelling Gang Membership          2
Benefit Recipient Fraud             2
Name: offense_category, Length: 88, dtype: int64

In [16]:
# Fold the attempted and reckless variants into the single 'Homicide' category
homicide_labels = ['Attempt Homicide', 'Reckless Homicide']
data['offense_category'] = data['offense_category'].replace(to_replace=homicide_labels, value='Homicide')

data['offense_category'].value_counts()

Narcotics                       58271
UUW - Unlawful Use of Weapon    23511
Aggravated DUI                  12778
Retail Theft                    12360
Burglary                        10750
                                ...  
Tampering                           9
Failure To Pay Child Support        6
Possession of Explosives            3
Benefit Recipient Fraud             2
Compelling Gang Membership          2
Name: offense_category, Length: 86, dtype: int64

In [17]:
# Group the police-related categories under 'Offense Against Police Officers'
police_labels = [
    'Aggravated Assault Police Officer',
    'Aggravated Assault Police Officer Firearm',
    'Police Shooting',
    'Impersonating Police Officer',
]
data['offense_category'] = data['offense_category'].replace(to_replace=police_labels, value='Offense Against Police Officers')

data['offense_category'].value_counts()

Narcotics                       58271
UUW - Unlawful Use of Weapon    23511
Aggravated DUI                  12778
Retail Theft                    12360
Burglary                        10750
                                ...  
Tampering                           9
Failure To Pay Child Support        6
Possession of Explosives            3
Compelling Gang Membership          2
Benefit Recipient Fraud             2
Name: offense_category, Length: 83, dtype: int64

In [18]:
# Merge every battery variant into the single 'Battery' category
battery_labels = [
    'Aggravated Battery',
    'Aggravated Battery Police Officer',
    'Aggravated Battery Police Officer Firearm',
    'Aggravated Battery With A Firearm',
    'Domestic Battery',
]
data['offense_category'] = data['offense_category'].replace(to_replace=battery_labels, value='Battery')

data['offense_category'].value_counts()

Narcotics                       58271
UUW - Unlawful Use of Weapon    23511
Battery                         13511
Aggravated DUI                  12778
Retail Theft                    12360
                                ...  
Tampering                           9
Failure To Pay Child Support        6
Possession of Explosives            3
Compelling Gang Membership          2
Benefit Recipient Fraud             2
Name: offense_category, Length: 78, dtype: int64

In [19]:
# Merge the robbery, burglary and theft categories.
# The target label keeps its existing spelling ('Burglery') unchanged.
property_crime_labels = [
    'Aggravated Identity Theft',
    'Aggravated Robbery',
    'Aggravated Robbery BB Gun',
    'Armed Robbery',
    'Attempt Armed Robbery',
    'Burglary',
    'Identity Theft',
    'Residential Burglary',
    'Retail Theft',
    'Robbery',
    'Theft',
    'Theft by Deception',
    'Possession Of Burglary Tools',
]
data['offense_category'] = data['offense_category'].replace(to_replace=property_crime_labels, value='Robbery/Burglery/Theft')

data['offense_category'].value_counts()

Narcotics                       58271
Robbery/Burglery/Theft          47777
UUW - Unlawful Use of Weapon    23511
Battery                         13511
Aggravated DUI                  12778
                                ...  
Tampering                           9
Failure To Pay Child Support        6
Possession of Explosives            3
Compelling Gang Membership          2
Benefit Recipient Fraud             2
Name: offense_category, Length: 66, dtype: int64

In [20]:
# Fold the attempt variants into the single 'Arson' category
arson_labels = ['Arson and Attempt Arson', 'Attempt Arson']
data['offense_category'] = data['offense_category'].replace(to_replace=arson_labels, value='Arson')

data['offense_category'].value_counts()

Narcotics                       58271
Robbery/Burglery/Theft          47777
UUW - Unlawful Use of Weapon    23511
Battery                         13511
Aggravated DUI                  12778
                                ...  
Tampering                           9
Failure To Pay Child Support        6
Possession of Explosives            3
Compelling Gang Membership          2
Benefit Recipient Fraud             2
Name: offense_category, Length: 64, dtype: int64

In [21]:
# Group the weapon- and explosive-related categories under 'Firearms and Explosives'
firearm_labels = [
    'Aggravated Discharge Firearm',
    'Armed Violence',
    'Disarming Police Officer',
    'Gun Running',
    'Possession of Explosives',
    'UUW - Unlawful Use of Weapon',
    'Gun - Non UUW',
    'Bomb Threat',
    'Reckless Discharge of Firearm',
]
data['offense_category'] = data['offense_category'].replace(to_replace=firearm_labels, value='Firearms and Explosives')

data['offense_category'].value_counts()

Narcotics                                        58271
Robbery/Burglery/Theft                           47777
Firearms and Explosives                          26030
Battery                                          13511
Aggravated DUI                                   12778
Driving With Suspended Or Revoked License         8155
Sex Crimes                                        4320
Other Offense                                     3850
Possession of Stolen Motor Vehicle                3756
Homicide                                          3709
DUI                                               3330
Forgery                                           2511
Escape - Failure to Return                        2358
PROMIS Conversion                                 2214
Failure to Register as a Sex Offender             2177
Aggravated Fleeing and Eluding                    2131
Criminal Damage to Property                       1642
Credit Card Cases                                 1248
Home Invas

In [22]:
# Group the driving- and vehicle-related categories under 'Motor Vehicle Offenses'
motor_vehicle_labels = [
    'Aggravated DUI',
    'Attempt Vehicular Hijacking',
    'DUI',
    'Driving With Suspended Or Revoked License',
    'Major Accidents',
    'Possession of Stolen Motor Vehicle',
    'Vehicular Hijacking',
    'Vehicular Invasion',
]
data['offense_category'] = data['offense_category'].replace(to_replace=motor_vehicle_labels, value='Motor Vehicle Offenses')

data['offense_category'].value_counts()

Narcotics                                        58271
Robbery/Burglery/Theft                           47777
Motor Vehicle Offenses                           29045
Firearms and Explosives                          26030
Battery                                          13511
Sex Crimes                                        4320
Other Offense                                     3850
Homicide                                          3709
Forgery                                           2511
Escape - Failure to Return                        2358
PROMIS Conversion                                 2214
Failure to Register as a Sex Offender             2177
Aggravated Fleeing and Eluding                    2131
Criminal Damage to Property                       1642
Credit Card Cases                                 1248
Home Invasion                                     1210
Offense Against Police Officers                    875
Prostitution                                       642
Fraudulent

In [23]:
# Group the offenses against the court process under 'Judicial Process Violations'
judicial_labels = [
    'Communicating With Witness',
    'Escape - Failure to Return',
    'Obstructing Justice',
    'Perjury',
    'Tampering',
    'Violate Bail Bond',
    'Violation Order Of Protection',
]
data['offense_category'] = data['offense_category'].replace(to_replace=judicial_labels, value='Judicial Process Violations')

data['offense_category'].value_counts()

Narcotics                                        58271
Robbery/Burglery/Theft                           47777
Motor Vehicle Offenses                           29045
Firearms and Explosives                          26030
Battery                                          13511
Sex Crimes                                        4320
Other Offense                                     3850
Homicide                                          3709
Judicial Process Violations                       3100
Forgery                                           2511
PROMIS Conversion                                 2214
Failure to Register as a Sex Offender             2177
Aggravated Fleeing and Eluding                    2131
Criminal Damage to Property                       1642
Credit Card Cases                                 1248
Home Invasion                                     1210
Offense Against Police Officers                    875
Prostitution                                       642
Fraudulent

In [24]:
# Group the sex-related categories under 'Sex Offenses'
sex_offense_labels = [
    'Attempt Sex Crimes',
    'Child Pornography',
    'Failure to Register as a Sex Offender',
    'Pandering',
    'Prostitution',
    'Sex Crimes',
    'Violation of Sex Offender Registration',
]
data['offense_category'] = data['offense_category'].replace(to_replace=sex_offense_labels, value='Sex Offenses')

data['offense_category'].value_counts()

Narcotics                                        58271
Robbery/Burglery/Theft                           47777
Motor Vehicle Offenses                           29045
Firearms and Explosives                          26030
Battery                                          13511
Sex Offenses                                      7393
Other Offense                                     3850
Homicide                                          3709
Judicial Process Violations                       3100
Forgery                                           2511
PROMIS Conversion                                 2214
Aggravated Fleeing and Eluding                    2131
Criminal Damage to Property                       1642
Credit Card Cases                                 1248
Home Invasion                                     1210
Offense Against Police Officers                    875
Fraudulent ID                                      452
Arson                                              396
Fraud     

In [25]:
# Group abduction, trafficking, kidnapping and restraint into one category
detention_labels = [
    'Child Abduction',
    'Human Trafficking',
    'Kidnapping',
    'Unlawful Restraint',
]
data['offense_category'] = data['offense_category'].replace(to_replace=detention_labels, value='Human Trafficking/Detention/Kidnapping')

data['offense_category'].value_counts()

Narcotics                                        58271
Robbery/Burglery/Theft                           47777
Motor Vehicle Offenses                           29045
Firearms and Explosives                          26030
Battery                                          13511
Sex Offenses                                      7393
Other Offense                                     3850
Homicide                                          3709
Judicial Process Violations                       3100
Forgery                                           2511
PROMIS Conversion                                 2214
Aggravated Fleeing and Eluding                    2131
Criminal Damage to Property                       1642
Credit Card Cases                                 1248
Home Invasion                                     1210
Offense Against Police Officers                    875
Human Trafficking/Detention/Kidnapping             611
Fraudulent ID                                      452
Arson     

In [26]:
# Merge home invasion and residential trespass into 'Trespassing'
trespass_labels = ['Home Invasion', 'Criminal Trespass To Residence']
data['offense_category'] = data['offense_category'].replace(to_replace=trespass_labels, value='Trespassing')

data['offense_category'].value_counts()

Narcotics                                        58271
Robbery/Burglery/Theft                           47777
Motor Vehicle Offenses                           29045
Firearms and Explosives                          26030
Battery                                          13511
Sex Offenses                                      7393
Other Offense                                     3850
Homicide                                          3709
Judicial Process Violations                       3100
Forgery                                           2511
PROMIS Conversion                                 2214
Aggravated Fleeing and Eluding                    2131
Criminal Damage to Property                       1642
Trespassing                                       1304
Credit Card Cases                                 1248
Offense Against Police Officers                    875
Human Trafficking/Detention/Kidnapping             611
Fraudulent ID                                      452
Arson     

In [27]:
# Group the fraud, forgery and deception categories under 'Fraud/Deception'
fraud_labels = [
    'Benefit Recipient Fraud',
    'Credit Card Cases',
    'Deceptive Practice',
    'Forgery',
    'Fraud',
    'Fraudulent ID',
]
data['offense_category'] = data['offense_category'].replace(to_replace=fraud_labels, value='Fraud/Deception')

data['offense_category'].value_counts()

Narcotics                                        58271
Robbery/Burglery/Theft                           47777
Motor Vehicle Offenses                           29045
Firearms and Explosives                          26030
Battery                                          13511
Sex Offenses                                      7393
Fraud/Deception                                   4747
Other Offense                                     3850
Homicide                                          3709
Judicial Process Violations                       3100
PROMIS Conversion                                 2214
Aggravated Fleeing and Eluding                    2131
Criminal Damage to Property                       1642
Trespassing                                       1304
Offense Against Police Officers                    875
Human Trafficking/Detention/Kidnapping             611
Arson                                              396
Stalking                                           289
Intimidati

In [28]:
# Group intimidation, official misconduct and bribery under 'Corruption'
corruption_labels = ['Intimidation', 'Official Misconduct', 'Bribery']
data['offense_category'] = data['offense_category'].replace(to_replace=corruption_labels, value='Corruption')

data['offense_category'].value_counts()

Narcotics                                        58271
Robbery/Burglery/Theft                           47777
Motor Vehicle Offenses                           29045
Firearms and Explosives                          26030
Battery                                          13511
Sex Offenses                                      7393
Fraud/Deception                                   4747
Other Offense                                     3850
Homicide                                          3709
Judicial Process Violations                       3100
PROMIS Conversion                                 2214
Aggravated Fleeing and Eluding                    2131
Criminal Damage to Property                       1642
Trespassing                                       1304
Offense Against Police Officers                    875
Human Trafficking/Detention/Kidnapping             611
Arson                                              396
Corruption                                         325
Stalking  

In [29]:
# Merge the two penal-institution possession categories
penal_labels = [
    'Possession of Contraband in Penal Institution',
    'Possession of Shank in Penal Institution',
]
data['offense_category'] = data['offense_category'].replace(to_replace=penal_labels, value='Inside Penal Institutions')

data['offense_category'].value_counts()

Narcotics                                 58271
Robbery/Burglery/Theft                    47777
Motor Vehicle Offenses                    29045
Firearms and Explosives                   26030
Battery                                   13511
Sex Offenses                               7393
Fraud/Deception                            4747
Other Offense                              3850
Homicide                                   3709
Judicial Process Violations                3100
PROMIS Conversion                          2214
Aggravated Fleeing and Eluding             2131
Criminal Damage to Property                1642
Trespassing                                1304
Offense Against Police Officers             875
Human Trafficking/Detention/Kidnapping      611
Arson                                       396
Corruption                                  325
Stalking                                    289
Inside Penal Institutions                   159
Hate Crimes                             

In [30]:
# Fold these four remaining categories into the existing 'Other Offense' bucket
misc_labels = [
    'Dog Fighting',
    'Gambling',
    'Failure To Pay Child Support',
    'Compelling Gang Membership',
]
data['offense_category'] = data['offense_category'].replace(to_replace=misc_labels, value='Other Offense')

data['offense_category'].value_counts()

Narcotics                                 58271
Robbery/Burglery/Theft                    47777
Motor Vehicle Offenses                    29045
Firearms and Explosives                   26030
Battery                                   13511
Sex Offenses                               7393
Fraud/Deception                            4747
Other Offense                              3910
Homicide                                   3709
Judicial Process Violations                3100
PROMIS Conversion                          2214
Aggravated Fleeing and Eluding             2131
Criminal Damage to Property                1642
Trespassing                                1304
Offense Against Police Officers             875
Human Trafficking/Detention/Kidnapping      611
Arson                                       396
Corruption                                  325
Stalking                                    289
Inside Penal Institutions                   159
Hate Crimes                             

## Cleaning Race column

In [31]:
# Frequency of every race label before consolidation
data['race'].value_counts()

Black                               140446
White [Hispanic or Latino]           32744
White                                28199
HISPANIC                              3418
Asian                                 1196
White/Black [Hispanic or Latino]      1069
Unknown                                293
American Indian                        108
Biracial                                29
ASIAN                                    6
Name: race, dtype: int64

In [32]:
# Unify the upper-case 'ASIAN' label with 'Asian'
data['race'] = data['race'].replace(to_replace='ASIAN', value='Asian')

data['race'].value_counts()

Black                               140446
White [Hispanic or Latino]           32744
White                                28199
HISPANIC                              3418
Asian                                 1202
White/Black [Hispanic or Latino]      1069
Unknown                                293
American Indian                        108
Biracial                                29
Name: race, dtype: int64

In [33]:
# Merge the three Hispanic/Latino labels into a single 'Hispanic/Latino' value
hispanic_labels = [
    'White [Hispanic or Latino]',
    'HISPANIC',
    'White/Black [Hispanic or Latino]',
]
data['race'] = data['race'].replace(to_replace=hispanic_labels, value='Hispanic/Latino')

data['race'].value_counts()

Black              140446
Hispanic/Latino     37231
White               28199
Asian                1202
Unknown               293
American Indian       108
Biracial               29
Name: race, dtype: int64

## Cleaning Sentence Type

In [34]:
# Frequency of every sentence type before consolidation
data['sentence_type'].value_counts()

Prison                                   112359
Probation                                 80240
Jail                                       6434
Cook County Boot Camp                      2333
Conditional Discharge                      2180
2nd Chance Probation                       1763
Supervision                                1603
Probation Terminated Unsatisfactorily       229
Inpatient Mental Health Services            155
Conditional Release                          57
Death                                        56
Probation Terminated Instanter               54
Conversion                                   28
Probation Terminated Satisfactorily          17
Name: sentence_type, dtype: int64

In [35]:
# Merge every probation and supervision variant into 'Probation/Supervision'
probation_labels = [
    'Probation',
    '2nd Chance Probation',
    'Supervision',
    'Probation Terminated Unsatisfactorily',
    'Probation Terminated Instanter',
    'Probation Terminated Satisfactorily',
]
data['sentence_type'] = data['sentence_type'].replace(to_replace=probation_labels, value='Probation/Supervision')

data['sentence_type'].value_counts()

Prison                              112359
Probation/Supervision                83906
Jail                                  6434
Cook County Boot Camp                 2333
Conditional Discharge                 2180
Inpatient Mental Health Services       155
Conditional Release                     57
Death                                   56
Conversion                              28
Name: sentence_type, dtype: int64

In [36]:
# Relabel 'Conditional Release' as 'Conditional Discharge'
data['sentence_type'] = data['sentence_type'].replace(to_replace='Conditional Release', value='Conditional Discharge')

data['sentence_type'].value_counts()

Prison                              112359
Probation/Supervision                83906
Jail                                  6434
Cook County Boot Camp                 2333
Conditional Discharge                 2237
Inpatient Mental Health Services       155
Death                                   56
Conversion                              28
Name: sentence_type, dtype: int64

In [37]:
# Merge jail and prison sentences into a single 'Incarceration' type
incarceration_labels = ['Jail', 'Prison']
data['sentence_type'] = data['sentence_type'].replace(to_replace=incarceration_labels, value='Incarceration')

data['sentence_type'].value_counts()

Incarceration                       118793
Probation/Supervision                83906
Cook County Boot Camp                 2333
Conditional Discharge                 2237
Inpatient Mental Health Services       155
Death                                   56
Conversion                              28
Name: sentence_type, dtype: int64

## Cleaning Commitment Unit & Term by Sentence Type

In [38]:
# Frequency of every commitment unit before clean-up
data['commitment_unit'].value_counts()

Year(s)         151605
Months           47829
Days              5448
Term              2135
Natural Life       403
Dollars             56
Weeks               15
Hours               14
Pounds               2
Kilos                1
Name: commitment_unit, dtype: int64

In [39]:
# Collapse the two weight units ('Pounds', 'Kilos') into a single 'Weight' label
weight_units = ['Pounds', 'Kilos']
data['commitment_unit'] = data['commitment_unit'].replace(to_replace=weight_units, value='Weight')

data['commitment_unit'].value_counts()

Year(s)         151605
Months           47829
Days              5448
Term              2135
Natural Life       403
Dollars             56
Weeks               15
Hours               14
Weight               3
Name: commitment_unit, dtype: int64

In [40]:
# Sorted list of the distinct raw commitment_term values
sorted(data['commitment_term'].unique())

['0',
 '00',
 '001',
 '002',
 '006',
 '007',
 '010',
 '012',
 '015',
 '018',
 '02',
 '024',
 '027',
 '030',
 '036',
 '042',
 '054',
 '055',
 '06',
 '062',
 '1',
 '1,154.00',
 '1.5',
 '10',
 '10.5',
 '100',
 '101',
 '1013',
 '102',
 '103',
 '104',
 '105',
 '106',
 '107',
 '108',
 '109',
 '11',
 '11.5',
 '110',
 '111',
 '112',
 '113',
 '114',
 '115',
 '116',
 '117',
 '118',
 '119',
 '12',
 '12.5',
 '12.75',
 '120',
 '121',
 '122',
 '123',
 '125',
 '126',
 '127',
 '1277.13',
 '128',
 '129',
 '13',
 '13.5',
 '130',
 '131',
 '132',
 '133',
 '134',
 '135',
 '136',
 '137',
 '138',
 '139',
 '14',
 '14.5',
 '140',
 '141',
 '142',
 '143',
 '144',
 '145',
 '146',
 '147',
 '148',
 '149',
 '15',
 '150',
 '151',
 '152',
 '153',
 '154',
 '155',
 '156',
 '157',
 '158',
 '159',
 '16',
 '16.5',
 '160',
 '161',
 '162',
 '163',
 '164',
 '165',
 '166',
 '167',
 '168',
 '169',
 '17',
 '17.5',
 '170',
 '171',
 '172',
 '174',
 '175',
 '176',
 '177',
 '178',
 '179',
 '18',
 '18 months',
 '180',
 '181',
 '182',

In [41]:
# Normalize commitment_term and cast it to float:
#   1. the spelled-out value 'two' becomes the number 2;
#   2. lower-case letters, commas and backticks are stripped from the
#      remaining strings (e.g. '18 months' -> '18 ', '1,154.00' -> '1154.00');
#   3. the column is cast to float.
# Raw strings are used for the patterns so the backslashes are not parsed as
# (invalid) Python string escape sequences.
data.commitment_term = data.commitment_term.replace(to_replace='two', value=2)
data.commitment_term = data.commitment_term.replace(regex=[r'[a-z]*', r'\,', r'\`'], value='')
data.commitment_term = data.commitment_term.astype('float')

data.commitment_term

4         30.0
6         30.0
9         62.0
17        70.0
34         6.0
          ... 
241252    24.0
241257     2.0
241280    18.0
241298     2.0
241310    90.0
Name: commitment_term, Length: 207508, dtype: float64

In [42]:
# Average of the numeric columns per commitment unit.
# numeric_only=True is passed explicitly: most columns of `data` are strings,
# and recent pandas versions raise a TypeError instead of silently skipping
# non-numeric columns when computing the mean.
data_commitmentunits = data.groupby(['commitment_unit']).mean(numeric_only=True)

# Mean commitment term for each unit
data_commitmentunits['commitment_term']

commitment_unit
Days            102.542034
Dollars         130.520179
Hours            34.214286
Months           24.848784
Natural Life     10.411911
Term              1.017330
Weeks            25.066667
Weight           11.333333
Year(s)           4.167848
Name: commitment_term, dtype: float64

In [43]:
# Keep the pre-harmonisation term and unit in *_orig columns so the
# conversions below can be compared against the values as received.
terms, terms_type = data['commitment_term'], data['commitment_unit']
data['commitment_term_orig'] = terms
data['commitment_unit_orig'] = terms_type

data.head()

Unnamed: 0,case_id,case_participant_id,charge_id,charge_version_id,court_facility,court_name,age_at_incident,gender,race,charge_disposition,length_of_case_in_days,current_sentence,primary_charge,sentence_date,offense_category,commitment_term,commitment_unit,sentence_type,year,commitment_term_orig
4,87302669130,221103049156,106144159241,60394680398,Markham Courthouse,District 6 - Markham,27,Male,Black,Plea Of Guilty,10982,True,False,10/16/2014 12:00:00 AM,PROMIS Conversion,30.0,Year(s),Incarceration,2014,30.0
6,87302669130,221103049156,106144264879,69145326192,Markham Courthouse,District 6 - Markham,27,Male,Black,Plea Of Guilty,10982,True,False,10/16/2014 12:00:00 AM,PROMIS Conversion,30.0,Year(s),Incarceration,2014,30.0
9,87302669130,221103049156,106130320657,69533646672,Markham Courthouse,District 6 - Markham,27,Male,Black,Plea Of Guilty,10982,True,True,10/16/2014 12:00:00 AM,PROMIS Conversion,62.0,Year(s),Incarceration,2014,62.0
17,90077158262,225880020769,108900784087,71752349593,26TH Street,District 1 - Chicago,21,Male,Black,Finding Guilty,12479,True,False,9/10/2018 12:00:00 AM,PROMIS Conversion,70.0,Year(s),Incarceration,2018,70.0
34,91518077764,300040539496,122760812814,130802551602,Markham Courthouse,District 6 - Markham,23,Male,Black,Verdict Guilty,3760,True,False,3/14/2012 12:00:00 AM,PROMIS Conversion,6.0,Year(s),Incarceration,2012,6.0


### Cleaning by Cook County Boot Camp

In [44]:
# Boot-camp sentences only: which units are they recorded in?
bootcamp = data.loc[data['sentence_type'].eq('Cook County Boot Camp')]
bootcamp['commitment_unit'].value_counts()

Term       1967
Year(s)     179
Months      135
Days         51
Dollars       1
Name: commitment_unit, dtype: int64

In [45]:
# Term values of boot-camp rows whose unit is 'Months'
# (the variable keeps its historical name although it holds month values).
bootcamp_years = bootcamp.loc[bootcamp['commitment_unit'] == 'Months', 'commitment_term']
bootcamp_years.value_counts()

6.0     95
0.0     17
4.0     11
12.0     6
18.0     2
24.0     2
42.0     1
9.0      1
Name: commitment_term, dtype: int64

In [46]:
# Boot camp, 0 'Term': relabel the unit as 'Months' (the value stays 0)
data.loc[
    data['sentence_type'].eq('Cook County Boot Camp')
    & data['commitment_unit'].eq('Term')
    & data['commitment_term'].eq(0),
    'commitment_unit',
] = 'Months'

In [47]:
# Boot camp, 0 'Year(s)': relabel the unit as 'Months' (the value stays 0)
data.loc[
    data['sentence_type'].eq('Cook County Boot Camp')
    & data['commitment_unit'].eq('Year(s)')
    & data['commitment_term'].eq(0),
    'commitment_unit',
] = 'Months'

In [48]:
# Boot camp, 1 'Term': store the value as 12 (the unit is still 'Term' after this cell)
data.loc[
    data['sentence_type'].eq('Cook County Boot Camp')
    & data['commitment_unit'].eq('Term')
    & data['commitment_term'].eq(1),
    'commitment_term',
] = 12

In [49]:
# Boot camp, 12 'Term': relabel the unit as 'Months'
data.loc[
    data['sentence_type'].eq('Cook County Boot Camp')
    & data['commitment_unit'].eq('Term')
    & data['commitment_term'].eq(12),
    'commitment_unit',
] = 'Months'

In [50]:
# Boot camp, 18 'Term': relabel the unit as 'Months'
data.loc[
    data['sentence_type'].eq('Cook County Boot Camp')
    & data['commitment_unit'].eq('Term')
    & data['commitment_term'].eq(18),
    'commitment_unit',
] = 'Months'

In [51]:
# Refresh the boot-camp subset and re-tally units after the 'Term' fixes
bootcamp = data.loc[data['sentence_type'].eq('Cook County Boot Camp')]
bootcamp['commitment_unit'].value_counts()

Months     2121
Year(s)     160
Days         51
Dollars       1
Name: commitment_unit, dtype: int64

In [52]:
# Distribution of boot-camp terms still recorded in years
data.loc[
    (data['sentence_type'] == 'Cook County Boot Camp') & (data['commitment_unit'] == 'Year(s)'),
    'commitment_term',
].value_counts()

1.0     85
3.0     26
4.0     26
2.0     13
6.0      8
15.0     1
7.0      1
Name: commitment_term, dtype: int64

In [53]:
# Changing Years to Months, numeric (x12) for boot-camp rows still labelled 'Year(s)'.
# One vectorised conversion replaces the per-value assignments, so a year value
# that was not in the hand-written list is still multiplied before the
# following cell relabels the unit as 'Months'.
# The source is commitment_term_orig (the snapshot column created earlier in
# this notebook, still equal to commitment_term for these rows), so running
# this cell twice does not multiply twice.
bootcamp_in_years = (
    (data['sentence_type'] == 'Cook County Boot Camp')
    & (data['commitment_unit'] == 'Year(s)')
)
data.loc[bootcamp_in_years, 'commitment_term'] = (
    data.loc[bootcamp_in_years, 'commitment_term_orig'] * 12
)

In [54]:
# Boot camp: every remaining 'Year(s)' label becomes 'Months'
data.loc[
    data['sentence_type'].eq('Cook County Boot Camp') & data['commitment_unit'].eq('Year(s)'),
    'commitment_unit',
] = 'Months'

In [55]:
# Refresh the boot-camp subset and re-tally units after the year conversion
bootcamp = data.loc[data['sentence_type'].eq('Cook County Boot Camp')]
bootcamp['commitment_unit'].value_counts()

Months     2281
Days         51
Dollars       1
Name: commitment_unit, dtype: int64

In [56]:
# Distribution of boot-camp terms recorded in days
data.loc[
    (data['sentence_type'] == 'Cook County Boot Camp') & (data['commitment_unit'] == 'Days'),
    'commitment_term',
].value_counts()

180.0    43
0.0       3
120.0     2
66.0      1
299.0     1
150.0     1
Name: commitment_term, dtype: int64

In [57]:
# Changing Days to Months, numeric, for boot-camp rows labelled 'Days'.
# Uses a 30-day month rounded to whole months, which reproduces the earlier
# hand-written mapping (180->6, 120->4, 150->5, 299->10, 66->2) and also
# covers day counts that were not in that list.
# The source is commitment_term_orig (the snapshot column created earlier in
# this notebook), so running this cell twice does not divide twice.
bootcamp_in_days = (
    (data['sentence_type'] == 'Cook County Boot Camp')
    & (data['commitment_unit'] == 'Days')
)
data.loc[bootcamp_in_days, 'commitment_term'] = (
    data.loc[bootcamp_in_days, 'commitment_term_orig'] / 30
).round()

In [58]:
# Boot camp: every remaining 'Days' label becomes 'Months'
data.loc[
    data['sentence_type'].eq('Cook County Boot Camp') & data['commitment_unit'].eq('Days'),
    'commitment_unit',
] = 'Months'

In [59]:
# Refresh the boot-camp subset and re-tally units after the day conversion
bootcamp = data.loc[data['sentence_type'].eq('Cook County Boot Camp')]
bootcamp['commitment_unit'].value_counts()

Months     2332
Dollars       1
Name: commitment_unit, dtype: int64

In [60]:
# Show the term of boot-camp rows recorded in 'Dollars' (kept as a one-column frame)
data.loc[
    data['sentence_type'].eq('Cook County Boot Camp') & data['commitment_unit'].eq('Dollars'),
    ['commitment_term'],
]

Unnamed: 0,commitment_term
7549,0.0


In [61]:
# Boot camp: relabel the 'Dollars' unit as 'Months' (the term value is left as is)
data.loc[
    data['sentence_type'].eq('Cook County Boot Camp') & data['commitment_unit'].eq('Dollars'),
    'commitment_unit',
] = 'Months'

In [62]:
# Final unit tally for boot-camp rows
bootcamp = data.loc[data['sentence_type'].eq('Cook County Boot Camp')]
bootcamp['commitment_unit'].value_counts()

Months    2333
Name: commitment_unit, dtype: int64

In [63]:
# Distribution of boot-camp terms after harmonising the unit
data.loc[data['sentence_type'] == 'Cook County Boot Camp', 'commitment_term'].value_counts()

12.0     1980
6.0       138
0.0       117
36.0       26
48.0       26
24.0       15
4.0        13
72.0        8
18.0        3
180.0       1
2.0         1
10.0        1
5.0         1
42.0        1
9.0         1
84.0        1
Name: commitment_term, dtype: int64

### Cleaning by Inpatient Mental Health Services

In [64]:
# Units used by Inpatient Mental Health Services sentences
data.loc[data['sentence_type'] == 'Inpatient Mental Health Services', 'commitment_unit'].value_counts()

Year(s)    102
Term        35
Months      18
Name: commitment_unit, dtype: int64

In [65]:
# Distribution of inpatient terms recorded in years
data.loc[
    (data['sentence_type'] == 'Inpatient Mental Health Services') & (data['commitment_unit'] == 'Year(s)'),
    'commitment_term',
].value_counts()

120.0    36
2.0      17
60.0     14
15.0      8
5.0       7
30.0      5
14.0      5
7.0       3
13.0      2
10.0      2
1.0       1
6.0       1
3.0       1
Name: commitment_term, dtype: int64

In [66]:
# Distribution of inpatient terms recorded in months
data.loc[
    (data['sentence_type'] == 'Inpatient Mental Health Services') & (data['commitment_unit'] == 'Months'),
    'commitment_term',
].value_counts()

14.0    10
24.0     5
15.0     2
30.0     1
Name: commitment_term, dtype: int64

In [67]:
# Changing Months to Years, numeric (/12) for inpatient rows labelled 'Months'.
# One vectorised division replaces the per-value assignments (14, 24, 15, 30),
# giving the same results for those values while also converting any month
# value that was not in the list.
# The source is commitment_term_orig (the snapshot column created earlier in
# this notebook), so running this cell twice does not divide twice.
inpatient_in_months = (
    (data['sentence_type'] == 'Inpatient Mental Health Services')
    & (data['commitment_unit'] == 'Months')
)
data.loc[inpatient_in_months, 'commitment_term'] = (
    data.loc[inpatient_in_months, 'commitment_term_orig'] / 12
)

In [68]:
# Inpatient: every 'Months' label becomes 'Year(s)'
data.loc[
    data['sentence_type'].eq('Inpatient Mental Health Services') & data['commitment_unit'].eq('Months'),
    'commitment_unit',
] = 'Year(s)'

In [69]:
# Re-tally inpatient units after the month conversion
data.loc[data['sentence_type'] == 'Inpatient Mental Health Services', 'commitment_unit'].value_counts()

Year(s)    120
Term        35
Name: commitment_unit, dtype: int64

In [70]:
# Distribution of inpatient terms whose unit is 'Term'
data.loc[
    (data['sentence_type'] == 'Inpatient Mental Health Services') & (data['commitment_unit'] == 'Term'),
    'commitment_term',
].value_counts()

1.0    27
0.0     8
Name: commitment_term, dtype: int64

In [71]:
# Row count per sentence type
data['sentence_type'].value_counts()

Incarceration                       118793
Probation/Supervision                83906
Cook County Boot Camp                 2333
Conditional Discharge                 2237
Inpatient Mental Health Services       155
Death                                   56
Conversion                              28
Name: sentence_type, dtype: int64

### Cleaning by Conditional Discharge

In [72]:
# Units used by Conditional Discharge sentences
data.loc[data['sentence_type'] == 'Conditional Discharge', 'commitment_unit'].value_counts()

Months     1142
Year(s)    1066
Days         26
Term          2
Dollars       1
Name: commitment_unit, dtype: int64

In [73]:
# Distribution of Conditional Discharge terms recorded in months
data.loc[
    (data['sentence_type'] == 'Conditional Discharge') & (data['commitment_unit'] == 'Months'),
    'commitment_term',
].value_counts()

18.0    559
12.0    324
24.0    190
6.0      37
30.0     17
15.0      5
20.0      3
4.0       2
16.0      1
14.0      1
9.0       1
3.0       1
1.0       1
Name: commitment_term, dtype: int64

In [74]:
# Distribution of Conditional Discharge terms recorded in years
data.loc[
    (data['sentence_type'] == 'Conditional Discharge') & (data['commitment_unit'] == 'Year(s)'),
    'commitment_term',
].value_counts()

1.0     643
2.0     417
12.0      2
3.0       2
8.0       2
Name: commitment_term, dtype: int64

In [75]:
# Changing Years to Months, numeric (x12) for Conditional Discharge rows
# labelled 'Year(s)'.
# Done in one vectorised step: the earlier value-by-value version set 1 -> 12
# and then matched those same rows again with its `== 12` rule, turning every
# 1-year sentence into 144 months.
# The source is commitment_term_orig (the snapshot column created earlier in
# this notebook), so running this cell twice does not multiply twice.
cond_discharge_in_years = (
    (data['sentence_type'] == 'Conditional Discharge')
    & (data['commitment_unit'] == 'Year(s)')
)
data.loc[cond_discharge_in_years, 'commitment_term'] = (
    data.loc[cond_discharge_in_years, 'commitment_term_orig'] * 12
)

In [76]:
# Conditional Discharge: every 'Year(s)' label becomes 'Months'
data.loc[
    data['sentence_type'].eq('Conditional Discharge') & data['commitment_unit'].eq('Year(s)'),
    'commitment_unit',
] = 'Months'

In [77]:
# Re-tally Conditional Discharge units after the year conversion
data.loc[data['sentence_type'] == 'Conditional Discharge', 'commitment_unit'].value_counts()

Months     2208
Days         26
Term          2
Dollars       1
Name: commitment_unit, dtype: int64

In [78]:
# Distribution of Conditional Discharge terms recorded in days
data.loc[
    (data['sentence_type'] == 'Conditional Discharge') & (data['commitment_unit'] == 'Days'),
    'commitment_term',
].value_counts()

2.0      4
5.0      3
30.0     2
4.0      2
0.0      2
90.0     1
158.0    1
133.0    1
60.0     1
12.0     1
120.0    1
10.0     1
364.0    1
7.0      1
194.0    1
180.0    1
1.0      1
14.0     1
Name: commitment_term, dtype: int64

In [79]:
# Changing Days to Months, numeric, for Conditional Discharge rows labelled 'Days'.
# Uses a 30-day month rounded to two decimals, in one vectorised step.
# The earlier value-by-value version had three problems this removes:
#   * 30 days was set to 1 and then re-matched by the later `== 1` rule,
#     ending up as 0.03 months;
#   * 120 days was mapped to 5 and 133 days to 4.75 (30-day months give 4 and 4.43);
#   * day counts not present in the list were never converted.
# The source is commitment_term_orig (the snapshot column created earlier in
# this notebook), so running this cell twice does not divide twice.
cond_discharge_in_days = (
    (data['sentence_type'] == 'Conditional Discharge')
    & (data['commitment_unit'] == 'Days')
)
data.loc[cond_discharge_in_days, 'commitment_term'] = (
    data.loc[cond_discharge_in_days, 'commitment_term_orig'] / 30
).round(2)

In [80]:
# Conditional Discharge: every 'Days' label becomes 'Months'
data.loc[
    data['sentence_type'].eq('Conditional Discharge') & data['commitment_unit'].eq('Days'),
    'commitment_unit',
] = 'Months'

In [81]:
# Re-tally Conditional Discharge units after the day conversion
data.loc[data['sentence_type'] == 'Conditional Discharge', 'commitment_unit'].value_counts()

Months     2234
Term          2
Dollars       1
Name: commitment_unit, dtype: int64

In [82]:
# Distribution of Conditional Discharge terms whose unit is 'Term'
data.loc[
    (data['sentence_type'] == 'Conditional Discharge') & (data['commitment_unit'] == 'Term'),
    'commitment_term',
].value_counts()

1.0    2
Name: commitment_term, dtype: int64

In [83]:
# Distribution of Conditional Discharge terms whose unit is 'Dollars'
data.loc[
    (data['sentence_type'] == 'Conditional Discharge') & (data['commitment_unit'] == 'Dollars'),
    'commitment_term',
].value_counts()

100.0    1
Name: commitment_term, dtype: int64

### Cleaning by Probation/Supervision

In [84]:
# Row count per sentence type
data['sentence_type'].value_counts()

Incarceration                       118793
Probation/Supervision                83906
Cook County Boot Camp                 2333
Conditional Discharge                 2237
Inpatient Mental Health Services       155
Death                                   56
Conversion                              28
Name: sentence_type, dtype: int64

In [85]:
# Units used by Probation/Supervision sentences
data.loc[data['sentence_type'] == 'Probation/Supervision', 'commitment_unit'].value_counts()

Year(s)         49203
Months          33936
Days              636
Term               87
Natural Life       12
Weeks              11
Dollars             9
Hours               9
Weight              3
Name: commitment_unit, dtype: int64

In [86]:
# Distribution of probation terms whose unit is 'Dollars'
data.loc[
    (data['sentence_type'] == 'Probation/Supervision') & (data['commitment_unit'] == 'Dollars'),
    'commitment_term',
].value_counts()

2.00       4
1277.13    1
1154.00    1
27.00      1
1.00       1
24.00      1
Name: commitment_term, dtype: int64

In [87]:
# Distribution of probation terms whose unit is 'Natural Life'
data.loc[
    (data['sentence_type'] == 'Probation/Supervision') & (data['commitment_unit'] == 'Natural Life'),
    'commitment_term',
].value_counts()

2.0    6
1.0    6
Name: commitment_term, dtype: int64

In [102]:
# Probation rows labelled 'Natural Life' with a term of 1 or 2 all get the value 1560.
# NOTE(review): 1560 is presumably 130 years x 12 months used as a stand-in for a life
# term - confirm. The saved output of the next cell shows 130.0, which does not match
# this code and looks stale; re-run to refresh it.
data.loc[
    data['sentence_type'].eq('Probation/Supervision')
    & data['commitment_unit'].eq('Natural Life')
    & data['commitment_term'].isin([1, 2]),
    'commitment_term',
] = 1560

In [103]:
# Check the probation 'Natural Life' terms after the replacement
data.loc[
    (data['sentence_type'] == 'Probation/Supervision') & (data['commitment_unit'] == 'Natural Life'),
    'commitment_term',
].value_counts()

130.0    12
Name: commitment_term, dtype: int64

In [104]:
# Probation: relabel the 'Natural Life' unit as 'Months'
data.loc[
    data['sentence_type'].eq('Probation/Supervision') & data['commitment_unit'].eq('Natural Life'),
    'commitment_unit',
] = 'Months'

In [105]:
# Re-tally probation units after relabelling 'Natural Life'
data.loc[data['sentence_type'] == 'Probation/Supervision', 'commitment_unit'].value_counts()

Year(s)    49215
Months     33936
Days         636
Term          87
Weeks         11
Dollars        9
Hours          9
Weight         3
Name: commitment_unit, dtype: int64

In [107]:
# Distribution of probation terms recorded in weeks
data.loc[
    (data['sentence_type'] == 'Probation/Supervision') & (data['commitment_unit'] == 'Weeks'),
    'commitment_term',
].value_counts()

2.0     3
24.0    3
18.0    2
12.0    1
30.0    1
54.0    1
Name: commitment_term, dtype: int64

In [108]:
# Distribution of probation terms recorded in months
data.loc[
    (data['sentence_type'] == 'Probation/Supervision') & (data['commitment_unit'] == 'Months'),
    'commitment_term',
].value_counts()

24.0     16264
18.0      8315
30.0      6349
12.0      1251
14.0       497
36.0       450
6.0        258
15.0       108
0.0         89
48.0        82
2.0         46
13.0        36
3.0         35
20.0        35
16.0        15
21.0        15
17.0        14
19.0        13
42.0        12
9.0          8
23.0         7
22.0         6
11.0         4
25.0         4
28.0         3
10.0         3
54.0         3
29.0         2
27.0         2
20.5         1
8.0          1
66.0         1
4.0          1
32.0         1
34.0         1
39.0         1
33.0         1
254.0        1
1.0          1
Name: commitment_term, dtype: int64

In [110]:
# Sorted distinct probation terms recorded in days
sorted(
    data.loc[
        (data['sentence_type'] == 'Probation/Supervision') & (data['commitment_unit'] == 'Days'),
        'commitment_term',
    ].unique()
)

[0.0,
 1.0,
 2.0,
 3.0,
 4.0,
 5.0,
 6.0,
 7.0,
 8.0,
 10.0,
 12.0,
 14.0,
 15.0,
 16.0,
 17.0,
 18.0,
 19.0,
 20.0,
 21.0,
 22.0,
 23.0,
 24.0,
 25.0,
 26.0,
 27.0,
 28.0,
 29.0,
 30.0,
 31.0,
 32.0,
 33.0,
 35.0,
 38.0,
 39.0,
 40.0,
 41.0,
 42.0,
 43.0,
 44.0,
 45.0,
 46.0,
 47.0,
 48.0,
 49.0,
 50.0,
 51.0,
 53.0,
 54.0,
 56.0,
 59.0,
 60.0,
 61.0,
 62.0,
 63.0,
 64.0,
 65.0,
 71.0,
 72.0,
 73.0,
 74.0,
 75.0,
 76.0,
 77.0,
 80.0,
 81.0,
 84.0,
 85.0,
 87.0,
 90.0,
 92.0,
 93.0,
 96.0,
 98.0,
 99.0,
 101.0,
 103.0,
 104.0,
 106.0,
 108.0,
 110.0,
 111.0,
 112.0,
 113.0,
 114.0,
 115.0,
 117.0,
 118.0,
 119.0,
 120.0,
 125.0,
 128.0,
 131.0,
 132.0,
 133.0,
 140.0,
 141.0,
 142.0,
 143.0,
 147.0,
 151.0,
 154.0,
 160.0,
 163.0,
 165.0,
 171.0,
 172.0,
 175.0,
 177.0,
 178.0,
 179.0,
 180.0,
 187.0,
 191.0,
 194.0,
 206.0,
 210.0,
 212.0,
 220.0,
 222.0,
 228.0,
 240.0,
 263.0,
 273.0,
 285.0,
 300.0,
 303.0,
 305.0,
 352.0,
 356.0,
 364.0,
 365.0,
 368.0,
 444.0,
 574.0]