In [1]:
import pandas as pd
from sodapy import Socrata
import numpy as np

# Get information from Cook County Sentencing Data Database

In [2]:
# Call API
client = Socrata("datacatalog.cookcountyil.gov", None)



In [3]:
# Retrive Data
results = client.get("tg8v-tm6u", limit=300000)

In [4]:
# Generate DataFrame
original_data = pd.DataFrame.from_records(results)

# Explore data
list(original_data.columns.values)

['case_id',
 'case_participant_id',
 'offense_category',
 'primary_charge',
 'charge_id',
 'charge_version_id',
 'disposition_charged_offense_title',
 'disposition_charged_chapter',
 'disposition_charged_act',
 'disposition_charged_section',
 'disposition_charged_class',
 'disposition_charged_aoic',
 'disposition_date',
 'charge_disposition',
 'sentence_phase',
 'sentence_date',
 'sentence_judge',
 'sentence_type',
 'current_sentence',
 'commitment_type',
 'court_name',
 'court_facility',
 'length_of_case_in_days',
 'age_at_incident',
 'gender',
 'race',
 'incident_begin_date',
 'arrest_date',
 'law_enforcement_agency',
 'received_date',
 'arraignment_date',
 'updated_offense_category',
 'charge_count',
 'commitment_term',
 'commitment_unit',
 'incident_end_date',
 'charge_disposition_reason',
 'incident_city',
 'unit']

# Cleaning Process

## Select only chosen columns

In [51]:
data = original_data[['case_id','case_participant_id','charge_id','charge_version_id','court_facility','court_name','age_at_incident','gender','race','charge_disposition','length_of_case_in_days','current_sentence','primary_charge','sentence_date','offense_category','commitment_term','commitment_unit','sentence_type']]
data.head()

Unnamed: 0,case_id,case_participant_id,charge_id,charge_version_id,court_facility,court_name,age_at_incident,gender,race,charge_disposition,length_of_case_in_days,current_sentence,primary_charge,sentence_date,offense_category,commitment_term,commitment_unit,sentence_type
0,44670309710,218297158761,297139645442,83571817251,Markham Courthouse,District 6 - Markham,27,Male,Black,Nolle On Remand,619,True,False,6/2/1986 12:00:00 AM,PROMIS Conversion,,,Conversion
1,44670309710,218297158761,297176911341,94830742153,Markham Courthouse,District 6 - Markham,27,Male,Black,Nolle On Remand,619,True,False,6/2/1986 12:00:00 AM,PROMIS Conversion,,,Conversion
2,44670309710,218297158761,297177207102,94826043158,Markham Courthouse,District 6 - Markham,27,Male,Black,Nolle On Remand,619,True,False,6/2/1986 12:00:00 AM,PROMIS Conversion,,,Conversion
3,44670309710,218297158761,297178094385,70461958445,Markham Courthouse,District 6 - Markham,27,Male,Black,Plea Of Guilty,10982,True,False,10/16/2014 12:00:00 AM,PROMIS Conversion,30.0,Year(s),Prison
4,44670309710,218297158761,297178094385,70461958445,Markham Courthouse,District 6 - Markham,27,Male,Black,Plea Of Guilty,619,False,False,6/2/1986 12:00:00 AM,PROMIS Conversion,30.0,Year(s),Conversion


In [52]:
# Explore values in columns
data.count()

case_id                   236124
case_participant_id       236124
charge_id                 236124
charge_version_id         236124
court_facility            234258
court_name                234741
age_at_incident           233071
gender                    235334
race                      234890
charge_disposition        236124
length_of_case_in_days    217350
current_sentence          236124
primary_charge            236124
sentence_date             236124
offense_category          236124
commitment_term           234516
commitment_unit           234516
sentence_type             236124
dtype: int64

## Drop NaN Values

In [53]:
data = data.dropna()
data.count()

case_id                   211576
case_participant_id       211576
charge_id                 211576
charge_version_id         211576
court_facility            211576
court_name                211576
age_at_incident           211576
gender                    211576
race                      211576
charge_disposition        211576
length_of_case_in_days    211576
current_sentence          211576
primary_charge            211576
sentence_date             211576
offense_category          211576
commitment_term           211576
commitment_unit           211576
sentence_type             211576
dtype: int64

## Filter data after 2000 & before 2019

In [54]:
# Explore type of values in each column
data.dtypes

case_id                   object
case_participant_id       object
charge_id                 object
charge_version_id         object
court_facility            object
court_name                object
age_at_incident           object
gender                    object
race                      object
charge_disposition        object
length_of_case_in_days    object
current_sentence            bool
primary_charge              bool
sentence_date             object
offense_category          object
commitment_term           object
commitment_unit           object
sentence_type             object
dtype: object

In [55]:
# Create a copy of the Sentence Date to filter it
hdate = data['sentence_date']
data['year'] = hdate

data.head()

Unnamed: 0,case_id,case_participant_id,charge_id,charge_version_id,court_facility,court_name,age_at_incident,gender,race,charge_disposition,length_of_case_in_days,current_sentence,primary_charge,sentence_date,offense_category,commitment_term,commitment_unit,sentence_type,year
3,44670309710,218297158761,297178094385,70461958445,Markham Courthouse,District 6 - Markham,27,Male,Black,Plea Of Guilty,10982,True,False,10/16/2014 12:00:00 AM,PROMIS Conversion,30,Year(s),Prison,10/16/2014 12:00:00 AM
4,44670309710,218297158761,297178094385,70461958445,Markham Courthouse,District 6 - Markham,27,Male,Black,Plea Of Guilty,619,False,False,6/2/1986 12:00:00 AM,PROMIS Conversion,30,Year(s),Conversion,6/2/1986 12:00:00 AM
5,44670309710,218297158761,297140236964,70609573999,Markham Courthouse,District 6 - Markham,27,Male,Black,Nolle On Remand,619,True,False,6/2/1986 12:00:00 AM,PROMIS Conversion,30,Year(s),Conversion,6/2/1986 12:00:00 AM
6,44670309710,218297158761,297178390146,80671262249,Markham Courthouse,District 6 - Markham,27,Male,Black,Plea Of Guilty,10982,True,False,10/16/2014 12:00:00 AM,PROMIS Conversion,30,Year(s),Prison,10/16/2014 12:00:00 AM
7,44670309710,218297158761,297178390146,80671262249,Markham Courthouse,District 6 - Markham,27,Male,Black,Plea Of Guilty,619,False,False,6/2/1986 12:00:00 AM,PROMIS Conversion,30,Year(s),Conversion,6/2/1986 12:00:00 AM


In [56]:
# Select only the year
data['year'] = data['year'].replace(regex=['[0-9]*\/[0-9]*\/','\s[0-9]*\:[0-9]*\:[0-9]*\s[A-Z]*'], value='')

In [57]:
# Change value type
data['year'] = data['year'].astype(int)

In [58]:
data.dtypes

case_id                   object
case_participant_id       object
charge_id                 object
charge_version_id         object
court_facility            object
court_name                object
age_at_incident           object
gender                    object
race                      object
charge_disposition        object
length_of_case_in_days    object
current_sentence            bool
primary_charge              bool
sentence_date             object
offense_category          object
commitment_term           object
commitment_unit           object
sentence_type             object
year                       int32
dtype: object

In [59]:
# Filter year
data = data[(data.year > 1999) & (data.year < 2020)]

In [60]:
# Confirm values
data.describe()

Unnamed: 0,year
count,211516.0
mean,2014.734895
std,2.43294
min,2000.0
25%,2013.0
50%,2015.0
75%,2017.0
max,2019.0


## Analize Offense Category and consolidate into related categories

In [61]:
# Review all the information
data.offense_category.value_counts()

Narcotics                       60250
UUW - Unlawful Use of Weapon    23456
Aggravated DUI                  13265
Retail Theft                    12425
Burglary                        10832
                                ...  
Gambling                           12
Tampering                           9
Failure To Pay Child Support        6
Benefit Recipient Fraud             2
Compelling Gang Membership          2
Name: offense_category, Length: 88, dtype: int64

In [62]:
# Consolidate Homicide category
data.offense_category = data.offense_category.replace(to_replace=['Attempt Homicide','Reckless Homicide'],value='Homicide')

data.offense_category.value_counts()

Narcotics                       60250
UUW - Unlawful Use of Weapon    23456
Aggravated DUI                  13265
Retail Theft                    12425
Burglary                        10832
                                ...  
Gambling                           12
Tampering                           9
Failure To Pay Child Support        6
Compelling Gang Membership          2
Benefit Recipient Fraud             2
Name: offense_category, Length: 86, dtype: int64

In [63]:
# Consolidate Offense Against Police Officers category
data.offense_category = data.offense_category.replace(to_replace=['Aggravated Assault Police Officer','Aggravated Assault Police Officer Firearm','Police Shooting','Impersonating Police Officer'],value='Offense Against Police Officers')

data.offense_category.value_counts()

Narcotics                       60250
UUW - Unlawful Use of Weapon    23456
Aggravated DUI                  13265
Retail Theft                    12425
Burglary                        10832
                                ...  
Gambling                           12
Tampering                           9
Failure To Pay Child Support        6
Compelling Gang Membership          2
Benefit Recipient Fraud             2
Name: offense_category, Length: 83, dtype: int64

In [64]:
# Consolidate Battery category
data.offense_category = data.offense_category.replace(to_replace=['Aggravated Battery','Aggravated Battery Police Officer','Aggravated Battery Police Officer Firearm','Aggravated Battery With A Firearm','Domestic Battery'],value='Battery')

data.offense_category.value_counts()

Narcotics                       60250
UUW - Unlawful Use of Weapon    23456
Battery                         13544
Aggravated DUI                  13265
Retail Theft                    12425
                                ...  
Gambling                           12
Tampering                           9
Failure To Pay Child Support        6
Benefit Recipient Fraud             2
Compelling Gang Membership          2
Name: offense_category, Length: 78, dtype: int64

In [65]:
# Consolidate Robbery, Burglery and Theft category
data.offense_category = data.offense_category.replace(to_replace=['Aggravated Identity Theft','Aggravated Robbery','Aggravated Robbery BB Gun','Armed Robbery','Attempt Armed Robbery','Burglary','Identity Theft','Residential Burglary','Retail Theft','Robbery','Theft','Theft by Deception','Possession Of Burglary Tools'],value='Robbery/Burglery/Theft')

data.offense_category.value_counts()

Narcotics                       60250
Robbery/Burglery/Theft          48208
UUW - Unlawful Use of Weapon    23456
Battery                         13544
Aggravated DUI                  13265
                                ...  
Gambling                           12
Tampering                           9
Failure To Pay Child Support        6
Benefit Recipient Fraud             2
Compelling Gang Membership          2
Name: offense_category, Length: 66, dtype: int64

In [66]:
# Consolidate Arson category
data.offense_category = data.offense_category.replace(to_replace=['Arson and Attempt Arson','Attempt Arson'],value='Arson')

data.offense_category.value_counts()

Narcotics                       60250
Robbery/Burglery/Theft          48208
UUW - Unlawful Use of Weapon    23456
Battery                         13544
Aggravated DUI                  13265
                                ...  
Gambling                           12
Tampering                           9
Failure To Pay Child Support        6
Compelling Gang Membership          2
Benefit Recipient Fraud             2
Name: offense_category, Length: 64, dtype: int64

In [67]:
# Consolidate Firearms and Explosives category
data.offense_category = data.offense_category.replace(to_replace=['Aggravated Discharge Firearm','Armed Violence','Disarming Police Officer','Gun Running','Possession of Explosives','UUW - Unlawful Use of Weapon','Gun - Non UUW','Bomb Threat','Reckless Discharge of Firearm'],value='Firearms and Explosives')

data.offense_category.value_counts()

Narcotics                                        60250
Robbery/Burglery/Theft                           48208
Firearms and Explosives                          25996
Battery                                          13544
Aggravated DUI                                   13265
Driving With Suspended Or Revoked License         8415
Sex Crimes                                        4407
Other Offense                                     3866
Possession of Stolen Motor Vehicle                3770
Homicide                                          3698
DUI                                               3443
Escape - Failure to Return                        2681
Forgery                                           2571
PROMIS Conversion                                 2232
Failure to Register as a Sex Offender             2198
Aggravated Fleeing and Eluding                    2148
Criminal Damage to Property                       1657
Credit Card Cases                                 1258
Home Invas

In [68]:
# Consolidate Motor Vehicles Offenses category
data.offense_category = data.offense_category.replace(to_replace=['Aggravated DUI','Attempt Vehicular Hijacking','DUI','Driving With Suspended Or Revoked License','Major Accidents','Possession of Stolen Motor Vehicle','Vehicular Hijacking','Vehicular Invasion'],value='Motor Vehicle Offenses')

data.offense_category.value_counts()

Narcotics                                        60250
Robbery/Burglery/Theft                           48208
Motor Vehicle Offenses                           29928
Firearms and Explosives                          25996
Battery                                          13544
Sex Crimes                                        4407
Other Offense                                     3866
Homicide                                          3698
Escape - Failure to Return                        2681
Forgery                                           2571
PROMIS Conversion                                 2232
Failure to Register as a Sex Offender             2198
Aggravated Fleeing and Eluding                    2148
Criminal Damage to Property                       1657
Credit Card Cases                                 1258
Home Invasion                                     1225
Offense Against Police Officers                    882
Prostitution                                       643
Violation 

In [69]:
# Consolidate Judicial Process Violations category
data.offense_category = data.offense_category.replace(to_replace=['Communicating With Witness','Escape - Failure to Return','Obstructing Justice','Perjury','Tampering','Violate Bail Bond','Violation Order Of Protection'],value='Judicial Process Violations')

data.offense_category.value_counts()

Narcotics                                        60250
Robbery/Burglery/Theft                           48208
Motor Vehicle Offenses                           29928
Firearms and Explosives                          25996
Battery                                          13544
Sex Crimes                                        4407
Other Offense                                     3866
Homicide                                          3698
Judicial Process Violations                       3434
Forgery                                           2571
PROMIS Conversion                                 2232
Failure to Register as a Sex Offender             2198
Aggravated Fleeing and Eluding                    2148
Criminal Damage to Property                       1657
Credit Card Cases                                 1258
Home Invasion                                     1225
Offense Against Police Officers                    882
Prostitution                                       643
Fraudulent

In [70]:
# Consolidate Sex Offenses category
data.offense_category = data.offense_category.replace(to_replace=['Attempt Sex Crimes','Child Pornography','Failure to Register as a Sex Offender','Pandering','Prostitution','Sex Crimes','Violation of Sex Offender Registration'],value='Sex Offenses')

data.offense_category.value_counts()

Narcotics                                        60250
Robbery/Burglery/Theft                           48208
Motor Vehicle Offenses                           29928
Firearms and Explosives                          25996
Battery                                          13544
Sex Offenses                                      7510
Other Offense                                     3866
Homicide                                          3698
Judicial Process Violations                       3434
Forgery                                           2571
PROMIS Conversion                                 2232
Aggravated Fleeing and Eluding                    2148
Criminal Damage to Property                       1657
Credit Card Cases                                 1258
Home Invasion                                     1225
Offense Against Police Officers                    882
Fraudulent ID                                      450
Arson                                              397
Fraud     

In [71]:
# Consolidate Human Trafficking, Detention and Kidnapping category
data.offense_category = data.offense_category.replace(to_replace=['Child Abduction','Human Trafficking','Kidnapping','Unlawful Restraint'],value='Human Trafficking/Detention/Kidnapping')

data.offense_category.value_counts()

Narcotics                                        60250
Robbery/Burglery/Theft                           48208
Motor Vehicle Offenses                           29928
Firearms and Explosives                          25996
Battery                                          13544
Sex Offenses                                      7510
Other Offense                                     3866
Homicide                                          3698
Judicial Process Violations                       3434
Forgery                                           2571
PROMIS Conversion                                 2232
Aggravated Fleeing and Eluding                    2148
Criminal Damage to Property                       1657
Credit Card Cases                                 1258
Home Invasion                                     1225
Offense Against Police Officers                    882
Human Trafficking/Detention/Kidnapping             635
Fraudulent ID                                      450
Arson     

In [72]:
# Consolidate Trespassing category
data.offense_category = data.offense_category.replace(to_replace=['Home Invasion','Criminal Trespass To Residence'],value='Trespassing')

data.offense_category.value_counts()

Narcotics                                        60250
Robbery/Burglery/Theft                           48208
Motor Vehicle Offenses                           29928
Firearms and Explosives                          25996
Battery                                          13544
Sex Offenses                                      7510
Other Offense                                     3866
Homicide                                          3698
Judicial Process Violations                       3434
Forgery                                           2571
PROMIS Conversion                                 2232
Aggravated Fleeing and Eluding                    2148
Criminal Damage to Property                       1657
Trespassing                                       1321
Credit Card Cases                                 1258
Offense Against Police Officers                    882
Human Trafficking/Detention/Kidnapping             635
Fraudulent ID                                      450
Arson     

In [73]:
# Consolidate Fraud and Deception category
data.offense_category = data.offense_category.replace(to_replace=['Benefit Recipient Fraud','Credit Card Cases','Deceptive Practice','Forgery','Fraud','Fraudulent ID'],value='Fraud/Deception')

data.offense_category.value_counts()

Narcotics                                        60250
Robbery/Burglery/Theft                           48208
Motor Vehicle Offenses                           29928
Firearms and Explosives                          25996
Battery                                          13544
Sex Offenses                                      7510
Fraud/Deception                                   4860
Other Offense                                     3866
Homicide                                          3698
Judicial Process Violations                       3434
PROMIS Conversion                                 2232
Aggravated Fleeing and Eluding                    2148
Criminal Damage to Property                       1657
Trespassing                                       1321
Offense Against Police Officers                    882
Human Trafficking/Detention/Kidnapping             635
Arson                                              397
Stalking                                           311
Intimidati

In [74]:
# Consolidate Corruption category
data.offense_category = data.offense_category.replace(to_replace=['Intimidation','Official Misconduct','Bribery'],value='Corruption')

data.offense_category.value_counts()

Narcotics                                        60250
Robbery/Burglery/Theft                           48208
Motor Vehicle Offenses                           29928
Firearms and Explosives                          25996
Battery                                          13544
Sex Offenses                                      7510
Fraud/Deception                                   4860
Other Offense                                     3866
Homicide                                          3698
Judicial Process Violations                       3434
PROMIS Conversion                                 2232
Aggravated Fleeing and Eluding                    2148
Criminal Damage to Property                       1657
Trespassing                                       1321
Offense Against Police Officers                    882
Human Trafficking/Detention/Kidnapping             635
Arson                                              397
Corruption                                         345
Stalking  

In [75]:
# Consolidate Inside Penal Institutions category
data.offense_category = data.offense_category.replace(to_replace=['Possession of Contraband in Penal Institution','Possession of Shank in Penal Institution'],value='Inside Penal Institutions')

data.offense_category.value_counts()

Narcotics                                 60250
Robbery/Burglery/Theft                    48208
Motor Vehicle Offenses                    29928
Firearms and Explosives                   25996
Battery                                   13544
Sex Offenses                               7510
Fraud/Deception                            4860
Other Offense                              3866
Homicide                                   3698
Judicial Process Violations                3434
PROMIS Conversion                          2232
Aggravated Fleeing and Eluding             2148
Criminal Damage to Property                1657
Trespassing                                1321
Offense Against Police Officers             882
Human Trafficking/Detention/Kidnapping      635
Arson                                       397
Corruption                                  345
Stalking                                    311
Inside Penal Institutions                   164
Hate Crimes                             

In [76]:
# Consolidate Other Offense category
data.offense_category = data.offense_category.replace(to_replace=['Dog Fighting','Gambling','Failure To Pay Child Support','Compelling Gang Membership'],value='Other Offense')

data.offense_category.value_counts()

Narcotics                                 60250
Robbery/Burglery/Theft                    48208
Motor Vehicle Offenses                    29928
Firearms and Explosives                   25996
Battery                                   13544
Sex Offenses                               7510
Fraud/Deception                            4860
Other Offense                              3927
Homicide                                   3698
Judicial Process Violations                3434
PROMIS Conversion                          2232
Aggravated Fleeing and Eluding             2148
Criminal Damage to Property                1657
Trespassing                                1321
Offense Against Police Officers             882
Human Trafficking/Detention/Kidnapping      635
Arson                                       397
Corruption                                  345
Stalking                                    311
Inside Penal Institutions                   164
Hate Crimes                             

## Cleaning Race columns

In [77]:
# Review data
data.race.value_counts()

Black                               143073
White [Hispanic or Latino]           33506
White                                28725
HISPANIC                              3460
Asian                                 1221
White/Black [Hispanic or Latino]      1082
Unknown                                302
American Indian                        112
Biracial                                29
ASIAN                                    6
Name: race, dtype: int64

In [78]:
# Consolidate Asian race
data.race = data.race.replace(to_replace='ASIAN',value='Asian')

data.race.value_counts()

Black                               143073
White [Hispanic or Latino]           33506
White                                28725
HISPANIC                              3460
Asian                                 1227
White/Black [Hispanic or Latino]      1082
Unknown                                302
American Indian                        112
Biracial                                29
Name: race, dtype: int64

In [79]:
# Consolidate Hispanic/Latino race
data.race = data.race.replace(to_replace=['White [Hispanic or Latino]','HISPANIC','White/Black [Hispanic or Latino]'],value='Hispanic/Latino')

data.race.value_counts()

Black              143073
Hispanic/Latino     38048
White               28725
Asian                1227
Unknown               302
American Indian       112
Biracial               29
Name: race, dtype: int64

## Cleaning Sentence Type

In [80]:
# Review data
data.sentence_type.value_counts()

Prison                                   114472
Probation                                 81925
Jail                                       6519
Cook County Boot Camp                      2373
Conditional Discharge                      2221
2nd Chance Probation                       1777
Supervision                                1621
Probation Terminated Unsatisfactorily       235
Inpatient Mental Health Services            158
Conditional Release                          59
Death                                        57
Probation Terminated Instanter               53
Conversion                                   28
Probation Terminated Satisfactorily          18
Name: sentence_type, dtype: int64

In [81]:
# Consolidate Probation & Supervision sentence
data.sentence_type = data.sentence_type.replace(to_replace=['Probation','2nd Chance Probation','Supervision','Probation Terminated Unsatisfactorily','Probation Terminated Instanter','Probation Terminated Satisfactorily'],value='Probation/Supervision')

data.sentence_type.value_counts()

Prison                              114472
Probation/Supervision                85629
Jail                                  6519
Cook County Boot Camp                 2373
Conditional Discharge                 2221
Inpatient Mental Health Services       158
Conditional Release                     59
Death                                   57
Conversion                              28
Name: sentence_type, dtype: int64

In [82]:
# Consolidate Conditional sentence type
data.sentence_type = data.sentence_type.replace(to_replace='Conditional Release',value='Conditional Discharge')

data.sentence_type.value_counts()

Prison                              114472
Probation/Supervision                85629
Jail                                  6519
Cook County Boot Camp                 2373
Conditional Discharge                 2280
Inpatient Mental Health Services       158
Death                                   57
Conversion                              28
Name: sentence_type, dtype: int64

In [83]:
# Consolidate Incarceration sentence
data.sentence_type = data.sentence_type.replace(to_replace=['Jail','Prison'],value='Incarceration')

data.sentence_type.value_counts()

Incarceration                       120991
Probation/Supervision                85629
Cook County Boot Camp                 2373
Conditional Discharge                 2280
Inpatient Mental Health Services       158
Death                                   57
Conversion                              28
Name: sentence_type, dtype: int64

## Cleaning Commitment Unit & Term by Sentence Type

In [84]:
# Data Exploration
data.commitment_unit.value_counts()

Year(s)         154583
Months           48745
Days              5511
Term              2166
Natural Life       419
Dollars             57
Hours               17
Weeks               15
Pounds               2
Kilos                1
Name: commitment_unit, dtype: int64

In [85]:
# Unify Weight units
data.commitment_unit = data.commitment_unit.replace(to_replace=['Pounds','Kilos'],value='Weight')

data.commitment_unit.value_counts()

Year(s)         154583
Months           48745
Days              5511
Term              2166
Natural Life       419
Dollars             57
Hours               17
Weeks               15
Weight               3
Name: commitment_unit, dtype: int64

In [86]:
sorted(list(data.commitment_term.unique()))

['0',
 '00',
 '001',
 '002',
 '006',
 '007',
 '010',
 '012',
 '015',
 '018',
 '02',
 '024',
 '027',
 '030',
 '036',
 '042',
 '054',
 '055',
 '06',
 '062',
 '1',
 '1,154.00',
 '1.5',
 '10',
 '10.5',
 '100',
 '101',
 '1013',
 '102',
 '103',
 '104',
 '105',
 '106',
 '107',
 '108',
 '109',
 '11',
 '11.5',
 '110',
 '111',
 '112',
 '113',
 '114',
 '115',
 '116',
 '117',
 '118',
 '119',
 '12',
 '12.5',
 '12.75',
 '120',
 '121',
 '122',
 '123',
 '125',
 '126',
 '127',
 '1277.13',
 '128',
 '129',
 '13',
 '13.5',
 '130',
 '131',
 '132',
 '133',
 '134',
 '135',
 '136',
 '137',
 '138',
 '139',
 '14',
 '14.5',
 '140',
 '141',
 '142',
 '143',
 '144',
 '145',
 '146',
 '147',
 '148',
 '149',
 '15',
 '150',
 '151',
 '152',
 '153',
 '154',
 '155',
 '156',
 '157',
 '158',
 '159',
 '16',
 '16.5',
 '160',
 '161',
 '162',
 '163',
 '164',
 '165',
 '166',
 '167',
 '168',
 '169',
 '17',
 '17.5',
 '170',
 '171',
 '172',
 '174',
 '175',
 '176',
 '177',
 '178',
 '179',
 '18',
 '18 months',
 '180',
 '181',
 '182',

In [87]:
# Clean all non-numeric characters and transform value type to float
data.commitment_term = data.commitment_term.replace(to_replace='two',value=2)
data.commitment_term = data.commitment_term.replace(regex=['[a-z]*','\,','\`'], value='')
data.commitment_term = data.commitment_term.astype('float')

data.commitment_term

3         30.0
6         30.0
9         62.0
17        70.0
34         6.0
          ... 
236119     2.0
236120     2.0
236121     2.0
236122     1.0
236123     3.0
Name: commitment_term, Length: 211516, dtype: float64

In [88]:
data_commitmentunits = data.groupby(['commitment_unit']).mean()
data_commitmentunits['commitment_term']

commitment_unit
Days            102.789875
Dollars         129.019825
Hours            34.176471
Months           24.853554
Natural Life     11.541766
Term              1.016620
Weeks            25.066667
Weight           11.333333
Year(s)           4.135462
Name: commitment_term, dtype: float64

### Cleaning by Cook County Boot Camp

In [162]:
data_break = data

In [163]:
bootcamp = data_break.loc[data_break.sentence_type == 'Cook County Boot Camp']
bootcamp.commitment_unit.value_counts()

Term       1998
Year(s)     181
Months      141
Days         52
Dollars       1
Name: commitment_unit, dtype: int64

In [164]:
# fnames = {103: "Matt", 104: "Mr"}
# df['First_Name'] = df['ID'].map(fnames)]

In [165]:
bootcamp_years = bootcamp['commitment_term'].loc[bootcamp.commitment_unit == 'Year(s)']
bootcamp_years.value_counts()

1.0     86
4.0     27
3.0     26
0.0     19
2.0     13
6.0      8
15.0     1
7.0      1
Name: commitment_term, dtype: int64

In [177]:
change = data_break.commitment_term.loc[data_break.sentence_type == 'Cook County Boot Camp'].loc[data_break.commitment_unit == 'Year(s)'].loc[data_break.commitment_term == 0]

# data_break.where(~change,1)

data_break.loc[data_break.sentence_type == 'Cook County Boot Camp'].commitment_term.loc[data_break.commitment_unit == 'Year(s)'].value_counts()

TypeError: ufunc 'invert' not supported for the input types, and the inputs could not be safely coerced to any supported types according to the casting rule ''safe''

In [167]:
for index,row in data_break.iterrows():
    if row.sentence_type == 'Cook County Boot Camp':
        if row.commitment_unit == 'Year(s)':
            # row.commitment_term.replace(0,1)
            if row.commitment_term == 0:
                row.commitment_term = 1
                
data_break.loc[data_break.sentence_type == 'Cook County Boot Camp'].commitment_term.loc[data_break.commitment_unit == 'Year(s)'].value_counts()

1.0     86
4.0     27
3.0     26
0.0     19
2.0     13
6.0      8
15.0     1
7.0      1
Name: commitment_term, dtype: int64