In [1]:
import pandas as pd
from sodapy import Socrata
import numpy as np

# Get information from Cook County Sentencing Data Database

In [2]:
# Call API
client = Socrata("datacatalog.cookcountyil.gov", None)



In [3]:
# Retrive Data
results = client.get("tg8v-tm6u", limit=300000)

In [4]:
# Generate DataFrame
original_data = pd.DataFrame.from_records(results)

# Explore data
list(original_data.columns.values)

['case_id',
 'case_participant_id',
 'received_date',
 'offense_category',
 'primary_charge',
 'charge_id',
 'charge_version_id',
 'disposition_charged_offense_title',
 'charge_count',
 'disposition_date',
 'disposition_charged_chapter',
 'disposition_charged_act',
 'disposition_charged_section',
 'disposition_charged_class',
 'disposition_charged_aoic',
 'charge_disposition',
 'sentence_judge',
 'court_name',
 'court_facility',
 'sentence_phase',
 'sentence_date',
 'sentence_type',
 'current_sentence',
 'commitment_type',
 'length_of_case_in_days',
 'age_at_incident',
 'race',
 'gender',
 'incident_begin_date',
 'law_enforcement_agency',
 'arrest_date',
 'felony_review_date',
 'felony_review_result',
 'arraignment_date',
 'updated_offense_category',
 'commitment_term',
 'commitment_unit',
 'incident_end_date',
 'charge_disposition_reason',
 'incident_city',
 'unit']

# Cleaning Process

## Select only chosen columns

In [5]:
# Copy only columns that we'll need for our analysis
data = original_data[['case_id','case_participant_id','charge_id','charge_version_id','court_facility','court_name','age_at_incident','gender','race','charge_disposition','length_of_case_in_days','current_sentence','primary_charge','sentence_date','offense_category','commitment_term','commitment_unit','sentence_type']]
data.head()

Unnamed: 0,case_id,case_participant_id,charge_id,charge_version_id,court_facility,court_name,age_at_incident,gender,race,charge_disposition,length_of_case_in_days,current_sentence,primary_charge,sentence_date,offense_category,commitment_term,commitment_unit,sentence_type
0,87302669130,221103049156,106130426295,71631463339,Markham Courthouse,District 6 - Markham,27,Male,Black,Nolle On Remand,619,True,False,6/2/1986 12:00:00 AM,PROMIS Conversion,,,Conversion
1,87302669130,221103049156,106130637571,60521205325,Markham Courthouse,District 6 - Markham,27,Male,Black,Nolle On Remand,619,True,False,6/2/1986 12:00:00 AM,PROMIS Conversion,30.0,Year(s),Conversion
2,87302669130,221103049156,106143736689,81281765233,Markham Courthouse,District 6 - Markham,27,Male,Black,Nolle On Remand,619,True,False,6/2/1986 12:00:00 AM,PROMIS Conversion,,,Conversion
3,87302669130,221103049156,106143842327,81277737609,Markham Courthouse,District 6 - Markham,27,Male,Black,Nolle On Remand,619,True,False,6/2/1986 12:00:00 AM,PROMIS Conversion,,,Conversion
4,87302669130,221103049156,106144159241,60394680398,Markham Courthouse,District 6 - Markham,27,Male,Black,Plea Of Guilty,10982,True,False,10/16/2014 12:00:00 AM,PROMIS Conversion,30.0,Year(s),Prison


In [6]:
# Explore values in columns
data.count()

case_id                   241511
case_participant_id       241511
charge_id                 241511
charge_version_id         241511
court_facility            239630
court_name                240126
age_at_incident           232073
gender                    240700
race                      240247
charge_disposition        241511
length_of_case_in_days    222605
current_sentence          241511
primary_charge            241511
sentence_date             241511
offense_category          241511
commitment_term           239852
commitment_unit           239852
sentence_type             241511
dtype: int64

In [7]:
# Delete conversion data from dataframe
data.replace(regex='PROMIS*',value=np.NaN, inplace=True)

## Drop NaN Values

In [8]:
data = data.dropna()
data.count()

case_id                   208561
case_participant_id       208561
charge_id                 208561
charge_version_id         208561
court_facility            208561
court_name                208561
age_at_incident           208561
gender                    208561
race                      208561
charge_disposition        208561
length_of_case_in_days    208561
current_sentence          208561
primary_charge            208561
sentence_date             208561
offense_category          208561
commitment_term           208561
commitment_unit           208561
sentence_type             208561
dtype: int64

## Filter data after 2000 & before 2019

In [9]:
# Explore type of values in each column
data.dtypes

case_id                   object
case_participant_id       object
charge_id                 object
charge_version_id         object
court_facility            object
court_name                object
age_at_incident           object
gender                    object
race                      object
charge_disposition        object
length_of_case_in_days    object
current_sentence            bool
primary_charge              bool
sentence_date             object
offense_category          object
commitment_term           object
commitment_unit           object
sentence_type             object
dtype: object

In [10]:
# Create a copy of the Sentence Date to filter it
hdate = data['sentence_date']
data['date_year'] = hdate

data.head()

Unnamed: 0,case_id,case_participant_id,charge_id,charge_version_id,court_facility,court_name,age_at_incident,gender,race,charge_disposition,length_of_case_in_days,current_sentence,primary_charge,sentence_date,offense_category,commitment_term,commitment_unit,sentence_type,date_year
663,114024398027,230101052744,807648613316,449520166967,26TH Street,District 1 - Chicago,21,Male,White,Plea Of Guilty,187,True,False,5/11/2015 12:00:00 AM,Reckless Homicide,8,Year(s),Prison,5/11/2015 12:00:00 AM
1360,123803937179,255504441936,190690725339,122497821309,26TH Street,District 1 - Chicago,27,Male,HISPANIC,Plea Of Guilty,2624,True,False,11/5/2014 12:00:00 AM,UUW - Unlawful Use of Weapon,2,Year(s),Probation,11/5/2014 12:00:00 AM
2123,127790349873,265428457350,541647467636,271650860226,Markham Courthouse,District 6 - Markham,18,Male,Black,Plea Of Guilty,795,True,False,12/9/2013 12:00:00 AM,Sex Crimes,4,Year(s),Prison,12/9/2013 12:00:00 AM
2177,128491120254,266244611418,759810529209,458530536964,26TH Street,District 1 - Chicago,21,Male,Black,Finding Guilty,1201,True,True,2/1/2016 12:00:00 AM,Homicide,9,Year(s),Prison,2/1/2016 12:00:00 AM
2261,129030951431,266511120401,702938173654,382945270335,26TH Street,District 1 - Chicago,35,Male,Black,Plea Of Guilty,271,True,True,2/28/2011 12:00:00 AM,Escape - Failure to Return,2,Year(s),Prison,2/28/2011 12:00:00 AM


In [11]:
# Select only the year
data['date_year'] = data['date_year'].replace(regex=['[0-9]*\/[0-9]*\/','\s[0-9]*\:[0-9]*\:[0-9]*\s[A-Z]*'], value='')

In [12]:
# Change value type
data['date_year'] = data['date_year'].astype(int)

In [13]:
# Confirm value type change
data.dtypes

case_id                   object
case_participant_id       object
charge_id                 object
charge_version_id         object
court_facility            object
court_name                object
age_at_incident           object
gender                    object
race                      object
charge_disposition        object
length_of_case_in_days    object
current_sentence            bool
primary_charge              bool
sentence_date             object
offense_category          object
commitment_term           object
commitment_unit           object
sentence_type             object
date_year                  int32
dtype: object

In [14]:
# Filter year
data = data[(data.date_year > 2014) & (data.date_year < 2020)]

In [15]:
# Confirm values
data.describe()

Unnamed: 0,date_year
count,105320.0
mean,2016.799032
std,1.398293
min,2015.0
25%,2016.0
50%,2017.0
75%,2018.0
max,2019.0


In [16]:
data = data.loc[data.current_sentence == True]
data = data.loc[data.primary_charge == True]
data.describe()

Unnamed: 0,date_year
count,69962.0
mean,2016.775735
std,1.407371
min,2015.0
25%,2016.0
50%,2017.0
75%,2018.0
max,2019.0


## Analize Offense Category and consolidate into related categories

In [18]:
# Review all the information
data.offense_category.value_counts()

Narcotics                       18389
UUW - Unlawful Use of Weapon     7091
Aggravated DUI                   4790
Retail Theft                     4763
Burglary                         4222
                                ...  
Dog Fighting                        4
Tampering                           4
Police Shooting                     3
Failure To Pay Child Support        2
Perjury                             1
Name: offense_category, Length: 84, dtype: int64

In [19]:
# Consolidate Homicide category
data.offense_category = data.offense_category.replace(to_replace=['Attempt Homicide','Reckless Homicide'],value='Homicide')

data.offense_category.value_counts()

Narcotics                       18389
UUW - Unlawful Use of Weapon     7091
Aggravated DUI                   4790
Retail Theft                     4763
Burglary                         4222
                                ...  
Dog Fighting                        4
Tampering                           4
Police Shooting                     3
Failure To Pay Child Support        2
Perjury                             1
Name: offense_category, Length: 82, dtype: int64

In [20]:
# Consolidate Offense Against Police Officers category
data.offense_category = data.offense_category.replace(to_replace=['Aggravated Assault Police Officer','Aggravated Assault Police Officer Firearm','Police Shooting','Impersonating Police Officer'],value='Offense Against Police Officers')

data.offense_category.value_counts()

Narcotics                       18389
UUW - Unlawful Use of Weapon     7091
Aggravated DUI                   4790
Retail Theft                     4763
Burglary                         4222
                                ...  
Pandering                           4
Dog Fighting                        4
Tampering                           4
Failure To Pay Child Support        2
Perjury                             1
Name: offense_category, Length: 79, dtype: int64

In [21]:
# Consolidate Battery category
data.offense_category = data.offense_category.replace(to_replace=['Aggravated Battery','Aggravated Battery Police Officer','Aggravated Battery Police Officer Firearm','Aggravated Battery With A Firearm','Domestic Battery'],value='Battery')

data.offense_category.value_counts()

Narcotics                       18389
UUW - Unlawful Use of Weapon     7091
Aggravated DUI                   4790
Retail Theft                     4763
Battery                          4229
                                ...  
Dog Fighting                        4
Pandering                           4
Tampering                           4
Failure To Pay Child Support        2
Perjury                             1
Name: offense_category, Length: 74, dtype: int64

In [22]:
# Consolidate Robbery, Burglery and Theft category
data.offense_category = data.offense_category.replace(to_replace=['Aggravated Identity Theft','Aggravated Robbery','Aggravated Robbery BB Gun','Armed Robbery','Attempt Armed Robbery','Burglary','Identity Theft','Residential Burglary','Retail Theft','Robbery','Theft','Theft by Deception','Possession Of Burglary Tools'],value='Robbery/Burglery/Theft')

data.offense_category.value_counts()

Narcotics                       18389
Robbery/Burglery/Theft          17439
UUW - Unlawful Use of Weapon     7091
Aggravated DUI                   4790
Battery                          4229
                                ...  
Attempt Sex Crimes                  4
Dog Fighting                        4
Tampering                           4
Failure To Pay Child Support        2
Perjury                             1
Name: offense_category, Length: 62, dtype: int64

In [23]:
# Consolidate Arson category
data.offense_category = data.offense_category.replace(to_replace=['Arson and Attempt Arson','Attempt Arson'],value='Arson')

data.offense_category.value_counts()

Narcotics                                        18389
Robbery/Burglery/Theft                           17439
UUW - Unlawful Use of Weapon                      7091
Aggravated DUI                                    4790
Battery                                           4229
Driving With Suspended Or Revoked License         3949
Possession of Stolen Motor Vehicle                1631
Other Offense                                     1362
DUI                                               1288
Escape - Failure to Return                        1151
Sex Crimes                                        1136
Failure to Register as a Sex Offender             1044
Aggravated Fleeing and Eluding                     992
Forgery                                            944
Criminal Damage to Property                        746
Homicide                                           599
Credit Card Cases                                  448
Reckless Discharge of Firearm                      233
Vehicular 

In [24]:
# Consolidate Firearms and Explosives category
data.offense_category = data.offense_category.replace(to_replace=['Aggravated Discharge Firearm','Armed Violence','Disarming Police Officer','Gun Running','Possession of Explosives','UUW - Unlawful Use of Weapon','Gun - Non UUW','Bomb Threat','Reckless Discharge of Firearm'],value='Firearms and Explosives')

data.offense_category.value_counts()

Narcotics                                        18389
Robbery/Burglery/Theft                           17439
Firearms and Explosives                           7663
Aggravated DUI                                    4790
Battery                                           4229
Driving With Suspended Or Revoked License         3949
Possession of Stolen Motor Vehicle                1631
Other Offense                                     1362
DUI                                               1288
Escape - Failure to Return                        1151
Sex Crimes                                        1136
Failure to Register as a Sex Offender             1044
Aggravated Fleeing and Eluding                     992
Forgery                                            944
Criminal Damage to Property                        746
Homicide                                           599
Credit Card Cases                                  448
Vehicular Hijacking                                229
Offense Ag

In [25]:
# Consolidate Motor Vehicles Offenses category
data.offense_category = data.offense_category.replace(to_replace=['Aggravated DUI','Attempt Vehicular Hijacking','DUI','Driving With Suspended Or Revoked License','Major Accidents','Possession of Stolen Motor Vehicle','Vehicular Hijacking','Vehicular Invasion'],value='Motor Vehicle Offenses')

data.offense_category.value_counts()

Narcotics                                        18389
Robbery/Burglery/Theft                           17439
Motor Vehicle Offenses                           11998
Firearms and Explosives                           7663
Battery                                           4229
Other Offense                                     1362
Escape - Failure to Return                        1151
Sex Crimes                                        1136
Failure to Register as a Sex Offender             1044
Aggravated Fleeing and Eluding                     992
Forgery                                            944
Criminal Damage to Property                        746
Homicide                                           599
Credit Card Cases                                  448
Offense Against Police Officers                    229
Home Invasion                                      207
Fraudulent ID                                      187
Violation Order Of Protection                      179
Arson     

In [26]:
# Consolidate Judicial Process Violations category
data.offense_category = data.offense_category.replace(to_replace=['Communicating With Witness','Escape - Failure to Return','Obstructing Justice','Perjury','Tampering','Violate Bail Bond','Violation Order Of Protection'],value='Judicial Process Violations')

data.offense_category.value_counts()

Narcotics                                        18389
Robbery/Burglery/Theft                           17439
Motor Vehicle Offenses                           11998
Firearms and Explosives                           7663
Battery                                           4229
Judicial Process Violations                       1432
Other Offense                                     1362
Sex Crimes                                        1136
Failure to Register as a Sex Offender             1044
Aggravated Fleeing and Eluding                     992
Forgery                                            944
Criminal Damage to Property                        746
Homicide                                           599
Credit Card Cases                                  448
Offense Against Police Officers                    229
Home Invasion                                      207
Fraudulent ID                                      187
Arson                                              146
Fraud     

In [27]:
# Consolidate Sex Offenses category
data.offense_category = data.offense_category.replace(to_replace=['Attempt Sex Crimes','Child Pornography','Failure to Register as a Sex Offender','Pandering','Prostitution','Sex Crimes','Violation of Sex Offender Registration'],value='Sex Offenses')

data.offense_category.value_counts()

Narcotics                                        18389
Robbery/Burglery/Theft                           17439
Motor Vehicle Offenses                           11998
Firearms and Explosives                           7663
Battery                                           4229
Sex Offenses                                      2273
Judicial Process Violations                       1432
Other Offense                                     1362
Aggravated Fleeing and Eluding                     992
Forgery                                            944
Criminal Damage to Property                        746
Homicide                                           599
Credit Card Cases                                  448
Offense Against Police Officers                    229
Home Invasion                                      207
Fraudulent ID                                      187
Arson                                              146
Fraud                                              135
Stalking  

In [28]:
# Consolidate Human Trafficking, Detention and Kidnapping category
data.offense_category = data.offense_category.replace(to_replace=['Child Abduction','Human Trafficking','Kidnapping','Unlawful Restraint'],value='Human Trafficking/Detention/Kidnapping')

data.offense_category.value_counts()

Narcotics                                        18389
Robbery/Burglery/Theft                           17439
Motor Vehicle Offenses                           11998
Firearms and Explosives                           7663
Battery                                           4229
Sex Offenses                                      2273
Judicial Process Violations                       1432
Other Offense                                     1362
Aggravated Fleeing and Eluding                     992
Forgery                                            944
Criminal Damage to Property                        746
Homicide                                           599
Credit Card Cases                                  448
Offense Against Police Officers                    229
Home Invasion                                      207
Fraudulent ID                                      187
Arson                                              146
Human Trafficking/Detention/Kidnapping             145
Fraud     

In [29]:
# Consolidate Trespassing category
data.offense_category = data.offense_category.replace(to_replace=['Home Invasion','Criminal Trespass To Residence'],value='Trespassing')

data.offense_category.value_counts()

Narcotics                                        18389
Robbery/Burglery/Theft                           17439
Motor Vehicle Offenses                           11998
Firearms and Explosives                           7663
Battery                                           4229
Sex Offenses                                      2273
Judicial Process Violations                       1432
Other Offense                                     1362
Aggravated Fleeing and Eluding                     992
Forgery                                            944
Criminal Damage to Property                        746
Homicide                                           599
Credit Card Cases                                  448
Trespassing                                        264
Offense Against Police Officers                    229
Fraudulent ID                                      187
Arson                                              146
Human Trafficking/Detention/Kidnapping             145
Fraud     

In [30]:
# Consolidate Fraud and Deception category
data.offense_category = data.offense_category.replace(to_replace=['Benefit Recipient Fraud','Credit Card Cases','Deceptive Practice','Forgery','Fraud','Fraudulent ID'],value='Fraud/Deception')

data.offense_category.value_counts()

Narcotics                                        18389
Robbery/Burglery/Theft                           17439
Motor Vehicle Offenses                           11998
Firearms and Explosives                           7663
Battery                                           4229
Sex Offenses                                      2273
Fraud/Deception                                   1777
Judicial Process Violations                       1432
Other Offense                                     1362
Aggravated Fleeing and Eluding                     992
Criminal Damage to Property                        746
Homicide                                           599
Trespassing                                        264
Offense Against Police Officers                    229
Arson                                              146
Human Trafficking/Detention/Kidnapping             145
Stalking                                            87
Intimidation                                        72
Possession

In [31]:
# Consolidate Corruption category
data.offense_category = data.offense_category.replace(to_replace=['Intimidation','Official Misconduct','Bribery'],value='Corruption')

data.offense_category.value_counts()

Narcotics                                        18389
Robbery/Burglery/Theft                           17439
Motor Vehicle Offenses                           11998
Firearms and Explosives                           7663
Battery                                           4229
Sex Offenses                                      2273
Fraud/Deception                                   1777
Judicial Process Violations                       1432
Other Offense                                     1362
Aggravated Fleeing and Eluding                     992
Criminal Damage to Property                        746
Homicide                                           599
Trespassing                                        264
Offense Against Police Officers                    229
Arson                                              146
Human Trafficking/Detention/Kidnapping             145
Corruption                                         105
Stalking                                            87
Possession

In [32]:
# Consolidate Inside Penal Institutions category
data.offense_category = data.offense_category.replace(to_replace=['Possession of Contraband in Penal Institution','Possession of Shank in Penal Institution'],value='Inside Penal Institutions')

data.offense_category.value_counts()

Narcotics                                 18389
Robbery/Burglery/Theft                    17439
Motor Vehicle Offenses                    11998
Firearms and Explosives                    7663
Battery                                    4229
Sex Offenses                               2273
Fraud/Deception                            1777
Judicial Process Violations                1432
Other Offense                              1362
Aggravated Fleeing and Eluding              992
Criminal Damage to Property                 746
Homicide                                    599
Trespassing                                 264
Offense Against Police Officers             229
Arson                                       146
Human Trafficking/Detention/Kidnapping      145
Corruption                                  105
Stalking                                     87
Inside Penal Institutions                    52
Hate Crimes                                  22
Gambling                                

In [33]:
# Consolidate Other Offense category
data.offense_category = data.offense_category.replace(to_replace=['Dog Fighting','Gambling','Failure To Pay Child Support','Compelling Gang Membership'],value='Other Offense')

data.offense_category.value_counts()

Narcotics                                 18389
Robbery/Burglery/Theft                    17439
Motor Vehicle Offenses                    11998
Firearms and Explosives                    7663
Battery                                    4229
Sex Offenses                               2273
Fraud/Deception                            1777
Judicial Process Violations                1432
Other Offense                              1375
Aggravated Fleeing and Eluding              992
Criminal Damage to Property                 746
Homicide                                    599
Trespassing                                 264
Offense Against Police Officers             229
Arson                                       146
Human Trafficking/Detention/Kidnapping      145
Corruption                                  105
Stalking                                     87
Inside Penal Institutions                    52
Hate Crimes                                  22
Name: offense_category, dtype: int64

## Cleaning Race columns

In [34]:
# Review data
data.race.value_counts()

Black                               46044
White [Hispanic or Latino]          11842
White                               10154
HISPANIC                              947
Asian                                 418
White/Black [Hispanic or Latino]      401
Unknown                               111
American Indian                        34
Biracial                               11
Name: race, dtype: int64

In [35]:
# Consolidate Asian race
data.race = data.race.replace(to_replace='ASIAN',value='Asian')

data.race.value_counts()

Black                               46044
White [Hispanic or Latino]          11842
White                               10154
HISPANIC                              947
Asian                                 418
White/Black [Hispanic or Latino]      401
Unknown                               111
American Indian                        34
Biracial                               11
Name: race, dtype: int64

In [36]:
# Consolidate Hispanic/Latino race
data.race = data.race.replace(to_replace=['White [Hispanic or Latino]','HISPANIC','White/Black [Hispanic or Latino]'],value='Hispanic/Latino')

data.race.value_counts()

Black              46044
Hispanic/Latino    13190
White              10154
Asian                418
Unknown              111
American Indian       34
Biracial              11
Name: race, dtype: int64

## Cleaning Sentence Type

In [37]:
# Review data
data.sentence_type.value_counts()

Prison                                   35373
Probation                                27756
Jail                                      3219
2nd Chance Probation                      1332
Conditional Discharge                     1014
Supervision                                931
Cook County Boot Camp                      158
Probation Terminated Unsatisfactorily       97
Probation Terminated Instanter              27
Inpatient Mental Health Services            18
Conditional Release                         16
Death                                       16
Probation Terminated Satisfactorily          5
Name: sentence_type, dtype: int64

In [38]:
# Consolidate Probation & Supervision sentence
data.sentence_type = data.sentence_type.replace(to_replace=['Probation','2nd Chance Probation','Supervision','Probation Terminated Unsatisfactorily','Probation Terminated Instanter','Probation Terminated Satisfactorily'],value='Probation/Supervision')

data.sentence_type.value_counts()

Prison                              35373
Probation/Supervision               30148
Jail                                 3219
Conditional Discharge                1014
Cook County Boot Camp                 158
Inpatient Mental Health Services       18
Conditional Release                    16
Death                                  16
Name: sentence_type, dtype: int64

In [39]:
# Consolidate Conditional sentence type
data.sentence_type = data.sentence_type.replace(to_replace='Conditional Release',value='Conditional Discharge')

data.sentence_type.value_counts()

Prison                              35373
Probation/Supervision               30148
Jail                                 3219
Conditional Discharge                1030
Cook County Boot Camp                 158
Inpatient Mental Health Services       18
Death                                  16
Name: sentence_type, dtype: int64

In [40]:
# Consolidate Incarceration sentence
data.sentence_type = data.sentence_type.replace(to_replace=['Jail','Prison'],value='Incarceration')

data.sentence_type.value_counts()

Incarceration                       38592
Probation/Supervision               30148
Conditional Discharge                1030
Cook County Boot Camp                 158
Inpatient Mental Health Services       18
Death                                  16
Name: sentence_type, dtype: int64

## Cleaning Commitment Unit

In [41]:
# Data Exploration
data.commitment_unit.value_counts()

Year(s)         51153
Months          15491
Days             3105
Term              155
Natural Life       32
Dollars            17
Hours               4
Weeks               4
Kilos               1
Name: commitment_unit, dtype: int64

In [42]:
# Unify Weight units
data.commitment_unit = data.commitment_unit.replace(to_replace=['Pounds','Kilos'],value='Weight')

data.commitment_unit.value_counts()

Year(s)         51153
Months          15491
Days             3105
Term              155
Natural Life       32
Dollars            17
Hours               4
Weeks               4
Weight              1
Name: commitment_unit, dtype: int64

In [43]:
sorted(list(data.commitment_term.unique()))

['0',
 '001',
 '1',
 '1.5',
 '10',
 '10.5',
 '100',
 '101',
 '102',
 '103',
 '104',
 '105',
 '106',
 '107',
 '108',
 '109',
 '11',
 '11.5',
 '110',
 '111',
 '113',
 '114',
 '115',
 '116',
 '118',
 '119',
 '12',
 '12.5',
 '120',
 '121',
 '122',
 '123',
 '126',
 '127',
 '1277.13',
 '128',
 '129',
 '13',
 '13.5',
 '130',
 '131',
 '132',
 '133',
 '134',
 '135',
 '136',
 '137',
 '138',
 '139',
 '14',
 '14.5',
 '140',
 '141',
 '142',
 '143',
 '144',
 '145',
 '146',
 '147',
 '148',
 '149',
 '15',
 '150',
 '151',
 '153',
 '154',
 '155',
 '156',
 '158',
 '159',
 '16',
 '16.5',
 '160',
 '161',
 '162',
 '164',
 '165',
 '166',
 '167',
 '168',
 '169',
 '17',
 '17.5',
 '170',
 '171',
 '172',
 '174',
 '175',
 '176',
 '177',
 '179',
 '18',
 '180',
 '181',
 '182',
 '183',
 '184',
 '185',
 '186',
 '187',
 '188',
 '189',
 '19',
 '190',
 '191',
 '192',
 '193',
 '194',
 '195',
 '197',
 '199',
 '2',
 '2.5',
 '2.6',
 '20',
 '200',
 '202',
 '204',
 '205',
 '206',
 '207',
 '208',
 '21',
 '210',
 '211',
 '212',

In [44]:
# Clean all non-numeric characters and transform value type to float
data.commitment_term = data.commitment_term.replace(to_replace='two',value=2)
data.commitment_term = data.commitment_term.replace(regex=['[a-z]*','\,','\`'], value='')
data.commitment_term = data.commitment_term.astype('float')

data.commitment_term

0         9.0
1        30.0
2         4.0
3        52.0
4        10.0
         ... 
69957    24.0
69958     2.0
69959    18.0
69960     2.0
69961    90.0
Name: commitment_term, Length: 69962, dtype: float64

In [45]:
# Review commitment unit values
data.commitment_unit.value_counts()

Year(s)         51153
Months          15491
Days             3105
Term              155
Natural Life       32
Dollars            17
Hours               4
Weeks               4
Weight              1
Name: commitment_unit, dtype: int64

In [46]:
# Change all years over 130 to 130 to mark natural life
data.loc[(data.commitment_unit == 'Year(s)') & (data.commitment_term > 129),['commitment_term']] = 130

In [47]:
# Changes in Bootcamp Terms to Months
data.loc[(data.sentence_type == 'Cook County Boot Camp') & (data.commitment_unit == 'Term') & (data.commitment_term == 1),['commitment_term']] = 12

data.loc[(data.sentence_type == 'Cook County Boot Camp') & (data.commitment_unit == 'Term') & (data.commitment_term == 12),['commitment_unit']] = 'Months'

data.loc[(data.sentence_type == 'Cook County Boot Camp') & (data.commitment_unit == 'Term') & (data.commitment_term == 18),['commitment_unit']] = 'Months'

In [48]:
# Change Death info to 130 years
data.loc[(data.sentence_type == 'Death'),['commitment_unit']] = 'Year(s)'
data.loc[(data.sentence_type == 'Death'),['commitment_term']] = 130

In [49]:
# Create column with all values in months
def month_convert(row):
    if row ['commitment_unit'] == 'Months':
        return round(float(row['commitment_term']),2)
    if row["commitment_unit"] == "Year(s)":
        return round(int(row["commitment_term"]) * 12.0, 2)
    if row["commitment_unit"] == "Weeks":
       return round(float(row['commitment_term']) / 4, 2)
    if row["commitment_unit"] == "Days":
        return round(float( row['commitment_term']) / 30, 2)
    if row['commitment_unit'] == "Natural Life":
        return 1560.
    else:
        return 0.

data['month'] = data.apply(lambda row:month_convert(row), axis = 1)

In [50]:
# Create column with all values in years
def year_convert(row):
    if row ['commitment_unit'] == 'Year(s)':
        return round(float(row['commitment_term']),2)
    if row["commitment_unit"] == "Months":
        return round(int(row["commitment_term"]) / 12.0, 2)
    if row["commitment_unit"] == "Weeks":
       return round(float(row['commitment_term']) / 52, 2)
    if row["commitment_unit"] == "Days":
        return round(float( row['commitment_term'])/365, 2)
    if row['commitment_unit'] == "Natural Life":
        return 130.
    else:
        return 0.

data['year'] = data.apply(lambda row:year_convert(row), axis = 1)

In [51]:
# Review new columns and changes
data[['commitment_unit', 'commitment_term','year','month']]

Unnamed: 0,commitment_unit,commitment_term,year,month
0,Year(s),9.0,9.00,108.00
1,Days,30.0,0.08,1.00
2,Year(s),4.0,4.00,48.00
3,Year(s),52.0,52.00,624.00
4,Months,10.0,0.83,10.00
...,...,...,...,...
69957,Months,24.0,2.00,24.00
69958,Days,2.0,0.01,0.07
69959,Months,18.0,1.50,18.00
69960,Days,2.0,0.01,0.07


In [52]:
# Review Age values
data.age_at_incident.value_counts().head(60)

20    3274
19    3168
21    3098
22    3070
23    2965
24    2925
25    2758
26    2753
27    2510
28    2290
29    2181
30    2130
31    1939
18    1929
32    1757
33    1753
34    1699
35    1678
36    1529
37    1451
38    1379
40    1204
39    1173
43    1156
45    1150
46    1143
42    1139
48    1111
44    1091
41    1080
47    1077
49    1021
51     982
50     925
52     918
54     835
53     821
55     727
56     645
57     572
58     469
59     442
60     367
61     333
62     243
17     228
63     164
64     160
65     128
66     108
67      65
68      63
69      39
70      37
71      23
72      21
73      19
75      12
74      10
76       6
Name: age_at_incident, dtype: int64

In [53]:
# Change age dtype from string to integer 
data.age_at_incident = data.age_at_incident.astype(int)

In [54]:
# Confirm changes
data.dtypes

case_id                    object
case_participant_id        object
charge_id                  object
charge_version_id          object
court_facility             object
court_name                 object
age_at_incident             int32
gender                     object
race                       object
charge_disposition         object
length_of_case_in_days     object
current_sentence             bool
primary_charge               bool
sentence_date              object
offense_category           object
commitment_term           float64
commitment_unit            object
sentence_type              object
date_year                   int32
month                     float64
year                      float64
dtype: object

In [55]:
# Bins to group age
bins_ranges = [0,18,24,29,34,39,49,59,137]
bins_names = ["<18", '18-24', '25-29', '30-34', '35-39', '40s', '50s', '60+']

data['age_bins'] = pd.cut(data.age_at_incident,bins_ranges,labels=bins_names)

In [56]:
# Drop duplicates of data with current values
data.drop_duplicates(inplace=True)
data.reset_index(drop=True,inplace=True)
data

Unnamed: 0,case_id,case_participant_id,charge_id,charge_version_id,court_facility,court_name,age_at_incident,gender,race,charge_disposition,...,primary_charge,sentence_date,offense_category,commitment_term,commitment_unit,sentence_type,date_year,month,year,age_bins
0,128491120254,266244611418,759810529209,458530536964,26TH Street,District 1 - Chicago,21,Male,Black,Finding Guilty,...,True,2/1/2016 12:00:00 AM,Homicide,9.0,Year(s),Incarceration,2016,108.00,9.00,18-24
1,138057500568,274547245089,707305567445,480844954165,26TH Street,District 1 - Chicago,47,Female,Black,Plea Of Guilty,...,True,11/2/2017 12:00:00 AM,Narcotics,30.0,Days,Incarceration,2017,1.00,0.08,40s
2,219960145803,874516091021,702607209651,382760114712,26TH Street,District 1 - Chicago,27,Male,Black,Plea Of Guilty,...,True,6/17/2016 12:00:00 AM,Narcotics,4.0,Year(s),Incarceration,2016,48.00,4.00,25-29
3,220003462419,874796614290,701366913337,382041644149,Markham Courthouse,District 6 - Markham,33,Male,Black,Verdict Guilty,...,True,4/25/2018 12:00:00 AM,Homicide,52.0,Year(s),Incarceration,2018,624.00,52.00,30-34
4,220036934349,874993408134,701540793563,478307838807,Skokie Courthouse,District 2 - Skokie,40,Male,White,Plea Of Guilty,...,True,8/22/2017 12:00:00 AM,Robbery/Burglery/Theft,10.0,Months,Incarceration,2017,10.00,0.83,40s
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
69957,256376492742,1114936528342,922971556855,505146600989,Rolling Meadows Courthouse,District 3 - Rolling Meadows,21,Male,White,Plea Of Guilty,...,True,12/5/2019 12:00:00 AM,Narcotics,24.0,Months,Probation/Supervision,2019,24.00,2.00,18-24
69958,256381543493,1114965982098,922961626878,505922206274,Markham Courthouse,District 6 - Markham,61,Male,Black,Plea Of Guilty,...,True,12/20/2019 12:00:00 AM,Narcotics,2.0,Days,Incarceration,2019,0.07,0.01,60+
69959,256409108612,1115174177395,923107618660,505224851968,Maywood Courthouse,District 4 - Maywood,38,Male,White,Plea Of Guilty,...,True,12/27/2019 12:00:00 AM,Firearms and Explosives,18.0,Months,Probation/Supervision,2019,18.00,1.50,35-39
69960,256441638876,1115411588918,923226884015,506085957954,26TH Street,District 1 - Chicago,26,Male,Black,Plea Of Guilty,...,True,12/24/2019 12:00:00 AM,Narcotics,2.0,Days,Incarceration,2019,0.07,0.01,25-29


## Define commitment unit for each sentence type

In [57]:
data.sentence_type.value_counts()

Incarceration                       38592
Probation/Supervision               30148
Conditional Discharge                1030
Cook County Boot Camp                 158
Inpatient Mental Health Services       18
Death                                  16
Name: sentence_type, dtype: int64

### Analize Incarceration data to define unit

In [58]:
data.loc[data.sentence_type == 'Incarceration'].commitment_unit.value_counts()
# Incarceration will be read by Year columns unless we are analyzing penalties in dollars

Year(s)         31883
Months           3828
Days             2834
Natural Life       25
Dollars            13
Term                7
Weeks               1
Hours               1
Name: commitment_unit, dtype: int64

### Analize Probation/Supervision to define unit

In [59]:
data.loc[data.sentence_type == 'Probation/Supervision'].commitment_unit.value_counts()

Year(s)         18778
Months          11095
Days              242
Term               18
Natural Life        4
Dollars             4
Weeks               3
Hours               3
Weight              1
Name: commitment_unit, dtype: int64

In [60]:
sorted(data.loc[data.sentence_type == 'Probation/Supervision'].year.unique())

[0.0,
 0.01,
 0.02,
 0.03,
 0.04,
 0.05,
 0.06,
 0.07,
 0.08,
 0.09,
 0.1,
 0.11,
 0.12,
 0.13,
 0.14,
 0.15,
 0.16,
 0.17,
 0.18,
 0.19,
 0.2,
 0.21,
 0.22,
 0.23,
 0.25,
 0.27,
 0.28,
 0.3,
 0.31,
 0.32,
 0.33,
 0.38,
 0.39,
 0.4,
 0.42,
 0.44,
 0.45,
 0.46,
 0.47,
 0.49,
 0.5,
 0.58,
 0.61,
 0.62,
 0.67,
 0.75,
 0.82,
 0.83,
 0.84,
 0.92,
 0.96,
 0.98,
 1.0,
 1.01,
 1.08,
 1.17,
 1.22,
 1.25,
 1.33,
 1.42,
 1.5,
 1.57,
 1.58,
 1.67,
 1.75,
 1.83,
 1.92,
 2.0,
 2.08,
 2.25,
 2.5,
 3.0,
 3.5,
 4.0,
 4.5,
 5.0,
 6.0,
 8.0,
 12.0,
 18.0,
 21.17,
 24.0,
 25.0,
 30.0,
 130.0]

In [61]:
data.loc[data.sentence_type == 'Probation/Supervision'].year.value_counts()

2.00    21840
1.50     2785
1.00     2309
2.50     1670
3.00      495
        ...  
0.61        1
0.19        1
0.44        1
0.67        1
0.84        1
Name: year, Length: 85, dtype: int64

In [62]:
sorted(data.loc[data.sentence_type == 'Probation/Supervision'].month.unique())

[0.0,
 0.03,
 0.07,
 0.1,
 0.23,
 0.27,
 0.33,
 0.4,
 0.47,
 0.5,
 0.57,
 0.6,
 0.63,
 0.67,
 0.7,
 0.73,
 0.77,
 0.8,
 0.83,
 0.93,
 1.0,
 1.03,
 1.07,
 1.1,
 1.27,
 1.3,
 1.33,
 1.37,
 1.4,
 1.43,
 1.5,
 1.53,
 1.57,
 1.6,
 1.67,
 1.7,
 1.8,
 1.97,
 2.0,
 2.03,
 2.07,
 2.13,
 2.37,
 2.4,
 2.43,
 2.57,
 2.67,
 2.7,
 2.8,
 3.0,
 3.07,
 3.3,
 3.47,
 3.6,
 3.8,
 3.83,
 4.0,
 4.67,
 4.73,
 4.9,
 5.13,
 5.33,
 5.5,
 5.73,
 6.0,
 7.0,
 7.07,
 7.4,
 7.6,
 8.0,
 9.0,
 10.0,
 10.17,
 11.0,
 11.73,
 11.87,
 12.0,
 12.13,
 12.27,
 13.0,
 14.0,
 14.8,
 15.0,
 16.0,
 17.0,
 18.0,
 19.0,
 19.13,
 20.0,
 21.0,
 22.0,
 23.0,
 24.0,
 25.0,
 27.0,
 30.0,
 36.0,
 42.0,
 48.0,
 54.0,
 60.0,
 72.0,
 96.0,
 144.0,
 216.0,
 254.0,
 288.0,
 300.0,
 360.0,
 1560.0]

In [63]:
data.loc[data.sentence_type == 'Probation/Supervision'].month.value_counts()
# We will analize this info in months

24.00    21841
18.00     2785
12.00     2308
30.00     1669
36.00      496
         ...  
4.90         1
0.73         1
1.30         1
1.57         1
10.17        1
Name: month, Length: 110, dtype: int64

### Analize Cook County Boot Camp to define unit

In [64]:
data.loc[data.sentence_type == 'Cook County Boot Camp'].commitment_unit.value_counts()
# This sentence will be analize in months

Months     136
Days        11
Year(s)     10
Term         1
Name: commitment_unit, dtype: int64

### Analize Conditional Discharge to define unit

In [65]:
data.loc[data.sentence_type == 'Conditional Discharge'].commitment_unit.value_counts()

Months     553
Year(s)    460
Days        17
Name: commitment_unit, dtype: int64

In [66]:
data.loc[data.sentence_type == 'Conditional Discharge'].year.value_counts()

1.00     395
1.50     298
2.00     294
0.50      17
0.01       8
0.25       2
1.25       2
12.00      2
0.00       2
1.17       1
2.50       1
0.49       1
0.75       1
0.53       1
0.33       1
1.67       1
1.33       1
0.03       1
0.02       1
Name: year, dtype: int64

In [67]:
data.loc[data.sentence_type == 'Conditional Discharge'].month.value_counts()
# This information will be view in months

12.00     394
18.00     298
24.00     294
6.00       18
0.07        4
0.17        3
3.00        2
15.00       2
144.00      2
20.00       1
4.00        1
9.00        1
0.23        1
30.00       1
0.00        1
16.00       1
0.13        1
12.13       1
6.47        1
0.33        1
0.03        1
14.00       1
Name: month, dtype: int64

### Analize Inpatien Mental Health Services to define unit

In [68]:
data.loc[data.sentence_type == 'Inpatient Mental Health Services'].commitment_unit.value_counts()
# This information will be view in years

Year(s)    11
Term        5
Months      2
Name: commitment_unit, dtype: int64

### Analize Death to confirm unit

In [69]:
data.loc[data.sentence_type == 'Death'].commitment_unit.value_counts()

Year(s)    16
Name: commitment_unit, dtype: int64

## Filter only current sentence

In [70]:
data = data.loc[data.current_sentence == True]
data

Unnamed: 0,case_id,case_participant_id,charge_id,charge_version_id,court_facility,court_name,age_at_incident,gender,race,charge_disposition,...,primary_charge,sentence_date,offense_category,commitment_term,commitment_unit,sentence_type,date_year,month,year,age_bins
0,128491120254,266244611418,759810529209,458530536964,26TH Street,District 1 - Chicago,21,Male,Black,Finding Guilty,...,True,2/1/2016 12:00:00 AM,Homicide,9.0,Year(s),Incarceration,2016,108.00,9.00,18-24
1,138057500568,274547245089,707305567445,480844954165,26TH Street,District 1 - Chicago,47,Female,Black,Plea Of Guilty,...,True,11/2/2017 12:00:00 AM,Narcotics,30.0,Days,Incarceration,2017,1.00,0.08,40s
2,219960145803,874516091021,702607209651,382760114712,26TH Street,District 1 - Chicago,27,Male,Black,Plea Of Guilty,...,True,6/17/2016 12:00:00 AM,Narcotics,4.0,Year(s),Incarceration,2016,48.00,4.00,25-29
3,220003462419,874796614290,701366913337,382041644149,Markham Courthouse,District 6 - Markham,33,Male,Black,Verdict Guilty,...,True,4/25/2018 12:00:00 AM,Homicide,52.0,Year(s),Incarceration,2018,624.00,52.00,30-34
4,220036934349,874993408134,701540793563,478307838807,Skokie Courthouse,District 2 - Skokie,40,Male,White,Plea Of Guilty,...,True,8/22/2017 12:00:00 AM,Robbery/Burglery/Theft,10.0,Months,Incarceration,2017,10.00,0.83,40s
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
69957,256376492742,1114936528342,922971556855,505146600989,Rolling Meadows Courthouse,District 3 - Rolling Meadows,21,Male,White,Plea Of Guilty,...,True,12/5/2019 12:00:00 AM,Narcotics,24.0,Months,Probation/Supervision,2019,24.00,2.00,18-24
69958,256381543493,1114965982098,922961626878,505922206274,Markham Courthouse,District 6 - Markham,61,Male,Black,Plea Of Guilty,...,True,12/20/2019 12:00:00 AM,Narcotics,2.0,Days,Incarceration,2019,0.07,0.01,60+
69959,256409108612,1115174177395,923107618660,505224851968,Maywood Courthouse,District 4 - Maywood,38,Male,White,Plea Of Guilty,...,True,12/27/2019 12:00:00 AM,Firearms and Explosives,18.0,Months,Probation/Supervision,2019,18.00,1.50,35-39
69960,256441638876,1115411588918,923226884015,506085957954,26TH Street,District 1 - Chicago,26,Male,Black,Plea Of Guilty,...,True,12/24/2019 12:00:00 AM,Narcotics,2.0,Days,Incarceration,2019,0.07,0.01,25-29


# Analize new dataframe to create the database diagram

In [71]:
data.columns

Index(['case_id', 'case_participant_id', 'charge_id', 'charge_version_id',
       'court_facility', 'court_name', 'age_at_incident', 'gender', 'race',
       'charge_disposition', 'length_of_case_in_days', 'current_sentence',
       'primary_charge', 'sentence_date', 'offense_category',
       'commitment_term', 'commitment_unit', 'sentence_type', 'date_year',
       'month', 'year', 'age_bins'],
      dtype='object')

In [72]:
len(data[['case_participant_id','age_at_incident', 'gender', 'race']].drop_duplicates())

69962

In [73]:
len(data.case_participant_id.drop_duplicates())

69962

In [74]:
len(data.charge_version_id.drop_duplicates())

68884

In [75]:
len(data[['charge_version_id','offense_category']].drop_duplicates())

68884

In [76]:
len(data[['case_participant_id','charge_id']].drop_duplicates())

69962

In [77]:
data[['case_id','case_participant_id','offense_category','charge_disposition','charge_id','charge_version_id']].drop_duplicates()

Unnamed: 0,case_id,case_participant_id,offense_category,charge_disposition,charge_id,charge_version_id
0,128491120254,266244611418,Homicide,Finding Guilty,759810529209,458530536964
1,138057500568,274547245089,Narcotics,Plea Of Guilty,707305567445,480844954165
2,219960145803,874516091021,Narcotics,Plea Of Guilty,702607209651,382760114712
3,220003462419,874796614290,Homicide,Verdict Guilty,701366913337,382041644149
4,220036934349,874993408134,Robbery/Burglery/Theft,Plea Of Guilty,701540793563,478307838807
...,...,...,...,...,...,...
69957,256376492742,1114936528342,Narcotics,Plea Of Guilty,922971556855,505146600989
69958,256381543493,1114965982098,Narcotics,Plea Of Guilty,922961626878,505922206274
69959,256409108612,1115174177395,Firearms and Explosives,Plea Of Guilty,923107618660,505224851968
69960,256441638876,1115411588918,Narcotics,Plea Of Guilty,923226884015,506085957954


In [76]:
data[['case_id','case_participant_id','offense_category','charge_disposition']].drop_duplicates()

Unnamed: 0,case_id,case_participant_id,offense_category,charge_disposition
0,114024398027,230101052744,Homicide,Plea Of Guilty
1,123803937179,255504441936,Firearms and Explosives,Plea Of Guilty
2,127790349873,265428457350,Sex Offenses,Plea Of Guilty
3,128491120254,266244611418,Homicide,Finding Guilty
4,129030951431,266511120401,Judicial Process Violations,Plea Of Guilty
...,...,...,...,...
205284,256376492742,1114936528342,Narcotics,Plea Of Guilty
205285,256381543493,1114965982098,Narcotics,Plea Of Guilty
205286,256409108612,1115174177395,Firearms and Explosives,Plea Of Guilty
205287,256441638876,1115411588918,Narcotics,Plea Of Guilty


![DBD-from-quickDBD.png](DBD-from-quickDBD.png)

# Create tables for SQL

In [78]:
participants = data[['case_participant_id','age_at_incident','gender','race','age_bins']].drop_duplicates()
participants

Unnamed: 0,case_participant_id,age_at_incident,gender,race,age_bins
0,266244611418,21,Male,Black,18-24
1,274547245089,47,Female,Black,40s
2,874516091021,27,Male,Black,25-29
3,874796614290,33,Male,Black,30-34
4,874993408134,40,Male,White,40s
...,...,...,...,...,...
69957,1114936528342,21,Male,White,18-24
69958,1114965982098,61,Male,Black,60+
69959,1115174177395,38,Male,White,35-39
69960,1115411588918,26,Male,Black,25-29


In [79]:
courts = data[['court_facility', 'court_name']].drop_duplicates().reset_index(drop=True)
courts

Unnamed: 0,court_facility,court_name
0,26TH Street,District 1 - Chicago
1,Markham Courthouse,District 6 - Markham
2,Skokie Courthouse,District 2 - Skokie
3,Bridgeview Courthouse,District 5 - Bridgeview
4,Rolling Meadows Courthouse,District 3 - Rolling Meadows
5,Maywood Courthouse,District 4 - Maywood
6,Harrison & Kedzie (Area 4),District 1 - Chicago
7,DV Courthouse,District 1 - Chicago
8,727 E. 111th Street (Area 2),District 1 - Chicago
9,51st & Wentworth (Area 1),District 1 - Chicago


In [80]:
courts['court_id']=['1-26','6','2','5','4','3','1-4','1-DV','1-1','1-3','1-2','1-5','1-RJCC']
courts = courts[['court_id', 'court_facility', 'court_name']]
courts

Unnamed: 0,court_id,court_facility,court_name
0,1-26,26TH Street,District 1 - Chicago
1,6,Markham Courthouse,District 6 - Markham
2,2,Skokie Courthouse,District 2 - Skokie
3,5,Bridgeview Courthouse,District 5 - Bridgeview
4,4,Rolling Meadows Courthouse,District 3 - Rolling Meadows
5,3,Maywood Courthouse,District 4 - Maywood
6,1-4,Harrison & Kedzie (Area 4),District 1 - Chicago
7,1-DV,DV Courthouse,District 1 - Chicago
8,1-1,727 E. 111th Street (Area 2),District 1 - Chicago
9,1-3,51st & Wentworth (Area 1),District 1 - Chicago


In [81]:
offenses = data[['offense_category']].drop_duplicates()
offenses = offenses.reset_index(drop=True)
offenses['offense_id'] = offenses.index + 1
offenses = offenses[['offense_id','offense_category']]
offenses

Unnamed: 0,offense_id,offense_category
0,1,Homicide
1,2,Narcotics
2,3,Robbery/Burglery/Theft
3,4,Motor Vehicle Offenses
4,5,Sex Offenses
5,6,Firearms and Explosives
6,7,Other Offense
7,8,Fraud/Deception
8,9,Battery
9,10,Aggravated Fleeing and Eluding


In [82]:
sentences = data[['sentence_type','commitment_term','commitment_unit','month', 'year']].drop_duplicates()
sentences = sentences.reset_index(drop=True)
sentences['sentence_id'] = sentences.index + 1
sentences = sentences[['sentence_id','sentence_type','commitment_term','commitment_unit','month', 'year']]
sentences

Unnamed: 0,sentence_id,sentence_type,commitment_term,commitment_unit,month,year
0,1,Incarceration,9.0,Year(s),108.00,9.00
1,2,Incarceration,30.0,Days,1.00,0.08
2,3,Incarceration,4.0,Year(s),48.00,4.00
3,4,Incarceration,52.0,Year(s),624.00,52.00
4,5,Incarceration,10.0,Months,10.00,0.83
...,...,...,...,...,...,...
663,664,Incarceration,141.0,Days,4.70,0.39
664,665,Incarceration,43.0,Months,43.00,3.58
665,666,Incarceration,164.0,Days,5.47,0.45
666,667,Probation/Supervision,6.0,Year(s),72.00,6.00


In [83]:
results = data.merge(sentences,on=['sentence_type','commitment_term','commitment_unit','month','year'])
results = results.merge(offenses,on='offense_category')
results = results.merge(courts,on=['court_facility','court_name'])
results.columns

Index(['case_id', 'case_participant_id', 'charge_id', 'charge_version_id',
       'court_facility', 'court_name', 'age_at_incident', 'gender', 'race',
       'charge_disposition', 'length_of_case_in_days', 'current_sentence',
       'primary_charge', 'sentence_date', 'offense_category',
       'commitment_term', 'commitment_unit', 'sentence_type', 'date_year',
       'month', 'year', 'age_bins', 'sentence_id', 'offense_id', 'court_id'],
      dtype='object')

In [84]:
results = results[['case_id','sentence_id','offense_id','court_id','case_participant_id','charge_id','charge_version_id','charge_disposition','length_of_case_in_days','primary_charge']]
results

Unnamed: 0,case_id,sentence_id,offense_id,court_id,case_participant_id,charge_id,charge_version_id,charge_disposition,length_of_case_in_days,primary_charge
0,128491120254,1,1,1-26,266244611418,759810529209,458530536964,Finding Guilty,1201,True
1,227103107024,1,1,1-26,919559790804,741611206410,404355083213,Plea Of Guilty,1091,True
2,243841383374,1,1,1-26,1029535601348,841928054046,459871620641,Plea Of Guilty,467,True
3,245627551709,1,1,1-26,1042011642171,854444895001,501233016402,Plea Of Guilty,774,True
4,229071273628,3,1,1-26,931384404814,752324488983,450658143326,Finding Guilty,1052,True
...,...,...,...,...,...,...,...,...,...,...
69957,242764374803,8,18,1-DV,1022008504066,834677375389,455853030145,Plea Of Guilty,76,True
69958,242915811745,8,18,1-DV,1023049005491,835743052011,456433353209,Plea Of Guilty,474,True
69959,254887462690,84,18,1-DV,1104738878037,915002011286,500691416065,Plea Of Guilty,89,True
69960,240404988967,216,18,1-DV,1006118321655,820398597442,447911994288,Finding Guilty,92,True


In [85]:
data.to_json('../Dashboard/data.json',orient='records')
# results.to_json('../data/results.json')
# participants.to_json('../data/participants.json')
# courts.to_json('../data/courts.json')
# offenses.to_json('../data/offenses.json')
# sentences.to_json('../data/sentences.json')

# Upload information to Postgres

In [84]:
from password import key
from sqlalchemy import create_engine

In [85]:
conn = f"postgres:{key}@localhost:5432/sentencing"
engine = create_engine(f'postgresql://{conn}')

In [86]:
# courts.to_sql(name='courts',con=engine,if_exists='append',index=False)

In [87]:
# participants.to_sql(name='participants',con=engine,if_exists='append',index=False)

In [88]:
# offenses.to_sql(name='offenses',con=engine,if_exists='append',index=False)

In [89]:
# sentences.to_sql(name='sentences',con=engine,if_exists='append',index=False)

In [90]:
results = results[['case_participant_id','court_id','offense_id','sentence_id','case_id','primary_charge','charge_disposition','charge_id','charge_version_id','length_of_case_in_days']]

In [91]:
# results.to_sql(name='results',con=engine,if_exists='append',index=False)

# Queries from SQL for Graphs

In [93]:
import json

In [94]:
participants.to_json('../data/general_demographics.json',orient='records')

In [95]:
data[['case_participant_id','court_name','age_bins','offense_category','sentence_type']].drop_duplicates()

Unnamed: 0,case_participant_id,court_name,age_bins,offense_category,sentence_type
0,230101052744,District 1 - Chicago,18-24,Homicide,Incarceration
1,255504441936,District 1 - Chicago,25-29,Firearms and Explosives,Probation/Supervision
2,265428457350,District 6 - Markham,<18,Sex Offenses,Incarceration
3,266244611418,District 1 - Chicago,18-24,Homicide,Incarceration
4,266511120401,District 1 - Chicago,35-39,Judicial Process Violations,Incarceration
...,...,...,...,...,...
205284,1114936528342,District 3 - Rolling Meadows,18-24,Narcotics,Probation/Supervision
205285,1114965982098,District 6 - Markham,60+,Narcotics,Incarceration
205286,1115174177395,District 4 - Maywood,35-39,Firearms and Explosives,Probation/Supervision
205287,1115411588918,District 1 - Chicago,25-29,Narcotics,Incarceration


In [96]:
query1 = 'select \
	pa.age_bins, \
    pa.gender, \
    pa.race, \
	fr.court_name, \
	fr.offense_category, \
	fr.sentence_type \
from ( \
    select \
		r.case_participant_id, \
		max(court_name) court_name, \
		o.offense_category, \
		s.sentence_type \
	from results r \
	left join courts co \
		on r.court_id = co.court_id \
	left join offenses o \
		on r.offense_id = o.offense_id \
	left join sentences s \
		on r.sentence_id = s.sentence_id \
	group by ( \
		o.offense_category, \
		s.sentence_type, \
		r.case_participant_id \
	)) fr \
left join participants pa \
	on fr.case_participant_id = pa.case_participant_id;'

In [97]:
filtered_demographics = pd.read_sql_query(query1,con=engine)
filtered_demographics.to_json('../data/filtered_demographics.json',orient='records')

In [98]:
data[['year','month','offense_category','sentence_type','court_name']].loc[data.year != 0].drop_duplicates()

Unnamed: 0,year,month,offense_category,sentence_type,court_name
0,8.00,96.00,Homicide,Incarceration,District 1 - Chicago
1,2.00,24.00,Firearms and Explosives,Probation/Supervision,District 1 - Chicago
2,4.00,48.00,Sex Offenses,Incarceration,District 6 - Markham
3,9.00,108.00,Homicide,Incarceration,District 1 - Chicago
4,2.00,24.00,Judicial Process Violations,Incarceration,District 1 - Chicago
...,...,...,...,...,...
205108,0.24,2.87,Other Offense,Incarceration,District 1 - Chicago
205109,0.11,1.30,Other Offense,Incarceration,District 1 - Chicago
205170,0.25,3.00,Fraud/Deception,Probation/Supervision,District 1 - Chicago
205211,0.12,1.43,Fraud/Deception,Incarceration,District 1 - Chicago


In [99]:
query2 = 'select\
	s.year, \
	s.month, \
	fr.offense_category, \
	s.sentence_type, \
	fr.court_name \
from \
	(select \
	 	r.sentence_id, \
	 	o.offense_category, \
	 	co.court_name \
	 from results r \
	 left join courts co \
	 	on r.court_id = co.court_id \
	 left join offenses o \
	 	on r.offense_id = o.offense_id \
	 group by ( \
	 	r.sentence_id, \
	 	o.offense_category, \
	 	co.court_name \
	)) fr \
left join sentences s \
	on s.sentence_id = fr.sentence_id \
where s.month !=0;'

In [100]:
boxplot_offense = pd.read_sql_query(query2,con=engine)
boxplot_offense.to_json('../data/boxplot_offense.json',orient='records')

In [101]:
data[['length_of_case_in_days','month','year','offense_category','sentence_type']].loc[data.month != 0].drop_duplicates()

Unnamed: 0,length_of_case_in_days,month,year,offense_category,sentence_type
0,187,96.00,8.00,Homicide,Incarceration
1,2624,24.00,2.00,Firearms and Explosives,Probation/Supervision
2,795,48.00,4.00,Sex Offenses,Incarceration
3,1201,108.00,9.00,Homicide,Incarceration
4,271,24.00,2.00,Judicial Process Violations,Incarceration
...,...,...,...,...,...
205258,7,24.00,2.00,Battery,Inpatient Mental Health Services
205259,24,13.00,1.08,Robbery/Burglery/Theft,Probation/Supervision
205275,9,0.07,0.01,Motor Vehicle Offenses,Incarceration
205278,0,0.87,0.07,Judicial Process Violations,Incarceration


In [102]:
query3 = 'select \
	fr.length_of_case_in_days, \
	s.month, \
	s.year, \
	fr.offense_category, \
	s.sentence_type \
from  \
	(select \
	 	r.sentence_id, \
	 	r.length_of_case_in_days, \
	 	o.offense_category \
	 from results r \
	 left join offenses o \
	 	on r.offense_id = o.offense_id \
	 group by ( \
		r.sentence_id, \
	 	r.length_of_case_in_days, \
	 	o.offense_category \
	 )) fr \
left join sentences s \
	on s.sentence_id = fr.sentence_id \
where s.month !=0 \
and fr.length_of_case_in_days != 0;'

In [103]:
scatter_length = pd.read_sql_query(query3, con=engine)
scatter_length.to_json('../data/scatter_length.json',orient='records')

In [104]:
query4 = 'select \
	fr.length_of_case_in_days, \
	s.month, \
	s.year, \
	fr.offense_category, \
	s.sentence_type \
from  \
	(select \
	 	r.sentence_id, \
	 	r.length_of_case_in_days, \
	 	o.offense_category \
	 from results r \
	 left join offenses o \
	 	on r.offense_id = o.offense_id \
	 group by ( \
		r.sentence_id, \
	 	r.length_of_case_in_days, \
	 	o.offense_category \
	 )) fr \
left join sentences s \
	on s.sentence_id = fr.sentence_id \
where s.month !=0  \
and fr.length_of_case_in_days != 0;'

In [105]:
boxplot_length = pd.read_sql_query(query4, con=engine)
boxplot_length.to_json('../data/boxplot_length.json',orient='records')

In [106]:
query5 = 'select \
	fr.court_name, \
	count(s.sentence_type) sentence_type, \
	fr.offense_category \
from ( \
	select \
		co.court_name, \
		o.offense_category, \
		r.sentence_id \
	from results r \
		 left join offenses o \
	 	on r.offense_id = o.offense_id \
	 left join courts co \
	 	on r.court_id = co.court_id \
	 group by ( \
		co.court_name, \
		o.offense_category, \
		r.sentence_id \
	 )) fr \
left join sentences s \
	on s.sentence_id = fr.sentence_id \
group by  \
	fr.court_name, \
	fr.offense_category;'

In [107]:
barchar_courts = pd.read_sql_query(query5, con=engine)
barchar_courts.to_json('../data/barchar_courts.json',orient='records')

In [108]:
query6 = 'select \
	count(fr.case_participant_id) participants, \
	fr.age_at_incident, \
	s.month, \
	s.year, \
	s.sentence_type, \
	fr.court_name \
from ( \
	select \
		pa.case_participant_id, \
		co.court_name, \
		r.sentence_id, \
		pa.age_at_incident \
	from results r \
		left join offenses o \
	 		on r.offense_id = o.offense_id \
		left join courts co \
	 		on r.court_id = co.court_id \
		left join participants pa \
			on r.case_participant_id = pa.case_participant_id \
	 group by ( \
		pa.case_participant_id, \
		co.court_name, \
		r.sentence_id, \
		pa.age_at_incident \
	 )) fr \
left join sentences s \
	on s.sentence_id = fr.sentence_id \
group by  \
	fr.age_at_incident, \
	s.month, \
	s.year, \
	s.sentence_type, \
	fr.court_name;' 

In [109]:
scatter_courts = pd.read_sql_query(query6, con=engine)
scatter_courts.to_json('../data/scatter_courts.json',orient='records')

In [110]:
data[['case_id','offense_category','sentence_type','court_name']].drop_duplicates()

Unnamed: 0,case_id,offense_category,sentence_type,court_name
0,114024398027,Homicide,Incarceration,District 1 - Chicago
1,123803937179,Firearms and Explosives,Probation/Supervision,District 1 - Chicago
2,127790349873,Sex Offenses,Incarceration,District 6 - Markham
3,128491120254,Homicide,Incarceration,District 1 - Chicago
4,129030951431,Judicial Process Violations,Incarceration,District 1 - Chicago
...,...,...,...,...
205284,256376492742,Narcotics,Probation/Supervision,District 3 - Rolling Meadows
205285,256381543493,Narcotics,Incarceration,District 6 - Markham
205286,256409108612,Firearms and Explosives,Probation/Supervision,District 4 - Maywood
205287,256441638876,Narcotics,Incarceration,District 1 - Chicago


In [111]:
query7 = 'select \
	r.case_id, \
	o.offense_category, \
	s.sentence_type, \
	co.court_name \
from results r \
left join offenses o \
	on r.offense_id = o.offense_id \
left join courts co \
	on r.court_id = co.court_id \
left join sentences s \
	on r.sentence_id = s.sentence_id \
group by \
	r.case_id, \
	o.offense_category, \
	s.sentence_type, \
	co.court_name;'

In [112]:
pie_offense = pd.read_sql_query(query7, con=engine)
pie_offense.to_json('../data/pie_offense.json',orient='records')