In [1]:
import pandas as pd
from sodapy import Socrata
import numpy as np

# Get information from Cook County Sentencing Data Database

In [2]:
# Call API
client = Socrata("datacatalog.cookcountyil.gov", None)



In [3]:
# Retrive Data
results = client.get("tg8v-tm6u", limit=300000)

In [4]:
# Generate DataFrame
original_data = pd.DataFrame.from_records(results)

# Explore data
list(original_data.columns.values)

['case_id',
 'case_participant_id',
 'received_date',
 'offense_category',
 'primary_charge',
 'charge_id',
 'charge_version_id',
 'disposition_charged_offense_title',
 'charge_count',
 'disposition_date',
 'disposition_charged_chapter',
 'disposition_charged_act',
 'disposition_charged_section',
 'disposition_charged_class',
 'disposition_charged_aoic',
 'charge_disposition',
 'sentence_judge',
 'court_name',
 'court_facility',
 'sentence_phase',
 'sentence_date',
 'sentence_type',
 'current_sentence',
 'commitment_type',
 'length_of_case_in_days',
 'age_at_incident',
 'race',
 'gender',
 'incident_begin_date',
 'law_enforcement_agency',
 'arrest_date',
 'felony_review_date',
 'felony_review_result',
 'arraignment_date',
 'updated_offense_category',
 'commitment_term',
 'commitment_unit',
 'incident_end_date',
 'charge_disposition_reason',
 'incident_city',
 'unit']

# Cleaning Process

## Select only chosen columns

In [5]:
data = original_data[['case_id','case_participant_id','charge_id','charge_version_id','court_facility','court_name','age_at_incident','gender','race','charge_disposition','length_of_case_in_days','current_sentence','primary_charge','sentence_date','offense_category','commitment_term','commitment_unit','sentence_type']]
data.head()

Unnamed: 0,case_id,case_participant_id,charge_id,charge_version_id,court_facility,court_name,age_at_incident,gender,race,charge_disposition,length_of_case_in_days,current_sentence,primary_charge,sentence_date,offense_category,commitment_term,commitment_unit,sentence_type
0,87302669130,221103049156,106130426295,71631463339,Markham Courthouse,District 6 - Markham,27,Male,Black,Nolle On Remand,619,True,False,6/2/1986 12:00:00 AM,PROMIS Conversion,,,Conversion
1,87302669130,221103049156,106130637571,60521205325,Markham Courthouse,District 6 - Markham,27,Male,Black,Nolle On Remand,619,True,False,6/2/1986 12:00:00 AM,PROMIS Conversion,30.0,Year(s),Conversion
2,87302669130,221103049156,106143736689,81281765233,Markham Courthouse,District 6 - Markham,27,Male,Black,Nolle On Remand,619,True,False,6/2/1986 12:00:00 AM,PROMIS Conversion,,,Conversion
3,87302669130,221103049156,106143842327,81277737609,Markham Courthouse,District 6 - Markham,27,Male,Black,Nolle On Remand,619,True,False,6/2/1986 12:00:00 AM,PROMIS Conversion,,,Conversion
4,87302669130,221103049156,106144159241,60394680398,Markham Courthouse,District 6 - Markham,27,Male,Black,Plea Of Guilty,10982,True,False,10/16/2014 12:00:00 AM,PROMIS Conversion,30.0,Year(s),Prison


In [6]:
# Explore values in columns
data.count()

case_id                   241511
case_participant_id       241511
charge_id                 241511
charge_version_id         241511
court_facility            239630
court_name                240126
age_at_incident           232073
gender                    240700
race                      240247
charge_disposition        241511
length_of_case_in_days    222605
current_sentence          241511
primary_charge            241511
sentence_date             241511
offense_category          241511
commitment_term           239852
commitment_unit           239852
sentence_type             241511
dtype: int64

In [7]:
data.replace(regex='PROMIS*',value=np.NaN, inplace=True)

## Drop NaN Values

In [8]:
data = data.dropna()
data.count()

case_id                   208561
case_participant_id       208561
charge_id                 208561
charge_version_id         208561
court_facility            208561
court_name                208561
age_at_incident           208561
gender                    208561
race                      208561
charge_disposition        208561
length_of_case_in_days    208561
current_sentence          208561
primary_charge            208561
sentence_date             208561
offense_category          208561
commitment_term           208561
commitment_unit           208561
sentence_type             208561
dtype: int64

## Filter data after 2000 & before 2019

In [9]:
# Explore type of values in each column
data.dtypes

case_id                   object
case_participant_id       object
charge_id                 object
charge_version_id         object
court_facility            object
court_name                object
age_at_incident           object
gender                    object
race                      object
charge_disposition        object
length_of_case_in_days    object
current_sentence            bool
primary_charge              bool
sentence_date             object
offense_category          object
commitment_term           object
commitment_unit           object
sentence_type             object
dtype: object

In [10]:
# Create a copy of the Sentence Date to filter it
hdate = data['sentence_date']
data['date_year'] = hdate

data.head()

Unnamed: 0,case_id,case_participant_id,charge_id,charge_version_id,court_facility,court_name,age_at_incident,gender,race,charge_disposition,length_of_case_in_days,current_sentence,primary_charge,sentence_date,offense_category,commitment_term,commitment_unit,sentence_type,date_year
663,114024398027,230101052744,807648613316,449520166967,26TH Street,District 1 - Chicago,21,Male,White,Plea Of Guilty,187,True,False,5/11/2015 12:00:00 AM,Reckless Homicide,8,Year(s),Prison,5/11/2015 12:00:00 AM
1360,123803937179,255504441936,190690725339,122497821309,26TH Street,District 1 - Chicago,27,Male,HISPANIC,Plea Of Guilty,2624,True,False,11/5/2014 12:00:00 AM,UUW - Unlawful Use of Weapon,2,Year(s),Probation,11/5/2014 12:00:00 AM
2123,127790349873,265428457350,541647467636,271650860226,Markham Courthouse,District 6 - Markham,18,Male,Black,Plea Of Guilty,795,True,False,12/9/2013 12:00:00 AM,Sex Crimes,4,Year(s),Prison,12/9/2013 12:00:00 AM
2177,128491120254,266244611418,759810529209,458530536964,26TH Street,District 1 - Chicago,21,Male,Black,Finding Guilty,1201,True,True,2/1/2016 12:00:00 AM,Homicide,9,Year(s),Prison,2/1/2016 12:00:00 AM
2261,129030951431,266511120401,702938173654,382945270335,26TH Street,District 1 - Chicago,35,Male,Black,Plea Of Guilty,271,True,True,2/28/2011 12:00:00 AM,Escape - Failure to Return,2,Year(s),Prison,2/28/2011 12:00:00 AM


In [11]:
# Select only the year
data['date_year'] = data['date_year'].replace(regex=['[0-9]*\/[0-9]*\/','\s[0-9]*\:[0-9]*\:[0-9]*\s[A-Z]*'], value='')

In [12]:
# Change value type
data['date_year'] = data['date_year'].astype(int)

In [13]:
data.dtypes

case_id                   object
case_participant_id       object
charge_id                 object
charge_version_id         object
court_facility            object
court_name                object
age_at_incident           object
gender                    object
race                      object
charge_disposition        object
length_of_case_in_days    object
current_sentence            bool
primary_charge              bool
sentence_date             object
offense_category          object
commitment_term           object
commitment_unit           object
sentence_type             object
date_year                  int32
dtype: object

In [14]:
# Filter year
data = data[(data.date_year > 1999) & (data.date_year < 2020)]

In [15]:
# Confirm values
data.describe()

Unnamed: 0,date_year
count,205294.0
mean,2014.767533
std,2.434685
min,2000.0
25%,2013.0
50%,2015.0
75%,2017.0
max,2019.0


## Analize Offense Category and consolidate into related categories

In [16]:
# Review all the information
data.offense_category.value_counts()

Narcotics                       58271
UUW - Unlawful Use of Weapon    23511
Aggravated DUI                  12778
Retail Theft                    12360
Burglary                        10750
                                ...  
Tampering                           9
Failure To Pay Child Support        6
Possession of Explosives            3
Benefit Recipient Fraud             2
Compelling Gang Membership          2
Name: offense_category, Length: 87, dtype: int64

In [17]:
# Consolidate Homicide category
data.offense_category = data.offense_category.replace(to_replace=['Attempt Homicide','Reckless Homicide'],value='Homicide')

data.offense_category.value_counts()

Narcotics                       58271
UUW - Unlawful Use of Weapon    23511
Aggravated DUI                  12778
Retail Theft                    12360
Burglary                        10750
                                ...  
Tampering                           9
Failure To Pay Child Support        6
Possession of Explosives            3
Compelling Gang Membership          2
Benefit Recipient Fraud             2
Name: offense_category, Length: 85, dtype: int64

In [18]:
# Consolidate Offense Against Police Officers category
data.offense_category = data.offense_category.replace(to_replace=['Aggravated Assault Police Officer','Aggravated Assault Police Officer Firearm','Police Shooting','Impersonating Police Officer'],value='Offense Against Police Officers')

data.offense_category.value_counts()

Narcotics                       58271
UUW - Unlawful Use of Weapon    23511
Aggravated DUI                  12778
Retail Theft                    12360
Burglary                        10750
                                ...  
Tampering                           9
Failure To Pay Child Support        6
Possession of Explosives            3
Compelling Gang Membership          2
Benefit Recipient Fraud             2
Name: offense_category, Length: 82, dtype: int64

In [19]:
# Consolidate Battery category
data.offense_category = data.offense_category.replace(to_replace=['Aggravated Battery','Aggravated Battery Police Officer','Aggravated Battery Police Officer Firearm','Aggravated Battery With A Firearm','Domestic Battery'],value='Battery')

data.offense_category.value_counts()

Narcotics                       58271
UUW - Unlawful Use of Weapon    23511
Battery                         13511
Aggravated DUI                  12778
Retail Theft                    12360
                                ...  
Tampering                           9
Failure To Pay Child Support        6
Possession of Explosives            3
Benefit Recipient Fraud             2
Compelling Gang Membership          2
Name: offense_category, Length: 77, dtype: int64

In [20]:
# Consolidate Robbery, Burglery and Theft category
data.offense_category = data.offense_category.replace(to_replace=['Aggravated Identity Theft','Aggravated Robbery','Aggravated Robbery BB Gun','Armed Robbery','Attempt Armed Robbery','Burglary','Identity Theft','Residential Burglary','Retail Theft','Robbery','Theft','Theft by Deception','Possession Of Burglary Tools'],value='Robbery/Burglery/Theft')

data.offense_category.value_counts()

Narcotics                       58271
Robbery/Burglery/Theft          47777
UUW - Unlawful Use of Weapon    23511
Battery                         13511
Aggravated DUI                  12778
                                ...  
Tampering                           9
Failure To Pay Child Support        6
Possession of Explosives            3
Benefit Recipient Fraud             2
Compelling Gang Membership          2
Name: offense_category, Length: 65, dtype: int64

In [21]:
# Consolidate Arson category
data.offense_category = data.offense_category.replace(to_replace=['Arson and Attempt Arson','Attempt Arson'],value='Arson')

data.offense_category.value_counts()

Narcotics                       58271
Robbery/Burglery/Theft          47777
UUW - Unlawful Use of Weapon    23511
Battery                         13511
Aggravated DUI                  12778
                                ...  
Tampering                           9
Failure To Pay Child Support        6
Possession of Explosives            3
Benefit Recipient Fraud             2
Compelling Gang Membership          2
Name: offense_category, Length: 63, dtype: int64

In [22]:
# Consolidate Firearms and Explosives category
data.offense_category = data.offense_category.replace(to_replace=['Aggravated Discharge Firearm','Armed Violence','Disarming Police Officer','Gun Running','Possession of Explosives','UUW - Unlawful Use of Weapon','Gun - Non UUW','Bomb Threat','Reckless Discharge of Firearm'],value='Firearms and Explosives')

data.offense_category.value_counts()

Narcotics                                        58271
Robbery/Burglery/Theft                           47777
Firearms and Explosives                          26030
Battery                                          13511
Aggravated DUI                                   12778
Driving With Suspended Or Revoked License         8155
Sex Crimes                                        4320
Other Offense                                     3850
Possession of Stolen Motor Vehicle                3756
Homicide                                          3709
DUI                                               3330
Forgery                                           2511
Escape - Failure to Return                        2358
Failure to Register as a Sex Offender             2177
Aggravated Fleeing and Eluding                    2131
Criminal Damage to Property                       1642
Credit Card Cases                                 1248
Home Invasion                                     1210
Offense Ag

In [23]:
# Consolidate Motor Vehicles Offenses category
data.offense_category = data.offense_category.replace(to_replace=['Aggravated DUI','Attempt Vehicular Hijacking','DUI','Driving With Suspended Or Revoked License','Major Accidents','Possession of Stolen Motor Vehicle','Vehicular Hijacking','Vehicular Invasion'],value='Motor Vehicle Offenses')

data.offense_category.value_counts()

Narcotics                                        58271
Robbery/Burglery/Theft                           47777
Motor Vehicle Offenses                           29045
Firearms and Explosives                          26030
Battery                                          13511
Sex Crimes                                        4320
Other Offense                                     3850
Homicide                                          3709
Forgery                                           2511
Escape - Failure to Return                        2358
Failure to Register as a Sex Offender             2177
Aggravated Fleeing and Eluding                    2131
Criminal Damage to Property                       1642
Credit Card Cases                                 1248
Home Invasion                                     1210
Offense Against Police Officers                    875
Prostitution                                       642
Fraudulent ID                                      452
Violation 

In [24]:
# Consolidate Judicial Process Violations category
data.offense_category = data.offense_category.replace(to_replace=['Communicating With Witness','Escape - Failure to Return','Obstructing Justice','Perjury','Tampering','Violate Bail Bond','Violation Order Of Protection'],value='Judicial Process Violations')

data.offense_category.value_counts()

Narcotics                                        58271
Robbery/Burglery/Theft                           47777
Motor Vehicle Offenses                           29045
Firearms and Explosives                          26030
Battery                                          13511
Sex Crimes                                        4320
Other Offense                                     3850
Homicide                                          3709
Judicial Process Violations                       3100
Forgery                                           2511
Failure to Register as a Sex Offender             2177
Aggravated Fleeing and Eluding                    2131
Criminal Damage to Property                       1642
Credit Card Cases                                 1248
Home Invasion                                     1210
Offense Against Police Officers                    875
Prostitution                                       642
Fraudulent ID                                      452
Arson     

In [25]:
# Consolidate Sex Offenses category
data.offense_category = data.offense_category.replace(to_replace=['Attempt Sex Crimes','Child Pornography','Failure to Register as a Sex Offender','Pandering','Prostitution','Sex Crimes','Violation of Sex Offender Registration'],value='Sex Offenses')

data.offense_category.value_counts()

Narcotics                                        58271
Robbery/Burglery/Theft                           47777
Motor Vehicle Offenses                           29045
Firearms and Explosives                          26030
Battery                                          13511
Sex Offenses                                      7393
Other Offense                                     3850
Homicide                                          3709
Judicial Process Violations                       3100
Forgery                                           2511
Aggravated Fleeing and Eluding                    2131
Criminal Damage to Property                       1642
Credit Card Cases                                 1248
Home Invasion                                     1210
Offense Against Police Officers                    875
Fraudulent ID                                      452
Arson                                              396
Fraud                                              339
Kidnapping

In [26]:
# Consolidate Human Trafficking, Detention and Kidnapping category
data.offense_category = data.offense_category.replace(to_replace=['Child Abduction','Human Trafficking','Kidnapping','Unlawful Restraint'],value='Human Trafficking/Detention/Kidnapping')

data.offense_category.value_counts()

Narcotics                                        58271
Robbery/Burglery/Theft                           47777
Motor Vehicle Offenses                           29045
Firearms and Explosives                          26030
Battery                                          13511
Sex Offenses                                      7393
Other Offense                                     3850
Homicide                                          3709
Judicial Process Violations                       3100
Forgery                                           2511
Aggravated Fleeing and Eluding                    2131
Criminal Damage to Property                       1642
Credit Card Cases                                 1248
Home Invasion                                     1210
Offense Against Police Officers                    875
Human Trafficking/Detention/Kidnapping             611
Fraudulent ID                                      452
Arson                                              396
Fraud     

In [27]:
# Consolidate Trespassing category
data.offense_category = data.offense_category.replace(to_replace=['Home Invasion','Criminal Trespass To Residence'],value='Trespassing')

data.offense_category.value_counts()

Narcotics                                        58271
Robbery/Burglery/Theft                           47777
Motor Vehicle Offenses                           29045
Firearms and Explosives                          26030
Battery                                          13511
Sex Offenses                                      7393
Other Offense                                     3850
Homicide                                          3709
Judicial Process Violations                       3100
Forgery                                           2511
Aggravated Fleeing and Eluding                    2131
Criminal Damage to Property                       1642
Trespassing                                       1304
Credit Card Cases                                 1248
Offense Against Police Officers                    875
Human Trafficking/Detention/Kidnapping             611
Fraudulent ID                                      452
Arson                                              396
Fraud     

In [28]:
# Consolidate Fraud and Deception category
data.offense_category = data.offense_category.replace(to_replace=['Benefit Recipient Fraud','Credit Card Cases','Deceptive Practice','Forgery','Fraud','Fraudulent ID'],value='Fraud/Deception')

data.offense_category.value_counts()

Narcotics                                        58271
Robbery/Burglery/Theft                           47777
Motor Vehicle Offenses                           29045
Firearms and Explosives                          26030
Battery                                          13511
Sex Offenses                                      7393
Fraud/Deception                                   4747
Other Offense                                     3850
Homicide                                          3709
Judicial Process Violations                       3100
Aggravated Fleeing and Eluding                    2131
Criminal Damage to Property                       1642
Trespassing                                       1304
Offense Against Police Officers                    875
Human Trafficking/Detention/Kidnapping             611
Arson                                              396
Stalking                                           289
Intimidation                                       183
Possession

In [29]:
# Consolidate Corruption category
data.offense_category = data.offense_category.replace(to_replace=['Intimidation','Official Misconduct','Bribery'],value='Corruption')

data.offense_category.value_counts()

Narcotics                                        58271
Robbery/Burglery/Theft                           47777
Motor Vehicle Offenses                           29045
Firearms and Explosives                          26030
Battery                                          13511
Sex Offenses                                      7393
Fraud/Deception                                   4747
Other Offense                                     3850
Homicide                                          3709
Judicial Process Violations                       3100
Aggravated Fleeing and Eluding                    2131
Criminal Damage to Property                       1642
Trespassing                                       1304
Offense Against Police Officers                    875
Human Trafficking/Detention/Kidnapping             611
Arson                                              396
Corruption                                         325
Stalking                                           289
Possession

In [30]:
# Consolidate Inside Penal Institutions category
data.offense_category = data.offense_category.replace(to_replace=['Possession of Contraband in Penal Institution','Possession of Shank in Penal Institution'],value='Inside Penal Institutions')

data.offense_category.value_counts()

Narcotics                                 58271
Robbery/Burglery/Theft                    47777
Motor Vehicle Offenses                    29045
Firearms and Explosives                   26030
Battery                                   13511
Sex Offenses                               7393
Fraud/Deception                            4747
Other Offense                              3850
Homicide                                   3709
Judicial Process Violations                3100
Aggravated Fleeing and Eluding             2131
Criminal Damage to Property                1642
Trespassing                                1304
Offense Against Police Officers             875
Human Trafficking/Detention/Kidnapping      611
Arson                                       396
Corruption                                  325
Stalking                                    289
Inside Penal Institutions                   159
Hate Crimes                                  69
Dog Fighting                            

In [31]:
# Consolidate Other Offense category
data.offense_category = data.offense_category.replace(to_replace=['Dog Fighting','Gambling','Failure To Pay Child Support','Compelling Gang Membership'],value='Other Offense')

data.offense_category.value_counts()

Narcotics                                 58271
Robbery/Burglery/Theft                    47777
Motor Vehicle Offenses                    29045
Firearms and Explosives                   26030
Battery                                   13511
Sex Offenses                               7393
Fraud/Deception                            4747
Other Offense                              3910
Homicide                                   3709
Judicial Process Violations                3100
Aggravated Fleeing and Eluding             2131
Criminal Damage to Property                1642
Trespassing                                1304
Offense Against Police Officers             875
Human Trafficking/Detention/Kidnapping      611
Arson                                       396
Corruption                                  325
Stalking                                    289
Inside Penal Institutions                   159
Hate Crimes                                  69
Name: offense_category, dtype: int64

## Cleaning Race columns

In [32]:
# Review data
data.race.value_counts()

Black                               138741
White [Hispanic or Latino]           32709
White                                27899
HISPANIC                              3252
Asian                                 1195
White/Black [Hispanic or Latino]      1068
Unknown                                293
American Indian                        105
Biracial                                29
ASIAN                                    3
Name: race, dtype: int64

In [33]:
# Consolidate Asian race
data.race = data.race.replace(to_replace='ASIAN',value='Asian')

data.race.value_counts()

Black                               138741
White [Hispanic or Latino]           32709
White                                27899
HISPANIC                              3252
Asian                                 1198
White/Black [Hispanic or Latino]      1068
Unknown                                293
American Indian                        105
Biracial                                29
Name: race, dtype: int64

In [34]:
# Consolidate Hispanic/Latino race
data.race = data.race.replace(to_replace=['White [Hispanic or Latino]','HISPANIC','White/Black [Hispanic or Latino]'],value='Hispanic/Latino')

data.race.value_counts()

Black              138741
Hispanic/Latino     37029
White               27899
Asian                1198
Unknown               293
American Indian       105
Biracial               29
Name: race, dtype: int64

## Cleaning Sentence Type

In [35]:
# Review data
data.sentence_type.value_counts()

Prison                                   110845
Probation                                 79661
Jail                                       6382
Cook County Boot Camp                      2326
Conditional Discharge                      2159
2nd Chance Probation                       1763
Supervision                                1597
Probation Terminated Unsatisfactorily       228
Inpatient Mental Health Services            151
Conditional Release                          56
Death                                        56
Probation Terminated Instanter               53
Probation Terminated Satisfactorily          17
Name: sentence_type, dtype: int64

In [36]:
# Consolidate Probation & Supervision sentence
data.sentence_type = data.sentence_type.replace(to_replace=['Probation','2nd Chance Probation','Supervision','Probation Terminated Unsatisfactorily','Probation Terminated Instanter','Probation Terminated Satisfactorily'],value='Probation/Supervision')

data.sentence_type.value_counts()

Prison                              110845
Probation/Supervision                83319
Jail                                  6382
Cook County Boot Camp                 2326
Conditional Discharge                 2159
Inpatient Mental Health Services       151
Conditional Release                     56
Death                                   56
Name: sentence_type, dtype: int64

In [37]:
# Consolidate Conditional sentence type
data.sentence_type = data.sentence_type.replace(to_replace='Conditional Release',value='Conditional Discharge')

data.sentence_type.value_counts()

Prison                              110845
Probation/Supervision                83319
Jail                                  6382
Cook County Boot Camp                 2326
Conditional Discharge                 2215
Inpatient Mental Health Services       151
Death                                   56
Name: sentence_type, dtype: int64

In [38]:
# Consolidate Incarceration sentence
data.sentence_type = data.sentence_type.replace(to_replace=['Jail','Prison'],value='Incarceration')

data.sentence_type.value_counts()

Incarceration                       117227
Probation/Supervision                83319
Cook County Boot Camp                 2326
Conditional Discharge                 2215
Inpatient Mental Health Services       151
Death                                   56
Name: sentence_type, dtype: int64

## Cleaning Commitment Unit

In [39]:
# Data Exploration
data.commitment_unit.value_counts()

Year(s)         149895
Months           47479
Days              5408
Term              2125
Natural Life       300
Dollars             55
Weeks               15
Hours               14
Pounds               2
Kilos                1
Name: commitment_unit, dtype: int64

In [40]:
# Unify Weight units
data.commitment_unit = data.commitment_unit.replace(to_replace=['Pounds','Kilos'],value='Weight')

data.commitment_unit.value_counts()

Year(s)         149895
Months           47479
Days              5408
Term              2125
Natural Life       300
Dollars             55
Weeks               15
Hours               14
Weight               3
Name: commitment_unit, dtype: int64

In [41]:
sorted(list(data.commitment_term.unique()))

['0',
 '00',
 '001',
 '002',
 '006',
 '007',
 '012',
 '015',
 '018',
 '02',
 '024',
 '027',
 '030',
 '036',
 '042',
 '1',
 '1,154.00',
 '1.5',
 '10',
 '10.5',
 '100',
 '101',
 '1013',
 '102',
 '103',
 '104',
 '105',
 '106',
 '107',
 '108',
 '109',
 '11',
 '11.5',
 '110',
 '111',
 '112',
 '113',
 '114',
 '115',
 '116',
 '117',
 '118',
 '119',
 '12',
 '12.5',
 '12.75',
 '120',
 '121',
 '122',
 '123',
 '125',
 '126',
 '127',
 '1277.13',
 '128',
 '129',
 '13',
 '13.5',
 '130',
 '131',
 '132',
 '133',
 '134',
 '135',
 '136',
 '137',
 '138',
 '139',
 '14',
 '14.5',
 '140',
 '141',
 '142',
 '143',
 '144',
 '145',
 '146',
 '147',
 '148',
 '149',
 '15',
 '150',
 '151',
 '152',
 '153',
 '154',
 '155',
 '156',
 '157',
 '158',
 '159',
 '16',
 '16.5',
 '160',
 '161',
 '162',
 '163',
 '164',
 '165',
 '166',
 '167',
 '168',
 '169',
 '17',
 '17.5',
 '170',
 '171',
 '172',
 '174',
 '175',
 '176',
 '177',
 '178',
 '179',
 '18',
 '18 months',
 '180',
 '181',
 '182',
 '183',
 '184',
 '185',
 '186',
 '187'

In [42]:
# Clean all non-numeric characters and transform value type to float
data.commitment_term = data.commitment_term.replace(to_replace='two',value=2)
data.commitment_term = data.commitment_term.replace(regex=['[a-z]*','\,','\`'], value='')
data.commitment_term = data.commitment_term.astype('float')

data.commitment_term

663        8.0
1360       2.0
2123       4.0
2177       9.0
2261       2.0
          ... 
241252    24.0
241257     2.0
241280    18.0
241298     2.0
241310    90.0
Name: commitment_term, Length: 205294, dtype: float64

In [43]:
data.commitment_unit.value_counts()

Year(s)         149895
Months           47479
Days              5408
Term              2125
Natural Life       300
Dollars             55
Weeks               15
Hours               14
Weight               3
Name: commitment_unit, dtype: int64

In [44]:
# Change all years over 130 to 130 to mark natural life
data.loc[(data.commitment_unit == 'Year(s)') & (data.commitment_term > 129),['commitment_term']] = 130

In [45]:
# Changes in Bootcamp Terms to Months
data.loc[(data.sentence_type == 'Cook County Boot Camp') & (data.commitment_unit == 'Term') & (data.commitment_term == 1),['commitment_term']] = 12

data.loc[(data.sentence_type == 'Cook County Boot Camp') & (data.commitment_unit == 'Term') & (data.commitment_term == 12),['commitment_unit']] = 'Months'

data.loc[(data.sentence_type == 'Cook County Boot Camp') & (data.commitment_unit == 'Term') & (data.commitment_term == 18),['commitment_unit']] = 'Months'

In [46]:
# Change Death info to 130 years
data.loc[(data.sentence_type == 'Death'),['commitment_unit']] = 'Year(s)'
data.loc[(data.sentence_type == 'Death'),['commitment_term']] = 130

In [47]:
def month_convert(row):
    if row ['commitment_unit'] == 'Months':
        return round(float(row['commitment_term']),2)
    if row["commitment_unit"] == "Year(s)":
        return round(int(row["commitment_term"]) * 12.0, 2)
    if row["commitment_unit"] == "Weeks":
       return round(float(row['commitment_term']) / 4, 2)
    if row["commitment_unit"] == "Days":
        return round(float( row['commitment_term']) / 30, 2)
    if row['commitment_unit'] == "Natural Life":
        return 1560.
    else:
        return 0.

data['month'] = data.apply(lambda row:month_convert(row), axis = 1)

In [48]:
def year_convert(row):
    if row ['commitment_unit'] == 'Year(s)':
        return round(float(row['commitment_term']),2)
    if row["commitment_unit"] == "Months":
        return round(int(row["commitment_term"]) / 12.0, 2)
    if row["commitment_unit"] == "Weeks":
       return round(float(row['commitment_term']) / 52, 2)
    if row["commitment_unit"] == "Days":
        return round(float( row['commitment_term'])/365, 2)
    if row['commitment_unit'] == "Natural Life":
        return 130.
    else:
        return 0.

data['year'] = data.apply(lambda row:year_convert(row), axis = 1)

In [49]:
data[['commitment_unit', 'commitment_term','year','month']]

Unnamed: 0,commitment_unit,commitment_term,year,month
663,Year(s),8.0,8.00,96.00
1360,Year(s),2.0,2.00,24.00
2123,Year(s),4.0,4.00,48.00
2177,Year(s),9.0,9.00,108.00
2261,Year(s),2.0,2.00,24.00
...,...,...,...,...
241252,Months,24.0,2.00,24.00
241257,Days,2.0,0.01,0.07
241280,Months,18.0,1.50,18.00
241298,Days,2.0,0.01,0.07


In [50]:
data.length_of_case_in_days.value_counts()

0       12135
7        2072
35       1174
28       1076
42       1067
        ...  
-178        1
1800        1
2031        1
3011        1
2162        1
Name: length_of_case_in_days, Length: 2356, dtype: int64

# --------------------------------------

data.to_json('../Front-end/data.json')

# --------------------------------------

In [51]:
data.drop_duplicates(inplace=True)
data.reset_index(drop=True,inplace=True)
data

Unnamed: 0,case_id,case_participant_id,charge_id,charge_version_id,court_facility,court_name,age_at_incident,gender,race,charge_disposition,...,current_sentence,primary_charge,sentence_date,offense_category,commitment_term,commitment_unit,sentence_type,date_year,month,year
0,114024398027,230101052744,807648613316,449520166967,26TH Street,District 1 - Chicago,21,Male,White,Plea Of Guilty,...,True,False,5/11/2015 12:00:00 AM,Homicide,8.0,Year(s),Incarceration,2015,96.00,8.00
1,123803937179,255504441936,190690725339,122497821309,26TH Street,District 1 - Chicago,27,Male,Hispanic/Latino,Plea Of Guilty,...,True,False,11/5/2014 12:00:00 AM,Firearms and Explosives,2.0,Year(s),Probation/Supervision,2014,24.00,2.00
2,127790349873,265428457350,541647467636,271650860226,Markham Courthouse,District 6 - Markham,18,Male,Black,Plea Of Guilty,...,True,False,12/9/2013 12:00:00 AM,Sex Offenses,4.0,Year(s),Incarceration,2013,48.00,4.00
3,128491120254,266244611418,759810529209,458530536964,26TH Street,District 1 - Chicago,21,Male,Black,Finding Guilty,...,True,True,2/1/2016 12:00:00 AM,Homicide,9.0,Year(s),Incarceration,2016,108.00,9.00
4,129030951431,266511120401,702938173654,382945270335,26TH Street,District 1 - Chicago,35,Male,Black,Plea Of Guilty,...,True,True,2/28/2011 12:00:00 AM,Judicial Process Violations,2.0,Year(s),Incarceration,2011,24.00,2.00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
205284,256376492742,1114936528342,922971556855,505146600989,Rolling Meadows Courthouse,District 3 - Rolling Meadows,21,Male,White,Plea Of Guilty,...,True,True,12/5/2019 12:00:00 AM,Narcotics,24.0,Months,Probation/Supervision,2019,24.00,2.00
205285,256381543493,1114965982098,922961626878,505922206274,Markham Courthouse,District 6 - Markham,61,Male,Black,Plea Of Guilty,...,True,True,12/20/2019 12:00:00 AM,Narcotics,2.0,Days,Incarceration,2019,0.07,0.01
205286,256409108612,1115174177395,923107618660,505224851968,Maywood Courthouse,District 4 - Maywood,38,Male,White,Plea Of Guilty,...,True,True,12/27/2019 12:00:00 AM,Firearms and Explosives,18.0,Months,Probation/Supervision,2019,18.00,1.50
205287,256441638876,1115411588918,923226884015,506085957954,26TH Street,District 1 - Chicago,26,Male,Black,Plea Of Guilty,...,True,True,12/24/2019 12:00:00 AM,Narcotics,2.0,Days,Incarceration,2019,0.07,0.01


In [52]:
data.sentence_type.value_counts()

Incarceration                       117226
Probation/Supervision                83315
Cook County Boot Camp                 2326
Conditional Discharge                 2215
Inpatient Mental Health Services       151
Death                                   56
Name: sentence_type, dtype: int64

## Analize Incarceration data to define unit

In [53]:
data.loc[data.sentence_type == 'Incarceration'].commitment_unit.value_counts()
# Incarceration will be read by Year columns unless we are analyzing penalties in dollars

Year(s)         99686
Months          12458
Days             4708
Natural Life      281
Dollars            45
Term               39
Hours               5
Weeks               4
Name: commitment_unit, dtype: int64

## Analize Probation/Supervision to define unit

In [54]:
data.loc[data.sentence_type == 'Probation/Supervision'].commitment_unit.value_counts()

Year(s)         48834
Months          33728
Days              623
Term               86
Natural Life       12
Weeks              11
Dollars             9
Hours               9
Weight              3
Name: commitment_unit, dtype: int64

In [55]:
sorted(data.loc[data.sentence_type == 'Probation/Supervision'].year.unique())

[0.0,
 0.01,
 0.02,
 0.03,
 0.04,
 0.05,
 0.06,
 0.07,
 0.08,
 0.09,
 0.1,
 0.11,
 0.12,
 0.13,
 0.14,
 0.15,
 0.16,
 0.17,
 0.18,
 0.19,
 0.2,
 0.21,
 0.22,
 0.23,
 0.24,
 0.25,
 0.26,
 0.27,
 0.28,
 0.29,
 0.3,
 0.31,
 0.32,
 0.33,
 0.34,
 0.35,
 0.36,
 0.38,
 0.39,
 0.4,
 0.41,
 0.42,
 0.44,
 0.45,
 0.46,
 0.47,
 0.48,
 0.49,
 0.5,
 0.51,
 0.52,
 0.53,
 0.56,
 0.58,
 0.6,
 0.61,
 0.62,
 0.66,
 0.67,
 0.72,
 0.75,
 0.78,
 0.82,
 0.83,
 0.84,
 0.92,
 0.96,
 0.98,
 1.0,
 1.01,
 1.04,
 1.08,
 1.17,
 1.22,
 1.25,
 1.33,
 1.42,
 1.5,
 1.57,
 1.58,
 1.67,
 1.75,
 1.83,
 1.92,
 2.0,
 2.08,
 2.25,
 2.33,
 2.42,
 2.5,
 2.67,
 2.75,
 2.83,
 3.0,
 3.25,
 3.5,
 4.0,
 4.5,
 5.0,
 5.5,
 6.0,
 7.0,
 8.0,
 10.0,
 11.0,
 12.0,
 14.0,
 15.0,
 18.0,
 20.0,
 21.0,
 21.17,
 22.0,
 24.0,
 25.0,
 30.0,
 48.0,
 130.0]

In [56]:
data.loc[data.sentence_type == 'Probation/Supervision'].year.value_counts()

2.00    58933
1.50     8266
2.50     6309
1.00     5427
3.00     1514
        ...  
0.53        1
0.66        1
3.25        1
0.24        1
0.82        1
Name: year, Length: 118, dtype: int64

In [57]:
sorted(data.loc[data.sentence_type == 'Probation/Supervision'].month.unique())

[0.0,
 0.03,
 0.07,
 0.1,
 0.13,
 0.17,
 0.2,
 0.23,
 0.27,
 0.33,
 0.4,
 0.47,
 0.5,
 0.53,
 0.57,
 0.6,
 0.63,
 0.67,
 0.7,
 0.73,
 0.77,
 0.8,
 0.83,
 0.87,
 0.9,
 0.93,
 0.97,
 1.0,
 1.03,
 1.07,
 1.1,
 1.17,
 1.27,
 1.3,
 1.33,
 1.37,
 1.4,
 1.43,
 1.47,
 1.5,
 1.53,
 1.57,
 1.6,
 1.63,
 1.67,
 1.7,
 1.77,
 1.8,
 1.87,
 1.97,
 2.0,
 2.03,
 2.07,
 2.1,
 2.13,
 2.17,
 2.37,
 2.4,
 2.43,
 2.47,
 2.5,
 2.53,
 2.57,
 2.67,
 2.7,
 2.8,
 2.83,
 2.9,
 3.0,
 3.07,
 3.1,
 3.2,
 3.27,
 3.3,
 3.37,
 3.43,
 3.47,
 3.53,
 3.6,
 3.67,
 3.7,
 3.73,
 3.77,
 3.8,
 3.83,
 3.9,
 3.93,
 3.97,
 4.0,
 4.17,
 4.27,
 4.37,
 4.4,
 4.43,
 4.5,
 4.67,
 4.7,
 4.73,
 4.77,
 4.9,
 5.03,
 5.13,
 5.33,
 5.43,
 5.5,
 5.7,
 5.73,
 5.83,
 5.9,
 5.93,
 6.0,
 6.23,
 6.37,
 6.47,
 6.87,
 7.0,
 7.07,
 7.33,
 7.4,
 7.5,
 7.6,
 8.0,
 8.77,
 9.0,
 9.1,
 9.5,
 10.0,
 10.17,
 11.0,
 11.73,
 11.87,
 12.0,
 12.13,
 12.17,
 12.27,
 13.0,
 13.5,
 14.0,
 14.8,
 15.0,
 16.0,
 17.0,
 18.0,
 19.0,
 19.13,
 20.0,
 20.5,
 21.0,
 22.0,

In [58]:
data.loc[data.sentence_type == 'Probation/Supervision'].month.value_counts()
# We will analize this info in months

24.00    58937
18.00     8266
30.00     6305
12.00     5424
36.00     1516
         ...  
3.70         1
0.57         1
3.77         1
2.13         1
19.13        1
Name: month, Length: 184, dtype: int64

## Analize Cook County Boot Camp to define unit

In [59]:
data.loc[data.sentence_type == 'Cook County Boot Camp'].commitment_unit.value_counts()
# This sentence will be analize in months

Months     2019
Year(s)     179
Term         77
Days         51
Name: commitment_unit, dtype: int64

## Analize Conditional Discharge to define unit

In [60]:
data.loc[data.sentence_type == 'Conditional Discharge'].commitment_unit.value_counts()

Months     1133
Year(s)    1054
Days         25
Term          2
Dollars       1
Name: commitment_unit, dtype: int64

In [61]:
data.loc[data.sentence_type == 'Conditional Discharge'].year.value_counts()

1.00     957
2.00     603
1.50     554
0.50      37
2.50      17
0.01       9
0.00       6
1.25       5
1.67       3
0.33       3
0.08       3
0.03       2
3.00       2
0.25       2
12.00      2
0.75       1
1.17       1
0.49       1
8.00       1
1.33       1
0.02       1
0.36       1
0.53       1
0.16       1
0.43       1
Name: year, dtype: int64

In [62]:
data.loc[data.sentence_type == 'Conditional Discharge'].month.value_counts()
# This information will be view in months

12.00     956
24.00     603
18.00     554
6.00       38
30.00      17
0.00        5
15.00       5
0.07        4
4.00        3
1.00        3
0.17        3
20.00       3
0.13        2
144.00      2
3.00        2
36.00       2
96.00       1
2.00        1
0.23        1
14.00       1
16.00       1
6.47        1
0.33        1
4.43        1
5.27        1
0.40        1
0.03        1
12.13       1
9.00        1
Name: month, dtype: int64

## Analize Inpatien Mental Health Services to define unit

In [63]:
data.loc[data.sentence_type == 'Inpatient Mental Health Services'].commitment_unit.value_counts()
# This information will be view in years

Year(s)    98
Term       35
Months     18
Name: commitment_unit, dtype: int64

## Analize Death to confirm unit

In [64]:
data.loc[data.sentence_type == 'Death'].commitment_unit.value_counts()

Year(s)    56
Name: commitment_unit, dtype: int64

## Filter only current sentence

In [65]:
data = data.loc[data.current_sentence == True]
data

Unnamed: 0,case_id,case_participant_id,charge_id,charge_version_id,court_facility,court_name,age_at_incident,gender,race,charge_disposition,...,current_sentence,primary_charge,sentence_date,offense_category,commitment_term,commitment_unit,sentence_type,date_year,month,year
0,114024398027,230101052744,807648613316,449520166967,26TH Street,District 1 - Chicago,21,Male,White,Plea Of Guilty,...,True,False,5/11/2015 12:00:00 AM,Homicide,8.0,Year(s),Incarceration,2015,96.00,8.00
1,123803937179,255504441936,190690725339,122497821309,26TH Street,District 1 - Chicago,27,Male,Hispanic/Latino,Plea Of Guilty,...,True,False,11/5/2014 12:00:00 AM,Firearms and Explosives,2.0,Year(s),Probation/Supervision,2014,24.00,2.00
2,127790349873,265428457350,541647467636,271650860226,Markham Courthouse,District 6 - Markham,18,Male,Black,Plea Of Guilty,...,True,False,12/9/2013 12:00:00 AM,Sex Offenses,4.0,Year(s),Incarceration,2013,48.00,4.00
3,128491120254,266244611418,759810529209,458530536964,26TH Street,District 1 - Chicago,21,Male,Black,Finding Guilty,...,True,True,2/1/2016 12:00:00 AM,Homicide,9.0,Year(s),Incarceration,2016,108.00,9.00
4,129030951431,266511120401,702938173654,382945270335,26TH Street,District 1 - Chicago,35,Male,Black,Plea Of Guilty,...,True,True,2/28/2011 12:00:00 AM,Judicial Process Violations,2.0,Year(s),Incarceration,2011,24.00,2.00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
205284,256376492742,1114936528342,922971556855,505146600989,Rolling Meadows Courthouse,District 3 - Rolling Meadows,21,Male,White,Plea Of Guilty,...,True,True,12/5/2019 12:00:00 AM,Narcotics,24.0,Months,Probation/Supervision,2019,24.00,2.00
205285,256381543493,1114965982098,922961626878,505922206274,Markham Courthouse,District 6 - Markham,61,Male,Black,Plea Of Guilty,...,True,True,12/20/2019 12:00:00 AM,Narcotics,2.0,Days,Incarceration,2019,0.07,0.01
205286,256409108612,1115174177395,923107618660,505224851968,Maywood Courthouse,District 4 - Maywood,38,Male,White,Plea Of Guilty,...,True,True,12/27/2019 12:00:00 AM,Firearms and Explosives,18.0,Months,Probation/Supervision,2019,18.00,1.50
205287,256441638876,1115411588918,923226884015,506085957954,26TH Street,District 1 - Chicago,26,Male,Black,Plea Of Guilty,...,True,True,12/24/2019 12:00:00 AM,Narcotics,2.0,Days,Incarceration,2019,0.07,0.01


# Analize new dataframe to create the database diagram

In [66]:
data.columns

Index(['case_id', 'case_participant_id', 'charge_id', 'charge_version_id',
       'court_facility', 'court_name', 'age_at_incident', 'gender', 'race',
       'charge_disposition', 'length_of_case_in_days', 'current_sentence',
       'primary_charge', 'sentence_date', 'offense_category',
       'commitment_term', 'commitment_unit', 'sentence_type', 'date_year',
       'month', 'year'],
      dtype='object')

In [67]:
len(data[['case_participant_id','age_at_incident', 'gender', 'race']].drop_duplicates())

176314

In [68]:
len(data.case_participant_id.drop_duplicates())

176314

In [69]:
len(data.charge_version_id.drop_duplicates())

191892

In [70]:
len(data[['charge_version_id','offense_category']].drop_duplicates())

191892

In [71]:
len(data[['case_participant_id','charge_id']].drop_duplicates())

197265

In [77]:
data[['case_id','case_participant_id','offense_category','charge_disposition','charge_id','charge_version_id']].drop_duplicates()

Unnamed: 0,case_id,case_participant_id,offense_category,charge_disposition,charge_id,charge_version_id
0,114024398027,230101052744,Homicide,Plea Of Guilty,807648613316,449520166967
1,123803937179,255504441936,Firearms and Explosives,Plea Of Guilty,190690725339,122497821309
2,127790349873,265428457350,Sex Offenses,Plea Of Guilty,541647467636,271650860226
3,128491120254,266244611418,Homicide,Finding Guilty,759810529209,458530536964
4,129030951431,266511120401,Judicial Process Violations,Plea Of Guilty,702938173654,382945270335
...,...,...,...,...,...,...
205284,256376492742,1114936528342,Narcotics,Plea Of Guilty,922971556855,505146600989
205285,256381543493,1114965982098,Narcotics,Plea Of Guilty,922961626878,505922206274
205286,256409108612,1115174177395,Firearms and Explosives,Plea Of Guilty,923107618660,505224851968
205287,256441638876,1115411588918,Narcotics,Plea Of Guilty,923226884015,506085957954


In [78]:
data[['case_id','case_participant_id','offense_category','charge_disposition']].drop_duplicates()

Unnamed: 0,case_id,case_participant_id,offense_category,charge_disposition
0,114024398027,230101052744,Homicide,Plea Of Guilty
1,123803937179,255504441936,Firearms and Explosives,Plea Of Guilty
2,127790349873,265428457350,Sex Offenses,Plea Of Guilty
3,128491120254,266244611418,Homicide,Finding Guilty
4,129030951431,266511120401,Judicial Process Violations,Plea Of Guilty
...,...,...,...,...
205284,256376492742,1114936528342,Narcotics,Plea Of Guilty
205285,256381543493,1114965982098,Narcotics,Plea Of Guilty
205286,256409108612,1115174177395,Firearms and Explosives,Plea Of Guilty
205287,256441638876,1115411588918,Narcotics,Plea Of Guilty


![DBD-from-quickDBD.png](DBD-from-quickDBD.png)

# Create tables

In [80]:
participants = data[['case_participant_id','age_at_incident','gender','race']].drop_duplicates()
participants

Unnamed: 0,case_participant_id,age_at_incident,gender,race
0,230101052744,21,Male,White
1,255504441936,27,Male,Hispanic/Latino
2,265428457350,18,Male,Black
3,266244611418,21,Male,Black
4,266511120401,35,Male,Black
...,...,...,...,...
205284,1114936528342,21,Male,White
205285,1114965982098,61,Male,Black
205286,1115174177395,38,Male,White
205287,1115411588918,26,Male,Black


In [84]:
courts = data[['court_facility', 'court_name']].drop_duplicates().reset_index(drop=True)
courts

Unnamed: 0,court_facility,court_name
0,26TH Street,District 1 - Chicago
1,Markham Courthouse,District 6 - Markham
2,Skokie Courthouse,District 2 - Skokie
3,Bridgeview Courthouse,District 5 - Bridgeview
4,Maywood Courthouse,District 4 - Maywood
5,Rolling Meadows Courthouse,District 3 - Rolling Meadows
6,Harrison & Kedzie (Area 4),District 1 - Chicago
7,DV Courthouse,District 1 - Chicago
8,51st & Wentworth (Area 1),District 1 - Chicago
9,Belmont & Western (Area 3),District 1 - Chicago


In [89]:
courts['court_id']=['1-26','6','2','5','4','3','1-4','1-DV','1-1','1-3','1-2','1-5','1-RJCC']
courts = courts[['court_id', 'court_facility', 'court_name']]
courts

Unnamed: 0,court_id,court_facility,court_name
0,1-26,26TH Street,District 1 - Chicago
1,6,Markham Courthouse,District 6 - Markham
2,2,Skokie Courthouse,District 2 - Skokie
3,5,Bridgeview Courthouse,District 5 - Bridgeview
4,4,Maywood Courthouse,District 4 - Maywood
5,3,Rolling Meadows Courthouse,District 3 - Rolling Meadows
6,1-4,Harrison & Kedzie (Area 4),District 1 - Chicago
7,1-DV,DV Courthouse,District 1 - Chicago
8,1-1,51st & Wentworth (Area 1),District 1 - Chicago
9,1-3,Belmont & Western (Area 3),District 1 - Chicago


In [96]:
cases = data[['case_id','court_facility', 'court_name','case_participant_id','length_of_case_in_days']].drop_duplicates()
cases

Unnamed: 0,case_id,court_facility,court_name,case_participant_id,length_of_case_in_days
0,114024398027,26TH Street,District 1 - Chicago,230101052744,187
1,123803937179,26TH Street,District 1 - Chicago,255504441936,2624
2,127790349873,Markham Courthouse,District 6 - Markham,265428457350,795
3,128491120254,26TH Street,District 1 - Chicago,266244611418,1201
4,129030951431,26TH Street,District 1 - Chicago,266511120401,271
...,...,...,...,...,...
205284,256376492742,Rolling Meadows Courthouse,District 3 - Rolling Meadows,1114936528342,1
205285,256381543493,Markham Courthouse,District 6 - Markham,1114965982098,0
205286,256409108612,Maywood Courthouse,District 4 - Maywood,1115174177395,1
205287,256441638876,26TH Street,District 1 - Chicago,1115411588918,0


In [97]:
cases = cases.merge(courts,on='court_facility')
cases = cases[['case_id','court_id','case_participant_id','length_of_case_in_days']]
cases

Unnamed: 0,case_id,court_id,case_participant_id,length_of_case_in_days
0,114024398027,1-26,230101052744,187
1,123803937179,1-26,255504441936,2624
2,128491120254,1-26,266244611418,1201
3,129030951431,1-26,266511120401,271
4,133675331526,1-26,273039402824,560
...,...,...,...,...
176448,240412094261,1-5,1006167846720,259
176449,240412351079,1-5,1006169153136,259
176450,243881618175,1-5,1029795221953,19
176451,243881618175,1-5,1029795459483,19


In [None]:
offense = data[['case_participant_id','case_id','offense_category','charge_disposition','primary_charge']]

In [None]:
offenses
-
offense_id integer pk
case_participant_id integer fk >- participants.case_participant_id
case_id integer fk >- cases.case_id
offense_category varchar
charge_disposition varchar
primary_charge boolean
charge_id list
charge_version_id list

In [72]:
'case_participant_id', 'charge_id', 'charge_version_id','court_facility', 'court_name', 'age_at_incident', 'gender', 'race','charge_disposition', 'length_of_case_in_days', 'current_sentence','primary_charge', 'sentence_date', 'offense_category','commitment_term','commitment_unit', 'sentence_type', 'date_year','month', 'year'

('case_participant_id',
 'charge_id',
 'charge_version_id',
 'court_facility',
 'court_name',
 'age_at_incident',
 'gender',
 'race',
 'charge_disposition',
 'length_of_case_in_days',
 'current_sentence',
 'primary_charge',
 'sentence_date',
 'offense_category',
 'commitment_term',
 'commitment_unit',
 'sentence_type',
 'date_year',
 'month',
 'year')