In [2]:
import pandas as pd
from sodapy import Socrata
import numpy as np

# Get information from Cook County Sentencing Data Database

In [3]:
# Call API
# Create the Socrata client pointed at the Cook County open-data catalog.
# NOTE(review): the second argument is None — presumably "no app token"
# (unauthenticated, possibly throttled access); confirm against sodapy docs.
client = Socrata("datacatalog.cookcountyil.gov", None)



In [4]:
# Retrieve data
# Request dataset "tg8v-tm6u" with an explicit cap of 300000 records.
# NOTE(review): if the dataset holds more rows than the limit, the extra rows
# are presumably not returned — confirm the current dataset size.
results = client.get("tg8v-tm6u", limit=300000)

In [5]:
# Generate DataFrame
# Build the raw frame from the records returned by the API call; later cells
# derive the working frame `data` from it.
original_data = pd.DataFrame.from_records(results)

# Explore data
# list(original_data.columns.values)

# Cleaning Process

## Select only chosen columns

In [129]:
# Keep only the columns needed for the analysis.
# The explicit .copy() makes `data` an independent frame: later cells replace
# values and add columns on it, and doing that on a bare column selection of
# `original_data` triggers pandas' SettingWithCopyWarning.
data = original_data[[
    'case_id',
    'case_participant_id',
    'charge_id',
    'charge_version_id',
    'court_facility',
    'court_name',
    'age_at_incident',
    'gender',
    'race',
    'charge_disposition',
    'length_of_case_in_days',
    'current_sentence',
    'primary_charge',
    'sentence_date',
    'offense_category',
    'commitment_term',
    'commitment_unit',
    'sentence_type',
]].copy()
# data.head()

In [130]:
# Explore values in columns
# data.count()

In [131]:
# Blank out conversion data: every string cell matching the regex 'PROMIS*'
# is replaced by NaN so the dropna() in the next section removes those rows.
# np.nan is used because the np.NaN alias no longer exists in NumPy 2.0, and
# the result is reassigned instead of mutating a column selection in place.
data = data.replace(regex='PROMIS*', value=np.nan)

## Drop NaN Values

In [132]:
# Discard every row that has a missing value in any of the selected columns.
data = data.dropna(axis=0, how='any')
# data.count()

## Filter data to sentence years 2015 through 2019

In [133]:
# Explore type of values in each column
# data.dtypes

In [134]:
# Duplicate the sentence date into a working column ('date_year') so the
# original 'sentence_date' stays untouched while the year is extracted.
hdate = data.sentence_date
data['date_year'] = hdate

# data.head()

In [135]:
# Select only the year: strip the leading "<digits>/<digits>/" part and the
# trailing " <digits>:<digits>:<digits> <LETTERS>" part of the date string.
# NOTE(review): assumes dates look like 'MM/DD/YYYY HH:MM:SS AM' — confirm.
# Raw strings keep the exact same patterns while avoiding the invalid-escape
# warnings that '\/', '\s' and '\:' raise in ordinary string literals.
data['date_year'] = data['date_year'].replace(
    regex=[r'[0-9]*\/[0-9]*\/', r'\s[0-9]*\:[0-9]*\:[0-9]*\s[A-Z]*'],
    value='',
)

In [136]:
# Convert the extracted year text into integers so it can be compared numerically.
year_as_int = data['date_year'].astype(int)
data['date_year'] = year_as_int

In [137]:
# Confirm value type change
# data.dtypes

In [138]:
# Keep only rows whose sentence year lies strictly between 2014 and 2020.
after_2014 = data['date_year'] > 2014
before_2020 = data['date_year'] < 2020
data = data[after_2014 & before_2020]

In [139]:
# Confirm values
# data.describe()

## Filter only current sentence and primary charge

In [140]:
# Keep a row only when it is flagged both as the current sentence and as the
# primary charge.
is_current_sentence = data['current_sentence'] == True
is_primary_charge = data['primary_charge'] == True
data = data.loc[is_current_sentence & is_primary_charge]
# data.describe()

## Analyze Offense Category and consolidate into related categories

In [141]:
# Review all the information
# data.offense_category.value_counts()

In [142]:
# Fold the attempted and reckless variants into the single 'Homicide' label.
data['offense_category'] = data['offense_category'].replace(
    to_replace=[
        'Attempt Homicide',
        'Reckless Homicide',
    ],
    value='Homicide',
)

# data.offense_category.value_counts()

In [143]:
# Group the police-related assault, shooting and impersonation labels together.
data['offense_category'] = data['offense_category'].replace(
    to_replace=[
        'Aggravated Assault Police Officer',
        'Aggravated Assault Police Officer Firearm',
        'Police Shooting',
        'Impersonating Police Officer',
    ],
    value='Offense Against Police Officers',
)

# data.offense_category.value_counts()

In [144]:
# Collapse every battery variant into the single 'Battery' label.
data['offense_category'] = data['offense_category'].replace(
    to_replace=[
        'Aggravated Battery',
        'Aggravated Battery Police Officer',
        'Aggravated Battery Police Officer Firearm',
        'Aggravated Battery With A Firearm',
        'Domestic Battery',
    ],
    value='Battery',
)

# data.offense_category.value_counts()

In [145]:
# Merge the robbery, burglary and theft labels into one combined category.
# The target label's spelling is kept exactly as the data consumers receive it.
data['offense_category'] = data['offense_category'].replace(
    to_replace=[
        'Aggravated Identity Theft',
        'Aggravated Robbery',
        'Aggravated Robbery BB Gun',
        'Armed Robbery',
        'Attempt Armed Robbery',
        'Burglary',
        'Identity Theft',
        'Residential Burglary',
        'Retail Theft',
        'Robbery',
        'Theft',
        'Theft by Deception',
        'Possession Of Burglary Tools',
    ],
    value='Robbery/Burglery/Theft',
)

# data.offense_category.value_counts()

In [146]:
# Fold the attempted-arson labels into 'Arson'.
data['offense_category'] = data['offense_category'].replace(
    to_replace=[
        'Arson and Attempt Arson',
        'Attempt Arson',
    ],
    value='Arson',
)

# data.offense_category.value_counts()

In [147]:
# Group weapon, firearm and explosive related labels together.
data['offense_category'] = data['offense_category'].replace(
    to_replace=[
        'Aggravated Discharge Firearm',
        'Armed Violence',
        'Disarming Police Officer',
        'Gun Running',
        'Possession of Explosives',
        'UUW - Unlawful Use of Weapon',
        'Gun - Non UUW',
        'Bomb Threat',
        'Reckless Discharge of Firearm',
    ],
    value='Firearms and Explosives',
)

# data.offense_category.value_counts()

In [148]:
# Group DUI, licence, accident and vehicle hijacking/theft labels together.
data['offense_category'] = data['offense_category'].replace(
    to_replace=[
        'Aggravated DUI',
        'Attempt Vehicular Hijacking',
        'DUI',
        'Driving With Suspended Or Revoked License',
        'Major Accidents',
        'Possession of Stolen Motor Vehicle',
        'Vehicular Hijacking',
        'Vehicular Invasion',
    ],
    value='Motor Vehicle Offenses',
)

# data.offense_category.value_counts()

In [149]:
# Group labels about interfering with, or disobeying, the judicial process.
data['offense_category'] = data['offense_category'].replace(
    to_replace=[
        'Communicating With Witness',
        'Escape - Failure to Return',
        'Obstructing Justice',
        'Perjury',
        'Tampering',
        'Violate Bail Bond',
        'Violation Order Of Protection',
    ],
    value='Judicial Process Violations',
)

# data.offense_category.value_counts()

In [150]:
# Group the sex-crime and sex-offender-registration labels together.
data['offense_category'] = data['offense_category'].replace(
    to_replace=[
        'Attempt Sex Crimes',
        'Child Pornography',
        'Failure to Register as a Sex Offender',
        'Pandering',
        'Prostitution',
        'Sex Crimes',
        'Violation of Sex Offender Registration',
    ],
    value='Sex Offenses',
)

# data.offense_category.value_counts()

In [151]:
# Group abduction, trafficking, kidnapping and unlawful-restraint labels.
data['offense_category'] = data['offense_category'].replace(
    to_replace=[
        'Child Abduction',
        'Human Trafficking',
        'Kidnapping',
        'Unlawful Restraint',
    ],
    value='Human Trafficking/Detention/Kidnapping',
)

# data.offense_category.value_counts()

In [152]:
# Group home invasion and residential trespass under 'Trespassing'.
data['offense_category'] = data['offense_category'].replace(
    to_replace=[
        'Home Invasion',
        'Criminal Trespass To Residence',
    ],
    value='Trespassing',
)

# data.offense_category.value_counts()

In [153]:
# Group fraud, forgery and deceptive-practice labels together.
data['offense_category'] = data['offense_category'].replace(
    to_replace=[
        'Benefit Recipient Fraud',
        'Credit Card Cases',
        'Deceptive Practice',
        'Forgery',
        'Fraud',
        'Fraudulent ID',
    ],
    value='Fraud/Deception',
)

# data.offense_category.value_counts()

In [154]:
# Group intimidation, official misconduct and bribery under 'Corruption'.
data['offense_category'] = data['offense_category'].replace(
    to_replace=[
        'Intimidation',
        'Official Misconduct',
        'Bribery',
    ],
    value='Corruption',
)

# data.offense_category.value_counts()

In [155]:
# Group the possession-inside-a-penal-institution labels together.
data['offense_category'] = data['offense_category'].replace(
    to_replace=[
        'Possession of Contraband in Penal Institution',
        'Possession of Shank in Penal Institution',
    ],
    value='Inside Penal Institutions',
)

# data.offense_category.value_counts()

In [156]:
# Collect the remaining listed labels under a catch-all 'Other Offense' category.
data['offense_category'] = data['offense_category'].replace(
    to_replace=[
        'Dog Fighting',
        'Gambling',
        'Failure To Pay Child Support',
        'Compelling Gang Membership',
    ],
    value='Other Offense',
)

# data.offense_category.value_counts()

## Cleaning Race columns

In [157]:
# Review data
# data.race.value_counts()

In [158]:
# Normalise the upper-case spelling so both variants share the label 'Asian'.
data['race'] = data['race'].replace(
    to_replace='ASIAN',
    value='Asian',
)

# data.race.value_counts()

In [159]:
# Map every Hispanic/Latino variant onto one 'Hispanic/Latino' label.
data['race'] = data['race'].replace(
    to_replace=[
        'White [Hispanic or Latino]',
        'HISPANIC',
        'White/Black [Hispanic or Latino]',
    ],
    value='Hispanic/Latino',
)

# data.race.value_counts()

## Cleaning Sentence Type

In [160]:
# Review data
# data.sentence_type.value_counts()

In [161]:
# Merge all probation and supervision variants into 'Probation/Supervision'.
data['sentence_type'] = data['sentence_type'].replace(
    to_replace=[
        'Probation',
        '2nd Chance Probation',
        'Supervision',
        'Probation Terminated Unsatisfactorily',
        'Probation Terminated Instanter',
        'Probation Terminated Satisfactorily',
    ],
    value='Probation/Supervision',
)

# data.sentence_type.value_counts()

In [162]:
# Treat 'Conditional Release' as the same sentence type as 'Conditional Discharge'.
data['sentence_type'] = data['sentence_type'].replace(
    to_replace='Conditional Release',
    value='Conditional Discharge',
)

# data.sentence_type.value_counts()

In [163]:
# Merge jail and prison sentences into a single 'Incarceration' type.
data['sentence_type'] = data['sentence_type'].replace(
    to_replace=[
        'Jail',
        'Prison',
    ],
    value='Incarceration',
)

# data.sentence_type.value_counts()

## Cleaning Gender

In [164]:
# data.gender.value_counts()

In [165]:
# Keep only rows whose gender is exactly "Male" or "Female"; any other value is dropped.
is_male = data['gender'] == "Male"
is_female = data['gender'] == "Female"
data = data.loc[is_male | is_female]
# data.gender.value_counts()

## Cleaning Commitment Unit

In [166]:
# Data Exploration
# data.commitment_unit.value_counts()

In [167]:
# Relabel both weight units ('Pounds', 'Kilos') with the single unit 'Weight'.
data['commitment_unit'] = data['commitment_unit'].replace(
    to_replace=[
        'Pounds',
        'Kilos',
    ],
    value='Weight',
)

# data.commitment_unit.value_counts()

In [168]:
# sorted(list(data.commitment_term.unique()))

In [169]:
# Clean the commitment term and convert it to float.
# 1. The spelled-out value 'two' is mapped to 2 first; otherwise the letter
#    stripping below would reduce it to '' and the float conversion would fail.
data.commitment_term = data.commitment_term.replace(to_replace='two', value=2)
# 2. Strip lowercase letters, commas and backticks from the remaining text.
#    Raw strings keep the same patterns as before while avoiding the
#    invalid-escape warnings raised by '\,' and '\`' in ordinary literals.
data.commitment_term = data.commitment_term.replace(
    regex=[r'[a-z]*', r'\,', r'\`'],
    value='',
)
# 3. Everything left is expected to parse as a number.
data.commitment_term = data.commitment_term.astype('float')

# data.commitment_term

In [170]:
# Review commitment unit values
# data.commitment_unit.value_counts()

In [171]:
# Cap year-based terms: anything above 129 years is stored as 130, the value
# used to mark natural life.
exceeds_cap = (data['commitment_unit'] == 'Year(s)') & (data['commitment_term'] > 129)
data.loc[exceeds_cap, 'commitment_term'] = 130

In [172]:
# Express Cook County Boot Camp sentences recorded in 'Term' units as months.
# The boot-camp/'Term' selection is computed once: the first assignment only
# changes the term value, so the same rows are still selected afterwards.
boot_camp_in_terms = (
    (data['sentence_type'] == 'Cook County Boot Camp')
    & (data['commitment_unit'] == 'Term')
)

# A single term is recorded as 12.
data.loc[boot_camp_in_terms & (data['commitment_term'] == 1), 'commitment_term'] = 12

# Rows whose term is now 12 or 18 get their unit relabelled as 'Months'.
term_is_12_or_18 = (data['commitment_term'] == 12) | (data['commitment_term'] == 18)
data.loc[boot_camp_in_terms & term_is_12_or_18, 'commitment_unit'] = 'Months'

In [173]:
# Record every 'Death' sentence as a 130-year term (unit 'Year(s)', term 130).
is_death_sentence = data['sentence_type'] == 'Death'
data.loc[is_death_sentence, 'commitment_unit'] = 'Year(s)'
data.loc[is_death_sentence, 'commitment_term'] = 130

In [174]:
# Create column with all values in months
def month_convert(row):
    """Express a row's commitment term in months.

    Parameters
    ----------
    row : mapping-like (e.g. a DataFrame row)
        Must provide 'commitment_unit' and 'commitment_term'.

    Returns
    -------
    float
        The term in months, rounded to 2 decimals:
        'Months' is kept as is, 'Year(s)' is multiplied by 12, 'Weeks' is
        divided by 4 and 'Days' by 30 (approximate conversions).
        'Natural Life' always yields 1560.0 and any other unit yields 0.0.
    """
    unit = row['commitment_unit']
    if unit == 'Months':
        return round(float(row['commitment_term']), 2)
    if unit == "Year(s)":
        # float(), not int(): a fractional term such as 1.5 years must become
        # 18 months instead of being truncated to 12.
        return round(float(row['commitment_term']) * 12.0, 2)
    if unit == "Weeks":
        return round(float(row['commitment_term']) / 4, 2)
    if unit == "Days":
        return round(float(row['commitment_term']) / 30, 2)
    if unit == "Natural Life":
        return 1560.
    # Units that do not measure time contribute no duration.
    return 0.

# Run the month conversion on every row and store the result in 'month'.
data['month'] = data.apply(month_convert, axis=1)

In [175]:
# Create column with all values in years
def year_convert(row):
    """Express a row's commitment term in years.

    Parameters
    ----------
    row : mapping-like (e.g. a DataFrame row)
        Must provide 'commitment_unit' and 'commitment_term'.

    Returns
    -------
    float
        The term in years, rounded to 2 decimals:
        'Year(s)' is kept as is, 'Months' is divided by 12, 'Weeks' by 52
        and 'Days' by 365.
        'Natural Life' always yields 130.0 and any other unit yields 0.0.
    """
    unit = row['commitment_unit']
    if unit == 'Year(s)':
        return round(float(row['commitment_term']), 2)
    if unit == "Months":
        # float(), not int(): a fractional term such as 18.5 months must not
        # be truncated to 18 before the division.
        return round(float(row['commitment_term']) / 12.0, 2)
    if unit == "Weeks":
        return round(float(row['commitment_term']) / 52, 2)
    if unit == "Days":
        return round(float(row['commitment_term']) / 365, 2)
    if unit == "Natural Life":
        return 130.
    # Units that do not measure time contribute no duration.
    return 0.

# Run the year conversion on every row and store the result in 'year'.
data['year'] = data.apply(year_convert, axis=1)

In [176]:
# Review new columns and changes
# data[['commitment_unit', 'commitment_term','year','month']]

In [177]:
# Review Age values
# data.age_at_incident.value_counts().head(60)

In [178]:
# Cast the age column to integers so it can be binned numerically.
age_as_int = data['age_at_incident'].astype(int)
data['age_at_incident'] = age_as_int

In [179]:
# Confirm changes
# data.dtypes

In [180]:
# Bins to group age
# pd.cut builds right-closed intervals by default, so each edge is the LAST
# age that belongs to a bin: (0, 17] -> "<18", (17, 24] -> "18-24",
# (24, 29] -> "25-29", (29, 39] -> "30s", ... (59, 137] -> "60+".
# The first edge is 17 rather than 18 so that an 18-year-old is labelled
# "18-24" instead of "<18". An age of 0 lies outside the first interval and
# is left without a bin.
bins_ranges = [0,17,24,29,39,49,59,137]
bins_names = ["<18", '18-24', '25-29', '30s', '40s', '50s', '60+']

data['age_bins'] = pd.cut(data.age_at_incident,bins_ranges,labels=bins_names)

In [181]:
# Remove rows that are exact duplicates after cleaning, then renumber the
# index from 0 and discard the old one.
data = data.drop_duplicates().reset_index(drop=True)
# data

## Define commitment unit for each sentence type

In [182]:
# data.sentence_type.value_counts()

### Analyze Incarceration data to define unit

In [183]:
# data.loc[data.sentence_type == 'Incarceration'].commitment_unit.value_counts()
# Incarceration will be read by Year columns unless we are analyzing penalties in dollars

### Analyze Probation/Supervision to define unit

In [184]:
# data.loc[data.sentence_type == 'Probation/Supervision'].commitment_unit.value_counts()

In [185]:
# sorted(data.loc[data.sentence_type == 'Probation/Supervision'].year.unique())

In [186]:
# data.loc[data.sentence_type == 'Probation/Supervision'].year.value_counts()

In [187]:
# sorted(data.loc[data.sentence_type == 'Probation/Supervision'].month.unique())

In [188]:
# data.loc[data.sentence_type == 'Probation/Supervision'].month.value_counts()
# We will analyze this info in months

### Analyze Cook County Boot Camp to define unit

In [189]:
# data.loc[data.sentence_type == 'Cook County Boot Camp'].commitment_unit.value_counts()
# This sentence will be analyzed in months

### Analyze Conditional Discharge to define unit

In [190]:
# data.loc[data.sentence_type == 'Conditional Discharge'].commitment_unit.value_counts()

In [191]:
# data.loc[data.sentence_type == 'Conditional Discharge'].year.value_counts()

In [192]:
# data.loc[data.sentence_type == 'Conditional Discharge'].month.value_counts()
# This information will be viewed in months

### Analyze Inpatient Mental Health Services to define unit

In [193]:
# data.loc[data.sentence_type == 'Inpatient Mental Health Services'].commitment_unit.value_counts()
# This information will be viewed in years

### Analyze Death to confirm unit

In [194]:
# data.loc[data.sentence_type == 'Death'].commitment_unit.value_counts()

## Filter only current sentence

In [195]:
# data = data.loc[data.current_sentence == True]
# data

# Analyze new dataframe to create the database diagram

In [196]:
# data.columns

In [197]:
# len(data[['case_participant_id','age_at_incident', 'gender', 'race']].drop_duplicates())

In [198]:
# len(data.case_participant_id.drop_duplicates())

In [199]:
# len(data.charge_version_id.drop_duplicates())

In [200]:
# len(data[['charge_version_id','offense_category']].drop_duplicates())

In [201]:
# len(data[['case_participant_id','charge_id']].drop_duplicates())

In [202]:
# data[['case_id','case_participant_id','offense_category','charge_disposition','charge_id','charge_version_id']].drop_duplicates()

In [203]:
# data[['case_id','case_participant_id','offense_category','charge_disposition']].drop_duplicates()

![DBD-from-quickDBD.png](DBD-from-quickDBD.png)

# Create tables for SQL

In [204]:
# Participant table: one row per distinct combination of participant id and
# demographic attributes.
participant_columns = ['case_participant_id', 'age_at_incident', 'gender', 'race', 'age_bins']
participants = data.loc[:, participant_columns].drop_duplicates()
# participants

In [205]:
# Court table: distinct (facility, name) pairs with a fresh 0-based index.
courts = (
    data.loc[:, ['court_facility', 'court_name']]
    .drop_duplicates()
    .reset_index(drop=True)
)
# courts

In [206]:
# Attach a hand-written identifier to each court row.
# NOTE(review): the 13 IDs are matched to rows purely by position, so this
# only works while `courts` has exactly 13 rows in the order the list was
# written for. A different row count raises a length-mismatch error, and a
# different row order would silently give courts the wrong ID. Consider keying
# the IDs by court_facility/court_name instead.
courts['court_id']=['1-26','6','2','5','4','3','1-4','1-DV','1-1','1-3','1-2','1-5','1-RJCC']
# Reorder the columns so the identifier comes first.
courts = courts[['court_id', 'court_facility', 'court_name']]
# courts

In [207]:
# Offense lookup table: one row per distinct offense category, numbered from 1.
offenses = (
    data[['offense_category']]
    .drop_duplicates()
    .reset_index(drop=True)
)
# The id is the 0-based row position shifted by one.
offenses['offense_id'] = offenses.index + 1
offenses = offenses[['offense_id', 'offense_category']]
# offenses

In [208]:
# Sentence lookup table: one row per distinct combination of sentence type,
# term, unit and the derived month/year values, numbered from 1.
sentences = (
    data[['sentence_type', 'commitment_term', 'commitment_unit', 'month', 'year']]
    .drop_duplicates()
    .reset_index(drop=True)
)
# The id is the 0-based row position shifted by one.
sentences['sentence_id'] = sentences.index + 1
sentences = sentences[
    ['sentence_id', 'sentence_type', 'commitment_term', 'commitment_unit', 'month', 'year']
]
# sentences

In [209]:
# Build the fact table by attaching the lookup ids (sentence_id, offense_id,
# court_id) to every cleaned row.
# Note: this rebinds `results`, which earlier held the raw API records.
results = (
    data
    .merge(sentences, on=['sentence_type', 'commitment_term', 'commitment_unit', 'month', 'year'])
    .merge(offenses, on='offense_category')
    .merge(courts, on=['court_facility', 'court_name'])
)
# results.columns

In [210]:
# Keep only the id and case-level columns; the descriptive attributes live in
# the lookup tables.
results = results.loc[:, [
    'case_id',
    'sentence_id',
    'offense_id',
    'court_id',
    'case_participant_id',
    'charge_id',
    'charge_version_id',
    'charge_disposition',
    'length_of_case_in_days',
    'primary_charge',
]]
# results

In [211]:
# Write the participant table to the dashboard's static folder as a JSON
# array with one object per row (orient='records').
participants.to_json('../Dashboard/static/participants.json', orient='records')

In [212]:
# Distinct (case, offense category, sentence type, court) combinations,
# exported to the dashboard's static folder as a JSON array of row objects.
offense_multi = data.loc[
    :, ['case_id', 'offense_category', 'sentence_type', 'court_name']
].drop_duplicates()
offense_multi.to_json(
    '../Dashboard/static/offense_multi.json',
    orient='records',
)

In [213]:
# participants_age = participants.groupby(['age_bins']).count()
# participants_age
# participants_age.to_json('../Dashboard/static/participants_age.json')

In [214]:
# participants.columns

In [215]:
# participants2 = participants[['case_participant_id', 'gender', 'race', 'age_bins']]
# participants_multi = participants2.groupby(['gender','race','age_bins']).count()
# participants_multi.dropna(inplace=True)
# participants_multi.to_json('../Dashboard/static/participants_multi.json',orient='split')
# participants_multi

In [216]:
# data.to_json('../Dashboard/data.json',orient='records')
# results.to_json('../data/results.json')
# participants.to_json('../data/participants.json')
# courts.to_json('../data/courts.json')
# offenses.to_json('../data/offenses.json')
# sentences.to_json('../data/sentences.json')

In [217]:
# import sqlite3

In [218]:
# conn = sqlite3.connect('sentencing.db')

In [219]:
# from sqlalchemy import create_engine

In [220]:
# engine = create_engine('sqlite:///sentencing.db')

In [221]:
# courts.to_sql(name='courts',con=engine,if_exists='append',index=False)

In [222]:
# results = results[['case_participant_id','court_id','offense_id','sentence_id','case_id','charge_disposition','charge_id','charge_version_id','length_of_case_in_days']]

In [223]:
# participants.to_sql(name='participants',con=engine,if_exists='append',index=False)
# offenses.to_sql(name='offenses',con=engine,if_exists='append',index=False)
# sentences.to_sql(name='sentences',con=engine,if_exists='append',index=False)
# results.to_sql(name='results',con=engine,if_exists='append',index=False)

# ============================================================================

# ============================================================================

# Upload information to Postgres

In [224]:
# from password import key
# from sqlalchemy import create_engine

In [225]:
# conn = f"postgres:{key}@localhost:5432/sentencing"
# engine = create_engine(f'postgresql://{conn}')

In [226]:
# courts.to_sql(name='courts',con=engine,if_exists='append',index=False)

In [227]:
# participants.to_sql(name='participants',con=engine,if_exists='append',index=False)

In [228]:
# offenses.to_sql(name='offenses',con=engine,if_exists='append',index=False)

In [229]:
# sentences.to_sql(name='sentences',con=engine,if_exists='append',index=False)

In [230]:
# results = results[['case_participant_id','court_id','offense_id','sentence_id','case_id','primary_charge','charge_disposition','charge_id','charge_version_id','length_of_case_in_days']]

In [231]:
# results.to_sql(name='results',con=engine,if_exists='append',index=False)

# Queries from SQL for Graphs

In [232]:
# import json

In [233]:
# participants.to_json('../data/general_demographics.json',orient='records')

In [234]:
# data[['case_participant_id','court_name','age_bins','offense_category','sentence_type']].drop_duplicates()

In [235]:
# query1 = 'select \
# 	pa.age_bins, \
#     pa.gender, \
#     pa.race, \
# 	fr.court_name, \
# 	fr.offense_category, \
# 	fr.sentence_type \
# from ( \
#     select \
# 		r.case_participant_id, \
# 		max(court_name) court_name, \
# 		o.offense_category, \
# 		s.sentence_type \
# 	from results r \
# 	left join courts co \
# 		on r.court_id = co.court_id \
# 	left join offenses o \
# 		on r.offense_id = o.offense_id \
# 	left join sentences s \
# 		on r.sentence_id = s.sentence_id \
# 	group by ( \
# 		o.offense_category, \
# 		s.sentence_type, \
# 		r.case_participant_id \
# 	)) fr \
# left join participants pa \
# 	on fr.case_participant_id = pa.case_participant_id;'

In [236]:
# filtered_demographics = pd.read_sql_query(query1,con=engine)
# filtered_demographics.to_json('../data/filtered_demographics.json',orient='records')

In [237]:
# data[['year','month','offense_category','sentence_type','court_name']].loc[data.year != 0].drop_duplicates()

In [238]:
# query2 = 'select\
# 	s.year, \
# 	s.month, \
# 	fr.offense_category, \
# 	s.sentence_type, \
# 	fr.court_name \
# from \
# 	(select \
# 	 	r.sentence_id, \
# 	 	o.offense_category, \
# 	 	co.court_name \
# 	 from results r \
# 	 left join courts co \
# 	 	on r.court_id = co.court_id \
# 	 left join offenses o \
# 	 	on r.offense_id = o.offense_id \
# 	 group by ( \
# 	 	r.sentence_id, \
# 	 	o.offense_category, \
# 	 	co.court_name \
# 	)) fr \
# left join sentences s \
# 	on s.sentence_id = fr.sentence_id \
# where s.month !=0;'

In [239]:
# boxplot_offense = pd.read_sql_query(query2,con=engine)
# boxplot_offense.to_json('../data/boxplot_offense.json',orient='records')

In [240]:
# data[['length_of_case_in_days','month','year','offense_category','sentence_type']].loc[data.month != 0].drop_duplicates()

In [241]:
# query3 = 'select \
# 	fr.length_of_case_in_days, \
# 	s.month, \
# 	s.year, \
# 	fr.offense_category, \
# 	s.sentence_type \
# from  \
# 	(select \
# 	 	r.sentence_id, \
# 	 	r.length_of_case_in_days, \
# 	 	o.offense_category \
# 	 from results r \
# 	 left join offenses o \
# 	 	on r.offense_id = o.offense_id \
# 	 group by ( \
# 		r.sentence_id, \
# 	 	r.length_of_case_in_days, \
# 	 	o.offense_category \
# 	 )) fr \
# left join sentences s \
# 	on s.sentence_id = fr.sentence_id \
# where s.month !=0 \
# and fr.length_of_case_in_days != 0;'

In [242]:
# scatter_length = pd.read_sql_query(query3, con=engine)
# scatter_length.to_json('../data/scatter_length.json',orient='records')

In [243]:
# query4 = 'select \
# 	fr.length_of_case_in_days, \
# 	s.month, \
# 	s.year, \
# 	fr.offense_category, \
# 	s.sentence_type \
# from  \
# 	(select \
# 	 	r.sentence_id, \
# 	 	r.length_of_case_in_days, \
# 	 	o.offense_category \
# 	 from results r \
# 	 left join offenses o \
# 	 	on r.offense_id = o.offense_id \
# 	 group by ( \
# 		r.sentence_id, \
# 	 	r.length_of_case_in_days, \
# 	 	o.offense_category \
# 	 )) fr \
# left join sentences s \
# 	on s.sentence_id = fr.sentence_id \
# where s.month !=0  \
# and fr.length_of_case_in_days != 0;'

In [244]:
# boxplot_length = pd.read_sql_query(query4, con=engine)
# boxplot_length.to_json('../data/boxplot_length.json',orient='records')

In [245]:
# query5 = 'select \
# 	fr.court_name, \
# 	count(s.sentence_type) sentence_type, \
# 	fr.offense_category \
# from ( \
# 	select \
# 		co.court_name, \
# 		o.offense_category, \
# 		r.sentence_id \
# 	from results r \
# 		 left join offenses o \
# 	 	on r.offense_id = o.offense_id \
# 	 left join courts co \
# 	 	on r.court_id = co.court_id \
# 	 group by ( \
# 		co.court_name, \
# 		o.offense_category, \
# 		r.sentence_id \
# 	 )) fr \
# left join sentences s \
# 	on s.sentence_id = fr.sentence_id \
# group by  \
# 	fr.court_name, \
# 	fr.offense_category;'

In [246]:
# barchar_courts = pd.read_sql_query(query5, con=engine)
# barchar_courts.to_json('../data/barchar_courts.json',orient='records')

In [247]:
# query6 = 'select \
# 	count(fr.case_participant_id) participants, \
# 	fr.age_at_incident, \
# 	s.month, \
# 	s.year, \
# 	s.sentence_type, \
# 	fr.court_name \
# from ( \
# 	select \
# 		pa.case_participant_id, \
# 		co.court_name, \
# 		r.sentence_id, \
# 		pa.age_at_incident \
# 	from results r \
# 		left join offenses o \
# 	 		on r.offense_id = o.offense_id \
# 		left join courts co \
# 	 		on r.court_id = co.court_id \
# 		left join participants pa \
# 			on r.case_participant_id = pa.case_participant_id \
# 	 group by ( \
# 		pa.case_participant_id, \
# 		co.court_name, \
# 		r.sentence_id, \
# 		pa.age_at_incident \
# 	 )) fr \
# left join sentences s \
# 	on s.sentence_id = fr.sentence_id \
# group by  \
# 	fr.age_at_incident, \
# 	s.month, \
# 	s.year, \
# 	s.sentence_type, \
# 	fr.court_name;' 

In [248]:
# scatter_courts = pd.read_sql_query(query6, con=engine)
# scatter_courts.to_json('../data/scatter_courts.json',orient='records')

In [249]:
# data[['case_id','offense_category','sentence_type','court_name']].drop_duplicates()

In [250]:
# query7 = 'select \
# 	r.case_id, \
# 	o.offense_category, \
# 	s.sentence_type, \
# 	co.court_name \
# from results r \
# left join offenses o \
# 	on r.offense_id = o.offense_id \
# left join courts co \
# 	on r.court_id = co.court_id \
# left join sentences s \
# 	on r.sentence_id = s.sentence_id \
# group by \
# 	r.case_id, \
# 	o.offense_category, \
# 	s.sentence_type, \
# 	co.court_name;'

In [251]:
# pie_offense = pd.read_sql_query(query7, con=engine)
# pie_offense.to_json('../data/pie_offense.json',orient='records')