In [None]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pylab as plt
import matplotlib as mtp
import warnings
from datetime import datetime
from dateutil.relativedelta import relativedelta
from sqlalchemy import create_engine

# Build a SQLAlchemy engine for the PostgreSQL database named "kcmo-mc".
# The URL carries no host, user or password, so connection details are
# presumably taken from the local environment -- TODO confirm.
engine = create_engine("postgresql:///kcmo-mc")
# Open one connection up front; the query cells below pass it to pd.read_sql.
db_conn = engine.connect()

In [None]:
# Pull every probation term whose sentence execution code is SIS. `code` is
# selected a second time under the alias `probation`, which is the grouping
# key used for the summary below.
q_siscases = """
select 
code as probation,
sp.*
from clean.ordvpost2012 sp 
where sent_exec_code = 'SIS'
"""
SIScases_prob_term_level = pd.read_sql(q_siscases, db_conn)

# Replace missing outcomes with the string 'None' and missing race with 'U'.
# Plain assignment is used instead of `fillna(..., inplace=True)` on a column
# selection, which is chained assignment and is not guaranteed to write back.
SIScases_prob_term_level['outcome'] = SIScases_prob_term_level['outcome'].fillna('None')
SIScases_prob_term_level['race'] = SIScases_prob_term_level['race'].fillna('U').str.strip()

# Projected end of probation = start date + prob_total_months calendar months.
# pd.DateOffset(months=...) performs real calendar arithmetic; the numpy
# 'timedelta64[M]' unit is not a fixed duration and cannot be used for this.
sis_start_dttm = pd.to_datetime(SIScases_prob_term_level['start_dttm'])
sis_projected_end = [
    start + pd.DateOffset(months=int(months))
    if pd.notnull(start) and pd.notnull(months) else pd.NaT
    for start, months in zip(sis_start_dttm, SIScases_prob_term_level['prob_total_months'])
]
SIScases_prob_term_level['projected_end_dt'] = pd.to_datetime(
    pd.Series(sis_projected_end, index=SIScases_prob_term_level.index, dtype='object')
)

# end_date_passed is 1 when the projected end date is earlier than the most
# recent disposition date in this extract (i.e. the term should already be
# over), otherwise 0. Rows with no projected end date compare False and get 0.
sis_last_disp_date = pd.to_datetime(SIScases_prob_term_level['disp_date']).max()
SIScases_prob_term_level['end_date_passed'] = np.where(
    SIScases_prob_term_level['projected_end_dt'] < sis_last_disp_date, 1, 0
)

# Drop the columns that are not needed for the race/outcome summary.
SIScases_prob_term_level = SIScases_prob_term_level.drop(
    columns=['rnk', 'p_months', 'p_years', 'person_id', 'case_num', 'disp_date',
             'prob_total_months', 'start_dttm', 'final_action_dttm',
             'sent_exec_code', 'sex', 'statute_ord', 'zipcode']
)

# Keep only terms whose projected end date has passed, then count rows per
# (probation, code_desc, race, outcome) combination.
race_SIS_end_date_passed = (
    SIScases_prob_term_level[SIScases_prob_term_level['end_date_passed'] == 1]
    .drop(columns=['projected_end_dt', 'end_date_passed', 'code'])
    .groupby(['probation', 'code_desc', 'race', 'outcome'])['outcome']
    .size()
    .reset_index(name='count')
)

# Share of each row within its probation code (proportions sum to ~1 per code).
race_SIS_end_date_passed['proportion'] = (
    race_SIS_end_date_passed['count']
    / race_SIS_end_date_passed.groupby('probation')['count'].transform('sum')
).round(3)

# Write to file
race_SIS_end_date_passed.to_csv('race_SIS_outcomes.csv', index = False)
race_SIS_end_date_passed

In [None]:
# Give every race code in the SIS summary a fixed colour sampled from the
# 'vlag' colormap, so a race is drawn in the same colour on every chart.
cm = plt.get_cmap('vlag')
sis_races = race_SIS_end_date_passed['race'].unique()
colors = {race: cm(1.0 * i / len(sis_races)) for i, race in enumerate(sis_races)}

# One stacked bar chart per probation code: x axis = outcome, stacks = race.
for prob in race_SIS_end_date_passed['probation'].unique():
    obj_prob = race_SIS_end_date_passed[race_SIS_end_date_passed['probation'] == prob]
    total_count = obj_prob['count'].sum()

    # Only chart codes that have more than one summary row and more than one case.
    if obj_prob.shape[0] <= 1 or total_count <= 1:
        continue

    # Proportion per (outcome, race) for the bars, and per race for the caption.
    by_outcome_race = obj_prob.groupby(
        ['probation', 'code_desc', 'outcome', 'race'], as_index = False
    )['proportion'].sum()
    by_race = obj_prob.groupby(['probation', 'race'], as_index = False)['proportion'].sum()

    # Caption text: " <race>: <proportion>" for every race under this code.
    caption = "".join(
        " " + '{}: {}'.format(race, share)
        for race, share in zip(by_race['race'], by_race['proportion'].round(3))
    )

    # Sorted race codes; this matches the column order produced by the pivot.
    races = np.sort(by_outcome_race['race'].unique())
    pivoted = pd.pivot(by_outcome_race, columns = ['race'], index = ['outcome'], values = ['proportion'])

    # Draw on an explicitly created figure. DataFrame.plot() without `ax`
    # opens a new figure of its own, so the previous plt.clf() left a stray
    # empty figure behind on every iteration and nothing was ever closed.
    fig, ax = plt.subplots()
    pivoted.plot(kind = "bar", stacked = True, color = [colors[race] for race in races], ax = ax)
    ax.set_xticklabels(ax.get_xticklabels(), rotation = 90)
    ax.set_title("Racial distribution within SIS probation outcomes for {}\n{}".format(prob, by_outcome_race['code_desc'].values[0]))
    ax.set_ylim(0, 1)

    # Caption lines are positioned in data coordinates below the x axis.
    ax.text(x = -0.9, y = -0.35, s = "Total number of individuals receiving probation term {}: {}".format(prob, total_count))
    ax.text(x = -0.9, y = -0.45, s = "Overall distribution of race")
    ax.text(x = -0.9, y = -0.5, s = caption)
    ax.legend(title = 'Race', labels = races)

    fig.savefig('race_SIS_{}_p_outcomes.png'.format(prob), dpi=300, bbox_inches="tight")
    plt.show()
    # Release the figure so a long list of probation codes does not pile up open figures.
    plt.close(fig)

In [None]:
# Pull every probation term whose sentence execution code is SES. `code` is
# selected a second time under the alias `probation`, which is the grouping
# key used for the summary below.
q_sescases = """
select 
code as probation,
sp.*
from clean.ordvpost2012 sp 
where sent_exec_code = 'SES'
"""
SEScases_prob_term_level = pd.read_sql(q_sescases, db_conn)

# Replace missing outcomes with the string 'None' and missing race with 'U'.
# Plain assignment is used instead of `fillna(..., inplace=True)` on a column
# selection, which is chained assignment and is not guaranteed to write back.
SEScases_prob_term_level['outcome'] = SEScases_prob_term_level['outcome'].fillna('None')
SEScases_prob_term_level['race'] = SEScases_prob_term_level['race'].fillna('U').str.strip()

# Projected end of probation = start date + prob_total_months calendar months.
# pd.DateOffset(months=...) performs real calendar arithmetic; the numpy
# 'timedelta64[M]' unit is not a fixed duration and cannot be used for this.
ses_start_dttm = pd.to_datetime(SEScases_prob_term_level['start_dttm'])
ses_projected_end = [
    start + pd.DateOffset(months=int(months))
    if pd.notnull(start) and pd.notnull(months) else pd.NaT
    for start, months in zip(ses_start_dttm, SEScases_prob_term_level['prob_total_months'])
]
SEScases_prob_term_level['projected_end_dt'] = pd.to_datetime(
    pd.Series(ses_projected_end, index=SEScases_prob_term_level.index, dtype='object')
)

# end_date_passed is 1 when the projected end date is earlier than the most
# recent disposition date in this extract (i.e. the term should already be
# over), otherwise 0. Rows with no projected end date compare False and get 0.
ses_last_disp_date = pd.to_datetime(SEScases_prob_term_level['disp_date']).max()
SEScases_prob_term_level['end_date_passed'] = np.where(
    SEScases_prob_term_level['projected_end_dt'] < ses_last_disp_date, 1, 0
)

# Drop the columns that are not needed for the race/outcome summary.
SEScases_prob_term_level = SEScases_prob_term_level.drop(
    columns=['rnk', 'p_months', 'p_years', 'person_id', 'case_num', 'disp_date',
             'prob_total_months', 'start_dttm', 'final_action_dttm',
             'sent_exec_code', 'sex', 'statute_ord', 'zipcode']
)

# Keep only terms whose projected end date has passed, then count rows per
# (probation, code_desc, race, outcome) combination.
race_SES_end_date_passed = (
    SEScases_prob_term_level[SEScases_prob_term_level['end_date_passed'] == 1]
    .drop(columns=['projected_end_dt', 'end_date_passed', 'code'])
    .groupby(['probation', 'code_desc', 'race', 'outcome'])['outcome']
    .size()
    .reset_index(name='count')
)

# Share of each row within its probation code (proportions sum to ~1 per code).
race_SES_end_date_passed['proportion'] = (
    race_SES_end_date_passed['count']
    / race_SES_end_date_passed.groupby('probation')['count'].transform('sum')
).round(3)

# Write to file
race_SES_end_date_passed.to_csv('race_SES_outcomes.csv', index = False)
race_SES_end_date_passed

In [None]:
# Colour per race code, sampled from the 'vlag' colormap. The mapping is built
# from the SIS race list (same order, same sampling positions) so that a race
# has the same colour on the SES charts as on the SIS charts. Previously the
# sample positions were sized from the SES race list but zipped with the SIS
# races, which shifted colours whenever the two lists differed in length.
cm = plt.get_cmap('vlag')
sis_races = list(race_SIS_end_date_passed['race'].unique())
colors = {race: cm(1.0 * i / len(sis_races)) for i, race in enumerate(sis_races)}

# Races that appear only in the SES data would otherwise raise KeyError in the
# colour lookup below; give them colours sampled between the SIS positions.
ses_only_races = [race for race in race_SES_end_date_passed['race'].unique() if race not in colors]
for k, race in enumerate(ses_only_races):
    colors[race] = cm((k + 0.5) / max(len(sis_races), len(ses_only_races)))

# One stacked bar chart per probation code: x axis = outcome, stacks = race.
for prob in race_SES_end_date_passed['probation'].unique():
    obj_prob = race_SES_end_date_passed[race_SES_end_date_passed['probation'] == prob]
    total_count = obj_prob['count'].sum()

    # Only chart codes that have more than one summary row and more than one case.
    if obj_prob.shape[0] <= 1 or total_count <= 1:
        continue

    # Proportion per (outcome, race) for the bars, and per race for the caption.
    by_outcome_race = obj_prob.groupby(
        ['probation', 'code_desc', 'outcome', 'race'], as_index = False
    )['proportion'].sum()
    by_race = obj_prob.groupby(['probation', 'race'], as_index = False)['proportion'].sum()

    # Caption text: " <race>: <proportion>" for every race under this code.
    caption = "".join(
        " " + '{}: {}'.format(race, share)
        for race, share in zip(by_race['race'], by_race['proportion'].round(3))
    )

    # Sorted race codes; this matches the column order produced by the pivot.
    races = np.sort(by_outcome_race['race'].unique())
    pivoted = pd.pivot(by_outcome_race, columns = ['race'], index = ['outcome'], values = ['proportion'])

    # Draw on an explicitly created figure. DataFrame.plot() without `ax`
    # opens a new figure of its own, so the previous plt.clf() left a stray
    # empty figure behind on every iteration and nothing was ever closed.
    fig, ax = plt.subplots()
    pivoted.plot(kind = "bar", stacked = True, color = [colors[race] for race in races], ax = ax)
    ax.set_xticklabels(ax.get_xticklabels(), rotation = 90)
    # The original format string had a single placeholder for two arguments,
    # so the code description was silently dropped from the title.
    ax.set_title("Racial distribution within outcome types for probation term {}\n{} (SES)".format(prob, by_outcome_race['code_desc'].values[0]))
    ax.set_ylim(0, 1)

    # Caption lines are positioned in data coordinates below the x axis.
    ax.text(x = -0.9, y = -0.35, s = "Total number of individuals receiving probation term {}: {}".format(prob, total_count))
    ax.text(x = -0.9, y = -0.45, s = "Overall distribution of race")
    ax.text(x = -0.9, y = -0.5, s = caption)
    ax.legend(title = 'Race', labels = races)

    fig.savefig('race_SES_{}_p_outcomes.png'.format(prob), dpi=300, bbox_inches="tight")
    plt.show()
    # Release the figure so a long list of probation codes does not pile up open figures.
    plt.close(fig)