In [None]:
#import required functions
# Note: If you have not previously used some of these packages (such as shap), you may need to install them
# We apologize for any inconvenience due to installing packages
# However, it is impossible for us to know in advance the packages each individual has installed

import pandas as pd
import numpy as np
import os

import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor, plot_tree
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score, r2_score, mean_squared_error
from xgboost import XGBRegressor, XGBClassifier

import shap
from functools import reduce

#Read in custom functions
from CleanSAO import clean_sao
from CleanFDOC import clean_fdoc
from CreateCCMaster import create_ccm
from CreateModelDF import model_df
from TreeModels import get_tree

# Make <current working directory>/data/ the working directory, presumably so
# the bare file names passed to the cleaning helpers resolve inside it.
# NOTE(review): `dir` shadows the Python builtin of the same name; the name is
# left unchanged in case other cells refer to it.
# NOTE(review): the target is built from the *current* directory, so running
# this cell a second time would look for data/data/.
dir = os.getcwd()
path = f"{dir}/data/"
os.chdir(path)

# Charges (prosecutor-specific actions): cleaned drug and theft datasets.
drug_sa_clean, theft_sa_clean = clean_sao('CjdtSAOCase_00000_sample.csv')

# Sentencing (offenses): cleaned drug and theft datasets, built from the
# active/release offense and root files listed below.
drug_offenses_clean, theft_offenses_clean = clean_fdoc(
    [
        'Active_Offenses_PRPR.csv',
        'Active_Offenses_CPS.csv',
        'Release_Offenses_PRPR.csv',
        'Release_Offenses_CPS.csv',
        'Active_Root.csv',
        'Release_Root.csv',
    ]
)

# Circuit-county-year political / State Attorney dataset.
ccm = create_ccm(
    safile="SA_Political_Leanings_sample.csv",
    housefile="clean_house_sample.csv",
    senatefile="clean_senate_sample.csv",
    presfile="clean_pres_sample.csv",
    circuitcountyfile="circuit_county_crosswalk_sample.csv",
)

# Modeling datasets: each cleaned dataset is passed to model_df together with
# ccm, tagged by observation type (action / offense) and crime type.
drug_sa_df = model_df(drug_sa_clean, ccm, obstype='action', crimetype='drug')
theft_sa_df = model_df(theft_sa_clean, ccm, obstype='action', crimetype='theft')
drug_off_df = model_df(drug_offenses_clean, ccm, obstype='offense', crimetype='drug')
theft_off_df = model_df(theft_offenses_clean, ccm, obstype='offense', crimetype='theft')

# Candidate hyperparameter values for the CART (single decision tree) models;
# handed to get_tree as `paramdict`.
cart = {
    'max_depth': [3, 6, 10],
    'min_samples_split': [8, 20],
    'min_samples_leaf': [10, 20, 100],
}

# CART models. The charge models use a classifier on FINAL_ACTION_DESC; the
# sentencing models use a regressor on TERM_YEARS with nsample=10000.
# All four calls name the target column via `target=` so they read the same way.

# CART Model Drug Charges
drug_sa_cart = get_tree(
    drug_sa_df,
    target='FINAL_ACTION_DESC',
    paramdict=cart,
    model=DecisionTreeClassifier,
    seed=42,
)

# CART Model Theft Charges
theft_sa_cart = get_tree(
    theft_sa_df,
    target='FINAL_ACTION_DESC',
    paramdict=cart,
    model=DecisionTreeClassifier,
    seed=42,
)

# CART Model Drug Sentencing
drug_off_cart = get_tree(
    drug_off_df,
    target='TERM_YEARS',
    paramdict=cart,
    model=DecisionTreeRegressor,
    seed=42,
    nsample=10000,
)

# CART Model Theft Sentencing
theft_off_cart = get_tree(
    theft_off_df,
    target='TERM_YEARS',
    paramdict=cart,
    model=DecisionTreeRegressor,
    seed=42,
    nsample=10000,
)

# Candidate hyperparameter values for the Random Forest models; handed to
# get_tree as `paramdict`.
bag = {
    'max_depth': [10, 15],
    'min_samples_leaf': [100, 200],
    'max_features': [40, 80],
    'n_estimators': [15, 20],
}

# Random Forest models. The charge models use a classifier on
# FINAL_ACTION_DESC; the sentencing models use a regressor on TERM_YEARS with
# nsample=10000.

# Random Forest Drug Charges
drug_sa_rf = get_tree(
    drug_sa_df,
    target='FINAL_ACTION_DESC',
    paramdict=bag,
    model=RandomForestClassifier,
    seed=42,
)

# Random Forest Theft Charges
theft_sa_rf = get_tree(
    theft_sa_df,
    target='FINAL_ACTION_DESC',
    paramdict=bag,
    model=RandomForestClassifier,
    seed=42,
)

# Random Forest Drug Sentencing
drug_off_rf = get_tree(
    drug_off_df,
    target='TERM_YEARS',
    paramdict=bag,
    model=RandomForestRegressor,
    seed=42,
    nsample=10000,
)

# Random Forest Theft Sentencing
theft_off_rf = get_tree(
    theft_off_df,
    target='TERM_YEARS',
    paramdict=bag,
    model=RandomForestRegressor,
    seed=42,
    nsample=10000,
)

# Candidate hyperparameter values for the XGBoost classifier models; handed to
# get_tree as `paramdict`.
xgb_cls = {
    'max_depth': [4, 6],
    'min_child_weight': [10, 20],
    'alpha': [0.1, 0.2],
    'colsample_bytree': [0.6, 0.75],
}

# XGBoost classifier models for the charge outcome (FINAL_ACTION_DESC).
# NOTE(review): these two calls use seed=10, whereas the CART, Random Forest
# and XGBoost regression calls use seed=42 — confirm this is intentional.

# XGBoost Drug Charges
drug_sa_xgb = get_tree(
    drug_sa_df,
    target='FINAL_ACTION_DESC',
    paramdict=xgb_cls,
    model=XGBClassifier,
    seed=10,
)

# XGBoost Theft Charges
theft_sa_xgb = get_tree(
    theft_sa_df,
    target='FINAL_ACTION_DESC',
    paramdict=xgb_cls,
    model=XGBClassifier,
    seed=10,
)

# Candidate hyperparameter values for the XGBoost regression models; handed to
# get_tree as `paramdict`.
boost = {
    'max_depth': [8, 12],
    'min_child_weight': [10, 20],
    'alpha': [0.1, 0.2],
    'colsample_bytree': [0.6, 0.75],
}
        
# XGBoost regression models for sentence length (TERM_YEARS), nsample=10000.
# Both calls name the target column via `target=` so they read the same way.

# XGBoost Drug Sentencing
drug_off_xgb = get_tree(
    drug_off_df,
    target='TERM_YEARS',
    paramdict=boost,
    model=XGBRegressor,
    seed=42,
    nsample=10000,
)

# XGBoost Theft Sentencing
theft_off_xgb = get_tree(
    theft_off_df,
    target='TERM_YEARS',
    paramdict=boost,
    model=XGBRegressor,
    seed=42,
    nsample=10000,
)

: 

In [None]:
# sqlite3 ships with Python's standard library, so there is nothing to install
# with pip (`pip install sqlite3` finds no such package); importing it is enough.
import sqlite3

: 