In [418]:
# Import packages
import pandas as pd
import numpy as np
import datetime as dt
import warnings
# Suppress every warning category for the rest of the session.
# NOTE(review): this also hides pandas deprecation/future warnings — confirm
# that blanket suppression is really intended.
warnings.filterwarnings("ignore")

In [419]:
# Remove the column display limit so wide frames are shown in full
pd.options.display.max_columns = None

In [420]:
# Read the Justice data (the file name indicates it covers 2015-2019)
data = pd.read_csv('HC_Justice_2015-2019.csv')

### County Level Analysis

In [422]:
# Skeleton frame for the county-level table: one row per year in the study
# period, every row labelled with the county name
begin, end = 2015, 2019
County_data = pd.DataFrame({'Year': range(begin, end + 1)}).assign(County='Hennepin')

In [423]:
# Calculate the total number of distinct cases per year.
# Counts are looked up by Year label instead of being assigned positionally, so
# they cannot be misaligned (or fail on a length mismatch) when the years
# present in `data` differ from the begin..end range. A year with no rows in
# `data` gets NaN. Only the 'Cases' column is aggregated.
cases_per_year = data.groupby('Year')['Cases'].nunique()
County_data['num_of_cases_total'] = County_data['Year'].map(cases_per_year)

In [424]:
# Calculate cases by gender
# Missing gender values are skipped: groupby drops NaN keys, so a NaN category
# could neither be looked up nor concatenated into a column name.
genders = data['Gender'].dropna().unique().tolist()
# Count distinct cases per (Year, Gender) once instead of once per gender.
cases_by_gender = data.groupby(['Year', 'Gender'])['Cases'].nunique()
for gender in genders:
    # Look the counts up by Year so that a year with no cases for this gender
    # stays in County_data (as NaN) rather than being dropped by an inner merge.
    # Assigning a column (instead of merging) also keeps the cell re-runnable.
    County_data['num_of_case_gender_' + str(gender)] = County_data['Year'].map(
        cases_by_gender.xs(gender, level='Gender')
    )

In [425]:
# Calculate cases by race
# Missing race values are skipped: groupby drops NaN keys, so a NaN category
# could neither be looked up nor concatenated into a column name.
races = data['Race'].dropna().unique().tolist()
# Count distinct cases per (Year, Race) once instead of once per race.
cases_by_race = data.groupby(['Year', 'Race'])['Cases'].nunique()
for race in races:
    # Look the counts up by Year so that a year with no cases for this race
    # stays in County_data (as NaN) rather than being dropped by an inner merge.
    # Assigning a column (instead of merging) also keeps the cell re-runnable.
    County_data['num_of_case_race_' + str(race)] = County_data['Year'].map(
        cases_by_race.xs(race, level='Race')
    )

In [426]:
# Calculate cases by Group
# Missing group values are skipped: groupby drops NaN keys, so a NaN category
# could neither be looked up nor concatenated into a column name.
groups = data['Group'].dropna().unique().tolist()
# Count distinct cases per (Year, Group) once instead of once per group.
cases_by_group = data.groupby(['Year', 'Group'])['Cases'].nunique()
for group in groups:
    # Look the counts up by Year so that a year with no cases for this group
    # stays in County_data (as NaN) rather than being dropped by an inner merge.
    # Assigning a column (instead of merging) also keeps the cell re-runnable.
    County_data['num_of_case_group_' + str(group)] = County_data['Year'].map(
        cases_by_group.xs(group, level='Group')
    )

In [427]:
# Calculate cases by offense level
# Missing offense levels are skipped: groupby drops NaN keys, so a NaN category
# could neither be looked up nor concatenated into a column name.
levels = data['Offense Level'].dropna().unique().tolist()
# Count distinct cases per (Year, Offense Level) once instead of once per level.
cases_by_level = data.groupby(['Year', 'Offense Level'])['Cases'].nunique()
for level in levels:
    # Look the counts up by Year so that a year with no cases at this offense
    # level stays in County_data (as NaN) rather than being dropped by an inner
    # merge. Assigning a column (instead of merging) also keeps the cell
    # re-runnable.
    County_data['num_of_case_level_' + str(level)] = County_data['Year'].map(
        cases_by_level.xs(level, level='Offense Level')
    )

In [353]:
# Write the county-level table to disk (row index omitted); the file name
# carries the covered year range
county_csv_name = f'County_Justice_data_{begin}-{end}.csv'
County_data.to_csv(county_csv_name, index=None)

### City Level Analysis

In [466]:
# Sorted list of distinct city names. Missing values are dropped first:
# sorted() cannot compare NaN (a float) with strings, and the groupby-based
# counts never produce rows for a NaN city.
cities = sorted(data['City_new'].dropna().unique().tolist())

In [468]:
# Skeleton frame for the city-level table: an empty frame whose index holds
# every (City, Year) combination in the study period
begin, end = 2015, 2019
iterables = [cities, range(begin, end + 1)]
index = pd.MultiIndex.from_product(iterables, names=['City', 'Year'])
City_data = pd.DataFrame(index=index)

In [470]:
# Calculate the number of distinct cases by city and year
df = (
    data.groupby(['City_new', 'Year'])['Cases']
    .nunique()
    .to_frame('num_of_cases_total')
)
# Left merge keeps every (city, year) row of City_data; pairs without cases get NaN
City_data = City_data.merge(df, how='left', left_index=True, right_on=['City_new', 'Year'])

In [472]:
# Calculate cases by gender
# Missing gender values are skipped: groupby drops NaN keys, so a NaN category
# could neither be looked up nor concatenated into a column name.
genders = data['Gender'].dropna().unique().tolist()
# Count distinct cases per (City_new, Year, Gender) once instead of repeating
# the full groupby for every gender.
cases_by_gender = data.groupby(['City_new', 'Year', 'Gender'])['Cases'].nunique()
for gender in genders:
    # Slice out this gender's counts, indexed by (City_new, Year)
    df = cases_by_gender.xs(gender, level='Gender').to_frame('num_of_case_gender_' + str(gender))
    # Left merge keeps every (city, year) row; pairs without cases get NaN
    City_data = City_data.merge(df, how='left', left_index=True, right_on=['City_new', 'Year'])

In [473]:
# Calculate cases by race
# Missing race values are skipped: groupby drops NaN keys, so a NaN category
# could neither be looked up nor concatenated into a column name.
races = data['Race'].dropna().unique().tolist()
# Count distinct cases per (City_new, Year, Race) once instead of repeating
# the full groupby for every race.
cases_by_race = data.groupby(['City_new', 'Year', 'Race'])['Cases'].nunique()
for race in races:
    # Slice out this race's counts, indexed by (City_new, Year)
    df = cases_by_race.xs(race, level='Race').to_frame('num_of_case_race_' + str(race))
    # Left merge keeps every (city, year) row; pairs without cases get NaN
    City_data = City_data.merge(df, how='left', left_index=True, right_on=['City_new', 'Year'])

In [474]:
# Calculate cases by Group
# Missing group values are skipped: groupby drops NaN keys, so a NaN category
# could neither be looked up nor concatenated into a column name.
groups = data['Group'].dropna().unique().tolist()
# Count distinct cases per (City_new, Year, Group) once instead of repeating
# the full groupby for every group.
cases_by_group = data.groupby(['City_new', 'Year', 'Group'])['Cases'].nunique()
for group in groups:
    # Slice out this group's counts, indexed by (City_new, Year)
    df = cases_by_group.xs(group, level='Group').to_frame('num_of_case_group_' + str(group))
    # Left merge keeps every (city, year) row; pairs without cases get NaN
    City_data = City_data.merge(df, how='left', left_index=True, right_on=['City_new', 'Year'])

In [475]:
# Calculate cases by offense level
# Missing offense levels are skipped: groupby drops NaN keys, so a NaN category
# could neither be looked up nor concatenated into a column name.
levels = data['Offense Level'].dropna().unique().tolist()
# Count distinct cases per (City_new, Year, Offense Level) once instead of
# repeating the full groupby for every level.
cases_by_level = data.groupby(['City_new', 'Year', 'Offense Level'])['Cases'].nunique()
for level in levels:
    # Slice out this offense level's counts, indexed by (City_new, Year)
    df = cases_by_level.xs(level, level='Offense Level').to_frame('num_of_case_level_' + str(level))
    # Left merge keeps every (city, year) row; pairs without cases get NaN
    City_data = City_data.merge(df, how='left', left_index=True, right_on=['City_new', 'Year'])

In [477]:
# Turn the index levels back into ordinary columns and expose the city
# column under the name 'City'
City_data = City_data.reset_index().rename(columns={'City_new': 'City'})

In [482]:
# Write the city-level table to disk (row index omitted); the file name
# carries the covered year range
city_csv_name = f'City_Justice_data_{begin}-{end}.csv'
City_data.to_csv(city_csv_name, index=None)