# script to aggregate CalEnviroScreen scores to the county level

### inputs

In [None]:
# directory that holds the raw CalEnviroScreen download
data_path = (
    '/Volumes/GoogleDrive/.shortcut-targets-by-id/'
    '1-BTh8T0PMwHDs3KZ9V--KPrNWRUgRxvV/'
    '2020_CAEECC_Public_Sector_Underserved/'
    'data/raw/calenviroscreen'
)
# name of the Excel workbook read from that directory
data_file = 'ces3results.xlsx'

### outputs

In [None]:
# directory the aggregated county table is written to
save_path = (
    '/Volumes/GoogleDrive/.shortcut-targets-by-id/'
    '1-BTh8T0PMwHDs3KZ9V--KPrNWRUgRxvV/'
    '2020_CAEECC_Public_Sector_Underserved/'
    'data/processed'
)
# file name of the exported csv
save_file = 'ces_dac_county_mean_median.csv'

### load libraries

In [None]:
import pandas as pd
import os
import glob

## script

### read in calenviroscreen data

In [None]:
# load the 'CES 3.0 (2018 Update)' sheet of the raw workbook into a dataframe
df_ces = pd.read_excel(
    io=os.path.join(data_path, data_file),
    sheet_name='CES 3.0 (2018 Update)',
)

In [None]:
# preview the first rows of the raw CES table as a quick sanity check of the load
df_ces.head()

### only keep select few columns

In [None]:
# keep only the tract id, county, CES score/percentile and the SB 535 DAC flag
# (the leading space in ' CES 3.0 Percentile' is part of the column name being selected)
df_ces_sel = df_ces.loc[:, [
    'Census Tract',
    'California County',
    'CES 3.0 Score',
    ' CES 3.0 Percentile',
    'SB 535 Disadvantaged Community',
]]

In [None]:
# replace the workbook headers with the short snake_case names used below
df_ces_sel = df_ces_sel.rename(
    mapper={
        'Census Tract': 'census_tract',
        'California County': 'county',
        'CES 3.0 Score': 'ces_score',
        ' CES 3.0 Percentile': 'ces_percentile',
        'SB 535 Disadvantaged Community': 'dac_flag',
    },
    axis='columns',
)

In [None]:
# preview the trimmed table to confirm the renamed columns
df_ces_sel.head()

### aggregate ces scores to county level

count the number of census tracts per dac flag value within each county

In [None]:
# number of census tracts for every (county, dac_flag) pair, returned as a
# flat table whose count column is called 'counts'
count_dac = (
    df_ces_sel
    .groupby(['county', 'dac_flag'])['census_tract']
    .count()
    .reset_index(name='counts')
)

In [None]:
# preview the per-county, per-flag tract counts
count_dac.head()

check that there are no counties with zero disadvantaged communities:

In [None]:
# count_dac only contains rows for (county, dac_flag) pairs that occur in the
# data (the grouping columns are plain, non-categorical columns), so a county
# without any disadvantaged tract has no 'Yes' row at all and a bare
# `counts == 0` filter can never match it.
# Instead: collect the counties that have a positive 'Yes' count, then show the
# rows of every county that is NOT among them. An empty result means every
# county has at least one DAC-flagged census tract.
has_dac = (count_dac['dac_flag'] == 'Yes') & (count_dac['counts'] > 0)
counties_with_dac = count_dac.loc[has_dac, 'county']
count_dac[~count_dac['county'].isin(counties_with_dac)]

count how many census tracts there are in each county

In [None]:
# total number of census tracts per county, stored in a 'total' column
count_tracts = (
    df_ces_sel
    .groupby(['county'])['census_tract']
    .count()
    .reset_index(name='total')
)

In [None]:
# preview the per-county tract totals
count_tracts.head()

merge count of dac flags with total count of census tracts

In [None]:
# attach each county's tract total to its per-flag counts (default inner join on 'county')
df_county = pd.merge(count_dac, count_tracts, on='county')

In [None]:
# preview the merged counts/totals table
df_county.head()

calculate the proportion of each county's census tracts that fall under each dac flag value (the 'Yes' rows give the dac-assigned share)

In [None]:
# share of the county's tracts represented by each row: per-flag count / county total
# (each dac_flag value has its own row, so this is computed for every flag value)
df_county['dac_proportion'] = df_county['counts'].div(df_county['total'])

In [None]:
# preview the table again, now including the dac_proportion column
df_county.head()

take the mean and median of the ces score and ces percentile by county

In [None]:
# mean and median of the CES score and CES percentile for every county.
# The two value columns are selected with a list (double brackets): selecting
# several groupby columns with a bare tuple, i.e. [...]['a', 'b'], was
# deprecated in pandas 1.0 and raises an error in pandas 2.x.
# The result has two-level column labels, (variable, statistic), with 'county'
# moved back into a regular column by reset_index().
agg_ces = (
    df_ces_sel
    .groupby(['county'])[['ces_score', 'ces_percentile']]
    .agg(['mean', 'median'])
    .reset_index()
)

In [None]:
# preview the aggregated table (column labels are still two-level at this point)
agg_ces.head()

In [None]:
# collapse the two-level column labels into single names such as 'ces_score_mean';
# stripping '_' removes the trailing underscore left on labels whose second level
# is empty (the 'county' column)
agg_ces.columns = ['_'.join(label).strip('_') for label in agg_ces.columns]

In [None]:
# preview the aggregated table again to confirm the flattened column names
agg_ces.head()

In [None]:
# agg_ces.columns = agg_ces.columns.droplevel(0)

### export to csv file

In [None]:
# write the county-level mean/median table to the processed-data folder, without the row index
# NOTE(review): df_county (the dac counts / proportions) is never written out even
# though the output file name mentions "dac" - confirm whether it should be merged in first
agg_ces.to_csv(
    path_or_buf=os.path.join(save_path, save_file),
    index=False,
)