In [1]:
import os
os.environ['USE_PYGEOS'] = '0'

import pandas as pd
import geopandas as gpd
from tqdm.notebook import tqdm
import matplotlib.pyplot as plt
import matplotlib as mpl

## Read shapefile of ZCTAs and CA Counties

In [2]:
### Load the ZCTA polygons from the shapefile and reproject them to EPSG:6414
# NOTE(review): absolute, machine-specific path -- other cells read from a relative
# 'data/' folder; confirm whether this file can be addressed the same way.
shp_path = "D:/000_User_Documents/COH/COVID_MUAP/Codes_Notebook/data/ZCTA_Update.shp"
ZCTA_CA = gpd.read_file(shp_path).to_crs('epsg:6414')

# Quick sanity check: (number of rows, number of columns), then a two-row preview
print('Observations, Attributes:', ZCTA_CA.shape)
ZCTA_CA.head(2)

Observations, Attributes: (1666, 4)


Unnamed: 0,ZCTA5CE10,ZCTA,Area,geometry
0,93924,93924,190.283915,"POLYGON ((-162540.814 -176061.241, -162041.898..."
1,95404,95404,80.004021,"POLYGON ((-238341.052 60298.013, -237609.933 6..."


In [3]:
### Load the California county boundaries (TIGER 2016 shapefile) and reproject
### them to EPSG:6414, the same CRS used for the ZCTA layer
# NOTE(review): absolute, machine-specific path -- confirm whether it can be made relative.
shp_path_CA = "D:/000_User_Documents/COH/COVID_MUAP/Codes_Notebook/data/CA_Counties_TIGER2016.shp"
CA_County = gpd.read_file(shp_path_CA).to_crs('epsg:6414')

# Quick sanity check: (number of rows, number of columns), then a two-row preview
print('Observations, Attributes:', CA_County.shape)
CA_County.head(2)

Observations, Attributes: (58, 18)


Unnamed: 0,STATEFP,COUNTYFP,COUNTYNS,GEOID,NAME,NAMELSAD,LSAD,CLASSFP,MTFCC,CSAFP,CBSAFP,METDIVFP,FUNCSTAT,ALAND,AWATER,INTPTLAT,INTPTLON,geometry
0,6,91,277310,6091,Sierra,Sierra County,6,H1,G4020,,,,A,2468694587,23299110,39.5769252,-120.5219926,"POLYGON ((-56192.570 186608.420, -56187.298 18..."
1,6,67,277298,6067,Sacramento,Sacramento County,6,H1,G4020,472.0,40900.0,,A,2499183617,76073827,38.4500114,-121.3404409,"POLYGON ((-103238.273 78232.450, -103128.896 7..."


## Read GTWR model results

In [4]:
### ZCTA -> county-name lookup (only the two columns that are needed)
zcta_county = pd.read_csv('data/ZCTA_CountyInfo.csv', usecols=['ZCTA', 'NAME'])

### Read the full GTWR results: one coefficient CSV per predictor
gtwr_full_results_var1 = pd.read_csv('4_plus_1/Full_TimeRange_AAG/gtwr_coeff_Crowding_[fixed-AllZcta-AllWeeks]_[bw-0.4_tau-4000].csv')
gtwr_full_results_var2 = pd.read_csv('4_plus_1/Full_TimeRange_AAG/gtwr_coeff_Hispanic_[fixed-AllZcta-AllWeeks]_[bw-0.4_tau-4000].csv')
gtwr_full_results_var3 = pd.read_csv('4_plus_1/Full_TimeRange_AAG/gtwr_coeff_NonHisBlk_[fixed-AllZcta-AllWeeks]_[bw-0.4_tau-4000].csv')
gtwr_full_results_var4 = pd.read_csv('4_plus_1/Full_TimeRange_AAG/gtwr_coeff_EmployService_[fixed-AllZcta-AllWeeks]_[bw-0.4_tau-4000].csv')
gtwr_full_results_var5 = pd.read_csv('4_plus_1/Full_TimeRange_AAG/gtwr_coeff_NoHighSchool_[fixed-AllZcta-AllWeeks]_[bw-0.4_tau-4000].csv')

### Combine into one table: start from the Crowding frame (all of its columns are kept)
### and left-join the single coefficient column of each other predictor on (ZCTA, Week)
merge_keys = ['ZCTA', 'Week']
gtwr_full_results = gtwr_full_results_var1
for other_results, coeff_col in (
    (gtwr_full_results_var2, 'coeff_Hispanic'),
    (gtwr_full_results_var3, 'coeff_NonHisBlk'),
    (gtwr_full_results_var4, 'coeff_EmployService'),
    (gtwr_full_results_var5, 'coeff_NoHighSchool'),
):
    gtwr_full_results = gtwr_full_results.merge(
        other_results[merge_keys + [coeff_col]], on=merge_keys, how='left'
    )

### Save a copy for future use
# gtwr_full_results.to_csv('gtwr_coeff_all_predictors_only.csv', index=False)

In [5]:
### Reduce the GTWR table to what the plots need, in three chained steps:
###   1. keep the key columns, total_r and the five coefficient columns
###   2. add the county name (NAME) via a left join on ZCTA
###   3. strip the 'coeff_' prefix from the coefficient column names
# NOTE: this overwrites gtwr_full_results, so the cell cannot be re-run on its own --
# after the rename the 'coeff_*' columns selected in step 1 no longer exist.
gtwr_full_results = (
    gtwr_full_results[['ZCTA','Week','total_r','coeff_Crowding','coeff_Hispanic','coeff_NonHisBlk','coeff_EmployService','coeff_NoHighSchool']]
    .merge(zcta_county, on='ZCTA', how='left')
    .rename(columns={
        'coeff_Crowding': 'Crowding',
        'coeff_Hispanic': 'Hispanic',
        'coeff_NonHisBlk': 'NonHisBlk',
        'coeff_EmployService': 'EmployService',
        'coeff_NoHighSchool': 'NoHighSchool',
    })
)

# Column dtypes and non-null counts of the reduced table
gtwr_full_results.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 203252 entries, 0 to 203251
Data columns (total 9 columns):
 #   Column         Non-Null Count   Dtype  
---  ------         --------------   -----  
 0   ZCTA           203252 non-null  int64  
 1   Week           203252 non-null  object 
 2   total_r        203252 non-null  float64
 3   Crowding       203252 non-null  float64
 4   Hispanic       203252 non-null  float64
 5   NonHisBlk      203252 non-null  float64
 6   EmployService  203252 non-null  float64
 7   NoHighSchool   203252 non-null  float64
 8   NAME           200446 non-null  object 
dtypes: float64(6), int64(1), object(2)
memory usage: 15.5+ MB


## Merge GeoDF with GTWR outputs

In [6]:
### Merge the ZCTA GeoDataFrame with the GTWR outputs.
### Left join on ZCTA: every polygon is kept and repeated once per matching result row.
ZCTA_withCoeff_gdf = ZCTA_CA.merge(gtwr_full_results, how='left', on='ZCTA')

# (rows, columns) of the merged frame, then a two-row preview
merged_shape = ZCTA_withCoeff_gdf.shape
print(merged_shape)
ZCTA_withCoeff_gdf.head(2)

(203252, 12)


Unnamed: 0,ZCTA5CE10,ZCTA,Area,geometry,Week,total_r,Crowding,Hispanic,NonHisBlk,EmployService,NoHighSchool,NAME
0,93924,93924,190.283915,"POLYGON ((-162540.814 -176061.241, -162041.898...",2020-01-20,0.0,-0.004047,-0.000575,0.015814,-0.003583,-0.003898,Monterey
1,93924,93924,190.283915,"POLYGON ((-162540.814 -176061.241, -162041.898...",2020-01-27,0.0,-0.014361,0.028274,-0.088127,-0.016516,-0.005007,Monterey


In [None]:
### Persist the merged GeoDataFrame for future use, then load it back from disk

# ZCTA_withCoeff_gdf.to_csv("GeoDataFram_AllZCTAs_FullTimeRange_withCoeff_proj6414.csv", index=False)

# Single place for the pickle file name so the write and the read cannot drift apart
pkl_path = "GeoDataFram_AllZCTAs_FullTimeRange_withCoeff_proj6414.pkl"
ZCTA_withCoeff_gdf.to_pickle(pkl_path)

# NOTE: unpickling executes code stored in the file -- only load pickles written by this notebook
ZCTA_withCoeff_gdf = pd.read_pickle(pkl_path)

---
## Plot No.1: ALL ZCTAs / Single Var / Full TimeRange / Value: Full TimeRange median

In [7]:
plot_vars = ['Crowding','Hispanic','NonHisBlk','EmployService','NoHighSchool']
colors = ['Reds','Greens','Blues','Oranges','Purples']

### Calculate the median of every coefficient for each ZCTA, over the full time range.
### A single groupby pass over the long (ZCTA x Week) table yields one row per ZCTA and
### one median column per entry of plot_vars, in plot_vars order.
ZCTA_median_by_var = ZCTA_withCoeff_gdf.groupby('ZCTA')[plot_vars].median().reset_index()

### Create a ZCTA gdf with only the medians for each location.
### Left join on ZCTA keeps every polygon of ZCTA_CA; the grouped table has one row per
### ZCTA, so the join cannot duplicate polygons.
ZCTA_withCoeff_gdf_medianOnly = ZCTA_CA.merge(ZCTA_median_by_var, on='ZCTA', how='left')

# (rows, columns) of the median-only frame, then a two-row preview
print(ZCTA_withCoeff_gdf_medianOnly.shape)
ZCTA_withCoeff_gdf_medianOnly.head(2)

(1666, 9)


Unnamed: 0,ZCTA5CE10,ZCTA,Area,geometry,Crowding,Hispanic,NonHisBlk,EmployService,NoHighSchool
0,93924,93924,190.283915,"POLYGON ((-162540.814 -176061.241, -162041.898...",-1.812241,7.67959,2.905605,-3.786627,7.671866
1,95404,95404,80.004021,"POLYGON ((-238341.052 60298.013, -237609.933 6...",0.904485,14.936286,6.184869,5.421746,12.757758


In [None]:
### Draw one choropleth per predictor from the per-ZCTA medians and save each as a PNG.
### Each map is classified into 7 quantile classes and overlaid with county outlines.
plot_data = ZCTA_withCoeff_gdf_medianOnly.copy()

# Variables and colormaps are paired by position (zip below).
# NOTE(review): the variable order here differs from the cell that computes the medians
# (NoHighSchool and EmployService are swapped), so NoHighSchool is drawn with 'Oranges'
# and EmployService with 'Purples' -- confirm this pairing is intended.
plot_vars = ['Crowding','Hispanic','NonHisBlk','NoHighSchool','EmployService']
colors = ['Reds','Greens','Blues','Oranges','Purples']

# When True, rows whose value is not strictly between the 2nd and 98th percentile of the
# plotted variable are dropped, so those ZCTAs are not drawn on that map at all.
filter_outlier = False


for plot_var, color in zip(plot_vars, colors):

    if filter_outlier:
        q_low = plot_data[plot_var].quantile(0.02)
        q_hi  = plot_data[plot_var].quantile(0.98)
        plot_data_final = plot_data[(plot_data[plot_var] < q_hi) & (plot_data[plot_var] > q_low)]
    else:
        plot_data_final = plot_data

    ### Plot DF
    fig, ax = plt.subplots(figsize=(4, 6))

    # ZCTA polygons coloured by quantile class, thin gray outlines
    plot_data_final.plot(ax=ax, column=plot_var, scheme='quantiles', k=7, cmap=color, edgecolor='gray', linewidth=0.08, legend=False)
    # County boundaries on top, outline only
    CA_County.plot(ax=ax, facecolor="none", edgecolor='gray', lw=0.14)

    ax.set_axis_off()

    # File name records the variable and whether the 2-98 percentile filter was applied
    output_map_name = f"AllZCTAs_median_{plot_var}_{'filtered[2-98]_' if filter_outlier else ''}proj6414.png"

    ### Save figures
    fig.savefig(output_map_name, transparent=True, bbox_inches='tight', pad_inches=0, dpi=300)

    ### Close the figure once it is saved: clearing it (clf) leaves it registered with
    ### pyplot, so figures would accumulate across loop iterations
    plt.close(fig)