# Zip Code with Gage Figures
_Calvin Whealton_

This notebook loops through all the zip codes in the contiguous 48 states that have a zip code tabulation area (ZCTA). For each zip code, the closest gages are plotted by their distance from the zip code and by the trend (Kendall tau) in their time series. These plots show whether the floods near a location are, on average, getting larger or smaller with time.

In [None]:
import numpy as np
import pandas as pd
import geopandas as gpd
import matplotlib as mpl
import os
from shapely.geometry import box
from shapely.geometry import Polygon
import rtree

### Zip Code Tabulation Area Shapefile (Reading in and Reprojecting)

In [None]:
# Zip code tabulation area (ZCTA) polygons, read from a shapefile.
# The data are available as a TIGER/Line shapefile from the US Census and are
# also uploaded to https://drive.google.com/drive/folders/1z3JkCNWx-PuLXD_cuMLPa72Xcuk7lyI3?usp=sharing
zcta_dir = '/Users/calvinwhealton/Documents/GitHub/floods_housing_zipcode/data/geo_data/tl_2019_us_zcta510_clipped48contig'
# the working directory is changed so the shapefile can be opened by bare file name
os.chdir(zcta_dir)
zctas = gpd.read_file('clipped48contig.shp')

In [None]:
# display the coordinate reference system (CRS) of the ZCTA layer as read from the shapefile
# expected to be a geographic CRS in decimal lat and long (noted by the author as WGS84;
# confirm against the displayed output)
# the layer will be reprojected to a distance-based projection before distances are used
zctas.crs

In [None]:
# reproject the ZCTA polygons to EPSG:2163 (US National Atlas projection)
# so that coordinates are in distance units rather than degrees
zctas_dist = zctas.to_crs(epsg=2163)

In [None]:
# preview the first rows of the reprojected ZCTA table
zctas_dist.head()

### Stream Gages (Reading in and Reprojecting)

In [None]:
# stream gage site information, downloaded as a text file from the USGS website
# the file is tab-separated; lines starting with '#' are skipped as comments
gage_data_dir = '/Users/calvinwhealton/Documents/GitHub/floods_housing_zipcode/data/gage_data'
os.chdir(gage_data_dir)
gages = pd.read_csv('usgs_supp.txt', comment='#', sep='\t')

In [None]:
# preview the first rows of the gage site table
gages.head()

In [None]:
# smallest and largest contributing drainage area in the gage table
drain_area = gages['contrib_drain_area_va'].values
min(drain_area), max(drain_area)

In [None]:
# dropping locations without coordinates (modifies `gages` in place)
gages.dropna(subset=['dec_lat_va','dec_long_va'],inplace=True)

# build point geometries from the decimal longitude (x) and latitude (y) columns
gages_gdf = gpd.GeoDataFrame(gages,geometry=gpd.points_from_xy(gages.dec_long_va, gages.dec_lat_va))

# keep only the site number and the geometry
# the subset is taken from the GeoDataFrame itself (not from `gages`) so that the
# point geometry built above is guaranteed to be carried along; `gages` is only a
# plain DataFrame and is not guaranteed to contain a 'geometry' column
gages_gdf = gages_gdf[['site_no','geometry']]

In [None]:
# tag the gage points with their geographic CRS (EPSG:4269, NAD83 decimal lat/long)
# an authority string is used instead of the deprecated {'init': ...} dictionary form,
# matching the 'EPSG:...' strings used for reprojection elsewhere in this notebook
gages_gdf.crs = 'EPSG:4269'

# reproject to the same distance-based projection as the ZCTAs (EPSG:2163)
gages_gdf_dist = gages_gdf.to_crs('EPSG:2163')

# store the projected coordinates as plain columns
gages_gdf_dist['x'] = gages_gdf_dist['geometry'].x
gages_gdf_dist['y'] = gages_gdf_dist['geometry'].y

In [None]:
# preview the first rows of the reprojected gage table (site number, geometry, x, y)
gages_gdf_dist.head()

### Gages for Zip Code (Reading-in file)

In [None]:
# table of the closest gages to each zip code (precomputed and stored as a csv)
processed_dir = '/Users/calvinwhealton/Documents/GitHub/floods_housing_zipcode/data/processed_data'
os.chdir(processed_dir)
gage_zip = pd.read_csv('zip_gage_dist_2020-08-10.csv')

In [None]:
# zip code identifier as text, left-padded with zeros to a width of 5 characters
geoid_text = gage_zip['GEOID10'].astype(str)
gage_zip['GEOID10_str'] = geoid_text.str.rjust(5, fillchar='0')

In [None]:
# preview the first rows of the zip-code-to-gage table, including the padded id column
gage_zip.head()

In [None]:
# trend results for the gages (stored as a csv)
# this information is used when plotting the figures
trend_dir = '/Users/calvinwhealton/Documents/GitHub/floods_housing_zipcode/data/processed_data'
os.chdir(trend_dir)
gage_trends = pd.read_csv('gage_trends.csv')

### Making Maps

In [None]:
# ZCTA identifier left-padded with zeros to 5 characters,
# built the same way as the GEOID10_str column of gage_zip so the two can be compared
zcta_ids = zctas_dist['GEOID10']
zctas_dist['GEOID10_str'] = zcta_ids.str.rjust(5, fillchar='0')

In [None]:
# names of the ten gage columns: 'gage0' through 'gage9'
gage_cols_nms = ['gage' + str(rank) for rank in range(10)]



In [None]:
# example: gage numbers for the zip code in position 10 of gage_zip, as a flat array
example_zip = gage_zip['GEOID10_str'].values[10]
gage_zip.loc[gage_zip['GEOID10_str']==example_zip,gage_cols_nms].values.flatten()

In [None]:
import matplotlib.pyplot as plt

# column names to extract the gage numbers and distances for each zip code
gage_cols_nms = ['gage0','gage1','gage2','gage3','gage4','gage5','gage6','gage7','gage8','gage9']
dist_cols_nms = ['dist0','dist1','dist2','dist3','dist4','dist5','dist6','dist7','dist8','dist9']

# text sizes shared by all of the figures
size=25
params = {'legend.fontsize': 'large',
          'figure.figsize': (20,8),
          'axes.labelsize': size,
          'axes.titlesize': size,
          'xtick.labelsize': size*0.75,
          'ytick.labelsize': size*0.75,
          'axes.titlepad': 25}

plt.rcParams.update(params)

# figures are saved into this directory, one png per zip code
os.chdir('/Users/calvinwhealton/Documents/GitHub/floods_housing_zipcode/visualizations/zip_results/zip_gages')

# NOTE: symbol_fill and symbol_color are defined in cells further down this notebook;
# those cells must be run before this loop is executed

for z in zctas_dist['GEOID10_str'].values:

    # rows of gage_zip belonging to this zip code (computed once, used twice)
    zip_rows = gage_zip['GEOID10_str']==z

    # extract the gage numbers and distances to zip code and flatten them to 1-d arrays
    gages_for_zip = (gage_zip.loc[zip_rows,gage_cols_nms]).values.flatten()
    dist_for_zip = (gage_zip.loc[zip_rows,dist_cols_nms]).values.flatten()

    # a zip code without any entry in gage_zip has nothing to plot
    # skip it instead of failing later on the missing trend columns
    if gages_for_zip.size == 0:
        print('No gage record for zip code ' + z + ', skipping')
        continue

    # dataframe of trend results for the gages, in the order the gages are listed
    # built with a single concat (DataFrame.append is removed in recent pandas)
    gage_temp = pd.concat([gage_trends.loc[gage_trends['gage']==g] for g in gages_for_zip])

    gage_temp.reset_index(inplace=True)

    # distance for each gage; if a gage is listed more than once the first distance is used
    gage_temp['dist'] = np.asarray([dist_for_zip[gages_for_zip==g][0] for g in gage_temp['gage'].values],
                                   dtype=float)

    # making the figure
    # x: distance converted by /1000 for the km axis, y: absolute value of Kendall tau
    # marker size scales with |log10(|relative slope|)|
    # fill colour marks significance, edge colour marks the sign of the trend
    plt.figure(figsize=(10,8))
    plt.scatter(gage_temp['dist'].values/1000,abs(gage_temp['tau'].values),
                s=100*abs(np.log10(abs(gage_temp['slope_rel_ref'].values))),
                linewidths=3,
                c=symbol_fill(gage_temp),
                edgecolors=symbol_color(gage_temp)
               )
    plt.title('Closest ' + str(gage_temp.shape[0]) + ' Gages for zip code '+ z)
    plt.xlabel('Distance (km)')
    plt.ylabel('Time Trend (Kendall tau)')
    plt.savefig(z + '_zip_gage'+'.png')
    # close the figure so open figures do not accumulate over the loop
    plt.close()
    
    

In [None]:
def symbol_fill(gage_temp,increase='#fdae61',decrease='#abd9e9',ptest=0.1):
    '''
    function for the fill colour of the plotting symbols
    a filled symbol indicates a statistically significant result

    gage_temp: dataframe with one row per gage and columns 'pvalue' and 'tau'
    increase:  fill colour for significant gages with tau >= 0
    decrease:  fill colour for significant gages with tau < 0
    ptest:     a result is significant when pvalue <= ptest

    returns a list with one colour per row of gage_temp, in row order
    gages that are not significant (including a missing pvalue) are 'white'

    the sign rule (tau < 0 is decrease, otherwise increase) is the same one
    symbol_color uses for the edge colour, so the fill and the edge of a
    significant symbol always have the same colour
    '''

    # nothing to colour; also covers a dataframe that has no columns yet
    if len(gage_temp) == 0:
        return []

    fillers = []

    for pvalue, tau in zip(gage_temp['pvalue'].values, gage_temp['tau'].values):
        # written as "not <=" so that a NaN pvalue is treated as not significant
        if not pvalue <= ptest:
            fillers.append('white')
        elif tau < 0:
            fillers.append(decrease)
        else:
            fillers.append(increase)

    return fillers

In [None]:
def symbol_color(gage_temp,increase='#fdae61',decrease='#abd9e9'):
    '''
    function to pick the colour of each point from the sign of its trend
    rows with tau below zero get the decrease colour, all other rows get increase
    returns a list with one colour per row of gage_temp, in row order
    '''
    return [decrease if gage_temp.loc[row,'tau'] < 0 else increase
            for row in gage_temp.index]