# Figures and Videos
_Calvin Whealton_

Figures created for 2020-07-15 update.

Standard libraries used for plotting/wrangling/etc.

In [None]:
import geopandas as gpd
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os

Opening the US coastline file for the 48 contiguous states.

In [None]:
# Folder holding the coastline shapefile. The path is absolute and
# machine-specific; the working directory is switched to it so the
# shapefile can be opened by its bare file name.
coast_dir = '/Users/calvinwhealton/Documents/GitHub/floods_housing_zipcode/data/geo_data/US_48_COASTLINE'
os.chdir(coast_dir)
us_48_coast = gpd.read_file('US_COASTLINE.shp')

In [None]:
# quick-look plot of the coastline by itself (white fill, black outline)
base = us_48_coast.plot(color='white',edgecolor='black')

## Trend in Flooding

Evaluating the trend in flooding using the Kendall tau value between the flood series and the "year". The scipy Kendall tau value and the associated p-value are used to determine the strength of the trend.

In [None]:
from scipy.stats import kendalltau

# calculating the trend in each gage's peak-flow record
os.chdir('/Users/calvinwhealton/Documents/GitHub/floods_housing_zipcode/data/gage_data/peak_flows')

# finding all files in the directory (one file per gage)
# hidden files (e.g. macOS .DS_Store) are skipped because they are not gage
# records, and the names are sorted so the row order of the result does not
# depend on the arbitrary order returned by os.listdir()
list_files = sorted(f for f in os.listdir() if not f.startswith('.'))

# one record per gage is collected here and the dataframe is built once at
# the end; DataFrame.append was removed in pandas 2.0 and re-copied the whole
# frame on every iteration
kendall_records = []

# looping over files
for f in list_files:
    # read in file
    temp = pd.read_csv(f)

    # skip files that contain no observations
    if len(temp) == 0:
        continue

    # peak values
    peaks = temp['peak_va'].values

    # Kendall tau between the peaks and their position in the file, which
    # stands in for the "year" (assumes rows are in chronological order --
    # TODO confirm against the peak-flow files)
    tau, pvalue = kendalltau(peaks, range(len(peaks)))

    # gage id is the file name up to the first '.'
    kendall_records.append({'gage': f.split('.')[0], 'tau': tau, 'pvalue': pvalue})

# explicit columns keep the expected layout even if no file had observations
kendallt_df = pd.DataFrame(kendall_records, columns=['gage','tau','pvalue'])

# as before, the positional index is also kept as an explicit 'index' column
kendallt_df = kendallt_df.reset_index()


In [None]:
kendallt_df.head()

In [None]:
# Supplemental USGS table that includes the location of each gage.
# The file is tab-separated and its first 33 lines are skipped before parsing.
gage_data_dir = '/Users/calvinwhealton/Documents/GitHub/floods_housing_zipcode/data/gage_data'
os.chdir(gage_data_dir)
gage_supp = pd.read_csv('usgs_supp.txt', sep='\t', skiprows=33)

In [None]:
gage_supp.head()

In [None]:
# integer version of the gage id (taken from the file name) stored under
# 'site_no', the column later used as the merge key with the gage locations
kendallt_df['site_no'] = kendallt_df['gage'].astype(int)

In [None]:
def assign_cols_gages(df):
    '''
    Assign a plotting color to each gage from its Kendall tau trend.

    Only increasing trends (tau > 0) are highlighted; the smaller the
    p-value, the darker the red:

        tau > 0 and pvalue < 0.05  ->  '#b2182b'
        tau > 0 and pvalue < 0.10  ->  '#ef8a62'
        tau > 0 and pvalue < 0.20  ->  '#fddbc7'
        anything else              ->  '#f7f7f7'

    Parameters
    ----------
    df : DataFrame with 'tau' and 'pvalue' columns.

    Returns
    -------
    list of hex color strings, one per row of df, in row order.

    NOTE(review): the lightest red used to be keyed on tau < 0, which
    contradicted the emphasis on increasing trends and looked like a typo;
    confirm that tau > 0 is the intended condition.
    '''

    col_list = []

    # iterate over the values instead of the index labels, so a non-unique
    # index cannot turn df.loc[i, 'tau'] into a Series
    for tau, pvalue in zip(df['tau'], df['pvalue']):
        if not tau > 0:
            # flat, decreasing, or undefined (NaN) trend: neutral color
            col_list.append('#f7f7f7')
        elif pvalue < 0.05:
            col_list.append('#b2182b')
        elif pvalue < 0.10:
            col_list.append('#ef8a62')
        elif pvalue < 0.20:
            col_list.append('#fddbc7')
        else:
            col_list.append('#f7f7f7')

    return col_list
    

In [None]:
# Build one point per gage from the decimal longitude / latitude columns,
# then attach the points to the supplemental table as its geometry.
gage_points = gpd.points_from_xy(gage_supp.dec_long_va, gage_supp.dec_lat_va)
gage_supp_gdf = gpd.GeoDataFrame(gage_supp, geometry=gage_points)

In [None]:
# Some site numbers appear more than once in the supplemental table;
# keep only the first row for each site number.
is_repeat_site = gage_supp_gdf['site_no'].duplicated()
gage_supp_gdf2 = gage_supp_gdf[~is_repeat_site]

In [None]:
# merging with the kendall tau dataframe, so kendall tau values can be plotted across the US
# (merge() defaults to an inner join, so only gages found in both tables are kept)
# NOTE: this rebinds gage_supp_gdf2 to the merged result, so re-running this
# cell on its own merges the trend columns in a second time
gage_supp_gdf2 = gage_supp_gdf2.merge(kendallt_df,on='site_no')

In [None]:
gage_supp_gdf2.head()

In [None]:
# making a new column for the color for each point
# (one hex color string per row, as returned by assign_cols_gages)
gage_supp_gdf2['pt_col'] = assign_cols_gages(gage_supp_gdf2)

In [None]:
# Figure-wide defaults; set before the figure below is created.
plt.rcParams["figure.figsize"] = (40,30)
plt.rcParams["font.size"] = "50"

# Coastline as the base layer, gages drawn on top in their trend colors.
base = us_48_coast.plot(color='gainsboro',edgecolor='black')
gage_supp_gdf2.plot(ax=base,color=gage_supp_gdf2['pt_col'].values,marker='o',markersize=60)

# Label the map through the axes object returned by the base plot.
base.set_xlabel('Longitude',fontsize=50)
base.set_ylabel('Latitude',fontsize=50)
base.set_title('Stream Gages in Contiguous United States',fontsize=100)

# Videos

Making videos for the data. First step is to make a lot of plots, one for each time period and each type of data desired. Then the plots for a single year will be stitched together. Then, the plots will be stitched together in sequence to form a video.

In [None]:
# National flood insurance claims at a yearly time step.
# The working directory is switched to the processed-data folder so the
# file can be read by its bare name.
processed_dir = '/Users/calvinwhealton/Documents/GitHub/floods_housing_zipcode/data/processed_data'
os.chdir(processed_dir)
claims = pd.read_csv('ts_claims.csv')

In [None]:
claims.head()

In [None]:
# integer zip-code key; stored as 'GEOID102', the column name the claims are
# later merged on with the zip-code locations
claims['GEOID102'] = claims['zips'].values.astype(int)

In [None]:
# Zip-code (ZCTA) shapefile. Besides the shapes it carries the
# INTPTLAT10 / INTPTLON10 latitude / longitude columns that are used
# later to place one point per zip code.
zcta_dir = '/Users/calvinwhealton/Documents/GitHub/floods_housing_zipcode/data/geo_data/tl_2019_us_zcta510'
os.chdir(zcta_dir)
zips_latlong = gpd.read_file('tl_2019_us_zcta510.shp')

In [None]:
zips_latlong.head()

In [None]:
# Numeric copies of the longitude / latitude columns (the values are cast
# to float in case the shapefile stores them as strings).
for new_col, source_col in (('loc_x', 'INTPTLON10'), ('loc_y', 'INTPTLAT10')):
    zips_latlong[new_col] = zips_latlong[source_col].values.astype(float)

In [None]:
# wrap the zip-code geodataframe as a plain pandas DataFrame, so its geometry
# column can be dropped and replaced further down
zips_latlong_df = pd.DataFrame(zips_latlong)

In [None]:
zips_latlong_df.head()

In [None]:
zips_latlong.head()

In [None]:
# remove the geometry column that came with the shapefile
# NOTE: this rebinds zips_latlong_df, so re-running this cell on its own
# raises a KeyError because the column is already gone
zips_latlong_df = zips_latlong_df.drop(columns=['geometry'])

In [None]:
# Back to a geodataframe, this time with one point per zip code built
# from the loc_x / loc_y (longitude / latitude) columns.
zip_points = gpd.points_from_xy(zips_latlong_df.loc_x, zips_latlong_df.loc_y)
zips_latlong_gdf = gpd.GeoDataFrame(zips_latlong_df, geometry=zip_points)

In [None]:
zips_latlong_gdf.head()

In [None]:
# integer zip-code key stored as 'GEOID102', the column used for the merge
# with the claims table
zips_latlong_gdf['GEOID102'] = zips_latlong_gdf['GEOID10'].values.astype(int)

In [None]:
# attach the claims columns to the zip-code points
# (merge() defaults to an inner join, so only zip codes present in both tables are kept)
claims_gdf = zips_latlong_gdf.merge(claims,on='GEOID102')

In [None]:
claims_gdf.head()

In [None]:
def cols_claims(lister):
    '''
    Map a sequence of claim values to point colors.

    Any value whose magnitude is greater than zero is drawn in purple
    ('#6a51a3'); every other value (zero, or NaN) gets '#fcfbfd00',
    whose trailing '00' alpha channel makes the point invisible.

    lister : list or array of numbers
    returns: list of hex color strings, one per input value
    '''
    has_claim_col = '#6a51a3'
    no_claim_col = '#fcfbfd00'
    return [has_claim_col if abs(value) > 0 else no_claim_col for value in lister]

In [None]:
# making example plot about claims
# figure defaults are set first: rcParams only affect figures created after
# they are set, so setting them after plotting would not touch this figure
plt.rcParams["font.size"] = "50"
plt.rcParams["figure.figsize"] = (40,30)

# claims for the year 2000 at the zip-code points, drawn over the coastline
df_temp = gpd.GeoDataFrame(claims_gdf, columns =['2000', 'geometry'])
base = us_48_coast.plot(color='gainsboro',edgecolor='black')
df_temp.plot(ax=base,color= cols_claims(df_temp['2000'].values) ,marker='o',markersize=60)
plt.xlabel('Longitude',fontsize=50)
plt.ylabel('Latitude',fontsize=50)
plt.title('Flood Claims',fontsize=100)

# fixed map window so every map in the notebook shares the same extent
plt.xlim((-130,-65))
plt.ylim((24,50))


In [None]:
def cols_rps(lister):
    '''
    Assign point colors from return periods; redder means more extreme.

    The magnitude of each value is compared against three thresholds:

        |value| > 500  ->  '#a50f15'
        |value| > 100  ->  '#fb6a4a'
        |value| > 25   ->  '#fee5d9'
        otherwise      ->  '#fcfbfd00'  (alpha '00', i.e. not visible)

    NaN values fail every comparison and therefore get the last color.

    lister : list or array of numbers
    returns: list of hex color strings, one per input value
    '''
    col_list = []
    for value in lister:
        # take the magnitude once, so all three thresholds test the same
        # quantity (abs() used to be applied to the comparison result in
        # the two lower branches)
        magnitude = abs(value)
        if magnitude > 500:
            col_list.append('#a50f15')
        elif magnitude > 100:
            col_list.append('#fb6a4a')
        elif magnitude > 25:
            col_list.append('#fee5d9')
        else:
            col_list.append('#fcfbfd00')
    return col_list

In [None]:
# Return-period time series for the gages.
# The working directory is switched to the processed-data folder so the
# file can be read by its bare name.
rps_dir = '/Users/calvinwhealton/Documents/GitHub/floods_housing_zipcode/data/processed_data'
os.chdir(rps_dir)
rps_ts = pd.read_csv('ts_rps.csv')

In [None]:
rps_ts.head()

In [None]:
len(rps_ts['Gage'].unique())

In [None]:
# integer gage id stored under 'site_no', the column used as the merge key
# with the gage locations
rps_ts['site_no'] = rps_ts['Gage'].values.astype(int)

In [None]:
# add the return-period columns to the gage locations
# (merge() defaults to an inner join on the shared 'site_no' key)
gage_supp_gdf3 = gage_supp_gdf2.merge(rps_ts,on='site_no')

In [None]:
# making an example plot for the return periods
# figure defaults are set first: rcParams only affect figures created after
# they are set, so setting them after plotting would not touch this figure
plt.rcParams["font.size"] = "50"
plt.rcParams["figure.figsize"] = (40,30)

# return periods for the year 2017 at the gage points, drawn over the coastline
df_temp = gpd.GeoDataFrame(gage_supp_gdf3, columns =['2017', 'geometry'])
base = us_48_coast.plot(color='gainsboro',edgecolor='black')
df_temp.plot(ax=base,color= cols_rps(df_temp['2017'].values) ,marker='o',markersize=200)
plt.xlabel('Longitude',fontsize=50)
plt.ylabel('Latitude',fontsize=50)
# this map shows gage return periods, so it is titled accordingly
# (it used to carry the 'Flood Claims' title of the claims example)
plt.title('Gage Return Periods 2017',fontsize=100)

# fixed map window so every map in the notebook shares the same extent
plt.xlim((-130,-65))
plt.ylim((24,50))

In [None]:
# set directory where all plots will be saved
os.chdir('/Users/calvinwhealton/Documents/GitHub/floods_housing_zipcode/visualizations/videos/figs_2020-07-22')

# figure defaults for the frames; rcParams only affect figures created after
# they are set, so they are set once here, before any frame is drawn
plt.rcParams["font.size"] = "50"
plt.rcParams["figure.figsize"] = (40,15)

# loop to create the plots, one frame per year
for i in range(1996,2019):
    year = str(i)

    # claims for this year at the zip-code points, drawn over the coastline
    df_temp = gpd.GeoDataFrame(claims_gdf, columns =[year, 'geometry'])
    base = us_48_coast.plot(color='gainsboro',edgecolor='black')
    df_temp.plot(ax=base,color= cols_claims(df_temp[year].values) ,marker='o',markersize=60)
    plt.xlabel('Longitude',fontsize=50)
    plt.ylabel('Latitude',fontsize=50)
    plt.title('Flood Insurance Claims ' + year,fontsize=100)
    plt.xlim((-130,-65))
    plt.ylim((24,50))

    # 'Bbox' is not a savefig keyword (newer matplotlib rejects unknown
    # keywords), so it is no longer passed. bbox_inches='tight' is left out
    # on purpose: every frame must have the same pixel size to be stacked
    # and turned into a video, and tight cropping does not guarantee that.
    plt.savefig(fname=('claims_'+year+'.png'))

    # close the figure so frames do not pile up in memory
    plt.close()

# leave the figure-size default at the value this cell has always ended with
plt.rcParams["figure.figsize"] = (40,30)

In [None]:
# set directory where all plots will be saved
os.chdir('/Users/calvinwhealton/Documents/GitHub/floods_housing_zipcode/visualizations/videos/figs_2020-07-22')

# figure defaults for the frames; rcParams only affect figures created after
# they are set, so they are set once here, before any frame is drawn
plt.rcParams["font.size"] = "50"
plt.rcParams["figure.figsize"] = (40,15)

# loop to create the plots, one frame per year
for i in range(1996,2019):
    year = str(i)

    # return periods for this year at the gage points, drawn over the coastline
    df_temp = gpd.GeoDataFrame(gage_supp_gdf3, columns =[year, 'geometry'])
    base = us_48_coast.plot(color='gainsboro',edgecolor='black')
    df_temp.plot(ax=base,color= cols_rps(df_temp[year].values) ,marker='o',markersize=200)
    plt.xlabel('Longitude',fontsize=50)
    plt.ylabel('Latitude',fontsize=50)
    plt.title('Gage Return Periods ' + year,fontsize=100)
    plt.xlim((-130,-65))
    plt.ylim((24,50))

    # 'Bbox' is not a savefig keyword (newer matplotlib rejects unknown
    # keywords), so it is no longer passed. bbox_inches='tight' is left out
    # on purpose: every frame must have the same pixel size to be stacked
    # and turned into a video, and tight cropping does not guarantee that.
    plt.savefig(fname=('gages_'+year+'.png'))

    # close the figure so frames do not pile up in memory
    plt.close()

# leave the figure-size default at the value this cell has always ended with
plt.rcParams["figure.figsize"] = (40,30)

In [None]:
# stitching the figures together
# taken from: https://note.nkmk.me/en/python-pillow-concat-images/
from PIL import Image

def get_concat_h(im1, im2):
    '''
    Return a new RGB image with im2 placed to the right of im1.

    The canvas is im1.width + im2.width wide and im1.height tall; both
    images are pasted with their top edge at y = 0.
    '''
    canvas_size = (im1.width + im2.width, im1.height)
    canvas = Image.new('RGB', canvas_size)
    for tile, x_offset in ((im1, 0), (im2, im1.width)):
        canvas.paste(tile, (x_offset, 0))
    return canvas

def get_concat_v(im1, im2):
    '''
    Return a new RGB image with im2 placed underneath im1.

    The canvas is im1.width wide and im1.height + im2.height tall; both
    images are pasted with their left edge at x = 0.
    '''
    canvas_size = (im1.width, im1.height + im2.height)
    canvas = Image.new('RGB', canvas_size)
    for tile, y_offset in ((im1, 0), (im2, im1.height)):
        canvas.paste(tile, (0, y_offset))
    return canvas

# Stack each year's gage frame on top of its claims frame.
# File names are relative, so the working directory must still be the folder
# the yearly frames were saved in.

# Image.save does not create missing folders, so make sure the target exists
os.makedirs('combined', exist_ok=True)

for i in range(1996,2019):
    # the context managers close the files that Image.open leaves open
    with Image.open('gages_'+str(i)+'.png') as im1, Image.open('claims_'+str(i)+'.png') as im2:
        get_concat_v(im1, im2).save('combined/claim_gage_'+str(i)+'.png')


In [None]:
import moviepy.video.io.ImageSequenceClip

# Folder with the stacked yearly frames, and the playback speed of the video.
image_folder='/Users/calvinwhealton/Documents/GitHub/floods_housing_zipcode/visualizations/videos/figs_2020-07-22/combined'
fps=1

# Every .png in the folder becomes a frame. The full paths are sorted
# because os.listdir gives no ordering guarantee and the years would
# otherwise get jumbled.
png_names = [img for img in os.listdir(image_folder) if img.endswith(".png")]
image_files = sorted(image_folder+'/'+img for img in png_names)

# Assemble the frames into a clip and write it to the working directory.
clip = moviepy.video.io.ImageSequenceClip.ImageSequenceClip(image_files, fps=fps)
clip.write_videofile('claim_gage_video.mp4')