# Trends in Flood Gages
_Calvin Whealton_

This notebook calculates trends in the stream gages. For each stream gage, Kendall's tau is calculated between the peak flows and the "year" of each flood. Gages with trends are identified and the significance of each trend is measured.

In [20]:
import pandas as pd
import numpy as np
from scipy.stats import kendalltau
from scipy.stats import linregress
from scipy.stats import norm
import os

In [4]:
# changing the working directory to where the peak flow data is located,
# so the gage files can be listed and read by bare file name
# NOTE(review): absolute, machine-specific path -- must be edited to run on another machine
os.chdir('/Users/calvinwhealton/Documents/GitHub/floods_housing_zipcode/data/gage_data/peak_flows')

In [77]:
# a gage is analyzed only if it has MORE than this many non-missing peak flows
MIN_PEAKS = 15

# zero peak flows are replaced by this fraction of the smallest positive peak
# NOTE(review): confirm 0.99 is the intended factor
ZERO_FLOW_FACTOR = 0.99

# list of files in the working directory
# sorted so the order of the output rows is reproducible; hidden entries
# (e.g. .DS_Store) and sub-directories are skipped because they cannot be read as gage files
list_files = sorted(name for name in os.listdir()
                    if not name.startswith('.') and os.path.isfile(name))

# one dict per analyzed gage; the results dataframe is built once after the loop
# (DataFrame.append was removed in pandas 2.0 and copies the whole frame on every call)
trend_records = []

for f in list_files:
    # read in file
    # all files in directory have the same structure
    # all were scraped from the USGS
    temp = pd.read_csv(f)

    # nothing to analyze in a file without data rows
    if len(temp) == 0:
        continue

    # peak values, forced to float so that the zero-flow replacement below
    # is not truncated when the column was read as integers
    peaks = np.array(temp['peak_va'], dtype=float)
    keepers = ~np.isnan(peaks)
    peaks = peaks[keepers]

    # record too short for a trend estimate
    if len(peaks) <= MIN_PEAKS:
        continue

    # years: the text before the first '-' of the peak date,
    # parsed only for the rows that have a peak value
    yrs = np.array([int(d.split('-')[0]) for d in temp['peak_dt'].to_numpy()[keepers]])

    # Kendall tau (and its p-value) between peak flow and year
    tau, tau_pvalue = kendalltau(peaks, yrs)

    # overall trend in floods: slope of an ordinary linear regression
    # of peak flow on year (computed before any zero flows are replaced)
    slope = linregress(yrs, peaks).slope

    # dealing with special case of a zero peak flow:
    # zeros are set to ZERO_FLOW_FACTOR times the smallest positive flow
    # so that the logarithm below is defined
    if peaks.min() == 0:
        positive_peaks = peaks[peaks > 0]
        if positive_peaks.size == 0:
            # every peak is zero: there is no positive flow to scale from
            # and the relative slope would be 0/0, so the gage is skipped
            continue
        peaks[peaks == 0] = ZERO_FLOW_FACTOR * positive_peaks.min()

    # log-space mean and standard deviation
    mean_ls = np.mean(np.log(peaks))
    sd_ls = np.std(np.log(peaks))

    # reference flow from the fitted log-normal distribution
    # NOTE(review): quantile 0.5 is the median, not a 100-year flood (that would
    # presumably be quantile 0.99), and fl_ref is not used in the result below -- confirm intent
    fl_ref = np.exp(norm.ppf(0.5, loc=mean_ls, scale=sd_ls))

    # measure of increase:
    # regression slope as a fraction of the mean peak flow
    # (the mean is taken after the zero-flow replacement above)
    slope_rel_fl_ref = slope/np.mean(peaks)

    # gage id is the file name up to the first '.'
    trend_records.append({'gage': f.split('.')[0],
                          'tau': tau,
                          'pvalue': tau_pvalue,
                          'slope_rel_ref': slope_rel_fl_ref})

# dataframe that stores the results (columns fixed even when no gage qualified)
kendallt_df = pd.DataFrame(trend_records,
                           columns=['gage','tau','pvalue','slope_rel_ref'])

In [78]:
# largest relative slope over all analyzed gages
# Series.max() skips NaN; the builtin max() gives an order-dependent answer when NaN is present
kendallt_df['slope_rel_ref'].max()

0.3117550905249261

In [79]:
# number of gages whose relative slope is greater than 0.01
(kendallt_df['slope_rel_ref'] > 0.01).sum()

2449

In [80]:
# move the row labels into an 'index' column
# guarded so that re-running this cell is a no-op: an unguarded second
# reset_index would add a 'level_0' column and a third would raise
if 'index' not in kendallt_df.columns:
    kendallt_df = kendallt_df.reset_index()

In [82]:
# switch to the processed-data directory and write the per-gage trend table there
# NOTE(review): absolute, machine-specific path -- must be edited to run on another machine
os.chdir('/Users/calvinwhealton/Documents/GitHub/floods_housing_zipcode/data/processed_data')
# to_csv is called with its default index=True, so the row labels are
# written as an additional unnamed first column of the file
kendallt_df.to_csv('gage_trends.csv')