In [67]:
##### Merge all barrier polygons into a single shapefile #####
##############################################################

from pathlib import Path
import pandas
import geopandas

# Merge every polygon shapefile found in `folder` into one layer and write it out.
# Raw strings keep the Windows backslashes literal (sequences such as "\B" and "\P"
# are invalid escapes in a normal string and raise a SyntaxWarning on recent Pythons).
folder = Path(r"E:\Barriers\Polygons")

# Sort the matches so the row order of the merged layer is reproducible between runs.
shapefiles = sorted(folder.glob("*.shp"))
if not shapefiles:
    # Fail with a clear message instead of pandas' "No objects to concatenate".
    raise FileNotFoundError("No .shp files found in {0}".format(folder))

# ignore_index=True gives the merged table a unique 0..n-1 index rather than
# repeating each input file's own 0..k index labels.
gdf = pandas.concat(
    [geopandas.read_file(shp) for shp in shapefiles],
    ignore_index=True,
).pipe(geopandas.GeoDataFrame)
gdf.to_file(r'E:\Barriers\Stations\US_barriers.shp')

In [None]:
##### Read stations.csv and convert to geodataframe #####
#########################################################

import pandas as pd
from geopandas import GeoDataFrame
from shapely.geometry import Point

# Read the station table and turn its Longitude/Latitude columns into point geometries.
# Raw strings keep the Windows backslashes literal ("\B" and "\S" are invalid escapes
# in a normal string and raise a SyntaxWarning on recent Pythons).
df = pd.read_csv(r"E:\Barriers\Stations\Stations.csv", sep=",", header=0)

# points_from_xy builds all points in one vectorised call (x = Longitude, y = Latitude);
# the CRS is declared as EPSG:4326 at construction time.
# NOTE(review): assumes the coordinates are geographic degrees on WGS84 -- confirm against the CSV source.
gdf = geopandas.GeoDataFrame(
    df,
    geometry=geopandas.points_from_xy(df["Longitude"], df["Latitude"]),
    crs="EPSG:4326",
)
gdf.to_file(r"E:\Barriers\Stations\Stations.shp")

In [None]:
##### Find nearest station to each barrier and its corresponding distance #####
###############################################################################

import geopandas as gpd

# For every barrier polygon, find the closest station and the distance to it (km),
# then save the barriers layer with those two extra attributes.
barriers = gpd.read_file(r'E:\Barriers\Stations\US_barriers.shp')
stations = gpd.read_file(r'E:\Barriers\Stations\Stations.shp')

# Put both layers in the same projected CRS before measuring distances.
# The result is divided by 1000 below and stored as kilometres.
barriers = barriers.to_crs('esri:102009')
stations = stations.to_crs('esri:102009')

if stations.empty:
    raise ValueError("Stations layer is empty; cannot search for the nearest station")

station_number = []
min_distance = []

for barrier_geom in barriers.geometry:
    if barrier_geom is None or barrier_geom.is_empty:
        # No geometry to measure from: record a gap instead of crashing the whole run.
        station_number.append(None)
        min_distance.append(float('nan'))
        continue

    # One vectorised call returns the distance from this barrier to every station.
    dist_to_stations = stations.geometry.distance(barrier_geom)
    nearest = dist_to_stations.idxmin()  # on ties the first station wins
    station_number.append(stations.loc[nearest, 'Station'])
    min_distance.append(dist_to_stations.loc[nearest] / 1000)

# Attach the results row-by-row (same order as `barriers`) instead of merging on
# 'name': a merge duplicates rows and can mix up stations when two barriers share a name.
# The shapefile format truncates field names to 10 characters, so these columns are
# stored as 'closest_st' and 'distance_k'; the exceedance cell reads 'closest_st'.
gdf = barriers.copy()
gdf['closest_station'] = station_number
gdf['distance_km'] = min_distance
gdf.to_file(r"E:\Barriers\Stations\Barriers_Stations.shp")

In [None]:
##### Calculate exceedance probability curves #####
###################################################

import networkx as nx
import numpy as np
import pandas as pd
import geopandas as gpd

from scipy import stats
from scipy.stats import genextreme

import requests
import random

import matplotlib.cm as cm
import matplotlib.colors as colors
import matplotlib.pyplot as plt
import seaborn as sns

%matplotlib inline

# Load required data
# (raw strings keep the Windows backslashes literal; "\B", "\S", "\P", "\E", "\M" and "\{"
# are invalid escapes in a normal string and raise a SyntaxWarning on recent Pythons)
param = pd.read_csv(r"E:\Barriers\Stations\Parameters.csv", sep=",", header=0) # table with parameters from NOAA's report
stations = pd.read_csv(r"E:\Barriers\Stations\Stations.csv", sep=",", header=0) # table with station information, including the MSL_Trend column
barriers = gpd.read_file(r"E:\Barriers\Stations\Barriers_Stations.shp") # shp with Gulf and Atlantic US barriers. Code of closest station and distance to it included as attributes
mhhw = pd.read_csv(r"E:\Barriers\Stations\MHHW.csv", sep=",", header=0) # table with MHHW info only for stations that were linked to the barriers

# Tunable settings
N_SAMPLES = 1000           # number of values sampled from the fitted GEV per barrier
RANDOM_SEED = 42           # fixed seed so CSVs and figures are reproducible on re-run
YLIM_RETURN_PERIOD = 100   # y axis of the curve is capped 0.5 above the level at about this return period
rng = np.random.default_rng(RANDOM_SEED)


def _last_match(table, column, station):
    """Return the last row of `table` whose `column` equals `station`, or None if no row matches."""
    rows = table.loc[table[column] == station]
    return rows.iloc[-1] if len(rows) else None


# Apply the seaborn theme once, before any figure is drawn, so that the first
# barrier's PDF figure is styled like all the others.
sns.set_theme()

# Loop over the US barriers layer and calculate exceedance for each barrier,
# using the parameters of its closest station
for barrier, station in zip(barriers['name'], barriers['closest_st']):
    param_row = _last_match(param, 'Station_Number', station)
    station_row = _last_match(stations, 'Station', station)
    mhhw_row = _last_match(mhhw, 'Station', station)

    if param_row is None or station_row is None or mhhw_row is None:
        # Without this guard the values of the previous barrier would be silently
        # reused (or a NameError raised on the first barrier).
        print("Skipping {0}: station {1} is missing from the parameter, station or MHHW table".format(barrier, station))
        continue

    # GEV parameters, passed to scipy in the order (c, loc, scale).
    # NOTE(review): scipy's genextreme documents that some sources use the opposite sign
    # for the shape parameter c -- confirm Shape_meters follows scipy's convention.
    shape_c = float(param_row['Shape_meters'])
    gev_loc = float(param_row['Location_meters'])
    gev_scale = float(param_row['Scale_meters'])

    MSL_trend = float(station_row['MSL_Trend'])  # MSL trend of that station (in mm, per the original author's note)
    station_name = station_row['Station_Name']
    MHHW = mhhw_row['MHHW']  # local MHHW (in m, per the original author's note)

    # ---- Figure 1: probability density function of the fitted GEV ----
    fig, ax = plt.subplots(1, 1)
    x = np.linspace(genextreme.ppf(0.001, shape_c, gev_loc, gev_scale),
                    genextreme.ppf(0.999, shape_c, gev_loc, gev_scale), 100)
    ax.plot(x, genextreme.pdf(x, shape_c, gev_loc, gev_scale), 'r-', lw=5, alpha=0.6,
            label='{0} genextreme pdf'.format(barrier))
    rv = genextreme(shape_c, gev_loc, gev_scale)
    ax.plot(x, rv.pdf(x), 'k-', lw=2, label='frozen pdf')
    ax.legend(loc='best', frameon=False)
    ax.set_title('{0}_{1}'.format(barrier, station_name))
    # Save before showing: some backends discard the figure once it has been shown.
    fig.savefig("E:\\Barriers\\Figures\\Exceedance\\PDF\\{0}.png".format(barrier), dpi=500, facecolor='w')
    plt.show()
    plt.close(fig)  # free the figure; two figures are created per barrier

    # ---- Empirical exceedance curve from a random GEV sample ----
    r = genextreme.rvs(shape_c, gev_loc, gev_scale, size=N_SAMPLES, random_state=rng) # sample from the distribution to get water hts > MHHW
    n = len(r) # total observations
    samples_sorted = np.sort(r) # sort 'r' observations ascending
    rank = np.arange(1, n + 1) # rank from 1:n, smallest first
    prob = (n - rank + 1) / (n + 1) # calculate probability
    return_years = 1 / prob # calculate return period (in years)
    trend = return_years * (MSL_trend / 1000) # linear background trend (MSL_trend divided by 1000: mm -> m)
    # add trend to >MHHW samples from GEV, then add background MHHW level for total water level
    MaxWL = (trend + samples_sorted) + MHHW

    df = pd.DataFrame({
        'Rank': rank,
        'MaxWL': MaxWL,
        'Probability': prob,
        'Return_Pd': return_years,
    })
    df.to_csv(r"E:\Barriers\Exceedance\Exceedance_prob\{0}_Exceedance.csv".format(barrier)) # save data in csv

    # To inspect a specific return period and its two nearest neighbours:
    #     ret_pd = 100
    #     df.iloc[(df['Return_Pd'] - ret_pd).abs().argsort()[:3]]

    # Rank whose return period (n + 1) / (n - rank + 1) is closest to YLIM_RETURN_PERIOD;
    # this is rank 991 for n = 1000 and a 100-year period. Clamped so that a small
    # N_SAMPLES cannot index outside the sample.
    ylim_rank = int(round((n + 1) * (1 - 1 / YLIM_RETURN_PERIOD)))
    ylim_rank = min(max(ylim_rank, 1), n)
    y_top = MaxWL[ylim_rank - 1]

    # ---- Figure 2: exceedance curve (return period vs. maximum water level) ----
    fig, ax = plt.subplots(figsize=(16, 9))
    ax.set(xscale="log")
    ax.tick_params(left=True, bottom=True)
    sns.scatterplot(x="Return_Pd", y="MaxWL", data=df, color='r', linewidth=0, s=25,
                    label='exceedance probability', ax=ax)
    # A log axis cannot start at 0 (matplotlib rejects non-positive limits); the smallest
    # return period is (n + 1) / n > 1, so starting at 1 hides no points.
    ax.set(xlim=(1, 150)) # set x axis limits
    ax.set(ylim=(0, y_top + 0.5)) # set y axis limits
    ax.legend(loc='best', frameon=False)
    ax.set_title('{0}_{1}'.format(barrier, station_name))

    fig.savefig("E:\\Barriers\\Figures\\Exceedance\\Curves\\{0}.png".format(barrier), dpi=500, facecolor='w')
    plt.show()
    plt.close(fig)