 # <strong>Road networks and robustness to flooding on US Atlantic and Gulf barrier islands</strong>
 ## <strong>- Statistics -</strong>
 ### This notebook generates the statistics reported in the manuscript

In [1]:
### Packages

import os
import geopandas as gpd
import pandas as pd
import networkx as nx
import numpy as np

In [2]:
# Project root that contains the ./Data and ./Results folders read by the
# cells below. Leave empty to keep the directory the kernel was started in.
path = ''

# os.chdir('') raises FileNotFoundError, so only change directory when a
# path has actually been provided.
if path:
    os.chdir(path)

In [3]:
### Area and shoreline length of the US Atlantic and Gulf barrier islands

# Load the barrier polygons, reproject them to ESRI:102003 and derive the
# area (in km2) and shoreline length (in km) of every barrier
barriers = gpd.read_file('./Data/Exceedance/US_barriers.shp').to_crs('esri:102003')
barriers["area"] = barriers['geometry'].area / 10**6
barriers["length"] = barriers['geometry'].length / 10**3

# Barriers listed in the results table are the "sampled" ones (networks with
# more than 100 nodes); every other barrier polygon goes into "unsampled"
table = pd.read_csv("./Results/Results_AllBarriers.csv", sep=",", header=0)
sampled_barriers = list(table['Barrier'])
is_sampled = barriers['name'].isin(sampled_barriers)
sampled = barriers[is_sampled]
unsampled = barriers[~is_sampled]

# Number of barriers with more than 100 nodes (rows of the results table)
number_sampled = len(sampled_barriers)

# Stats: area
area_total_sum = barriers['area'].sum()
area_sampled_sum = sampled['area'].sum()
area_sampled_mean = sampled['area'].mean()
area_unsampled_mean = unsampled['area'].mean()

# Sampled barriers below 100 km2 and below 25 km2 (count and share)
area_sampled_100 = sampled[sampled['area'] < 100].shape[0]
area_sampled_100_perc = area_sampled_100 / number_sampled * 100
area_sampled_25 = sampled[sampled['area'] < 25].shape[0]
area_sampled_25_perc = area_sampled_25 / number_sampled * 100

# Stats: shoreline length
length_total_sum = barriers['length'].sum()
length_sampled_sum = sampled['length'].sum()

In [5]:
### Street length

rootdir = './Results/Statistics'
# One-element tuple. A bare ('.csv') is just the string '.csv', which turns
# `ext in extensions` into a substring test that also accepts files with no
# extension (e.g. '.DS_Store') or a partial one (e.g. '.cs').
extensions = ('.csv',)

barrier_names = []
length_street = []

# Loop through the statistics files and read the total street length of each
# barrier from the row labelled 'street_length_total'
for subdir, dirs, files in os.walk(rootdir):
    for file in files:
        stem, ext = os.path.splitext(file)
        if ext.lower() not in extensions:
            continue
        file_path = os.path.join(subdir, file)
        # Barrier name = file name without extension and without the "_geo" tag
        barrier = stem.replace("_geo", "")
        stats_table = pd.read_csv(file_path, sep=",", header=0)
        # First column holds the statistic name, second column its value
        stats_table = stats_table.rename(columns={stats_table.columns[0]: "stats",
                                                  stats_table.columns[1]: "values"})
        matches = stats_table.loc[stats_table['stats'] == 'street_length_total', 'values']
        if matches.empty:
            raise ValueError("'street_length_total' not found in {}".format(file_path))
        # float() also copes with a value column that was parsed as text.
        # Division by 10**3 presumably converts m to km -- TODO confirm units.
        length = float(matches.iloc[0]) / 10**3
        barrier_names.append(barrier)
        length_street.append(length)

# Create new dataframe with results and keep the barriers that have more than 100 nodes
df = pd.DataFrame({'Barrier': barrier_names, 'Street_length': length_street})

table = pd.read_csv("./Results/Results_AllBarriers.csv", sep=",", header=0)
sampled_barriers = list(table.Barrier)
sampled = df.query('Barrier in @sampled_barriers')

# Denominator for the percentages: number of barriers in the results table.
# Recomputed here so the cell does not rely on a variable set by another cell.
number_sampled = len(sampled_barriers)

# Stats
street_length_min = sampled['Street_length'].min()
street_length_max = sampled['Street_length'].max()
street_length_mean = sampled['Street_length'].mean()
street_length_200 = len(sampled.loc[sampled['Street_length'] > 200])
street_length_200_perc = street_length_200 / number_sampled * 100

In [6]:
### Number of nodes

rootdir = './Data/Roads'
# One-element tuple. A bare ('.graphml') is just a string, which turns
# `ext in extensions` into a substring test that also accepts files with no
# extension (e.g. '.DS_Store').
extensions = ('.graphml',)

barrier_names = []
nodes = []

# Loop through files and open each barrier graphml to retrieve the number of
# nodes in its drivable road network
for subdir, dirs, files in os.walk(rootdir):
    for file in files:
        stem, ext = os.path.splitext(file)
        if ext.lower() not in extensions:
            continue
        file_path = os.path.join(subdir, file)
        barrier = stem  # file name without the extension
        barrier_names.append(barrier)
        G = nx.read_graphml(file_path)
        N = G.number_of_nodes()
        nodes.append(N)

df = pd.DataFrame({'Barrier': barrier_names, 'Nodes': nodes})

# Number of graphml files found, i.e. barriers with a drivable network
number_drivable = len(df)

# Keep the barriers that have more than 100 nodes (sampled)
table = pd.read_csv("./Results/Results_AllBarriers.csv", sep=",", header=0)
sampled_barriers = list(table.Barrier)
sampled = df.query('Barrier in @sampled_barriers')

# Denominator for the percentages: number of barriers in the results table.
# Recomputed here so the cell does not rely on a variable set by another cell.
number_sampled = len(sampled_barriers)

# Stats
nodes_min = sampled['Nodes'].min()
nodes_max = sampled['Nodes'].max()
nodes_mean = sampled['Nodes'].mean()
nodes_1000 = len(sampled.loc[sampled['Nodes'] > 1000])
nodes_1000_perc = nodes_1000 / number_sampled * 100

In [8]:
### Nodes elevation

rootdir = './Data/Roads'
# One-element tuple. A bare ('.graphml') is just a string, which turns
# `ext in extensions` into a substring test that also accepts files with no
# extension (e.g. '.DS_Store').
extensions = ('.graphml',)

elevations = []
barrier_names = []

# Loop through files and open each barrier graphml to retrieve the elevation
# of every node in each drivable network
for subdir, dirs, files in os.walk(rootdir):
    for file in files:
        stem, ext = os.path.splitext(file)
        if ext.lower() not in extensions:
            continue
        file_path = os.path.join(subdir, file)
        barrier = stem  # file name without the extension
        barrier_names.append(barrier)
        G = nx.read_graphml(file_path)
        N = G.number_of_nodes()
        # select only those that have more than 100 nodes
        if N > 100:
            df = pd.DataFrame.from_dict(dict(G.nodes(data=True)), orient='index')
            # The 'Elevations' node attribute is converted to numbers here
            Elev = pd.to_numeric(df.Elevations)
            elevations.append(Elev.to_numpy())

# Pool the node elevations of all selected networks into one array
if not elevations:
    raise ValueError("No network with more than 100 nodes found under {}".format(rootdir))
elevations = np.concatenate(elevations)

# Stats (percentages are relative to the pooled number of nodes)
n_nodes = elevations.size
elevations_mean = elevations.mean()
elevations_1btw3 = np.count_nonzero((elevations > 1) & (elevations < 3)) / n_nodes * 100
elevations_1 = np.count_nonzero(elevations < 1) / n_nodes * 100
elevations_5 = np.count_nonzero(elevations > 5) / n_nodes * 100
elevations_10 = np.count_nonzero(elevations > 10) / n_nodes * 100

In [12]:
### Critical elevation

table = pd.read_csv("./Results/Results_AllBarriers.csv", sep=",", header=0)
df = table[['Barrier', 'Critical_elevation']]

# Denominator for the percentages: number of barriers in the results table,
# taken from the data instead of a hard-coded 72 so it cannot drift from the file
n_barriers = len(df)

# Stats: number and share of barriers whose critical elevation is below each threshold
z_5 = len(df.loc[df['Critical_elevation'] < 5])
z_5_perc = z_5 / n_barriers * 100
z_25 = len(df.loc[df['Critical_elevation'] < 2.5])
z_25_perc = z_25 / n_barriers * 100
z_15 = len(df.loc[df['Critical_elevation'] < 1.5])
z_15_perc = z_15 / n_barriers * 100
z_1 = len(df.loc[df['Critical_elevation'] < 1])
z_1_perc = z_1 / n_barriers * 100

In [7]:
### Critical exceedance

table = pd.read_csv("./Results/Results_AllBarriers.csv", sep=",", header=0)
df = table[['Barrier', 'Critical_exceedance']]

# Denominator for the percentages: number of barriers in the results table,
# taken from the data instead of a hard-coded 72 so it cannot drift from the file
n_barriers = len(df)

# Stats: number and share of barriers whose critical exceedance is above each threshold.
# NOTE(review): the names suggest 0.01 and 0.1 stand for 100- and 10-year
# return periods -- confirm the units of Critical_exceedance.
e_100 = len(df.loc[df['Critical_exceedance'] > 0.01])
e_100_perc = e_100 / n_barriers * 100
e_10 = len(df.loc[df['Critical_exceedance'] > 0.1])
e_10_perc = e_10 / n_barriers * 100

In [12]:
### Critical elevation and critical exceedance

table = pd.read_csv("./Results/Results_AllBarriers.csv", sep=",", header=0)
df = table[['Barrier', 'Critical_exceedance', 'Critical_elevation']]

# Stats
# Mean critical elevation of the barriers whose critical exceedance is above
# 0.1, i.e. the subset that the critical exceedance cell counts as `e_10`.
# A threshold of 10 is not on the same scale as the 0.01 / 0.1 thresholds
# applied to this column in that cell and would select every barrier.
# NOTE(review): confirm that "> 0.1" is the intended subset.
e_10_barriers = df.loc[df['Critical_exceedance'] > 0.1]
e_10_meanz = e_10_barriers['Critical_elevation'].mean()

# `e_10` stays bound to the subset so existing uses of this cell's output keep
# working. Note that this rebinds the name used for the count in the critical
# exceedance cell.
e_10 = e_10_barriers

In [13]:
### Robustness

table = pd.read_csv("./Results/Results_AllBarriers.csv", sep=",", header=0)
df = table[['Barrier', 'Robustness']]

# Denominator for the percentages: number of barriers in the results table,
# taken from the data instead of a hard-coded 72 so it cannot drift from the file
n_barriers = len(df)

robustness = df['Robustness']

# Stats: number and share of barriers in each robustness range
# NOTE(review): the name says "03btw05" but the upper bound used is 0.4 --
# confirm which one is intended.
R_03btw05 = len(df.loc[(robustness > 0.3) & (robustness < 0.4)])
R_03btw05_perc = R_03btw05 / n_barriers * 100
R_04 = len(df.loc[robustness > 0.4])
R_04_perc = R_04 / n_barriers * 100
R_045 = len(df.loc[robustness > 0.45])
R_045_perc = R_045 / n_barriers * 100
R_03 = len(df.loc[robustness < 0.3])
R_03_perc = R_03 / n_barriers * 100
R_02 = len(df.loc[robustness < 0.2])
R_02_perc = R_02 / n_barriers * 100

# Rows (barrier name and value) with the highest and lowest robustness
R_max = df.loc[robustness == robustness.max()]
R_min = df.loc[robustness == robustness.min()]