# Coastal Compound Floods - Hourly Observational
- current implementation is flawed
- thresholds are hard coded
- risk level is also made up
- some results were checked and did not match any occurred events
- historical floods were not identified in the results
- cmip6 data was not included

In [1]:
# !pip install noaa_coops
# !pip install meteostat

import os
import re
import glob
import warnings
import pandas as pd
import numpy as np
from scipy import stats
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import plotly.io as pio
import plotly.graph_objects as go
from plotly.colors import qualitative
from plotly.subplots import make_subplots
import geopandas as gpd
from shapely.geometry import Point, box
import contextily as cx
import matplotlib.dates as mdates
import matplotlib.lines as mlines
from IPython.display import Image, display
import plot_settings

warnings.filterwarnings('ignore')

static = True # interactive/static plotly plots
PATH = 'data/floods' 
PATH2 = PATH + '/hourly_max' # daily_max, daily_mean, hourly_max
# !python process_flood.py --output_dir $PATH

# Load Data

In [2]:
# Load NOAA and Meteo
noaa = pd.read_csv(f'{PATH}/noaa_tc.csv')
noaa['id'] = noaa.id.astype(str)
noaa[['start_date', 'end_date']] = noaa[['start_date', 'end_date']].apply(pd.to_datetime)
noaa['station'] = noaa.name + ' ' + noaa.id
wind_stations = noaa.loc[noaa.variable == 'Wind', 'id'].values
water_stations = noaa.loc[noaa.variable == 'Verified 6-Minute Water Level', 'id'].values

meteo = pd.read_csv(f'{PATH}/meteo.csv')
meteo[['hourly_start', 'hourly_end']] = meteo[['hourly_start', 'hourly_end']].apply(pd.to_datetime)
meteo['station'] = meteo.name + ' ' + meteo.id

# Load DataFrames
wind_df = pd.read_parquet(f'{PATH2}/noaa_tc.wind.parquet')
wind_df = wind_df.loc[:, ~wind_df.columns.str.contains('flag')]
water_df = pd.read_parquet(f'{PATH2}/noaa_tc.water.parquet')
meteo_df = pd.read_parquet(f'{PATH2}/meteo.pr_wind.parquet')

# Join all data
df = pd.concat([wind_df, water_df, meteo_df], axis=1)
df = df[sorted(df.columns)][df.index.year >= 1950]
# df[df.filter(regex='pr_').columns] = df.filter(regex='pr_').replace(0, np.nan)

# Remove sparse years
def mask_sparse_years(df, min_obs=100):
    '''Set all values in a year to nan if that year does not have at least min_obs values'''
    sparse_mask = df.notna().groupby(df.index.year).sum() < min_obs
    year_index = df.index.year.values
    mask = np.zeros_like(df.values, dtype=bool)
    for col_idx, col in enumerate(df.columns):
        for year_idx, year in enumerate(sparse_mask.index):
            if sparse_mask.iloc[year_idx, col_idx]:
                mask[year_index == year, col_idx] = True
    return df.mask(mask)

df = mask_sparse_years(df)

# Variables map
var_map = {
    'speed_': 'Wind Speed (m/s)',
    'gust_': 'Wind Gust (m/s)',
    'mhhw_': 'Mean Higher High Water (m)',
    'pr_': 'Precipitation (mm)',
    'dir_': 'Wind Direction',
    'dirdeg_': 'Wind Direction (degree)',
    'type_': 'Tide Type (LL-1, L-2, H-3, HH-4)'
}

# Stations with color
color_list = px.colors.qualitative.Plotly + px.colors.qualitative.Light24 + px.colors.qualitative.Alphabet
colors = {}
for sid, name in sorted(
    list(zip(noaa['id'].astype(str), noaa['name'])) +
    list(zip(meteo['id'], meteo['name']))
):
    colors.setdefault(sid, {'name': name or sid, 'color': color_list[len(colors)]})
colors = pd.DataFrame.from_dict(colors, orient='index').reset_index().rename(columns={'index': 'id'})
tide_map = {1: 'LL', 2: 'L', 3: 'H', 4: 'HH'}
ordered_tides = list(tide_map.values())[::-1]

if len(df) != len(pd.date_range(df.index.min(), df.index.max(), freq='h')):
    print(f'Missing {len(pd.date_range(df.index.min(), df.index.max(), freq='h'))- len(df)} observations')

Missing 5 observations


# Analysis

1. **Data Standardization**
   - Normalize measurements across different stations/sources for comparability
   - Use quantile mapping and median adjustment techniques depending on variable type
   - Multiplicative for precipitation and wind variables

2. **Precipitation Event Identification**
   - Identify distinct precipitation events using minimum gap criteria (3-hour)
   - Calculate metrics per event: duration, total precipitation, and intensity (mm/hr)

3. **Multi-Hazard Event Identification**
   - Detect and flag specific hazards:
     * High-intensity precipitation events
     * Significant 24hr precipitation accumulation
     * High tide conditions
     * Storm surge potential (onshore winds and high wind speeds)
   - Identify periods when multiple hazards coincide (compound events)

4. **Compound Event Characterization**
   - Find start and end times of each compound event
   - Label which specific hazards are present in each event
   - Get event duration and hazard counts
   - Calculate metrics for each event (precipitation totals, intensities, tides, winds)

5. **Risk Assessment and Classification**
   - Calculate risk metrics
   - Develops a multi-factor risk scoring system incorporating:
     * Number of coincident hazards
     * Precipitation intensity (binned into severity categories)
     * Daily precipitation levels
   - Classify events into risk levels: Low, Moderate, High, and Extreme

In [3]:
def standardize(df, variables, ref_station=None):
    result = df.copy()
    for var in variables:
        cols = [c for c in df if var in c]
        if not cols: continue
        ref_col = (f"{var}{ref_station}" if ref_station and f"{var}{ref_station}" in cols 
                   else max(cols, key=lambda c: df[c].count()))
        ref = df[ref_col].dropna()
        if ref.size < 100: continue
        for c in cols:
            if c == ref_col: continue
            src = df[c].dropna()
            if src.size < 100: continue
            common = src.index.intersection(ref.index)
            if common.size < 100:
                ranks = stats.rankdata(src) / src.size
                result.loc[src.index, c] = np.quantile(ref, ranks)
            else:
                src_vals, ref_vals = src.loc[common], ref.loc[common]
                if any(k in c for k in ['pr_', 'speed_', 'gust_']):
                    idx = (src_vals > 0) & (ref_vals > 0)
                    if idx.sum() > 50:
                        result.loc[src.index, c] = src * (ref_vals[idx].median() / src_vals[idx].median())
                else:
                    result.loc[src.index, c] = src + (ref_vals.median() - src_vals.median())
    return result

def analyze_precipitation_with_storms(df, pr_pattern='pr_', min_gap=3):
    pr_df = df.filter(regex=pr_pattern).dropna(how='all')
    labeled_df, all_storms = df.copy(), {}
    for col in pr_df:
        storms, mask = [], pd.Series(np.nan, index=pr_df.index)
        in_storm, start, end, total, dry = False, None, None, 0, 0
        for t, v in pr_df[col].items():
            if pd.notna(v) and v > 0:
                if not in_storm: in_storm, start = True, t
                end, total, dry = t, total + v, 0
            elif in_storm:
                dry += 1
                if dry >= min_gap:
                    hrs = (end - start).total_seconds() / 3600 + 1
                    intensity = total/hrs
                    storms.append({'start': start, 'end': end, 'hours': hrs, 'mm': total, 'intensity': intensity})
                    mask[start:end] = intensity
                    in_storm, total = False, 0
        if in_storm:
            hrs = (end - start).total_seconds() / 3600 + 1
            intensity = total/hrs
            storms.append({'start': start, 'end': end, 'hours': hrs, 'mm': total, 'intensity': intensity})
            mask[start:end] = intensity
        labeled_df[f'intensity_{col}'] = mask
        all_storms[col] = pd.DataFrame(storms)
    stats = {
        col.replace(pr_pattern, ''): {
            'storms': len(s),
            'total_mm': s['mm'].sum() if not s.empty else 0,
            'avg_intensity': s['intensity'].mean() if not s.empty else 0,
            'max_mm': s['mm'].max() if not s.empty else 0,
            'max_intensity': s['intensity'].max() if not s.empty else 0
        } for col, s in all_storms.items()
    }
    return pd.DataFrame(stats).T, all_storms, labeled_df


def identify_compound_events(df, storm_data, storms_df, daily_precip, thresholds, window_hours=24):
    events = pd.DataFrame(index=df.index)
    
    # High-intensity precipitation
    events['high_intensity'] = storm_data.filter(regex='intensity_').max(axis=1) >= thresholds.get('intensity', 8.0)
    
    # Daily precipitation levels
    max_daily = daily_precip.max(axis=1)
    levels = [100, 150, 200]
    for t in levels:
        events[f'daily_precip_{t}mm'] = max_daily >= t
    events['high_daily_precip'] = sum((max_daily >= t).astype(int) for t in levels)
    events['high_daily_precip_flag'] = events['high_daily_precip'] > 0

    # High tide
    events['high_tide'] = df.filter(regex='mhhw_').max(axis=1) > thresholds.get('mhhw', 0)
    
    # Storm surge potential
    onshore = df.filter(regex='^dirdeg_').apply(lambda x: ((x <= 135) | (x >= 300))).any(axis=1)
    wind = pd.Series(False, index=df.index)
    for prefix, key, default in [('speed_', 'wind_speed', 10), ('gust_', 'wind_gust', 15)]:
        wind_df = df.filter(regex=f'^{prefix}')
        wind |= wind_df.max(axis=1) > thresholds.get(key, default)
    events['storm_surge'] = onshore & wind
    
    # Compound risk windows
    hazards = ['high_intensity', 'high_daily_precip_flag', 'high_tide', 'storm_surge']
    events['compound_risk'] = 0
    for h in hazards:
        if h in events:
            win_col = f'{h}_window'
            window = pd.Series(0, index=df.index)
            for idx in events.index[events[h].fillna(False)]:
                end = min(events.index[-1], idx + pd.Timedelta(hours=window_hours))
                window.loc[idx:end] = 1
            events[win_col] = window
            events['compound_risk'] += window

    # Detect compound events (≥2 hazards coincident)
    compound_events, current_event = [], None
    for idx, row in events.iterrows():
        if row['compound_risk'] >= 2:
            if current_event is None:
                current_event = {'start': idx, 'hazards': set()}
            current_event['end'] = idx
            current_event['hazards'].update(
                h for h in hazards if row.get(f'{h}_window', 0)
            )
        elif current_event:
            duration = (current_event['end'] - current_event['start']).total_seconds() / 3600
            max_precip = events.loc[current_event['start']:current_event['end'], 'high_daily_precip'].max()
            compound_events.append({
                'start': current_event['start'],
                'end': current_event['end'],
                'duration': duration,
                'hazard_count': len(current_event['hazards']),
                'hazards': list(current_event['hazards']),
                'daily_precip_level': max_precip
            })
            current_event = None

    if current_event:
        duration = (current_event['end'] - current_event['start']).total_seconds() / 3600
        max_precip = events.loc[current_event['start']:current_event['end'], 'high_daily_precip'].max()
        compound_events.append({
            'start': current_event['start'],
            'end': current_event['end'],
            'duration': duration,
            'hazard_count': len(current_event['hazards']),
            'hazards': list(current_event['hazards']),
            'daily_precip_level': max_precip
        })
        
    return pd.DataFrame(compound_events), events
    

def assess_flood_risk(df, storms, events):
    if events.empty: return pd.DataFrame()
    
    risk = events.copy()
    metrics = ['mean_precip_mm', 'max_precip_mm', 'mean_precip_intensity', 'max_precip_intensity',
               'max_daily_precip_mm', 'max_tide', 'max_wind_speed', 'max_wind_gust']
    for m in metrics: risk[m] = 0

    pr_cols = df.filter(regex='^pr_').columns
    tide_cols = df.filter(regex='^mhhw_').columns
    speed_cols = df.filter(regex='^speed_').columns
    gust_cols = df.filter(regex='^gust_').columns

    for i, row in risk.iterrows():
        s, e = row['start'], row['end']
        mm, intensity = [], []
        for storm_df in storms.values():
            overlap = storm_df[(storm_df['start'] <= e) & (storm_df['end'] >= s)]
            mm += overlap.get('mm', []).tolist()
            intensity += overlap.get('intensity', []).tolist()
        if mm: risk.at[i, 'mean_precip_mm'], risk.at[i, 'max_precip_mm'] = np.mean(mm), np.max(mm)
        if intensity: risk.at[i, 'mean_precip_intensity'], risk.at[i, 'max_precip_intensity'] = np.mean(intensity), np.max(intensity)
        risk.at[i, 'max_daily_precip_mm'] = df.loc[s:e, pr_cols].rolling('24H').sum().max(axis=1).max()
        risk.at[i, 'max_tide'] = df.loc[s:e, tide_cols].max(axis=1).max()
        risk.at[i, 'max_wind_speed'] = df.loc[s:e, speed_cols].max(axis=1).max()
        risk.at[i, 'max_wind_gust'] = df.loc[s:e, gust_cols].max(axis=1).max()

    bins = [0, 8, 12, 16, np.inf]
    risk['intensity_score'] = pd.cut(risk['mean_precip_intensity'], bins=bins, labels=[0,1,2,3]).astype(float).fillna(0).astype(int)
    risk['risk_score'] = risk['hazard_count'] + risk['daily_precip_level'] + risk['intensity_score']
    risk['risk_level'] = pd.cut(risk['risk_score'], bins=[0, 3, 5, 7, np.inf], labels=['Low', 'Moderate', 'High', 'Extreme'])
    
    return risk


def analyze_compound_coastal_floods(df):
    variables = ['pr_', 'speed_', 'gust_', 'dirdeg_', 'mhhw_', 'msl_']
    std_df = standardize(df, variables)
    stats, storms, labeled = analyze_precipitation_with_storms(std_df)
    daily_pr = std_df.filter(regex='pr_').rolling('24H').sum()

    thresholds = {
        'intensity': 8.0,         # mm/hour (rainfall intensity)
        'mhhw': 0.3,              # m above mean higher high water
        'wind_speed': 10,         # m/s 
        'wind_gust': 15           # m/s
    }
    
    events, indicators = identify_compound_events(std_df, labeled, storms, daily_pr, thresholds)
    risk = assess_flood_risk(std_df, storms, events)
    
    return risk, events, indicators, std_df, stats

risk_assessment, compound_events, event_indicators, std_df, stats = analyze_compound_coastal_floods(df)

print(f"Identified {len(compound_events)} compound coastal flood events")
print("\nPrecipitation Statistics:")
display(stats)

if not risk_assessment.empty:
    print("\nRisk Assessment Summary:")
    print(f"Events by risk level:")
    display(risk_assessment['risk_level'].value_counts())
    
    print("\nTop 5 highest risk events:")
    display(risk_assessment.sort_values(by='risk_score', ascending=False).head().T)
else:
    print("\nNo compound flood events identified with the current thresholds.")

Identified 598 compound coastal flood events

Precipitation Statistics:


Unnamed: 0,storms,total_mm,avg_intensity,max_mm,max_intensity
69536,479.0,3123.314286,0.857373,125.828571,10.457143
72308,6575.0,57691.7,1.706043,231.9,42.7
74598,2568.0,20952.0,1.50077,269.6,122.3
KFAF0,2634.0,24896.6,1.866654,606.5,267.7
KPHF0,3399.0,25525.2,1.447672,252.2,35.3



Risk Assessment Summary:
Events by risk level:


risk_level
Low         555
Moderate     28
Extreme       8
High          7
Name: count, dtype: int64


Top 5 highest risk events:


Unnamed: 0,171,184,406,302,285
start,2008-05-12 01:00:00,2009-05-15 22:00:00,2018-12-05 18:00:00,2014-09-24 12:00:00,2013-11-22 18:00:00
end,2008-05-14 17:00:00,2009-05-19 10:00:00,2018-12-08 09:00:00,2014-09-28 00:00:00,2013-11-23 20:00:00
duration,64.0,84.0,63.0,84.0,26.0
hazard_count,4,3,3,4,2
hazards,"[high_daily_precip_flag, storm_surge, high_int...","[high_daily_precip_flag, high_intensity, storm...","[high_daily_precip_flag, high_intensity, high_...","[high_daily_precip_flag, storm_surge, high_int...","[high_daily_precip_flag, high_intensity]"
daily_precip_level,3,3,3,3,3
mean_precip_mm,32.1,79.642857,157.35,51.008333,86.32
max_precip_mm,267.7,278.8,606.5,244.6,428.5
mean_precip_intensity,27.877889,32.047891,26.82803,15.343465,29.0
max_precip_intensity,267.7,202.4,111.8,122.3,142.833333


In [4]:
event_indicators[event_indicators.daily_precip_200mm==1]

Unnamed: 0,high_intensity,daily_precip_100mm,daily_precip_150mm,daily_precip_200mm,high_daily_precip,high_daily_precip_flag,high_tide,storm_surge,compound_risk,high_intensity_window,high_daily_precip_flag_window,high_tide_window,storm_surge_window
2006-09-01 16:00:00,True,True,True,True,3,True,True,True,4,1,1,1,1
2006-09-01 17:00:00,True,True,True,True,3,True,True,True,4,1,1,1,1
2006-09-01 18:00:00,True,True,True,True,3,True,True,True,4,1,1,1,1
2006-09-01 19:00:00,True,True,True,True,3,True,True,False,4,1,1,1,1
2006-09-01 20:00:00,True,True,True,True,3,True,False,False,4,1,1,1,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...
2018-12-07 19:00:00,False,True,True,True,3,True,False,False,2,1,1,0,0
2018-12-07 20:00:00,False,True,True,True,3,True,False,False,2,1,1,0,0
2018-12-07 21:00:00,False,True,True,True,3,True,False,False,2,1,1,0,0
2018-12-07 22:00:00,False,True,True,True,3,True,False,False,2,1,1,0,0


In [5]:
display(risk_assessment.sort_values(by='risk_score', ascending=False).head(5).T)


Unnamed: 0,171,184,406,302,285
start,2008-05-12 01:00:00,2009-05-15 22:00:00,2018-12-05 18:00:00,2014-09-24 12:00:00,2013-11-22 18:00:00
end,2008-05-14 17:00:00,2009-05-19 10:00:00,2018-12-08 09:00:00,2014-09-28 00:00:00,2013-11-23 20:00:00
duration,64.0,84.0,63.0,84.0,26.0
hazard_count,4,3,3,4,2
hazards,"[high_daily_precip_flag, storm_surge, high_int...","[high_daily_precip_flag, high_intensity, storm...","[high_daily_precip_flag, high_intensity, high_...","[high_daily_precip_flag, storm_surge, high_int...","[high_daily_precip_flag, high_intensity]"
daily_precip_level,3,3,3,3,3
mean_precip_mm,32.1,79.642857,157.35,51.008333,86.32
max_precip_mm,267.7,278.8,606.5,244.6,428.5
mean_precip_intensity,27.877889,32.047891,26.82803,15.343465,29.0
max_precip_intensity,267.7,202.4,111.8,122.3,142.833333


In [6]:
risk_assessment[risk_assessment.start.dt.year==2016]

Unnamed: 0,start,end,duration,hazard_count,hazards,daily_precip_level,mean_precip_mm,max_precip_mm,mean_precip_intensity,max_precip_intensity,max_daily_precip_mm,max_tide,max_wind_speed,max_wind_gust,intensity_score,risk_score,risk_level
324,2016-01-10 23:00:00,2016-01-11 11:00:00,12.0,2,"[storm_surge, high_tide]",0,0.0,0.0,0.0,0.0,0.0,0.283,12.301709,15.1,0,2,Low
325,2016-01-16 01:00:00,2016-01-17 01:00:00,24.0,2,"[storm_surge, high_tide]",0,17.275,22.9,1.762778,2.29,18.8,0.323,12.388889,12.0,0,2,Low
326,2016-01-23 01:00:00,2016-01-25 06:00:00,53.0,2,"[storm_surge, high_tide]",0,54.4,65.5,1.790343,2.085714,61.8,0.772,17.5,21.3,0,2,Low
327,2016-02-07 19:00:00,2016-02-08 18:00:00,23.0,2,"[storm_surge, high_tide]",0,0.633333,0.8,0.5,0.8,0.8,0.507,9.633869,14.1,0,2,Low
328,2016-02-10 20:00:00,2016-02-11 11:00:00,15.0,2,"[storm_surge, high_tide]",0,0.0,0.0,0.0,0.0,0.0,0.234,12.508274,16.6,0,2,Low
329,2016-03-19 19:00:00,2016-03-21 14:00:00,43.0,2,"[storm_surge, high_tide]",0,2.485714,5.8,0.626807,1.3,8.8,0.453,11.305556,13.4,0,2,Low
330,2016-03-21 17:00:00,2016-03-22 08:00:00,15.0,2,"[storm_surge, high_tide]",0,0.0,0.0,0.0,0.0,0.0,0.234,10.828373,9.4,0,2,Low
331,2016-04-05 07:00:00,2016-04-06 07:00:00,24.0,2,"[storm_surge, high_tide]",0,4.875,6.1,0.68244,0.871429,3.6,0.315,12.388889,16.333333,0,2,Low
332,2016-04-07 21:00:00,2016-04-08 22:00:00,25.0,2,"[storm_surge, high_tide]",0,0.3,0.3,0.3,0.3,0.3,0.345,10.805556,14.4,0,2,Low
333,2016-06-05 07:00:00,2016-06-06 22:00:00,39.0,2,"[high_intensity, high_tide]",0,15.2125,49.0,3.573333,8.3,50.5,0.342,12.681197,19.883333,0,2,Low
