In [1]:
import pandas as pd
from datetime import datetime, timedelta
import numpy as np

In [2]:
# Set up paths

path_to_data = '/home/dnash/SEAK_clim_data/'      # project data -- read only
path_to_out  = '../out/'       # output files (numerical results, intermediate datafiles) -- read & write
path_to_figs = '../figs/'      # figures

In [3]:
fname = path_to_data + 'downloads/SEAK_flood_catalog.csv'
df = pd.read_csv(fname)
# some cleaning
df = df.drop(['Unnamed: 6', 'Unnamed: 7', 'Unnamed: 8', 'Unnamed: 9','Unnamed: 10', 'Unnamed: 11','Unnamed: 12','Unnamed: 13','Unnamed: 14'], axis=1)
df

Unnamed: 0,Impact Level,Impact dates,Impact,Impact Information,Impact Source,Notes
0,1,02/09/2022,Lake flooding,"Minor flooding around ward lake/river area, wa...",HYDRO,
1,2,01/22/2022,"Flood stage, flooding",Jordan creek at 7th highest crest; Montana cre...,AHPS,THe 22nd had the 5th highest precip event
2,2,01/22/2022,Flood stage; flooding in Juneau,"Flooding in Juneau closed Alex Holden road, th...",AHPS,
3,2,01/22/2022,Moderate flood stage,Salmon river at 3rd highest crest,AHPS,
4,0,01/22/2022,No impacts recorded,No impacts recorded but widespread ar event,,22nd had the 17th highest precip
...,...,...,...,...,...,...
445,1,10/10/1982,flood stage',10th overflowing of Fish Creek.',AHPS,10th is 6th highest precip and 17th hihgest IVT
446,1,09/11/1981,flood stage,Stikine at its 3rd highest crest,AHPS,
447,3,09/09/1981,Moderate flood stage,Mendenhall at 8th highest crest,AHPS,There was an AR event two days before and on t...
448,3,09/08/1981,Moderate flood stage,Mendenhall at its 8th highest crest,AHPS,Seems like a big rain event but not much more ...


In [4]:
# set start and end dates to match WRF data
start_date = '1979-01-01 0:00'
end_date = '2019-12-31 23:00'

In [5]:
def get_impact_dates_from_aaron_df(df, freq):
    
    if freq == '1D':
        impactdates = df['Impact dates'].values
    
    elif freq == '1H':
        dt_lst = []
        for index, row in df.iterrows():
            current_date = row['Impact dates']
            dt = datetime.strptime(current_date, "%m/%d/%Y")
            dt = pd.date_range(dt, dt + timedelta(hours=23), freq=freq)
            dt_lst.append(dt)
        impactdates = dt_lst[0].union_many(dt_lst[1:])
    
    
    # put into dataframe
    d = {'impactdates': impactdates}
    df = pd.DataFrame(data=d)

    # keep only unique dates
    df.index = pd.to_datetime(df['impactdates'])
    df_new = df['impactdates'].unique()
    # make a new df with only unique dates
    d = {'dates': df_new}
    df = pd.DataFrame(data=d)
    df.index = pd.to_datetime(df.dates)
    df = df.drop(['dates'], axis=1)
    df = df.reset_index()
    df['impact'] = 1


    # date array with all days
    dates_allDays = pd.date_range(start=start_date, end=end_date, freq=freq)
    arr_allDays = np.zeros(len(dates_allDays), dtype=int)

    # Loop over ar days and match to ar_full 
    for i, date in enumerate(df['dates'].values):
        idx = np.where(dates_allDays == date)
        arr_allDays[idx] = 1

    # Create dataframe
    data = {'IMPACT':arr_allDays}
    df_all = pd.DataFrame(data, index=dates_allDays)
    
    return df_all

In [6]:
impactdates_hourly = get_impact_dates_from_aaron_df(df, '1H')
impactdates_daily = get_impact_dates_from_aaron_df(df, '1D')

  impactdates = dt_lst[0].union_many(dt_lst[1:])


In [7]:
## save to csv file
impactdates_hourly.to_csv(path_to_out + 'SEAK_impactdates_hourly.csv')
impactdates_daily.to_csv(path_to_out + 'SEAK_impactdates_daily.csv')