In [1]:
import pandas as pd
import numpy as np
import glob
import datetime as dt
import tqdm
import matplotlib.pyplot as plt

In [2]:
def last_day_of_month(any_day):
    """Return the last day of the month that contains ``any_day``.

    ``any_day`` is a ``datetime.date`` or ``datetime.datetime``; the result has
    the same type, and any time-of-day fields are carried over unchanged.
    """
    # Day 28 exists in every month, so jumping 4 days past it always lands on
    # one of the first four days of the following month.
    into_next_month = any_day.replace(day=28) + dt.timedelta(days=4)
    # Stepping back by that date's day-of-month reaches the day before the 1st
    # of the following month, i.e. the last day of the original month.
    overshoot = dt.timedelta(days=into_next_month.day)
    return into_next_month - overshoot

def load_scws(rid, year, month, data_dir="/g/data/eg3/ab4502/ExtremeWind/points/"):
    """Load one month of the merged AWS/ERA5/TINT point dataset and return candidate SCW gusts.

    A row is kept as an event when ``gust >= 25`` and ``in10km == 1`` (per the
    original note: one-minute maximum 3-second gusts over 25 m/s with a radar
    object within 10 km). For every event the wind gust ratio (WGR) is computed
    as the peak gust divided by the mean gust at the same station over:

    * ``wgr_4``        - the 4-hour window centred on the event (t-2h .. t+2h),
    * ``wgr_minus_2``  - the 2 hours up to and including the event (t-2h .. t),
    * ``wgr_plus_2``   - the 2 hours from the event onwards (t .. t+2h).

    ``wgr_data_pct`` is the number of non-missing gust values in the 4-hour
    window divided by 241 (4*60+1).
    NOTE(review): 241 presumably assumes one observation per minute - confirm.

    NOTE(review): the previous header comment also mentioned keeping only the
    highest daily gust across stations and requiring WGR(4-hr) > 1.5. Neither
    filter is applied here; the ``day`` and ``wgr_4`` columns are returned so a
    caller can apply them.

    Parameters
    ----------
    rid : str
        Radar id as it appears in the file name (e.g. ``"71"``).
    year, month : int
        Calendar month to load.
    data_dir : str, optional
        Directory containing the monthly
        ``era5_aws_tint_<rid>_<YYYYMMDD>_<YYYYMMDD>_max.pkl`` files.

    Returns
    -------
    pandas.DataFrame
        Event rows indexed by ``dt_utc`` and sorted by descending gust, with
        the added columns ``day``, ``wgr_4``, ``wgr_minus_2``, ``wgr_plus_2``
        and ``wgr_data_pct``.
    """
    d1 = dt.datetime(year, month, 1)
    d2 = last_day_of_month(d1)
    fname = ("era5_aws_tint_" + rid + "_" + d1.strftime("%Y%m%d") + "_"
             + d2.strftime("%Y%m%d") + "_max.pkl")
    # NOTE: unpickling can execute arbitrary code; only point data_dir at trusted files.
    temp_df = pd.read_pickle(str(data_dir).rstrip("/") + "/" + fname)
    temp_df = temp_df.set_index(pd.DatetimeIndex(temp_df.dt_utc)).sort_index()

    scws = temp_df.query("(gust>=25) & (in10km ==1)").sort_values("gust", ascending=False)
    # The index is the parsed dt_utc, so it can be used directly for the day
    # and for the event times below instead of re-parsing the dt_utc column.
    scws["day"] = scws.index.floor("1D")

    two_hours = dt.timedelta(hours=2)
    expected_obs = 4 * 60 + 1
    # Per-station gust series, extracted once per station rather than once per event.
    station_gusts = {}
    wgr_4 = []
    wgr_minus_2 = []
    wgr_plus_2 = []
    wgr_data_pct = []
    for t, stn_id, peak_gust in zip(scws.index, scws["stn_id"], scws["gust"]):
        if stn_id not in station_gusts:
            # A boolean mask compares station ids by value, whatever their dtype.
            station_gusts[stn_id] = temp_df.loc[temp_df["stn_id"] == stn_id, "gust"]
        # Label slicing on the sorted DatetimeIndex is inclusive at both ends.
        window = station_gusts[stn_id].loc[t - two_hours:t + two_hours]
        wgr_4.append(peak_gust / window.mean())
        wgr_minus_2.append(peak_gust / window.loc[t - two_hours:t].mean())
        wgr_plus_2.append(peak_gust / window.loc[t:t + two_hours].mean())
        wgr_data_pct.append(window.notna().sum() / expected_obs)

    scws["wgr_4"] = wgr_4
    scws["wgr_minus_2"] = wgr_minus_2
    scws["wgr_plus_2"] = wgr_plus_2
    scws["wgr_data_pct"] = wgr_data_pct

    return scws

In [None]:
# Load every month of 2010-2020 for radar id "71" and stack the events into `sydney`.
# Frames are collected in a list and concatenated once, which avoids the
# quadratic cost of growing a DataFrame inside the loop.
monthly_frames = []
for y in tqdm.tqdm(np.arange(2010,2021)):
    for m in np.arange(1,13):
        try:
            monthly_frames.append(load_scws("71",y,m))
        except Exception as err:
            # Best-effort load: skip months that fail, but report why, and do not
            # swallow KeyboardInterrupt/SystemExit as a bare `except:` would.
            print("CAN'T LOAD YEAR "+str(y)+" MONTH "+str(m)+" ("+type(err).__name__+": "+str(err)+")")

# pd.concat raises on an empty list, so fall back to an empty frame if nothing loaded.
sydney = pd.concat(monthly_frames) if monthly_frames else pd.DataFrame()

 18%|█▊        | 2/11 [01:49<08:13, 54.89s/it]

In [None]:
# For each cluster label (0, 1, 2): the mean number of distinct stations per
# event day, i.e. (sum over days of unique stn_id) / (number of distinct days).
# NOTE(review): the `cluster` column is not created in the cells shown here;
# presumably it is already present in the loaded data - confirm.
# Selecting `stn_id` before aggregating avoids computing nunique for every
# column, and each cluster subset is built only once.
meas_per_day = [
    stations_per_day.sum() / len(stations_per_day)
    for stations_per_day in (
        sydney[sydney["cluster"] == label].groupby("day")["stn_id"].nunique()
        for label in [0, 1, 2]
    )
]