In [1]:
import pandas as pd
import numpy as np
import glob
import datetime as dt
import tqdm
import matplotlib.pyplot as plt

In [2]:
def last_day_of_month(any_day):
    """Return the last day of the month that contains ``any_day``.

    Day 28 exists in every month, so stepping four days past it always lands
    in the following month. Snapping that date to the 1st and going back one
    day gives the final day of the original month. Any time-of-day fields on
    ``any_day`` are carried through unchanged.
    """
    into_next_month = any_day.replace(day=28) + dt.timedelta(days=4)
    first_of_next_month = into_next_month.replace(day=1)
    return first_of_next_month - dt.timedelta(days=1)

def load_scws(rid,year,month):
    """Load one radar-month of merged AWS/ERA5/TINT point data and return candidate SCW gusts.

    Reads the monthly ``era5_aws_tint_<rid>_<first day>_<last day>_max.pkl`` file and
    keeps the rows with ``gust >= 25`` and ``in10km == 1`` (per the original note:
    one-minute max 3-sec gusts over 25 m/s with a radar object within 10 km). For each
    kept row it computes wind gust ratios (WGR): the peak gust divided by the mean gust
    at the same station over the surrounding 4 hours, the preceding 2 hours and the
    following 2 hours.

    NOTE(review): the original header also mentioned keeping only the highest daily
    gust and requiring WGR(4-hr) > 1.5. Neither filter is applied in this function;
    it only adds the ``day`` and ``wgr_*`` columns, so callers have to apply them.

    Parameters
    ----------
    rid : str
        Radar id as it appears in the file name.
    year, month : int
        Calendar month to load.

    Returns
    -------
    pandas.DataFrame
        Qualifying rows sorted by ``gust`` (descending) with the extra columns
        ``day``, ``wgr_4``, ``wgr_minus_2``, ``wgr_plus_2`` and ``wgr_data_pct``.
        ``wgr_data_pct`` is the number of non-missing gusts in the 4-hour window
        divided by 241, i.e. it assumes one observation per minute.
    """
    d1 = dt.datetime(year,month,1)
    d2 = last_day_of_month(d1)
    # NOTE(review): pd.read_pickle can execute arbitrary code on load; only point this at trusted files.
    fname = f"/g/data/eg3/ab4502/ExtremeWind/points/era5_aws_tint_{rid}_{d1:%Y%m%d}_{d2:%Y%m%d}_max.pkl"
    temp_df = pd.read_pickle(fname)
    # A sorted DatetimeIndex is what makes the time-window slices below valid.
    temp_df = temp_df.set_index(pd.DatetimeIndex(temp_df.dt_utc)).sort_index()
    scws = temp_df.query("(gust>=25) & (in10km ==1)").sort_values("gust",ascending=False)
    scws["day"] = pd.DatetimeIndex(scws["dt_utc"]).floor("1D")

    #Get the wind gust ratio: ratio of peak gust to 1) mean 4-hour gust, 2) mean 2-hour gust pre-event, 3) mean 2-hour gust post-event.
    two_hours = dt.timedelta(hours=2)
    expected_obs = 4*60+1    # one-minute observations in a closed 4-hour window
    gust_by_stn = {}         # per-station gust series, filtered once per station instead of once per event
    wgr_4 = []
    wgr_minus_2 = []
    wgr_plus_2 = []
    wgr_data_pct = []
    for dt_utc, stn_id, peak_gust in zip(scws["dt_utc"], scws["stn_id"], scws["gust"]):
        # pd.Timestamp accepts both the "%Y-%m-%d %H:%M:%S" strings and already-parsed datetimes.
        t = pd.Timestamp(dt_utc)
        if stn_id not in gust_by_stn:
            gust_by_stn[stn_id] = temp_df.loc[temp_df["stn_id"] == stn_id, "gust"]
        sliced = gust_by_stn[stn_id].loc[t - two_hours:t + two_hours]
        # Label slices on a DatetimeIndex include both end points, so the event minute
        # itself is part of both the pre- and post-event means.
        wgr_4.append(peak_gust / sliced.mean())
        wgr_minus_2.append(peak_gust / sliced.loc[t - two_hours:t].mean())
        wgr_plus_2.append(peak_gust / sliced.loc[t:t + two_hours].mean())
        wgr_data_pct.append(sliced.notna().sum() / expected_obs)
    scws["wgr_4"] = wgr_4
    scws["wgr_minus_2"] = wgr_minus_2
    scws["wgr_plus_2"] = wgr_plus_2
    scws["wgr_data_pct"] = wgr_data_pct

    return scws

In [3]:
# sydney = pd.DataFrame()
# for y in tqdm.tqdm(np.arange(2010,2021)):
#     for m in np.arange(1,13):
#         try:
#             sydney = pd.concat([sydney, load_scws("71",y,m)])
#         except:
#             print("CAN'T LOAD YEAR "+str(y)+" MONTH "+str(m))

In [3]:
#DEV to put into get_scw_stats.py
# Prototype of a vectorised 4-hour wind gust ratio (WGR) for a single radar-month:
# instead of slicing a window per event, compute a centred rolling mean per station.

rid="2"; year=2020; month=1

d1 = dt.datetime(year,month,1)
d2 = last_day_of_month(d1)
# NOTE(review): pd.read_pickle can execute arbitrary code on load; only point this at trusted files.
temp_df = pd.read_pickle("/g/data/eg3/ab4502/ExtremeWind/points/era5_aws_tint_"+rid+"_"+d1.strftime("%Y%m%d")+"_"+d2.strftime("%Y%m%d")+"_max.pkl")
temp_df["dt_utc"] = pd.DatetimeIndex(temp_df.dt_utc)
# Time-based rolling windows need the "on" column to be sorted.
temp_df = temp_df.sort_values("dt_utc")
# Mean gust per station over a 4-hour window centred on each observation, both ends included.
# The lower-case "4h" alias is used because the upper-case "H" alias is deprecated in recent pandas.
rolling = temp_df[["gust","stn_id","dt_utc"]].groupby("stn_id").rolling("4h",center=True,on="dt_utc",closed="both").mean()
# Bring the rolling mean back onto the full table, matched by station and time.
temp_df = pd.merge(temp_df,rolling.rename(columns={"gust":"rolling4"}),on=["stn_id","dt_utc"])
temp_df["wgr_4"] = temp_df["gust"] / temp_df["rolling4"]
# Rows without a gust are only dropped after the rolling mean has been computed.
temp_df = temp_df.dropna(subset=["gust"])

In [4]:
# Group qualifying gusts into events. Walking the rows in order, each row that has no
# group yet becomes an anchor, and every still-ungrouped row within one hour of it
# (subject to `drop`) gets the label i+1. `drop` selects what counts as the same event:
#   stns      = events which occur more than one hour apart or at different stations
#   era5_grid = as above, but rows must share the anchor's era5_lat/era5_lon, not its station
#   domain    = any row within one hour of the anchor, regardless of location
temp_df["hour_group"] = 0
scws_envs = temp_df.query("(gust>=25) & (in10km ==1) & (wgr_4 >= 1.5)").copy()
keep_inds = []   # not used in this cell; kept in case a later cell relies on it (TODO confirm)
drop="domain"
if drop not in ("stns","era5_grid","domain"):
    # Previously an unknown mode silently left every row in hour_group 0.
    raise ValueError("drop must be 'stns', 'era5_grid' or 'domain', got "+repr(drop))
one_hour = dt.timedelta(hours=1)
for i in range(scws_envs.shape[0]):
    anchor = scws_envs.iloc[i]
    if anchor.hour_group != 0:
        # Already absorbed into an earlier anchor's event.
        continue
    same_event = (abs(anchor.dt_utc - scws_envs.dt_utc) < one_hour) & (scws_envs.hour_group==0)
    # Plain equality replaces np.in1d against a scalar (np.in1d is deprecated in NumPy 2.0).
    if drop=="stns":
        same_event &= scws_envs["stn_id"] == anchor.stn_id
    elif drop=="era5_grid":
        same_event &= (scws_envs["era5_lat"] == anchor.era5_lat) & (scws_envs["era5_lon"] == anchor.era5_lon)
    scws_envs.loc[same_event, "hour_group"] = i+1


In [31]:
# Number of distinct stations contributing to each hour_group and to each uid10
# (uid10 is presumably the id of the radar object within 10 km; confirm).
# Selecting stn_id before nunique() avoids counting uniques for every other column first.
meas_per_hour = scws_envs.groupby("hour_group")["stn_id"].nunique()
meas_per_uid = scws_envs.groupby("uid10")["stn_id"].nunique()
meas_per_hour

hour_group
1     1
11    2
14    2
22    5
28    1
Name: stn_id, dtype: int64

In [18]:
# Quick look at the columns that identify each qualifying gust and the group it was assigned to.
event_cols = ["dt_utc","stn_id","hour_group","uid10","cluster"]
scws_envs[event_cols]

Unnamed: 0,dt_utc,stn_id,hour_group,uid10,cluster
510759,2020-01-15 04:00:00,87113,1,638.0,1
510781,2020-01-15 04:01:00,87113,1,638.0,1
510803,2020-01-15 04:02:00,87113,1,638.0,1
510847,2020-01-15 04:03:00,87113,1,638.0,1
510868,2020-01-15 04:04:00,87113,1,638.0,1
510892,2020-01-15 04:05:00,87113,1,638.0,1
510915,2020-01-15 04:06:00,87113,1,638.0,1
511079,2020-01-15 04:12:00,87113,1,638.0,1
511094,2020-01-15 04:13:00,87113,1,638.0,1
511118,2020-01-15 04:14:00,87113,1,638.0,1


In [28]:
# Attach the per-group station counts to every row.
# Drop the results of any previous run first, otherwise re-executing this cell
# produces meas_per_hour_x / meas_per_hour_y style duplicate columns.
scws_envs = scws_envs.drop(columns=["meas_per_hour","meas_per_uid"],errors="ignore")
# NOTE: both merges are inner joins, so a row whose key is absent from the counts
# (e.g. a missing uid10, which groupby leaves out) is dropped here.
scws_envs=pd.merge(scws_envs,meas_per_hour.rename("meas_per_hour"),on="hour_group")
scws_envs=pd.merge(scws_envs,meas_per_uid.rename("meas_per_uid"),on="uid10")

In [29]:
# Display the event table; the output below includes the meas_per_hour / meas_per_uid columns.
scws_envs

Unnamed: 0,dt_utc,group_id,scan,uid10,dist0km,dist10km,dist20km,in10km,stn_id,gust,...,mod_cape*s06,era5_lat,era5_lon,Lightning_observed,cluster,rolling4,wgr_4,hour_group,meas_per_hour,meas_per_uid
0,2020-01-15 04:00:00,20200115035428/638,3253.0,638.0,0.431,0.431,0.431,1.0,87113,30.3,...,106819.4375,-38.0,144.5,25.0,1,10.673444,2.838821,1,1,2
1,2020-01-15 04:01:00,20200115040031/638,3254.0,638.0,0.431,0.431,0.431,1.0,87113,27.3,...,106819.4375,-38.0,144.5,25.0,1,10.679668,2.556259,1,1,2
2,2020-01-15 04:02:00,20200115040031/638,3254.0,638.0,0.431,0.431,0.431,1.0,87113,30.3,...,106819.4375,-38.0,144.5,25.0,1,10.679668,2.837167,1,1,2
3,2020-01-15 04:03:00,20200115040031/638,3254.0,638.0,0.431,0.431,0.431,1.0,87113,26.7,...,106819.4375,-38.0,144.5,25.0,1,10.675519,2.501049,1,1,2
4,2020-01-15 04:04:00,20200115040031/638,3254.0,638.0,0.431,0.431,0.431,1.0,87113,27.3,...,106819.4375,-38.0,144.5,25.0,1,10.671369,2.558247,1,1,2
5,2020-01-15 04:05:00,20200115040031/638,3254.0,638.0,0.431,0.431,0.431,1.0,87113,31.4,...,106819.4375,-38.0,144.5,25.0,1,10.671369,2.942453,1,1,2
6,2020-01-15 04:06:00,20200115040628/638,3255.0,638.0,0.431,0.431,0.431,1.0,87113,30.3,...,106819.4375,-38.0,144.5,25.0,1,10.660581,2.842247,1,1,2
7,2020-01-15 04:12:00,20200115041228/638,3256.0,638.0,0.431,0.431,0.431,1.0,87113,26.2,...,106819.4375,-38.0,144.5,25.0,1,10.629461,2.464848,1,1,2
8,2020-01-15 04:13:00,20200115041228/638,3256.0,638.0,0.431,0.431,0.431,1.0,87113,28.3,...,106819.4375,-38.0,144.5,25.0,1,10.621162,2.664492,1,1,2
9,2020-01-15 04:14:00,20200115041228/638,3256.0,638.0,0.431,0.431,0.431,1.0,87113,26.2,...,106819.4375,-38.0,144.5,25.0,1,10.612863,2.468702,1,1,2
