In [1]:
import pandas as pd
import datetime as dt
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import glob
import numpy as np
import tqdm

In [2]:
def read_aws(stn_id):
    """Read the one-minute gust file for a station into a DataFrame.

    The first file matching ``HD01D_Data_*<stn_id>*.txt`` under any
    ``*_one_min_gust`` directory is read with every column as a string.
    ``dt_utc`` is then parsed with the format ``%Y%m%d%H%M`` and used as the
    index, and ``gust`` is converted to numeric (unparseable values become NaN).

    Parameters
    ----------
    stn_id : str
        Station identifier fragment inserted into the file-name pattern.

    Returns
    -------
    pandas.DataFrame
        Observations indexed by ``dt_utc``.

    Raises
    ------
    IndexError
        If no file matches the pattern.
    """
    columns = ["hd","stn_id","dt_lt","dt_utc","gust","q","#"]
    pattern = "/g/data/eg3/ab4502/ExtremeWind/obs/aws/*_one_min_gust/HD01D_Data_*"+stn_id+"*.txt"
    first_match = glob.glob(pattern)[0]

    # Read everything as text first; only the columns used below are converted.
    obs = pd.read_csv(first_match, names=columns, header=0,
                      dtype={col: str for col in columns})

    parsed_utc = pd.to_datetime(obs["dt_utc"], format="%Y%m%d%H%M")
    obs["dt_utc"] = pd.DatetimeIndex(parsed_utc)
    obs = obs.set_index("dt_utc")

    obs["gust"] = pd.to_numeric(obs["gust"], errors="coerce")
    return obs

def latlon_dist(lat, lon, lats, lons, radius=6373.0):
    """Great-circle (haversine) distance between one point and other points.

    Parameters
    ----------
    lat, lon : float or array-like
        Latitude and longitude of the reference point, in degrees.
    lats, lons : float or array-like
        Latitudes and longitudes of the comparison points, in degrees.
        Inputs are combined with ordinary numpy broadcasting.
    radius : float, optional
        Sphere radius; the result is in the same units. The default of
        6373.0 is the value this function has always used.

    Returns
    -------
    float or array-like
        ``radius`` times the central angle (in radians) for each pair.
    """
    lat1 = np.deg2rad(lat)
    lon1 = np.deg2rad(lon)
    lat2 = np.deg2rad(lats)
    lon2 = np.deg2rad(lons)

    dlon = lon2 - lon1
    dlat = lat2 - lat1

    a = np.sin(dlat / 2)**2 + np.cos(lat1) * np.cos(lat2) * np.sin(dlon / 2)**2
    # Floating-point rounding can push `a` marginally outside [0, 1] (e.g. for
    # near-antipodal points), which would make sqrt(1 - a) return NaN.
    a = np.clip(a, 0.0, 1.0)
    c = 2 * np.arctan2(np.sqrt(a), np.sqrt(1 - a))

    return radius * c
    
def load_lightning(date, stn_no, state, max_dist=50):
    """Per-minute counts of unique lightning values around a station.

    Reads the lightning file ``A<date>.loc`` (six columns), falling back to
    ``AE<date>.loc`` (nine columns) when the first file does not exist. The
    distance from the station to every strike is computed with
    ``latlon_dist``, and a ``"lat lon"`` string is kept only for strikes
    within ``max_dist`` of the station.

    Parameters
    ----------
    date : str
        Date fragment of the lightning file name (the caller in this file
        passes ``strftime("%Y%m%d")``).
    stn_no : str or int
        Station number; converted with ``int()`` and matched against the
        ``stn_no`` column of the station-details file.
    state : str
        Prefix of the ``<state>_one_min_gust`` directory holding the
        station-details file.
    max_dist : float, optional
        Distance threshold, in the units returned by ``latlon_dist``.
        Defaults to 50, the value previously hard-coded.

    Returns
    -------
    pandas.DataFrame
        One row per minute with the number of unique values of ``lat``,
        ``lon``, ``l_dist`` and ``latlon``. NaN ``latlon`` entries (strikes
        beyond ``max_dist``) are not counted by ``nunique``.

    Raises
    ------
    FileNotFoundError
        If neither lightning file exists.
    IndexError
        If no station-details file matches, or the station is not listed.
    """
    lightning_dir = "/g/data/eg3/ab4502/ExtremeWind/ad_data/lightning_raw/"
    try:
        lightning = pd.read_csv(lightning_dir+"A"+date+".loc",header=None,
                               names=["date","time","lat","lon","1","2"])
    except FileNotFoundError:
        # Fall back only when the "A" file is absent; any other failure
        # (or a keyboard interrupt) should surface rather than be masked.
        lightning = pd.read_csv(lightning_dir+"AE"+date+".loc",header=None,
                               names=["date","time","lat","lon","1","2","3","4","5"])

    names = ["id", "stn_no", "district", "stn_name", "site_open", "site_close", "lat", "lon", "latlon_method", "state",
             "hgt_asl", "hgt_asl_baro", "wmo_idx", "y1", "y2", "comp%", "Y%", "N%", "W%", "S%", "I%", "#"]
    stn_df = pd.read_csv(glob.glob("/g/data/eg3/ab4502/ExtremeWind/obs/aws/"+state+"_one_min_gust/HD01D_StnDet_*.txt")[0],
                         names=names, header=0)

    # Look the station up once instead of filtering the table for each coordinate.
    station = stn_df[stn_df.stn_no == int(stn_no)]
    if station.empty:
        raise IndexError("station " + str(stn_no) + " not found in the " + state + " station-details file")
    stn_lat = station.lat.values[0]
    stn_lon = station.lon.values[0]

    lightning["l_dist"] = latlon_dist(stn_lat, stn_lon, lightning["lat"], lightning["lon"])
    lightning["latlon"] = lightning["lat"].astype("str") + " " + lightning["lon"].astype("str")
    # Blank out locations beyond the threshold so they are not counted below.
    lightning["latlon"] = lightning.latlon.where(lightning.l_dist <= max_dist, np.nan)

    lightning["datetime"] = pd.DatetimeIndex(lightning["date"] + " " + lightning["time"])
    lightning = lightning.set_index(lightning["datetime"])
    return lightning[["lat","lon","l_dist","latlon"]].resample("1min").nunique()

In [3]:
# Load the case-study table and normalise the columns used by later cells:
# parse the gust time, keep `rid` as text, and left-pad `stn_id` with zeros
# to six characters.
details_list = pd.read_csv("/g/data/eg3/ab4502/figs/ExtremeWind/case_studies/case_study_list.csv")
details_list = details_list.assign(
    gust_time_utc=pd.DatetimeIndex(details_list["gust_time_utc"]),
    rid=details_list["rid"].astype(str),
    stn_id=details_list["stn_id"].astype(str).str.rjust(6, "0"),
)


In [4]:
# Per-case accumulators; every matched case appends one entry to each list.
time_ls = []     # time of the peak gust in the matched file
rid = []         # `rid` value from the case list
wg = []          # peak gust
rb = []          # peak gust / mean gust over the 2 h before the peak
ra = []          # peak gust / mean gust over the 2 h after the peak
rab4 = []        # peak gust / mean gust over the 4 h centred on the peak
rab1 = []        # peak gust / mean gust over the 1 h centred on the peak
pr = []          # sum of `p` in the hour after the peak
lightning = []   # summed per-minute `latlon` counts within 30 min of the peak

for index, row in tqdm.tqdm(details_list.iterrows(), total=len(details_list)):
    event_time = row.gust_time_utc
    for f in glob.glob("/g/data/eg3/ab4502/ExtremeWind/obs/aws/one_min_case_data_2/HD01D_Data_"+row["stn_id"]+"_*.txt"):
        df = pd.read_csv(f,
                   names=["hd","stn_no","dt_lt","Time (UTC)","p","p_q","p_p","t","t1","dp","dp_q",
                          "rh","rh_q","ws","ws_q","min_ws","min_ws_q","wd","wd_q","std_wd","std_wd_q",
                          "wg","wg_q","mslp","mslp_q","pres","pres_q","qnh","qnh_q","#"], header=0, index_col="Time (UTC)", parse_dates=True)
        df["wg"] = pd.to_numeric(df["wg"],errors="coerce")
        df["p"] = pd.to_numeric(df["p"],errors="coerce")

        # Use total_seconds(): Timedelta.seconds discards whole days, so a file
        # whose nearest timestamp is N days plus a few minutes away from the
        # event would otherwise be accepted as a match.
        if np.min(np.abs(df.index - event_time)).total_seconds() < 3600:
            time = df.index[np.argmax(df.wg)]
            times = [time + dt.timedelta(seconds=-2*60*60),
                     time + dt.timedelta(seconds=-30*60),
                     time + dt.timedelta(seconds=30*60),
                     time + dt.timedelta(seconds=2*60*60)]
            df = df.set_index(pd.DatetimeIndex(df.index))
            # Lightning is loaded for the UTC date of the listed gust time only.
            df = df.merge(load_lightning(row.gust_time_utc.strftime("%Y%m%d"),
                                         row["stn_id"],row["state"]), how="outer", right_index=True, left_index=True)
            wg_max = df.wg.max()

            time_ls.append(time)
            rid.append(row["rid"])
            wg.append(wg_max)

            # Gust ratios before/after the peak are dropped (NaN) when 30 or
            # more gust values are missing in the window.
            wg_before = df.loc[slice(times[0], time)].wg
            wg_after = df.loc[slice(time, times[3])].wg
            if wg_before.isna().sum() >= 30:
                rb.append(np.nan)
            else:
                rb.append(wg_max / wg_before.mean())
            if wg_after.isna().sum() >= 30:
                ra.append(np.nan)
            else:
                ra.append(wg_max / wg_after.mean())
            rab4.append(wg_max / df.loc[slice(times[0], times[3])].wg.mean())
            rab1.append(wg_max / df.loc[slice(times[1], times[2])].wg.mean())

            # Each comparison is parenthesised: `&` binds tighter than `<`, so
            # the unparenthesised form evaluated as ((sum < 1) & n_nan) < 10,
            # which is always true and disabled the missing-data check.
            # NOTE(review): with precedence fixed, the `< 1` threshold on the
            # hourly total is now enforced as written -- confirm it is intended.
            p_hour_after = df.loc[slice(time, time+dt.timedelta(seconds=60*60))].p
            if (p_hour_after.sum() < 1) and (p_hour_after.isna().sum() < 10):
                pr.append(p_hour_after.sum())
            else:
                pr.append(np.nan)

            lightning.append(df.loc[slice(times[1], times[2])].latlon.sum())

            # Only the first matching file is used for each case.
            break

36it [07:46, 12.96s/it]


In [5]:
# Combine the per-case lists into one table (one row per matched case) and
# write it next to the case-study list.
pd.DataFrame(dict(
    time=time_ls,
    rid=rid,
    wg=wg,
    rb=rb,
    ra=ra,
    rab4=rab4,
    rab1=rab1,
    pr=pr,
    lightning=lightning,
)).to_csv("/g/data/eg3/ab4502/figs/ExtremeWind/case_studies/one_min_obs_stats.csv")