In [1]:
#Code to format .csv files into a nicely-publishable format

In [23]:
import pandas as pd
import numpy as np
import xarray as xr
import glob
import datetime as dt
import pytz

# Default directory holding the per-radar "<rid>_scw_envs_df.csv" and
# "<rid>_non_scw_envs_df.csv" point files.
_POINTS_DIR = "/g/data/eg3/ab4502/ExtremeWind/points"


def _load_envs(rid, tz, suffix, scw_flag, data_dir=_POINTS_DIR):
    """Read one per-radar CSV and add the derived bookkeeping columns.

    Parameters
    ----------
    rid : str
        Radar id; used as the file-name prefix and stored in the ``rid`` column.
    tz : str
        Timezone name handed to ``add_lt`` for the local-time column.
    suffix : str
        File-name suffix appended to ``rid`` (e.g. ``"_scw_envs_df.csv"``).
    scw_flag : int
        Value written to every row of the ``scw`` column.
    data_dir : str or path-like, optional
        Directory containing the CSV files.

    Returns
    -------
    pandas.DataFrame
        The CSV contents indexed by a ``DatetimeIndex`` built from ``dt_utc``
        (the ``dt_utc`` column itself is kept), plus the columns
        ``cluster_new``, ``lt``, ``year``, ``month``, ``hour``, ``rid``, ``scw``.
    """
    envs = pd.read_csv(f"{data_dir}/{rid}{suffix}")

    # Relabel cluster ids 0->2, 2->1, 1->0; ids outside the mapping become NaN.
    envs["cluster_new"] = envs["cluster"].map({0: 2, 2: 1, 1: 0})
    envs = envs.set_index(pd.DatetimeIndex(envs["dt_utc"]))
    envs = add_lt(envs, tz)

    # NOTE(review): year and month are taken from the UTC index while hour is
    # taken from the local-time column, so the three can refer to different
    # calendar days/months near midnight -- confirm this is intended.
    envs["year"] = envs.index.year
    envs["month"] = envs.index.month
    envs["hour"] = envs["lt"].dt.hour
    envs["rid"] = rid
    envs["scw"] = scw_flag

    return envs


def load_scws(rid, tz, data_dir=_POINTS_DIR):
    """Load ``<rid>_scw_envs_df.csv`` and mark every row with ``scw = 1``.

    Prints a ``loading <rid>...`` progress line first. See ``_load_envs`` for
    the parameters and the columns that are added.
    """
    print(f"loading {rid}...")
    return _load_envs(rid, tz, "_scw_envs_df.csv", 1, data_dir)


def load_nulls(rid, tz, data_dir=_POINTS_DIR):
    """Load ``<rid>_non_scw_envs_df.csv`` and mark every row with ``scw = 0``.

    See ``_load_envs`` for the parameters and the columns that are added.
    """
    return _load_envs(rid, tz, "_non_scw_envs_df.csv", 0, data_dir)

def add_lt(df,tz):
    """Add a local-time column ``lt`` computed from the frame's datetime index.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame indexed by a ``DatetimeIndex``. A timezone-naive index is
        interpreted as UTC; a timezone-aware index is converted from the zone
        it already carries.
    tz : str or tzinfo
        Target timezone, e.g. ``"Australia/Melbourne"``.

    Returns
    -------
    pandas.DataFrame
        The same object that was passed in, with ``lt`` added (or overwritten).
    """
    when = df.index
    # tz_localize raises on an index that is already tz-aware, so only attach
    # UTC when the index is naive.
    if when.tz is None:
        when = when.tz_localize("UTC")
    # pandas resolves timezone names itself; values are assigned positionally.
    df["lt"] = when.tz_convert(tz)
    return df

def remove_suspect_gusts(df):
    """Drop rows whose ``dt_utc`` value is one of the listed suspect gust times.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a ``dt_utc`` column. Matching is by equality against the
        strings below, so the column is expected to hold timestamps as
        ``"YYYY-MM-DD HH:MM:SS"`` text.

    Returns
    -------
    pandas.DataFrame
        An independent copy of ``df`` without the matching rows; ``df`` itself
        is left untouched.
    """
    suspect_times = [
        "2010-12-14 07:03:00", "2011-01-11 03:49:00", "2015-12-15 23:33:00",
        "2020-02-09 01:00:00", "2020-02-09 03:18:00", "2020-05-25 06:11:00",
        "2012-11-02 18:58:00", "2012-12-20 21:19:00", "2012-12-15 13:00:00",
        "2012-12-29 16:15:00", "2012-12-30 06:25:00", "2012-12-30 18:01:00",
        "2013-01-02 08:15:00", "2013-01-05 03:36:00", "2013-01-12 15:22:00",
        "2013-02-11 07:56:00",
    ]
    # Series.isin replaces np.in1d, which is deprecated in NumPy 2.x.
    is_suspect = df["dt_utc"].isin(suspect_times)
    # Explicit copy so that later column assignments on the result do not
    # operate on a frame flagged as a slice of ``df``.
    return df.loc[~is_suspect].copy()

def assign_storm_class(data):
    """Label each row with a storm class in a ``class2`` column.

    Adds ``aspect_ratio`` (major over minor axis length) and then applies the
    rules below one after another. A later rule overwrites the label set by an
    earlier one; rows that satisfy no rule are not assigned a label.

    The frame is modified in place and also returned. It must provide the
    columns ``major_axis_length``, ``minor_axis_length``, ``local_max``,
    ``max_alt`` and ``azi_shear60``.
    """
    major = data["major_axis_length"]
    data["aspect_ratio"] = major / data["minor_axis_length"]
    ratio = data["aspect_ratio"]
    long_axis = major >= 100

    # Linear: elongated object with a long major axis
    data.loc[(ratio >= 3) & long_axis, "class2"] = "Linear"
    # Non-linear: long major axis but not elongated
    data.loc[(ratio < 3) & long_axis, "class2"] = "Non-linear"

    n_maxima = data["local_max"]
    # Cellular: exactly one local maximum
    data.loc[n_maxima == 1, "class2"] = "Cellular"
    # Cluster of cells: several local maxima, short major axis
    data.loc[(n_maxima >= 2) & (major < 100), "class2"] = "Cell cluster"

    tall_and_sheared = (data["max_alt"] >= 7) & (data["azi_shear60"] > 4)
    # Supercell: tall, sheared, and either not elongated or short
    data.loc[tall_and_sheared & ((ratio < 3) | (major < 100)), "class2"] = "Supercellular"
    # Linear hybrid: tall, sheared, long major axis
    data.loc[tall_and_sheared & long_axis, "class2"] = "Embedded supercell"

    return data

In [24]:
# Radar "2", local time Australia/Melbourne.
# Events (scw = 1): load, drop the suspect gust times, then label storm class.
df_events = (
    load_scws("2", "Australia/Melbourne")
    .pipe(remove_suspect_gusts)
    .pipe(assign_storm_class)
)
# Nulls (scw = 0): load and label storm class.
df_nulls = load_nulls("2", "Australia/Melbourne").pipe(assign_storm_class)

loading 2...


In [25]:
# Preview the first five event rows.
df_events.head(n=5)

Unnamed: 0_level_0,Unnamed: 0,index,dt_utc,group_id,scan,uid10,dist0km,dist10km,dist20km,in10km,...,wgr_2b,cluster_new,lt,year,month,hour,rid,scw,aspect_ratio,class2
dt_utc,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2008-06-30 21:13:00,0,1002823,2008-06-30 21:13:00,20080630211234/344,6896.0,344.0,0.142,0.142,0.142,1.0,...,3.415511,0,2008-07-01 07:13:00+10:00,2008,6,7,2,1,2.563447,Non-linear
2008-11-13 08:28:00,0,401308,2008-11-13 08:28:00,20081113082427/359,2953.0,359.0,6.045,6.045,6.045,1.0,...,2.193115,1,2008-11-13 19:28:00+11:00,2008,11,19,2,1,1.705679,Cell cluster
2009-03-14 06:25:00,0,461742,2009-03-14 06:25:00,20090314062432/596,3149.0,603.0,146.743,6.811,6.811,1.0,...,2.253055,0,2009-03-14 17:25:00+11:00,2009,3,17,2,1,1.661704,Cell cluster
2009-03-14 07:49:00,1,463817,2009-03-14 07:49:00,20090314074832/620,3163.0,634.0,63.222,2.541,2.541,1.0,...,4.468978,0,2009-03-14 18:49:00+11:00,2009,3,18,2,1,4.012605,Cell cluster
2009-04-14 19:15:00,0,462303,2009-04-14 19:15:00,20090414191229/155,3189.0,155.0,0.471,0.471,0.471,1.0,...,2.261463,0,2009-04-15 05:15:00+10:00,2009,4,5,2,1,3.43119,Linear


In [26]:
# Alphabetical listing of every column available in the events frame.
np.sort(df_events.columns.to_numpy())

array(['F10', 'Fn10', 'Fs10', 'Lightning_observed', 'U1', 'U10', 'U3',
       'U500', 'U6', 'Umean01', 'Umean03', 'Umean06', 'Umean800_600',
       'Umeanwindinf', 'Unnamed: 0', 'Usr01_left', 'Usr03_left',
       'Usr06_left', 'Ust_left', 'Uwindinf', 'angle', 'angle_inst',
       'area_km', 'aspect_ratio', 'azi_shear', 'azi_shear60', 'bdsd',
       'c_totals', 'class2', 'cluster', 'cluster_new', 'conv10',
       'conv_pct', 'convgust_dry', 'convgust_wet', 'cp', 'day', 'dcape',
       'dcp', 'ddraft_temp', 'dist0km', 'dist10km', 'dist20km',
       'dist_max', 'dist_min', 'dmgwind', 'dmgwind_fixed', 'dmi', 'dp850',
       'dpd700', 'dpd850', 'dt_lt', 'dt_utc', 'duration_mins', 'ebwd',
       'eccentricity', 'eff_cape', 'eff_cin', 'eff_el', 'eff_lcl',
       'eff_sherb', 'effcape*s06', 'era5_lat', 'era5_lon', 'eth',
       'field_max', 'group_id', 'gust', 'gustex', 'hmi', 'hour',
       'hour_floor', 'icon10', 'in10km', 'index', 'isolated', 'k_index',
       'lat', 'local_max', 'lon', 'lr

In [74]:
#Columns to keep for publishing

# Diagnostic columns to publish, assembled group by group (order is preserved
# in the output file).
ind_list = (
    # ERA5 details
    ["time_y", "era5_lat", "era5_lon"]
    # Clustering
    + ["cluster_new"]
    # Wind indices
    + ["Umean06", "Umean01", "U10", "wg10", "s06", "ebwd",
       "Umeanwindinf", "srhe_left", "srh06_left"]
    # Downburst indices
    + ["dmi", "lr_subcloud", "lr_freezing", "lr03", "lr13", "wmsi_ml",
       "bdsd", "hmi", "convgust_wet", "convgust_dry",
       "gustex", "dmgwind", "dmgwind_fixed", "dcape", "wmpi", "windex",
       "ddraft_temp", "te_diff", "tei", "wndg"]
    # Storm mode
    + ["dcp", "scp", "scp_fixed"]
    # Severe storm indices
    + ["sherb", "eff_sherb", "sweat", "mucape*s06", "mlcape*s06",
       "effcape*s06", "t_totals", "k_index"]
    # Instability indices
    + ["eff_cape", "eff_lcl", "ml_cape", "ml_lcl", "mu_cape", "mu_lcl",
       "qmean01", "qmean06"]
)

# Station / gust columns
gust_list = [
    "stn_id",
    "gust",
    "wgr_4",
    "scw",
]

# Radar-derived storm columns
radar_list = [
    "rid", "speed", "angle", "class2", "in10km",
    "major_axis_length", "minor_axis_length",
    "local_max", "max_alt", "azi_shear60",
]

# Lightning flag column
lightning_list = [
    "Lightning_observed",
]

In [75]:
# Source column name -> column name used in the published file.
renames = {
    # Wind indices
    "Umean06": "Umean06",
    "Umean01": "Umean01",
    "U10": "U10",
    "wg10": "WindGust10",
    "s06": "S06",
    "ebwd": "EBWD",
    "Umeanwindinf": "Umeanwindinf",
    "srhe_left": "SRHE",
    "srh06_left": "SRH06",
    # Downburst indices
    "dmi": "DMI",
    "lr_subcloud": "LR_subcloud",
    "lr_freezing": "LR_freezing",
    "lr03": "LR03",
    "lr13": "LR13",
    "wmsi_ml": "WMSI",
    "bdsd": "BDSD",
    "bdsd_cv": "BDSD_CV",
    "hmi": "HMI",
    "convgust_wet": "ConvGust_wet",
    "convgust_dry": "ConvGust_dry",
    "gustex": "GUSTEX",
    "dmgwind": "DmgWind",
    "dmgwind_fixed": "DmgWind_fixed",
    "dcape": "DCAPE",
    "wmpi": "WMPI",
    "windex": "WINDEX",
    "ddraft_temp": "DowndraftTemp",
    "te_diff": "ThetaeDiff",
    "tei": "TEI",
    "wndg": "WNDG",
    # Storm mode
    "dcp": "DCP",
    "scp": "SCP",
    "scp_fixed": "SCP_fixed",
    # Severe storm indices
    "sherb": "SHERB",
    "eff_sherb": "SHERBE",
    "sweat": "SWEAT",
    "mucape*s06": "MUCS6",
    "mlcape*s06": "MLCS6",
    "effcape*s06": "EffCS6",
    "t_totals": "T_Totals",
    "k_index": "K_Index",
    # Instability indices
    "eff_cape": "Eff_CAPE",
    "eff_lcl": "Eff_LCL",
    "ml_cape": "MLCAPE",
    "ml_lcl": "ML_LCL",
    "mu_cape": "MUCAPE",
    "mu_lcl": "MU_LCL",
    "qmean01": "Qmean01",
    "qmean06": "Qmean06",
    # Radar, gust, clustering and ERA5 bookkeeping columns
    "angle": "Storm_angle",
    "azi_shear60": "Azimuthal_shear",
    "class2": "Parent_storm_class",
    "cluster_new": "Environmental_cluster",
    "gust": "Wind_gust_observed",
    "in10km": "Storm_in10km",
    "local_max": "Local_reflectivity_maxima",
    "major_axis_length": "Major_axis_length",
    "max_alt": "Maximum_storm_altitude",
    "minor_axis_length": "Minor_axis_length",
    "rid": "Radar_id",
    "stn_id": "Station_id",
    "scw": "SCW",
    "speed": "Storm_speed",
    "wgr_4": "Peak_to_mean_wind_gust_ratio",
    "time_y": "ERA5_time",
    "era5_lat": "ERA5_latitude",
    "era5_lon": "ERA5_longitude",
}
            

In [76]:
# Check: the sorted column names that result from selecting the publishable
# columns and applying the renames.
np.sort(
    df_events
    .loc[:, gust_list + lightning_list + radar_list + ind_list]
    .rename(columns=renames)
    .columns
    .to_numpy()
)

array(['Azimuthal_shear', 'BDSD', 'ConvGust_dry', 'ConvGust_wet', 'DCAPE',
       'DCP', 'DMI', 'DmgWind', 'DmgWind_fixed', 'DowndraftTemp', 'EBWD',
       'ERA5_latitude', 'ERA5_longitude', 'ERA5_time', 'EffCS6',
       'Eff_CAPE', 'Eff_LCL', 'Environmental_cluster', 'GUSTEX', 'HMI',
       'K_Index', 'LR03', 'LR13', 'LR_freezing', 'LR_subcloud',
       'Lightning_observed', 'Local_reflectivity_maxima', 'MLCAPE',
       'MLCS6', 'ML_LCL', 'MUCAPE', 'MUCS6', 'MU_LCL',
       'Major_axis_length', 'Maximum_storm_altitude', 'Minor_axis_length',
       'Parent_storm_class', 'Peak_to_mean_wind_gust_ratio', 'Qmean01',
       'Qmean06', 'Radar_id', 'S06', 'SCP', 'SCP_fixed', 'SCW', 'SHERB',
       'SHERBE', 'SRH06', 'SRHE', 'SWEAT', 'Station_id', 'Storm_angle',
       'Storm_in10km', 'Storm_speed', 'TEI', 'T_Totals', 'ThetaeDiff',
       'U10', 'Umean01', 'Umean06', 'Umeanwindinf', 'WINDEX', 'WMPI',
       'WMSI', 'WNDG', 'WindGust10', 'Wind_gust_observed'], dtype=object)

In [146]:
# Write a small sample file: the first event rows, five hand-picked null rows
# (by position), and the first null rows, each reduced to the publishable
# columns and renamed.
sample_cols = gust_list + lightning_list + radar_list + ind_list
sample_frames = [
    df_events.head(),
    df_nulls.iloc[[2006, 2007, 2008, 2010, 2011]],
    df_nulls.head(),
]
pd.concat(
    [part[sample_cols].rename(columns=renames) for part in sample_frames],
    axis=0,
).to_csv("/g/data/eg3/ab4502/sample_melbourne_radar.csv")

In [143]:
# Preview the five hand-picked null rows (selected by position) in published form.
(
    df_nulls
    .iloc[[2006, 2007, 2008, 2010, 2011]]
    .loc[:, gust_list + lightning_list + radar_list + ind_list]
    .rename(columns=renames)
)

KeyboardInterrupt: 

Unnamed: 0_level_0,Unnamed: 0,dt_utc,group_id,scan,uid10,dist0km,dist10km,dist20km,in10km,stn_id,...,wgr_2b,scw,cluster_new,lt,year,month,hour,rid,aspect_ratio,class2
dt_utc,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2008-01-11 01:36:00,176266,2008-01-11 01:36:00,20080111013033/14,1166.0,14.0,3.009,3.009,3.009,1.0,86372,...,1.146288,0,1,2008-01-11 12:36:00+11:00,2008,1,12,2,4.359809,Cell cluster
2008-01-11 01:38:00,176317,2008-01-11 01:38:00,20080111013633/14,1167.0,14.0,0.149,0.149,0.149,1.0,86104,...,,0,1,2008-01-11 12:38:00+11:00,2008,1,12,2,4.229892,Cell cluster
2008-01-11 01:43:00,176431,2008-01-11 01:43:00,20080111014233/12,1168.0,14.0,91.745,9.643,9.643,1.0,86068,...,1.597645,0,1,2008-01-11 12:43:00+11:00,2008,1,12,2,4.121338,Cell cluster
2008-01-11 01:49:00,176568,2008-01-11 01:49:00,20080111014833/12,1169.0,14.0,94.806,9.127,9.127,1.0,86383,...,1.21569,0,1,2008-01-11 12:49:00+11:00,2008,1,12,2,4.595484,Cell cluster
2008-01-11 01:52:00,176633,2008-01-11 01:52:00,20080111014833/12,1169.0,12.0,9.379,9.379,9.379,1.0,88051,...,1.124736,0,1,2008-01-11 12:52:00+11:00,2008,1,12,2,2.01648,Cell cluster
