# Fractions Skill Score - Dataframe Creation

In [1]:
import pandas as pd
import geopandas as gpd
import xarray as xr
import xoak
import numpy as np
import matplotlib.pyplot as plt
import regionmask
import cartopy
import cartopy.crs as ccrs
import glob
%matplotlib inline
import pysteps
import ipywidgets as widgets
from copy import deepcopy

import warnings
warnings.filterwarnings('ignore')

Pysteps configuration file found at: /anaconda3/envs/pyEAE/lib/python3.9/site-packages/pysteps/pystepsrc



In [2]:
def Data_Conv(ds, geog_path="/home/scratch/WRF_BCC/geography/geo_em.d01.nc", factor=20):
    
    '''
    Coarsen a dataset on the WRF-BCC grid and attach matching lat/lon coordinates.
    
    The data variables of ``ds`` are summed over ``factor`` x ``factor`` blocks of
    the ``south_north``/``west_east`` dimensions (incomplete edge blocks are
    trimmed). The block-mean of ``CLAT``/``CLONG`` from the geography file is
    merged in as ``lat``/``lon`` coordinates, and a xoak ball-tree index is set
    on them.
    
    Parameters
    ----------
    ds: xarray dataset
        Must have ``south_north`` and ``west_east`` dimensions.
    geog_path: str, optional
        Path of the WRF geography file providing ``CLAT`` and ``CLONG``.
        Defaults to the WRF-BCC d01 file.
    factor: int, optional
        Number of grid points per coarse cell along each horizontal dimension.
        Defaults to 20 (~80-km cells).
    
    Returns
    -------
    ds: returns a coarsened data with lat/long
    
    '''
    window = {'south_north': factor, 'west_east': factor}

    #load the geog file produced for WRF-BCC; the context manager closes the
    #file handle, so the coarsened fields are pulled into memory first
    with xr.open_dataset(geog_path) as geog_full:
        geog = geog_full[['CLAT', 'CLONG']].coarsen(window, boundary='trim').mean().load()
    
    #coarsen the dataset (block sums) and attach the block-mean lat/lon
    ds = ds.coarsen(window, boundary='trim').sum()
    ds = xr.merge([ds, geog.squeeze()])
    ds = ds.rename({"CLONG": 'lon', 'CLAT': 'lat'})

    #expose the grid indices as x/y and promote lat/lon from data variables to coordinates
    ds = ds.assign_coords({'x': ds.west_east, 'y': ds.south_north})
    ds = ds.assign_coords({'lon': ds.lon, 'lat': ds.lat})

    #build the nearest-neighbour index on the lat/lon coordinates
    ds.xoak.set_index(['lat', 'lon'], 'sklearn_geo_balltree')
    
    return ds

In [3]:
#open one time step of a WRF-BCC file; it is only used as a template for the grid
template_path = '/home/scratch/WRF_BCC/severe_weather/UP_HELI_MAX/historical/1990-1991/UP_HELI_MAX_historical-1990-1991_1990-10-01.nc'
ds = xr.open_dataset(template_path).sel(Time='1990-10-01T00:00:00.000000000')

#work on a copy and build the coarsened version with lat/lon attached
ds_copy = ds.copy()
ds_copy_cor = Data_Conv(ds_copy)

In [4]:
#read the USA shapefile
usa = gpd.read_file("/home/jcorner1/Unidata/shapefiles/smoothing_econus.shp")

#rasterise the shapefile onto the coarsened lon/lat grid
state_mask = regionmask.mask_geopandas(usa, ds_copy_cor.lon, ds_copy_cor.lat)

#turn the mask into a multiplier: every non-NaN cell becomes 1, NaN cells stay NaN
#(the array is modified in place)
ma = state_mask.values
np.putmask(ma, ~np.isnan(ma), 1)

### Load Data

In [5]:
#load the proxy data
#create list of the proxies
varis = ['UVV', 'UH', 'DVV']

for var in varis:

    #read the WRF-BCC table for this proxy and parse its time stamps
    proxy_df = pd.read_csv(f'/home/scratch/jcorner1/syn_sev/dataframes/HIST_{var}_REFC_regrid_threshold_dataframe.csv')
    proxy_df['Time'] = pd.to_datetime(proxy_df['Time'])

    #publish the table as the notebook-level name df_<proxy>
    globals()[f'df_{var}'] = proxy_df

#store DVV as magnitudes
df_DVV['DVV'] = np.abs(df_DVV['DVV'].to_numpy())

In [6]:
#load the hazard data
#hazard types for which a regridded report table exists
hazs = ['tor', 'hail', 'wind']

for haz in hazs:
    df_obs = pd.read_csv(f'/home/scratch/jcorner1/syn_sev/dataframes/SPC_LSRs_regridded_{haz}_fix.csv')

    #split each 'Nearest' string on whitespace once; token 1 is kept as the
    #latitude and token 7 as the longitude (both stay strings)
    tokens = [nearest.split() for nearest in df_obs['Nearest']]
    lat = [parts[1] for parts in tokens]
    lon = [parts[7] for parts in tokens]
    df_obs['Lat'] = lat
    df_obs['Lon'] = lon

    #merge date and time into one timestamp shifted forward by 6 hours, then
    #take the calendar date 12 hours after that as the grouping date
    stamp = pd.to_datetime(df_obs['date'] + ' ' + df_obs['time'])
    df_obs['datetime'] = stamp + pd.DateOffset(hours=6)
    df_obs['con_date'] = (df_obs['datetime'] + pd.DateOffset(hours=12)).dt.date

    #keep one report per grid point and con_date, then publish as df_<hazard>
    df_obs = df_obs.drop_duplicates(subset=['Lat', 'Lon', 'con_date'])
    globals()[f'df_{haz}'] = df_obs

#stack the three hazards into the all-hazard table
df_all = pd.concat([df_tor, df_wind, df_hail])

In [7]:
#add the all hazard to the list (only once, so re-running this cell is harmless)
if 'all' not in hazs:
    hazs = hazs + ['all']

#the coarsened template grid is the same for every hazard, so build it once
ds_copy_cor = Data_Conv(ds_copy)
grid_lat = ds_copy_cor.lat.values
grid_lon = ds_copy_cor.lon.values

#iterate through the dataframes.
for haz in hazs:

    #create an array of zeros with the shape of the coarsened field
    zeros = np.zeros((ds_copy_cor.UP_HELI_MAX.values.shape))

    #add one to the grid cell of every report: the row index comes from the
    #exact latitude match and the column index from the exact longitude match
    for rid, row in globals()[f'df_{haz}'].iterrows():
        y1, x1 = np.where(grid_lat == float(row.Lat))
        y2, x2 = np.where(grid_lon == float(row.Lon))
        zeros[y1[0], x2[0]] += 1

    #store the counts (multiplied by the mask, so NaN cells of `ma` become NaN)
    #under the notebook-level name ds_<hazard>
    globals()[f'ds_{haz}'] = ds_copy_cor.assign(UP_HELI_MAX = (('south_north', 'west_east'), zeros * ma))

### Validation Loop

In [8]:
thrs_vals = []

#threshold sweep for each proxy: (lower bound, upper bound, step), bounds inclusive
proxy_ranges = {'UH': (35, 85, 5), 'UVV': (18, 30, 1), 'DVV': (5, 10, 1)}

#hazards verified against, in the order of the FSS_* output columns
fss_hazards = ['tor', 'wind', 'hail', 'all']

#read the two geography fields once and close the file; only CLAT/CLONG are
#carried through the per-day coarsening below
with xr.open_dataset("/home/scratch/WRF_BCC/geography/geo_em.d01.nc") as geog_file:
    geog = geog_file[['CLAT', 'CLONG']].load()

for i in range(len(varis)):
    proxy = varis[i]
    lb, ub, step = proxy_ranges[proxy]
    df_wrf = globals()[f'df_{proxy}']

    #one dict per threshold combination; the dataframe is built once at the end
    records = []

    for refc in np.arange(40, 56, 5):

        for var_val in np.arange(lb, ub+1, step):
            print(f'{proxy}:{var_val}, DBZ:{refc}')

            #subset the dataframe based on current threshold values
            df_sub = df_wrf[((df_wrf['DBZ'] >= refc) & (df_wrf[proxy] >= var_val))]

            #one coarsened 0/1 grid per 24-h window (windows start at 12:00)
            uhs = []
            for did, day in df_sub.resample('24h', origin='1990-10-01 12:00:00', on='Time'):

                #flag every native grid point exceeding the thresholds in this window
                results = np.zeros(shape=(899, 1399))
                results[day.y.values, day.x.values] = 1

                #attach the flags to the geography fields and coarsen 20x20; a
                #coarse cell is 1 when any native point inside it was flagged
                uh_uvv = deepcopy(geog)
                uh_uvv = uh_uvv.assign_coords({'Time': np.array([did])})
                uh_uvv = uh_uvv.assign(UH_VVV_DAYS = (('Time', 'south_north', 'west_east'), np.expand_dims(results, axis=0)))
                uh_uvv = uh_uvv[['CLAT', 'CLONG', 'UH_VVV_DAYS']].coarsen(south_north=20, west_east=20, boundary='trim').mean()
                uh_uvv['UH_VVV_DAYS'] = 1*(uh_uvv['UH_VVV_DAYS'] > 0)
                uhs.append(uh_uvv)

            #number of flagged windows per coarse cell; if nothing passed the
            #thresholds there is nothing to concatenate, so use an all-zero field
            if uhs:
                uhs = xr.concat(uhs, dim='Time')
                day_counts = np.sum(uhs.UH_VVV_DAYS.values, axis=0)
            else:
                day_counts = np.zeros(ds_all.UP_HELI_MAX.values.shape)

            #verify with FSS! (20 and 200 are passed as pysteps' threshold and scale arguments)
            #the threshold column is named after the proxy being swept
            record = {'DBZ': refc, proxy: var_val}
            for fss_haz in fss_hazards:
                obs_counts = globals()[f'ds_{fss_haz}'].UP_HELI_MAX.values
                record[f'FSS_{fss_haz}'] = pysteps.verification.spatialscores.fss(day_counts, obs_counts, 20, 200)
            records.append(record)

    #create a new dataframe and write it out
    df = pd.DataFrame(records, columns=['DBZ', proxy, 'FSS_tor', 'FSS_wind', 'FSS_hail', 'FSS_all'])
    df.to_csv(f'/home/scratch/jcorner1/syn_sev/dataframes/FSS/FSS_{proxy}_dataframe.csv')

UVV:18, DBZ:40
UVV:19, DBZ:40
UVV:20, DBZ:40
UVV:21, DBZ:40
UVV:22, DBZ:40
UVV:23, DBZ:40
UVV:24, DBZ:40
UVV:25, DBZ:40
UVV:26, DBZ:40
UVV:27, DBZ:40
UVV:28, DBZ:40
UVV:29, DBZ:40
UVV:30, DBZ:40
UVV:18, DBZ:45
UVV:19, DBZ:45
UVV:20, DBZ:45
UVV:21, DBZ:45
UVV:22, DBZ:45
UVV:23, DBZ:45
UVV:24, DBZ:45
UVV:25, DBZ:45
UVV:26, DBZ:45
UVV:27, DBZ:45
UVV:28, DBZ:45
UVV:29, DBZ:45
UVV:30, DBZ:45
UVV:18, DBZ:50
UVV:19, DBZ:50
UVV:20, DBZ:50
UVV:21, DBZ:50
UVV:22, DBZ:50
UVV:23, DBZ:50
UVV:24, DBZ:50
UVV:25, DBZ:50
UVV:26, DBZ:50
UVV:27, DBZ:50
UVV:28, DBZ:50
UVV:29, DBZ:50
UVV:30, DBZ:50
UVV:18, DBZ:55
UVV:19, DBZ:55
UVV:20, DBZ:55
UVV:21, DBZ:55
UVV:22, DBZ:55
UVV:23, DBZ:55
UVV:24, DBZ:55
UVV:25, DBZ:55
UVV:26, DBZ:55
UVV:27, DBZ:55
UVV:28, DBZ:55
UVV:29, DBZ:55
UVV:30, DBZ:55
UH:35, DBZ:40
UH:40, DBZ:40
UH:45, DBZ:40
UH:50, DBZ:40
UH:55, DBZ:40
UH:60, DBZ:40
UH:65, DBZ:40
UH:70, DBZ:40
UH:75, DBZ:40
UH:80, DBZ:40
UH:85, DBZ:40
UH:35, DBZ:45
UH:40, DBZ:45
UH:45, DBZ:45
UH:50, DBZ:45
UH:55, DBZ:45
UH:60, DBZ: