# Thresholding Variables and Dataframe Creation

In [None]:
import glob
import warnings

import geopandas as gpd
import numpy as np
import pandas as pd
import regionmask
import xarray as xr
import xoak

warnings.filterwarnings('ignore')

### Opening Datasets

In [None]:
#locate the WRF-BCC historical entries for each variable, in sorted order

#updraft helicity: every sorted entry except the final one is kept
uh_dirts = sorted(glob.glob('/home/scratch/WRF_BCC/severe_weather/UP_HELI_MAX/historical/*'))[:-1]

#reflectivity: the complete sorted listing is kept
ref_dirts = sorted(glob.glob('/home/scratch/WRF_BCC/reflectivity/REFD/historical/*'))

#maximum upward vertical velocity: the complete sorted listing is kept
uvv_dirts = sorted(glob.glob('/home/scratch/WRF_BCC/severe_weather/W_UP_MAX/historical/*'))

### Mask the CONUS

In [None]:
#open the geography file together with one sample reflectivity file
geog = xr.open_dataset("/home/scratch/WRF_BCC/geography/geo_em.d01.nc")
ds = xr.open_dataset('/home/scratch/WRF_BCC/reflectivity/REFD/historical/1990-1991/REFD_historical-1990-1991_1990-10-01.nc')

#merge the two datasets, expose CLONG/CLAT as lon/lat, and attach
#west_east/south_north as x/y coordinates
ds = xr.merge([ds, geog.squeeze()]).rename(CLONG='lon', CLAT='lat')
ds = ds.assign_coords(x=ds.west_east, y=ds.south_north)
ds = ds.assign_coords(lon=ds.lon, lat=ds.lat)

#build the 'sklearn_geo_balltree' index over lat/lon through the xoak accessor
ds.xoak.set_index(['lat', 'lon'], 'sklearn_geo_balltree')

In [None]:
#read the polygons from the shapefile
usa = gpd.read_file("/home/jcorner1/Unidata/shapefiles/smoothing_econus.shp")

#build a mask from those polygons on the dataset's lon/lat coordinates
state_mask = regionmask.mask_geopandas(usa, ds.lon, ds.lat)

#set every non-NaN mask entry to 1; NaN entries are left as they are
ma = state_mask.values
ma[np.logical_not(np.isnan(ma))] = 1


### Creating Data

In [3]:
#thresholds applied to each variable; a grid point is kept when ANY one of them is met
REFD_THRESHOLD = 45.0
UH_THRESHOLD = 65.0
UVV_THRESHOLD = 18.0


def _threshold_to_dataframe(refc_val, uh_val, uvv_val, times):
    """Collect every grid point where at least one variable meets its threshold.

    Parameters
    ----------
    refc_val, uh_val, uvv_val : numpy.ndarray
        REFD, UP_HELI_MAX and W_UP_MAX values. All three are indexed as
        [time, y, x] and must share one shape.
    times : numpy.ndarray
        Time values matching the first axis of the data arrays. Must be
        datetime64, because they are passed to ``np.datetime_as_string``.

    Returns
    -------
    pandas.DataFrame
        One row per selected point with columns x, y, Time, DBZ, UH, UVV,
        ordered by (time, y, x). Empty when nothing meets a threshold.
    """
    #a single boolean mask replaces the chained Dataset.where calls, each of
    #which produced another full-size copy of the data
    exceeds = ((refc_val >= REFD_THRESHOLD)
               | (uh_val >= UH_THRESHOLD)
               | (uvv_val >= UVV_THRESHOLD))
    t_idx, y_idx, x_idx = np.nonzero(exceeds)

    #build the frame in one vectorized step: DataFrame.append was removed in
    #pandas 2.0 and growing a frame row by row is quadratic in the row count
    return pd.DataFrame({'x': x_idx,
                         'y': y_idx,
                         'Time': np.datetime_as_string(times[t_idx]),
                         'DBZ': refc_val[t_idx, y_idx, x_idx],
                         'UH': uh_val[t_idx, y_idx, x_idx],
                         'UVV': uvv_val[t_idx, y_idx, x_idx]},
                        columns=['x', 'y', 'Time', 'DBZ', 'UH', 'UVV'])


#iterate through each year (directory); the three lists are paired by sorted
#position and zip stops at the shortest one instead of raising IndexError
for uh_dirt, ref_dirt, uvv_dirt in zip(uh_dirts, ref_dirts, uvv_dirts):

    #open all the data within the directory
    print(f'current year: {int(uh_dirt[-4:])-1}')

    #context managers close the files even if loading the values fails
    with xr.open_mfdataset(f'{uh_dirt}/*HELI_MAX*.nc') as uh_ds, \
         xr.open_mfdataset(f'{ref_dirt}/*.nc') as ref_ds, \
         xr.open_mfdataset(f'{uvv_dirt}/*.nc') as uvv_ds:

        #grab all values
        uh_val = uh_ds.UP_HELI_MAX.values
        refc_val = ref_ds.REFD.values
        uvv_val = uvv_ds.W_UP_MAX.values

        #subset times
        times = ref_ds.Time.values

    #keep every point where reflectivity, updraft helicity or upward vertical
    #velocity meets its threshold
    df = _threshold_to_dataframe(refc_val, uh_val, uvv_val, times)
    print(f'done thresholding! {len(df)} potential storms')

    #the file name uses the year of the last selected point, as before; when a
    #year has no points, fall back to the year at the end of the directory name
    #so the name is never undefined or carried over from the previous year
    if len(df) > 0:
        file_year = df['Time'].iloc[-1][:4]
    else:
        file_year = uh_dirt[-4:]

    #Save the dataframe as the csv.
    df.to_csv(f'/home/scratch/jcorner1/syn_sev/dataframes/HIST{file_year}_threshold_dataframe.csv')


current year: 1990
thresholding



KeyboardInterrupt



In [7]:
#display `df` as last assigned by the threshold loop (relies on it already existing in the kernel)
df

Unnamed: 0,x,y,Time,DBZ,UH,UVV
0,199,884,1990-10-01T02:00:00.000000000,50.171810,0.069058,5.429051
1,200,884,1990-10-01T02:00:00.000000000,48.261189,0.067437,4.880546
2,209,884,1990-10-01T02:00:00.000000000,45.690262,0.000000,1.415562
3,1153,73,1990-10-01T03:00:00.000000000,48.092491,0.652343,6.663234
4,1154,73,1990-10-01T03:00:00.000000000,47.057671,1.224216,6.449692
...,...,...,...,...,...,...
377474,748,168,1991-01-14T21:00:00.000000000,47.184074,81.350601,14.882171
377475,749,168,1991-01-14T21:00:00.000000000,52.361961,55.496975,15.004825
377476,750,168,1991-01-14T21:00:00.000000000,54.879627,69.994072,15.545420
377477,751,168,1991-01-14T21:00:00.000000000,53.644367,58.188797,12.606061


In [9]:
#display `uvv_ds`, the W_UP_MAX dataset opened inside the threshold loop (relies on leftover kernel state)
uvv_ds

Unnamed: 0,Array,Chunk
Bytes,40.94 GiB,115.15 MiB
Shape,"(8737, 899, 1399)","(24, 899, 1399)"
Count,1095 Tasks,365 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 40.94 GiB 115.15 MiB Shape (8737, 899, 1399) (24, 899, 1399) Count 1095 Tasks 365 Chunks Type float32 numpy.ndarray",1399  899  8737,

Unnamed: 0,Array,Chunk
Bytes,40.94 GiB,115.15 MiB
Shape,"(8737, 899, 1399)","(24, 899, 1399)"
Count,1095 Tasks,365 Chunks
Type,float32,numpy.ndarray
