# Image Creation

In [1]:
import xarray as xr
import numpy as np
import pandas as pd
import glob
import xoak
import geopandas as gpd
import regionmask

import warnings
warnings.filterwarnings('ignore')

In [2]:
#open the WRF-BCC geography file and every 1990-1991 historical REFD file.
#in the cells visible here `ds` is only used to supply the model grid (lon/lat).
geog = xr.open_dataset("/home/scratch/WRF_BCC/geography/geo_em.d01.nc")
ds = xr.open_mfdataset('/home/scratch/WRF_BCC/reflectivity/REFD/historical/1990-1991/*.nc')

#merge reflectivity with the squeezed geography fields and expose CLONG/CLAT as lon/lat
merged = xr.merge([ds, geog.squeeze()])
ds = merged.rename({"CLONG": 'lon', 'CLAT': 'lat'})

#attach grid-index coordinates (x, y), then register lon/lat as coordinates
grid_coords = {'x': ds.west_east, 'y': ds.south_north}
ds = ds.assign_coords(grid_coords)
geo_coords = {'lon': ds.lon, 'lat': ds.lat}
ds = ds.assign_coords(geo_coords)

#build the xoak ball-tree index on (lat, lon)
ds.xoak.set_index(['lat', 'lon'], 'sklearn_geo_balltree')

In [3]:
#load the USA shapefile used to restrict detections to the area of interest
usa = gpd.read_file("/home/jcorner1/Unidata/shapefiles/smoothing_econus.shp")

#rasterise the polygons onto the model grid (built from ds.lon / ds.lat)
state_mask = regionmask.mask_geopandas(usa, ds.lon, ds.lat)

#collapse the mask to 1 where a cell is not NaN and NaN elsewhere.
#np.where builds a new array, so `state_mask` is no longer modified through
#the view returned by `.values` and re-running later cells cannot see a half-edited mask.
ma = np.where(np.isnan(state_mask.values), np.nan, 1.0)

## Historic Period Data

In [4]:
#find all the directories for W_UP_MAX and REFD and sort them; entries are paired by
#position, which assumes both trees contain the same directory names.
#print statements are to show what part of the for loop the program is in.
#NOTE(review): this cell is repeated for every scenario below with only the paths
#changed; a single function taking the scenario name would remove the duplication.
ref_dirts = sorted(glob.glob('/home/scratch/WRF_BCC/reflectivity/REFD/historical/*'))
uvv_dirts = sorted(glob.glob('/home/scratch/WRF_BCC/severe_weather/W_UP_MAX/historical/*'))

#thresholds and image half-width, named once instead of repeated inline.
REFD_MIN = 40.0
W_UP_MIN = 25.0
HALF_WIDTH = 68

#iterate through each year (directory) and month.
for dirt_number in range(len(uvv_dirts)):
    ref_dirt = ref_dirts[dirt_number]
    uvv_dirt = uvv_dirts[dirt_number]

    for month in [10, 11, 12, 1, 2, 3, 4, 5, 6, 7, 8, 9]:
        month_str = str(month).zfill(2)

        #the printed year is the 4-digit suffix of the directory name minus one.
        print(f'current year: {int(uvv_dirt[-4:])-1}, {month}')

        #files of this directory whose names match the month
        ref_files = glob.glob(f'{ref_dirt}/*{ref_dirt[-4:]}_*-{month_str}-*.nc')
        uvv_files = glob.glob(f'{uvv_dirt}/*-{month_str}-*.nc')

        #one metadata record per saved image; turned into a DataFrame once at the end
        #(DataFrame.append was removed in pandas 2.0 and copied the frame on every call).
        records = []

        #the with-block closes both datasets even if something below raises.
        with xr.open_mfdataset(ref_files) as ref_ds, xr.open_mfdataset(uvv_files) as uvv_ds:

            #subset times
            times = ref_ds.Time.values

            #candidate centre points: REFD above 40 AND W_UP_MAX above 25 AND inside the mask.
            #strict '>' reproduces the previous where()/multiply logic, in which a value
            #exactly equal to a threshold never produced a product of 1.
            strong_refd = (ref_ds.REFD > REFD_MIN).values
            strong_uvv = (uvv_ds.W_UP_MAX > W_UP_MIN).values
            locations = np.where(strong_refd & strong_uvv & (ma == 1))
            print(f'Done Thresholding! Creating {len(locations[0])} images!')

            #iterate through all potential center points (time index, y index, x index).
            for point, (time, y, x) in enumerate(zip(*locations)):

                #file name encodes MMDDYYYY, hour and the running point id
                str_time = np.datetime_as_string(times[time])
                filename = f"{str_time[5:7]}{str_time[8:10]}{str_time[:4]}_T{str_time[11:13]}_{point}id.npy"

                #slice a window of 2*HALF_WIDTH cells around the centre point.
                #NOTE(review): the window is not clipped; a centre closer than HALF_WIDTH
                #cells to the grid edge yields a negative start / short slice — confirm the
                #mask keeps centres away from the boundary.
                image = ref_ds.REFD.sel(Time=times[time],
                                        south_north=slice(y-HALF_WIDTH, y+HALF_WIDTH),
                                        west_east=slice(x-HALF_WIDTH, x+HALF_WIDTH)).values

                #save file as .npy file
                np.save(file=f"/home/scratch/jcorner1/Thesis/future_modes/storm_images/HIST/{filename}", arr=image)

                #remember the metadata for the csv
                records.append({'x': x, 'y': y, 'Filename': filename, 'Time': str_time})

        #build the table in one go (still has the right header when there are no images).
        df = pd.DataFrame(records, columns=['x', 'y', 'Filename', 'Time'])

        #year for the csv name: the last image's year as before; when the month produced no
        #images fall back to the month's first time stamp instead of a stale `str_time`
        #left over from a previous month (which could overwrite another year's csv).
        csv_year = records[-1]['Time'][:4] if records else np.datetime_as_string(times[0])[:4]

        #Save the dataframe as the csv.
        df.to_csv(f'/home/scratch/jcorner1/Thesis/future_modes/csvs/HIST/storm_images_{csv_year}_{month}_.csv')

current year: 1990, 10
Done Thresholding! Creating 16 images!
current year: 1990, 11
Done Thresholding! Creating 0 images!
current year: 1990, 12
Done Thresholding! Creating 2 images!
current year: 1990, 1
Done Thresholding! Creating 789 images!
current year: 1990, 2
Done Thresholding! Creating 563 images!
current year: 1990, 3
Done Thresholding! Creating 401 images!
current year: 1990, 4
Done Thresholding! Creating 214 images!
current year: 1990, 5
Done Thresholding! Creating 1485 images!
current year: 1990, 6
Done Thresholding! Creating 2156 images!
current year: 1990, 7
Done Thresholding! Creating 1881 images!
current year: 1990, 8
Done Thresholding! Creating 1169 images!
current year: 1990, 9
Done Thresholding! Creating 281 images!
current year: 1991, 10
Done Thresholding! Creating 1 images!
current year: 1991, 11
Done Thresholding! Creating 0 images!
current year: 1991, 12
Done Thresholding! Creating 65 images!
current year: 1991, 1
Done Thresholding! Creating 1 images!
current ye

## End of Century 4.5

In [9]:
#find all the directories for W_UP_MAX and REFD and sort them; entries are paired by
#position, which assumes both trees contain the same directory names.
#print statements are to show what part of the for loop the program is in.
uvv_dirts = sorted(glob.glob('/home/scratch/WRF_BCC/severe_weather/W_UP_MAX/end_of_century_4p5/*'))
ref_dirts = sorted(glob.glob('/home/scratch/WRF_BCC/reflectivity/REFD/end_of_century_4p5/*'))

#thresholds and image half-width, named once instead of repeated inline.
REFD_MIN = 40.0
W_UP_MIN = 25.0
HALF_WIDTH = 68

#iterate through each year (directory) and month.
for dirt_number in range(len(uvv_dirts)):
    ref_dirt = ref_dirts[dirt_number]
    uvv_dirt = uvv_dirts[dirt_number]

    for month in [10, 11, 12, 1, 2, 3, 4, 5, 6, 7, 8, 9]:
        month_str = str(month).zfill(2)

        #the printed year is the 4-digit suffix of the directory name minus one.
        print(f'current year: {int(uvv_dirt[-4:])-1}, {month}')

        #files of this directory whose names match the month
        ref_files = glob.glob(f'{ref_dirt}/*{ref_dirt[-4:]}_*-{month_str}-*.nc')
        uvv_files = glob.glob(f'{uvv_dirt}/*-{month_str}-*.nc')

        #one metadata record per saved image; turned into a DataFrame once at the end
        #(DataFrame.append was removed in pandas 2.0 and copied the frame on every call).
        records = []

        #the with-block closes both datasets even if something below raises.
        with xr.open_mfdataset(ref_files) as ref_ds, xr.open_mfdataset(uvv_files) as uvv_ds:

            #subset times
            times = ref_ds.Time.values

            #candidate centre points: REFD above 40 AND W_UP_MAX above 25 AND inside the mask.
            #strict '>' reproduces the previous where()/multiply logic, in which a value
            #exactly equal to a threshold never produced a product of 1.
            strong_refd = (ref_ds.REFD > REFD_MIN).values
            strong_uvv = (uvv_ds.W_UP_MAX > W_UP_MIN).values
            locations = np.where(strong_refd & strong_uvv & (ma == 1))
            print(f'Done Thresholding! Creating {len(locations[0])} images!')

            #iterate through all potential center points (time index, y index, x index).
            for point, (time, y, x) in enumerate(zip(*locations)):

                #file name encodes MMDDYYYY, hour and the running point id
                str_time = np.datetime_as_string(times[time])
                filename = f"{str_time[5:7]}{str_time[8:10]}{str_time[:4]}_T{str_time[11:13]}_{point}id.npy"

                #slice a window of 2*HALF_WIDTH cells around the centre point.
                #NOTE(review): the window is not clipped; a centre closer than HALF_WIDTH
                #cells to the grid edge yields a negative start / short slice — confirm the
                #mask keeps centres away from the boundary.
                image = ref_ds.REFD.sel(Time=times[time],
                                        south_north=slice(y-HALF_WIDTH, y+HALF_WIDTH),
                                        west_east=slice(x-HALF_WIDTH, x+HALF_WIDTH)).values

                #save file as .npy file
                np.save(file=f"/home/scratch/jcorner1/Thesis/future_modes/storm_images/END4p5/{filename}", arr=image)

                #remember the metadata for the csv
                records.append({'x': x, 'y': y, 'Filename': filename, 'Time': str_time})

        #build the table in one go (still has the right header when there are no images).
        df = pd.DataFrame(records, columns=['x', 'y', 'Filename', 'Time'])

        #year for the csv name: the last image's year as before; when the month produced no
        #images fall back to the month's first time stamp instead of a stale `str_time`
        #left over from a previous month (which could overwrite another year's csv).
        csv_year = records[-1]['Time'][:4] if records else np.datetime_as_string(times[0])[:4]

        #Save the dataframe as the csv.
        df.to_csv(f'/home/scratch/jcorner1/Thesis/future_modes/csvs/END4p5/storm_images_{csv_year}_{month}_.csv')

current year: 2085, 10
Done Thresholding! Creating 176 images!
current year: 2085, 11
Done Thresholding! Creating 2 images!
current year: 2085, 12
Done Thresholding! Creating 109 images!
current year: 2085, 1
Done Thresholding! Creating 0 images!
current year: 2085, 2
Done Thresholding! Creating 330 images!
current year: 2085, 3
Done Thresholding! Creating 342 images!
current year: 2085, 4
Done Thresholding! Creating 2889 images!
current year: 2085, 5
Done Thresholding! Creating 2245 images!
current year: 2085, 6
Done Thresholding! Creating 3389 images!
current year: 2085, 7
Done Thresholding! Creating 2596 images!
current year: 2085, 8
Done Thresholding! Creating 1408 images!
current year: 2085, 9
Done Thresholding! Creating 252 images!
current year: 2086, 10
Done Thresholding! Creating 227 images!
current year: 2086, 11
Done Thresholding! Creating 104 images!
current year: 2086, 12
Done Thresholding! Creating 0 images!
current year: 2086, 1
Done Thresholding! Creating 11 images!
curr

## End of Century 8.5

In [10]:
#find all the directories for W_UP_MAX and REFD and sort them; entries are paired by
#position, which assumes both trees contain the same directory names.
#print statements are to show what part of the for loop the program is in.
uvv_dirts = sorted(glob.glob('/home/scratch/WRF_BCC/severe_weather/W_UP_MAX/end_of_century_8p5/*'))
ref_dirts = sorted(glob.glob('/home/scratch/WRF_BCC/reflectivity/REFD/end_of_century_8p5/*'))

#thresholds and image half-width, named once instead of repeated inline.
REFD_MIN = 40.0
W_UP_MIN = 25.0
HALF_WIDTH = 68

#iterate through each year (directory) and month.
for dirt_number in range(len(uvv_dirts)):
    ref_dirt = ref_dirts[dirt_number]
    uvv_dirt = uvv_dirts[dirt_number]

    for month in [10, 11, 12, 1, 2, 3, 4, 5, 6, 7, 8, 9]:
        month_str = str(month).zfill(2)

        #the printed year is the 4-digit suffix of the directory name minus one.
        print(f'current year: {int(uvv_dirt[-4:])-1}, {month}')

        #files of this directory whose names match the month
        ref_files = glob.glob(f'{ref_dirt}/*{ref_dirt[-4:]}_*-{month_str}-*.nc')
        uvv_files = glob.glob(f'{uvv_dirt}/*-{month_str}-*.nc')

        #one metadata record per saved image; turned into a DataFrame once at the end
        #(DataFrame.append was removed in pandas 2.0 and copied the frame on every call).
        records = []

        #the with-block closes both datasets even if something below raises.
        with xr.open_mfdataset(ref_files) as ref_ds, xr.open_mfdataset(uvv_files) as uvv_ds:

            #subset times
            times = ref_ds.Time.values

            #candidate centre points: REFD above 40 AND W_UP_MAX above 25 AND inside the mask.
            #strict '>' reproduces the previous where()/multiply logic, in which a value
            #exactly equal to a threshold never produced a product of 1.
            strong_refd = (ref_ds.REFD > REFD_MIN).values
            strong_uvv = (uvv_ds.W_UP_MAX > W_UP_MIN).values
            locations = np.where(strong_refd & strong_uvv & (ma == 1))
            print(f'Done Thresholding! Creating {len(locations[0])} images!')

            #iterate through all potential center points (time index, y index, x index).
            for point, (time, y, x) in enumerate(zip(*locations)):

                #file name encodes MMDDYYYY, hour and the running point id
                str_time = np.datetime_as_string(times[time])
                filename = f"{str_time[5:7]}{str_time[8:10]}{str_time[:4]}_T{str_time[11:13]}_{point}id.npy"

                #slice a window of 2*HALF_WIDTH cells around the centre point.
                #NOTE(review): the window is not clipped; a centre closer than HALF_WIDTH
                #cells to the grid edge yields a negative start / short slice — confirm the
                #mask keeps centres away from the boundary.
                image = ref_ds.REFD.sel(Time=times[time],
                                        south_north=slice(y-HALF_WIDTH, y+HALF_WIDTH),
                                        west_east=slice(x-HALF_WIDTH, x+HALF_WIDTH)).values

                #save file as .npy file
                np.save(file=f"/home/scratch/jcorner1/Thesis/future_modes/storm_images/END8p5/{filename}", arr=image)

                #remember the metadata for the csv
                records.append({'x': x, 'y': y, 'Filename': filename, 'Time': str_time})

        #build the table in one go (still has the right header when there are no images).
        df = pd.DataFrame(records, columns=['x', 'y', 'Filename', 'Time'])

        #year for the csv name: the last image's year as before; when the month produced no
        #images fall back to the month's first time stamp instead of a stale `str_time`
        #left over from a previous month (which could overwrite another year's csv).
        csv_year = records[-1]['Time'][:4] if records else np.datetime_as_string(times[0])[:4]

        #Save the dataframe as the csv.
        df.to_csv(f'/home/scratch/jcorner1/Thesis/future_modes/csvs/END8p5/storm_images_{csv_year}_{month}_.csv')

current year: 2085, 10
Done Thresholding! Creating 958 images!
current year: 2085, 11
Done Thresholding! Creating 236 images!
current year: 2085, 12
Done Thresholding! Creating 24 images!
current year: 2085, 1
Done Thresholding! Creating 0 images!
current year: 2085, 2
Done Thresholding! Creating 54 images!
current year: 2085, 3
Done Thresholding! Creating 1727 images!
current year: 2085, 4
Done Thresholding! Creating 1219 images!
current year: 2085, 5
Done Thresholding! Creating 2887 images!
current year: 2085, 6
Done Thresholding! Creating 5083 images!
current year: 2085, 7
Done Thresholding! Creating 3368 images!
current year: 2085, 8
Done Thresholding! Creating 2044 images!
current year: 2085, 9
Done Thresholding! Creating 180 images!
current year: 2086, 10
Done Thresholding! Creating 327 images!
current year: 2086, 11
Done Thresholding! Creating 485 images!
current year: 2086, 12
Done Thresholding! Creating 210 images!
current year: 2086, 1
Done Thresholding! Creating 23 images!
c

### MID 4.5

In [4]:
#find all the directories for W_UP_MAX and REFD and sort them; entries are paired by
#position, which assumes both trees contain the same directory names.
#print statements are to show what part of the for loop the program is in.
uvv_dirts = sorted(glob.glob('/home/scratch/WRF_BCC/severe_weather/W_UP_MAX/mid_century_4p5_FIXED/*'))
ref_dirts = sorted(glob.glob('/home/scratch/WRF_BCC/reflectivity/REFD/mid_century_4p5_FIXED/*'))

#thresholds and image half-width, named once instead of repeated inline.
REFD_MIN = 40.0
W_UP_MIN = 25.0
HALF_WIDTH = 68

#iterate through each year (directory) and month.
for dirt_number in range(len(uvv_dirts)):
    ref_dirt = ref_dirts[dirt_number]
    uvv_dirt = uvv_dirts[dirt_number]

    for month in [10, 11, 12, 1, 2, 3, 4, 5, 6, 7, 8, 9]:
        month_str = str(month).zfill(2)

        #the printed year is the 4-digit suffix of the directory name minus one.
        print(f'current year: {int(uvv_dirt[-4:])-1}, {month}')

        #files of this directory whose names match the month
        ref_files = glob.glob(f'{ref_dirt}/*{ref_dirt[-4:]}_*-{month_str}-*.nc')
        uvv_files = glob.glob(f'{uvv_dirt}/*-{month_str}-*.nc')

        #one metadata record per saved image; turned into a DataFrame once at the end
        #(DataFrame.append was removed in pandas 2.0 and copied the frame on every call).
        records = []

        #the with-block closes both datasets even if something below raises.
        with xr.open_mfdataset(ref_files) as ref_ds, xr.open_mfdataset(uvv_files) as uvv_ds:

            #subset times
            times = ref_ds.Time.values

            #candidate centre points: REFD above 40 AND W_UP_MAX above 25 AND inside the mask.
            #strict '>' reproduces the previous where()/multiply logic, in which a value
            #exactly equal to a threshold never produced a product of 1.
            strong_refd = (ref_ds.REFD > REFD_MIN).values
            strong_uvv = (uvv_ds.W_UP_MAX > W_UP_MIN).values
            locations = np.where(strong_refd & strong_uvv & (ma == 1))
            print(f'Done Thresholding! Creating {len(locations[0])} images!')

            #iterate through all potential center points (time index, y index, x index).
            for point, (time, y, x) in enumerate(zip(*locations)):

                #file name encodes MMDDYYYY, hour and the running point id
                str_time = np.datetime_as_string(times[time])
                filename = f"{str_time[5:7]}{str_time[8:10]}{str_time[:4]}_T{str_time[11:13]}_{point}id.npy"

                #slice a window of 2*HALF_WIDTH cells around the centre point.
                #NOTE(review): the window is not clipped; a centre closer than HALF_WIDTH
                #cells to the grid edge yields a negative start / short slice — confirm the
                #mask keeps centres away from the boundary.
                image = ref_ds.REFD.sel(Time=times[time],
                                        south_north=slice(y-HALF_WIDTH, y+HALF_WIDTH),
                                        west_east=slice(x-HALF_WIDTH, x+HALF_WIDTH)).values

                #save file as .npy file
                np.save(file=f"/home/scratch/jcorner1/Thesis/future_modes/storm_images/MID4p5/{filename}", arr=image)

                #remember the metadata for the csv
                records.append({'x': x, 'y': y, 'Filename': filename, 'Time': str_time})

        #build the table in one go (still has the right header when there are no images).
        df = pd.DataFrame(records, columns=['x', 'y', 'Filename', 'Time'])

        #year for the csv name: the last image's year as before; when the month produced no
        #images fall back to the month's first time stamp instead of a stale `str_time`
        #left over from a previous month (which could overwrite another year's csv).
        csv_year = records[-1]['Time'][:4] if records else np.datetime_as_string(times[0])[:4]

        #Save the dataframe as the csv.
        df.to_csv(f'/home/scratch/jcorner1/Thesis/future_modes/csvs/MID4p5/storm_images_{csv_year}_{month}_.csv')

current year: 2040, 10
Done Thresholding! Creating 173 images!
current year: 2040, 11
Done Thresholding! Creating 18 images!
current year: 2040, 12
Done Thresholding! Creating 0 images!
current year: 2040, 1
Done Thresholding! Creating 1 images!
current year: 2040, 2
Done Thresholding! Creating 1 images!
current year: 2040, 3
Done Thresholding! Creating 438 images!
current year: 2040, 4
Done Thresholding! Creating 221 images!
current year: 2040, 5
Done Thresholding! Creating 2836 images!
current year: 2040, 6
Done Thresholding! Creating 2934 images!
current year: 2040, 7
Done Thresholding! Creating 514 images!
current year: 2040, 8
Done Thresholding! Creating 344 images!
current year: 2040, 9
Done Thresholding! Creating 160 images!
current year: 2041, 10
Done Thresholding! Creating 4 images!
current year: 2041, 11
Done Thresholding! Creating 7 images!
current year: 2041, 12
Done Thresholding! Creating 2 images!
current year: 2041, 1
Done Thresholding! Creating 254 images!
current year:

### MID 8.5

In [None]:
#find all the directories for W_UP_MAX and REFD and sort them; entries are paired by
#position, which assumes both trees contain the same directory names.
#print statements are to show what part of the for loop the program is in.
uvv_dirts = sorted(glob.glob('/home/scratch/WRF_BCC/severe_weather/W_UP_MAX/mid_century_8p5_FIXED/*'))
ref_dirts = sorted(glob.glob('/home/scratch/WRF_BCC/reflectivity/REFD/mid_century_8p5_FIXED/*'))

#thresholds and image half-width, named once instead of repeated inline.
REFD_MIN = 40.0
W_UP_MIN = 25.0
HALF_WIDTH = 68

#iterate through each year (directory) and month.
for dirt_number in range(len(uvv_dirts)):
    ref_dirt = ref_dirts[dirt_number]
    uvv_dirt = uvv_dirts[dirt_number]

    for month in [10, 11, 12, 1, 2, 3, 4, 5, 6, 7, 8, 9]:
        month_str = str(month).zfill(2)

        #the printed year is the 4-digit suffix of the directory name minus one.
        print(f'current year: {int(uvv_dirt[-4:])-1}, {month}')

        #files of this directory whose names match the month
        ref_files = glob.glob(f'{ref_dirt}/*{ref_dirt[-4:]}_*-{month_str}-*.nc')
        uvv_files = glob.glob(f'{uvv_dirt}/*-{month_str}-*.nc')

        #one metadata record per saved image; turned into a DataFrame once at the end
        #(DataFrame.append was removed in pandas 2.0 and copied the frame on every call).
        records = []

        #the with-block closes both datasets even if something below raises.
        with xr.open_mfdataset(ref_files) as ref_ds, xr.open_mfdataset(uvv_files) as uvv_ds:

            #subset times
            times = ref_ds.Time.values

            #candidate centre points: REFD above 40 AND W_UP_MAX above 25 AND inside the mask.
            #strict '>' reproduces the previous where()/multiply logic, in which a value
            #exactly equal to a threshold never produced a product of 1.
            strong_refd = (ref_ds.REFD > REFD_MIN).values
            strong_uvv = (uvv_ds.W_UP_MAX > W_UP_MIN).values
            locations = np.where(strong_refd & strong_uvv & (ma == 1))
            print(f'Done Thresholding! Creating {len(locations[0])} images!')

            #iterate through all potential center points (time index, y index, x index).
            for point, (time, y, x) in enumerate(zip(*locations)):

                #file name encodes MMDDYYYY, hour and the running point id
                str_time = np.datetime_as_string(times[time])
                filename = f"{str_time[5:7]}{str_time[8:10]}{str_time[:4]}_T{str_time[11:13]}_{point}id.npy"

                #slice a window of 2*HALF_WIDTH cells around the centre point.
                #NOTE(review): the window is not clipped; a centre closer than HALF_WIDTH
                #cells to the grid edge yields a negative start / short slice — confirm the
                #mask keeps centres away from the boundary.
                image = ref_ds.REFD.sel(Time=times[time],
                                        south_north=slice(y-HALF_WIDTH, y+HALF_WIDTH),
                                        west_east=slice(x-HALF_WIDTH, x+HALF_WIDTH)).values

                #save file as .npy file
                np.save(file=f"/home/scratch/jcorner1/Thesis/future_modes/storm_images/MID8p5/{filename}", arr=image)

                #remember the metadata for the csv
                records.append({'x': x, 'y': y, 'Filename': filename, 'Time': str_time})

        #build the table in one go (still has the right header when there are no images).
        df = pd.DataFrame(records, columns=['x', 'y', 'Filename', 'Time'])

        #year for the csv name: the last image's year as before; when the month produced no
        #images fall back to the month's first time stamp instead of a stale `str_time`
        #left over from a previous month (which could overwrite another year's csv).
        csv_year = records[-1]['Time'][:4] if records else np.datetime_as_string(times[0])[:4]

        #Save the dataframe as the csv.
        df.to_csv(f'/home/scratch/jcorner1/Thesis/future_modes/csvs/MID8p5/storm_images_{csv_year}_{month}_.csv')

current year: 2040, 10
Done Thresholding! Creating 173 images!
current year: 2040, 11
Done Thresholding! Creating 6 images!
current year: 2040, 12
Done Thresholding! Creating 228 images!
current year: 2040, 1
Done Thresholding! Creating 2 images!
current year: 2040, 2
Done Thresholding! Creating 30 images!
current year: 2040, 3
Done Thresholding! Creating 875 images!
current year: 2040, 4
Done Thresholding! Creating 305 images!
current year: 2040, 5
Done Thresholding! Creating 3200 images!
current year: 2040, 6
Done Thresholding! Creating 2579 images!
current year: 2040, 7
Done Thresholding! Creating 2324 images!
current year: 2040, 8
Done Thresholding! Creating 1054 images!
current year: 2040, 9
Done Thresholding! Creating 223 images!
current year: 2041, 10
Done Thresholding! Creating 219 images!
current year: 2041, 11
Done Thresholding! Creating 4 images!
current year: 2041, 12
Done Thresholding! Creating 0 images!
current year: 2041, 1
Done Thresholding! Creating 12 images!
current 

### Zipping/Unzipping Data and Directories
To archive and compress a directory together with the files inside it, use:

```bash
tar -zcvf eoc8p5.tar.gz eoc8p5
```

Once the archive is in the correct location, extract the files with:

```bash
tar -xf eoc8p5.tar.gz
```


## Basic Steps of The Loop

In [14]:
#display the image-metadata table currently bound to `df`.
#NOTE(review): this cell's execution count is higher than that of the cells below that
#build `df`, so what it shows depends on kernel state — confirm after Restart & Run All.
df

Unnamed: 0,Index,x,y,Filename,Time
0,0,1156,228,10112004_T20_0id.npy,2004-10-11T20:00:00.000000000
1,1,829,226,11162004_T20_1id.npy,2004-11-16T20:00:00.000000000
2,2,849,224,11172004_T00_2id.npy,2004-11-17T00:00:00.000000000
3,3,850,224,11172004_T00_3id.npy,2004-11-17T00:00:00.000000000
4,4,850,225,11172004_T00_4id.npy,2004-11-17T00:00:00.000000000
...,...,...,...,...,...
10831,10831,610,600,09142005_T02_10831id.npy,2005-09-14T02:00:00.000000000
10832,10832,611,600,09142005_T02_10832id.npy,2005-09-14T02:00:00.000000000
10833,10833,612,600,09142005_T02_10833id.npy,2005-09-14T02:00:00.000000000
10834,10834,613,600,09142005_T02_10834id.npy,2005-09-14T02:00:00.000000000


In [5]:
#display whatever dataset is currently bound to `ref_ds`.
#NOTE(review): `ref_ds` is only assigned inside the scenario loops above, so this shows
#the last dataset a loop opened — confirm which one is intended.
ref_ds

Unnamed: 0,Array,Chunk
Bytes,40.94 GiB,115.15 MiB
Shape,"(8737, 899, 1399)","(24, 899, 1399)"
Count,1095 Tasks,365 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 40.94 GiB 115.15 MiB Shape (8737, 899, 1399) (24, 899, 1399) Count 1095 Tasks 365 Chunks Type float32 numpy.ndarray",1399  899  8737,

Unnamed: 0,Array,Chunk
Bytes,40.94 GiB,115.15 MiB
Shape,"(8737, 899, 1399)","(24, 899, 1399)"
Count,1095 Tasks,365 Chunks
Type,float32,numpy.ndarray


In [2]:
#file patterns for the June 1991 reflectivity and updraft helicity files
refc_pattern = '/home/scratch/ahaberlie/AFWA_REFLECTIVITY/HIST/1990-1991/REFD_COM_historical-1990-1991_1991-06*.nc'
uh_pattern = '/home/scratch/ahaberlie/AFWA_2-5KM_UPDRAFT_HELICITY/HIST/1990-1991/UP_HELI_MAX_historical-1990-1991_1991-06*.nc'

#open each set of files as one dataset
refc = xr.open_mfdataset(refc_pattern)
uh = xr.open_mfdataset(uh_pattern)

#show the updraft helicity dataset
uh

Unnamed: 0,Array,Chunk
Bytes,3.37 GiB,115.15 MiB
Shape,"(720, 899, 1399)","(24, 899, 1399)"
Count,90 Tasks,30 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 3.37 GiB 115.15 MiB Shape (720, 899, 1399) (24, 899, 1399) Count 90 Tasks 30 Chunks Type float32 numpy.ndarray",1399  899  720,

Unnamed: 0,Array,Chunk
Bytes,3.37 GiB,115.15 MiB
Shape,"(720, 899, 1399)","(24, 899, 1399)"
Count,90 Tasks,30 Chunks
Type,float32,numpy.ndarray


In [3]:
#time stamps of the reflectivity dataset, used later to label each detection
times = refc.Time.values

#boolean fields marking where each variable reaches its threshold
refc_reached = refc.REFD_COM.values >= 50.0
uh_reached = uh.UP_HELI_MAX.values >= 75.0

#step 1: everything below the thresholds becomes 0
thr_refc = refc.where(refc_reached, 0)
thr_uh = uh.where(uh_reached, 0)

#step 2: everything above the thresholds becomes 1
#(a value exactly equal to a threshold keeps its original value)
thr_refc = thr_refc.where(thr_refc.REFD_COM.values <= 50, 1)
thr_uh = thr_uh.where(thr_uh.UP_HELI_MAX.values <= 75, 1)

#step 3: the product is 1 only where both fields were set to 1
refc_flags = thr_refc.REFD_COM.values
uh_flags = thr_uh.UP_HELI_MAX.values
thr_val = refc_flags * uh_flags

In [4]:
#index arrays (one per array axis) of every cell whose combined value equals 1
is_hit = thr_val == 1
locations = np.where(is_hit)

In [5]:
#one metadata record per saved image; the DataFrame is built once after the loop
#(DataFrame.append was removed in pandas 2.0 and copied the frame on every call).
records = []

#iterate through all potential center points (time index, y index, x index).
for point, (time, y, x) in enumerate(zip(*locations)):

    #file name encodes MMDDYYYY, hour and the running point id
    str_time = np.datetime_as_string(times[time])
    filename = f"{str_time[5:7]}{str_time[8:10]}{str_time[:4]}_T{str_time[11:13]}_{point}id.npy"

    #slice a 68-cell half-width window around the centre point.
    #NOTE(review): the window is not clipped at the grid edges — confirm centres
    #never fall within 68 cells of the boundary.
    image = refc.REFD_COM.sel(Time=times[time], south_north=slice(y-68, y+68), west_east=slice(x-68, x+68)).values

    #save file as .npy file
    np.save(file=f"/home/jcorner1/Unidata/data/{filename}", arr=image)

    #remember the metadata for the csv
    records.append({'Index': point, 'x': x, 'y': y, 'Filename': filename, 'Time': str_time})

#create pandas dataframe (keeps the expected columns even when no point was found)
df = pd.DataFrame(records, columns=['Index', 'x', 'y', 'Filename', 'Time'])

In [6]:
#write the image metadata table to disk as a csv
csv_path = '/home/jcorner1/Unidata/images_1991_06.csv'
df.to_csv(csv_path)