# Image Creation

In [42]:
import xarray as xr
import numpy as np
import pandas as pd
import glob

import warnings
warnings.filterwarnings('ignore')

## Historic Period Data

In [13]:
#Cut storm-centered reflectivity images for the historic period.
#For every yearly directory this cell finds each grid point where both the
#reflectivity and updraft-helicity thresholds are met, saves the surrounding
#reflectivity window as a .npy file and writes one CSV of metadata per directory.
REFC_MIN = 50.0     #minimum REFD_COM value for a grid point to count as a hit
UH_MIN = 75.0       #minimum UP_HELI_MAX value for a grid point to count as a hit
HALF_WIDTH = 68     #grid points taken on each side of a hit when cutting an image
COLUMNS = ['Index', 'x', 'y', 'Filename', 'Time']

#find all the directories for UH and REFC and sort them so both lists pair up by position.
uh_dirts = sorted(glob.glob('/home/scratch/ahaberlie/AFWA_2-5KM_UPDRAFT_HELICITY/HIST/*'))
#NOTE(review): the UH list drops its last directory while the REFC list does not -- confirm intended.
uh_dirts = uh_dirts[10:-1]

ref_dirts = sorted(glob.glob('/home/scratch/ahaberlie/AFWA_REFLECTIVITY/HIST/*'))
ref_dirts = ref_dirts[10:]

#iterate through each year (directory); zip stops at the shorter list, so an
#unmatched trailing directory is skipped instead of raising an IndexError.
for uh_dir, ref_dir in zip(uh_dirts, ref_dirts):

    #the print statement shows which part of the loop the program is in
    #(last four characters of the directory name, minus one).
    print(f'current year: {int(uh_dir[-4:])-1}')

    #open all the data within the directory; the context managers close the
    #datasets even if something below raises.
    with xr.open_mfdataset(f'{uh_dir}/*HELI_MAX*.nc') as uh_ds:
        with xr.open_mfdataset(f'{ref_dir}/*.nc') as ref_ds:

            #subset times
            times = ref_ds.Time.values

            #True wherever UH is 75+ AND reflectivity is 50+. Comparing with >=
            #directly keeps values sitting exactly on a threshold; NaN compares False.
            storm_mask = (ref_ds.REFD_COM >= REFC_MIN).values & (uh_ds.UP_HELI_MAX >= UH_MIN).values

            #collect one record per center point and build the dataframe once at
            #the end (DataFrame.append was removed in pandas 2.0 and is quadratic in a loop).
            records = []

            #iterate through all potential center points (time index, y index, x index).
            for point, (t_idx, y, x) in enumerate(zip(*np.nonzero(storm_mask))):

                #clamp the lower bounds so a negative start cannot wrap around to the
                #far side of the grid; windows near any edge are smaller than 136x136.
                y_lo = max(y - HALF_WIDTH, 0)
                x_lo = max(x - HALF_WIDTH, 0)

                #slice dataset into appropriate shape
                image = ref_ds.REFD_COM.sel(Time=times[t_idx],
                                            south_north=slice(y_lo, y + HALF_WIDTH),
                                            west_east=slice(x_lo, x + HALF_WIDTH)).values

                #save file as .npy file, named MMDDYYYY_THH_<point>id.npy
                str_time = np.datetime_as_string(times[t_idx])
                filename = f"{str_time[5:7]}{str_time[8:10]}{str_time[:4]}_T{str_time[11:13]}_{point}id.npy"
                np.save(file=f"/home/jcorner1/Unidata/data/{filename}", arr=image)

                #remember the metadata for the dataframe
                records.append({'Index': point, 'x': x, 'y': y, 'Filename': filename, 'Time': str_time})

    #Save the dataframe as the csv. The file is named after the year of the last
    #saved point; with no points, fall back to the year at the end of the directory name.
    df = pd.DataFrame(records, columns=COLUMNS)
    csv_year = records[-1]['Time'][:4] if records else uh_dir[-4:]
    df.to_csv(f'/home/jcorner1/Unidata/storm_images_{csv_year}_.csv')

current year: 2000
current year: 2001
current year: 2002
current year: 2003
current year: 2004


## End of Century 4.5

In [10]:
#Cut storm-centered reflectivity images for the end-of-century 4.5 scenario.
#For every yearly directory this cell finds each grid point where both the
#reflectivity and updraft-helicity thresholds are met, saves the surrounding
#reflectivity window as a .npy file and writes one CSV of metadata per directory.
REFC_MIN = 50.0     #minimum REFD_COM value for a grid point to count as a hit
UH_MIN = 75.0       #minimum UP_HELI_MAX value for a grid point to count as a hit
HALF_WIDTH = 68     #grid points taken on each side of a hit when cutting an image
COLUMNS = ['Index', 'x', 'y', 'Filename', 'Time']

#find all the directories for UH and REFC and sort them so both lists pair up by position.
uh_dirts = sorted(glob.glob('/home/scratch/ahaberlie/AFWA_2-5KM_UPDRAFT_HELICITY/end_of_century_4p5/*'))
ref_dirts = sorted(glob.glob('/home/scratch/ahaberlie/AFWA_REFLECTIVITY/end_of_century_4p5/*'))

#iterate through each year (directory); zip stops at the shorter list, so a
#length mismatch skips the unmatched directories instead of raising an IndexError.
for uh_dir, ref_dir in zip(uh_dirts, ref_dirts):

    #the print statement shows which part of the loop the program is in
    #(last four characters of the directory name, minus one).
    print(f'current year: {int(uh_dir[-4:])-1}')

    #open all the data within the directory; the context managers close the
    #datasets even if something below raises.
    with xr.open_mfdataset(f'{uh_dir}/*HELI_MAX*.nc') as uh_ds:
        with xr.open_mfdataset(f'{ref_dir}/*.nc') as ref_ds:

            #subset times
            times = ref_ds.Time.values

            #True wherever UH is 75+ AND reflectivity is 50+. Comparing with >=
            #directly keeps values sitting exactly on a threshold; NaN compares False.
            storm_mask = (ref_ds.REFD_COM >= REFC_MIN).values & (uh_ds.UP_HELI_MAX >= UH_MIN).values

            #collect one record per center point and build the dataframe once at
            #the end (DataFrame.append was removed in pandas 2.0 and is quadratic in a loop).
            records = []

            #iterate through all potential center points (time index, y index, x index).
            for point, (t_idx, y, x) in enumerate(zip(*np.nonzero(storm_mask))):

                #clamp the lower bounds so a negative start cannot wrap around to the
                #far side of the grid; windows near any edge are smaller than 136x136.
                y_lo = max(y - HALF_WIDTH, 0)
                x_lo = max(x - HALF_WIDTH, 0)

                #slice dataset into appropriate shape
                image = ref_ds.REFD_COM.sel(Time=times[t_idx],
                                            south_north=slice(y_lo, y + HALF_WIDTH),
                                            west_east=slice(x_lo, x + HALF_WIDTH)).values

                #save file as .npy file, named MMDDYYYY_THH_<point>id.npy
                str_time = np.datetime_as_string(times[t_idx])
                filename = f"{str_time[5:7]}{str_time[8:10]}{str_time[:4]}_T{str_time[11:13]}_{point}id.npy"
                np.save(file=f"/home/scratch/jcorner1/Thesis/data/eoc4p5/{filename}", arr=image)

                #remember the metadata for the dataframe
                records.append({'Index': point, 'x': x, 'y': y, 'Filename': filename, 'Time': str_time})

    #Save the dataframe as the csv. The file is named after the year of the last
    #saved point; with no points, fall back to the year at the end of the directory name.
    df = pd.DataFrame(records, columns=COLUMNS)
    csv_year = records[-1]['Time'][:4] if records else uh_dir[-4:]
    df.to_csv(f'/home/scratch/jcorner1/Thesis/data/eoc4p5/eoc4p5_{csv_year}_.csv')

current year: 2085
current year: 2086
current year: 2087
current year: 2088
current year: 2089
current year: 2090
current year: 2091
current year: 2092
current year: 2093
current year: 2094
current year: 2095
current year: 2096
current year: 2097
current year: 2098
current year: 2099


## End of Century 8.5

In [54]:
#Cut storm-centered reflectivity images for the end-of-century 8.5 scenario.
#For every selected yearly directory this cell finds each grid point where both
#the reflectivity and updraft-helicity thresholds are met, saves the surrounding
#reflectivity window as a .npy file and writes one CSV of metadata per directory.
REFC_MIN = 50.0     #minimum REFD_COM value for a grid point to count as a hit
UH_MIN = 75.0       #minimum UP_HELI_MAX value for a grid point to count as a hit
HALF_WIDTH = 68     #grid points taken on each side of a hit when cutting an image
COLUMNS = ['Index', 'x', 'y', 'Filename', 'Time']

#find all the directories for UH and REFC. Sort BEFORE slicing: glob returns
#paths in arbitrary order, so slicing first would pick arbitrary directories
#and the UH and REFC lists could end up pointing at different years.
uh_dirts = sorted(glob.glob('/home/scratch/ahaberlie/AFWA_2-5KM_UPDRAFT_HELICITY/end_of_century_8p5/end_of_century_8p5/*'))
uh_dirts = uh_dirts[14:]

ref_dirts = sorted(glob.glob('/home/scratch/ahaberlie/AFWA_REFLECTIVITY/end_of_century_8p5/*'))
ref_dirts = ref_dirts[14:]

#iterate through each year (directory); zip stops at the shorter list, so a
#length mismatch skips the unmatched directories instead of raising an IndexError.
for uh_dir, ref_dir in zip(uh_dirts, ref_dirts):

    #the print statement shows which part of the loop the program is in
    #(last four characters of the directory name, minus one).
    print(f'current year: {int(uh_dir[-4:])-1}')

    #open all the data within the directory; the context managers close the
    #datasets even if something below raises.
    with xr.open_mfdataset(f'{uh_dir}/*HELI_MAX*.nc') as uh_ds:
        with xr.open_mfdataset(f'{ref_dir}/*.nc') as ref_ds:

            #subset times
            times = ref_ds.Time.values

            #True wherever UH is 75+ AND reflectivity is 50+. Comparing with >=
            #directly keeps values sitting exactly on a threshold; NaN compares False.
            storm_mask = (ref_ds.REFD_COM >= REFC_MIN).values & (uh_ds.UP_HELI_MAX >= UH_MIN).values

            #collect one record per center point and build the dataframe once at
            #the end (DataFrame.append was removed in pandas 2.0 and is quadratic in a loop).
            records = []

            #iterate through all potential center points (time index, y index, x index).
            for point, (t_idx, y, x) in enumerate(zip(*np.nonzero(storm_mask))):

                #clamp the lower bounds so a negative start cannot wrap around to the
                #far side of the grid; windows near any edge are smaller than 136x136.
                y_lo = max(y - HALF_WIDTH, 0)
                x_lo = max(x - HALF_WIDTH, 0)

                #slice dataset into appropriate shape
                image = ref_ds.REFD_COM.sel(Time=times[t_idx],
                                            south_north=slice(y_lo, y + HALF_WIDTH),
                                            west_east=slice(x_lo, x + HALF_WIDTH)).values

                #save file as .npy file, named MMDDYYYY_THH_<point>id.npy
                str_time = np.datetime_as_string(times[t_idx])
                filename = f"{str_time[5:7]}{str_time[8:10]}{str_time[:4]}_T{str_time[11:13]}_{point}id.npy"
                np.save(file=f"/home/scratch/jcorner1/Thesis/data/eoc8p5/{filename}", arr=image)

                #remember the metadata for the dataframe
                records.append({'Index': point, 'x': x, 'y': y, 'Filename': filename, 'Time': str_time})

    #Save the dataframe as the csv. The file is named after the year of the last
    #saved point; with no points, fall back to the year at the end of the directory name.
    df = pd.DataFrame(records, columns=COLUMNS)
    csv_year = records[-1]['Time'][:4] if records else uh_dir[-4:]
    df.to_csv(f'/home/scratch/jcorner1/Thesis/data/eoc8p5/eoc8p5_{csv_year}_.csv')

current year: 2099


### Zipping/Unzipping Data and Directories
To zip a directory together with the files inside it, use:

```bash
tar -zcvf eoc8p5.tar.gz eoc8p5
```

To unzip the archive once it is in the correct location, use:

```bash
tar -xf eoc8p5.tar.gz
```


## Basic Steps

In [14]:
#Display the `df` currently held by the kernel. This cell does not build `df`
#itself, so what it shows depends on which cell was run most recently.
df

Unnamed: 0,Index,x,y,Filename,Time
0,0,1156,228,10112004_T20_0id.npy,2004-10-11T20:00:00.000000000
1,1,829,226,11162004_T20_1id.npy,2004-11-16T20:00:00.000000000
2,2,849,224,11172004_T00_2id.npy,2004-11-17T00:00:00.000000000
3,3,850,224,11172004_T00_3id.npy,2004-11-17T00:00:00.000000000
4,4,850,225,11172004_T00_4id.npy,2004-11-17T00:00:00.000000000
...,...,...,...,...,...
10831,10831,610,600,09142005_T02_10831id.npy,2005-09-14T02:00:00.000000000
10832,10832,611,600,09142005_T02_10832id.npy,2005-09-14T02:00:00.000000000
10833,10833,612,600,09142005_T02_10833id.npy,2005-09-14T02:00:00.000000000
10834,10834,613,600,09142005_T02_10834id.npy,2005-09-14T02:00:00.000000000


In [5]:
#Display the `ref_ds` dataset currently held by the kernel. This cell does not
#open `ref_ds` itself, so it relies on an earlier cell having been run.
ref_ds

Unnamed: 0,Array,Chunk
Bytes,40.94 GiB,115.15 MiB
Shape,"(8737, 899, 1399)","(24, 899, 1399)"
Count,1095 Tasks,365 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 40.94 GiB 115.15 MiB Shape (8737, 899, 1399) (24, 899, 1399) Count 1095 Tasks 365 Chunks Type float32 numpy.ndarray",1399  899  8737,

Unnamed: 0,Array,Chunk
Bytes,40.94 GiB,115.15 MiB
Shape,"(8737, 899, 1399)","(24, 899, 1399)"
Count,1095 Tasks,365 Chunks
Type,float32,numpy.ndarray


In [2]:
#Load the reflectivity and updraft helicity files matching the 1991-06 pattern
#of the 1990-1991 historic directory, each as one multi-file dataset.
refc_pattern = '/home/scratch/ahaberlie/AFWA_REFLECTIVITY/HIST/1990-1991/REFD_COM_historical-1990-1991_1991-06*.nc'
uh_pattern = '/home/scratch/ahaberlie/AFWA_2-5KM_UPDRAFT_HELICITY/HIST/1990-1991/UP_HELI_MAX_historical-1990-1991_1991-06*.nc'

refc = xr.open_mfdataset(refc_pattern)
uh = xr.open_mfdataset(uh_pattern)

#show the updraft helicity dataset as the cell output
uh

Unnamed: 0,Array,Chunk
Bytes,3.37 GiB,115.15 MiB
Shape,"(720, 899, 1399)","(24, 899, 1399)"
Count,90 Tasks,30 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 3.37 GiB 115.15 MiB Shape (720, 899, 1399) (24, 899, 1399) Count 90 Tasks 30 Chunks Type float32 numpy.ndarray",1399  899  720,

Unnamed: 0,Array,Chunk
Bytes,3.37 GiB,115.15 MiB
Shape,"(720, 899, 1399)","(24, 899, 1399)"
Count,90 Tasks,30 Chunks
Type,float32,numpy.ndarray


In [3]:
#subset times
times = refc.Time.values

#Boolean hit masks, computed once each: True where the value is at or above its
#threshold (reflectivity 50+, updraft helicity 75+). NaN compares False.
refc_hit = refc.REFD_COM.values >= 50.0
uh_hit = uh.UP_HELI_MAX.values >= 75.0

#threshold reflectivity and updraft helicity values: misses become 0, hits become 1.
#Re-using the masks (instead of re-testing `<= threshold`) makes values sitting
#exactly on a threshold become 1 as well, rather than keeping their raw value.
thr_refc = refc.where(refc_hit, 0).where(~refc_hit, 1)
thr_uh = uh.where(uh_hit, 0).where(~uh_hit, 1)

#Give all areas with a UH of 75+ and reflectivity of 50+ a value of 1.
thr_val = thr_refc.REFD_COM.values * thr_uh.UP_HELI_MAX.values

In [4]:
#Indices of every element of thr_val equal to 1, returned as a tuple holding
#one index array per dimension of thr_val.
is_hit = thr_val == 1
locations = np.nonzero(is_hit)

In [5]:
#Collect one metadata record per saved image and build the dataframe once at
#the end (DataFrame.append was removed in pandas 2.0 and is quadratic in a loop).
HALF_WIDTH = 68     #grid points taken on each side of a center point
records = []

#iterate through all potential center points (time index, y index, x index).
for point, (t_idx, y, x) in enumerate(zip(*locations)):

    #clamp the lower bounds so a negative start cannot wrap around to the far
    #side of the grid; windows near any edge are smaller than 136x136.
    y_lo = max(y - HALF_WIDTH, 0)
    x_lo = max(x - HALF_WIDTH, 0)

    #slice dataset into appropriate shape
    image = refc.REFD_COM.sel(Time=times[t_idx],
                              south_north=slice(y_lo, y + HALF_WIDTH),
                              west_east=slice(x_lo, x + HALF_WIDTH)).values

    #save file as .npy file, named MMDDYYYY_THH_<point>id.npy
    str_time = np.datetime_as_string(times[t_idx])
    filename = f"{str_time[5:7]}{str_time[8:10]}{str_time[:4]}_T{str_time[11:13]}_{point}id.npy"
    np.save(file=f"/home/jcorner1/Unidata/data/{filename}", arr=image)

    #remember the metadata for the dataframe
    records.append({'Index': point, 'x': x, 'y': y, 'Filename': filename, 'Time': str_time})

#create pandas dataframe; passing the columns keeps the header when there are no records.
df = pd.DataFrame(records, columns=['Index', 'x', 'y', 'Filename', 'Time'])

In [6]:
#Write the image metadata table to disk as a CSV file.
csv_path = '/home/jcorner1/Unidata/images_1991_06.csv'
df.to_csv(csv_path)