In [1]:
import xarray as xr
from netCDF4 import Dataset
import numpy as np
import os
import sys
import matplotlib.pyplot as plt
import load_data
import wrf

In [2]:
# Start indices (y, x) of the window taken from the reflectivity grid.
# Later cells slice 256 points along each axis starting from these values.
refl_yoffset, refl_xoffset = 256, 185

In [3]:
#
# Coordinate file with the lat/lon arrays that accompany the refl data.
# It is opened as a (single-file) multi-file dataset nested along 'TIME'.
#
latlonFile = '/glade/work/kyoko/CONUS_HYDRO/PhaseII/src/production04km/wrf04km_refl_clipped_coord.nc'
latlon_ds = xr.open_mfdataset(
    latlonFile,
    concat_dim='TIME',
    combine="nested",
)

In [4]:
# Write the XLAT / XLONG window that matches the reflectivity subset
# (256 points along each horizontal axis, starting at the refl offsets)
# into a single netCDF file.
output_data = '/glade/work/hardt/ds612/model2_latlon_coord.nc'
print("Writing latlon data to", output_data)
#
# Build the window once so both variables are guaranteed to use the same one.
window_rows = slice(refl_yoffset, refl_yoffset + 256)
window_cols = slice(refl_xoffset, refl_xoffset + 256)
#
# The first call creates the file; the second appends XLONG to it.
latlon_ds.XLAT[:, window_rows, window_cols].to_netcdf(output_data)
latlon_ds.XLONG[:, window_rows, window_cols].to_netcdf(output_data, mode='a')
#
# Release the underlying file handles explicitly; `del` alone only drops
# the name and does not guarantee the files are closed.
latlon_ds.close()
del latlon_ds
print('DONE writing output data.')

Writing latlon data to /glade/work/hardt/ds612/model2_latlon_coord.nc
DONE writing output data.


In [5]:
#
# Planned workflow (notes from the original author):
#   - start with the reflectivity data, which is every 5 minutes
#   - do the 5 minutes after the hour first
#   - keep every 3 hours to match up with the W field
#   - set W to -99.0 where reflectivity is < some dBZ
#   - scale refl and W using min/max scaling
#   - convert to AGL
#   - write the data
#

refl_data_main_path = '/glade/scratch/gutmann/step/'

#
# For testing, open a single file instead of the full archive:
#
#refl_ds = xr.open_mfdataset(os.path.join(refl_data_main_path, "2014/uncompressed/wrf5mn_d01_2014-09-30_00:00:00"), combine="nested", concat_dim='Time')

#
# Open every available time as one dataset concatenated along 'Time'.
# Currently Ethan has only grabbed 2012-2015.
#
refl_ds = xr.open_mfdataset(
    os.path.join(refl_data_main_path, "20*/uncompressed/*"),
    concat_dim='Time',
    combine="nested",
)

In [None]:
# 0-minute offset: time indices 0, 36, 72, ... of the 5-minute reflectivity
# series (a stride of 36 five-minute steps, i.e. one sample every 3 hours).
#
# start locations determined in input_models_domain_compare.ipynb
#
print("Loading REFL_10CM data.")
refl = refl_ds.REFL_10CM[0::36,:,refl_yoffset:refl_yoffset+256,refl_xoffset:refl_xoffset+256].values
refl_t = refl_ds.XTIME[0::36].values
#
# Create the shuffle index from this first dataset. The same index `s` is
# reused by the later offset cells and by the W cell, so it is drawn from a
# seeded generator: re-running the notebook (or regenerating a single file
# in a fresh kernel) then reproduces the same sample order in every file.
#
SHUFFLE_SEED = 0
s = np.random.RandomState(SHUFFLE_SEED).permutation(refl.shape[0])
#
refl_t = refl_t[s]
refl = refl[s]
print(refl.shape)
# Compute the scaling constants once; each is a full pass over the array.
refl_min = np.amin(refl)
print(refl_min)
print(np.amax(refl))
print()
print("Doing min/max scaling on refl")
print()
refl_p9999 = np.percentile(refl, 99.99)
print('np.amin(refl):',refl_min)
print('np.percentile(refl, 99.99):', refl_p9999)
# Min/max scaling with the 99.99th percentile as the upper bound, so values
# above that percentile end up greater than 1.
refl = (refl - refl_min) / (refl_p9999 - refl_min)
#
# Write netcdf output.
# Would like to re-write this using the netcdf4 module.
# The scaling constants are stored as attributes so the scaling can be
# undone later. The names deliberately avoid 'scale_factor'/'add_offset',
# which xarray would interpret as CF packing attributes on read.
# XLONG / XLAT are written separately to model2_latlon_coord.nc.
#
# NOTE(review): this file name ends in 'shuffled-t.nc' while the other
# offsets use 'shuffled.nc' -- confirm that the difference is intended.
output_data = '/glade/work/hardt/ds612/model2_0minuteAfterHour_3D_refl_shuffled-t.nc'
#
# TODO: give the arrays explicit dims
# ('time', 'bottom_top', 'south_north', 'west_east') plus description/units
# attributes; left out here so the dimension names in the file do not change.
REFL_OUT = xr.DataArray(
    refl,
    name='REFL_10CM',
    attrs={
        'minmax_scale_min': float(refl_min),
        'minmax_scale_max': float(refl_p9999),
        'minmax_scale_note': 'scaled = (raw - min) / (p99.99 - min)',
    },
)

encoding={'REFL_10CM': {'zlib': True, '_FillValue': -99.0}}
#
REFL_XTIME = xr.DataArray(refl_t, name='XTIME')
#
print("Writing REFL_10CM data to", output_data)
# The first call creates the file; XTIME is then appended to it.
REFL_OUT.to_netcdf(output_data, encoding=encoding)
REFL_XTIME.to_netcdf(output_data, mode='a')

print('DONE writing REFL_OUT.')

Loading REFL_10CM data.


In [None]:
# 5-minute offset: time indices 1, 37, 73, ... of the 5-minute reflectivity
# series (same 36-step stride as the 0-minute cell, shifted by one step).
#
# start locations determined in input_models_domain_compare.ipynb
#
print("Loading REFL_10CM data.")
# Load the (small) time axis first and verify it matches the shared shuffle
# index before pulling the large reflectivity array into memory.
refl_t = refl_ds.XTIME[1::36].values
if refl_t.shape[0] != s.shape[0]:
    raise ValueError(
        "5-minute offset has {} times but the shuffle index `s` has {}; "
        "all offsets must have the same length to share one shuffle".format(
            refl_t.shape[0], s.shape[0]
        )
    )
refl = refl_ds.REFL_10CM[1::36,:,refl_yoffset:refl_yoffset+256,refl_xoffset:refl_xoffset+256].values
#
# shuffle the data with the index created in the 0-minute cell
#
refl_t = refl_t[s]
refl = refl[s]
print(refl.shape)
# Compute the scaling constants once; each is a full pass over the array.
refl_min = np.amin(refl)
print(refl_min)
print(np.amax(refl))
print()
print("Doing min/max scaling on refl")
print()
refl_p9999 = np.percentile(refl, 99.99)
print('np.amin(refl):',refl_min)
print('np.percentile(refl, 99.99):', refl_p9999)
# NOTE(review): the constants come from this offset's own data, so each
# offset file is scaled independently -- confirm that this is intended.
refl = (refl - refl_min) / (refl_p9999 - refl_min)
#
# Write netcdf output.
# Would like to re-write this using the netcdf4 module.
# The scaling constants are stored as attributes so the scaling can be
# undone later. The names deliberately avoid 'scale_factor'/'add_offset',
# which xarray would interpret as CF packing attributes on read.
# XLONG / XLAT are written separately to model2_latlon_coord.nc.
#
output_data = '/glade/work/hardt/ds612/model2_5minuteAfterHour_3D_refl_shuffled.nc'
#
REFL_OUT = xr.DataArray(
    refl,
    name='REFL_10CM',
    attrs={
        'minmax_scale_min': float(refl_min),
        'minmax_scale_max': float(refl_p9999),
        'minmax_scale_note': 'scaled = (raw - min) / (p99.99 - min)',
    },
)
encoding={'REFL_10CM': {'zlib': True, '_FillValue': -99.0}}
#
REFL_XTIME = xr.DataArray(refl_t, name='XTIME')
#
print("Writing REFL_10CM data to", output_data)
# The first call creates the file; XTIME is then appended to it.
REFL_OUT.to_netcdf(output_data, encoding=encoding)
REFL_XTIME.to_netcdf(output_data, mode='a')
#
# Free the large arrays; refl_ds stays open for the remaining offsets.
del refl
del refl_t
del REFL_OUT
del REFL_XTIME
print('DONE writing output data.')

In [None]:
# 10-minute offset: time indices 2, 38, 74, ... of the 5-minute reflectivity
# series (same 36-step stride as the 0-minute cell, shifted by two steps).
#
# start locations determined in input_models_domain_compare.ipynb
#
# Load the (small) time axis first and verify it matches the shared shuffle
# index before pulling the large reflectivity array into memory.
refl_t = refl_ds.XTIME[2::36].values
if refl_t.shape[0] != s.shape[0]:
    raise ValueError(
        "10-minute offset has {} times but the shuffle index `s` has {}; "
        "all offsets must have the same length to share one shuffle".format(
            refl_t.shape[0], s.shape[0]
        )
    )
refl = refl_ds.REFL_10CM[2::36,:,refl_yoffset:refl_yoffset+256,refl_xoffset:refl_xoffset+256].values
#
# shuffle the data with the index created in the 0-minute cell
#
refl_t = refl_t[s]
refl = refl[s]
print(refl.shape)
# Compute the scaling constants once; each is a full pass over the array.
refl_min = np.amin(refl)
print(refl_min)
print(np.amax(refl))
print()
print("Doing min/max scaling on refl")
print()
refl_p9999 = np.percentile(refl, 99.99)
print('np.amin(refl):',refl_min)
print('np.percentile(refl, 99.99):', refl_p9999)
# NOTE(review): the constants come from this offset's own data, so each
# offset file is scaled independently -- confirm that this is intended.
refl = (refl - refl_min) / (refl_p9999 - refl_min)
#
# Write netcdf output.
# Would like to re-write this using the netcdf4 module.
# The scaling constants are stored as attributes so the scaling can be
# undone later. The names deliberately avoid 'scale_factor'/'add_offset',
# which xarray would interpret as CF packing attributes on read.
# XLONG / XLAT are written separately to model2_latlon_coord.nc.
#
output_data = '/glade/work/hardt/ds612/model2_10minuteAfterHour_3D_refl_shuffled.nc'
#
REFL_OUT = xr.DataArray(
    refl,
    name='REFL_10CM',
    attrs={
        'minmax_scale_min': float(refl_min),
        'minmax_scale_max': float(refl_p9999),
        'minmax_scale_note': 'scaled = (raw - min) / (p99.99 - min)',
    },
)
encoding={'REFL_10CM': {'zlib': True, '_FillValue': -99.0}}
#
REFL_XTIME = xr.DataArray(refl_t, name='XTIME')
#
print("Writing REFL_10CM data to", output_data)
# The first call creates the file; XTIME is then appended to it.
REFL_OUT.to_netcdf(output_data, encoding=encoding)
REFL_XTIME.to_netcdf(output_data, mode='a')
#
# Free the large arrays; refl_ds stays open for the remaining offsets.
del refl
del refl_t
del REFL_OUT
del REFL_XTIME
print('DONE writing output data.')

In [None]:
# 15-minute offset: time indices 3, 39, 75, ... of the 5-minute reflectivity
# series (same 36-step stride as the 0-minute cell, shifted by three steps).
#
# start locations determined in input_models_domain_compare.ipynb
#
# Load the (small) time axis first and verify it matches the shared shuffle
# index before pulling the large reflectivity array into memory.
refl_t = refl_ds.XTIME[3::36].values
if refl_t.shape[0] != s.shape[0]:
    raise ValueError(
        "15-minute offset has {} times but the shuffle index `s` has {}; "
        "all offsets must have the same length to share one shuffle".format(
            refl_t.shape[0], s.shape[0]
        )
    )
refl = refl_ds.REFL_10CM[3::36,:,refl_yoffset:refl_yoffset+256,refl_xoffset:refl_xoffset+256].values
#
# shuffle the data with the index created in the 0-minute cell
#
refl_t = refl_t[s]
refl = refl[s]
print(refl.shape)
# Compute the scaling constants once; each is a full pass over the array.
refl_min = np.amin(refl)
print(refl_min)
print(np.amax(refl))
print()
print("Doing min/max scaling on refl")
print()
refl_p9999 = np.percentile(refl, 99.99)
print('np.amin(refl):',refl_min)
print('np.percentile(refl, 99.99):', refl_p9999)
# NOTE(review): the constants come from this offset's own data, so each
# offset file is scaled independently -- confirm that this is intended.
refl = (refl - refl_min) / (refl_p9999 - refl_min)
#
# Write netcdf output.
# Would like to re-write this using the netcdf4 module.
# The scaling constants are stored as attributes so the scaling can be
# undone later. The names deliberately avoid 'scale_factor'/'add_offset',
# which xarray would interpret as CF packing attributes on read.
# XLONG / XLAT are written separately to model2_latlon_coord.nc.
#
output_data = '/glade/work/hardt/ds612/model2_15minuteAfterHour_3D_refl_shuffled.nc'
#
REFL_OUT = xr.DataArray(
    refl,
    name='REFL_10CM',
    attrs={
        'minmax_scale_min': float(refl_min),
        'minmax_scale_max': float(refl_p9999),
        'minmax_scale_note': 'scaled = (raw - min) / (p99.99 - min)',
    },
)
encoding={'REFL_10CM': {'zlib': True, '_FillValue': -99.0}}
#
REFL_XTIME = xr.DataArray(refl_t, name='XTIME')
#
print("Writing REFL_10CM data to", output_data)
# The first call creates the file; XTIME is then appended to it.
REFL_OUT.to_netcdf(output_data, encoding=encoding)
REFL_XTIME.to_netcdf(output_data, mode='a')
#
# Free the large arrays; refl_ds itself is left open.
del refl
del refl_t
del REFL_OUT
del REFL_XTIME
print('DONE writing output data.')

In [None]:
#
# Open the W data and work out where the reflectivity window sits in it.
#

W_data_main_path = '/glade/scratch/gutmann/step/wrf3d/'

#
# For testing, open only the files of a single day:
#
#W_ds = xr.open_mfdataset(os.path.join(W_data_main_path, "2014/uncompressed/wrf3d_d01_2014-09-30_*"), combine="nested", concat_dim='Time')

#
# Open everything as one dataset concatenated along 'Time'.
# As of 12/10/2020 Ethan only has 2012-2015.
#
W_ds = xr.open_mfdataset(
    os.path.join(W_data_main_path, "20*/uncompressed/*"),
    concat_dim='Time',
    combine="nested",
)

# The W window starts at the reflectivity offsets shifted by (130, 560).
# NOTE(review): presumably the origin of the clipped reflectivity grid inside
# the W grid (ncks -dwest_east,560,... -dsouth_north,130,...) -- confirm.
W_yoffset, W_xoffset = refl_yoffset + 130, refl_xoffset + 560
print('W_yoffset:',W_yoffset)
print('W_xoffset:',W_xoffset)

In [None]:
# Load the W window and its time axis, then apply the same shuffle index `s`
# that was used for the reflectivity so the samples stay paired by position.

W_t = W_ds.XTIME.values

# Check the time axis against the shuffle index before the expensive load.
# Fewer W times than indices would fail in the indexing below anyway; more W
# times would silently be cut down to the first len(s) entries.
if W_t.shape[0] < s.shape[0]:
    raise ValueError(
        "W has {} times but the shuffle index `s` has {}; "
        "W cannot be paired with the reflectivity samples".format(
            W_t.shape[0], s.shape[0]
        )
    )
if W_t.shape[0] > s.shape[0]:
    print(
        "WARNING: W has {} times but the shuffle index `s` has {}; "
        "only the first {} W times are used".format(
            W_t.shape[0], s.shape[0], s.shape[0]
        )
    )

#
# start values determined in refl_ds.attrs history
# ncks -O -dwest_east,560,1320 -dsouth_north,130,955
#
W = W_ds.W[:,:,W_yoffset:W_yoffset+256,W_xoffset:W_xoffset+256].values
# Alternative kept for reference: collapse axis 1 to its maximum.
#W = W_ds.W[:,:,W_yoffset:W_yoffset+256,W_xoffset:W_xoffset+256].values.max(axis=1)
#
# shuffle the same as the reflectivity
#
W_t = W_t[s]
W = W[s]

In [None]:
# Sanity check: dimensions of the shuffled W array.
print(np.shape(W))

In [None]:
# Min/max scale W in place, using the 99.9th percentile as the upper bound
# (values above that percentile end up greater than 1).
# NOTE(review): the reflectivity cells use the 99.99th percentile -- confirm
# that the different percentile here is intended.
# NOTE: W is overwritten, so re-running this cell scales already-scaled data.
print("Doing min/max scaling on W")
print()
# Compute each constant once; both are full passes over the array.
W_min = np.amin(W)
W_p999 = np.percentile(W, 99.9)
print('np.amin(W):',W_min)
print('np.percentile(W, 99.9):',W_p999)
W = (W - W_min) / (W_p999 - W_min)

In [None]:
# Write the scaled, shuffled W array and its matching XTIME values to one
# netCDF file.
# NOTE(review): the min/percentile used for the scaling are only printed by
# the scaling cell and are not stored in this file.
output_data = '/glade/work/hardt/ds612/model2_3D_W_shuffled.nc'

W_OUT = xr.DataArray(W, name='W')
encoding={'W': {'zlib': True, '_FillValue': -99.0}}

W_XTIME = xr.DataArray(W_t, name='XTIME')

print("Writing W data to", output_data)
# The first call creates the file; XTIME is then appended to it.
W_OUT.to_netcdf(output_data, encoding=encoding)
W_XTIME.to_netcdf(output_data, mode='a')

#
# Free the large array and release the input file handles explicitly;
# `del` alone does not guarantee that the files are closed.
del W
del W_OUT
W_ds.close()
del W_ds
print("Done")

In [None]:
# Pick one shuffled sample of reflectivity (d1) and W (d2) for plotting.
#
# The in-memory `refl` and `W` arrays have been deleted by the time this cell
# runs, so the sample is read back from the files written earlier. The
# 0-minute reflectivity file is used because its times are the 3-hourly ones
# that the W samples are paired with.
plt_number = 1
refl_sample_file = '/glade/work/hardt/ds612/model2_0minuteAfterHour_3D_refl_shuffled-t.nc'
W_sample_file = '/glade/work/hardt/ds612/model2_3D_W_shuffled.nc'

with xr.open_dataset(refl_sample_file) as sample_ds:
    d1 = sample_ds.REFL_10CM[plt_number].values
# imshow needs a 2-D image: if the sample still has its level axis (axis 0
# once the time index is removed), collapse it to the column maximum.
if d1.ndim == 3:
    d1 = np.nanmax(d1, axis=0)
print(d1.shape)

with xr.open_dataset(W_sample_file) as sample_ds:
    d2 = sample_ds.W[plt_number].values
if d2.ndim == 3:
    d2 = np.nanmax(d2, axis=0)
print(d2.shape)

In [None]:
# Side-by-side images of the reflectivity sample (d1) and the W sample (d2).
# NOTE(review): the arrays written by this notebook are min/max scaled, so
# the units in the titles may no longer apply -- confirm before publishing.
cmap = plt.cm.Spectral_r

fig, (ax1, ax2) = plt.subplots(1,2, figsize=(16,6))

# Cap each colour range at the panel's 99.9th percentile so a few extreme
# pixels do not wash out the image.
p_ref = np.percentile(d1, 99.9)

ref = ax1.imshow(d1, cmap=cmap)
ax1.set_title("reflectivity (dBZ)")
ref.set_clim(vmin=0, vmax=p_ref)
fig.colorbar(ref, ax=ax1, orientation='vertical', label='')

p_maxW = np.percentile(d2, 99.9)

maxW = ax2.imshow(d2, cmap=cmap)
ax2.set_title("maxW (m/s)")
maxW.set_clim(vmin=0.0,vmax=p_maxW)
# Trailing semicolon keeps the notebook from echoing the Colorbar repr.
fig.colorbar(maxW, ax=ax2);
