In [None]:
import xarray as xr
from netCDF4 import Dataset
import numpy as np
import os
import sys
import matplotlib.pyplot as plt
import load_data
import wrf

In [None]:
#
# Author's processing plan for this notebook:
#   - start with the reflectivity data (files are split by minutes after the hour)
#   - the 00-minute-after-the-hour file is the one opened in this cell
#   - need every 3 hours to match up with the W field
#   - set W to -99.0 where reflectivity is < some dBZ threshold
#   - scale refl and W using min/max scaling
#   - convert to AGL (NOTE(review): no AGL conversion is visible in these cells)
#   - write the data
#
refl_data_main_path = '/glade/work/hardt/ds612'
refl_file = os.path.join(
    refl_data_main_path,
    'model2_2012-2015_00minuteAfterHour_3D_refl.nc',
)
# Open the dataset; the 0 minute cell below reads REFL_10CM and XTIME from it.
refl_ds = xr.open_dataset(refl_file)
refl_t = refl_ds["XTIME"].values
print(refl_ds)

In [None]:
#
# 0 minute offset: shuffle, scale and write the reflectivity.
# Requires refl_ds (the 00minuteAfterHour dataset) opened in the cell above.
#
print("Loading REFL_10CM data.")
refl = refl_ds.REFL_10CM.values
refl_t = refl_ds.XTIME.values
#
# Create the random shuffle indexes with this first dataset.
# The sequence s is reused by the cells for the other offsets and for W,
# and is stored in the output file as 'shuffle_seq'.
#
s = np.arange(refl.shape[0])
np.random.shuffle(s)
#
refl_t = refl_t[s]
refl = refl[s]
#
# Save the 2D composite (max over axis 1, written out as 'bottom_top') of the
# shuffled, still unscaled reflectivity. It is used later to threshold W.
#
refl_2d = refl.max(axis=1)
#
scale_min = np.amin(refl)
scale_99p9 = np.percentile(refl, 99.9)
#
print(refl.shape)
print(scale_min)
print(np.amax(refl))
print()
print("Doing min/max scaling on refl")
print()
print('np.amin(refl):', scale_min)
# The label now matches the percentile that is actually computed (99.9).
print('np.percentile(refl, 99.9):', scale_99p9)
#
# A zero range would fill the output with inf/NaN, so stop early instead.
#
if scale_99p9 == scale_min:
    raise ValueError("Cannot scale refl: min and 99.9 percentile are both {}".format(scale_min))
refl = (refl - scale_min) / (scale_99p9 - scale_min)
#
# Write netcdf output.
# TODO: re-write this using the netcdf4 module, and also add in the
# XLONG, XLAT fields.
# The scaling information is stored as attributes on REFL_10CM.
# NOTE(review): units is 'dBZ' although the stored values are scaled;
# scale_min / scale_99p9 hold the numbers used for the scaling.
#
output_data = '/glade/work/hardt/ds612/model2_00minuteAfterHour_3D_refl_shuffled_scaled.nc'
#
REFL_OUT = xr.DataArray(data=refl,
                        name='REFL_10CM',
                        dims=['time', 'bottom_top', 'south_north','west_east'],
                        attrs=dict(
                            description='reflectivity',
                            units='dBZ',
                            scale_min=scale_min,
                            scale_99p9=scale_99p9,
                        ),
                      )

encoding={'REFL_10CM': {'zlib': True, '_FillValue': -99.0}}
#
REFL_XTIME = xr.DataArray(refl_t, name='XTIME')
SHUFFLE = xr.DataArray(s, name='shuffle_seq')
#
# The first write creates the file, the next two append variables to it.
#
print("Writing REFL_10CM data to", output_data)
REFL_XTIME.to_netcdf(output_data)
SHUFFLE.to_netcdf(output_data, mode='a')
REFL_OUT.to_netcdf(output_data, encoding=encoding, mode='a')
#
# Drop the large arrays; s and refl_2d are kept for the later cells.
#
del refl
del refl_t
del REFL_OUT
del REFL_XTIME
del SHUFFLE
#
print('DONE writing REFL_OUT.')

In [None]:
#
# 5 minute offset: shuffle, scale and write the reflectivity.
# Requires the shuffle sequence s, either from the 0 minute cell or read
# back from the 0 minute output file.
#
refl_ds = xr.open_dataset(os.path.join(refl_data_main_path, "model2_2012-2015_05minuteAfterHour_3D_refl.nc"))
#
print("Loading REFL_10CM data.")
refl = refl_ds.REFL_10CM.values
refl_t = refl_ds.XTIME.values
#
# Apply the same shuffle as the 0 minute data so the files stay aligned.
#
refl_t = refl_t[s]
refl = refl[s]
#
scale_min = np.amin(refl)
scale_99p9 = np.percentile(refl, 99.9)
#
print(refl.shape)
print(scale_min)
print(np.amax(refl))
print()
print("Doing min/max scaling on refl")
print()
print('np.amin(refl):', scale_min)
# The label now matches the percentile that is actually computed (99.9).
print('np.percentile(refl, 99.9):', scale_99p9)
#
# A zero range would fill the output with inf/NaN, so stop early instead.
#
if scale_99p9 == scale_min:
    raise ValueError("Cannot scale refl: min and 99.9 percentile are both {}".format(scale_min))
refl = (refl - scale_min) / (scale_99p9 - scale_min)
#
# Write netcdf output.
# TODO: re-write this using the netcdf4 module, and also add in the
# XLONG, XLAT fields.
# The scaling information is stored as attributes on REFL_10CM.
# NOTE(review): units is 'dBZ' although the stored values are scaled;
# scale_min / scale_99p9 hold the numbers used for the scaling.
#
output_data = '/glade/work/hardt/ds612/model2_05minuteAfterHour_3D_refl_shuffled_scaled.nc'
#
REFL_OUT = xr.DataArray(data=refl,
                        name='REFL_10CM',
                        dims=['time', 'bottom_top', 'south_north','west_east'],
                        attrs=dict(
                            description='reflectivity',
                            units='dBZ',
                            scale_min=scale_min,
                            scale_99p9=scale_99p9,
                        ),
                      )

encoding={'REFL_10CM': {'zlib': True, '_FillValue': -99.0}}
#
REFL_XTIME = xr.DataArray(refl_t, name='XTIME')
SHUFFLE = xr.DataArray(s, name='shuffle_seq')
#
# The first write creates the file, the next two append variables to it.
#
print("Writing REFL_10CM data to", output_data)
REFL_XTIME.to_netcdf(output_data)
SHUFFLE.to_netcdf(output_data, mode='a')
REFL_OUT.to_netcdf(output_data, encoding=encoding, mode='a')
#
# Drop the large arrays; s is kept for the later cells.
#
del refl
del refl_t
del REFL_OUT
del REFL_XTIME
del SHUFFLE
#
print('DONE writing REFL_OUT.')

In [None]:
#
# 10 minute offset: shuffle, scale and write the reflectivity.
# Requires the shuffle sequence s, either from the 0 minute cell or read
# back from the 0 minute output file.
#
refl_ds = xr.open_dataset(os.path.join(refl_data_main_path, "model2_2012-2015_10minuteAfterHour_3D_refl.nc"))
#
print("Loading REFL_10CM data.")
refl = refl_ds.REFL_10CM.values
refl_t = refl_ds.XTIME.values
#
# Apply the same shuffle as the 0 minute data so the files stay aligned.
#
refl_t = refl_t[s]
refl = refl[s]
#
scale_min = np.amin(refl)
scale_99p9 = np.percentile(refl, 99.9)
#
print(refl.shape)
print(scale_min)
print(np.amax(refl))
print()
print("Doing min/max scaling on refl")
print()
print('np.amin(refl):', scale_min)
# The label now matches the percentile that is actually computed (99.9).
print('np.percentile(refl, 99.9):', scale_99p9)
#
# A zero range would fill the output with inf/NaN, so stop early instead.
#
if scale_99p9 == scale_min:
    raise ValueError("Cannot scale refl: min and 99.9 percentile are both {}".format(scale_min))
refl = (refl - scale_min) / (scale_99p9 - scale_min)
#
# Write netcdf output.
# TODO: re-write this using the netcdf4 module, and also add in the
# XLONG, XLAT fields.
# The scaling information is stored as attributes on REFL_10CM.
# NOTE(review): units is 'dBZ' although the stored values are scaled;
# scale_min / scale_99p9 hold the numbers used for the scaling.
#
output_data = '/glade/work/hardt/ds612/model2_10minuteAfterHour_3D_refl_shuffled_scaled.nc'
#
REFL_OUT = xr.DataArray(data=refl,
                        name='REFL_10CM',
                        dims=['time', 'bottom_top', 'south_north','west_east'],
                        attrs=dict(
                            description='reflectivity',
                            units='dBZ',
                            scale_min=scale_min,
                            scale_99p9=scale_99p9,
                        ),
                      )

encoding={'REFL_10CM': {'zlib': True, '_FillValue': -99.0}}
#
REFL_XTIME = xr.DataArray(refl_t, name='XTIME')
SHUFFLE = xr.DataArray(s, name='shuffle_seq')
#
# The first write creates the file, the next two append variables to it.
#
print("Writing REFL_10CM data to", output_data)
REFL_XTIME.to_netcdf(output_data)
SHUFFLE.to_netcdf(output_data, mode='a')
REFL_OUT.to_netcdf(output_data, encoding=encoding, mode='a')
#
# Drop the large arrays; s is kept for the later cells.
#
del refl
del refl_t
del REFL_OUT
del REFL_XTIME
del SHUFFLE
#
print('DONE writing REFL_OUT.')

In [None]:
#
# 15 minute offset: shuffle, scale and write the reflectivity.
# Requires the shuffle sequence s, either from the 0 minute cell or read
# back from the 0 minute output file.
#
refl_ds = xr.open_dataset(os.path.join(refl_data_main_path, "model2_2012-2015_15minuteAfterHour_3D_refl.nc"))
#
print("Loading REFL_10CM data.")
refl = refl_ds.REFL_10CM.values
refl_t = refl_ds.XTIME.values
#
# Apply the same shuffle as the 0 minute data so the files stay aligned.
#
refl_t = refl_t[s]
refl = refl[s]
#
scale_min = np.amin(refl)
scale_99p9 = np.percentile(refl, 99.9)
#
print(refl.shape)
print(scale_min)
print(np.amax(refl))
print()
print("Doing min/max scaling on refl")
print()
print('np.amin(refl):', scale_min)
# The label now matches the percentile that is actually computed (99.9).
print('np.percentile(refl, 99.9):', scale_99p9)
#
# A zero range would fill the output with inf/NaN, so stop early instead.
#
if scale_99p9 == scale_min:
    raise ValueError("Cannot scale refl: min and 99.9 percentile are both {}".format(scale_min))
refl = (refl - scale_min) / (scale_99p9 - scale_min)
#
# Write netcdf output.
# TODO: re-write this using the netcdf4 module, and also add in the
# XLONG, XLAT fields.
# The scaling information is stored as attributes on REFL_10CM.
# NOTE(review): units is 'dBZ' although the stored values are scaled;
# scale_min / scale_99p9 hold the numbers used for the scaling.
#
output_data = '/glade/work/hardt/ds612/model2_15minuteAfterHour_3D_refl_shuffled_scaled.nc'
#
REFL_OUT = xr.DataArray(data=refl,
                        name='REFL_10CM',
                        dims=['time', 'bottom_top', 'south_north','west_east'],
                        attrs=dict(
                            description='reflectivity',
                            units='dBZ',
                            scale_min=scale_min,
                            scale_99p9=scale_99p9,
                        ),
                      )

encoding={'REFL_10CM': {'zlib': True, '_FillValue': -99.0}}
#
REFL_XTIME = xr.DataArray(refl_t, name='XTIME')
SHUFFLE = xr.DataArray(s, name='shuffle_seq')
#
# The first write creates the file, the next two append variables to it.
#
print("Writing REFL_10CM data to", output_data)
REFL_XTIME.to_netcdf(output_data)
SHUFFLE.to_netcdf(output_data, mode='a')
REFL_OUT.to_netcdf(output_data, encoding=encoding, mode='a')
#
# Drop the large arrays; s is kept for the later cells.
#
del refl
del refl_t
del REFL_OUT
del REFL_XTIME
del SHUFFLE
#
print('DONE writing REFL_OUT.')

In [None]:
#
# Read the shuffle sequence back from the 0 minute output file.
# Only needed when running this separately from the 0 minute data cell.
#
shuffle_data_main_path = '/glade/work/hardt/ds612'
#
# Open with a context manager so the file handle is released as soon as the
# indexes are in memory; the 0 minute cell rewrites this same file, and a
# lingering open handle can get in the way of that write.
#
with xr.open_dataset(
    os.path.join(shuffle_data_main_path, "model2_00minuteAfterHour_3D_refl_shuffled_scaled.nc")
) as shuffle_ds:
    # .values returns an in-memory numpy array, so s stays usable after close.
    s = shuffle_ds.shuffle_seq.values
print(s[0])

In [None]:
#
# Composite reflectivity: max over axis 1 of the 0 minute REFL_10CM field,
# reordered with the shuffle sequence s so it lines up with the shuffled W.
#
refl_data_main_path = '/glade/work/hardt/ds612'
refl_ds = xr.open_dataset(
    os.path.join(refl_data_main_path, "model2_2012-2015_00minuteAfterHour_3D_refl.nc")
)
#
refl_2d = refl_ds["REFL_10CM"].values.max(axis=1)[s]
print(refl_2d.shape)

In [None]:
#
# Open the W dataset; the data itself is read in the next cell.
#
W_data_main_path = '/glade/work/hardt/ds612'
W_ds = xr.open_dataset(
    os.path.join(W_data_main_path, "model2_2012-2015_3D_W.nc")
)
#

In [None]:
#
# Build the composite W field, shuffle it like the reflectivity, mask it
# with the 0 minute composite reflectivity and scale it.
# Requires W_ds, the shuffle sequence s and refl_2d from the cells above.
#
print("Reading time array for W.")
W_t = W_ds.XTIME.values

#
# start values determined in refl_ds.attrs history
# ncks -O -dwest_east,560,1320 -dsouth_north,130,955
#
print('Loading W data.')
#
# Composite: max over axis 1, leaving the dims written out later as
# (time, south_north, west_east). Drop the .max() to keep the full 3D field.
#
print("Reading in W as a composite")
W = W_ds.W.values.max(axis=1)
#
# shuffle the same as the reflectivity
#
print("Shuffling W")
W_t = W_t[s]
W = W[s]
#
feature_threshold = -35.0
#
# Mask W where the composite reflectivity does NOT exceed the threshold.
# The previous condition (refl_2d > feature_threshold) removed W exactly
# where there was reflectivity, the opposite of the stated plan of setting
# W to -99.0 where reflectivity is below the threshold.
# NOTE(review): "<=" is used as the exact complement of the old ">";
# confirm whether values equal to the threshold should be masked.
#
print("Thresholding W on refl 0 min composite.")
no_feature = refl_2d <= feature_threshold
W[no_feature] = -99.0
#
# Scaling statistics come from the unmasked cells only. Selecting them with
# the mask avoids relying on the -99.0 sentinel never occurring in real data.
#
print("calculating the min and 99.9 percentile for scaling")
temp = W[~no_feature]
print(temp.shape)
#
scale_min  = np.amin(temp)
scale_99p9 = np.percentile(temp,99.9)
#
del temp
#
# A zero range would fill the output with inf/NaN, so stop early instead.
#
if scale_99p9 == scale_min:
    raise ValueError("Cannot scale W: min and 99.9 percentile are both {}".format(scale_min))
#
print("Doing min/percentile scaling on W")
W = (W - scale_min) / (scale_99p9 - scale_min)
#
# The scaling also rescaled the sentinel, so restore it on the masked cells.
#
print("Adding back in the thresholded values")
W[no_feature] = -99.0
#
del no_feature
#

In [None]:
# Quick check of the shapes and scaling values before writing, one per line.
print(W.shape, W_t.shape, scale_min, scale_99p9, s.shape, sep="\n")

In [None]:
#
# Write the shuffled, scaled composite W with its times and the shuffle
# sequence. The scaling values are stored as attributes on W.
#
output_data = '/glade/work/hardt/ds612/model2_composite_W_shuffled_scaled.nc'

W_OUT = xr.DataArray(
    data=W,
    name='W',
    dims=['time', 'south_north', 'west_east'],
    attrs={
        'description': 'W',
        'units': 'scaled',
        'scale_min': scale_min,
        'scale_99p9': scale_99p9,
    },
)
W_XTIME = xr.DataArray(W_t, name='XTIME')
SHUFFLE = xr.DataArray(s, name='shuffle_seq')
#
# NOTE(review): the masked cells in W were set to -99.0 in the scaling cell,
# while the _FillValue declared here is -999.0 - confirm which is intended.
#
encoding = {'W': {'zlib': True, '_FillValue': -999.0}}

# The first write creates the file, the next two append variables to it.
print("Writing W data to", output_data)
W_XTIME.to_netcdf(output_data)
SHUFFLE.to_netcdf(output_data, mode='a')
W_OUT.to_netcdf(output_data, encoding=encoding, mode='a')
#
# Release the large objects.
#
del W, W_OUT, W_ds
print("Done")