In [1]:
import tensorflow as tf
from tensorboard.plugins.hparams import api as hp

import xarray as xr
import numpy as np
import os
import sys
from scipy.ndimage import gaussian_filter
from tensorflow.keras.optimizers import *
import glob
import time

from unet_model_v4p0 import unet

In [2]:
# Run configuration. The visible cells read only 'epochs' and 'batch_size';
# the optimizer-related entries are not referenced by any active code below.
PARAMS = dict(
    epochs=2,
    batch_size=32,
    optimizer='Adam',
    learning_rate=1e-6,
    beta_1=0.9,
    beta_2=0.999,
    epsilon=1.0,
    decay=0.0,
    momentum=0.9,
)

In [3]:
# Minute-resolution timestamp for this run; it is substituted into the log
# directory name. With no time tuple given, strftime() formats the current
# local time.
t = time.strftime("%Y_%m_%d_%H_%M")

# Name of this model version and the root directory its outputs are stored under.
model_run_name        = 'unet_v4p0'
output_root_directory = '/glade/work/hardt/models'
from unet_model_v4p0 import unet

#
# Altitude in meters to run
#
feature_description = '0to6.5km_at_500m_steps'

# Map each altitude label ('0', '500', ..., '7000') to a level number that
# starts at 1 for '0'.
levels = {str(altitude): number
          for number, altitude in enumerate(range(0, 7500, 500), start=1)}

# NOTE(review): label_level is used directly as an index into the last axis
# of `label` when the datasets are built. Because the numbering starts at 1,
# '5500' maps to 12; if the 0 m level is stored at index 0 this selects the
# 6000 m level instead of 5500 m -- confirm against the data file.
level_label = '5500'
label_level = levels[level_label]

In [4]:
# Settings for resuming from a saved model. In the visible cells they are
# referenced only by the commented-out model-loading code.
load_previous_model = False
previous_model = 'trained_model_feature-0to6.5km_at_500m_steps_label-5500m_2020_11_19_17_32.h5'

# Full path of the saved model: <root>/<run name>/<file name>.
input_model = os.path.join(
    output_root_directory,
    model_run_name,
    previous_model,
)

In [5]:
# Input NetCDF files: the QRAIN variable is read from the feature file and
# the W variable from the label file.
feature_data = '/glade/work/hardt/ds612/2000-2013_June-Sept_QRAIN_INTERP_AGL_0to7km_at_500m_steps.nc'
label_data   = '/glade/work/hardt/ds612/2000-2013_June-Sept_W_INTERP_AGL_0to7km_at_500m_steps.nc'

# Output file-name template; the trailing '{}' is a placeholder that is
# filled with the run timestamp via .format().
output_model_name = ''.join([
    'trained_model_feature-',
    feature_description,
    '_label-',
    level_label,
    'm_{}.h5',
])

# TensorBoard log directory for this run: <root>/<run name>/logs/fit/<timestamped name>.
timestamped_name = output_model_name.format(t)
log_dir = os.path.join(output_root_directory, model_run_name, 'logs', 'fit', timestamped_name)

# Training hyperparameters taken from the PARAMS configuration.
epochs = PARAMS['epochs']
BATCH_SIZE = PARAMS['batch_size']

# Fractions of the image sequence used for training and for validation.
data_fraction_for_validation = 0.25
data_fraction_for_training = 0.65

In [6]:
# Create the model output directory and the TensorBoard log directory.
# exist_ok=True makes the cell safe to re-run and avoids the race between
# checking for a directory and creating it.
output_path = os.path.join(output_root_directory, model_run_name)
os.makedirs(output_path, exist_ok=True)
os.makedirs(log_dir, exist_ok=True)

In [7]:
#
# load the data
#
# Each file is opened in a context manager so its handle is released as soon
# as the variable has been read into a NumPy array by `.values`.
with xr.open_dataset(feature_data) as fds:
    feature = fds.QRAIN.values
with xr.open_dataset(label_data) as lds:
    label = lds.W.values

#
# move the channels from position 1 to position 3
# goes from [time,channel,height,width] to [time, height, width, channel]
# which is the default for Conv2D.
#
feature = np.moveaxis(feature, 1, 3)
label = np.moveaxis(label, 1, 3)


In [10]:
# s = np.arange(feature.shape[0])

In [11]:
# np.random.shuffle(s)
#print(feature[s][1,1,1,1])
#print(label[s][1,1,1,1])

In [None]:
#
# Split the image sequence into contiguous training and validation ranges,
# each trimmed to a whole number of batches, and build the tf.data pipelines.
#
num_images = feature.shape[0]

# Training range: [0, train_data_end), rounded down to a multiple of BATCH_SIZE.
train_data_start = 0
train_data_end   = int( num_images * data_fraction_for_training  / BATCH_SIZE ) * BATCH_SIZE

# Slice ends are exclusive, so validation starts exactly at train_data_end;
# starting at train_data_end + 1 would silently drop one image.
val_data_start = train_data_end
val_batch_count = int(  ( num_images * (data_fraction_for_training + data_fraction_for_validation) - val_data_start)  / BATCH_SIZE )
val_data_end = (val_batch_count * BATCH_SIZE) + val_data_start

print ()
print ("Number of images:", num_images)
print ("Training data start image:", train_data_start)
print ("Training data end image:", train_data_end)
print ("Validation data start image:", val_data_start)
print ("Validation data end image:", val_data_end)
print ()

# Buffer as large as the training set, so the shuffle covers every training image.
SHUFFLE_BUFFER_SIZE = train_data_end

# Features use the first 14 entries of the last axis; the label is the single
# entry at index label_level of the last axis.
train_dataset = tf.data.Dataset.from_tensor_slices((feature[train_data_start:train_data_end,:,:,:14], label[train_data_start:train_data_end,:,:,label_level]))
val_dataset   = tf.data.Dataset.from_tensor_slices((feature[val_data_start:val_data_end,:,:,:14], label[val_data_start:val_data_end,:,:,label_level]))

# Only the training data is shuffled (reshuffled every epoch); both datasets
# are batched and any incomplete final batch is dropped.
train_dataset = train_dataset.shuffle(SHUFFLE_BUFFER_SIZE, reshuffle_each_iteration=True).batch(BATCH_SIZE, drop_remainder=True)
val_dataset = val_dataset.batch(BATCH_SIZE, drop_remainder=True)

In [None]:
#
# set up the model
#
# Path for the trained model file inside the output directory.
# NOTE(review): output_model_name still contains its '{}' timestamp
# placeholder here (it is only formatted for log_dir), so this path holds a
# literal '{}' unless a later step calls .format() on it -- confirm.
output_model = os.path.join(output_path, output_model_name)

# Disabled: build a fresh model or resume from input_model. The active
# hyperparameter search below builds its own model inside train_test_model().
# if load_previous_model:
#   model = tf.keras.models.load_model(input_model, compile=False)
# else:
#   model = unet()

In [None]:
def scheduler(epoch):
  """Learning-rate schedule passed to tf.keras.callbacks.LearningRateScheduler.

  Args:
    epoch: integer epoch index handed to the schedule by the callback.

  Returns:
    A Python float: 0.0001 while epoch < 6, afterwards
    0.0001 * exp(0.1 * (10 - epoch)).
  """
  # Local import so the function does not depend on the notebook's import cell.
  import math

  base_rate = 0.0001
  if epoch < 6:
    return base_rate
  # A plain float is returned (not a tf.Tensor) because the callback expects
  # the schedule output to be a float.
  # NOTE(review): for epochs 6-9 the exponent is positive, so the rate rises
  # above base_rate (about 1.49e-4 at epoch 6) before decaying -- confirm
  # that this jump is intended.
  return base_rate * math.exp(0.1 * (10 - epoch))

In [None]:
# TensorBoard logging into this run's log directory, with histogram_freq=1.
tensorboard = tf.keras.callbacks.TensorBoard(
    log_dir=log_dir,
    histogram_freq=1,
)

# Save a checkpoint only when the monitored 'mae' reaches a new minimum.
# NOTE(review): 'mae' looks like the training metric name; 'val_mae' would be
# the validation one -- confirm which is intended.
checkpoint = tf.keras.callbacks.ModelCheckpoint(
    filepath=os.path.join(output_path, "trained_weights_best_" + level_label + "AGL.h5"),
    monitor='mae',
    verbose=1,
    save_best_only=True,
    mode='min',
)

# Learning-rate callback driven by scheduler().
LRS = tf.keras.callbacks.LearningRateScheduler(scheduler)

In [None]:
# model.compile(optimizer=optimizer, loss=cust_loss(0.01), metrics = ['accuracy','mae'], run_eagerly=True)

In [None]:
# model.fit(train_dataset, epochs=epochs, validation_data=val_dataset, callbacks=[tensorboard, checkpoint, NeptuneMonitor()])

In [None]:
# Hyperparameter search space.
HP_NUM_UNITS = hp.HParam('num_units', hp.Discrete([256, 512]))
HP_DROPOUT = hp.HParam('dropout', hp.RealInterval(0.5, 0.6))
HP_OPTIMIZER = hp.HParam('optimizer', hp.Discrete(['adam','sgd','rmsprop']))
HP_L2 = hp.HParam('l2 regularizer', hp.RealInterval(.001,.01))

# Name of the scalar summary each trial reports.
METRIC_ACCURACY = 'accuracy'

# Write the experiment configuration under log_dir/hparam_tuning, the same
# directory the per-trial runs are written beneath, so the configuration and
# the runs are found together. (Previously it went to a relative
# 'logs/hparam_tuning' in the current working directory.)
with tf.summary.create_file_writer(os.path.join(log_dir, 'hparam_tuning')).as_default():
  hp.hparams_config(
    hparams=[HP_NUM_UNITS, HP_DROPOUT, HP_OPTIMIZER,HP_L2],
    metrics=[hp.Metric(METRIC_ACCURACY, display_name='Accuracy')],
  )

In [None]:
# Hold out the tail of the image sequence for testing. The start index is
# counted back from the end so the range is a whole number of batches.
data_fraction_for_test = 0.10

test_data_start = int(num_images * (1 - data_fraction_for_test))
test_data_start = (num_images - int((num_images - test_data_start) / BATCH_SIZE) * BATCH_SIZE)
test_data_end = num_images

print ()
print ("Number of images:", num_images)
print ("Test data start image:", test_data_start)
print ("Test data end image:", test_data_end)

# The labels are included alongside the features: train_test_model() calls
# model.evaluate(test_dataset) and unpacks (loss, accuracy), which needs
# targets to compare the predictions against.
test_dataset = tf.data.Dataset.from_tensor_slices((feature[test_data_start:test_data_end,:,:,:14], label[test_data_start:test_data_end,:,:,label_level]))
test_dataset = test_dataset.batch(BATCH_SIZE, drop_remainder=True)

In [None]:
def train_test_model(hparams):
    """Build, train and evaluate one model for a single hyperparameter trial.

    Args:
        hparams: dict keyed by the HP_* HParam objects; passed to unet() and
            used to look up the optimizer via HP_OPTIMIZER.

    Returns:
        The second value returned by model.evaluate() on test_dataset, i.e.
        the 'accuracy' metric the model was compiled with.
    """
    # The hyperparameter-aware model builder lives in a separate module from
    # the unet imported at the top of the notebook.
    from unet_model_test import unet

    trial_model = unet(hparams)
    trial_model.compile(
        optimizer=hparams[HP_OPTIMIZER],
        loss='MeanSquaredError',
        metrics=['accuracy'],
    )

    # Short fixed-length fit on the module-level training/validation datasets.
    trial_model.fit(train_dataset, epochs=2, validation_data=val_dataset)

    _loss, test_accuracy = trial_model.evaluate(test_dataset)
    return test_accuracy

In [None]:
def run(run_dir, hparams):
  """Run one trial and write its hyperparameters and result to run_dir.

  Args:
    run_dir: directory the summary file writer for this trial writes to.
    hparams: dict keyed by HParam objects describing this trial.
  """
  trial_writer = tf.summary.create_file_writer(run_dir)
  with trial_writer.as_default():
    # Record the hyperparameter values first, then train and log the metric.
    hp.hparams(hparams)
    trial_accuracy = train_test_model(hparams)
    tf.summary.scalar(METRIC_ACCURACY, trial_accuracy, step=2)

In [None]:
# Grid search: every value of the discrete hyperparameters, but only the two
# interval endpoints of the real-valued ones.
dropout_candidates = (HP_DROPOUT.domain.min_value, HP_DROPOUT.domain.max_value)
l2_candidates = (HP_L2.domain.min_value, HP_L2.domain.max_value)

session_num = 0
for num_units in HP_NUM_UNITS.domain.values:
  for dropout_rate in dropout_candidates:
    for l2 in l2_candidates:
      for optimizer in HP_OPTIMIZER.domain.values:
        run_name = "run-%d" % session_num
        hparams = {
            HP_NUM_UNITS: num_units,
            HP_DROPOUT: dropout_rate,
            HP_L2: l2,
            HP_OPTIMIZER: optimizer,
        }
        print('--- Starting trial: %s' % run_name)
        print({h.name: hparams[h] for h in hparams})
        # Each trial gets its own sub-directory beneath log_dir/hparam_tuning.
        trial_dir = log_dir +'/hparam_tuning/' + run_name
        run(trial_dir, hparams)
        session_num += 1