In [1]:
import neptune
import neptune_tensorboard as neptune_tb
from neptunecontrib.monitoring.keras import NeptuneMonitor

import optuna

import tensorflow as tf
import xarray as xr
import numpy as np
import os
import sys
from scipy.ndimage import gaussian_filter
from tensorflow.keras.optimizers import *
import glob
import time

In [2]:
t = time.strftime("%Y_%m_%d_%H_%M", time.localtime())

def scheduler(epoch):
    if epoch > 0:
        return 0.01 / epoch
    else:
        return 0.01

#
# Mean Absolute Error metric
#
def mae(y_true, y_pred):
  eval = K.abs(y_pred - y_true)
  eval = K.mean(eval, axis=-1)
  return eval

In [3]:
STATC_PARAMS = {'epochs': 10,
                'beta_1': 0.9,
                'beta_2': 0.999,
                'epsilon': 1.0,
                'decay': 0.0,
                'momentum': 0.9,
          }
PARAMS = {'epochs': 10,
          'batch_size': 32,
          'optimizer': 'Adam',
          'learning_rate': 0.01,
          'beta_1': 0.9,
          'beta_2': 0.999,
          'epsilon': 1.0,
          'decay': 0.0,
          'momentum': 0.9,
          }

if PARAMS['optimizer'] == 'Adam':
  optimizer = Adam(lr=PARAMS['learning_rate'],
                   beta_1=PARAMS['beta_1'],
                   beta_2=PARAMS['beta_2'],
                   epsilon=PARAMS['epsilon'],
                   decay=PARAMS['decay']
  )
elif PARAMS['optimizer'] == 'SGD':
  optimizer = SGD(lr=PARAMS['learning_rate'],
                  decay=PARAMS['decay'],
                  momentum=PARAMS['momentum'],
                  nesterov=True
  )


In [4]:
output_root_directory = '/glade/work/hardt/models'
model_run_name        = 'unet_v5p0'
from unet_model_v5p0 import unet

#
# Altitude in meters to run
#
feature_description = '0to6.5km_at_500m_steps'

# 
# 1)     0 meters AGL
# 2)   500
# 3)  1000 
# 4)  1500 
# 5)  2000
# 6)  2500
# 7)  3000
# 15) 7000 meters AGL
#
levels = {}
level_count = 1
for i in range(0,7500,500):
    label_name = str(i)
    levels[label_name] = level_count
    level_count = level_count + 1

level_label = '5500'
label_level = levels[level_label]
#--------------------------

load_previous_model = False
previous_model = 'trained_model_feature-0to6.5km_at_500m_steps_label-5500m_2020_11_19_17_32.h5'
input_model = os.path.join(output_root_directory,model_run_name, previous_model)

#--------------------------

output_model_name     = 'trained_model_feature-' + feature_description + '_label-' + level_label + 'm_{}.h5'
log_dir = os.path.join(output_root_directory, model_run_name, 'logs', 'fit',output_model_name.format(t))
feature_data          = '/glade/work/hardt/ds612/2000-2013_June-Sept_QRAIN_INTERP_AGL_0to7km_at_500m_steps.nc'
label_data            = '/glade/work/hardt/ds612/2000-2013_June-Sept_W_INTERP_AGL_0to7km_at_500m_steps.nc'

BATCH_SIZE = PARAMS['batch_size']
epochs = PARAMS['epochs']

data_fraction_for_training = 0.65
data_fraction_for_validation = 0.25

############################


In [5]:
output_path = os.path.join(output_root_directory, model_run_name)
if not os.path.exists(output_path):
    os.makedirs(output_path)

if not os.path.exists(log_dir):
    os.makedirs(log_dir)

#
# load the data
#
fds = xr.open_dataset(feature_data)
lds = xr.open_dataset(label_data)
feature = fds.QRAIN.values
label = lds.W.values

#
# move the channels from position 1 to position 3
# goes from [time,channel,height,width] to [time, height, width, channel]
# which is the default for Conv2D.
#
feature = np.moveaxis(feature, 1, 3)
label = np.moveaxis(label, 1, 3)

label[feature<.01] = -99.0

#
# random shuffle 
#
# s = np.arange(feature.shape[0])
# np.random.shuffle(s)

#
# 
#
num_images = feature.shape[0]

train_data_start = 0
train_data_end   = int( num_images * data_fraction_for_training  / BATCH_SIZE ) * BATCH_SIZE

val_data_start = train_data_end + 1
val_data_end = int(  ( num_images * (data_fraction_for_training + data_fraction_for_validation) - val_data_start)  / BATCH_SIZE )
val_data_end = (val_data_end * BATCH_SIZE) + val_data_start

print ()
print ("Number of images:", num_images)
print ("Training data start image:", train_data_start)
print ("Training data end image:", train_data_end)
print ("Valication data start image:", val_data_start)
print ("Validation data end image:", val_data_end)
print ()

SHUFFLE_BUFFER_SIZE = train_data_end



Number of images: 9568
Training data start image: 0
Training data end image: 6208
Valication data start image: 6209
Validation data end image: 8609



In [6]:
#NEPTUNE_API_TOKEN = os.environ.get('NEPTUNE_API_TOKEN')
#neptune.init(project_qualified_name='hardt/Predicting-W',
#             api_token=NEPTUNE_API_TOKEN)
#neptune.create_experiment(name='v5p0 14channel-W-5.5km 0-100 epochs', 
#                          params=PARAMS,
#                          tags=['v5p0', 'Adam', 'LRS', 'Shuffle']
#)


In [7]:
#train_dataset = tf.data.Dataset.from_tensor_slices((feature[s][train_data_start:train_data_end,:,:,:14], label[s][train_data_start:train_data_end,:,:,label_level]))
#val_dataset   = tf.data.Dataset.from_tensor_slices((feature[s][val_data_start:val_data_end,:,:,:14], label[s][val_data_start:val_data_end,:,:,label_level]))
#
train_dataset = tf.data.Dataset.from_tensor_slices((feature[train_data_start:train_data_end,:,:,:14], label[train_data_start:train_data_end,:,:,label_level]))
val_dataset   = tf.data.Dataset.from_tensor_slices((feature[val_data_start:val_data_end,:,:,:14], label[val_data_start:val_data_end,:,:,label_level]))
#
#
train_dataset = train_dataset.shuffle(SHUFFLE_BUFFER_SIZE, reshuffle_each_iteration=True).batch(BATCH_SIZE, drop_remainder=True)
val_dataset = val_dataset.batch(BATCH_SIZE, drop_remainder=True)

#
# set up the model
#
output_model = os.path.join(output_path, output_model_name)

In [8]:
import unet_model_v5p0
import importlib
mse = tf.keras.losses.MeanSquaredError()

In [9]:
importlib.reload(unet_model_v5p0)

model = unet_model_v5p0.unet()

(None, 256, 256, 14)
(None, 256, 256, 1)
Model: "functional_1"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            [(None, 256, 256, 14 0                                            
__________________________________________________________________________________________________
conv2d (Conv2D)                 (None, 256, 256, 64) 8128        input_1[0][0]                    
__________________________________________________________________________________________________
conv2d_1 (Conv2D)               (None, 256, 256, 64) 36928       conv2d[0][0]                     
__________________________________________________________________________________________________
max_pooling2d (MaxPooling2D)    (None, 128, 128, 64) 0           conv2d_1[0][0]                   
______________________________________________

In [10]:
print(model.input[0])
print(model.outputs)

Tensor("strided_slice:0", shape=(256, 256, 14), dtype=float32)
[<tf.Tensor 'conv2d_23/BiasAdd:0' shape=(None, 256, 256, 1) dtype=float32>]


In [11]:
def thresh_loss(y_true, y_pred, x_true):

    print(x_true.shape)
    print(y_true.shape)
    print(y_pred.shape)
    
    #mask = tf.math.greater(x_true, 0.01)
    
    y_pred = y_pred[x_true>0.01]
    y_true = y_true[x_true>0.01]

    mse = tf.keras.losses.MeanSquaredError()
    huber = tf.keras.losses.Huber()
    
    return huber(y_true, y_pred)

def cust_loss(x_true):
    def loss(y_true, y_pred):
        return thresh_loss(y_true, y_pred, x_true)
    return loss

def get_loss_fcn():
    def loss_fcn(y_true, y_pred):
        print(y_true.shape)
        print(y_pred.shape)
        x_true = y_pred[:,:,:,12]
        y_pred = y_pred[:,:,:,0]
        
        print(tf.keras.backend.shape(x_true))
        print(tf.keras.backend.shape(y_true))
        print(tf.keras.backend.shape(y_pred))
        
#        y_pred = tf.keras.backend.concatenate([y_pred, y_pred], axis=3)
#        x_true = tf.keras.backend.concatenate([x_true, x_true], axis=3)
        return thresh_loss(y_true, y_pred, x_true)
    return loss_fcn

#loss = get_loss_fcn()

def ref_only_loss(y_true, y_pred, thresh):

    y_true[y_true>=thresh]
    y_pred[y_true>=thresh]
    mse = tf.keras.losses.MeanSquaredError()
    huber = tf.keras.losses.Huber()
    return mse(y_true, y_pred)

def refl_loss(thresh):
    def ref(y_true, y_pred):
        return ref_only_loss(y_true, y_pred, thresh)
    return ref


In [12]:
#model.compile(optimizer=optimizer, loss=thresh_loss, metrics = ['accuracy','mae'], run_eagerly=True)
model.compile(optimizer=optimizer, loss=refl_loss(0.1), metrics = ['accuracy','mae'], run_eagerly=True)

In [13]:
tensorboard = tf.keras.callbacks.TensorBoard(log_dir=log_dir, histogram_freq=1)
model_save_callback = tf.keras.callbacks.ModelCheckpoint(filepath='/glade/scratch/hardt/unet_v1/trained_model_epoch{epoch}.h5',save_freq='epoch')
checkpoint = tf.keras.callbacks.ModelCheckpoint(filepath=os.path.join(output_path,"trained_weights_best_" + level_label + "AGL.h5"), monitor='accuracy', verbose=1, save_best_only=True, mode='max')
LRS = tf.keras.callbacks.LearningRateScheduler(scheduler)
print("FINISHED")

FINISHED


In [14]:
model.fit(train_dataset, epochs=5, validation_data=val_dataset, callbacks=[tensorboard, LRS, checkpoint, NeptuneMonitor()])

Epoch 1/5
Instructions for updating:
use `tf.profiler.experimental.stop` instead.
Epoch 00001: accuracy improved from -inf to 0.00000, saving model to /glade/work/hardt/models/unet_v5p0/trained_weights_best_5500AGL.h5
Epoch 2/5
Epoch 00002: accuracy did not improve from 0.00000
Epoch 3/5
Epoch 00003: accuracy did not improve from 0.00000
Epoch 4/5
Epoch 00004: accuracy did not improve from 0.00000
Epoch 5/5
Epoch 00005: accuracy did not improve from 0.00000


<tensorflow.python.keras.callbacks.History at 0x2b514ff50350>

In [None]:
neptune.stop()

In [None]:
t = time.strftime("%Y_%m_%d_%H_%M", time.localtime())
model.save(output_model.format(t))