# pipeline playground

In [1]:
import numpy as np

import tensorflow as tf

from tensorflow import keras
from tensorflow.keras import layers
# import keras
# from keras.models import Sequential
# from keras.layers import ConvLSTM2D, Dense, InputLayer, BatchNormalization, Permute

In [2]:
print(tf.__version__)

2.2.0-dlenv


## Input dimension variables

In [10]:
frames = 1
channels = 1
pixels_x = 21
pixels_y = 21

## Define the model

In [11]:
model = tf.keras.Sequential(name='convlstm_model_0')
model.add(layers.InputLayer(input_shape=(frames, channels, pixels_x, pixels_y)))
model.add(layers.ConvLSTM2D(
    filters=channels*4, kernel_size=(5,5), padding='same', data_format='channels_first',return_sequences=True))
model.add(layers.BatchNormalization(axis=1, ))
model.add(layers.ConvLSTM2D(
    filters=channels*2, kernel_size=(3,3), padding='same', data_format='channels_first',return_sequences=True))
model.add(layers.BatchNormalization(axis=1, ))
model.add(layers.ConvLSTM2D(
    filters=channels, kernel_size=(1,1), padding='same', data_format='channels_first',return_sequences=True))
model.add(layers.BatchNormalization(axis=1, ))

model.compile(loss='categorical_crossentropy',
                  optimizer='adadelta',
                  metrics=['mean_absolute_error'])
model.summary()

In [20]:
# load model
model_name = 'convlstm_'+str(frames)+'f_'+str(channels)+'c_'+str(pixels_x)+'x_'+str(pixels_y)+'y'

model1 = tf.keras.models.load_model('../models/'+model_name+'.h5')
model1.summary()


Model: "convlstm_1f_1c_21x_21y"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv_lst_m2d (ConvLSTM2D)    (None, 1, 4, 21, 21)      2016      
_________________________________________________________________
batch_normalization (BatchNo (None, 1, 4, 21, 21)      4         
_________________________________________________________________
conv_lst_m2d_1 (ConvLSTM2D)  (None, 1, 2, 21, 21)      440       
_________________________________________________________________
batch_normalization_1 (Batch (None, 1, 2, 21, 21)      4         
_________________________________________________________________
conv_lst_m2d_2 (ConvLSTM2D)  (None, 1, 1, 21, 21)      16        
_________________________________________________________________
batch_normalization_2 (Batch (None, 1, 1, 21, 21)      4         
Total params: 2,484
Trainable params: 2,478
Non-trainable params: 6
__________________________________________

## try training

In [6]:
import glob
import xarray as xr
def generate_arrays(img_dir, slice_size=24, vars_=['t2m']):
    """
    A generator that returns one 24-hour slice as input, and the subsequent 24-hour slice as output
    """
    # get list of netcdf files in img_dir
    netcdf_dirs = sorted(glob.glob(img_dir+"/*.nc"))
    file_index = 0
    # open first netcdf file
    ds = xr.open_dataset(netcdf_dirs[file_index])
    # select only some variables
    ds = ds[vars_]
    # counter is for hourly time slices. months with 31 days have 744 hours
    counter = 0
    while True: # generator needs to run infinitely
        
        # get input slice
        input_images = ds.isel( time=slice(counter, counter + slice_size)).to_array().values
        
        # check if we're at the end of the month
        if counter+2*slice_size > ds.sizes['time']:
            # reset slice counter, increment to next netcdf file, open it, get output images
            counter = 0
            file_index += 1
            if file_index == len(netcdf_dirs):
                file_index = 0
                
            ds = xr.open_dataset(netcdf_dirs[file_index])
            # select only some variables
            ds = ds[vars_]
            # take slice 0-24 as output-image
            output_images = ds.isel( time=slice(counter, counter + slice_size)).to_array().values
            # set counter to -slice_size to reset for input on next iteration
            counter -= slice_size
        # get output slice right after input slice
        else:
            output_images = ds.isel( time=slice(counter+slice_size, counter + 2*slice_size)).to_array().values
        
        # switch frames and channel axes
        input_images = np.moveaxis(input_images, 0, 1)
        output_images = np.moveaxis(output_images, 0, 1)
        # reshape values
        input_images = input_images.reshape(-1, frames, channels, pixels_x, pixels_y)
        output_images = output_images.reshape(-1, frames, channels, pixels_x, pixels_y)
        yield (input_images, output_images)
        counter += slice_size

In [13]:
train_file_path = "../data/train"
valid_file_path = "../data/validate"
# 3 years of training data = 
train_steps = 3 * 365 * 24 / frames
# 1 year of validation data = 
valid_steps = 1 * 365 * 24 / frames

In [14]:
from slice_generator import slice_generator
slice_train = slice_generator(img_dir=train_file_path, slice_size=frames, vars_=['t2m'],
                     proc_type='conv_lstm', pixels_x=pixels_x, pixels_y=pixels_y, debug=False
                     )
slice_val = slice_generator(img_dir=valid_file_path, slice_size=24, vars_=['t2m'],
                     proc_type='conv_lstm', pixels_x=pixels_x, pixels_y=pixels_y, debug=False
                     )

In [15]:
history = model.fit(
    slice_train, #generate_arrays(train_file_path, slice_size=frames),
    steps_per_epoch = train_steps,
    epochs = 20,
    verbose = 1,
    shuffle = False,
    initial_epoch = 0,
    validation_steps = valid_steps,
    validation_data = slice_val, #generate_arrays(valid_file_path, slice_size=frames),
    )

Epoch 1/20
  362/26280 [..............................] - ETA: 10:05 - loss: 25454.4785 - mean_absolute_error: 282.8293

KeyboardInterrupt: 

## Save model to ```../models/```

In [None]:
#rewrite model in Functional style

# main input
frame_input = layers.Input(shape=(frames, channels, pixels_x, pixels_y), name='frame_input')


# ConvLSTM block
stack = layers.ConvLSTM2D(filters=20, kernel_size=(5,5), padding='same',
                       data_format='channels_first',return_sequences=True, name='ConvLSTM_1')(frame_input)
stack = layers.BatchNormalization(axis=1, name='batchnorm_1')(stack)
stack = layers.ConvLSTM2D(filters=10, kernel_size=(5,5), padding='same',
                       data_format='channels_first',return_sequences=True, name='ConvLSTM_2')(stack)
stack = layers.BatchNormalization(axis=1, name='batchnorm_2')(stack)
stack = layers.ConvLSTM2D(filters=10, kernel_size=(1,1), padding='same',
                       data_format='channels_first',return_sequences=True, name='ConvLSTM_3')(stack)

clstm_out = layers.Flatten(name = 'ConvLSTM_out')(stack)

# auxiliary input
auxiliary_input = layers.Input(shape=(5,), name='aux_input')
dense_input = layers.concatenate([clstm_out, auxiliary_input], name='concatenate_layer')

# dense layers
dropout_seed = 42
dense1 = layers.Dense(16, name='first_dense_layer')(dense_input)
dropout1 = layers.Dropout(0.75, seed = dropout_seed, name='first_dropout_layer')(dense1)
dense2 = layers.Dense(8, name='second_dense_layer')(dropout1)
dropout2 = layers.Dropout(0.75, seed = dropout_seed, name='second_dropout_layer')(dense2)
dense3 = layers.Dense(4, name='third_dense_layer')(dropout2)
dropout3 = layers.Dropout(0.75, seed = dropout_seed, name='third_dropout_layer')(dense3)
dense3 = layers.Dense(1, name='final_dense_layer')(dropout3)

# reshape output
# dense_output = layers.Reshape(target_shape=(frames, channels, pixels_x, pixels_y))(dense3)


model2 = Model(inputs = [frame_input, auxiliary_input], outputs = [dense3])

model2.compile(loss='categorical_crossentropy',
                  optimizer='adadelta',
                  metrics=['mean_absolute_error'])
model2.summary()

In [16]:
# INCORRECT
# from slice_generator import slice_generator
gen = slice_generator(img_dir='../data/train', slice_size=frames, vars_=['t2m'],
                     proc_type='conv_lstm', pixels_x=pixels_x, pixels_y=pixels_y, debug=False
                     )
for i in range(1):
    in_, out_ = next(gen)
in_.shape

(1, 1, 1, 21, 21)

In [18]:
# CORRECT
gen = generate_arrays(train_file_path, slice_size=frames)
in_, out_ = next(gen)
in_.shape

(1, 1, 1, 21, 21)