# pipeline playground

In [2]:
import numpy as np

import tensorflow as tf

from tensorflow import keras
from tensorflow.keras import layers
# import keras
# from keras.models import Sequential
# from keras.layers import ConvLSTM2D, Dense, InputLayer, BatchNormalization, Permute

In [2]:
# Record the TensorFlow version this notebook was executed with.
print(tf.__version__)

2.2.0-dlenv


## Input dimension variables

In [3]:
# Dimensions of a single model input sample: number of frames per slice,
# number of channels, and the spatial grid size along x and y.
frames, channels = 1, 1
pixels_x = pixels_y = 21

## Define the model

In [20]:
# Rebuild the saved model's file name from the input dimensions, then load the
# HDF5 file from ../models/ and print its layer summary.
model_name = ''.join([
    'convlstm_',
    str(frames), 'f_',
    str(channels), 'c_',
    str(pixels_x), 'x_',
    str(pixels_y), 'y',
])

model1 = tf.keras.models.load_model(''.join(['../models/', model_name, '.h5']))
model1.summary()


Model: "convlstm_1f_1c_21x_21y"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv_lst_m2d (ConvLSTM2D)    (None, 1, 4, 21, 21)      2016      
_________________________________________________________________
batch_normalization (BatchNo (None, 1, 4, 21, 21)      4         
_________________________________________________________________
conv_lst_m2d_1 (ConvLSTM2D)  (None, 1, 2, 21, 21)      440       
_________________________________________________________________
batch_normalization_1 (Batch (None, 1, 2, 21, 21)      4         
_________________________________________________________________
conv_lst_m2d_2 (ConvLSTM2D)  (None, 1, 1, 21, 21)      16        
_________________________________________________________________
batch_normalization_2 (Batch (None, 1, 1, 21, 21)      4         
Total params: 2,484
Trainable params: 2,478
Non-trainable params: 6
__________________________________________

## try training

In [13]:
# Directories holding the training and validation data.
train_file_path = "../data/train"
valid_file_path = "../data/validate"

# Step counts are numbers of batches, so they must be whole numbers: floor
# division keeps them integral even when `frames` does not divide the total.
# (Assumes hourly data with `frames` hours consumed per step -- TODO confirm
# against slice_generator.)
# 3 years of training data = 3 * 365 * 24 hourly frames
train_steps = 3 * 365 * 24 // frames
# 1 year of validation data = 1 * 365 * 24 hourly frames
valid_steps = 1 * 365 * 24 // frames

In [14]:
from slice_generator import slice_generator

# Options shared by the training and validation generators. Both must use the
# same slice size: the step counts are derived from `frames`, so validation
# slices are `frames` long as well (previously hard-coded to 24).
slice_kwargs = dict(
    slice_size=frames,
    vars_=['t2m'],
    proc_type='conv_lstm',
    pixels_x=pixels_x,
    pixels_y=pixels_y,
    debug=False,
)

slice_train = slice_generator(img_dir=train_file_path, **slice_kwargs)
slice_val = slice_generator(img_dir=valid_file_path, **slice_kwargs)

In [15]:
# Train the model loaded above (`model1`) on the slice generators.
# The original call referenced `model`, which no cell in this notebook defines.
# Step counts are coerced to int because Keras expects integer batch counts.
# Alternative data source: generate_arrays(<path>, slice_size=frames).
history = model1.fit(
    slice_train,
    steps_per_epoch=int(train_steps),
    epochs=20,
    verbose=1,
    shuffle=False,
    initial_epoch=0,
    validation_data=slice_val,
    validation_steps=int(valid_steps),
)

Epoch 1/20
  362/26280 [..............................] - ETA: 10:05 - loss: 25454.4785 - mean_absolute_error: 282.8293

KeyboardInterrupt: 

## Save model to `../models/`

## Check dimensions on different generators

In [19]:
# INCORRECT
# Pull one (input, target) pair from slice_generator and display the raw input
# so its structure can be compared with the generate_arrays check.
# NOTE(review): proc_type is 'convlstm' here, while the training cell passes
# 'conv_lstm' -- possibly the reason for the INCORRECT label; confirm against
# slice_generator.
from slice_generator import slice_generator
gen = slice_generator(img_dir='../data/train', slice_size=frames, vars_=['t2m'],
                     proc_type='convlstm', pixels_x=pixels_x, pixels_y=pixels_y, debug=False
                     )
# Single-iteration loop; it leaves `i` bound to 0 in the notebook namespace.
for i in range(1):
    in_, out_ = next(gen)
in_

[array([[[[[10.454407  , 10.475586  , 10.485565  ,  9.88385   ,
            10.0321045 ,  9.75119   ,  7.9622803 ,  4.755066  ,
             2.889557  ,  4.1689453 ,  8.143524  ,  9.647766  ,
             9.251007  ,  5.028534  ,  0.7256775 , -3.098175  ,
            -3.8543396 , -4.94812   , -5.5846863 , -5.8406982 ,
            -5.4146423 ],
           [10.432617  , 10.329193  , 10.56778   , 10.179108  ,
            10.073212  , 10.469971  ,  9.368103  ,  6.3490295 ,
             2.9642944 ,  5.286377  ,  7.977844  , 10.256958  ,
            11.110291  ,  9.14386   ,  5.43277   ,  0.897583  ,
            -2.7898254 , -4.712036  , -6.000763  , -6.022583  ,
            -4.907013  ],
           [10.520416  , 10.423248  , 10.339172  , 10.38028   ,
            10.59082   , 10.963928  ,  9.823425  ,  8.217651  ,
             6.532135  ,  5.7161865 ,  7.6907043 ,  9.995331  ,
            11.627899  , 11.235504  ,  9.507019  ,  7.0030518 ,
             2.383789  , -2.0398865 , -5.762848  , -

In [18]:
# CORRECT
# Pull one (input, target) pair from generate_arrays and display the input's
# shape.
# NOTE(review): generate_arrays is not defined or imported in any visible cell
# -- it must come from earlier kernel state; confirm its origin.
gen = generate_arrays(train_file_path, slice_size=frames)
in_, out_ = next(gen)
in_.shape

(1, 1, 1, 21, 21)

## check broadcasting on xarray

In [3]:
import xarray as xr

# Open one NetCDF file from the training directory and display the raw values
# of its 't2m' variable as a NumPy array.
ds = xr.open_dataset("../data/train/2016_01_ERA5.nc")
ds['t2m'].values

array([[[283.6044 , 283.62558, 283.63556, ..., 267.5653 , 267.3093 ,
         267.73535],
        [283.5826 , 283.4792 , 283.71777, ..., 267.14923, 267.1274 ,
         268.24298],
        [283.6704 , 283.57324, 283.48917, ..., 267.38715, 265.74463,
         266.08658],
        ...,
        [284.73306, 284.6608 , 284.64896, ..., 285.22137, 284.06158,
         283.22757],
        [284.8327 , 284.8072 , 284.81714, ..., 286.17065, 284.85638,
         283.12167],
        [284.99094, 284.93237, 284.99094, ..., 286.02988, 285.64743,
         283.8716 ]],

       [[283.5901 , 283.64862, 283.70343, ..., 266.8926 , 266.6914 ,
         266.46094],
        [283.6038 , 283.49228, 283.83176, ..., 266.3868 , 266.22675,
         267.88297],
        [283.71277, 283.6013 , 283.63678, ..., 266.3594 , 264.83398,
         264.50388],
        ...,
        [284.6776 , 284.6091 , 284.57422, ..., 282.85757, 282.00797,
         280.86935],
        [284.77914, 284.76544, 284.7424 , ..., 285.85922, 285.412  ,
   

In [8]:
# Subtract the scalar 273.15 from every element of 't2m' (scalar broadcast over
# the whole array) -- presumably a Kelvin -> Celsius conversion; TODO confirm.
(ds['t2m'] - 273.15).values

array([[[10.454407 , 10.475586 , 10.485565 , ..., -5.5846863,
         -5.8406982, -5.4146423],
        [10.432617 , 10.329193 , 10.56778  , ..., -6.000763 ,
         -6.022583 , -4.907013 ],
        [10.520416 , 10.423248 , 10.339172 , ..., -5.762848 ,
         -7.405365 , -7.0634155],
        ...,
        [11.583069 , 11.510803 , 11.498962 , ..., 12.071381 ,
         10.911591 , 10.077576 ],
        [11.682709 , 11.657196 , 11.667145 , ..., 13.02066  ,
         11.70639  ,  9.97168  ],
        [11.840942 , 11.782379 , 11.840942 , ..., 12.879883 ,
         12.497437 , 10.721619 ]],

       [[10.440094 , 10.498627 , 10.553436 , ..., -6.2573853,
         -6.4585876, -6.6890564],
        [10.453796 , 10.342285 , 10.681763 , ..., -6.7631836,
         -6.9232483, -5.267029 ],
        [10.562775 , 10.451294 , 10.486786 , ..., -6.7905884,
         -8.3160095, -8.646118 ],
        ...,
        [11.527618 , 11.459106 , 11.424225 , ...,  9.707581 ,
          8.857971 ,  7.7193604],
        [11.

# Split Conv_stack output into slices (by frames) 

In [62]:
# Build a Conv2D stack whose output is split into per-frame slices, then feed
# the first slice to a ConvLSTM2D encoder whose final states initialise a
# ConvLSTM2D decoder.
model_name = "conv_stack_output"

frames = 24
filters = [32, 64, 64]
kernel_sizes = [(5, 5), (3, 3), (3, 3)]
strides = [(2, 2), (1, 1), (2, 2)]

# Index used in the encoder/decoder layer names. The original cell formatted
# the names from a loop variable `i` that this cell never defines.
stage = 1

conv_input_seq = []
conv_input = layers.Input(name="convstack_input",
                          shape=(channels, pixels_x, pixels_y))

# Three Conv2D + BatchNormalization stages, all channels-first (channel axis 1).
stack = conv_input
for n_filters, kernel_size, stride in zip(filters, kernel_sizes, strides):
    stack = layers.Conv2D(filters=n_filters,
                          kernel_size=kernel_size,
                          strides=stride,
                          padding='same',
                          activation='relu',
                          data_format='channels_first',
                          )(stack)
    stack = layers.BatchNormalization(axis=1)(stack)

# Add a leading axis, then split along axis 1 into `frames` tensors (one per
# frame) instead of repeating the literal 24.
stack = tf.expand_dims(stack, 0)
stack_out = layers.Lambda(
    lambda t: tf.unstack(t, num=frames, axis=1)
)(stack)

stack_out_shape = stack_out[0].shape

# encoder: only its final hidden and cell states are kept.
encoder_input1 = tf.expand_dims(stack_out[0], 0)
encoder_cell_1 = layers.ConvLSTM2D(name="encoder{}".format(stage),
                                   filters=filters[-1],
                                   kernel_size=(5, 5),
                                   padding='same',
                                   data_format='channels_first',
                                   return_sequences=True,
                                   return_state=True)
_, state_h, state_c = encoder_cell_1(encoder_input1)
encoder_states = [state_h, state_c]

# decoder: separate input, initialised with the encoder states.
decoder_input1 = layers.Input(name="decoder_input{}".format(stage),
                              shape=stack_out_shape)
decoder_cell_1 = layers.ConvLSTM2D(name="decoder{}".format(stage),
                                   filters=filters[-1],
                                   kernel_size=(5, 5),
                                   padding='same',
                                   data_format='channels_first',
                                   return_sequences=True,
                                   return_state=True)
decoder_output, _, _ = decoder_cell_1(decoder_input1,
                                      initial_state=encoder_states)

# `encoder_stack` is bound only to the assembled model; the encoder layer has
# its own name so the layer is no longer overwritten by the model.
encoder_stack = keras.Model(name='encodeco_1',
                            inputs=[conv_input, decoder_input1],
                            outputs=decoder_output)

# NOTE(review): 'categorical_crossentropy' is paired with a
# mean_absolute_error metric; confirm the loss is intended for this output.
encoder_stack.compile(loss='categorical_crossentropy',
                      optimizer='adadelta',
                      metrics=['mean_absolute_error'])
encoder_stack.summary()

Model: "encodeco_1"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
convstack_input (InputLayer)    [(None, 1, 21, 21)]  0                                            
__________________________________________________________________________________________________
conv2d_48 (Conv2D)              (None, 32, 11, 11)   832         convstack_input[0][0]            
__________________________________________________________________________________________________
batch_normalization_48 (BatchNo (None, 32, 11, 11)   128         conv2d_48[0][0]                  
__________________________________________________________________________________________________
conv2d_49 (Conv2D)              (None, 64, 11, 11)   18496       batch_normalization_48[0][0]     
_________________________________________________________________________________________

In [47]:
stack_out[0].shape

TensorShape([64, 6, 6])

In [68]:
# Build one Conv2D stack per frame of a random 24-frame input and collect the
# unstacked outputs in `conv_input_seq`.
inputs = tf.Variable(tf.random.uniform([24, 1, 21, 21], -1, 1))
input_seq = tf.unstack(inputs, axis=0)

conv_input_seq = []
# `range` replaces the Python-2-only `xrange`, which raises NameError here.
for i in range(len(input_seq)):
    # NOTE(review): every Input shares the name "convstack_input" and is not
    # connected to input_seq[i]; confirm this is intended before assembling
    # these stacks into a single model.
    conv_input = layers.Input(name="convstack_input",
                              shape=(channels, pixels_x, pixels_y))

    # Three Conv2D + BatchNormalization stages, all channels-first.
    stack = conv_input
    for n_filters, kernel_size, stride in zip(filters, kernel_sizes, strides):
        stack = layers.Conv2D(filters=n_filters,
                              kernel_size=kernel_size,
                              strides=stride,
                              padding='same',
                              activation='relu',
                              data_format='channels_first',
                              )(stack)
        stack = layers.BatchNormalization(axis=1)(stack)

    stack = tf.expand_dims(stack, 0)
    stack_out = layers.Lambda(
        lambda t: tf.unstack(t, num=24, axis=1)
    )(stack)
    # The original appended `conv`, which is never defined. The stack's final
    # product is collected instead -- assumed intent, TODO confirm.
    conv_input_seq.append(stack_out)

TensorShape([1, 21, 21])

In [41]:
# THIS WORKS, put it in a lambda layer!

# Split `input_` along its first axis into a list of tensors and show how many
# pieces were produced.
convstack_out = tf.unstack(input_, axis=0, name='unstack')
len(convstack_out)

24

In [None]:
# `tf.unpack` no longer exists in TensorFlow; `tf.unstack` is its replacement.
# Splits `inputs` along axis 1 into a list of tensors.
input_seq = tf.unstack(inputs, axis=1)