In [9]:
import numpy as np
import pickle
import matplotlib.pyplot as plt
import numpy.ma as ma
import re
import os
import tensorflow as tf
from tensorflow import keras
import tensorflow.keras.backend as K
import tensorflow.keras.preprocessing as prep
from sklearn.model_selection import train_test_split
from model import AugementedConvLSTM
import configparser
import argparse
import h5py
import glob
import random
# Spatial grid size shared by the whole notebook: index 0 is passed to the model
# as projection_height and index 1 as projection_width.
projection_dimensions = [50,50]

In [6]:
def normalize(data):
    """Standardise ``data``: subtract its mean, then divide by the standard deviation.

    The standard deviation is taken from the already-centred values.  The
    input is not modified; a new object is returned.  No guard is applied for
    a zero standard deviation.
    """
    centred = data - data.mean()
    return centred / centred.std()

def set_data(X, Y):
    """Normalise predictors and targets and move them to a channels-last layout.

    Each of the 7 leading slices of ``X`` is standardised on its own with
    ``normalize``; ``Y`` is standardised as a whole.

    Args:
        X: predictor array indexed as ``X[channel]``.
            # assumes shape (7, n_steps, H, W) with n_steps the largest
            # dimension, since the buffer length is np.max(X.shape) -- TODO confirm
        Y: target array.

    Returns:
        ``(X, Y, std_observed)`` where ``X`` has the channel axis moved last
        (transpose 1,2,3,0), ``Y`` is reshaped to ``(-1, H, W, 1)`` and
        ``std_observed`` is the standard deviation of the un-normalised ``Y``.
    """
    channel = 7
    grid_h = int(projection_dimensions[0])
    grid_w = int(projection_dimensions[1])
    n_steps = np.max(X.shape)

    X_normalized = np.zeros((int(channel), n_steps, grid_h, grid_w))
    for ch in range(7):
        X_normalized[ch,] = normalize(X[ch,])

    Y_normalized = normalize(Y)
    std_observed = Y.std()

    X_channels_last = X_normalized.transpose(1, 2, 3, 0)
    Y_channels_last = Y_normalized.reshape(-1, projection_dimensions[0], projection_dimensions[1], 1)
    return X_channels_last, Y_channels_last, std_observed

def data_generator(X,Y):
    """Split ``X``/``Y`` chronologically and wrap both parts in ``TimeseriesGenerator``s.

    The leading ``train_days`` samples become the training set and the rest
    the test set.  ``train_days`` is the share of samples that corresponds to
    1948-1999 out of 1948-2005 (52 of 58 years), computed from the largest
    dimension of ``X``.
    # assumes the largest dimension of X is the time axis and that X and Y are
    # indexed by time on axis 0 -- TODO confirm

    Returns:
        ``(train_generator, test_generator)``, both with window length 4 and
        batch size 15.
    """
    first_year, last_train_year, last_test_year = 1948, 1999, 2005
    total_years = last_test_year - first_year + 1
    train_years = last_train_year - first_year + 1

    n_days = np.max(X.shape)
    train_days = int((n_days/total_years)*train_years)

    time_steps = 4
    batch_size1 = 15
    target_shape = (-1, projection_dimensions[0], projection_dimensions[1], 1)

    def _windowed(features, targets):
        # Targets are forced to (n, H, W, 1) before being handed to Keras.
        return prep.sequence.TimeseriesGenerator(
            features,
            targets.reshape(target_shape),
            length=time_steps,
            batch_size=batch_size1,
        )

    train_generator = _windowed(X[:train_days], Y[:train_days])
    test_generator = _windowed(X[train_days:], Y[train_days:])
    return train_generator, test_generator

def train(clstm_model, train_generator, test_generator, load_weights = False, std_observed = 1.0):
    """Compile ``clstm_model`` with an RMSE loss and fit it for 32 epochs.

    Args:
        clstm_model: Keras model to compile and train (modified in place).
        train_generator: batches passed to ``fit`` as training data.
        test_generator: batches passed to ``fit`` as ``validation_data``.
        load_weights: when true, warm-start from the checkpoint file
            ``convlstm_weights_pr.h5`` if it exists.  This flag used to be
            accepted and silently ignored.
        std_observed: factor applied to the error inside the
            ``actual_rmse_loss`` metric.
            # presumably the std of the un-normalised observations, so the
            # metric reads in original units -- TODO confirm

    Returns:
        The ``History`` object returned by ``clstm_model.fit``.
    """
    weights_path = "convlstm_weights_pr.h5"

    def root_mean_squared_error(y_true, y_pred):
        return K.sqrt(K.mean(K.square(y_pred - y_true)))

    def actual_rmse_loss(y_true, y_pred):
        return K.sqrt(K.mean(K.square((y_pred - y_true)*std_observed)))

    if load_weights:
        if os.path.exists(weights_path):
            # Same file the checkpoint callback below writes to.
            clstm_model.load_weights(weights_path)
        else:
            print(f"load_weights=True but {weights_path!r} does not exist; training from scratch.")

    adam = tf.keras.optimizers.Adam(learning_rate=0.0003)
    clstm_model.compile(
        optimizer=adam,
        loss=root_mean_squared_error,
        metrics=[root_mean_squared_error, actual_rmse_loss],
    )

    # Keep only the weights with the lowest validation loss.
    checkpoint = tf.keras.callbacks.ModelCheckpoint(
        weights_path, monitor='val_loss', verbose=1, save_best_only=True, mode='min')
    tensorboard = tf.keras.callbacks.TensorBoard(
        log_dir="./Graphs/norm_csltm_pre_Graph", histogram_freq=0, write_graph=True, write_images=False)
    termnan = tf.keras.callbacks.TerminateOnNaN()
    # Cut the learning rate by 5x after 15 epochs without a 0.005 improvement.
    reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(
        monitor='val_loss', factor=0.2, patience=15, min_delta=0.005, min_lr=0.000004, verbose=1)
    callbacks_list = [checkpoint, tensorboard, reduce_lr, termnan]

    history = clstm_model.fit(
        train_generator,
        callbacks=callbacks_list,
        epochs=32,
        validation_data=test_generator,
        verbose=1
        )
    return history

def tryint(s):
    """Return ``int(s)`` when ``s`` parses as an integer, otherwise ``s`` unchanged.

    Only ``ValueError`` is handled; any other exception from ``int`` propagates.
    """
    try:
        number = int(s)
    except ValueError:
        return s
    else:
        return number
    
def alphanum_key(s):
    """Build a natural-sort key: ``s`` cut into text pieces and integer pieces.

    Example: "z23a" -> ["z", 23, "a"]
    """
    pieces = re.split('([0-9]+)', s)
    return list(map(tryint, pieces))

def sort_nicely(l):
    """Sort ``l`` in place in natural order, so embedded numbers compare numerically.

    Uses ``alphanum_key`` as the sort key.  Returns ``None``.
    """
    l.sort(key=alphanum_key)
    

In [4]:
from tensorflow.python.client import device_lib

def get_available_gpus():
    """Print the names of all local devices whose ``device_type`` is ``'GPU'``.

    The list is printed, not returned.
    """
    gpu_names = []
    for device in device_lib.list_local_devices():
        if device.device_type == 'GPU':
            gpu_names.append(device.name)
    print(gpu_names)

get_available_gpus()

['/device:GPU:0', '/device:GPU:1', '/device:GPU:2']


2022-07-06 10:10:48.450765: I tensorflow/core/platform/cpu_feature_guard.cc:151] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE4.1 SSE4.2 AVX AVX2 AVX512F FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2022-07-06 10:10:51.391606: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1525] Created device /device:GPU:0 with 8674 MB memory:  -> device: 0, name: GeForce RTX 2080 Ti, pci bus id: 0000:18:00.0, compute capability: 7.5
2022-07-06 10:10:51.393529: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1525] Created device /device:GPU:1 with 9631 MB memory:  -> device: 1, name: GeForce RTX 2080 Ti, pci bus id: 0000:3b:00.0, compute capability: 7.5
2022-07-06 10:10:51.394790: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1525] Created device /device:GPU:2 with 9631 MB memory:  -> device: 2, name: GeForce RTX 2080 Ti, p

In [5]:
# Wrapper configured for the notebook-wide grid and 4-step input windows.
Aug_ConvLSTM_model = AugementedConvLSTM(
    timesteps=4,
    projection_width=projection_dimensions[1],
    projection_height=projection_dimensions[0],
)

# Positional hyper-parameters; their meaning is defined by
# AugementedConvLSTM.model in model.py, which is not visible from this notebook.
model = Aug_ConvLSTM_model.model([32, 16, 16], [9, 5, 3], [64, 32, 1], [9, 3, 5], 2)

2022-07-06 10:10:55.214965: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1525] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 8674 MB memory:  -> device: 0, name: GeForce RTX 2080 Ti, pci bus id: 0000:18:00.0, compute capability: 7.5
2022-07-06 10:10:55.216746: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1525] Created device /job:localhost/replica:0/task:0/device:GPU:1 with 9631 MB memory:  -> device: 1, name: GeForce RTX 2080 Ti, pci bus id: 0000:3b:00.0, compute capability: 7.5
2022-07-06 10:10:55.218381: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1525] Created device /job:localhost/replica:0/task:0/device:GPU:2 with 9631 MB memory:  -> device: 2, name: GeForce RTX 2080 Ti, pci bus id: 0000:86:00.0, compute capability: 7.5


In [8]:
class DataGenerator(tf.keras.utils.Sequence):
  """Keras ``Sequence`` that assembles batches from per-example ``.npy`` files.

  Each entry of ``list_examples`` is indexed as ``entry[0]`` (feature file) and
  ``entry[1]`` (label file).  Batches come back channel-first: ``X`` has shape
  ``(n_channels, batch_size, dim[0], dim[1])`` and ``y`` has shape
  ``(1, batch_size, dim[0], dim[1])``.  There is no time-step axis.

  NOTE: a later cell in this notebook defines another ``DataGenerator`` that
  adds a ``timesteps`` axis; once that cell runs it shadows this class.

  Args:
    list_examples: sequence of ``(feature_path, label_path)`` entries.
    batch_size: number of examples per batch.
    dim: ``(height, width)`` of every array.
    shuffle: reshuffle the example order at construction and after each epoch.
    n_channels: size of the leading axis of ``X`` (was hard-coded to 7).
  """
  def __init__(self, list_examples, batch_size=15, dim=(50, 50), shuffle=True, n_channels=7):
    self.dim = dim
    self.batch_size = batch_size
    self.list_examples = list_examples
    self.shuffle = shuffle
    self.n_channels = n_channels
    self.on_epoch_end()

  def __len__(self):
    # Number of full batches per epoch; a trailing partial batch is dropped.
    return len(self.list_examples) // self.batch_size

  def __getitem__(self, index):
    """Return batch number ``index`` as ``(X, y)``."""
    # Without this check an out-of-range index would yield uninitialised memory.
    if not 0 <= index < len(self):
      raise IndexError(f"batch index {index} out of range for {len(self)} batches")
    indexes = self.indexes[index*self.batch_size:(index+1)*self.batch_size]
    list_IDs_temp = [self.list_examples[k] for k in indexes]
    return self.__data_generation(list_IDs_temp)

  def on_epoch_end(self):
    # Called by Keras after every epoch (and once from __init__).
    self.indexes = np.arange(len(self.list_examples))
    if self.shuffle:
      np.random.shuffle(self.indexes)

  def __data_generation(self, list_IDs_temp):
    """Load the arrays for ``list_IDs_temp`` and stack them into one batch."""
    # Sized by the number of IDs so every row of the result is filled.
    n_examples = len(list_IDs_temp)
    X = np.empty([self.n_channels, n_examples, self.dim[0], self.dim[1]], dtype=np.float32)
    y = np.empty([1, n_examples, self.dim[0], self.dim[1]], dtype=np.float32)
    for i, ID in enumerate(list_IDs_temp):
      X[:, i, :, :] = np.load(ID[0])   # features
      y[:, i, :, :] = np.load(ID[1])   # labels
    return X, y

In [10]:
# Feature files (MIROC-ESM) and label files (IMD), matched by natural sort
# order.  `recursive=True` was dropped: it has no effect unless the pattern
# contains "**".
data = glob.glob(os.path.join('Data', 'MIROC-ESM', "[0-9]*.npy"))
sort_nicely(data)

labels = glob.glob(os.path.join('Data', 'IMD', "[0-9]*.npy"))
sort_nicely(labels)

# Pairing is purely positional, so a count mismatch would misalign every pair.
assert len(data) == len(labels), (
    f"{len(data)} feature files but {len(labels)} label files"
)

# Leave the list in chronological order.  Shuffling is controlled by
# DataGenerator's `shuffle` flag, and a list shuffled here could never be
# turned back into time-ordered windows.
train_examples = list(zip(data, labels))

partition = {}
partition['train'] = train_examples

In [12]:
# This cell used to rebuild exactly the same file list as the training cell,
# so the model was validated on its own training data.  It now makes a
# chronological hold-out instead.
data = glob.glob(os.path.join('Data', 'MIROC-ESM', "[0-9]*.npy"))
sort_nicely(data)

labels = glob.glob(os.path.join('Data', 'IMD', "[0-9]*.npy"))
sort_nicely(labels)

# Pairing is purely positional, so a count mismatch would misalign every pair.
assert len(data) == len(labels), (
    f"{len(data)} feature files but {len(labels)} label files"
)
all_examples = list(zip(data, labels))

# Same split as data_generator(): 1948-1999 for training, 2000-2005 held out
# (52 of 58 years).
# NOTE(review): assumes the files span that period at a uniform time step --
# adjust the two numbers below if they do not.
train_years, total_years = 52, 58
split_at = len(all_examples) * train_years // total_years

train_examples = all_examples[:split_at]
validation_examples = all_examples[split_at:]
assert train_examples and validation_examples, "not enough files to split"

# Training is restricted to the head of the list so the two sets are disjoint.
partition['train'] = train_examples
partition['validation'] = validation_examples

In [40]:
# Only keyword arguments that every DataGenerator definition in this notebook
# accepts.  The definition above this cell has no `timesteps` parameter, so
# passing one raised TypeError on a top-to-bottom run.  The later definition
# defaults `timesteps` to 4, which is the value that used to be passed here.
params = {'dim': (50, 50),
          'batch_size': 15,
          'shuffle': True}
# Define the generators
training_generator = DataGenerator(partition['train'], **params)
validation_generator = DataGenerator(partition['validation'], **params)

In [41]:
'''
(15, 4, 129, 135, 7) (15, 129, 135, 1)

(7, 15, 50, 50) => (1, 7, 15, 50, 50) => ()
(1, 15, 50, 50) => (15, 50, 50, 1)

(7, 15, 50, 50) (1, 15, 50, 50)
'''
import itertools

# Pull just the first batch and show the shapes of its two arrays.
for xx, yy in itertools.islice(training_generator, 1):
    print(xx.shape, yy.shape)


TypeError: expected str, bytes or os.PathLike object, not list

In [None]:
# Scratch note kept as a bare string literal: recorded mean/variance figures
# for the GCM and observed data.  Nothing in the notebook reads these values.
# NOTE(review): where the figures came from is not shown here -- confirm
# before relying on them.
'''
Mean of GCM Data:  4.240703115213778e-05
Variance of GCM Data:  7.390980757496945e-05
Mean of Obseved Data:  0.8937610799332989
Variance of Obseved Data:  5.908736811650466
'''

In [None]:
def load_dataset(model = None):
    """Placeholder loader: always returns ``(None, None)``.

    ``model`` is accepted but not used.
    """
    return None, None

In [67]:
class DataGenerator(tf.keras.utils.Sequence):
  """Keras ``Sequence`` that batches per-step ``.npy`` files into fixed-length windows.

  ``list_examples`` holds entries indexed as ``entry[0]`` (feature file) and
  ``entry[1]`` (label file).  Consecutive runs of ``timesteps`` entries form
  one window; the label of a window is the label file of its last entry.
  For the windows to be real time sequences the list must be in
  chronological order.

  Each batch is ``(X, y)`` with ``X`` of shape
  ``(batch_size, timesteps, dim[0], dim[1], n_channels)`` and ``y`` of shape
  ``(batch_size, dim[0], dim[1], 1)``.
  # assumes every feature file broadcasts to (n_channels, dim[0], dim[1]) and
  # every label file to (1, dim[0], dim[1]) -- TODO confirm against the data

  Args:
    list_examples: sequence of ``(feature_path, label_path)`` entries.
    batch_size: number of windows per batch.
    timesteps: number of consecutive entries per window.
    n_channels: size of the channel axis of each feature array.
    dim: ``(height, width)`` of every array.
    shuffle: reshuffle the window order at construction and after each epoch.
  """
  def __init__(self, list_examples, batch_size=15, timesteps = 4, n_channels = 7, dim=(50, 50), shuffle=True):
    self.dim = dim
    self.batch_size = batch_size
    self.timesteps = timesteps
    self.n_channels = n_channels
    self.list_examples = list_examples
    self.shuffle = shuffle
    self.on_epoch_end()

  def __len__(self):
    # Number of full batches per epoch; leftover entries are dropped.
    return len(self.list_examples) // (self.timesteps * self.batch_size)

  def __getitem__(self, index):
    """Return batch number ``index`` as ``(X, y)``."""
    # Without this check an out-of-range index would yield uninitialised memory.
    if not 0 <= index < len(self):
      raise IndexError(f"batch index {index} out of range for {len(self)} batches")
    per_batch = self.batch_size * self.timesteps
    indexes = self.indexes[index*per_batch:(index+1)*per_batch]
    list_IDs_temp = [self.list_examples[k] for k in indexes]
    return self.__data_generation(list_IDs_temp)

  def on_epoch_end(self):
    # Called by Keras after every epoch (and once from __init__).
    # One row per window of consecutive list positions.  Shuffling permutes
    # whole rows only, so the steps inside a window stay in their original
    # order.  Shuffling individual positions mixed unrelated time steps into
    # one sequence.
    n_windows = len(self.list_examples) // self.timesteps
    windows = np.arange(n_windows * self.timesteps).reshape(n_windows, self.timesteps)
    if self.shuffle:
      np.random.shuffle(windows)  # shuffles along the first axis only
    self.indexes = windows.reshape(-1)

  def __data_generation(self, list_IDs_temp):
    """Load the files for one batch and arrange them as (sample, step, ...)."""
    X = np.empty([self.batch_size, self.timesteps, self.n_channels, self.dim[0], self.dim[1]], dtype=np.float32)
    y = np.empty([self.batch_size, 1, self.dim[0], self.dim[1]], dtype=np.float32)
    for i, ID in enumerate(list_IDs_temp):
      # divmod keeps the sample counter correct for every timesteps value,
      # including 1, where the old `i != 0` guard skipped the first label.
      sample, step = divmod(i, self.timesteps)
      X[sample, step, :, :, :] = np.load(ID[0])
      if step == self.timesteps - 1:
        # Only the last step of a window supplies the target.
        y[sample, :, :, :] = np.load(ID[1])

    # Channels-first -> channels-last for both arrays.
    return X.transpose(0,1,3,4,2), y.transpose(0,2,3,1)

# Settings shared by the training and validation generators.
params = dict(
    dim=(50, 50),
    batch_size=15,
    timesteps=4,
    shuffle=True,
)
# Training generator is built first, then validation.
training_generator = DataGenerator(partition['train'], **params)
validation_generator = DataGenerator(partition['validation'], **params)

import itertools

# Pull just the first batch and show the shapes of its two arrays.
for xx, yy in itertools.islice(training_generator, 1):
    print(xx.shape, yy.shape)
# Shapes noted by the author (the 129x135 line is presumably from a different
# grid size -- not confirmed here):
# (15, 4, 129, 135, 7) (15, 129, 135, 1)
# (15, 4, 50, 50, 7) (15, 50, 50, 1)

(15, 4, 50, 50, 7) (15, 50, 50, 1)


In [None]:
# Fit the model on the file-backed generators.  std_observed is left at 1.0,
# so the actual_rmse_loss metric equals the plain RMSE.
history = train(
    clstm_model=model,
    train_generator=training_generator,
    test_generator=validation_generator,
    load_weights=True,
    std_observed=1.0,
)