In [None]:
import os
import math
import glob
import numpy as np
import pandas as pd
import tensorflow as tf
import matplotlib.pyplot as plt
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.layers import LSTM,TimeDistributed
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint

In [None]:
# Enable on-demand GPU memory growth (allow_growth) for the TF1-compat
# interactive session created in this cell.
from tensorflow.compat.v1 import ConfigProto, InteractiveSession

config = ConfigProto()
config.gpu_options.allow_growth = True
session = InteractiveSession(config=config)

## Sea Ice Prediction - UNet

### Loading Combined Data 1979-2021

Only the last 5 variables (channels) of the combined array are used as model inputs.

#### Features:
longwave, rain_rate, snow_rate, sst, sea_ice

In [None]:
# Load the combined array and the land mask from the local data directory.
# (Colab location of the same file:
#  /content/drive/MyDrive/UMBC/Polar_Sea_Ice/Taki Data/whole_data.npy)
# NOTE(review): allow_pickle=True can execute arbitrary code when loading an
# untrusted file; confirm these .npy files are trusted, or drop the flag if
# they contain plain numeric arrays.
data = np.load('data/whole_data.npy', allow_pickle=True)
land_mask = np.load("data/y_land_mask_actual.npy", allow_pickle=True)

# Channel index 9 becomes the prediction target; the last five channels are
# kept as model inputs.
# NOTE(review): presumably channel 9 is sea ice (and thus also the last input
# channel if the array has 10 channels) -- confirm against the data layout.
target = data[..., 9]
data = data[..., -5:]

print(data.shape)
print(target.shape)

In [None]:
# Give the land mask a trailing channel axis, (448, 304, 1), so it can be
# multiplied element-wise with tensors that end in (448, 304, 1).
y_land_mask = np.reshape(land_mask, (448, 304, 1))

In [None]:
# Add a lag to the monthly targets: the inputs at index t are paired with the
# target at index t + lag.
# NOTE: this cell trims `data`/`target` in place, so re-running it without
# re-running the loading cell trims them again.
lag = 6
# Use an explicit end index instead of data[:-lag]: with lag == 0 the slice
# data[:-0] would be empty, whereas this form keeps every sample.
data = data[:len(data) - lag]
target = target[lag:]
print(data.shape)
print(target.shape)

In [None]:
# Sequential (unshuffled) train/test split along the first axis: the final
# 86 entries are held out for testing and everything before them is used for
# training (the original note describes this as the last ~7 years of monthly
# data).

LEN_DATA = len(data)  # number of entries along the first (time) axis

NUM_TEST = 86
NUM_TRAIN = LEN_DATA - NUM_TEST

print('LEN_DATA:', LEN_DATA)
print('NUM_TRAIN:', NUM_TRAIN)
print('NUM_TEST:', NUM_TEST)

# Input features
x_train, x_test = data[:NUM_TRAIN], data[NUM_TRAIN:]

# Labels, split at the same index so inputs and targets stay aligned
y_train, y_test = target[:NUM_TRAIN], target[NUM_TRAIN:]

In [None]:
# Report the shapes of the train/test inputs and targets after the split.
for label, arr in (
    ('x_train.shape:', x_train),
    ('y_train.shape:', y_train),
    ('x_test.shape:', x_test),
    ('y_test.shape:', y_test),
):
    print(label, arr.shape)

In [None]:
# Replace every NaN with 0 in all four arrays. With its default arguments
# np.nan_to_num also maps +/-inf to very large finite values.
x_train, y_train, x_test, y_test = (
    np.nan_to_num(arr) for arr in (x_train, y_train, x_test, y_test)
)

In [None]:
# Spot-check a single target value after NaN replacement.
y_test[1, 1, 1]

### Reshaping Input and Target Features

In [None]:
def reshape_features(dataset, months, lat, lon, features):
    """Reshape a feature array to (months, lat, lon, features).

    Prints the incoming shape of ``dataset`` before reshaping.

    Args:
        dataset: array whose total size equals months * lat * lon * features.
        months: size of the first (time) axis.
        lat: size of the second axis.
        lon: size of the third axis.
        features: size of the last (channel) axis.

    Returns:
        The reshaped array.
    """
    print(dataset.shape)
    return dataset.reshape((months, lat, lon, features))

def reshape_outcome(dataset, months, lat, lon):
    """Reshape a target array to (months, lat, lon, 1).

    Prints the incoming shape of ``dataset`` before reshaping.

    Args:
        dataset: array whose total size equals months * lat * lon.
        months: size of the first (time) axis.
        lat: size of the second axis.
        lon: size of the third axis.

    Returns:
        The reshaped array with a trailing singleton channel axis.
    """
    print(dataset.shape)
    return dataset.reshape((months, lat, lon, 1))

### Normalization

In [None]:
# normalize the features
#
# StandardScaler standardises each COLUMN of a 2-D (n_samples, n_features)
# array, so the arrays are flattened with the feature/channel axis kept as the
# column axis. x_train is (months, lat, lon, features), so the feature count is
# shape[-1]. Using shape[2] (the lon axis, 304) would produce columns that mix
# all variables together instead of scaling each variable on its own.
n_features = x_train.shape[-1]

scaler_f = StandardScaler()
x_train = scaler_f.fit_transform(x_train.reshape(-1, n_features))  # fit on training data only
x_test = scaler_f.transform(x_test.reshape(-1, n_features))  # reuse training statistics

# The target is a single variable, so it is scaled as one column.
scaler_l = StandardScaler()
y_train = scaler_l.fit_transform(y_train.reshape(-1,1)) #reshaping to 2d for standard scaling
y_test = scaler_l.transform(y_test.reshape(-1,1)) #reshaping to 2d for standard scaling


In [None]:
# Restore the spatial layout after scaling: inputs become
# (samples, lat, lon, features) and targets (samples, lat, lon, 1).
lat, lon, features = 448, 304, 5

x_train = reshape_features(x_train, NUM_TRAIN, lat, lon, features)
x_test = reshape_features(x_test, NUM_TEST, lat, lon, features)

y_train = reshape_outcome(y_train, NUM_TRAIN, lat, lon)
y_test = reshape_outcome(y_test, NUM_TEST, lat, lon)

In [None]:
# Report the shapes of the arrays that will be fed to the model.
for label, arr in (
    ('x_train.shape:', x_train),
    ('y_train.shape:', y_train),
    ('x_test.shape:', x_test),
    ('y_test.shape:', y_test),
):
    print(label, arr.shape)

In [None]:
import numpy as np
from tensorflow.keras import Input
from tensorflow.keras.layers import Dense, Conv2D, ConvLSTM2D, BatchNormalization, UpSampling2D,MaxPooling2D, concatenate, Flatten, Reshape
from tensorflow.keras.metrics import RootMeanSquaredError
from tensorflow.keras.optimizers import Adam, SGD
from tensorflow.keras.models import load_model, Model

In [None]:
def custom_mse(y_true, y_pred):
    """Mean squared error computed after applying the land mask.

    Both tensors are multiplied element-wise by the module-level
    ``y_land_mask`` before the residual is taken. The mean is taken over all
    elements of the masked residual, not only those where the mask is
    non-zero.

    NOTE(review): presumably the mask is 0 over land and 1 elsewhere --
    confirm against the mask file.
    """
    masked_true = tf.math.multiply(y_true, y_land_mask)
    masked_pred = tf.math.multiply(y_pred, y_land_mask)
    return tf.reduce_mean(tf.square(masked_true - masked_pred))

In [None]:
# Settings passed to the model builder: input tensor shape, the land-masked
# loss, and RMSE as the reported metric.
input_shape = 448, 304, 5
loss = custom_mse
metrics = RootMeanSquaredError()

In [None]:
def unet_batchnorm(input_shape, loss, metrics, learning_rate=1e-4, filter_size=3,
                   n_filters_factor=1, n_forecast_months=1, use_temp_scaling=False,
                   n_output_classes=1,
                   **kwargs):
    """Build and compile a small U-Net with batch normalisation.

    The network has two 2x2 max-pooling steps on the way down and two 2x
    nearest-neighbour up-sampling steps on the way up, with skip connections
    concatenated on the channel axis. Because the up-sampled tensors are
    concatenated with encoder tensors, the two spatial input dimensions must
    be divisible by 4.

    Args:
        input_shape: shape of one input sample, e.g. (lat, lon, channels).
        loss: loss passed to ``model.compile``.
        metrics: metric (or list of metrics) passed to ``model.compile``.
        learning_rate: learning rate for the Adam optimizer.
        filter_size: kernel size of the main convolutions.
        n_filters_factor: multiplier applied to the base filter counts
            (32 / 64 / 128); the product is truncated to an int.
        n_forecast_months: accepted for interface compatibility; not used here.
        use_temp_scaling: accepted for interface compatibility; not used here.
        n_output_classes: number of channels of the final 1x1 linear convolution.
        **kwargs: accepted and ignored.

    Returns:
        The compiled Keras ``Model``.
    """

    def n_filters(base):
        # Builtin int() replaces np.int, an alias NumPy deprecated in 1.20 and
        # removed in 1.24 (it was always just the builtin int).
        return int(base * n_filters_factor)

    def conv(filters, kernel_size):
        # Every hidden convolution shares the same activation/padding/initializer.
        return Conv2D(filters, kernel_size, activation='relu', padding='same',
                      kernel_initializer='he_normal')

    def upsample():
        return UpSampling2D(size=(2, 2), interpolation='nearest')

    inputs = Input(shape=input_shape)

    # Encoder, level 1
    conv1 = conv(n_filters(32), filter_size)(inputs)
    conv1 = conv(n_filters(32), filter_size)(conv1)
    bn1 = BatchNormalization(axis=-1)(conv1)
    pool1 = MaxPooling2D(pool_size=(2, 2))(bn1)

    # Encoder, level 2
    conv2 = conv(n_filters(64), filter_size)(pool1)
    conv2 = conv(n_filters(64), filter_size)(conv2)
    bn2 = BatchNormalization(axis=-1)(conv2)
    pool2 = MaxPooling2D(pool_size=(2, 2))(bn2)

    # Bottleneck
    conv3 = conv(n_filters(128), filter_size)(pool2)
    conv3 = conv(n_filters(128), filter_size)(conv3)
    bn3 = BatchNormalization(axis=-1)(conv3)

    # Decoder, level 2: up-sample and merge with the level-2 encoder output
    up8 = conv(n_filters(64), 2)(upsample()(bn3))
    merge8 = concatenate([bn2, up8], axis=3)
    conv8 = conv(n_filters(64), filter_size)(merge8)
    conv8 = conv(n_filters(64), filter_size)(conv8)
    bn8 = BatchNormalization(axis=-1)(conv8)

    # Decoder, level 1: this skip connection takes conv1 (before batch norm),
    # unlike the level-2 skip which takes bn2; kept as in the original code.
    up9 = conv(n_filters(32), 2)(upsample()(bn8))
    merge9 = concatenate([conv1, up9], axis=3)
    conv9 = conv(n_filters(32), filter_size)(merge9)
    conv9 = conv(n_filters(32), filter_size)(conv9)
    conv9 = conv(n_filters(32), filter_size)(conv9)

    # 1x1 linear convolution producing the output channel(s)
    output = Conv2D(n_output_classes, 1, activation='linear')(conv9)

    model = Model(inputs, output)
    # `learning_rate` is the supported keyword; `lr` is a deprecated alias.
    model.compile(optimizer=Adam(learning_rate=learning_rate), loss=loss, metrics=metrics)

    return model

In [None]:
# Build and compile the U-Net with the configured input shape, loss and metric.
model = unet_batchnorm(input_shape, loss, metrics)

# summary() prints the table itself and returns None, so wrapping it in
# print() would emit an extra "None" line.
model.summary()

In [None]:
# Early-stopping callback.
# NOTE(review): it is defined but not passed to fit() below (the callbacks
# argument was commented out in the original), and patience=100 exceeds the
# 50 training epochs, so it would never trigger even if it were passed.
early_stopping = EarlyStopping(patience=100, restore_best_weights=True)

# Train the model; validation_split holds out 20% of the training samples
# for validation.
print(x_train.shape, y_train.shape)
history = model.fit(
    x=x_train,
    y=y_train,
    epochs=50,
    batch_size=32,
    validation_split=.2,
    verbose=2,
)

In [None]:
# evaluate() returns [loss, metric]. The model is compiled with
# RootMeanSquaredError as its only metric, so the second value is an RMSE
# (in scaled units), not an MSE; the printed labels say so. The variable
# names are kept unchanged so any later cell that reads them still works.
train_loss, train_mse = model.evaluate(x_train, y_train)
print("Train RMSE: {:.4f}\nTrain Loss: {:.4f}".format(train_mse, train_loss))

test_loss, test_mse = model.evaluate(x_test, y_test)
print("Test RMSE: {:.4f}\nTest Loss: {:.4f}".format(test_mse, test_loss))

In [None]:
# Run the trained model on the held-out test inputs (predictions are still in
# the standardised label space).
y_pred = model.predict(x=x_test)
print(y_pred.shape)

In [None]:
# Undo the label standardisation so predictions and actual values are back in
# the original target units. The scaler works on a single column, hence the
# (-1, 1) reshape.
inv_y_pred = scaler_l.inverse_transform(np.reshape(y_pred, (-1, 1)))
inv_y_test = scaler_l.inverse_transform(np.reshape(y_test, (-1, 1)))

In [None]:
# Restore the (samples, lat, lon) layout of the un-scaled predictions and
# actual values.
inv_y_pred = inv_y_pred.reshape(len(y_pred),448,304)
print(inv_y_pred.shape)
inv_y_test = inv_y_test.reshape(len(y_test),448,304)
# Print the shape of the array just reshaped (the original printed
# inv_y_pred.shape a second time here).
print(inv_y_test.shape)

In [None]:
# Spot-check one un-scaled target value.
inv_y_test[1, 1, 1]

In [None]:
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error
from math import sqrt

# Test-set metrics in the original target units, computed over every grid
# cell of every test sample.
# NOTE(review): the land mask used in the training loss is not applied here.
y_true_flat = inv_y_test.flatten()
y_pred_flat = inv_y_pred.flatten()

rmse = sqrt(mean_squared_error(y_true_flat, y_pred_flat))
print('Test RMSE: %.3f' % rmse)

r_sq = r2_score(y_true_flat, y_pred_flat)
print('Test R_Square: %.3f' % r_sq)

In [None]:
# Post-processing: clip predictions to [0, 100] and recompute the metrics.
# NOTE(review): assumes the target is expressed on a 0-100 scale -- confirm.
post_y = np.clip(inv_y_pred, 0, 100)

actual_flat = inv_y_test.flatten()
clipped_flat = post_y.flatten()

rmse1 = sqrt(mean_squared_error(actual_flat, clipped_flat))
print('Post-Process RMSE: %.3f' % rmse1)

# r_sq is overwritten here with the post-processed score.
r_sq = r2_score(actual_flat, clipped_flat)
print('Post-Process R_Square: %.3f' % r_sq)

mae = mean_absolute_error(actual_flat, clipped_flat)
print('Post-Process MAE: %.3f' % mae)