In [None]:
import os
import numpy as np
import tensorflow as tf

# Ignore Tensorflow Warnings
os.environ["TF_CPP_MIN_LOG_LEVEL"] = '2'

from tensorflow.keras.layers import AveragePooling2D, UpSampling2D, MaxPooling2D
from scipy import interpolate
from scipy.io import loadmat
from datetime import datetime, timedelta

# Split the SWAN matlab file
This script converts the SWAN MATLAB file into a series of CSV files, one per time step (and sea-state variable).

In [None]:
# Define help function to create a generator that outputs a datetime object for the given time range
def daterange(start_date, end_date, step=None, step_type=None):
    """Yield equally spaced datetime objects starting at ``start_date``.

    Only the whole days between ``start_date`` and ``end_date`` are covered
    (``(end_date - start_date).days``); a trailing partial day is not yielded.

    Parameters
    ----------
    start_date : datetime
        First time stamp that is yielded.
    end_date : datetime
        End of the time span (see the note on whole days above).
    step : int or float, optional
        Size of the time step. Defaults to the notebook variable ``time_step``.
    step_type : str, optional
        "hrs" for a step in hours, anything else is treated as minutes.
        Defaults to the notebook variable ``time_step_type``.

    Yields
    ------
    datetime
        ``start_date + n * step`` for every step in the covered days.
    """
    # The configuration is looked up when the generator is iterated, not when
    # this cell is executed, so the cell also works on a fresh kernel where
    # the configuration cell has not been run yet.
    if step is None:
        step = time_step
    if step_type is None:
        step_type = time_step_type

    if step_type == "hrs":
        steps_per_day = int(24/step)
        delta = timedelta(hours=step)
    else:
        steps_per_day = int(60/step * 24)
        delta = timedelta(minutes=step)

    for n in range(steps_per_day*int((end_date - start_date).days)):
        yield start_date + n*delta

In [None]:
# Definition of file names and constants

# Define the type of the time step (hrs, min) and the size of it
time_step_type = "hrs" # could be min too
time_step = 1          # can be something like 20 (for minutes)

# Variable to extract (has to be the name given by SWAN)
var = "Hsig"
var_short = "Hs" # How the variable name will appear in the output

# Path of the Matlab file to be split
fn = "out/BaskCoast_ZoomBtz.mat"

# Define the data time span
# NOTE(review): the daterange helper of this notebook only counts the whole
# days between the two dates, so the trailing partial day (here the last
# 23 hours) is not exported - confirm that this is intended.
start_date = datetime(2018, 1, 1, 0, 0)
end_date = datetime(2019, 12, 31, 23, 0)


# Path of the output files, can be for reference (HR) or input (LR) files
# (the trailing slash is required: file names are appended by plain string concatenation)
#dirout = "Data/LR/DS_RAW/Kernel_16/{}/".format(var_short)
dirout = "Data/HR/{}/".format(var_short)

# Prefix of the output files
prefix = "BaskCoast_{}".format(var_short.upper())

# Create the output folder(s); exist_ok avoids the separate existence check
# and does not fail if the folder is already there
os.makedirs(dirout, exist_ok=True)

## Load whole file into RAM
This is the easiest way if the whole data set fits in RAM

In [None]:
# Read the complete Matlab file into memory once
data = loadmat(fn)

# Write one csv file per time step; the output files are numbered starting at 1
for file_no, single_date in enumerate(daterange(start_date, end_date), start=1):
    # The key of a time step is built as <var>_<YYYYmmdd>_<HHMM00>
    key = "{}_{}".format(var, single_date.strftime('%Y%m%d_%H%M00'))

    # Look up the array of this time step and store it in its own file
    out_path = "{}{}_{}.csv".format(dirout, prefix, file_no)
    np.savetxt(out_path, data[key][:, :], delimiter=",")

## Load each time step one by one
This is only recommended if the data set does not fit in RAM. It is not optimized and thus very slow.

In [None]:
# Export one csv file per time step, reading only the needed variable from
# the Matlab file in every iteration (the file is re-opened each time)
for i, single_date in enumerate(daterange(start_date, end_date)):
    # Get the right key for according day and hour
    time = single_date.strftime('%Y%m%d_%H%M00')
    file_key = var + "_" + time

    # variable_names expects a sequence of names; "(file_key)" is just a
    # parenthesised string, so pass a real one-element list instead
    data = loadmat(fn, variable_names=[file_key])
    # Access the array and save it in a file (file numbering starts at 1)
    fn_out = dirout + prefix + "_{}.csv".format(i+1)
    np.savetxt(fn_out, data[file_key][:,:], delimiter=",")

# Upsample low-resolution SWAN data
For the training of the neural network the low-resolution data has to be upsampled to have
the same grid size as the high-resolution references. This can be done either by a nearest-
neighbor scheme or by bicubic interpolation.

## Nearest Neighbor

In [None]:
# Number of files, grid size and size of Pooling kernel
n = 17496
pooling_in = "DS_RAW"
pooling_out = "DS"
grid = (10, 10) # Of the Low-resolution input
kernel = 16
var = "Hs"

# Define file location of the raw LR input files ({} is filled with the file number)
fin = "Data/LR/{}/Kernel_{}/{}/BaskCoast_{}_{{}}.csv".format(pooling_in, kernel, var, var.upper())

# Define directory location of the upsampled LR output files
dirout = "Data/LR/{}/Kernel_{}/{}".format(pooling_out, kernel, var)
fout = os.path.join(dirout, "BaskCoast_{}_{{}}.csv".format(var.upper()))


# Create the output folder(s); exist_ok makes this a no-op if they already exist
os.makedirs(dirout, exist_ok=True)

# Get upsampling object; it does not depend on the file, so it is created
# once here instead of once per loop iteration
up_sample = UpSampling2D(size=(kernel, kernel))

for i in range(1, n+1):
    # Load file and convert to tensor of shape (1, grid[0], grid[1], 1)
    hs = np.loadtxt(fin.format(i), delimiter=',')
    hs_t = tf.convert_to_tensor(hs)
    hs_t = tf.reshape(hs_t, [1, grid[0], grid[1], 1])

    # Upsample by the kernel size in both grid directions
    hs_t = up_sample(hs_t)

    # Set all negative values to NaN for easier processing during training
    hs_t = tf.where(tf.math.less(hs_t, 0), np.nan, hs_t)

    # Save file in specified generic format
    np.savetxt(fout.format(i), hs_t[0,:,:,0].numpy(), delimiter=",")

## Bicubic Interpolation

In [None]:
# Number of files, grid size and size of Pooling kernel
n = 17496
pooling_in = "DS_RAW"
pooling_out = "DS_INT"
grid = (160, 160) # Of the HR reference
kernel = 8
grid_LR = tuple(xi // kernel for xi in grid)
var = "Hs"

# Define file location of the raw LR input files ({} is filled with the file number)
fin = "Data/LR/{}/Kernel_{}/{}/BaskCoast_{}_{{}}.csv".format(pooling_in, kernel, var, var.upper())

# Define directory location of the interpolated LR output files
dirout = "Data/LR/{}/Kernel_{}/{}".format(pooling_out, kernel, var)
fout = os.path.join(dirout, "BaskCoast_{}_{{}}.csv".format(var.upper()))


# Create the output folder(s); exist_ok makes this a no-op if they already exist
os.makedirs(dirout, exist_ok=True)

# Create HR and LR grids for the interpolation
x_HR = np.arange(grid[0])
y_HR = np.arange(grid[1])

# The LR nodes are placed on every kernel-th HR index
x_LR = np.arange(0, grid[0], kernel)
y_LR = np.arange(0, grid[1], kernel)

for i in range(1, n+1):
    # Load file and replace NaN by 0, the spline fit needs finite values
    LR = np.loadtxt(fin.format(i), delimiter=',')
    LR = np.nan_to_num(LR)

    # Bring the data into the LR grid shape; a plain NumPy reshape is enough,
    # no round trip through a TensorFlow tensor is needed
    LR = LR.reshape(grid_LR)

    # Spline fit on the LR grid (default spline degrees of RectBivariateSpline)
    f = interpolate.RectBivariateSpline(x_LR, y_LR, LR)

    # Interpolate to High-Resolution grid
    interpol = f(x_HR, y_HR)

    # Everything below 0.01 (including the former NaN cells that were set to 0)
    # is marked as NaN again
    interpol[interpol < 0.01] = np.nan

    # Save file in specified generic format
    np.savetxt(fout.format(i), interpol, delimiter=",")