In [2]:
# Importing packages
import xarray as xr
import numpy as np
import matplotlib.pyplot as plt
import os
import re
from tqdm import tqdm
from preprocessing_functions import *

# Test data generator using one month data

In [7]:
# Test for one month data
data_path = "/ocean/projects/atm200007p/jlin96/longSPrun/"
%time ds = load_data(month = 2, year = 0, data_path = data_path)
nnInput = make_nn_input(ds, family = "relative", subsample = False, spacing = 8, contiguous = True, print_diagnostics = True)

CPU times: user 815 ms, sys: 32.2 ms, total: 848 ms
Wall time: 861 ms
loaded in data
nntbp
(30, 1343, 64, 128)
nnqbp
(30, 1343, 64, 128)
lhflx
(1, 1343, 64, 128)
shflx
(1, 1343, 64, 128)
ps
(1, 1343, 64, 128)
solin
(1, 1343, 64, 128)
newhum
(30, 1343, 64, 128)
oldhum
(30, 1343, 64, 128)
nnInput
(64, 1343, 64, 128)
Mean relative humidity conversion error: 0.004525340220660508
Variance for relative humidity conversion error: 0.0005452406895807517
nntbp.shape: (30, 1343, 64, 128)
nnqbp.shape: (30, 1343, 64, 128)
lhflx.shape: (1, 1343, 64, 128)
shflx.shape: (1, 1343, 64, 128)
ps.shape: (1, 1343, 64, 128)
solin.shape: (1, 1343, 64, 128)
newhum.shape: (30, 1343, 64, 128)
oldhum.shape: (30, 1343, 64, 128)
nnInput.shape: (64, 1343, 64, 128)



In [15]:
nnTarget = make_nn_target(ds, subsample = False, spacing = 8, contiguous = True, print_diagnostics = True)


tphystnd
(30, 1343, 64, 128)
phq
(30, 1343, 64, 128)
nnTarget
(60, 1343, 64, 128)


In [9]:
spData_train_input = reshape_input(nnInput)

In [17]:
spData_train_target = reshape_target(nnTarget)

In [19]:
spData_train_input.shape, spData_train_target.shape # shape of input, 64 features and 11001856 samples

((64, 11001856), (60, 11001856))

In [33]:
# Input data, normalization files
mu_fn = '../training/norm_files/inp_sub_3d.txt'
sigma_fn = '../training/norm_files/inp_div_3d.txt'
mu = np.loadtxt(mu_fn)
sigma = np.loadtxt(sigma_fn)


array([[2.24818400e+02],
       [2.18004584e+02],
       [2.02243455e+02],
       [2.20314403e+02],
       [2.17085671e+02],
       [2.11971965e+02],
       [2.07420064e+02],
       [2.05973354e+02],
       [2.06601566e+02],
       [2.07979322e+02],
       [2.09715409e+02],
       [2.11536297e+02],
       [2.14117904e+02],
       [2.17825544e+02],
       [2.23032102e+02],
       [2.29604145e+02],
       [2.37105028e+02],
       [2.45090067e+02],
       [2.53181388e+02],
       [2.60449261e+02],
       [2.66226924e+02],
       [2.70324205e+02],
       [2.72955380e+02],
       [2.74973995e+02],
       [2.76444679e+02],
       [2.77948704e+02],
       [2.79421818e+02],
       [2.80797082e+02],
       [2.81984622e+02],
       [2.83081572e+02],
       [9.05007433e-02],
       [1.50965503e-01],
       [1.43941767e-01],
       [6.38730646e-02],
       [6.39144316e-02],
       [7.12869663e-02],
       [1.60643147e-01],
       [2.68171771e-01],
       [2.34630044e-01],
       [2.44061187e-01],


In [35]:
(spData_train_input - mu[:, np.newaxis])/sigma[:, np.newaxis]

array([[ 0.90813374,  0.90755972,  0.9072237 , ..., -2.30963506,
        -2.31026333, -2.31086817],
       [ 0.7200808 ,  0.72048886,  0.72091046, ..., -2.35592818,
        -2.35593494, -2.35584476],
       [ 0.97685852,  0.97708051,  0.97651923, ..., -2.47677944,
        -2.47695992, -2.47720176],
       ...,
       [ 0.37184556,  0.37364867,  0.3773844 , ..., -0.77103151,
        -0.77103151, -0.77103151],
       [ 0.60544223,  0.62159941,  0.57902204, ...,  0.50627013,
         0.57787777,  0.62468221],
       [-0.64423766, -0.64627585, -0.65012872, ..., -0.68459312,
        -0.6714258 , -0.65068756]])

In [58]:
import numpy as np
import tensorflow as tf
import xarray as xr
import h5py
import pandas as pd
from tensorflow.keras import models
from tensorflow.keras import layers
from tensorflow.keras.models import load_model

class data_generator(tf.keras.utils.Sequence):
    
    def __init__(self, nn_input, nn_target, mu_fn, sigma_fn, 
                 batch_size=1024,shuffle=True):
        self.mu_fn = mu_fn
        self.sigma_fn = sigma_fn
        self.batch_size = batch_size
        self.shuffle = shuffle
        self.input_ds, self.target_ds = nn_input, nn_target
        self.mu_ds = np.loadtxt(mu_fn)
        self.sigma_ds = np.loadtxt(sigma_fn)
        self.n_samples = self.input_ds.shape[1]
        self.n_batches = int(self.n_samples/self.batch_size)
        self.on_epoch_end()

    def __len__(self):
        return self.n_batches

    def __getitem__(self, index):
        start_idx = int(index * self.batch_size)
        end_idx = int(start_idx + self.batch_size)
        X  = self.input_ds[:, start_idx:end_idx]
        Y = self.target_ds[:, start_idx:end_idx]
        # 2.Normalization
        X = (X - self.mu_ds[:, np.newaxis]) / self.sigma_ds[:, np.newaxis]

        return X, Y



    def on_epoch_end(self):
        self.indexs = np.arange(self.n_batches)
        if self.shuffle: np.random.shuffle(self.indexs)

In [59]:
dg = data_generator(spData_train_input, spData_train_target, mu_fn, sigma_fn)

In [63]:
# The first batch data
print(dg[0][0].shape, dg[0][1].shape)
dg[0][0], dg[0][1] # input and target


(64, 1024) (60, 1024)


(array([[ 0.90813374,  0.90755972,  0.9072237 , ...,  0.91157104,
          0.91189905,  0.91249341],
        [ 0.7200808 ,  0.72048886,  0.72091046, ...,  0.74026022,
          0.74040676,  0.74105306],
        [ 0.97685852,  0.97708051,  0.97651923, ...,  0.99293534,
          0.99296872,  0.99297955],
        ...,
        [ 0.37184556,  0.37364867,  0.3773844 , ...,  0.50712979,
          0.52152173,  0.53528058],
        [ 0.60544223,  0.62159941,  0.57902204, ...,  0.26651128,
          0.27150584,  0.2913498 ],
        [-0.64423766, -0.64627585, -0.65012872, ..., -0.77793631,
         -0.77778144, -0.77790141]]),
 array([[-4.6344035e-06, -4.4992044e-06, -4.3746754e-06, ...,
          3.6902911e-08,  7.1772189e-07,  1.2165418e-06],
        [-2.4730957e-06, -2.4254095e-06, -2.4150222e-06, ...,
         -9.6337567e-07, -5.8022550e-07, -3.6005693e-07],
        [-5.3740382e-06, -9.4926654e-06, -1.4961652e-05, ...,
          1.1557225e-06, -6.9673301e-06, -4.2275842e-06],
        ...,


In [65]:
dg[100]

(array([[ 0.83126522,  0.82596592,  0.8198688 , ...,  0.9219113 ,
          0.92073676,  0.91945001],
        [ 0.85236602,  0.84665083,  0.84115807, ...,  0.94655219,
          0.9471331 ,  0.94685204],
        [ 1.20159461,  1.19691306,  1.19231904, ...,  1.10798616,
          1.10461937,  1.10206022],
        ...,
        [-0.77103151, -0.77103151, -0.77103151, ..., -0.06701986,
          0.18294648,  0.43345955],
        [ 1.51032042,  1.58459583,  1.59588066, ...,  0.83447795,
          0.83050556,  0.77247129],
        [-0.56655121, -0.55927876, -0.55768005, ..., -0.59959883,
         -0.60113562, -0.59476684]]),
 array([[-5.70841585e-05, -5.69377407e-05, -5.67384923e-05, ...,
         -1.18691269e-05, -1.71688941e-06,  8.43276848e-06],
        [-4.03271952e-05, -4.02107617e-05, -4.00895915e-05, ...,
         -3.40692827e-06,  2.97016163e-06,  9.31379054e-06],
        [-3.36639241e-05, -3.91362300e-05, -3.85001149e-05, ...,
         -2.87348144e-06,  2.10438225e-06,  2.52452037e-