In [1]:
# Add submodule paths.
# Each path is appended only if it is missing, so re-running this cell does not
# keep growing sys.path with duplicate entries.
import sys
for _submodule_path in ('./normalizing_flows', './baselines'):
    if _submodule_path not in sys.path:
        sys.path.append(_submodule_path)

In [2]:
import tensorflow as tf
import tensorflow_probability as tfp
import matplotlib.pyplot as plt
import numpy as np
import utils.data as data_util
import utils.nn_util as nn
import xarray as xr
import gcsfs
import dscnn
from normalizing_flows.models.variational import parameterize, nll_loss
from normalizing_flows.models.glow import Glow
from normalizing_flows.flows.glow import coupling_nn_glow
from datasource import EraiRasDataLoader
from utils.pipeline_v2 import Pipeline, fillnan, clip, remove_monthly_means
from utils.distributions import normal
from tensorflow.keras.optimizers import Adamax

In [3]:
# Loader bound to the project's GCS bucket; it supplies the zarr stores opened below.
data = EraiRasDataLoader(
    gcs_bucket='erai-rasmussen',
    gcs_project='thesis-research-255223',
    auth='gcs.secret.json',
)


def _open_consolidated(store):
    """Open a zarr store with xarray, using consolidated metadata."""
    return xr.open_zarr(store, consolidated=True)


# ERA-Interim, 1-degree grid
erai_deg1 = _open_consolidated(data.erai('daily-1deg'))
# Rasmussen regridded to 1 degree
ras_deg1 = _open_consolidated(data.rasmussen('daily-1deg'))
# Rasmussen regridded to 1/2 degree, followed by the 'daily-1-4deg' and
# 'daily-1-8deg' stores (presumably 1/4 and 1/8 degree, going by the naming)
ras_deg12 = _open_consolidated(data.rasmussen('daily-1-2deg'))
ras_deg14 = _open_consolidated(data.rasmussen('daily-1-4deg'))
ras_deg18 = _open_consolidated(data.rasmussen('daily-1-8deg'))
# The 'daily-1-16deg' store is left disabled:
#ras_deg116 = xr.open_zarr(rasmussen('daily-1-16deg'), consolidated=True)
# regions
def southeast_us(dataset, scale_factor=1):
    """Crop ``dataset`` to a fixed index window anchored near (lat 25, lon 260).

    The window starts at the grid indices whose ``lat`` / ``lon`` coordinate
    values are closest to 25 and 260 respectively, and spans
    ``15 * scale_factor`` index steps along ``lat`` and ``30 * scale_factor``
    index steps along ``lon``.

    Parameters
    ----------
    dataset
        Object exposing ``lat`` and ``lon`` coordinates and an ``isel`` method
        (e.g. an xarray dataset).
    scale_factor : int, optional
        Multiplier applied to the window size on both axes.

    Returns
    -------
    The result of ``dataset.isel`` for the computed ``lat`` / ``lon`` slices.
    """
    def _nearest_index(coord, target):
        # Position of the coordinate value with the smallest absolute distance to target.
        return np.abs(coord - target).argmin().values

    lat_coord, lon_coord = dataset.lat, dataset.lon
    lat_start = _nearest_index(lat_coord, 25)
    lon_start = _nearest_index(lon_coord, 260)
    window = {
        'lat': slice(lat_start, lat_start + 15*scale_factor),
        'lon': slice(lon_start, lon_start + 30*scale_factor),
    }
    return dataset.isel(**window)

# Crop every dataset to the south-east US window.
# 1-degree grids use the base window size (scale_factor=1).
erai_deg1_seus = southeast_us(erai_deg1, scale_factor=1)
ras_deg1_seus = southeast_us(ras_deg1, scale_factor=1)
# Finer grids pass a larger scale_factor, which widens the index window
# by the same multiple on both axes.
ras_deg12_seus = southeast_us(ras_deg12, scale_factor=2)
ras_deg14_seus = southeast_us(ras_deg14, scale_factor=4)
ras_deg18_seus = southeast_us(ras_deg18, scale_factor=8)

In [4]:
# Train/test fold generator for time-series data, requested with 3 splits.
split_fn = data_util.create_time_series_train_test_generator(n_splits=3)
# Pipeline for the 'MAXT' variable: fillnan(0), clip(0), then remove_monthly_means().
# The exact semantics of each step live in utils.pipeline_v2.
preprocess_maxt = Pipeline('MAXT', fillnan(0), clip(0), remove_monthly_means())
# Pipeline for the 'HGT' variable. NOTE(review): not used by any of the visible cells.
preprocess_hgt = Pipeline('HGT', fillnan(0), clip(0))
# Preprocessed MAXT on the 1/2-degree and 1-degree south-east US crops.
ras_seus_maxt_12 = preprocess_maxt(ras_deg12_seus)
ras_seus_maxt_1 = preprocess_maxt(ras_deg1_seus)
# Materialize all folds up front; the training cells unpack each fold as
# ((train_lo, train_hi), (test_lo, test_hi)), with the 1-degree data as "lo"
# and the 1/2-degree data as "hi".
folds = list(split_fn(ras_seus_maxt_1, ras_seus_maxt_12))

In [5]:
def preprocess_vds(data_lo, data_hi, scale=2, n_epochs=1, batch_size=100):
    """Pair low- and high-resolution datasets, batch them, and repeat.

    Parameters
    ----------
    data_lo, data_hi
        Datasets accepted by ``tf.data.Dataset.zip``; elements are paired
        positionally as ``(lo, hi)``.
    scale : int, optional
        Currently unused; kept so existing callers keep working.
    n_epochs : int, optional
        Number of times the batched dataset is repeated.
    batch_size : int, optional
        Number of ``(lo, hi)`` pairs per batch.

    Returns
    -------
    The zipped dataset, batched first and then repeated ``n_epochs`` times.
    """
    paired = tf.data.Dataset.zip((data_lo, data_hi))
    return paired.batch(batch_size).repeat(n_epochs)

In [None]:
sample_batch_size = 10
load_batch_size = 1200
n_epochs = 20
# Train and evaluate the CNN baseline (wrapped by parameterize(..., normal())) on every fold.
for i, ((train_lo, train_hi), (test_lo, test_hi)) in enumerate(folds):
    print(f'Fold {i+1}/{len(folds)}')
    # N_test must come from the test split: it is printed below and determines
    # validation_steps, so reading it from train_lo gives the wrong step count.
    N_train, N_test = train_lo.Time.size, test_lo.Time.size
    wt, ht = train_lo.shape[1], train_lo.shape[2]
    print('{} training samples, {} test samples, {}x{}'.format(N_train, N_test, wt, ht))
    #batch_multiplier = data_util.calculate_n_subimages(train_lo, k, stride)
    train_steps = data_util.num_batches(N_train, sample_batch_size)
    test_steps = data_util.num_batches(N_test, sample_batch_size)
    train_lo_ds = data_util.xr_to_tf_dataset(train_lo, load_batch_size)
    test_lo_ds = data_util.xr_to_tf_dataset(test_lo, load_batch_size)
    train_hi_ds = data_util.xr_to_tf_dataset(train_hi, load_batch_size)
    test_hi_ds = data_util.xr_to_tf_dataset(test_hi, load_batch_size)
    # (lo, hi) pairs, batched and repeated once per epoch.
    train_ds = preprocess_vds(train_lo_ds, train_hi_ds, n_epochs=n_epochs, batch_size=sample_batch_size)
    test_ds = preprocess_vds(test_lo_ds, test_hi_ds, n_epochs=n_epochs, batch_size=sample_batch_size)
    #vdsrcnn = dscnn.create_vdsrcnn(scale=2, c_in=1, c_out=2, output_init='zeros')
    normal_fn = normal()
    vdsrcnn = dscnn.create_bmg_cnn10(wt, ht, c_out=2)
    vdsrcnn = parameterize(vdsrcnn, normal_fn)
    # `learning_rate` is the supported keyword; `lr` is a deprecated alias.
    vdsrcnn.compile(loss=nll_loss(normal_fn), optimizer=Adamax(learning_rate=1.0E-3))
    vdsrcnn.fit(train_ds, epochs=n_epochs, steps_per_epoch=train_steps,
                validation_data=test_ds, validation_steps=test_steps)
    pred = vdsrcnn.predict_mean(test_lo_ds.batch(1))
    sample = vdsrcnn.sample(test_lo_ds.batch(1))
    # Fetch the first test batch once so the lo/hi panels are a matching pair.
    lo_batch, hi_batch = next(iter(test_ds))
    plt.figure(figsize=(4*8,6))
    plt.subplot(1,4,1)
    plt.imshow(lo_batch.numpy()[0].squeeze())
    plt.title('low-res input')
    plt.subplot(1,4,2)
    plt.imshow(hi_batch.numpy()[0].squeeze())
    plt.title('high-res target')
    plt.subplot(1,4,3)
    plt.imshow(pred[0].numpy().squeeze())
    plt.title('predicted mean')
    plt.subplot(1,4,4)
    plt.imshow(sample[0].numpy().squeeze())
    plt.title('sample')
    plt.show()

In [None]:
sample_batch_size = 10
load_batch_size = 1200
n_epochs = 20
# Train a Glow model built on top of the CNN baseline and plot results for every fold.
for i, ((train_lo, train_hi), (test_lo, test_hi)) in enumerate(folds):
    print(f'Fold {i+1}/{len(folds)}')
    # N_test must come from the test split, not from train_lo.
    N_train, N_test = train_lo.Time.size, test_lo.Time.size
    (wt, ht), (wt_hi, ht_hi) = train_lo.shape[1:3], train_hi.shape[1:3]
    # The format string has six placeholders, so the high-res dimensions must be
    # passed as well; with only four arguments str.format raises IndexError.
    print('{} training samples, {} test samples, {}x{} -> {}x{}'.format(N_train, N_test, wt, ht, wt_hi, ht_hi))
    #batch_multiplier = data_util.calculate_n_subimages(train_lo, k, stride)
    train_steps = data_util.num_batches(N_train, sample_batch_size)
    test_steps = data_util.num_batches(N_test, sample_batch_size)
    train_lo_ds = data_util.xr_to_tf_dataset(train_lo, load_batch_size)
    test_lo_ds = data_util.xr_to_tf_dataset(test_lo, load_batch_size)
    train_hi_ds = data_util.xr_to_tf_dataset(train_hi, load_batch_size)
    test_hi_ds = data_util.xr_to_tf_dataset(test_hi, load_batch_size)
    # (lo, hi) pairs, batched and repeated once per epoch.
    train_ds = preprocess_vds(train_lo_ds, train_hi_ds, n_epochs=n_epochs, batch_size=sample_batch_size)
    test_ds = preprocess_vds(test_lo_ds, test_hi_ds, n_epochs=n_epochs, batch_size=sample_batch_size)
    #vdsrcnn = dscnn.create_vdsrcnn(scale=2, c_in=1, c_out=2, output_init='zeros')
    vdsrcnn = dscnn.create_bmg_cnn10(wt, ht, c_out=2)
    glow = Glow(vdsrcnn, normal(), num_layers=2, depth_per_layer=4,
                coupling_nn_ctor=coupling_nn_glow(hidden_dims=64))
    glow.train(train_ds, steps_per_epoch=N_train // sample_batch_size, num_epochs=n_epochs, has_y=True)
    # Use one and the same test input for both the mean prediction and the sample.
    test_input = next(iter(test_lo_ds.batch(1)))
    pred = glow.predict_mean(test_input)
    sample = glow.sample(test_input)
    # Fetch the first test batch once so the lo/hi panels are a matching pair.
    lo_batch, hi_batch = next(iter(test_ds))
    plt.figure(figsize=(4*8,6))
    plt.subplot(1,4,1)
    plt.imshow(lo_batch.numpy()[0].squeeze())
    plt.title('low-res input')
    plt.subplot(1,4,2)
    plt.imshow(hi_batch.numpy()[0].squeeze())
    plt.title('high-res target')
    plt.subplot(1,4,3)
    plt.imshow(pred[0].numpy().squeeze())
    plt.title('predicted mean')
    plt.subplot(1,4,4)
    plt.imshow(sample[0].numpy().squeeze())
    plt.title('sample')
    plt.show()

Fold 1/3
1200 training samples, 1200 test samples, 15x30


 50%|█████     | 1200/2400 [04:58<03:58,  5.03it/s, epoch=11, loss=445, nll=445, prior=2.88, ildj=852]            