In [1]:
# Make the modules under ./normalizing_flows importable from this notebook
import sys
sys.path.append('./normalizing_flows')

In [2]:
import tensorflow as tf
import matplotlib.pyplot as plt
import numpy as np
import utils.data as data_util
import utils.nn_util as nn
import xarray as xr
from flows import Flow
from flows.affine import Planar
from flows.sylvester import TriangularSylvester
from models.vae import GatedConvVAE

In [3]:
from datasource import rasmussen, erai
# ERA-Interim: the 'daily-1deg' store
erai_deg1 = xr.open_zarr(erai('daily-1deg'), consolidated=True)
# Rasmussen stores regridded to 1, 1/2, 1/4 and 1/8 degree, opened in that order
ras_deg1, ras_deg12, ras_deg14, ras_deg18 = (
    xr.open_zarr(rasmussen(store_key), consolidated=True)
    for store_key in ('daily-1deg', 'daily-1-2deg', 'daily-1-4deg', 'daily-1-8deg')
)
# The 1/16-degree store is currently disabled:
#ras_deg116 = xr.open_zarr(rasmussen('daily-1-16deg'), consolidated=True)
# regions
def southeast_us(dataset, scale_factor=1):
    """Cut a fixed-size index window out of ``dataset`` along ``lat`` and ``lon``.

    The window starts at the grid index whose ``lat`` value is closest to 25
    and whose ``lon`` value is closest to 260, and spans ``15*scale_factor``
    indices along ``lat`` and ``30*scale_factor`` indices along ``lon``.

    Args:
        dataset: object exposing ``lat`` and ``lon`` coordinates and an
            ``isel(lat=..., lon=...)`` method (an xarray dataset here).
        scale_factor: multiplier applied to the window size in grid points;
            callers pass 2/4/8 for the 1/2-, 1/4- and 1/8-degree grids.

    Returns:
        Whatever ``dataset.isel`` returns for the computed slices.

    NOTE(review): a start value of 260 suggests longitudes on a 0-360
    convention - confirm against the data stores.
    """
    # Index of the coordinate value nearest to the window's origin
    lat_lo = np.abs(dataset.lat - 25).argmin().values
    lon_lo = np.abs(dataset.lon - 260).argmin().values
    window = {
        'lat': slice(lat_lo, lat_lo + 15*scale_factor),
        'lon': slice(lon_lo, lon_lo + 30*scale_factor),
    }
    return dataset.isel(**window)

# 1-degree grids: default window size
erai_deg1_seus = southeast_us(dataset=erai_deg1, scale_factor=1)
ras_deg1_seus = southeast_us(dataset=ras_deg1, scale_factor=1)
# Finer grids: scale the window by the refinement relative to 1 degree
ras_deg12_seus = southeast_us(dataset=ras_deg12, scale_factor=2)
ras_deg14_seus = southeast_us(dataset=ras_deg14, scale_factor=4)
ras_deg18_seus = southeast_us(dataset=ras_deg18, scale_factor=8)

In [4]:
# Train/test splitter configured with n_splits=5; it is applied to the
# preprocessed dataset below to produce the list of (train, test) folds.
split_fn = data_util.create_time_series_train_test_generator(n_splits=5)

In [5]:
from utils.pipeline_v2 import Pipeline, fillnan, clip, remove_monthly_means
# Pipeline for the 'MAXT' variable: fillnan(0), clip(0), remove_monthly_means()
# (presumably applied in the order listed - confirm against utils.pipeline_v2).
preprocess_maxt = Pipeline('MAXT', fillnan(0), clip(0), remove_monthly_means())
# Pipeline for the 'HGT' variable: fillnan(0) and clip(0) only.
# NOTE(review): preprocess_hgt is not used in the cells visible here.
preprocess_hgt = Pipeline('HGT', fillnan(0), clip(0))

In [6]:
# Run the MAXT pipeline on the 1/2-degree southeast-US subset.
ras_seus_maxt_12 = preprocess_maxt(ras_deg12_seus)
# Materialize every split up front so the folds can be counted and iterated
# by the training loop.
folds = list(split_fn(ras_seus_maxt_12))

In [7]:
n_layers = 4


def _make_flow_layer(i):
    """Build flow layer ``i``; odd-indexed layers are created with flip_z=True."""
    return TriangularSylvester(flip_z=(i % 2 != 0))


flow = Flow(n_layers, _make_flow_layer)


def beta_update(i, beta):
    """Beta schedule handed to the VAE: returns ``1e-3 / exp(-i)``.

    This is mathematically ``1e-3 * exp(i)``, so the value grows exponentially
    with ``i``. The incoming ``beta`` is ignored.

    NOTE(review): presumably ``i`` is an epoch/step counter supplied by the
    model - confirm its meaning and dtype against GatedConvVAE.
    """
    return 1.0E-3 / tf.exp(-i)

In [10]:
def preprocess_vae(data, k=6, stride=3, n_epochs=1, batch_size=100):
    """Build the repeated (input, target) batch stream used to fit the VAE.

    Args:
        data: dataset handed to ``data_util.extract_patches``.
        k: patch size forwarded to ``extract_patches``.
        stride: patch stride forwarded to ``extract_patches``.
        n_epochs: number of times the zipped dataset is repeated.
        batch_size: number of patches per batch.

    Returns:
        A ``tf.data.Dataset`` yielding pairs in which both members come from
        the same batched patch dataset, repeated ``n_epochs`` times.
    """
    patches = data_util.extract_patches(data, k=k, stride=stride, scale=1)
    batched = patches.batch(batch_size)
    # Autoencoder setup: the batched patches serve as both input and target
    pairs = tf.data.Dataset.zip((batched, batched))
    return pairs.repeat(n_epochs)

In [None]:
# Patch geometry and training configuration
k, stride = 12, 6
sample_batch_size = 100   # patches per training/validation batch
load_batch_size = 1000    # batch size passed to xr_to_tf_dataset when loading
n_epochs = 5
# Fit a fresh VAE on every (train, test) fold.
# NOTE(review): `flow` is created once and reused by every fold's model; if it
# holds trainable state, folds are not independent - confirm against flows.Flow.
for i, (train, test) in enumerate(folds):
    print(f'Fold {i+1}/{len(folds)}')
    N_train, N_test = train[0].Time.size, test[0].Time.size
    print('{} training samples, {} test samples'.format(N_train, N_test))
    # Number of k-sized patches extracted from one image at this stride
    batch_multiplier = data_util.calculate_n_subimages(train[0], k, stride)
    train_steps = data_util.num_batches(N_train*batch_multiplier, sample_batch_size)
    # The test set goes through the same patch extraction as the training set,
    # so its sample count must be scaled by the same multiplier; otherwise
    # validation only sees a fraction of the test patches.
    # (Assumes train and test images share spatial dimensions - they are
    # splits of the same dataset.)
    test_steps = data_util.num_batches(N_test*batch_multiplier, sample_batch_size)
    train_ds = data_util.xr_to_tf_dataset(train, load_batch_size)
    test_ds = data_util.xr_to_tf_dataset(test, load_batch_size)
    train_ds = preprocess_vae(train_ds, k=k, stride=stride, n_epochs=n_epochs, batch_size=sample_batch_size)
    test_ds = preprocess_vae(test_ds, k=k, stride=stride, n_epochs=n_epochs, batch_size=sample_batch_size)
    vae = GatedConvVAE(k, k, flow, z_size=64, beta_update_fn=beta_update, output_activation='linear', loss='mse')
    vae.fit(train_ds, epochs=n_epochs, steps_per_epoch=train_steps,
            validation_data=test_ds, validation_steps=test_steps)