In [1]:
%load_ext autoreload
%autoreload 2

from IPython.display import Image
from IPython.core.display import display, HTML
display(HTML("<style>.container { width:100% !important; }</style>"))

import os
import json
import numpy as onp
import jax
import jax.numpy as jnp
import flax
import pickle
import matplotlib.pyplot as plt
import pandas as pd
import torch
import matplotlib
import timecast as tc

from mpl_toolkits import mplot3d


plt.rcParams['figure.figsize'] = [20, 10]

import tqdm.notebook as tqdm



In [2]:
from timecast.learners._ar import _ar_gram
from timecast.learners._pcr import _compute_pca_projection

In [3]:
basins = pickle.load(open("../data/basins.p", "rb"))

def generator():
    for basin in basins:
        X = pickle.load(open("../data/train/{}.p".format(basin), "rb"))
        ealstm = pickle.load(open("../data/ealstm/{}.p".format(basin), "rb"))
        Y = onp.zeros((X.shape[0], 1))
        Y[-ealstm.shape[0]:, :] = ealstm[["qobs"]]
        yield X[:, :5], Y, None
            
XTX, XTY = _ar_gram(generator(), input_dim=5, output_dim=1, history_len=270)

In [5]:
projections = {}
for k in tqdm.tqdm([10, 50, 100, 500, 1000, 1350]):
    projections[k] = _compute_pca_projection(XTX.matrix(normalize=True), k)

HBox(children=(FloatProgress(value=0.0, max=6.0), HTML(value='')))




## Test

In [6]:
basins = pickle.load(open("../data/basins.p", "rb"))

In [7]:
@tc.experiment("k,projection", projections.items())
@tc.experiment("basin", basins)
def runner(basin, k, projection, lr=1e-5):
    import pickle
    from ealstm.gaip.utils import MSE

    import jax.numpy as jnp

    from timecast.learners import Sequential, Parallel, Index, PCR
    from timecast import smap
    from timecast.objectives import residual
    from timecast.optim import GradientDescent

    import tqdm.notebook as tqdm
    
    X = pickle.load(open("../data/test/{}.p".format(basin), "rb"))[:, :5]
    Y = pickle.load(open("../data/ealstm/{}.p".format(basin), "rb"))
    
    history_len = 270
    
    lstm = Index.partial(index=0)
    pcr = PCR.partial(projection=projection, history_len=history_len, history=X[:history_len - 1])
    pcr = Sequential.partial(learners=[Index.partial(index=1), pcr])
    model, state = Parallel.new(shape=(1, 5), learners=[lstm, pcr])
    
    optim_def = GradientDescent(learning_rate=lr)
    optimizer = optim_def.create(model)

    # NOTE: difference in indexing convention, so need to pad one row
    X_t = X[history_len - 1:]
    Y_lstm = jnp.array(Y.qsim)
    Y = jnp.array(Y.qobs).reshape(-1, 1)

    Y_hat, optimizer, state = smap((Y_lstm, X_t), Y, optimizer, state=state, objective=residual)

    return {"basin": basin, "k": k, "mse": MSE(Y, Y_hat)}

In [8]:
results = runner.run(processes=10, tqdm=tqdm)

HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))




In [9]:
for k in tqdm.tqdm([10, 50, 100, 500, 1000, 1350]):
    print("Average MSE (k={}): {}".format(k, jnp.average(jnp.array([result["mse"] for result in results if result["k"] == k]))))

HBox(children=(FloatProgress(value=0.0, max=6.0), HTML(value='')))

Average MSE (k=10): 2.9675233364105225
Average MSE (k=50): 2.9452872276306152
Average MSE (k=100): 2.9331281185150146
Average MSE (k=500): 2.923673629760742
Average MSE (k=1000): 2.9238321781158447
Average MSE (k=1350): 2.9238762855529785



In [19]:
@tc.experiment("k,projection", [(500, projections[500])])
@tc.experiment("basin", basins)
@tc.experiment("lr", jnp.linspace(-5, -4, 11))
def runner(basin, k, projection, lr):
    import pickle
    from ealstm.gaip.utils import MSE

    import jax.numpy as jnp

    from timecast.learners import Sequential, Parallel, Index, PCR
    from timecast import smap
    from timecast.objectives import residual
    from timecast.optim import GradientDescent

    import tqdm.notebook as tqdm
    
    X = pickle.load(open("../data/test/{}.p".format(basin), "rb"))[:, :5]
    Y = pickle.load(open("../data/ealstm/{}.p".format(basin), "rb"))
    
    history_len = 270
    
    lstm = Index.partial(index=0)
    pcr = PCR.partial(projection=projection, history_len=history_len, history=X[:history_len - 1])
    pcr = Sequential.partial(learners=[Index.partial(index=1), pcr])
    model, state = Parallel.new(shape=(1, 5), learners=[lstm, pcr])
    
    optim_def = GradientDescent(learning_rate=(10 ** lr))
    optimizer = optim_def.create(model)

    # NOTE: difference in indexing convention, so need to pad one row
    X_t = X[history_len - 1:]
    Y_lstm = jnp.array(Y.qsim)
    Y = jnp.array(Y.qobs).reshape(-1, 1)

    Y_hat, optimizer, state = smap((Y_lstm, X_t), Y, optimizer, state=state, objective=residual)

    return {"basin": basin, "k": k, "lr": lr, "mse": MSE(Y, Y_hat)}

In [20]:
results = runner.run(processes=15, tqdm=tqdm)

HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))




In [18]:
for k in [10, 500]:
    for lr in jnp.linspace(-5, -4.0, 11):
        print("Average MSE (k={}, lr={}): {}".format(k, lr, jnp.average(jnp.array([result["mse"] for result in results if (result["k"] == k and result["lr"] == lr)]))))

Average MSE (k=10, lr=-5.0): 2.9675233364105225
Average MSE (k=10, lr=-4.900000095367432): 2.9487717151641846
Average MSE (k=10, lr=-4.800000190734863): 2.9286019802093506
Average MSE (k=10, lr=-4.699999809265137): 2.907244920730591
Average MSE (k=10, lr=-4.599999904632568): 2.884995937347412
Average MSE (k=10, lr=-4.5): 2.8622066974639893
Average MSE (k=500, lr=-5.0): 2.923673629760742
Average MSE (k=500, lr=-4.900000095367432): 2.898007869720459
Average MSE (k=500, lr=-4.800000190734863): 2.870617151260376
Average MSE (k=500, lr=-4.699999809265137): 2.842050552368164
Average MSE (k=500, lr=-4.599999904632568): 2.813055992126465
Average MSE (k=500, lr=-4.5): 2.784621477127075


In [21]:
for k in [10, 500]:
    for lr in jnp.linspace(-5, -4.0, 11):
        print("Average MSE (k={}, lr={}): {}".format(k, lr, jnp.average(jnp.array([result["mse"] for result in results if (result["k"] == k and result["lr"] == lr)]))))

Average MSE (k=10, lr=-5.0): nan
Average MSE (k=10, lr=-4.900000095367432): nan
Average MSE (k=10, lr=-4.800000190734863): nan
Average MSE (k=10, lr=-4.699999809265137): nan
Average MSE (k=10, lr=-4.599999904632568): nan
Average MSE (k=10, lr=-4.5): nan
Average MSE (k=10, lr=-4.400000095367432): nan
Average MSE (k=10, lr=-4.300000190734863): nan
Average MSE (k=10, lr=-4.199999809265137): nan
Average MSE (k=10, lr=-4.099999904632568): nan
Average MSE (k=10, lr=-4.0): nan
Average MSE (k=500, lr=-5.0): 2.923673629760742
Average MSE (k=500, lr=-4.900000095367432): 2.898007869720459
Average MSE (k=500, lr=-4.800000190734863): 2.870617151260376
Average MSE (k=500, lr=-4.699999809265137): 2.842050552368164
Average MSE (k=500, lr=-4.599999904632568): 2.813055992126465
Average MSE (k=500, lr=-4.5): 2.784621477127075
Average MSE (k=500, lr=-4.400000095367432): 2.7580411434173584
Average MSE (k=500, lr=-4.300000190734863): 2.7350289821624756
Average MSE (k=500, lr=-4.199999809265137): 2.717912197

In [30]:
projections = {}
for k in tqdm.tqdm(onp.linspace(450, 550, 11)):
    projections[int(k)] = _compute_pca_projection(XTX.matrix(normalize=True), int(k))

HBox(children=(FloatProgress(value=0.0, max=11.0), HTML(value='')))




In [32]:
@tc.experiment("k,projection", projections.items())
@tc.experiment("basin", basins)
@tc.experiment("lr", jnp.linspace(-5, -4, 6))
def runner(basin, k, projection, lr):
    import pickle
    from ealstm.gaip.utils import MSE

    import jax.numpy as jnp

    from timecast.learners import Sequential, Parallel, Index, PCR
    from timecast import smap
    from timecast.objectives import residual
    from timecast.optim import GradientDescent

    import tqdm.notebook as tqdm
    
    X = pickle.load(open("../data/flood/test/{}.p".format(basin), "rb"))[:, :5]
    Y = pickle.load(open("../data/flood/ealstm/{}.p".format(basin), "rb"))
    
    history_len = 270
    
    lstm = Index.partial(index=0)
    pcr = PCR.partial(projection=projection, history_len=history_len, history=X[:history_len - 1])
    pcr = Sequential.partial(learners=[Index.partial(index=1), pcr])
    model, state = Parallel.new(shape=(1, 5), learners=[lstm, pcr])
    
    optim_def = GradientDescent(learning_rate=(10 ** lr))
    optimizer = optim_def.create(model)

    # NOTE: difference in indexing convention, so need to pad one row
    X_t = X[history_len - 1:]
    Y_lstm = jnp.array(Y.qsim)
    Y = jnp.array(Y.qobs).reshape(-1, 1)

    Y_hat, optimizer, state = smap((Y_lstm, X_t), Y, optimizer, state=state, objective=residual)

    return {"basin": basin, "k": k, "lr": lr, "mse": MSE(Y, Y_hat)}

In [34]:
runner.run(processes=15, tqdm=tqdm)

FileNotFoundError: [Errno 2] No such file or directory: '../data/test/02372250.p'

In [33]:
for k in projections.keys():
    for lr in jnp.linspace(-5, -4.0, 6):
        print("Average MSE (k={}, lr={}): {}".format(k, lr, jnp.average(jnp.array([result["mse"] for result in results if (result["k"] == k and result["lr"] == lr)]))))

Average MSE (k=450, lr=-5.0): nan
Average MSE (k=450, lr=-4.800000190734863): nan
Average MSE (k=450, lr=-4.599999904632568): nan
Average MSE (k=450, lr=-4.400000095367432): nan
Average MSE (k=450, lr=-4.199999809265137): nan
Average MSE (k=450, lr=-4.0): nan
Average MSE (k=460, lr=-5.0): nan
Average MSE (k=460, lr=-4.800000190734863): nan
Average MSE (k=460, lr=-4.599999904632568): nan
Average MSE (k=460, lr=-4.400000095367432): nan
Average MSE (k=460, lr=-4.199999809265137): nan
Average MSE (k=460, lr=-4.0): nan
Average MSE (k=470, lr=-5.0): nan
Average MSE (k=470, lr=-4.800000190734863): nan
Average MSE (k=470, lr=-4.599999904632568): nan
Average MSE (k=470, lr=-4.400000095367432): nan
Average MSE (k=470, lr=-4.199999809265137): nan
Average MSE (k=470, lr=-4.0): nan
Average MSE (k=480, lr=-5.0): nan
Average MSE (k=480, lr=-4.800000190734863): nan
Average MSE (k=480, lr=-4.599999904632568): nan
Average MSE (k=480, lr=-4.400000095367432): nan
Average MSE (k=480, lr=-4.199999809265137)