In [1]:
%load_ext autoreload
%autoreload 2

from IPython.display import Image
from IPython.core.display import display, HTML
display(HTML("<style>.container { width:100% !important; }</style>"))

import os
import json
import numpy as np
import jax
import jax.numpy as jnp
import flax
import pickle
import matplotlib.pyplot as plt
import pandas as pd
import matplotlib
import timecast as tc

from mpl_toolkits import mplot3d

plt.rcParams['figure.figsize'] = [20, 10]

import tqdm.notebook as tqdm



In [2]:
import pickle

import timecast as tc
import pandas as pd
import matplotlib.pyplot as plt

import flax
import jax.numpy as jnp
import numpy as np

from timecast.learners import AR
from timecast.utils.ar import historify, compute_gram

In [3]:
# Read the raw wind readings; the column names are supplied explicitly as 0..56.
data = jnp.array(pd.read_csv("../data/wind/original/MS_winds.dat", names=list(range(57))))
# Cache the array as a pickle. The context manager guarantees the file is
# flushed and closed instead of leaving the handle open until garbage collection.
with open("../data/wind/original/MS_winds.pkl", "wb") as winds_pkl:
    pickle.dump(data, winds_pkl)

In [4]:
# Layout of `data`:
#   Columns = 57 stations
#   Rows = wind speed readings (m/s)
# The bare expression below makes the notebook display the array.
data

DeviceArray([[5.0963, 2.0564, 3.0399, ..., 3.0399, 3.5763, 2.5481],
             [5.0963, 1.5199, 2.5481, ..., 2.5481, 3.5763, 2.5481],
             [5.588 , 1.5199, 2.0564, ..., 2.5481, 3.5763, 1.5199],
             ...,
             [4.6045, 4.0681, 5.0963, ..., 4.6045, 0.    , 3.0399],
             [7.1526, 6.1244, 4.6045, ..., 4.0681, 0.    , 4.0681],
             [7.1526, 3.5763, 3.0399, ..., 4.0681, 0.    , 4.6045]],            dtype=float32)

In [5]:
# Normalization
# NOTE: This is a bug; upstream claims this rescales to [0, 1], but dividing by
#       the max (rather than by max - min) maps values to [0, (max - min) / max],
#       which is [0, 1] only when the min is 0
# NOTE: Upstream's variables also refer to min and max as mean and std, respectively
# NOTE: This cell overwrites `data`, so running it a second time rescales the
#       already-normalized values
data_min, data_max = data.min(), data.max()
data = (data - data_min) / data_max

In [6]:
# Load the saved models; the pickle stores them under the "models" key.
# NOTE(review): pickle.load can execute arbitrary code, so only load this file
# from a trusted source.
# The context manager closes the file handle as soon as loading finishes.
with open("../data/wind/original/models.pkl", "rb") as models_pkl:
    models = pickle.load(models_pkl)["models"]

Using TensorFlow backend.
Instructions for updating:
If using Keras pass *_constraint arguments to layers.



In [7]:
# Mostly from https://github.com/amirstar/Deep-Forecast/blob/4dcdf66f8ae3070ab706b30a6e3cf888f36e0536/multiLSTM.py#L210
def predict(X, models):
    """Run the per-step models over every window in X.

    Windows are processed in groups of six. The first window of a group is
    fed to ``models[0]`` unchanged; for each later position ``k`` in the
    group, the previous prediction is appended to a running buffer, the
    buffer's first row is dropped from a copy, and that copy is fed to
    ``models[k]``.

    Reads the module-level ``history_len`` and ``num_stations``.

    Args:
        X: array that can be reshaped to
            ``(X.shape[0], history_len, num_stations)``.
        models: indexable collection; elements 0..5 are used and each must
            provide ``predict``.

    Returns:
        A ``jnp`` array of shape ``(X.shape[0], num_stations)`` holding one
        prediction row per window.
    """
    X = X.reshape(X.shape[0], history_len, num_stations)
    results = np.zeros((X.shape[0], num_stations))

    steps_per_group = 6
    for ind in range(len(X)):
        step = ind % steps_per_group
        if step == 0:
            # Start of a group: reset the running buffer to the true history.
            buffer = X[ind]
            window = buffer
        else:
            # NOTE(review): the buffer itself is never trimmed; only the copy
            # handed to the model loses its first row, so from step 2 onward
            # the model input has more than history_len rows. This follows the
            # linked upstream code; confirm the models expect it.
            buffer = np.vstack((buffer, results[ind - 1]))
            window = np.delete(buffer, 0, axis=0)

        # Add a leading batch axis of size 1 before calling the model.
        num_rows, num_cols = window.shape
        batch = np.reshape(window, [1, num_rows, num_cols])
        results[ind] = models[step].predict(batch)

    return jnp.array(results)

In [8]:
# Split sizes. Rows before index num_train are used for training.
# NOTE(review): num_test is not referenced by any visible cell; confirm it is still needed.
num_train, num_test = 6000, 361

# Window geometry: readings per history window, and number of station columns.
history_len, num_stations = 12, 57

In [100]:
# Training targets: rows 12..5999
train_true = data[history_len:num_train]
num_train_windows = train_true.shape[0]
# Training inputs, one history window per target: rows 0..11, 1..12, ..., 5987..5998
train_data = historify(data, history_len=history_len, num_histories=num_train_windows)

# Test targets: rows 6012..8386
test_true = data[num_train + history_len:]
num_test_windows = test_true.shape[0]
# Test inputs, one history window per target: rows 6000..6011, ..., 8374..8385
test_data = historify(
    data,
    history_len=history_len,
    num_histories=num_test_windows,
    offset=num_train,
)

In [101]:
# Model predictions for the test targets (rows 6012..8386), one row per test window
test_pred = predict(test_data, models)

In [134]:
# Metric: mean absolute error, reported in the original units (m/s).
# The error is a difference of two normalized values, so the data_min offset
# cancels and only the data_max scale has to be undone. Adding data_min to the
# difference would shift every error by a constant whenever data_min != 0.
jnp.absolute((test_true - test_pred) * data_max).mean(axis=0).mean()

DeviceArray(1.3113904, dtype=float32)

In [135]:
# Metric: mean squared error, reported in the original units squared.
# The error is a difference of two normalized values, so the data_min offset
# cancels and only the data_max scale has to be undone before squaring. Adding
# data_min inside the square would distort every term whenever data_min != 0.
jnp.square((test_true - test_pred) * data_max).mean(axis=0).mean()

DeviceArray(3.181298, dtype=float32)

In [13]:
# Model predictions for the training targets (rows 12..5999), one row per training window
train_pred = predict(train_data, models)

In [25]:
# Residuals of the model predictions on the test segment; used below as the
# regression targets.
# NOTE(review): no visible cell defines `test_resid`, so a fresh kernel fails
# here. It is reconstructed as truth minus prediction, which is the definition
# implied by the later cells (they add the fitted correction to test_pred and
# score the sum against test_true). Confirm against the original definition.
test_resid = test_true - test_pred

ars, states = [None] * num_stations, [None] * num_stations
# Prepend history_len - 1 zero rows so the targets have exactly as many rows
# as data[num_train:-1], the input series passed to AR.fit.
padded_test_resid = jnp.vstack((jnp.zeros((history_len - 1, num_stations)), test_resid))
# Fit one AR learner per station: all stations are inputs, and that station's
# residual column is the single output.
for station in tqdm.tqdm(range(num_stations)):
    ars[station], states[station] = AR.fit(
        data=[(data[num_train:-1], padded_test_resid[:, station], None)],
        input_dim=num_stations,
        output_dim=1,
        history=data[num_train : num_train + history_len],
        history_len=history_len
    )

HBox(children=(FloatProgress(value=0.0, max=57.0), HTML(value='')))




In [127]:
# https://stackoverflow.com/questions/51883058/l1-norm-instead-of-l2-norm-for-cost-function-in-regression-model

from scipy.optimize import minimize

def fit(X, params):
    """Return the linear model output: the matrix product of X and params."""
    predictions = X @ params
    return predictions

def cost_function(params, X, y):
    """L1 cost: the sum of absolute differences between y and fit(X, params).

    ``params`` comes first because this function is handed to an optimizer
    that varies its first argument.
    """
    residuals = y - fit(X, params)
    absolute_errors = np.abs(residuals)
    return np.sum(absolute_errors)

# Design matrix with a leading column of ones for the intercept term. It is
# identical for every station, so build it once instead of on every iteration.
L1_design = np.hstack((np.ones((test_data.shape[0], 1)), test_data))

L1_ars = [None] * num_stations
for station in tqdm.tqdm(range(num_stations)):
    # Warm-start from the station's fitted AR kernel, with a zero prepended
    # for the intercept.
    init_params = ars[station].params["Linear"]["kernel"].reshape(history_len * num_stations, 1)
    # minimize expects a 1-D x0 (newer SciPy releases reject 2-D input), so
    # flatten the column; the element order is unchanged.
    init_params = np.vstack((np.zeros((1, 1)), init_params)).ravel()
    L1_ars[station] = minimize(cost_function, init_params, args=(L1_design, test_resid[:, station]))

HBox(children=(FloatProgress(value=0.0, max=57.0), HTML(value='')))




In [138]:
from sklearn.linear_model import Ridge

In [140]:
# One ridge regression (alpha=0.1) per station, mapping the test windows to
# that station's residual column. Ridge.fit returns the fitted estimator, so
# the list can be built directly.
L2_ars = [
    Ridge(alpha=0.1).fit(test_data, test_resid[:, station])
    for station in tqdm.tqdm(range(num_stations))
]
    

HBox(children=(FloatProgress(value=0.0, max=57.0), HTML(value='')))




In [132]:
# Per-station mean absolute error of (model prediction + L1 correction), in the
# original units.
# The design matrix (ones column for the intercept + test windows) is the same
# for every station, so it is built once outside the loop.
l1_eval_design = np.hstack((np.ones((test_data.shape[0], 1)), test_data))

maes = []
for station in tqdm.tqdm(range(num_stations)):
    y_hat = test_pred[:, station] + l1_eval_design @ L1_ars[station].x
    # An error is a difference of normalized values: the data_min offset
    # cancels, so only the data_max scale is undone.
    maes.append(np.absolute((test_true[:, station] - y_hat) * data_max).mean())

HBox(children=(FloatProgress(value=0.0, max=57.0), HTML(value='')))




In [133]:
# Average the per-station MAEs into a single score
np.mean(maes)

0.9746599

In [142]:
# Per-station mean squared error of (model prediction + ridge correction), in
# the original units squared.
mses = []
for station in tqdm.tqdm(range(num_stations)):
    ridge = L2_ars[station]
    y_hat = test_pred[:, station] + test_data @ ridge.coef_ + ridge.intercept_
    # An error is a difference of normalized values: the data_min offset
    # cancels, so only the data_max scale is undone before squaring.
    mses.append(np.square((test_true[:, station] - y_hat) * data_max).mean())

HBox(children=(FloatProgress(value=0.0, max=57.0), HTML(value='')))




In [143]:
# Average the per-station MSEs into a single score
np.mean(mses)

1.2649621