### imports


In [8]:
# ipython extension to autoreload imported modules so that any changes will be up to date before running code in this nb
%load_ext autoreload 
%autoreload 2

# libraries
import numpy as np
import matplotlib.pyplot as plt
from tensorflow.python.keras.layers import Dense

from tensorflow.python.keras import Model
from tensorflow.python.keras.layers import Dense, Dropout
from tensorflow.python.keras.metrics import RootMeanSquaredError, MeanSquaredError
from spektral.layers import GCNConv, GlobalSumPool
from spektral.models import GCN
from spektral.data import MixedLoader
from lorenz import lorenzDataset, DEFAULT_TIME_RESOLUTION
from plotters import plot_with_predictions, plot_true_vs_pred, plot_data

from datetime import datetime

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


THINGS TO TRY 
- write helper functions! 
- play with different sample sizes (e.g. 500) - v
- input/output day ratio - h 
- play around with the buffer - v
- activation functions - h
- play with the hidden layers - v 
- different data samples / different Lorenz parameters - h 
- different optimizer algorithms - v 

### helper functions for hyperparameter testing

In [2]:
# default number of training epochs; used as the default `epochs` argument of
# both make_train_model and plot_model_results so the two stay in sync
DEFAULT_EPOCHS = 8

In [3]:
def make_train_model(train,
                     val=None,
                     channels=32,
                     activation="relu",
                     use_bias=False,
                     dropout_rate=0,
                     l2_reg=0,
                     optimizer='adam',
                     loss='mean_squared_error',
                     epochs=DEFAULT_EPOCHS,
                     batch_size=32):  # TODO: add early stopping
    """Build a spektral GCN regression model and fit it on `train`.

    Parameters
    ----------
    train : dataset handed to `MixedLoader` for training.
    val : dataset handed to `MixedLoader` for validation, or None to train
        without a validation pass.
    channels, activation, use_bias, dropout_rate, l2_reg :
        forwarded unchanged to `spektral.models.GCN`.
    optimizer, loss : forwarded unchanged to `Model.compile`.
    epochs : number of training epochs.
    batch_size : batch size used by both the train and the val loader.

    Returns
    -------
    (GCN_model, history) : the fitted model and the object returned by
        `GCN_model.fit`.
    """
    # prepare data; shuffling stays off in the loader and in fit(), as before
    train_loader = MixedLoader(dataset=train,
                               batch_size=batch_size,
                               shuffle=False)

    # create model
    GCN_model = GCN(
        n_labels=1,
        channels=channels,  # width (hidden units) of the hidden GCNConv layer
        activation=activation,
        output_activation=None,  # we want regression, i.e. a linear function
        use_bias=use_bias,
        dropout_rate=dropout_rate,
        l2_reg=l2_reg)

    GCN_model.compile(optimizer=optimizer, loss=loss)

    fit_kwargs = dict(steps_per_epoch=train_loader.steps_per_epoch,
                      epochs=epochs,
                      shuffle=False)

    if val is not None:
        val_loader = MixedLoader(dataset=val,
                                 batch_size=batch_size,
                                 shuffle=False)
        # A loader created without `epochs` keeps yielding batches, so Keras
        # must be told how many batches make up one validation pass;
        # otherwise validation at the end of the first epoch is unbounded.
        fit_kwargs.update(validation_data=val_loader.load(),
                          validation_steps=val_loader.steps_per_epoch)

    # train model
    history = GCN_model.fit(train_loader.load(), **fit_kwargs)
    return GCN_model, history


In [4]:
def plot_model_results(train, val, GCN_model, history, epochs=DEFAULT_EPOCHS):
    """Plot the loss curve(s) and the train/val predictions of a fitted model.

    Parameters
    ----------
    train, val : datasets passed to `plot_with_predictions` as `graph_dataset`.
    GCN_model : fitted model passed to `plot_with_predictions`.
    history : object returned by `model.fit`; `history.history['loss']` is
        plotted, and `history.history['val_loss']` too when it is present.
    epochs : number of epochs the model was trained for; only used to place
        the "final loss" annotation along the x axis.

    Returns
    -------
    (fig_train_loss, fig_train_pred, fig_val_pred)
    """
    train_loss = history.history['loss']
    val_loss = history.history.get('val_loss')
    final_loss = train_loss[-1]

    # plot training (and, if recorded, validation) loss
    fig_train_loss, ax = plt.subplots(1, 1, figsize=(4, 3))
    # the curves are labelled so that ax.legend() below has entries to show
    ax.plot(train_loss, label='train')
    first_losses = [train_loss[0]]
    if val_loss:
        ax.plot(val_loss, label='val')
        first_losses.append(val_loss[0])
    ax.set_ylabel('mean squared error')
    ax.set_xlabel('epochs')
    fig_train_loss.suptitle('model MSE over training epochs')
    ax.text(x=0.8 * epochs,
            y=1.2 * final_loss,
            s="final loss: {:.2f}".format(final_loss))
    # y range is anchored on the first-epoch loss of the plotted curves
    ax.set_ylim(0, max(first_losses) * 1.1)
    ax.legend()

    # plot train predictions (the returned axes are not needed here)
    fig_train_pred, _ = plot_with_predictions(model=GCN_model,
                                              graph_dataset=train,
                                              Loader=MixedLoader,
                                              batch_size=32,
                                              node=0,
                                              model_name='GCN OOTB train')

    # plot val predictions
    fig_val_pred, _ = plot_with_predictions(model=GCN_model,
                                            graph_dataset=val,
                                            Loader=MixedLoader,
                                            batch_size=32,
                                            node=0,
                                            model_name='GCN OOTB val')

    # # plot predictions against true value
    # fig_true_vs_pred, ax = plot_true_vs_pred(y_true, y_pred)

    # plt.tight_layout() only adjusts the current figure, so lay out each
    # figure explicitly.
    # NOTE(review): assumes plot_with_predictions returns matplotlib Figures
    # as its first element - confirm against plotters.py.
    for fig in (fig_train_loss, fig_train_pred, fig_val_pred):
        fig.tight_layout()
    return fig_train_loss, fig_train_pred, fig_val_pred

### prepare data


In [5]:
# Build the Lorenz dataset that is split into train / val / test further down.
# TODO: add logging to see progress

# Only n_samples and min_buffer are overridden here; every other keyword is
# listed commented-out for reference.
# only uncomment each line if testing a non-default parameter
dataset = lorenzDataset(
    n_samples=100000,
    # input_steps=2 * DEFAULT_TIME_RESOLUTION,  # 2 days
    # output_delay=1 * DEFAULT_TIME_RESOLUTION,  # 1 day
    # output_steps=1,
    # NOTE(review): going by the "N days" comments above, this is -3 days'
    # worth of steps; what a negative buffer means is defined in lorenz.py -
    # confirm there.
    min_buffer=-3 * DEFAULT_TIME_RESOLUTION,
    # rand_buffer=False,
    # K=36,
    # F=8,
    # c=10,
    # b=10,
    # h=1,
    # coupled=True,
    # time_resolution=DEFAULT_TIME_RESOLUTION,
    # seed=42
)

/Users/h.lu/spektral/datasets/Lorenz/100000_200_1_100_-300_False_36_8_10_10_1_True_100_42.npz
False
total steps: 100300
starting integration


In [6]:
# contiguous 70 / 20 / 10 split by graph index
n_graphs = dataset.n_graphs
train_end = int(0.7 * n_graphs)
val_end = int(0.9 * n_graphs)

train = dataset[:train_end]
val = dataset[train_end:val_end]
test = dataset[val_end:]

# normalize data: statistics come from the train split only and are then
# applied to all three splits
X1_mean, X1_std, X2_mean, X2_std = train.get_mean_std()

for split in (train, val, test):
    split.normalize(X1_mean, X1_std, X2_mean, X2_std)

### inspect our data

In [9]:
# Plot the train / val / test series for a single node and report how long it
# took (this cell is slow on the full dataset).
# NOTE(review): the inline plotting code that used to be commented out here
# appears to have been superseded by plotters.plot_data - confirm, and recover
# it from version control if it is still needed.
start = datetime.now()
fig, (ax0, ax1) = plot_data(train, val, test, node=0)

plt.tight_layout()
plt.show()

# `start` was previously assigned but never read; report the elapsed time
print("plot_data took {}".format(datetime.now() - start))

plotting train


KeyboardInterrupt: 

### test hyperparameters

In [None]:
# `val` is passed explicitly: make_train_model takes the validation set as its
# second argument and uses it as the validation data during fitting
model, history = make_train_model(train, val, activation="relu")

In [None]:
# plot only the first 1000 graphs of each split
fig_train_loss, fig_train_pred, fig_val_pred = plot_model_results(
    train=train[:1000],
    val=val[:1000],
    GCN_model=model,
    history=history,
)

NameError: name 'model' is not defined

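side note: why are the outputs different on every run? Is there another seed we need to set in addition to the dataset's `seed` (e.g. TensorFlow's global seed via `tf.random.set_seed`, which governs weight initialization)?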