# Build train and test matrices

In [1]:
import pandas as pd
import numpy as np
import sys

import keras
from keras.models import Sequential, Model
from keras.layers import Dense, Dropout, Flatten, Input, Lambda
from keras.layers import TimeDistributed, LocallyConnected1D, Conv1D, Concatenate, LSTM

from keras import backend as K
from sklearn.model_selection import TimeSeriesSplit

%load_ext autoreload
%autoreload 2

sys.path.append('../src/')
from utils.build_matrix import df_shift
from utils.clr import CyclicLR

Using TensorFlow backend.


In [2]:
# Raw sensor table; each column is one sensor (its column names are reused
# further down to look up sensor positions).
# NOTE(review): read_pickle can execute arbitrary code — only load trusted files.
OAHU_PICKLE = '/home/SHARED/SOLAR/data/oahu_min_final.pkl'
df = pd.read_pickle(OAHU_PICKLE)

In [3]:
# Build the lagged design matrix with the project helper df_shift (utils.build_matrix).
# NOTE(review): presumably periods=3 yields three lagged copies of every sensor plus the
# current-time block 't', stacked under a 'time' column level — confirm against df_shift.
df_roll = df_shift(df, periods=3)

In [4]:
# Separate the model inputs from the prediction target:
#   X -> every column block except 't' (the lagged times t-1 ... t-width+1)
#   y -> the 't' block (readings at the time being predicted)
X = df_roll.drop(columns='t', level='time')
y = df_roll['t']

In [5]:
# Chronological hold-out: roughly 12 months for training, 4 months for testing.
TRAIN_END, TEST_START = '2011-07-31', '2011-08-01'

X_train, y_train = X[:TRAIN_END], y[:TRAIN_END]
X_test, y_test = X[TEST_START:], y[TEST_START:]

In [6]:
# Sanity check on the split sizes (inputs first, then targets).
for split in (X_train, X_test, y_train, y_test):
    print(split.shape)

(298595, 48)
(55016, 48)
(298595, 16)
(55016, 16)


# Convolutional predictor

Now, in order to use a 1D convolution, we are going to sort the sensors. For the initial test, we'll just sort them by longitude (from East to West). That way, nearer sensors are in close positions in the tensor, so the 1D convolution may extract useful correlations.

Note: many other possible orderings of the sensors could be added as new channels in the input tensor.

In [7]:
# Sensor metadata table, used here only for the coordinates of each sensor.
# NOTE(review): read_pickle can execute arbitrary code — only load trusted files.
info = pd.read_pickle('/home/SHARED/SOLAR/data/info.pkl')

# Coordinates sorted in descending order, with sensor 'AP3' removed afterwards.
# NOTE(review): presumably AP3 is dropped because it is not a column of df — confirm.
lon, lat = (
    info[coord].sort_values(ascending=False).drop('AP3')
    for coord in ('Longitude', 'Latitude')
)

In [8]:
# Translate the longitude-sorted sensor names into their column positions in df,
# so that arrays built from df's column order can be reordered by longitude.
col_position = pd.Series(range(df.shape[1]), index=df.columns)
lon_idx = lon.index.map(col_position).values

In [9]:
def _as_ordered_arrays(X_frame, y_frame):
    '''Convert one (inputs, targets) pair of frames into longitude-ordered arrays.

    Inputs are reshaped to (samples, 3, n_sensors, 1) and targets are kept as
    (samples, n_sensors); in both, the sensor axis is reordered with lon_idx.
    NOTE(review): the reshape assumes the input columns are laid out
    time-step-major, sensor-minor — confirm against df_shift.
    '''
    features = X_frame.to_numpy().reshape(-1, 3, df.shape[1], 1)
    targets = y_frame.to_numpy()
    return features[:, :, lon_idx, :], targets[:, lon_idx]


X_tr1, y_tr1 = _as_ordered_arrays(X_train, y_train)
X_te1, y_te1 = _as_ordered_arrays(X_test, y_test)

Now we specify which sensor we want to predict and test.

(In the future, we need to discuss how we are going to predict: either by looping over each sensor, or by giving a single vector-valued prediction.)

Model architecture is defined below.

Some highlights:
* Locally connected layers work better than purely convolutional ones in the first layers (probably because the sensors are not located on a uniform grid)
* Trick to improve accuracy: add a final layer combining the convolutional prediction with the persistence prediction, so that if the input is "strange", the model can learn to output the persistence prediction (i.e., the previous time step), which is somewhat reasonable

In [10]:
def make_model_rnn(n_steps=3, n_sensors=16):
    '''Build the (uncompiled) locally-connected + LSTM forecasting model.

    The input holds, for each of ``n_steps`` time steps, one single-channel
    reading per sensor. The same stack of 1D layers is applied to every time
    step along the sensor axis, the per-step features are flattened and fed
    to an LSTM, and a final dense layer emits one value per sensor (i.e. the
    model predicts all sensors at once, not a single one).

    Parameters
    ----------
    n_steps : int
        Number of past time steps in the input. Must be >= 1.
    n_sensors : int
        Number of sensors (length of the sensor axis and of the output).
        Must be >= 11: the two unpadded local layers (kernel sizes 7 and 5)
        shrink the sensor axis by 6 and then by 4 positions.

    Returns
    -------
    keras.models.Model
        Maps an input of shape (n_steps, n_sensors, 1) to an output of
        shape (n_sensors,).

    Raises
    ------
    ValueError
        If ``n_steps`` < 1 or ``n_sensors`` is too small for the local layers.
    '''
    first_kernel, second_kernel = 7, 5
    # With padding='valid' each layer removes (kernel - 1) positions; at least
    # one position must survive both layers.
    min_sensors = (first_kernel - 1) + (second_kernel - 1) + 1
    if n_steps < 1:
        raise ValueError('n_steps must be >= 1, got %r' % (n_steps,))
    if n_sensors < min_sensors:
        raise ValueError(
            'n_sensors must be >= %d for the unpadded kernels (%d, %d), got %r'
            % (min_sensors, first_kernel, second_kernel, n_sensors)
        )

    xin = Input(shape=(n_steps, n_sensors, 1), name='main_input')

    # Position-specific (unshared) filters along the sensor axis, per time step.
    x = TimeDistributed(
            LocallyConnected1D(8, first_kernel, data_format='channels_last',
                               padding='valid', activation='relu')
        )(xin)
    x = TimeDistributed(
            LocallyConnected1D(16, second_kernel, data_format='channels_last',
                               padding='valid', activation='relu')
        )(x)
    # Shared-weight convolution; no activation is set, so this layer is linear.
    x = TimeDistributed(
            Conv1D(32, 3, data_format='channels_last', padding='causal')
        )(x)

    # Collapse (positions, channels) into one feature vector per time step so
    # the LSTM sees a (n_steps, features) sequence.
    xl = TimeDistributed(
            Flatten()
        )(x)
    xl = LSTM(20)(xl)
    xl = Dropout(0.2)(xl)
    xo = Dense(n_sensors)(xl)

    model = Model(inputs=[xin], outputs=[xo])
    return model

In [11]:
def train_and_test(batch_size, epochs, n_steps, n_sensors):
    '''Cross-validate the model on the training set, then train and test it.

    Uses the notebook-level arrays X_tr1 / y_tr1 (training) and
    X_te1 / y_te1 (test).

    1. Runs a 5-split TimeSeriesSplit over the training arrays, training a
       new model per split and recording its last-epoch validation MAE.
       This only yields several MAE estimates; no hyperparameter search is
       performed.
    2. Trains a new model on the full training arrays, using the test arrays
       as validation data, and records the last-epoch test MAE.

    Every fit gets its own optimizer and its own CyclicLR callback so that
    no optimizer or learning-rate-schedule state is carried over from one
    training run to the next.

    Parameters
    ----------
    batch_size : int
        Mini-batch size passed to ``model.fit``.
    epochs : int
        Number of epochs for every fit.
    n_steps, n_sensors : int
        Forwarded to ``make_model_rnn``.

    Returns
    -------
    tuple
        ``(test_loss, cv_loss, model)``: the test MAE (float), the list of
        per-split validation MAEs, and the model trained on the full
        training set.
    '''
    lr = 0.0001

    def _fit_fresh_model(X_fit, y_fit, X_val, y_val):
        # Build, compile and fit a brand-new model; return it together with
        # its last-epoch validation loss.
        net = make_model_rnn(n_steps=n_steps, n_sensors=n_sensors)
        net.compile(keras.optimizers.Adam(lr=lr), loss='mean_absolute_error')
        hist = net.fit(X_fit, y_fit,
                       batch_size=batch_size,
                       epochs=epochs,
                       validation_data=(X_val, y_val),
                       callbacks=[CyclicLR(step_size=250, base_lr=lr)],
                       verbose=0)
        return net, hist.history['val_loss'][-1]

    # Validation using TS split (just to obtain different MAE estimations,
    # no hyperoptimization for the moment)
    cv_loss = []
    for tr_idx, va_idx in TimeSeriesSplit(n_splits=5).split(X_tr1):
        _fold_model, fold_loss = _fit_fresh_model(X_tr1[tr_idx], y_tr1[tr_idx],
                                                  X_tr1[va_idx], y_tr1[va_idx])
        cv_loss.append(fold_loss)

    # Testing
    model, test_loss = _fit_fresh_model(X_tr1, y_tr1, X_te1, y_te1)

    print('MAE_val ', cv_loss)
    print('MAE_test ', test_loss)

    return test_loss, cv_loss, model

In [12]:
# Instantiate an untrained model only to inspect layer shapes and parameter counts;
# the training cell further down rebinds `model` to the trained network.
model = make_model_rnn(n_steps=3, n_sensors=16)
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
main_input (InputLayer)      (None, 3, 16, 1)          0         
_________________________________________________________________
time_distributed_1 (TimeDist (None, 3, 10, 8)          640       
_________________________________________________________________
time_distributed_2 (TimeDist (None, 3, 6, 16)          3936      
_________________________________________________________________
time_distributed_3 (TimeDist (None, 3, 6, 32)          1568      
_________________________________________________________________
time_distributed_4 (TimeDist (None, 3, 192)            0         
_________________________________________________________________
lstm_1 (LSTM)                (None, 20)                17040     
_________________________________________________________________
dropout_1 (Dropout)          (None, 20)                0         
__________

Now we are ready to train. The configuration below should take about 2 minutes on a 16-core CPU
(no GPU needed). We are using a huge batch size to speed things up.

In [13]:
batch_size = 2048   # as big as possible so we can explore many models
epochs = 32

# Bind all three results to real names: assigning to `_` in IPython stops the
# automatic last-output variables from updating, and the losses stay available
# for later inspection.
test_loss, cv_loss, model = train_and_test(batch_size, epochs, n_steps=3, n_sensors=16)

MAE_val  [0.11648415589942221, 0.08105337864483624, 0.041186277078846215, 0.06964156025545112, 0.1187103052485364]
MAE_test  0.08772129851604801


In [14]:
# Per-sensor test MAE. The columns of y_te1 were reordered with lon_idx,
# so they line up with lon.index.
abs_err = np.abs(model.predict(X_te1) - y_te1)
maes = pd.Series(abs_err.mean(axis=0), index=lon.index)

In [15]:
# Sensors ranked from lowest to highest test MAE.
maes.sort_values()

Location
DH10    0.073901
DH11    0.074398
AP5     0.076310
DH9     0.076534
DH8     0.078301
DH6     0.078756
DH4     0.080762
DH3     0.082154
AP1     0.085120
DH7     0.085368
DH5     0.089862
DH2     0.091355
DH1     0.095765
AP4     0.099898
AP6     0.107915
AP7     0.127141
dtype: float64

In [16]:
# Average of the per-sensor MAEs; compare with the MAE_test value printed by the training run.
maes.mean()

0.087721298823349