In [None]:
# Fix randomness and hide warnings.
# Statement order matters in this cell: the environment variables are set
# before matplotlib and TensorFlow are imported further down.
seed = 69
import os
# TensorFlow C++ log level; set before `import tensorflow` below.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
# NOTE(review): assigning PYTHONHASHSEED here is only inherited by child
# processes; the running interpreter read it at startup — confirm intent.
os.environ['PYTHONHASHSEED'] = str(seed)
# Matplotlib config directory, placed under the current working directory.
os.environ['MPLCONFIGDIR'] = os.getcwd()+'/configs/'
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)
# `Warning` is the base class of all warning categories, so this second filter
# silences every warning (the FutureWarning filter above is subsumed by it).
warnings.simplefilter(action='ignore', category=Warning)
import numpy as np
np.random.seed(seed)
from scipy.interpolate import interp1d
import logging
import gc
import random
random.seed(seed)
import pandas as pd
from datetime import datetime
import matplotlib.pyplot as plt
plt.rc('font', size=16)
#from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
# NOTE(review): second alias for the module already imported as `plt` above.
import matplotlib.pyplot as pyplot
# Import tensorflow
import tensorflow as tf
from tensorflow import keras as tfk
from tensorflow.keras import layers as tfkl
# Reduce TensorFlow / AutoGraph log output to errors only.
tf.autograph.set_verbosity(0)
tf.get_logger().setLevel(logging.ERROR)
tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)
# Seed TensorFlow's global random generator (v2 and compat.v1 APIs).
tf.random.set_seed(seed)
tf.compat.v1.set_random_seed(seed)
print(tf.__version__)

# Data Loading - Preprocessing

In [None]:
# --- Windowing of the time series ---
window = 200                # number of past time steps fed to the model
telescope = 18              # number of future time steps to predict
stride = 10                 # offset between the starts of consecutive windows
interpolated_window = 400   # NOTE(review): not referenced by any visible cell

# --- Optimisation ---
epochs = 200                # upper bound on training epochs
batch_size = 64             # samples per batch

In [None]:
# Load the three arrays of the Kaggle dataset from its fixed input directory.
# `data` holds the series and `valid_periods` is indexed per series as
# valid_periods[i][0] when the windows are built further down.
# NOTE(review): `categories` is loaded but not used by any visible cell.
data = np.load("/kaggle/input/time-series-anndl/training_data.npy")
categories = np.load("/kaggle/input/time-series-anndl/categories.npy")
valid_periods = np.load("/kaggle/input/time-series-anndl/valid_periods.npy")
# Display the shapes as the cell output for a quick sanity check.
data.shape, categories.shape, valid_periods.shape

In [None]:
def _true_run_lengths(mask):
    """Return the length of every run of consecutive True values in a 1-D boolean array."""
    # Mark run boundaries: index 0 if the mask opens with a True run, every
    # index where the value flips, and a closing sentinel at len(mask).
    boundaries = np.where(np.concatenate(([mask[0]], mask[:-1] != mask[1:], [True])))[0]
    # Consecutive differences are run lengths that alternate True/False and
    # always begin with a True run, so every other entry is a True run.
    return np.diff(boundaries)[::2]


def build_sequences_filtered(target_data, valid_periods, window=200, stride=50, telescope=18):
    """Slice each series into (input, target) windows, dropping flat windows.

    For every series `target_data[i]`, windows start at `valid_periods[i][0]`
    and advance by `stride` up to and including the last start for which the
    input (`window` steps) plus the target (`telescope` steps) still fit in
    the series. Only the first entry of each `valid_periods` row is read.

    A window is rejected when its input contains a run of more than 50
    consecutive samples whose absolute gradient is below 0.005, i.e. an
    (almost) constant stretch. The number of rejected windows is printed.

    Args:
        target_data: iterable of 1-D series.
        valid_periods: per-series sequence whose element 0 is the first index
            to use as a window start.
        window: number of input time steps per sample.
        stride: step between consecutive window starts; must divide `window`.
        telescope: number of target time steps per sample.

    Returns:
        Tuple `(inputs, targets)` of arrays with a trailing axis of size 1
        appended; when no window is kept, the arrays have shapes
        `(0, window, 1)` and `(0, telescope, 1)`.

    Raises:
        AssertionError: if `window` is not a multiple of `stride`.
    """
    assert window % stride == 0
    outlier_detection_length = 50
    delta = 0.005
    outliers = []
    dataset = []
    labels = []
    for i, signal in enumerate(target_data):
        first_start = int(valid_periods[i][0])
        # Last start index for which input + target still fit in the series.
        last_start = len(signal) - window - telescope
        # `last_start + 1` makes the bound inclusive so the final window is
        # kept; an exclusive bound silently drops it. The range is empty when
        # the valid part of the series is too short.
        for j in range(first_start, last_start + 1, stride):
            input_sequence = signal[j:j+window]
            output_sequence = signal[j+window:j+window+telescope]
            grad_less_delta = np.abs(np.gradient(input_sequence)) < delta
            flat_runs = _true_run_lengths(grad_less_delta)
            # A flat run longer than the threshold marks the window as an outlier.
            if np.any(flat_runs > outlier_detection_length):
                # One entry (the series index) is recorded per rejected window.
                outliers.append(i)
            else:
                dataset.append(input_sequence)
                labels.append(output_sequence)
    print(len(outliers))
    if not dataset:
        # Keep the documented rank even when every window was filtered out.
        return np.empty((0, window, 1)), np.empty((0, telescope, 1))
    return np.expand_dims(np.array(dataset),axis=-1), np.expand_dims(np.array(labels),axis=-1)

In [None]:
# Build the supervised windows with the shared hyperparameters. `window` is
# passed through (rather than a literal 200) so the samples always match the
# input length the model is built with.
X, y = build_sequences_filtered(data, valid_periods, window=window, stride=stride, telescope=telescope)
# NOTE(review): windows overlap whenever stride < window and are split at
# random here, so validation windows share time steps with training windows;
# validation metrics are probably optimistic. Consider splitting by series.
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)
X.shape, y.shape

# Model definition

In [None]:
class DLinear(tfk.models.Model):
    """Decomposition-based forecaster with a CNN on each branch and attention on the trend.

    The input is split into a trend (moving average) and a residual
    (input minus trend). Each part goes through its own `Conv1D`; the trend
    features additionally go through multi-head self-attention. Both branches
    are flattened, projected by a `Dense` layer to
    `output_steps * output_features` values, summed and reshaped to
    `(output_steps, output_features)`.

    Args:
        output_shape: pair `(output_steps, output_features)`.
        separate_features: stored on the instance; not read by this class.
        kernel_size: pool size of the moving average that extracts the trend.
        num_attention_heads: number of heads of the trend attention layer.
        cnn_filters: number of filters of both `Conv1D` layers.
        cnn_kernel_size: kernel size of both `Conv1D` layers.
        **kwargs: forwarded to the Keras `Model` constructor.
    """

    def __init__(self, output_shape, separate_features=False, kernel_size=25, num_attention_heads=4, cnn_filters=64, cnn_kernel_size=40, **kwargs):
        super(DLinear, self).__init__(**kwargs)
        self.kernel_size = kernel_size
        self.num_attention_heads = num_attention_heads
        self.output_steps = output_shape[0]
        self.output_features = output_shape[1]
        self.separate_features = separate_features
        self.cnn_filters = cnn_filters
        self.cnn_kernel_size = cnn_kernel_size
        self.kernel_initializer = "he_normal"

    def build(self, input_shape):
        """Create all sub-layers once and remember the input shape for `model()`."""
        self.built_input_shape = input_shape

        # Stateless layers are created here, once, instead of being
        # re-instantiated on every forward pass / trace of `call`.
        self.trend_decomposer = tfkl.AveragePooling1D(pool_size=self.kernel_size, strides=1, padding="same", name="trend_decomposer")
        self.residual_decomposer = tfkl.Subtract(name="residual_decomposer")
        self.flatten_residual = tfkl.Flatten()
        self.flatten_trend = tfkl.Flatten()
        self.recomposer = tfkl.Add(name="recomposer")
        self.output_reshape = tfkl.Reshape((self.output_steps, self.output_features))

        # Trainable layers.
        self.trend_cnn = tfkl.Conv1D(filters=self.cnn_filters, kernel_size=self.cnn_kernel_size, activation='relu', padding='same', kernel_initializer=self.kernel_initializer, name="trend_cnn")
        self.res_cnn = tfkl.Conv1D(filters=self.cnn_filters, kernel_size=self.cnn_kernel_size, activation='relu', padding='same', kernel_initializer=self.kernel_initializer, name="res_cnn")
        self.residual_dense = tfkl.Dense(self.output_steps * self.output_features, kernel_initializer=self.kernel_initializer, name="residual_recomposer")
        self.trend_output_dense = tfkl.Dense(self.output_steps * self.output_features, kernel_initializer=self.kernel_initializer, name="trend_output_dense")
        # NOTE(review): key_dim equals the number of output features (1 in the
        # notebook), which gives very small attention heads — confirm intent.
        self.attention = tfkl.MultiHeadAttention(num_heads=self.num_attention_heads, key_dim=self.output_features)
        super(DLinear, self).build(input_shape)

    def call(self, inputs):
        """Forward pass; returns a tensor of shape (batch, output_steps, output_features)."""
        # Decompose: moving average with stride 1 and "same" padding keeps
        # the sequence length, so the residual is a plain difference.
        trend = self.trend_decomposer(inputs)
        residual = self.residual_decomposer([inputs, trend])

        # One CNN per branch.
        trend_features = self.trend_cnn(trend)
        residual_features = self.res_cnn(residual)

        # Self-attention (query == value) on the trend branch only.
        trend_attended = self.attention(trend_features, trend_features)

        flat_residual = self.flatten_residual(residual_features)
        flat_trend = self.flatten_trend(trend_attended)

        # Project each branch to the forecast size and recombine.
        residual_forecast = self.residual_dense(flat_residual)
        trend_forecast = self.trend_output_dense(flat_trend)
        combined = self.recomposer([residual_forecast, trend_forecast])

        return self.output_reshape(combined)

    def summary(self, *args, **kwargs):
        """Print a summary; arguments are forwarded to Keras `Model.summary`.

        Once built, the summary of the functional wrapper from `model()` is
        printed so that the individual layers and their shapes are listed.
        """
        if self.built:
            self.model().summary(*args, **kwargs)
        else:
            super().summary(*args, **kwargs)

    def model(self):
        """Return a functional `Model` wrapping `call` on the built input shape (batch axis dropped)."""
        x = tfkl.Input(shape=(self.built_input_shape[1:]))
        model = tfk.models.Model(inputs=[x], outputs=self.call(x))
        return model

In [None]:
# Forecast `telescope` steps of one feature from `window` past steps of one feature.
model = DLinear((telescope, 1), separate_features=False, kernel_size=25)
model.build((None, window, 1))

# MSE is the optimised loss; MAE is tracked as an additional metric.
model.compile(
    optimizer=tfk.optimizers.AdamW(1e-3),
    loss=tfk.losses.MeanSquaredError(),
    metrics=['mae'],
)
model.summary()

# Training

In [None]:
def make_dataset(X, y, batch_size=128, prefetch_amt=tf.data.experimental.AUTOTUNE):
    """Wrap `(X, y)` in a batched, cached and prefetched `tf.data.Dataset`.

    Every sample of `X` and `y` is reshaped to `(steps, 1)`, so both
    `(steps,)` and `(steps, 1)` samples are accepted. The number of steps is
    inferred from the data instead of being fixed to 200 / 18, so the
    function keeps working when the window or forecast length changes.

    Args:
        X: array-like of inputs, first axis is the sample axis.
        y: array-like of targets, first axis is the sample axis.
        batch_size: number of samples per batch; the last, smaller batch is kept.
        prefetch_amt: buffer size passed to `Dataset.prefetch`.

    Returns:
        A `tf.data.Dataset` yielding `(inputs, targets)` batches. The data is
        not shuffled here.
    """
    dataset = tf.data.Dataset.from_tensor_slices((X, y))
    # -1 lets TensorFlow infer the number of steps from each sample.
    dataset = dataset.map(
        lambda x, y: (tf.reshape(x, (-1, 1)), tf.reshape(y, (-1, 1))),
        num_parallel_calls=tf.data.AUTOTUNE,
    )
    dataset = dataset.batch(batch_size, drop_remainder=False)
    # Cache after batching so the reshaped batches are reused on later epochs.
    dataset = dataset.cache()
    dataset = dataset.prefetch(prefetch_amt)
    return dataset

In [None]:
# The targets are squeezed to (samples, steps); make_dataset restores the
# trailing feature axis and takes care of batching.
train_ds = make_dataset(X_train, np.squeeze(y_train, axis=-1), batch_size)
val_ds = make_dataset(X_val, np.squeeze(y_val, axis=-1), batch_size)

# `batch_size` is deliberately not passed to fit(): the datasets already
# yield batches, and Keras documents that it must not be set for datasets.
history = model.fit(
    train_ds,
    epochs = epochs,
    validation_data = val_ds,
    callbacks = [
        # Stop after 5 epochs without a val_loss improvement of at least 5e-6
        # and roll back to the best weights.
        tfk.callbacks.EarlyStopping(monitor='val_loss', mode='min', patience=5, min_delta=5e-6, restore_best_weights=True),
        # Divide the learning rate by 10 after 4 epochs without improvement.
        tfk.callbacks.ReduceLROnPlateau(monitor='val_loss', mode='min', patience=4, factor=0.1)
    ],
).history

In [None]:
# Score the trained model on the held-out validation arrays; the cell output
# shows the compiled loss (MSE) followed by the compiled metric (MAE).
model.evaluate(X_val,y_val)

In [None]:
# Epoch (0-based) with the lowest validation loss.
best_epoch = np.argmin(history['val_loss'])

# Training vs. validation loss, with the best epoch marked.
loss_fig, loss_ax = plt.subplots(figsize=(17,4))
loss_ax.plot(history['loss'], label='Training loss', alpha=.8, color='#ff7f0e')
loss_ax.plot(history['val_loss'], label='Validation loss', alpha=.9, color='#5a9aa5')
loss_ax.axvline(x=best_epoch, label='Best epoch', alpha=.3, ls='--', color='#5a9aa5')
loss_ax.set_title('Mean Squared Error (Loss)')
loss_ax.legend()
loss_ax.grid(alpha=.3)
# Zoom in on small loss values.
loss_ax.set_ylim([0,0.02])
plt.show()

# Learning-rate schedule recorded in the training history.
lr_fig, lr_ax = plt.subplots(figsize=(18,3))
lr_ax.plot(history['lr'], label='Learning Rate', alpha=.8, color='#ff7f0e')
lr_ax.axvline(x=best_epoch, label='Best epoch', alpha=.3, ls='--', color='#5a9aa5')
lr_ax.legend()
lr_ax.grid(alpha=.3)
plt.show()

In [None]:
# Persist the trained model under a relative path (resolved against the
# current working directory). The zip step below expects it under
# /kaggle/working — presumably the notebook's working directory; verify.
model.save('DLinear_tel_18_win200_att')

In [None]:
# Shell command: recursively archive the saved model directory into
# DLinear_Att.zip (created in the current working directory).
!zip -r DLinear_Att.zip /kaggle/working/DLinear_tel_18_win200_att

In [None]:
# Render a link to the archive as the cell output so it can be downloaded
# from the notebook UI.
from IPython.display import FileLink
FileLink(r'DLinear_Att.zip')