In [None]:
!pip install tensorflow==2.14

In [None]:
import os
import random
import shutil

import numpy as np
import tensorflow as tf
import tensorflow.keras.layers as tfkl
import tensorflow.keras.models as tfk
from IPython.display import FileLink
from sklearn.model_selection import StratifiedShuffleSplit
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import RobustScaler
from statsmodels.tsa.stattools import pacf, acf
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau

# Seed every random number generator the notebook touches so runs are repeatable.
seed = 42
random.seed(seed)  # Python's built-in RNG
# NOTE(review): PYTHONHASHSEED is normally read at interpreter start-up, so setting
# it here presumably only affects child processes — confirm whether it is needed.
os.environ['PYTHONHASHSEED'] = str(seed)
np.random.seed(seed)  # NumPy's global RNG
tf.random.set_seed(seed)  # TensorFlow's global seed
# NOTE(review): looks redundant with the TF2 call above — verify before removing.
tf.compat.v1.set_random_seed(seed)

In [None]:
# Directory holding the input arrays; change this single path to run outside Kaggle.
DATA_DIR = '/kaggle/input/time-series-training-dataset'

# X: one time series per row (indexed below as X[i, :]).
X = np.load(f'{DATA_DIR}/training_data.npy')
# valid_periods: one (start, end) index pair per row of X.
valid_periods = np.load(f'{DATA_DIR}/valid_periods.npy')
# categories: one label per row of X.
categories = np.load(f'{DATA_DIR}/categories.npy')

In [None]:
def create_padding_mask(series, valid_periods):
    """Return a boolean mask flagging the valid samples of every row.

    Args:
        series: 2-D array with one time series per row.
        valid_periods: iterable of (start, end) index pairs, one per row.

    Returns:
        Boolean array shaped like `series` that is True on columns
        start..end-1 of each row and False everywhere else.
    """
    valid_mask = np.zeros_like(series, dtype=bool)
    for row, (first, last) in enumerate(valid_periods):
        # Half-open interval: `last` itself is not part of the valid range.
        valid_mask[row, slice(first, last)] = True
    return valid_mask

def encode_categories(categories):
    """Map category labels to consecutive integer codes.

    Codes follow the sorted order of the distinct labels (as returned by
    `np.unique`), so the smallest label becomes 0, the next one 1, and so on.

    Args:
        categories: 1-D sequence of hashable labels.

    Returns:
        NumPy array with one integer code per input label.
    """
    codes = {}
    for label in np.unique(categories):
        # The next free code is simply the number of labels registered so far.
        codes[label] = len(codes)
    return np.array(list(map(codes.__getitem__, categories)))

def remove_padding(series, valid_period):
    """Return the slice of `series` delimited by `valid_period`.

    Args:
        series: 1-D sequence holding a padded time series.
        valid_period: (start, end) pair; `end` is exclusive.

    Returns:
        `series[start:end]`.
    """
    first, last = valid_period
    return series[slice(first, last)]

# Boolean mask of the valid (non-padded) samples of every series in X.
padding_mask = create_padding_mask(X, valid_periods)
# Replace the raw category labels with integer codes (overwrites `categories`).
categories=encode_categories(categories)

In [None]:
# Thresholds for identifying low and high scales / centers. A series whose
# fitted scale or center falls outside [low_threshold, high_threshold] is dropped.
low_threshold = 0.05
high_threshold = 0.9

# Row-wise robust scaling: every series gets its own RobustScaler, fitted on the
# non-zero samples of that row only.
# NOTE: Xpredscaled, scalespred and centerspred keep one entry per ORIGINAL row;
# they are not filtered when outlier rows are removed from X further down.
Xpredscaled = np.zeros_like(X)
scalespred = np.zeros(X.shape[0], dtype=float)
centerspred = np.zeros(X.shape[0], dtype=float)

for i in range(X.shape[0]):
    nonzero_mask = X[i, :] != 0

    # An all-zero row gives the scaler nothing to fit on. Leave its scale and
    # center at 0 so the low thresholds below flag it for removal instead of
    # letting RobustScaler raise on an empty sample.
    if not nonzero_mask.any():
        continue

    # Non-zero values as a column vector, the layout scikit-learn expects.
    row_data_nonzero = X[i, nonzero_mask].reshape(-1, 1)
    scaler = RobustScaler().fit(row_data_nonzero)

    # Write the scaled values back to the positions they came from.
    Xpredscaled[i, nonzero_mask] = scaler.transform(row_data_nonzero).flatten()

    # Store scale and center for later use
    scalespred[i] = scaler.scale_[0]
    centerspred[i] = scaler.center_[0]

# Row indices violating each threshold. np.flatnonzero always returns an integer
# array, even when nothing matches, so the unions below stay valid indices for
# np.delete (an empty Python list would have turned them into float arrays).
low_scales = np.flatnonzero(scalespred < low_threshold)
high_scales = np.flatnonzero(scalespred > high_threshold)

low_centers = np.flatnonzero(centerspred < low_threshold)
high_centers = np.flatnonzero(centerspred > high_threshold)

all_scales = np.union1d(low_scales, high_scales)
all_centers = np.union1d(low_centers, high_centers)
all_indexes = np.union1d(all_scales, all_centers)
print(f"Dropping {len(all_indexes)} of {X.shape[0]} series")

# Remove the flagged rows from every per-series array so they stay aligned.
X = np.delete(X, all_indexes, axis=0)
categories = np.delete(categories, all_indexes, axis=0)
valid_periods = np.delete(valid_periods, all_indexes, axis=0)

# Strip the padding from each remaining series and re-encode the labels so the
# codes are consecutive again after the removal.
trimmed_data = [remove_padding(X[i], valid_periods[i]) for i in range(len(X))]
categories = encode_categories(categories)

len(trimmed_data), categories.shape

In [None]:
# Find series that are too SHORT to be useful and exclude them.
# Series with at most this many valid samples are dropped.
min_length = 72
lengths = [len(series) for series in trimmed_data]

indexes_too_short = np.where(np.array(lengths) <= min_length)[0]
print(indexes_too_short)

indexes = indexes_too_short
print(len(indexes))

# Drop the same rows from every per-series array so they stay aligned.
X_noout = np.delete(X, indexes, axis=0)
valid_periods_noout = np.delete(valid_periods, indexes, axis=0)
categories_noout = np.delete(categories, indexes, axis=0)
start_times_noout = valid_periods_noout[:, 0]
end_times_noout = valid_periods_noout[:, 1]

# Boolean keep-mask instead of `i not in indexes`: membership tests on an array
# scan it linearly for every element, which is quadratic overall.
keep_mask = np.ones(len(trimmed_data), dtype=bool)
keep_mask[indexes] = False
trimmed_data_noout = [row for row, keep in zip(trimmed_data, keep_mask) if keep]

print(X_noout.shape, len(categories_noout), len(start_times_noout), len(end_times_noout))
print(len(trimmed_data_noout))

In [None]:
# Define the Time2Vec Layer
class Time2Vec(tf.keras.layers.Layer):
    """Time2Vec layer: concatenates a learned linear term with learned sine terms.

    For an input whose last axis has `feature_dim` entries the output's last
    axis has `feature_dim + kernel_size` entries: the element-wise linear
    transform of the input followed by `kernel_size` sinusoidal features.

    Args:
        kernel_size: number of sinusoidal features to produce.
        **kwargs: standard Keras `Layer` arguments. `name` defaults to
            'Time2VecLayer' and `trainable` to True (the previous hard-coded
            values); pass a different `name` when a model contains more than
            one instance of this layer.
    """

    def __init__(self, kernel_size=1, **kwargs):
        # Keep the historical defaults but let callers override them.
        kwargs.setdefault('trainable', True)
        kwargs.setdefault('name', 'Time2VecLayer')
        super().__init__(**kwargs)
        self.k = kernel_size

    def build(self, input_shape):
        """Create the weights once the size of the last input axis is known.

        Raises:
            ValueError: if the last dimension of `input_shape` is undefined.
        """
        # Determine feature dimension from the last dimension of the input shape
        feature_dim = input_shape[-1]
        if feature_dim is None:
            raise ValueError("The feature dimension of the input must be defined.")

        # Weights of the linear term (applied element-wise on the last axis).
        self.wb = self.add_weight(name='wb', shape=(feature_dim,), initializer='uniform', trainable=True)
        self.bb = self.add_weight(name='bb', shape=(feature_dim,), initializer='uniform', trainable=True)
        # Weights of the periodic term: project the features onto `k` frequencies.
        self.wa = self.add_weight(name='wa', shape=(feature_dim, self.k), initializer='uniform', trainable=True)
        self.ba = self.add_weight(name='ba', shape=(self.k,), initializer='uniform', trainable=True)
        super().build(input_shape)

    def call(self, x):
        """Return `concat([wb * x + bb, sin(x @ wa + ba)], axis=-1)`."""
        linear = self.wb * x + self.bb

        # Matrix-multiply over the last axis, then add the per-frequency bias.
        sin_trans = tf.math.sin(tf.linalg.matmul(x, self.wa) + self.ba)

        # Concatenate linear and sin_trans along the last dimension
        return tf.concat([linear, sin_trans], axis=-1)

    def get_config(self):
        """Return the constructor arguments so the layer can be serialized and cloned."""
        config = super().get_config()
        config.update({'kernel_size': self.k})
        return config

In [None]:
class PositionEmbedding(tfkl.Layer):
    """Adds a learned embedding of each time-step index to the input.

    Args:
        maxlen: number of distinct positions the embedding table holds.
        embed_dim: size of each position embedding.
        **kwargs: standard Keras `Layer` arguments.
    """

    def __init__(self, maxlen, embed_dim, **kwargs):
        super().__init__(**kwargs)
        # Kept so get_config() can rebuild the layer.
        self.maxlen = maxlen
        self.embed_dim = embed_dim
        self.pos_emb = tfkl.Embedding(input_dim=maxlen, output_dim=embed_dim)

    def call(self, x):
        """Return `x` plus the embedding of every position along the time axis.

        `x` is expected to be (batch, time, features). The position embeddings
        have shape (time, embed_dim) and are broadcast over the batch; an input
        with a single feature is broadcast up to `embed_dim` features.
        """
        # The time axis is the second-to-last one. Using the last axis (the
        # feature axis, of size 1 for the models in this notebook) would embed
        # position 0 only and add the same vector to every time step.
        seq_len = tf.shape(x)[-2]
        positions = tf.range(start=0, limit=seq_len, delta=1)
        positions = self.pos_emb(positions)
        return x + positions

    def get_config(self):
        """Return the constructor arguments so the layer can be serialized and cloned."""
        config = super().get_config()
        config.update({'maxlen': self.maxlen, 'embed_dim': self.embed_dim})
        return config

class TransformerEncoderBlock(tfkl.Layer):
    """Transformer encoder block: self-attention followed by a feed-forward network.

    Each of the two sub-layers is followed by dropout, a residual connection
    and layer normalization (applied to the residual sum).

    Args:
        embed_dim: width of the feed-forward output; also passed as `key_dim`
            to the attention layer.
        num_heads: number of attention heads.
        ff_dim: hidden width of the feed-forward network.
        rate: dropout rate applied after attention and after the feed-forward network.
    """
    def __init__(self, embed_dim, num_heads, ff_dim, rate=0.1, **kwargs):
        super().__init__(**kwargs)
        self.att = tfkl.MultiHeadAttention(num_heads=num_heads, key_dim=embed_dim)
        # Two-layer position-wise network: expand to ff_dim, project back to embed_dim.
        self.ffn = tfk.Sequential([tfkl.Dense(ff_dim, activation="relu"), tfkl.Dense(embed_dim)])
        self.layernorm1 = tfkl.LayerNormalization(epsilon=1e-6)
        self.layernorm2 = tfkl.LayerNormalization(epsilon=1e-6)
        self.dropout1 = tfkl.Dropout(rate)
        self.dropout2 = tfkl.Dropout(rate)

    def call(self, inputs, training):
        """Apply the block to `inputs`; `training` is forwarded to the dropout layers."""
        # Self-attention: query and value are both `inputs`, no attention mask.
        attn_output = self.att(inputs, inputs)
        attn_output = self.dropout1(attn_output, training=training)
        # Residual connection, then normalization.
        out1 = self.layernorm1(inputs + attn_output)

        ffn_output = self.ffn(out1)
        ffn_output = self.dropout2(ffn_output, training=training)
        return self.layernorm2(out1 + ffn_output)

def create_time_series_encoder(sequence_length, embed_dim, num_heads, latent_dim):
    """Assemble the encoder model.

    The model takes a univariate window of shape (sequence_length, 1), adds
    position embeddings and runs one Transformer encoder block.

    Args:
        sequence_length: number of time steps in the input window.
        embed_dim: embedding width used by the position embedding and the block.
        num_heads: number of attention heads.
        latent_dim: hidden width of the block's feed-forward network.

    Returns:
        A Keras model named "TimeSeriesEncoder".
    """
    series_in = tfkl.Input(shape=(sequence_length, 1))
    embedded = PositionEmbedding(sequence_length, embed_dim)(series_in)
    encoded = TransformerEncoderBlock(embed_dim, num_heads, latent_dim)(embedded)
    return tfk.Model(series_in, encoded, name="TimeSeriesEncoder")

In [None]:
class TransformerDecoderBlock(tfkl.Layer):
    """Transformer decoder block.

    Three sub-layers, each followed by dropout, a residual connection and layer
    normalization: causally masked self-attention, attention over the encoder
    outputs, and a feed-forward network.

    Args:
        embed_dim: width of the feed-forward output; also passed as `key_dim`
            to both attention layers.
        num_heads: number of attention heads.
        ff_dim: hidden width of the feed-forward network.
        rate: dropout rate used by the three dropout layers.
    """
    def __init__(self, embed_dim, num_heads, ff_dim, rate=0.1, **kwargs):
        super().__init__(**kwargs)
        self.att1 = tfkl.MultiHeadAttention(num_heads=num_heads, key_dim=embed_dim)
        self.att2 = tfkl.MultiHeadAttention(num_heads=num_heads, key_dim=embed_dim)
        self.ffn = tfk.Sequential([tfkl.Dense(ff_dim, activation="relu"), tfkl.Dense(embed_dim)])
        self.layernorm1, self.layernorm2, self.layernorm3 = [tfkl.LayerNormalization(epsilon=1e-6) for _ in range(3)]
        self.dropout1, self.dropout2, self.dropout3 = [tfkl.Dropout(rate) for _ in range(3)]

    def call(self, inputs, encoder_outputs, training, mask=None):
        """Run the block.

        Args:
            inputs: decoder-side sequence.
            encoder_outputs: sequence the second attention layer attends to.
            training: forwarded to the dropout layers.
            mask: optional mask; when given it is combined with the causal mask
                and used for the attention over `encoder_outputs`. When None,
                that attention is unmasked.
        """
        causal_mask = self.get_causal_attention_mask(inputs)
        # Local `padding_mask`: the given mask with an extra axis, limited by the causal mask.
        # NOTE(review): assumes `mask` is (batch, sequence) — confirm against callers.
        padding_mask = tf.cast(mask[:, tf.newaxis, :], dtype="int32") if mask is not None else None
        padding_mask = tf.minimum(padding_mask, causal_mask) if mask is not None else None

        # Self-attention over the decoder inputs, restricted by the causal mask.
        attn_output_1 = self.att1(inputs, inputs, inputs, attention_mask=causal_mask)
        attn_output_1 = self.dropout1(attn_output_1, training=training)
        out_1 = self.layernorm1(inputs + attn_output_1)

        # Attention from the decoder state (query) over the encoder outputs.
        attn_output_2 = self.att2(out_1, encoder_outputs, encoder_outputs, attention_mask=padding_mask)
        attn_output_2 = self.dropout2(attn_output_2, training=training)
        out_2 = self.layernorm2(out_1 + attn_output_2)

        ffn_output = self.ffn(out_2)
        ffn_output = self.dropout3(ffn_output, training=training)

        return self.layernorm3(out_2 + ffn_output)

    def get_causal_attention_mask(self, inputs):
        """Return an int32 (batch, seq, seq) mask that is 1 where row index >= column index."""
        batch_size, sequence_length = tf.shape(inputs)[0], tf.shape(inputs)[1]
        # i is a column vector and j a row vector, so `i >= j` broadcasts to a
        # lower-triangular (seq, seq) matrix.
        i, j = tf.range(sequence_length)[:, tf.newaxis], tf.range(sequence_length)
        mask = tf.cast(i >= j, dtype="int32")
        # Repeat the same matrix for every element of the batch.
        return tf.tile(tf.reshape(mask, (1, sequence_length, sequence_length)), [batch_size, 1, 1])

def create_time_series_decoder(sequence_length, prediction_length, embed_dim, num_heads, latent_dim):
    """Assemble the decoder model.

    Inputs are a univariate window of shape (sequence_length, 1) and the
    encoder outputs of shape (sequence_length, embed_dim). The window is
    position-embedded, passed through one decoder block and dropout (0.5),
    projected per time step, flattened and mapped to `prediction_length` values.

    Args:
        sequence_length: number of time steps in both inputs.
        prediction_length: number of values the model outputs.
        embed_dim: embedding width.
        num_heads: number of attention heads.
        latent_dim: hidden width of the block's feed-forward network.

    Returns:
        A Keras model named "TimeSeriesDecoder".
    """
    target_in = tfkl.Input(shape=(sequence_length, 1))
    memory_in = tfkl.Input(shape=(sequence_length, embed_dim))

    hidden = PositionEmbedding(sequence_length, embed_dim)(target_in)
    hidden = TransformerDecoderBlock(embed_dim, num_heads, latent_dim)(hidden, memory_in)
    hidden = tfkl.Dropout(0.5)(hidden)

    # Per-step projection, then collapse the time axis into one vector per sample.
    per_step = tfkl.TimeDistributed(tfkl.Dense(prediction_length))(hidden)
    flattened = tfkl.Reshape((sequence_length * prediction_length,))(per_step)
    forecast = tfkl.Dense(prediction_length)(flattened)

    return tfk.Model([target_in, memory_in], forecast, name="TimeSeriesDecoder")

In [None]:
def create_transformer_time_series_model(encoder, decoder, sequence_length):
    """Chain an encoder and a decoder into a single two-input model.

    Args:
        encoder: model mapping a (sequence_length, 1) window to encoder outputs.
        decoder: model taking `[decoder window, encoder outputs]`.
        sequence_length: number of time steps of both input windows.

    Returns:
        A Keras model named "TimeSeriesTransformer" whose inputs are
        `[encoder_inputs, decoder_inputs]` and whose output is the decoder output.
    """
    window_shape = (sequence_length, 1)
    enc_in = tfkl.Input(shape=window_shape, name="encoder_inputs")
    dec_in = tfkl.Input(shape=window_shape, name="decoder_inputs")

    # The decoder consumes its own window together with the encoded one.
    forecast = decoder([dec_in, encoder(enc_in)])

    return tfk.Model([enc_in, dec_in], forecast, name="TimeSeriesTransformer")

In [None]:
# Model hyperparameters.
sequence_length = 200  # time steps in each input window fed to the encoder/decoder
embed_dim = 128  # position-embedding width; also used as attention key_dim
num_heads = 4  # attention heads per block
latent_dim = 1024  # hidden width of the feed-forward network inside each block

# Number of future values the model outputs; the targets produced by
# build_sequences have `telescope` values (default 18), so the two must agree.
prediction_length = 18

In [None]:
# Build the encoder, the decoder and the end-to-end model from the hyperparameters.
time_series_encoder = create_time_series_encoder(sequence_length, embed_dim, num_heads, latent_dim)
time_series_decoder = create_time_series_decoder(sequence_length, prediction_length, embed_dim, num_heads, latent_dim)
time_series_transformer = create_transformer_time_series_model(time_series_encoder, time_series_decoder, sequence_length)

# Print a layer-by-layer summary of each model.
time_series_encoder.summary()
time_series_decoder.summary()
time_series_transformer.summary()

# Write an architecture diagram of each model to a PNG file in the working directory.
# NOTE(review): plot_model presumably needs pydot/graphviz installed — confirm in the target environment.
tf.keras.utils.plot_model(time_series_encoder, show_shapes=True, expand_nested=True, to_file='time_series_encoder.png')
tf.keras.utils.plot_model(time_series_decoder, show_shapes=True, expand_nested=True, to_file='time_series_decoder.png')
tf.keras.utils.plot_model(time_series_transformer, show_shapes=True, expand_nested=True, to_file='time_series_transformer.png')

In [None]:
def build_sequences(array, window, stride, start_times, end_times, categories, shuffle=True, seed=42, telescope=18):
    """Cut every series into (input window, forecast target) pairs.

    For each row, the valid part `array[idx, start:end]` is left-padded with
    zeros up to a multiple of `window`. A window of `window` samples is then
    slid over it with step `stride`; the `telescope` samples that follow each
    window are its target.

    Args:
        array: 2-D array with one padded series per row.
        window: number of samples in each input window.
        stride: step between the starts of consecutive windows.
        start_times: per-row start index of the valid data.
        end_times: per-row end index (exclusive) of the valid data.
        categories: per-row category label.
        shuffle: currently unused; kept for interface compatibility.
        seed: currently unused; kept for interface compatibility.
        telescope: number of samples in each target.

    Returns:
        Tuple `(df, y, cat)`: input windows of shape (n, window), targets of
        shape (n, telescope) and the integer-encoded category of each window's
        source series. When no window can be produced, correctly shaped empty
        arrays are returned.

    Note:
        A series whose padded length is below `window + telescope` yields no
        samples at all.
    """
    df = []
    y = []
    cat = []

    for idx in range(len(array)):
        # Valid part of the series, without the stored padding.
        actual_data = array[idx, start_times[idx]:end_times[idx]]

        # Left-pad with zeros so the length becomes a multiple of the window.
        remainder = len(actual_data) % window
        if remainder != 0:
            padding = np.zeros(window - remainder, dtype='float32')
            actual_data = np.concatenate((padding, actual_data))

        # The "+ 1" keeps the last start position whose target ends exactly at
        # the end of the series; without it that valid window was dropped.
        last_start = len(actual_data) - window - telescope
        for i in range(0, last_start + 1, stride):
            df.append(actual_data[i:i + window])
            y.append(actual_data[i + window:i + window + telescope])
            cat.append(categories[idx])

    # No window at all: return empties with the documented shapes instead of
    # flat zero-length arrays.
    if not df:
        return (
            np.empty((0, window), dtype='float32'),
            np.empty((0, telescope), dtype='float32'),
            np.empty((0,), dtype=int),
        )

    df = np.array(df)
    y = np.array(y)
    cat = encode_categories(np.array(cat))

    return df, y, cat

In [None]:
myX = X_noout
mycategories = categories_noout

# One stratified shuffle split: 10% of the series go to validation, with the
# category proportions preserved.
sss = StratifiedShuffleSplit(n_splits=1, test_size=0.1, random_state=seed)

# n_splits=1, so the generator yields exactly one (train, validation) index pair.
train_index, val_index = next(sss.split(myX, mycategories))
train_indexes = [train_index]
val_indexes = [val_index]

# Apply the same row selection to every per-series array.
X_train_noseq = myX[train_index]
X_val_noseq = myX[val_index]
cat_train_noseq = mycategories[train_index]
cat_val_noseq = mycategories[val_index]
start_times_train_noseq = start_times_noout[train_index]
start_times_val_noseq = start_times_noout[val_index]
end_times_train_noseq = end_times_noout[train_index]
end_times_val_noseq = end_times_noout[val_index]

In [None]:
# The windows must have exactly as many samples as the model's input, so the
# window size is derived from sequence_length instead of repeating the literal.
window_size = sequence_length
# Step (in samples) between the starts of consecutive windows.
stride_size = 12

In [None]:
# Cut the training series into (window, target) pairs.
X_train, y_train, cats = build_sequences(
    X_train_noseq,
    window_size,
    stride_size,
    start_times_train_noseq,
    end_times_train_noseq,
    cat_train_noseq,
)

# Same windowing settings for the validation series.
X_val, y_val, cats_val = build_sequences(
    X_val_noseq,
    window_size,
    stride_size,
    start_times_val_noseq,
    end_times_val_noseq,
    cat_val_noseq,
)

In [None]:
# Training configuration: Adam optimizer and mean-squared-error loss.
optimizer = tf.keras.optimizers.Adam(learning_rate=1e-4)
loss_fn = tf.keras.losses.MeanSquaredError()
# Stop after 10 epochs without a lower val_loss and restore the best weights.
early_stopping_callback = EarlyStopping(monitor='val_loss', mode='min', patience=10, restore_best_weights=True)
# Halve the learning rate after 5 epochs without a lower val_loss, down to 1e-5 at most.
reduce_on_plateau = ReduceLROnPlateau(monitor='val_loss', mode='min', patience=5, factor=0.5, min_lr=1e-5)

In [None]:
# Compile with the MSE loss; MSE and MAE are additionally reported as metrics.
time_series_transformer.compile(optimizer=optimizer, loss=loss_fn, metrics=[tf.keras.metrics.MeanSquaredError(), tf.keras.metrics.MeanAbsoluteError()])

In [None]:
# Train for up to 200 epochs; the same window is fed to both the encoder and
# the decoder input, and the callbacks may stop training early or lower the
# learning rate based on val_loss.
history = time_series_transformer.fit([X_train, X_train], y_train, 
                                      epochs=200, batch_size=32, 
                                      validation_data=([X_val, X_val], y_val), 
                                      callbacks=[early_stopping_callback, reduce_on_plateau])

In [None]:
# Export the trained model to the 'SubmissionModel' directory and zip it.
# (`shutil` is imported in the notebook's import cell.)
time_series_transformer.save('SubmissionModel')
shutil.make_archive('SubmissionModel', 'zip', 'SubmissionModel')

In [None]:
# Render a download link for the zipped model.
# (`FileLink` is imported in the notebook's import cell.)
FileLink(r'SubmissionModel.zip')