In [1]:
import pandas as pd
import numpy as np
import json
import time
import pickle
import os
import warnings
warnings.filterwarnings('ignore')
from math import sqrt, ceil


In [2]:
import os
import argparse
import glob
import logging
import time
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
import tensorflow as tf
from tensorflow.keras import layers

### TSMixer API
###########################################################################################################################
###########################################################################################################################
# Metrics #################################################################################################################
### All metrics are interchangeable with those used for Informer and Autoformer.
def RSE(pred, true):
    '''
    Calculates the root relative squared error.

    The L2 norm of the residuals (true - pred) is divided by the L2 norm of
    `true` centered on its overall mean.
    '''
    residual_energy = np.sum((true - pred) ** 2)
    baseline_energy = np.sum((true - true.mean()) ** 2)
    return np.sqrt(residual_energy) / np.sqrt(baseline_energy)

def CORR(pred, true):
    '''
    Calculates the Pearson correlation coefficient between pred and true.

    The correlation is computed independently along axis 0 and the resulting
    per-column values are averaged over the last axis.

    NOTE: the denominator is the square root of the *product of the two sums*
    of squared deviations. The earlier formula summed the product of squared
    deviations instead, which is not a correlation (it exceeds 1 for
    perfectly correlated inputs); values from the two formulas are therefore
    not comparable.
    '''
    true_dev = true - true.mean(0)
    pred_dev = pred - pred.mean(0)
    u = (true_dev * pred_dev).sum(0)
    d = np.sqrt((true_dev ** 2).sum(0) * (pred_dev ** 2).sum(0))
    return (u / d).mean(-1)

def MAE(pred, true):
    '''
    Calculates the mean absolute error, averaged over every element.
    '''
    abs_err = np.abs(pred - true)
    return np.mean(abs_err)

def MSE(pred, true):
    '''
    Calculates the mean squared error, averaged over every element.
    '''
    residual = pred - true
    return np.mean(residual ** 2)

def RMSE(pred, true):
    '''
    Calculates the root mean squared error (square root of MSE).
    '''
    mean_sq_err = MSE(pred, true)
    return np.sqrt(mean_sq_err)

def MAPE(pred, true):
    '''
    Calculates the mean absolute percentage error.

    The error is expressed as a fraction of `true` (not multiplied by 100).
    '''
    relative_err = (pred - true) / true
    return np.mean(np.abs(relative_err))

def MSPE(pred, true):
    '''
    Calculates the mean squared percentage error.

    The error is expressed as a fraction of `true` (not multiplied by 100).
    '''
    relative_err = (pred - true) / true
    return np.mean(np.square(relative_err))

def metric(pred, true):
    '''
    Evaluates every standard metric on one prediction/target pair.

    Returns a tuple in the fixed order (mae, mse, rmse, mape, mspe).
    '''
    metric_fns = (MAE, MSE, RMSE, MAPE, MSPE)
    return tuple(fn(pred, true) for fn in metric_fns)

# Dot dictionary ##########################################################################################################
class dotdict(dict):
    """Dictionary whose items can also be read, set and deleted as attributes.

    Reading an attribute that is not a key returns None instead of raising.
    """

    def __getattr__(self, key):
        # Only reached when normal attribute lookup fails; mirrors dict.get.
        return dict.get(self, key)

    def __setattr__(self, key, value):
        dict.__setitem__(self, key, value)

    def __delattr__(self, key):
        dict.__delitem__(self, key)


########## TODOOOO

###########################################################################################################################
#  Data loader and dependencies ###########################################################################################  

## TODO ###
## Delete the DATA_DIR code.
## Replace LOCAL_CACHE_DIR with a data_root_path variable that can be passed to the fit method.
## It will also need to be stored on the loader class as an instance attribute.
        

# Remote location that dataset CSVs are copied from when no local copy exists.
DATA_DIR = 'gs://time_series_datasets'
# Local directory where dataset CSVs are cached and read from.
LOCAL_CACHE_DIR = './dataset/'

class TSFDataLoader:
    """Generate windowed train/validation/test datasets from a raw CSV.

    The file ``<data>.csv`` is read from ``LOCAL_CACHE_DIR`` (and first copied
    there from ``DATA_DIR`` when it is missing), split chronologically,
    standardized with the statistics of the training split, and exposed as
    ``tf.data`` datasets yielding ``(inputs, labels)`` windows.

    Args:
        data: dataset name; also the stem of the CSV file name.
        batch_size: number of windows per batch.
        seq_len: number of time steps in each input window.
        pred_len: number of time steps in each label window.
        feature_type: 'S' (univariate -> univariate), 'M' (multivariate ->
            multivariate) or 'MS' (multivariate -> univariate).
        target: name of the target column, used for 'S' and 'MS'.
    """

    def __init__(
            self, data, batch_size, seq_len, pred_len, feature_type, target='OT'
            ):
        self.data = data
        self.batch_size = batch_size
        self.seq_len = seq_len
        self.pred_len = pred_len
        self.feature_type = feature_type
        self.target = target
        # By default every channel is a label; narrowed in _read_data for 'MS'.
        self.target_slice = slice(0, None)

        self._read_data()

    def _read_data(self):
        """Load raw data, split it into train/val/test and standardize it."""

        # copy data from cloud storage if not exists
        # makedirs(exist_ok=True) also creates missing parent directories and
        # tolerates the directory being created concurrently.
        os.makedirs(LOCAL_CACHE_DIR, exist_ok=True)

        file_name = self.data + '.csv'
        cache_filepath = os.path.join(LOCAL_CACHE_DIR, file_name)
        if not os.path.isfile(cache_filepath):
            tf.io.gfile.copy(
                os.path.join(DATA_DIR, file_name), cache_filepath, overwrite=True
                )

        df_raw = pd.read_csv(cache_filepath)

        # S: univariate-univariate, M: multivariate-multivariate, MS:
        # multivariate-univariate
        df = df_raw.set_index('date')
        if self.feature_type == 'S':
            df = df[[self.target]]
        elif self.feature_type == 'MS':
            target_idx = df.columns.get_loc(self.target)
            self.target_slice = slice(target_idx, target_idx + 1)

        # split train/valid/test
        n = len(df)
        if self.data.startswith('ETTm'):
            train_end = 12 * 30 * 24 * 4
            val_end = train_end + 4 * 30 * 24 * 4
            test_end = val_end + 4 * 30 * 24 * 4
        # THE SPLITS below match the splits of Informer, Crossformer, Autoformer, Fedformer
        # The synth and wind series use the same hourly split as ETTh.
        elif self.data.startswith(('ETTh', 'SYNTHh', 'DEWINDh')):
            train_end = 12 * 30 * 24
            val_end = train_end + 4 * 30 * 24
            test_end = val_end + 4 * 30 * 24
        else:
            train_end = int(n * 0.7)
            val_end = n - int(n * 0.2)
            test_end = n

        # Validation/test splits start seq_len rows early so their first
        # window has a full input history. Clamp at 0: a negative start would
        # silently slice from the end of the frame instead.
        val_start = max(train_end - self.seq_len, 0)
        test_start = max(val_end - self.seq_len, 0)

        train_df = df[:train_end]
        val_df = df[val_start:val_end]
        test_df = df[test_start:test_end]

        # Debug debug
        print(len(train_df))
        print(len(val_df))
        print(len(test_df))
        # Debug debug

        # standardize by training set
        self.scaler = StandardScaler()
        self.scaler.fit(train_df.values)

        def scale_df(df, scaler):
            data = scaler.transform(df.values)
            return pd.DataFrame(data, index=df.index, columns=df.columns)

        self.train_df = scale_df(train_df, self.scaler)
        self.val_df = scale_df(val_df, self.scaler)
        self.test_df = scale_df(test_df, self.scaler)
        self.n_feature = self.train_df.shape[-1]

    def _split_window(self, data):
        """Split batched windows into (inputs, labels).

        The first seq_len steps of every window are the inputs (all channels);
        the remaining steps, restricted to target_slice, are the labels.
        """
        inputs = data[:, : self.seq_len, :]
        labels = data[:, self.seq_len :, self.target_slice]
        # Slicing doesn't preserve static shape information, so set the shapes
        # manually. This way the `tf.data.Datasets` are easier to inspect.
        inputs.set_shape([None, self.seq_len, None])
        labels.set_shape([None, self.pred_len, None])
        return inputs, labels

    def _make_dataset(self, data, shuffle=True):
        """Turn a split into a batched dataset of (inputs, labels) windows."""
        data = np.array(data, dtype=np.float32)
        ds = tf.keras.utils.timeseries_dataset_from_array(
            data=data,
            targets=None,
            sequence_length=(self.seq_len + self.pred_len),
            sequence_stride=1, # window stride
            shuffle=shuffle,
            batch_size=self.batch_size,
            )
        ds = ds.map(self._split_window)
        return ds

    def inverse_transform(self, data):
        """Undo the standardization using the scaler fit on the training split.

        NOTE(review): data is handed straight to the scaler, so it presumably
        needs the same column layout the scaler was fit on -- confirm for
        'MS', where the labels hold only the target column.
        """
        return self.scaler.inverse_transform(data)

    def get_train(self, shuffle=True):
        """Return the training dataset (shuffled by default)."""
        return self._make_dataset(self.train_df, shuffle=shuffle)

    def get_val(self):
        """Return the validation dataset (never shuffled)."""
        return self._make_dataset(self.val_df, shuffle=False)

    def get_test(self):
        """Return the test dataset (never shuffled)."""
        return self._make_dataset(self.test_df, shuffle=False)


###########################################################################################################################
# Reversible Instance Normalization #######################################################################################
class RevNorm(layers.Layer):
    """Reversible Instance Normalization.

    Called with mode='norm', the layer records the mean and standard deviation
    of its input along `axis`, normalizes the input with them and, when
    `affine` is set, applies a learnable per-channel scale and shift.
    Called with mode='denorm', it reverses those steps using the statistics
    recorded by the most recent 'norm' call.

    Args:
        axis: axis (or axes) the statistics are reduced over.
        eps: constant added to the variance before the square root.
        affine: whether to learn a per-channel scale and shift.
    """

    def __init__(self, axis, eps=1e-5, affine=True):
        super().__init__()
        self.axis = axis
        self.eps = eps
        self.affine = affine

    def build(self, input_shape):
        """Create the affine parameters, one value per channel (last axis)."""
        if self.affine:
            n_channels = input_shape[-1]
            # Keyword arguments keep the name/shape pairing unambiguous.
            self.affine_weight = self.add_weight(
               name='affine_weight', shape=(n_channels,), initializer='ones'
               )
            self.affine_bias = self.add_weight(
               name='affine_bias', shape=(n_channels,), initializer='zeros'
               )

    def call(self, x, mode, target_slice=None):
        """Normalize or denormalize x.

        Args:
            x: input tensor; the statistics are indexed as
                [batch, time, channel] during denormalization.
            mode: 'norm' or 'denorm'.
            target_slice: channel slice selecting which statistics to use when
                denormalizing; None means all channels.

        Raises:
            NotImplementedError: if mode is neither 'norm' nor 'denorm'.
        """
        if mode == 'norm':
            self._get_statistics(x)
            x = self._normalize(x)
        elif mode == 'denorm':
            x = self._denormalize(x, target_slice)
        else:
            raise NotImplementedError
        return x

    def _get_statistics(self, x):
        """Record mean and stdev of x along self.axis (no gradient flows)."""
        self.mean = tf.stop_gradient(
           tf.reduce_mean(x, axis=self.axis, keepdims=True)
           )
        self.stdev = tf.stop_gradient(
           tf.sqrt(
              tf.math.reduce_variance(x, axis=self.axis, keepdims=True) + self.eps
              )
            )

    def _normalize(self, x):
        """Standardize x with the recorded statistics, then apply the affine map."""
        x = x - self.mean
        x = x / self.stdev
        if self.affine:
            x = x * self.affine_weight
            x = x + self.affine_bias
        return x

    def _denormalize(self, x, target_slice=None):
        """Invert _normalize for the channels selected by target_slice."""
        # Indexing a tensor with None inserts a new axis rather than selecting
        # everything, which would broadcast x to the wrong shape. Treat None
        # as "all channels".
        if target_slice is None:
            target_slice = slice(None)
        if self.affine:
           x = x - self.affine_bias[target_slice]
           x = x / self.affine_weight[target_slice]
        x = x * self.stdev[:, :, target_slice]
        x = x + self.mean[:, :, target_slice]
        return x
  

###########################################################################################################################
# TSMixer Block ###########################################################################################################
def res_block(inputs, norm_type, activation, dropout, ff_dim):
    """One TSMixer residual block: a time-mixing step, then a feature-mixing step.

    Args:
        inputs: tensor laid out as [Batch, Input Length, Channel].
        norm_type: 'L' selects LayerNormalization; anything else selects
            BatchNormalization.
        activation: activation used by the mixing Dense layers.
        dropout: dropout rate applied after each Dense layer.
        ff_dim: width of the hidden feature-mixing layer.

    Returns:
        A tensor with the same layout as `inputs`.
    """
    if norm_type == 'L':
        norm_cls = layers.LayerNormalization
    else:
        norm_cls = layers.BatchNormalization

    # --- Time mixing: a Dense layer applied across the time axis -----------
    mixed = norm_cls(axis=[-2, -1])(inputs)
    mixed = tf.transpose(mixed, perm=[0, 2, 1])  # [Batch, Channel, Input Length]
    n_steps = mixed.shape[-1]
    mixed = layers.Dense(n_steps, activation=activation)(mixed)
    mixed = tf.transpose(mixed, perm=[0, 2, 1])  # [Batch, Input Length, Channel]
    mixed = layers.Dropout(dropout)(mixed)
    time_mixed = mixed + inputs  # first residual connection

    # --- Feature mixing: a two-layer MLP applied across the channel axis ---
    hidden = norm_cls(axis=[-2, -1])(time_mixed)
    hidden = layers.Dense(ff_dim, activation=activation)(hidden)  # [Batch, Input Length, FF_Dim]
    hidden = layers.Dropout(dropout)(hidden)
    n_channels = inputs.shape[-1]
    hidden = layers.Dense(n_channels)(hidden)  # [Batch, Input Length, Channel]
    hidden = layers.Dropout(dropout)(hidden)
    return hidden + time_mixed  # second residual connection
  
###########################################################################################################################
# Build TSMixer with Reversible Instance Normalization ####################################################################
def build_model(
      input_shape,
      pred_len,
      norm_type,
      activation,
      n_block,
      dropout,
      ff_dim,
      target_slice,
    ):

    """Build TSMixer with Reversible Instance Normalization model.

    Args:
        input_shape: shape of one input sample, (Input Length, Channel).
        pred_len: number of time steps to forecast.
        norm_type: normalization used inside each residual block
            ('L' for LayerNorm, otherwise BatchNorm).
        activation: activation used inside each residual block.
        n_block: number of stacked residual blocks.
        dropout: dropout rate used inside each residual block.
        ff_dim: hidden width of the feature-mixing layers.
        target_slice: channel slice to forecast; a falsy value (e.g. None)
            keeps every channel.

    Returns:
        A tf.keras.Model mapping [Batch, Input Length, Channel] inputs to
        [Batch, Output Length, Channel] forecasts.
    """

    inputs = tf.keras.Input(shape=input_shape)
    x = inputs  # [Batch, Input Length, Channel]
    rev_norm = RevNorm(axis=-2)
    x = rev_norm(x, 'norm')
    for _ in range(n_block):
        x = res_block(x, norm_type, activation, dropout, ff_dim)

    if target_slice:
        x = x[:, :, target_slice]

    x = tf.transpose(x, perm=[0, 2, 1])  # [Batch, Channel, Input Length]
    x = layers.Dense(pred_len)(x)  # [Batch, Channel, Output Length]
    outputs = tf.transpose(x, perm=[0, 2, 1])  # [Batch, Output Length, Channel])
    # Never forward None to the denorm step: indexing the stored statistics
    # with None inserts a new axis instead of selecting all channels.
    denorm_slice = slice(None) if target_slice is None else target_slice
    outputs = rev_norm(outputs, 'denorm', denorm_slice)
    return tf.keras.Model(inputs, outputs)


# Silence TensorFlow log output (native C++ logs and the Python logger).
# NOTE(review): this runs after `import tensorflow` earlier in the file;
# TF_CPP_MIN_LOG_LEVEL may need to be set before that import to affect the
# native logs -- confirm.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'  # FATAL
logging.getLogger('tensorflow').setLevel(logging.FATAL)

class TSMixer():
    '''
    Scikit-style wrapper holding the TSMixer experiment configuration.

    Settings are collected in `self.args` (a dotdict) by __init__, compile
    and fit. Training and prediction are not wired in yet: fit and predict
    only return a placeholder string.
    '''

    def __init__(self, model='tsmixer_rev_in'):
        # The redundant `model` entry is kept so that all model APIs can
        # eventually be combined.
        #
        # TODO: move `features`/`target` into a separate method (get_data?).
        # Valid choices here and in the other APIs must be ['S', 'M', 'MS'];
        # check whether Crossformer can handle them.
        self.args = dotdict(
            model=model,
            seed=0,
            # Multivariate settings, currently tailored to synth and wind series.
            features='S',
            target='TARGET',  # name given to the target column
            # Time-series settings.
            seq_len=96,  # default of the other models; the authors use 336
            # Model architecture (kernel_size is omitted: no CNN is fitted).
            n_block=2,  # number of blocks for deep architecture
            ff_dim=2048,  # fully-connected feature dimension
            dropout=0.05,  # dropout rate
            norm_type='B',  # BatchNorm; the authors' alternative is 'L' (LayerNorm)
            activation='relu',  # the authors' alternative is 'gelu'
            temporal_dim=16,  # temporal feature dimension
            hidden_dim=64,  # hidden feature dimension
            num_workers=19,  # maybe switch to 0 like the other models if there is a problem
        )

        self.checkpoints = './tsmixer_checkpoints'
        self.delete_checkpoint = False  # not sure this is the right default

    def compile(self, learning_rate=1e-4, loss='mse', early_stopping_patience=5):
        '''
        Records the optimization settings. Only the 'mse' loss is supported;
        any other value raises ValueError.
        '''
        if loss != 'mse':
            raise ValueError("Loss function not supported. Please use 'mse'.")
        self.args.update(
            loss=loss,
            learning_rate=learning_rate,
            patience=early_stopping_patience,
        )

    def fit(self, batch_size=32, epochs=100, pred_len=24):
        '''
        Validates the prediction length and records the training settings.

        TODO: should also accept data and data_root_path.
        Raises ValueError when pred_len is outside the experiment scope.
        '''
        if pred_len not in (24, 48, 96, 168, 336, 720):
            raise ValueError('Prediction length outside current experiment scope. Please use either 24, 48, 96, 168, 336, 720.')
        self.args.update(
            pred_len=pred_len,
            batch_size=batch_size,  # 32 is the authors' default
            train_epochs=epochs,  # 100 is the authors' default
        )

        # Placeholder until training is implemented.
        return 'lol good luck'

    def predict(self):
        '''
        Placeholder until prediction is implemented.
        '''
        return 'lol good luck'

def parse_args(argv=None):
    """Parse the arguments for experiment configuration.

    Defines every option that `main` reads. Defaults mirror the values used
    elsewhere in this file (the TSMixer wrapper and TSFDataLoader).

    Args:
        argv: optional list of argument strings. None (the default) makes
            argparse read the process command line; pass [] to get the
            defaults, e.g. when calling from a notebook.

    Returns:
        The parsed argparse.Namespace.

    Side effects:
        Seeds the random number generators via
        tf.keras.utils.set_random_seed(args.seed).
    """
    parser = argparse.ArgumentParser(
        description='TSMixer for time series forecasting'
    )

    # basic config
    parser.add_argument('--seed', type=int, default=0, help='random seed')
    parser.add_argument(
        '--model',
        type=str,
        default='tsmixer_rev_in',
        help='model name',
    )

    # data loader
    parser.add_argument(
        '--data',
        type=str,
        default='weather',
        choices=[
            'electricity',
            'exchange_rate',
            'national_illness',
            'traffic',
            'weather',
            'ETTm1',
            'ETTm2',
            'ETTh1',
            'ETTh2',
        ],
        help='data name',
    )
    parser.add_argument(
        '--feature_type',
        type=str,
        default='S',
        choices=['S', 'M', 'MS'],
        help=(
            'S: univariate-univariate, M: multivariate-multivariate, '
            'MS: multivariate-univariate'
        ),
    )
    parser.add_argument(
        '--target', type=str, default='OT', help='target column for S or MS'
    )
    parser.add_argument(
        '--checkpoint_dir',
        type=str,
        default='./tsmixer_checkpoints',
        help='directory for model checkpoints',
    )
    parser.add_argument(
        '--delete_checkpoint',
        action='store_true',
        help='delete checkpoints after the experiment',
    )

    # forecasting task
    parser.add_argument(
        '--seq_len', type=int, default=96, help='input sequence length'
    )
    parser.add_argument(
        '--pred_len', type=int, default=24, help='prediction sequence length'
    )

    # model hyperparameters
    parser.add_argument(
        '--n_block', type=int, default=2, help='number of residual blocks'
    )
    parser.add_argument(
        '--ff_dim', type=int, default=2048, help='fully-connected feature dimension'
    )
    parser.add_argument(
        '--dropout', type=float, default=0.05, help='dropout rate'
    )
    parser.add_argument(
        '--norm_type',
        type=str,
        default='B',
        choices=['L', 'B'],
        help='L: LayerNorm, B: BatchNorm',
    )
    parser.add_argument(
        '--activation',
        type=str,
        default='relu',
        choices=['relu', 'gelu'],
        help='activation function',
    )
    parser.add_argument(
        '--kernel_size', type=int, default=4, help='kernel size for the CNN model'
    )

    # optimization
    parser.add_argument(
        '--train_epochs', type=int, default=100, help='number of training epochs'
    )
    parser.add_argument(
        '--batch_size', type=int, default=32, help='batch size of input data'
    )
    parser.add_argument(
        '--learning_rate', type=float, default=1e-4, help='optimizer learning rate'
    )
    parser.add_argument(
        '--patience', type=int, default=5, help='early-stopping patience'
    )

    # save results
    parser.add_argument(
        '--result_path', default='result.csv', help='path to save result'
    )

    args = parser.parse_args(argv)

    tf.keras.utils.set_random_seed(args.seed)

    return args


def main():
    """Train and evaluate one forecasting configuration and log the result.

    Steps: parse the configuration, build the windowed datasets, build the
    model, train with checkpointing and early stopping, evaluate the best
    checkpoint on the test split, and append one row of metrics to the CSV
    at args.result_path.

    Raises:
        ValueError: if args.model is not a recognized model type.
    """
    args = parse_args()
    if 'tsmixer' in args.model:
        exp_id = f'{args.data}_{args.feature_type}_{args.model}_sl{args.seq_len}_pl{args.pred_len}_lr{args.learning_rate}_nt{args.norm_type}_{args.activation}_nb{args.n_block}_dp{args.dropout}_fd{args.ff_dim}'
    elif args.model == 'full_linear':
        exp_id = f'{args.data}_{args.feature_type}_{args.model}_sl{args.seq_len}_pl{args.pred_len}_lr{args.learning_rate}'
    elif args.model == 'cnn':
        exp_id = f'{args.data}_{args.feature_type}_{args.model}_sl{args.seq_len}_pl{args.pred_len}_lr{args.learning_rate}_ks{args.kernel_size}'
    else:
        raise ValueError(f'Unknown model type: {args.model}')

    # load datasets
    data_loader = TSFDataLoader(
        args.data,
        args.batch_size,
        args.seq_len,
        args.pred_len,
        args.feature_type,
        args.target,
    )
    train_data = data_loader.get_train()
    val_data = data_loader.get_val()
    test_data = data_loader.get_test()

    # train model
    # NOTE(review): `models` is not defined in the visible part of this file.
    # The RevIN variant uses the `build_model` defined in this file; every
    # other branch still relies on an external `models` package -- TODO
    # confirm it is importable or remove those branches.
    if 'tsmixer' in args.model:
        if args.model == 'tsmixer_rev_in':
            model_builder = build_model
        else:
            model_builder = getattr(models, args.model).build_model
        model = model_builder(
            input_shape=(args.seq_len, data_loader.n_feature),
            pred_len=args.pred_len,
            norm_type=args.norm_type,
            activation=args.activation,
            dropout=args.dropout,
            n_block=args.n_block,
            ff_dim=args.ff_dim,
            target_slice=data_loader.target_slice,
        )
    elif args.model == 'full_linear':
        model = models.full_linear.Model(
            n_channel=data_loader.n_feature,
            pred_len=args.pred_len,
        )
    elif args.model == 'cnn':
        model = models.cnn.Model(
            n_channel=data_loader.n_feature,
            pred_len=args.pred_len,
            kernel_size=args.kernel_size,
        )
    else:
        raise ValueError(f'Model not supported: {args.model}')

    optimizer = tf.keras.optimizers.Adam(learning_rate=args.learning_rate)
    # There is no `self` in this function; take the loss from the parsed
    # arguments when present, otherwise use 'mse'.
    loss = getattr(args, 'loss', 'mse')
    model.compile(optimizer=optimizer, loss=loss, metrics=['mae'])
    checkpoint_path = os.path.join(args.checkpoint_dir, f'{exp_id}_best')
    checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
        filepath=checkpoint_path,
        verbose=1,
        save_best_only=True,
        save_weights_only=True,
    )
    early_stop_callback = tf.keras.callbacks.EarlyStopping(
        monitor='val_loss', patience=args.patience
    )
    start_training_time = time.time()
    history = model.fit(
       train_data,
       epochs=args.train_epochs,
       validation_data=val_data,
       callbacks=[checkpoint_callback, early_stop_callback],
    )
    end_training_time = time.time()
    elapsed_training_time = end_training_time - start_training_time
    print(f'Training finished in {elapsed_training_time} secconds')

    # evaluate best model
    best_epoch = np.argmin(history.history['val_loss'])
    model.load_weights(checkpoint_path)
    test_result = model.evaluate(test_data)
    if args.delete_checkpoint:
        for f in glob.glob(checkpoint_path + '*'):
            os.remove(f)

    # save result to csv
    data = {
        'data': [args.data],
        'model': [args.model],
        'seq_len': [args.seq_len],
        'pred_len': [args.pred_len],
        'lr': [args.learning_rate],
        'mse': [test_result[0]],
        'mae': [test_result[1]],
        'val_mse': [history.history['val_loss'][best_epoch]],
        'val_mae': [history.history['val_mae'][best_epoch]],
        'train_mse': [history.history['loss'][best_epoch]],
        'train_mae': [history.history['mae'][best_epoch]],
        'training_time': elapsed_training_time,
        'norm_type': args.norm_type,
        'activation': args.activation,
        'n_block': args.n_block,
        'dropout': args.dropout,
    }

    # Model names are lower-case everywhere else in this function, so match
    # on 'tsmixer'. This adds an ff_dim column that rows written earlier to
    # an existing result file will not have.
    if 'tsmixer' in args.model:
        data['ff_dim'] = args.ff_dim

    df = pd.DataFrame(data)
    if os.path.exists(args.result_path):
        df.to_csv(args.result_path, mode='a', index=False, header=False)
    else:
        df.to_csv(args.result_path, mode='w', index=False, header=True)


# Run the experiment only when this module is executed as the main program.
if __name__ == '__main__':
    main()


2024-01-03 19:42:56.294689: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F AVX512_VNNI FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2024-01-03 19:42:57.840639: I tensorflow/core/util/port.cc:104] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-01-03 19:43:03.489085: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /home/RDC/anasashb/.conda/envs/anbs_2/lib/python3.8/site-packages/nvidia/cudnn/lib:/hom