In [None]:
%load_ext autoreload
%autoreload 2

import os
from copy import deepcopy

import torch
import numpy as np
import numpy.typing as npt
import matplotlib.pyplot as plt
from tqdm import tqdm

from moment.utils.config import Config
from moment.utils.utils import parse_config
from moment.utils.forecasting_metrics import get_forecasting_metrics
from moment.data.dataloader import get_timeseries_dataloader
from moment.data.forecasting_datasets import get_forecasting_datasets, ShortForecastingDataset
from moment.models.base import BaseModel
from moment.models.moment import MOMENT
from moment.models.nbeats import NBEATS
from moment.models.nhits import NHITS
from moment.models.timesnet import TimesNet
from moment.models.gpt4ts import GPT4TS

In [None]:
def get_dataloaders(args):
    """Create the train, test and validation dataloaders described by `args`.

    `args` is modified in place: `dataset_names` is set to
    `args.full_file_path_and_name`, and `data_split` / `batch_size` are
    overwritten before every `get_timeseries_dataloader` call, so after the
    function returns they hold the values used for the validation split.

    Returns:
        The tuple `(train_dataloader, test_dataloader, val_dataloader)`.
    """
    args.dataset_names = args.full_file_path_and_name

    # (split name, attribute of `args` holding that split's batch size).
    # The test and validation splits both use `val_batch_size`.
    split_plan = (
        ('train', 'train_batch_size'),
        ('test', 'val_batch_size'),
        ('val', 'val_batch_size'),
    )

    loaders = []
    for split_name, batch_size_attr in split_plan:
        args.data_split = split_name
        args.batch_size = getattr(args, batch_size_attr)
        loaders.append(get_timeseries_dataloader(args=args))

    train_dataloader, test_dataloader, val_dataloader = loaders
    return train_dataloader, test_dataloader, val_dataloader

In [None]:
# Show what `get_forecasting_datasets` returns for the "autoformer" collection
# (the bare expression is rendered as the cell output). Presumably a list of
# dataset file paths, as later cells index and split the result for other
# collections -- TODO confirm.
get_forecasting_datasets(collection="autoformer")

In [None]:
# Long-horizon forecasting setup: linear-probing config on the ETTh1 csv file.
DEFAULT_CONFIG_PATH = "../../configs/default.yaml"
GPU_ID = 4

config = Config(
    config_file_path="../../configs/forecasting/linear_probing.yaml",
    default_config_file_path=DEFAULT_CONFIG_PATH,
).parse()

# Use the chosen GPU index when CUDA is available, otherwise run on the CPU.
if torch.cuda.is_available():
    config['device'] = GPU_ID
else:
    config['device'] = 'cpu'

args = parse_config(config)
args.full_file_path_and_name = '/XXXX-14/project/public/XXXX-9/TimeseriesDatasets/forecasting/autoformer/ETTh1.csv'
args.dataset_names = args.full_file_path_and_name
args.task_name = "long-horizon-forecasting"
# `args.forecast_horizon` keeps its configured value here; it must be set to 0
# for reconstruction / imputation / anomaly detection.

train_dataloader, test_dataloader, val_dataloader = get_dataloaders(args)
print(f"Forecast horizon: {train_dataloader.dataset.forecast_horizon}")

In [None]:
# Look up the datasets of the two short-horizon collections. The following
# cells treat each result as a sequence of '/'-separated path strings.
# "monash" is filtered for names containing "m3" / "m4" further down.
short_forecasting_datasets = get_forecasting_datasets(collection="monash")
# "fred/preprocessed" is filtered for names containing "fred" further down.
fred_forecasting_datasets = get_forecasting_datasets(collection="fred/preprocessed")

In [None]:
# Report, for each short-horizon collection, the directory of its first entry
# and the file names of the available splits.
print("M3 datasets:")
# Everything before the last '/' of the first path is taken as the base directory.
m_datasets_base_path = short_forecasting_datasets[0].rpartition('/')[0]
print(f"--- M3 & M4 datasets (base path): {m_datasets_base_path}")
m3_split_names = [
    path.rpartition('/')[2] for path in short_forecasting_datasets if "m3" in path
]
m4_split_names = [
    path.rpartition('/')[2] for path in short_forecasting_datasets if "m4" in path
]
print("--- M3 splits:", m3_split_names)
print("--- M4 splits:", m4_split_names)

print("Fred datasets:")
fred_datasets_base_path = fred_forecasting_datasets[0].rpartition('/')[0]
print(f"--- FRED datasets (base path): {fred_datasets_base_path}")
fred_split_names = [
    path.rpartition('/')[2] for path in fred_forecasting_datasets if "fred" in path
]
print('--- Splits:', fred_split_names)

In [None]:
# NOTE(review): this repeats the `get_dataloaders` definition from an earlier
# cell of this notebook; consider keeping only one of them.
def get_dataloaders(args):
    """Return `(train_dataloader, test_dataloader, val_dataloader)` for `args`.

    The function writes to `args`: `dataset_names` is set to
    `args.full_file_path_and_name`, while `data_split` and `batch_size` are
    overwritten ahead of each `get_timeseries_dataloader` call. The train
    split uses `args.train_batch_size`; the test and validation splits both
    use `args.val_batch_size`.
    """
    def _loader_for(split_name, batch_size):
        # Point `args` at the requested split, then build its dataloader.
        args.data_split = split_name
        args.batch_size = batch_size
        return get_timeseries_dataloader(args=args)

    args.dataset_names = args.full_file_path_and_name

    train_dataloader = _loader_for('train', args.train_batch_size)
    test_dataloader = _loader_for('test', args.val_batch_size)
    val_dataloader = _loader_for('val', args.val_batch_size)
    return train_dataloader, test_dataloader, val_dataloader

def load_pretrained_moment(args, 
                         pretraining_task_name: str = "pre-training",
                         do_not_copy_head: bool = True):
    """Build a MOMENT model for `args` and initialise it from a pre-trained checkpoint.

    A second MOMENT instance is built from a copy of `args` whose `task_name`
    is `pretraining_task_name`, and the checkpoint returned by
    `BaseModel.load_pretrained_weights` is loaded into it. Every parameter
    that exists under the same name and with the same shape in both models is
    then copied into the returned model.

    Args:
        args: Model configuration. Must provide `pretraining_run_name`,
            `pretraining_opt_steps` and `finetuning_mode`. It is not modified.
        pretraining_task_name: `task_name` used to build the pre-trained model.
        do_not_copy_head: When True, parameters whose name starts with
            "head" keep their fresh initialisation.

    Returns:
        The MOMENT model built from `args`. When
        `args.finetuning_mode == 'linear-probing'`, every parameter outside
        the head has `requires_grad` set to False.
    """
    model = MOMENT(configs=args)

    # Work on a copy so the caller's `args.task_name` is left untouched.
    pretraining_args = deepcopy(args)
    pretraining_args.task_name = pretraining_task_name

    checkpoint = BaseModel.load_pretrained_weights(
        run_name=pretraining_args.pretraining_run_name,
        opt_steps=pretraining_args.pretraining_opt_steps)

    pretrained_model = MOMENT(configs=pretraining_args)
    pretrained_model.load_state_dict(checkpoint["model_state_dict"])

    # Copy pre-trained parameters to fine-tuned model.
    # Parameters are matched by name, not by position: if one model has an
    # extra or missing parameter, positional pairing would shift every later
    # pair onto mismatched names and silently copy nothing.
    pretrained_params = dict(pretrained_model.named_parameters())
    for name_f, param_f in model.named_parameters():
        param_p = pretrained_params.get(name_f)
        if param_p is None or param_p.shape != param_f.shape:
            continue
        if do_not_copy_head and name_f.startswith("head"):
            continue
        param_f.data = param_p.data

    if args.finetuning_mode == 'linear-probing':
        # Linear probing: only the head stays trainable.
        for name, param in model.named_parameters():
            if not name.startswith("head"):
                param.requires_grad = False

    print("====== Frozen parameter status ======")
    for name, param in model.named_parameters():
        if param.requires_grad:
            print("Not frozen:", name)
        else:
            print("Frozen:", name)
    print("=====================================")

    return model

# Value assigned to `args.forecast_horizon` for each dataset sampling frequency.
HORIZON_MAPPING = dict(
    hourly=48,
    daily=14,
    weekly=13,
    monthly=18,
    quarterly=8,
    yearly=6,
)

In [None]:
# Short-horizon forecasting setup: choose a dataset / frequency pair and a
# model config, then build the dataloaders.
DEFAULT_CONFIG_PATH = "../../configs/default.yaml"
GPU_ID = 1
# One of "monthly" | "quarterly" | "yearly" | "daily" | "hourly" | "weekly" | "other".
# NOTE(review): "other" has no entry in HORIZON_MAPPING, so selecting it raises
# a KeyError where the forecast horizon is looked up below.
FREQUENCY = "yearly"
# One of "m3" | "m4" | "fred".
DATASET = "m3"
if DATASET in ['m3', 'm4']:
    BASE_PATH = m_datasets_base_path
else:
    BASE_PATH = fred_datasets_base_path

# Alternative model configs (swap the path to try another model):
#   "../../configs/forecasting/linear_probing_short_horizon.yaml"
#   "../../configs/forecasting/nbeats.yaml"
#   "../../configs/forecasting/timesnet.yaml"
#   "../../configs/forecasting/gpt4ts.yaml"
config_file_path = "../../configs/forecasting/nhits.yaml"

config = Config(
    config_file_path=config_file_path,
    default_config_file_path=DEFAULT_CONFIG_PATH,
).parse()
# Use the chosen GPU index when CUDA is available, otherwise run on the CPU.
if torch.cuda.is_available():
    config['device'] = GPU_ID
else:
    config['device'] = 'cpu'

args = parse_config(config)

# The M3 / M4 files are stored as .tsf, the FRED files as .npy.
if DATASET in ['m3', 'm4']:
    file_format = 'tsf'
else:
    file_format = 'npy'
dataset_file_name = f"{DATASET}_{FREQUENCY}_dataset.{file_format}"
args.full_file_path_and_name = os.path.join(BASE_PATH, dataset_file_name)
args.dataset_names = args.full_file_path_and_name

# Notebook-level overrides of the parsed configuration.
args.forecast_horizon = HORIZON_MAPPING[FREQUENCY]
args.max_epoch = 5
args.train_batch_size = 64
args.val_batch_size = 64
args.use_amp = False

train_dataloader, test_dataloader, val_dataloader = get_dataloaders(args)
print(f"Forecast horizon: {train_dataloader.dataset.forecast_horizon}")
print(
    f"Lengths: Train: {train_dataloader.dataset.length_dataset} | "
    f"Test: {test_dataloader.dataset.length_dataset} | "
    f"Val: {val_dataloader.dataset.length_dataset}"
)

In [None]:
# Instantiate the model to train. Exactly one line should be active; the
# commented-out lines are the alternative models this notebook can run, and
# the choice should match `config_file_path` selected in the previous cell.
# model = GPT4TS(args)
# model = TimesNet(args)
# model = NBEATS(args)
model = NHITS(args)
# model = load_pretrained_moment(args)
# Move the model to the configured device.
model.to(args.device)
# model.eval()

In [None]:
# Report the model size and how much of it will receive gradient updates.
total_params = sum(p.numel() for p in model.parameters())
trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
print(f"Total parameters: {total_params}")
print(f"Trainable parameters: {trainable_params}")
# Guard the ratio so a model without parameters does not raise ZeroDivisionError.
trainable_percentage = trainable_params / total_params * 100 if total_params else 0.0
print(f"Percentage of trainable parameters: {trainable_percentage:.2f}%")

In [None]:
import wandb
from torch import optim
from torch import nn

from moment.common import PATHS
from moment.utils.utils import dtype_map
from moment.utils.forecasting_metrics import sMAPELoss


def train(args, model, train_dataloader):
    """Train `model` in place for `args.max_epoch` epochs and return it.

    Each batch is expected to expose `timeseries`, `input_mask` and `forecast`
    tensors. The model is called as
    `model(x_enc=..., input_mask=..., mask=None)` and its `.forecast` output
    is compared against `batch.forecast` with `sMAPELoss`. Optimisation uses
    AdamW, gradient-norm clipping at `args.max_norm`, and a gradient scaler
    plus autocast that are active only when `args.use_amp` is true. The loss
    of every step is logged to Weights & Biases as "step_train_loss".

    Args:
        args: Configuration providing `init_lr`, `weight_decay`, `use_amp`,
            `max_epoch`, `device`, `torch_dtype` and `max_norm`.
        model: Model to optimise.
        train_dataloader: Iterable of training batches that supports `len()`.

    Returns:
        The same `model` object after training.
    """
    # Setup logger
    logger = wandb.init(
            project="Time-series Foundation Model",
            dir=PATHS.WANDB_DIR)

    # The run is closed in `finally` so that an exception or interrupt during
    # training does not leave the W&B run open.
    try:
        optimizer = optim.AdamW(model.parameters(),
                                lr=args.init_lr,
                                weight_decay=args.weight_decay)

        scaler = torch.cuda.amp.GradScaler(enabled=args.use_amp)
        criterion = sMAPELoss(reduction='mean')

        cur_epoch = 0
        while cur_epoch < args.max_epoch: # Epoch based learning only
            model.train()

            for batch_x in tqdm(train_dataloader, total=len(train_dataloader)):
                optimizer.zero_grad(set_to_none=True)
                timeseries = batch_x.timeseries.float().to(args.device)
                input_mask = batch_x.input_mask.long().to(args.device)
                forecast = batch_x.forecast.float().to(args.device)

                # NOTE(review): `validation` in this notebook divides the input
                # by its per-series maximum and rescales the forecast, whereas
                # this loop feeds the raw series -- confirm that is intended.
                with torch.autocast(device_type='cuda',
                                    dtype=dtype_map(args.torch_dtype),
                                    enabled=args.use_amp):
                    outputs = model(
                        x_enc=timeseries, input_mask=input_mask, mask=None)

                loss = criterion(outputs.forecast, forecast)
                logger.log({"step_train_loss": loss.item()})

                scaler.scale(loss).backward()
                # Unscale first so that clipping sees the true gradient norm.
                scaler.unscale_(optimizer)
                nn.utils.clip_grad_norm_(model.parameters(), args.max_norm)
                scaler.step(optimizer)

                # Updates the scale for next iteration.
                scaler.update()

            cur_epoch = cur_epoch + 1
    finally:
        logger.finish()

    return model

In [None]:
# Train the selected model. `train` returns the same model object, which the
# notebook renders as this cell's output.
train(args, model, train_dataloader)

In [None]:
def validation(args, model, data_loader, return_preds):
    """Evaluate `model` on `data_loader` with the sMAPE loss.

    Every input series is divided by its maximum over the last axis before
    being passed to the model, and the forecast is multiplied by the same
    factor afterwards. A forecast whose shape differs from the target is
    cropped along its last axis to the target's horizon before the loss is
    computed.

    Args:
        args: Configuration providing `device`, `torch_dtype` and `use_amp`.
        model: Model called as `model(x_enc=..., input_mask=..., mask=None)`;
            its output must expose a `.forecast` tensor.
        data_loader: Iterable of batches with `timeseries`, `input_mask` and
            `forecast` tensors; must support `len()`.
        return_preds: Whether to also return the collected arrays.

    Returns:
        If `return_preds` is true, `(average_loss, losses, (trues, preds,
        histories))`, where `losses` holds one value per batch and the three
        arrays are concatenated over batches; `histories` contains the
        max-normalised inputs. Otherwise only `average_loss`.

    The model is put in eval mode for the evaluation and switched to train
    mode before returning.
    """
    trues, preds, histories, losses = [], [], [], []

    criterion = sMAPELoss(reduction='mean')

    model.eval()
    with torch.no_grad():
        for batch_x in tqdm(data_loader, total=len(data_loader)):
            timeseries = batch_x.timeseries.float().to(args.device)
            input_mask = batch_x.input_mask.long().to(args.device)
            forecast = batch_x.forecast.float().to(args.device)
            forecast_horizon = forecast.shape[-1]

            # Normalise each series by its maximum along the time axis.
            # NOTE(review): a maximum of 0 produces inf/nan and a negative
            # maximum flips the sign of the series -- confirm the datasets
            # used here cannot trigger either case.
            scaler = torch.max(timeseries, dim=-1, keepdim=True)[0]
            timeseries = timeseries / scaler

            with torch.autocast(device_type='cuda',
                                dtype=dtype_map(args.torch_dtype),
                                enabled=args.use_amp):
                outputs = model(x_enc=timeseries,
                                input_mask=input_mask,
                                mask=None)
                # Undo the normalisation on the prediction.
                forecast_hat = outputs.forecast * scaler

            # Compare shape with shape (comparing against the tensor itself is
            # always "different"), and crop the horizon on the last axis, which
            # is where `forecast_horizon` was read from.
            if forecast_hat.shape != forecast.shape:
                forecast_hat = forecast_hat[..., :forecast_horizon]

            loss = criterion(forecast_hat, forecast)
            losses.append(loss.item())

            if return_preds:
                trues.append(forecast.detach().cpu().numpy())
                preds.append(forecast_hat.detach().cpu().numpy())
                histories.append(timeseries.detach().cpu().numpy())

    losses = np.array(losses)
    average_loss = np.average(losses)
    model.train()

    if return_preds:
        trues = np.concatenate(trues, axis=0)
        preds = np.concatenate(preds, axis=0)
        histories = np.concatenate(histories, axis=0)
        return average_loss, losses, (trues, preds, histories)
    else:
        return average_loss

In [None]:
# Collect targets and predictions on the validation and test splits, pool
# them along the first axis, and compute the forecasting metrics on the pool.
_, _, (trues_val, preds_val, _) = validation(
    args=args, model=model, data_loader=val_dataloader, return_preds=True)
_, _, (trues_test, preds_test, _) = validation(
    args=args, model=model, data_loader=test_dataloader, return_preds=True)

trues = np.concatenate((trues_val, trues_test))
preds = np.concatenate((preds_val, preds_test))

get_forecasting_metrics(y=trues, y_hat=preds)