In [1]:
from transformers import PatchTSMixerConfig, PatchTSMixerForPretraining, Trainer, TrainingArguments
import torch
from torch.utils.data import random_split
from torch.utils.data import Dataset
import os
import numpy as np
import evaluate

  from .autonotebook import tqdm as notebook_tqdm


### Data prep

Generate and prepare dummy data to test the pretraining.

In [2]:
from sklearn.preprocessing import StandardScaler
import pandas as pd
class ETTDataset(Dataset):
    def __init__(self, root_path='/dccstor/dnn_forecasting/FM/data/ETDataset/ETT-small/', data_file='ETTh1.csv', 
                 seq_len=128, pred_len=32,
                 split='train',                  
                 scale=True
                 ):
        
        self.seq_len = seq_len
        self.pred_len = pred_len
        # init
        assert split in ['train', 'test', 'val']
        type_map = {'train': 0, 'val': 1, 'test': 2}
        self.set_type = type_map[split]
                
        self.scale = scale                        

        self.root_path = root_path
        self.data_file = data_file
        self.__read_data__()

    def __read_data__(self):
        self.scaler = StandardScaler()
        df_raw = pd.read_csv(os.path.join(self.root_path, self.data_file))

        border1s = [0, 12 * 30 * 24 - self.seq_len, 12 * 30 * 24 + 4 * 30 * 24 - self.seq_len]
        border2s = [12 * 30 * 24, 12 * 30 * 24 + 4 * 30 * 24, 12 * 30 * 24 + 8 * 30 * 24]
        border1 = border1s[self.set_type]
        border2 = border2s[self.set_type]
        
        cols_data = df_raw.columns[1:]
        df_data = df_raw[cols_data]

        if self.scale:
            train_data = df_data[border1s[0]:border2s[0]]
            self.scaler.fit(train_data.values)
            data = self.scaler.transform(df_data.values)
        else:
            data = df_data.values
        
        self.data_x = data[border1:border2]
        self.data_y = data[border1:border2]        

    def __getitem__(self, index):
        s_begin = index
        s_end = s_begin + self.seq_len
        r_begin = s_end
        r_end = r_begin + self.pred_len 

        seq_x = self.data_x[s_begin:s_end]
        seq_y = self.data_y[r_begin:r_end]        
        return {"context_values": torch.Tensor(seq_x), "target_values": torch.Tensor(seq_y)}

    def __len__(self):
        return len(self.data_x) - self.seq_len - self.pred_len + 1

    def inverse_transform(self, data):
        return self.scaler.inverse_transform(data)


In [3]:
SEQ_LEN = 512
FORECAST_LEN = 96

In [4]:
dset_train = ETTDataset(split="train", seq_len=SEQ_LEN, pred_len=FORECAST_LEN)
dset_val = ETTDataset(split="val", seq_len=SEQ_LEN, pred_len=FORECAST_LEN)
dset_test = ETTDataset(split="test", seq_len=SEQ_LEN, pred_len=FORECAST_LEN)

In [5]:
dd=dset_val.__getitem__(0)
dd["context_values"].shape, dd["target_values"].shape

(torch.Size([512, 7]), torch.Size([96, 7]))

In [6]:
n_features = 7
seq_len = SEQ_LEN
patch_len = 16
stride = patch_len


In [7]:
# num_patches should be (no need to specify)
num_patches = seq_len//patch_len
num_patches

32

### Pretrain the model with HF trainer

In [8]:
config = PatchTSMixerConfig(
    in_channels=n_features,
    seq_len=seq_len,
    patch_len=patch_len,
    stride=stride,
    num_features=48,
    num_layers=2,
    dropout=0.5,
    mode="common_channel",
    revin=True,
    expansion_factor=3,
    head_dropout=0.7,
    forecast_len=FORECAST_LEN
)

In [9]:
model = PatchTSMixerForPretraining(config)

Define the training arguments and train the model.

In [10]:
training_args = TrainingArguments(
        output_dir='./checkpoint',
        overwrite_output_dir=True,
        learning_rate=0.0001,
        num_train_epochs=100,
        do_eval=True,
        evaluation_strategy="epoch",
        per_device_train_batch_size=32,
        per_device_eval_batch_size=32,
        report_to='tensorboard',
        save_strategy='epoch',
        logging_strategy="epoch",
        save_total_limit=3,
        logging_dir='./logs',  # Make sure to specify a logging directory
        log_level="info",  # Explicitly set the logging level
        
#         load_best_model_at_end=True
#         no_cuda=True,
#         use_mps_device = False,
    )

In [11]:
# training_args.set_push_to_hub("ajati/patchtsmixer_pretrained_etth1")

In [12]:
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=dset_train,
    eval_dataset=dset_val,
)

In [13]:
trainer.train()

***** Running training *****
  Num examples = 8,033
  Num Epochs = 100
  Instantaneous batch size per device = 32
  Total train batch size (w. parallel, distributed & accumulation) = 32
  Gradient Accumulation steps = 1
  Total optimization steps = 25,200
  Number of trainable parameters = 49,376
The following columns in the training set don't have a corresponding argument in `PatchTSMixerForPretraining.forward` and have been ignored: target_values. If target_values are not expected by `PatchTSMixerForPretraining.forward`,  you can safely ignore this message.


Epoch,Training Loss,Validation Loss
1,0.88,0.666892
2,0.6808,0.537779
3,0.61,0.474375
4,0.5804,0.441245
5,0.5664,0.422306
6,0.5577,0.407849
7,0.5489,0.394484
8,0.5424,0.38124
9,0.538,0.373011
10,0.5345,0.365847


***** Running Evaluation *****
  Num examples = 2785
  Batch size = 32
The following columns in the evaluation set don't have a corresponding argument in `PatchTSMixerForPretraining.forward` and have been ignored: target_values. If target_values are not expected by `PatchTSMixerForPretraining.forward`,  you can safely ignore this message.
Saving model checkpoint to ./checkpoint/checkpoint-252
Configuration saved in ./checkpoint/checkpoint-252/config.json
Model weights saved in ./checkpoint/checkpoint-252/pytorch_model.bin
Deleting older checkpoint [checkpoint/checkpoint-24892] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 2785
  Batch size = 32
The following columns in the evaluation set don't have a corresponding argument in `PatchTSMixerForPretraining.forward` and have been ignored: target_values. If target_values are not expected by `PatchTSMixerForPretraining.forward`,  you can safely ignore this message.
Saving model checkpoint to ./checkpoint/checkp

TrainOutput(global_step=25200, training_loss=0.5162790231856089, metrics={'train_runtime': 341.0215, 'train_samples_per_second': 2355.57, 'train_steps_per_second': 73.896, 'total_flos': 852929082163200.0, 'train_loss': 0.5162790231856089, 'epoch': 100.0})

In [14]:
out = trainer.evaluate(dset_val)
out

***** Running Evaluation *****
  Num examples = 2785
  Batch size = 32
The following columns in the evaluation set don't have a corresponding argument in `PatchTSMixerForPretraining.forward` and have been ignored: target_values. If target_values are not expected by `PatchTSMixerForPretraining.forward`,  you can safely ignore this message.


{'eval_loss': 0.31454017758369446,
 'eval_runtime': 0.2412,
 'eval_samples_per_second': 11547.349,
 'eval_steps_per_second': 364.871,
 'epoch': 100.0}

In [15]:
out = trainer.evaluate(dset_test)
out

***** Running Evaluation *****
  Num examples = 2785
  Batch size = 32
The following columns in the evaluation set don't have a corresponding argument in `PatchTSMixerForPretraining.forward` and have been ignored: target_values. If target_values are not expected by `PatchTSMixerForPretraining.forward`,  you can safely ignore this message.


{'eval_loss': 0.38878193497657776,
 'eval_runtime': 0.4026,
 'eval_samples_per_second': 6917.179,
 'eval_steps_per_second': 218.568,
 'epoch': 100.0}

In [16]:
# get train dataloader
eval_dataloader = trainer.get_eval_dataloader()
eval_dataloader

<accelerate.data_loader.DataLoaderShard at 0x15536f226580>

## Save batch and output for pytests

In [17]:
! mkdir -p pytest_data/etth1/

In [18]:
for X in eval_dataloader:
    print(X.keys())
    print(X["context_values"].shape)
    torch.save(X, "pytest_data/etth1/batch.pt")
    output_batch = model(X["context_values"])
    print(output_batch.prediction_logits[0, :7, :1, :1])
    break

The following columns in the evaluation set don't have a corresponding argument in `PatchTSMixerForPretraining.forward` and have been ignored: target_values. If target_values are not expected by `PatchTSMixerForPretraining.forward`,  you can safely ignore this message.


dict_keys(['context_values'])
torch.Size([32, 512, 7])
tensor([[[-0.1297]],

        [[ 0.6335]],

        [[-0.0302]],

        [[-0.3391]],

        [[-0.3730]],

        [[ 2.3447]],

        [[ 0.1238]]], device='cuda:0', grad_fn=<SliceBackward0>)


## Saving and loading the models

In [19]:
trainer.save_model("pytest_data/etth1/patchtsmixer_pretrained_etth1")

Saving model checkpoint to pytest_data/etth1/patchtsmixer_pretrained_etth1
Configuration saved in pytest_data/etth1/patchtsmixer_pretrained_etth1/config.json
Model weights saved in pytest_data/etth1/patchtsmixer_pretrained_etth1/pytorch_model.bin


In [20]:
loaded_model = PatchTSMixerForPretraining.from_pretrained('pytest_data/etth1/patchtsmixer_pretrained_etth1').to("cuda")

loading configuration file pytest_data/etth1/patchtsmixer_pretrained_etth1/config.json
Model config PatchTSMixerConfig {
  "architectures": [
    "PatchTSMixerForPretraining"
  ],
  "channel_consistent_masking": true,
  "d_size": "4D",
  "dropout": 0.5,
  "expansion_factor": 3,
  "forecast_channel_indices": null,
  "forecast_len": 96,
  "gated_attn": true,
  "head_agg": null,
  "head_dropout": 0.7,
  "in_channels": 7,
  "init_std": 0.02,
  "mask_input": true,
  "mask_mode": "mask_before_encoder",
  "mask_patch_ratios": [
    1,
    1
  ],
  "mask_patches": [
    2,
    3
  ],
  "mask_ratio": 0.5,
  "mask_type": "random",
  "mask_value": 0,
  "masked_loss": false,
  "mode": "common_channel",
  "model_type": "patchtsmixer",
  "n_classes": 3,
  "n_targets": 3,
  "norm_mlp": "LayerNorm",
  "num_features": 48,
  "num_layers": 2,
  "num_patches": 32,
  "output_range": null,
  "patch_last": true,
  "patch_len": 16,
  "revin": true,
  "seed_number": 42,
  "self_attn": false,
  "self_attn_heads

In [21]:
# loaded_model(test_data.to('mps')).prediction_logits
torch.manual_seed(0)
loaded_model.eval()
with torch.no_grad():
    d_out = loaded_model.forward(X['context_values'].reshape(32,seq_len,n_features).to("cuda"))
print(d_out.prediction_logits[0, :7, :1, :1])

tensor([[[-0.1184]],

        [[ 0.6268]],

        [[-0.0182]],

        [[-0.3095]],

        [[-0.3687]],

        [[ 2.2908]],

        [[ 0.1970]]], device='cuda:0')


## Use the pretrained model to finetune for a forecasting task

In [22]:
from transformers.models.patchtsmixer.modeling_patchtsmixer import PatchTSMixerForForecasting

config.update({"forecast_len": FORECAST_LEN})
finetune_model = PatchTSMixerForForecasting.from_pretrained('pytest_data/etth1/patchtsmixer_pretrained_etth1')


loading configuration file pytest_data/etth1/patchtsmixer_pretrained_etth1/config.json
Model config PatchTSMixerConfig {
  "architectures": [
    "PatchTSMixerForPretraining"
  ],
  "channel_consistent_masking": true,
  "d_size": "4D",
  "dropout": 0.5,
  "expansion_factor": 3,
  "forecast_channel_indices": null,
  "forecast_len": 96,
  "gated_attn": true,
  "head_agg": null,
  "head_dropout": 0.7,
  "in_channels": 7,
  "init_std": 0.02,
  "mask_input": true,
  "mask_mode": "mask_before_encoder",
  "mask_patch_ratios": [
    1,
    1
  ],
  "mask_patches": [
    2,
    3
  ],
  "mask_ratio": 0.5,
  "mask_type": "random",
  "mask_value": 0,
  "masked_loss": false,
  "mode": "common_channel",
  "model_type": "patchtsmixer",
  "n_classes": 3,
  "n_targets": 3,
  "norm_mlp": "LayerNorm",
  "num_features": 48,
  "num_layers": 2,
  "num_patches": 32,
  "output_range": null,
  "patch_last": true,
  "patch_len": 16,
  "revin": true,
  "seed_number": 42,
  "self_attn": false,
  "self_attn_heads

In [23]:
finetune_args = TrainingArguments(
        output_dir='./checkpoint_ftune',
        overwrite_output_dir=True,
        learning_rate=0.0001,
        num_train_epochs=100,
        do_eval=True,
        evaluation_strategy="epoch",
        per_device_train_batch_size=32,
        per_device_eval_batch_size=32,
        report_to='tensorboard',
        save_strategy='epoch',
        logging_strategy="epoch",
        save_total_limit=3,
        logging_dir='./logs_ftune',  # Make sure to specify a logging directory
        
        load_best_model_at_end=True,  # Load the best model when training ends
        metric_for_best_model="eval_loss",  # Metric to monitor for early stopping
        greater_is_better=False,  # For loss
    )

Found safetensors installation, but --save_safetensors=False. Safetensors should be a preferred weights saving format due to security and performance reasons. If your model cannot be saved by safetensors please feel free to open an issue at https://github.com/huggingface/safetensors!
PyTorch: setting up devices


In [24]:
from transformers import EarlyStoppingCallback

# Create the early stopping callback
early_stopping_callback = EarlyStoppingCallback(
    early_stopping_patience=5,  # Number of epochs with no improvement after which to stop
    early_stopping_threshold=0.0001,  # Minimum improvement required to consider as improvement
)

In [25]:
finetune_trainer = Trainer(
    model=finetune_model,
    args=finetune_args,
    train_dataset=dset_train,
    eval_dataset=dset_val,
    callbacks=[early_stopping_callback]
)

In [26]:
finetune_trainer.train()

***** Running training *****
  Num examples = 8,033
  Num Epochs = 100
  Instantaneous batch size per device = 32
  Total train batch size (w. parallel, distributed & accumulation) = 32
  Gradient Accumulation steps = 1
  Total optimization steps = 25,200
  Number of trainable parameters = 196,144


Epoch,Training Loss,Validation Loss
1,0.4897,0.832008
2,0.4203,0.811938
3,0.4068,0.814699
4,0.3974,0.8148
5,0.3954,0.802495
6,0.3908,0.801148
7,0.3877,0.798693
8,0.3893,0.808897
9,0.3842,0.804226
10,0.384,0.800851


***** Running Evaluation *****
  Num examples = 2785
  Batch size = 32
Saving model checkpoint to ./checkpoint_ftune/checkpoint-252
Configuration saved in ./checkpoint_ftune/checkpoint-252/config.json
Model weights saved in ./checkpoint_ftune/checkpoint-252/pytorch_model.bin
Deleting older checkpoint [checkpoint_ftune/checkpoint-4318] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 2785
  Batch size = 32
Saving model checkpoint to ./checkpoint_ftune/checkpoint-504
Configuration saved in ./checkpoint_ftune/checkpoint-504/config.json
Model weights saved in ./checkpoint_ftune/checkpoint-504/pytorch_model.bin
Deleting older checkpoint [checkpoint_ftune/checkpoint-4572] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 2785
  Batch size = 32
Saving model checkpoint to ./checkpoint_ftune/checkpoint-756
Configuration saved in ./checkpoint_ftune/checkpoint-756/config.json
Model weights saved in ./checkpoint_ftune/checkpoint-756/pytorch_mo

TrainOutput(global_step=5292, training_loss=0.3904967967615106, metrics={'train_runtime': 73.8278, 'train_samples_per_second': 10880.72, 'train_steps_per_second': 341.335, 'total_flos': 711526928007168.0, 'train_loss': 0.3904967967615106, 'epoch': 21.0})

In [27]:
finetune_trainer.evaluate(dset_test)

***** Running Evaluation *****
  Num examples = 2785
  Batch size = 32


{'eval_loss': 0.3892132043838501,
 'eval_runtime': 0.276,
 'eval_samples_per_second': 10092.235,
 'eval_steps_per_second': 318.893,
 'epoch': 21.0}

In [28]:
finetune_trainer.save_model("pytest_data/etth1/patchtsmixer_finetune_forecast_etth1")

Saving model checkpoint to pytest_data/etth1/patchtsmixer_finetune_forecast_etth1
Configuration saved in pytest_data/etth1/patchtsmixer_finetune_forecast_etth1/config.json
Model weights saved in pytest_data/etth1/patchtsmixer_finetune_forecast_etth1/pytorch_model.bin


In [29]:
# get train dataloader
eval_dataloader = finetune_trainer.get_eval_dataloader()
eval_dataloader

<accelerate.data_loader.DataLoaderShard at 0x155421f62d00>

In [30]:
loaded_model = PatchTSMixerForForecasting.from_pretrained('pytest_data/etth1/patchtsmixer_finetune_forecast_etth1').to("cuda:0")

loading configuration file pytest_data/etth1/patchtsmixer_finetune_forecast_etth1/config.json
Model config PatchTSMixerConfig {
  "_name_or_path": "pytest_data/etth1/patchtsmixer_pretrained_etth1",
  "architectures": [
    "PatchTSMixerForForecasting"
  ],
  "channel_consistent_masking": true,
  "d_size": "4D",
  "dropout": 0.5,
  "expansion_factor": 3,
  "forecast_channel_indices": null,
  "forecast_len": 96,
  "gated_attn": true,
  "head_agg": null,
  "head_dropout": 0.7,
  "in_channels": 7,
  "init_std": 0.02,
  "mask_input": true,
  "mask_mode": "mask_before_encoder",
  "mask_patch_ratios": [
    1,
    1
  ],
  "mask_patches": [
    2,
    3
  ],
  "mask_ratio": 0.5,
  "mask_type": "random",
  "mask_value": 0,
  "masked_loss": false,
  "mode": "common_channel",
  "model_type": "patchtsmixer",
  "n_classes": 3,
  "n_targets": 3,
  "norm_mlp": "LayerNorm",
  "num_features": 48,
  "num_layers": 2,
  "num_patches": 32,
  "output_range": null,
  "patch_last": true,
  "patch_len": 16,
 

In [31]:
for X in eval_dataloader:
    print(X.keys())
    print(X["context_values"].shape, X["target_values"].shape)
    torch.save(X, "pytest_data/etth1/batch_forecast.pt")
    break

dict_keys(['context_values', 'target_values'])
torch.Size([32, 512, 7]) torch.Size([32, 96, 7])


In [32]:
torch.manual_seed(0)
loaded_model.eval()
with torch.no_grad():
    output_batch = loaded_model(X["context_values"])
print(output_batch.prediction_logits[0, :1, :7])

tensor([[ 0.5436,  0.0911,  0.6704,  0.8515, -0.4594, -2.3386,  0.3025]],
       device='cuda:0')


## Use pretrained model to finetune for a prediction task

In [33]:
from transformers.models.patchtsmixer.modeling_patchtsmixer import PatchTSMixerForForecasting
config.update({"forecast_channel_indices": [1,4]})
finetune_model_prediction = PatchTSMixerForForecasting.from_pretrained('pytest_data/etth1/patchtsmixer_pretrained_etth1', config=config)

loading weights file pytest_data/etth1/patchtsmixer_pretrained_etth1/pytorch_model.bin
Some weights of the model checkpoint at pytest_data/etth1/patchtsmixer_pretrained_etth1 were not used when initializing PatchTSMixerForForecasting: ['head.head.base_pt_block.1.bias', 'head.head.base_pt_block.1.weight']
- This IS expected if you are initializing PatchTSMixerForForecasting from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing PatchTSMixerForForecasting from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of PatchTSMixerForForecasting were not initialized from the model checkpoint at pytest_data/etth1/patchtsmixer_pretrained_etth1 and are newly initialized: ['head.head.base_forecast_block.1.weigh

In [34]:
finetune_trainer = Trainer(
    model=finetune_model_prediction,
    args=finetune_args,
    train_dataset=dset_train,
    eval_dataset=dset_val,
    callbacks=[early_stopping_callback]
)

In [35]:
finetune_trainer.train()

***** Running training *****
  Num examples = 8,033
  Num Epochs = 100
  Instantaneous batch size per device = 32
  Total train batch size (w. parallel, distributed & accumulation) = 32
  Gradient Accumulation steps = 1
  Total optimization steps = 25,200
  Number of trainable parameters = 196,144


Epoch,Training Loss,Validation Loss
1,0.6426,1.172535
2,0.5725,1.124103
3,0.5541,1.112482
4,0.5422,1.116687
5,0.5403,1.089863
6,0.5311,1.094732
7,0.5276,1.082633
8,0.5232,1.096055
9,0.5214,1.083149
10,0.5192,1.081012


***** Running Evaluation *****
  Num examples = 2785
  Batch size = 32
Saving model checkpoint to ./checkpoint_ftune/checkpoint-252
Configuration saved in ./checkpoint_ftune/checkpoint-252/config.json
Model weights saved in ./checkpoint_ftune/checkpoint-252/pytorch_model.bin
Deleting older checkpoint [checkpoint_ftune/checkpoint-4032] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 2785
  Batch size = 32
Saving model checkpoint to ./checkpoint_ftune/checkpoint-504
Configuration saved in ./checkpoint_ftune/checkpoint-504/config.json
Model weights saved in ./checkpoint_ftune/checkpoint-504/pytorch_model.bin
Deleting older checkpoint [checkpoint_ftune/checkpoint-5040] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 2785
  Batch size = 32
Saving model checkpoint to ./checkpoint_ftune/checkpoint-756
Configuration saved in ./checkpoint_ftune/checkpoint-756/config.json
Model weights saved in ./checkpoint_ftune/checkpoint-756/pytorch_mo

TrainOutput(global_step=4284, training_loss=0.5330348900831492, metrics={'train_runtime': 60.488, 'train_samples_per_second': 13280.319, 'train_steps_per_second': 416.612, 'total_flos': 575997989339136.0, 'train_loss': 0.5330348900831492, 'epoch': 17.0})

In [36]:
finetune_trainer.save_model("pytest_data/etth1/patchtsmixer_finetune_prediction_etth1")

Saving model checkpoint to pytest_data/etth1/patchtsmixer_finetune_prediction_etth1
Configuration saved in pytest_data/etth1/patchtsmixer_finetune_prediction_etth1/config.json
Model weights saved in pytest_data/etth1/patchtsmixer_finetune_prediction_etth1/pytorch_model.bin


In [37]:
# get train dataloader
eval_dataloader = finetune_trainer.get_eval_dataloader()
eval_dataloader

<accelerate.data_loader.DataLoaderShard at 0x15536f203070>

In [38]:
loaded_model = PatchTSMixerForForecasting.from_pretrained('pytest_data/etth1/patchtsmixer_finetune_prediction_etth1').to("cuda:0")

loading configuration file pytest_data/etth1/patchtsmixer_finetune_prediction_etth1/config.json
Model config PatchTSMixerConfig {
  "_name_or_path": "pytest_data/etth1/patchtsmixer_pretrained_etth1",
  "architectures": [
    "PatchTSMixerForForecasting"
  ],
  "channel_consistent_masking": true,
  "d_size": "4D",
  "dropout": 0.5,
  "expansion_factor": 3,
  "forecast_channel_indices": [
    1,
    4
  ],
  "forecast_len": 96,
  "gated_attn": true,
  "head_agg": null,
  "head_dropout": 0.7,
  "in_channels": 7,
  "init_std": 0.02,
  "mask_input": true,
  "mask_mode": "mask_before_encoder",
  "mask_patch_ratios": [
    1,
    1
  ],
  "mask_patches": [
    2,
    3
  ],
  "mask_ratio": 0.5,
  "mask_type": "random",
  "mask_value": 0,
  "masked_loss": false,
  "mode": "common_channel",
  "model_type": "patchtsmixer",
  "n_classes": 3,
  "n_targets": 3,
  "norm_mlp": "LayerNorm",
  "num_features": 48,
  "num_layers": 2,
  "num_patches": 32,
  "output_range": null,
  "patch_last": true,
  "p

In [39]:
for X in eval_dataloader:
    print(X.keys())
    print(X["context_values"].shape, X["target_values"].shape)
    torch.save(X, "pytest_data/etth1/batch_prediction.pt")
    break

dict_keys(['context_values', 'target_values'])
torch.Size([32, 512, 7]) torch.Size([32, 96, 7])


In [40]:
torch.manual_seed(0)
loaded_model.eval()
with torch.no_grad():
    output_batch = loaded_model(X["context_values"])
print(output_batch.prediction_logits.shape)
print(output_batch.prediction_logits[0, :1, :7])

torch.Size([32, 96, 2])
tensor([[ 0.1204, -0.3356]], device='cuda:0')


### register model under Auto Classes and load the model

In [41]:
from transformers import AutoConfig, AutoModel, AutoModelForPreTraining

In [42]:
# AutoConfig.register("patchtsmixer", PatchTSMixerConfig)
AutoModelForPreTraining.register(PatchTSMixerConfig, PatchTSMixerForPretraining)

In [43]:
auto_model = AutoModelForPreTraining.from_pretrained('pytest_data/etth1/patchtsmixer_pretrained_etth1')

loading configuration file pytest_data/etth1/patchtsmixer_pretrained_etth1/config.json
Model config PatchTSMixerConfig {
  "_name_or_path": "pytest_data/etth1/patchtsmixer_pretrained_etth1",
  "architectures": [
    "PatchTSMixerForPretraining"
  ],
  "channel_consistent_masking": true,
  "d_size": "4D",
  "dropout": 0.5,
  "expansion_factor": 3,
  "forecast_channel_indices": null,
  "forecast_len": 96,
  "gated_attn": true,
  "head_agg": null,
  "head_dropout": 0.7,
  "in_channels": 7,
  "init_std": 0.02,
  "mask_input": true,
  "mask_mode": "mask_before_encoder",
  "mask_patch_ratios": [
    1,
    1
  ],
  "mask_patches": [
    2,
    3
  ],
  "mask_ratio": 0.5,
  "mask_type": "random",
  "mask_value": 0,
  "masked_loss": false,
  "mode": "common_channel",
  "model_type": "patchtsmixer",
  "n_classes": 3,
  "n_targets": 3,
  "norm_mlp": "LayerNorm",
  "num_features": 48,
  "num_layers": 2,
  "num_patches": 32,
  "output_range": null,
  "patch_last": true,
  "patch_len": 16,
  "revin

In [44]:
# auto_model.to('mps')

In [45]:
# auto_model(test_data.to('mps')).prediction_logits
auto_model(dset_test.__getitem__(0)['context_values'].reshape(1,seq_len,n_features)).prediction_logits

tensor([[[[ 0.2044,  0.2675,  0.3120,  ...,  0.6149,  0.7067,  0.7208],
          [ 0.1787,  0.0317, -0.1542,  ...,  0.3104,  0.4169,  0.5194],
          [ 0.5177,  0.5340,  0.5295,  ..., -1.2368, -1.0592, -0.8515],
          ...,
          [ 0.8223,  0.8504,  0.8444,  ..., -2.4864, -2.1504, -1.7067],
          [-0.2432, -0.0148,  0.2164,  ...,  1.0069,  0.9533,  0.8355],
          [-0.3156, -0.4576, -0.6203,  ...,  0.3079,  0.2728,  0.2093]],

         [[-1.4562, -1.5809, -1.6935,  ..., -0.9289, -0.9019, -0.8608],
          [-0.6807, -0.7484, -0.7978,  ..., -1.8741, -1.4803, -1.0297],
          [-0.5957, -0.6374, -0.7216,  ..., -1.2122, -1.4055, -1.4631],
          ...,
          [ 1.0290,  1.0744,  1.0683,  ...,  1.5893,  1.1897,  0.7693],
          [-0.4916, -0.5638, -0.5949,  ...,  0.9490,  0.7572,  0.6340],
          [ 0.6909,  0.7657,  0.8114,  ...,  0.0976,  0.1482,  0.1988]],

         [[ 0.1302,  0.2059,  0.2690,  ...,  0.6550,  0.6677,  0.6167],
          [ 0.0648, -0.0913, -