# training using darts library

TL using the same number of IDs is possible

Reshaping does not seem possible.

when the target dataset has more IDs than the source set, we use the same model to predict multiple ids 

the models are trained on the same amount of IDs as the target test set contains 

residuals of the target test set are not considered 

TODO: Fine tuning and saving outputs


In [1]:
from darts import TimeSeries
from darts.utils.losses import *
from darts.models import *
from darts.metrics.metrics import mse

from utils import data_handling, helpers


  from tqdm.autonotebook import tqdm


# Load datasets

In [2]:
# use electricity dataset
electricity_dict = data_handling.format_electricity()

for key, value in electricity_dict.items():
			electricity_dict[key]= data_handling.df_to_tensor(value)

# normalize train and use matrics for val and test
electricity_dict["train"], train_standardize_dict = helpers.custom_standardizer(electricity_dict["train"])
electricity_dict["validation"], _ = helpers.custom_standardizer(electricity_dict["validation"], train_standardize_dict)
electricity_dict["test"], _ = helpers.custom_standardizer(electricity_dict["test"], train_standardize_dict)

Length train set: 209 days, 0:00:00
Length validation set: 34 days, 0:00:00
Saving train, validation and test df for faster loading


In [72]:
# bavaria dataset
data_tensor = data_handling.load_bavaria_electricity()
bavaria_dict, standadizer = data_handling.train_test_split_eu_elec(data_tensor, standardize=True)

In [76]:
# building genome project dataset
data_tensor = data_handling.load_genome_project_data()
gp_dict, standadizer = data_handling.train_test_split_eu_elec(data_tensor, standardize=True)

torch.Size([2463, 67])

In [4]:
# euro dataset
data_tensor = data_handling.eu_electricity_to_tensor()
euro_dict, standadizer = data_handling.train_test_split_eu_elec(data_tensor, standardize=True)

# Define and train models

In [69]:
def train_models(source_data_train, source_data_val, epochs=1):
    """
    -Convert train/val data into Darts TimeSeries format
    -Instantiate different models
    -Fit models to dataset

    Return: fitted models
    """
    ts_train_source = TimeSeries.from_values(source_data_train)
    ts_val_source = TimeSeries.from_values(source_data_val)
   
   
    # model definition

    # Slicing hyper-params:
    IN_LEN = 96
    OUT_LEN = 96

    # Architecture hyper-params:
    NUM_STACKS = 4
    NUM_BLOCKS = 1
    NUM_LAYERS = 2
    LAYER_WIDTH = 128
    COEFFS_DIM = 11

    # Training settings:
    LR = 1e-3
    BATCH_SIZE = 64

    NUM_EPOCHS = epochs

    USE_REVIN = True

    LOSS_FN = torch.nn.MSELoss()

    # reproducibility
    np.random.seed(42)
    torch.manual_seed(42)

    nbeats_model = NBEATSModel(
        input_chunk_length=IN_LEN,
        output_chunk_length=OUT_LEN,
        batch_size=BATCH_SIZE,
        num_stacks=NUM_STACKS,
        num_blocks=NUM_BLOCKS,
        num_layers=NUM_LAYERS,
        layer_widths=LAYER_WIDTH,
        expansion_coefficient_dim=COEFFS_DIM,
        loss_fn=LOSS_FN,
        use_reversible_instance_norm=USE_REVIN,
        optimizer_kwargs={"lr": LR},
        pl_trainer_kwargs={
            "enable_progress_bar": True,
            # change this one to "gpu" if your notebook does run in a GPU environment:
            "accelerator": "gpu",
        },
    )


    transformer_model = TransformerModel(
        input_chunk_length=IN_LEN, 
        output_chunk_length=OUT_LEN,
        output_chunk_shift=0, 
        d_model=64, 
        nhead=4, 
        num_encoder_layers=3, 
        num_decoder_layers=3, 
        dim_feedforward=512, 
        dropout=0.1, 
        activation='relu', 
        loss_fn=LOSS_FN,
        norm_type=None, 
        custom_encoder=None, 
        custom_decoder=None,
        use_reversible_instance_norm=USE_REVIN,
    )

    tsmixer_model = TSMixerModel(
        input_chunk_length=IN_LEN, 
        output_chunk_length=OUT_LEN, 
        output_chunk_shift=0,
        hidden_size=64, 
        ff_size=64, 
        num_blocks=2, 
        activation='ReLU', 
        dropout=0.1, 
        loss_fn=LOSS_FN,
        norm_type='LayerNorm', 
        normalize_before=False, 
        use_static_covariates=False,
    )

    nbeats_model.fit(
        ts_train_source,
        val_series=ts_val_source,
        num_loader_workers=4,
        epochs=NUM_EPOCHS,
        max_samples_per_ts=None,
    )

    transformer_model.fit(
        ts_train_source,
        val_series=ts_val_source,
        num_loader_workers=4,
        epochs=NUM_EPOCHS,
        max_samples_per_ts=None,
    )

    tsmixer_model.fit(
        ts_train_source,
        val_series=ts_val_source,
        num_loader_workers=4,
        epochs=NUM_EPOCHS,
        max_samples_per_ts=None,
    )


    return transformer_model, nbeats_model, tsmixer_model

def fine_tune_models(transformer_model, nbeats_model, tsmixer_model, ts_fine_tuning, epochs=1):
    """
    Fine tune models over specified epochs

    Input:  -trained models
            -fine tuning dataset
            -epochs

    Returns: fitted models
    """

    NUM_EPOCHS = epochs

    nbeats_model.fit(
        ts_fine_tuning,
        num_loader_workers=4,
        epochs=NUM_EPOCHS,
        max_samples_per_ts=None,
    )

    transformer_model.fit(
        ts_fine_tuning,
        num_loader_workers=4,
        epochs=NUM_EPOCHS,
        max_samples_per_ts=None,
    )

    tsmixer_model.fit(
        ts_fine_tuning,
        num_loader_workers=4,
        epochs=NUM_EPOCHS,
        max_samples_per_ts=None,
    )

    return transformer_model, nbeats_model, tsmixer_model


def evaluate(transformer, nhits, tsnmix, target_test_reshaped):
    """
    Evaluates models on target test set
    

    Input:  -trained models
            -List of target test sets shaped according to models

    Output: [transformer, nhits, tsnmix]-Losses
    """

    # evaluate multiple multivariate ID chunks
    big_window = []
    big_target = []

    for element in target_test_reshaped:
        element = TimeSeries.from_values(element)
        forecasting_endpoint = int(len(element)) - 96*2

        window = [element[i:i+96] for i in range(0, forecasting_endpoint, 5)]
        target = [element[i+96:i+96+96] for i in range(0, forecasting_endpoint, 5)]

        big_window.append(window)
        big_target.append(target)

    window = [item for sublist in big_window for item in sublist]
    target = [item for sublist in big_target for item in sublist]

    # predict over dataloader with slidingwindow implementation and 5 time step shifts for each input
    preds_transformer = transformer.predict(n=96, series=window)
    preds_nhits = nhits.predict(n=96, series=window)
    preds_tsnmix = tsnmix.predict(n=96, series=window)

    loss_transformer = mse(preds_transformer, target)
    loss_nhits = mse(preds_nhits, target)
    loss_tsnmix = mse(preds_tsnmix, target)

    mean_loss_transformer = sum(loss_transformer) / len(loss_transformer)
    mean_loss_nhits = sum(loss_nhits) / len(loss_nhits)
    mean_loss_tsnmix = sum(loss_tsnmix) / len(loss_tsnmix)

    return mean_loss_transformer, mean_loss_nhits, mean_loss_tsnmix

In [66]:
def train_and_zero_shot(source_data, target_data):
    # either reshape source or target dataset according to which has less IDs
    source_ids = source_data["train"].size(1)
    target_ids = target_data["test"].size(1)

    

    if target_ids < source_ids:
        source_train = source_data["train"][:,:target_ids]
        source_val = source_data["validation"][:,:target_ids]
        
        target_test = [target_data["test"]]

        target_fine_tuning = target_data["train"][-24*28:,:target_ids]
        ts_fine_tuning = TimeSeries.from_values(target_fine_tuning)


    else:
        source_train = source_data["train"]
        source_val = source_data["validation"]
        
        n_subset = int(target_ids / source_ids)
        target_test = []
        for i in range(n_subset):
            target_test.append(target_data["test"][:,(i*source_ids): (i*source_ids + source_ids)])

        target_fine_tuning = target_data["train"][-24*28:,:source_ids]
        ts_fine_tuning = TimeSeries.from_values(target_fine_tuning)

    transformer, nhits, tsnmix = train_models(source_train, source_val, epochs=5)

    zero_shot_loss = evaluate(transformer, nhits, tsnmix, target_test)
    
    return zero_shot_loss#, fine_tuning_loss

"""
    # now we do fine-tuning
    transformer_model, nbeats_model, tsmixer_model = fine_tune_models(transformer_model, nbeats_model, tsmixer_model, ts_fine_tuning)
    one_epoch_loss = evaluate(transformer, nhits, tsnmix, ts_test_target)

    transformer_model, nbeats_model, tsmixer_model = fine_tune_models(transformer_model, nbeats_model, tsmixer_model, ts_fine_tuning)
    two_epoch_loss = evaluate(transformer, nhits, tsnmix, ts_test_target)

    transformer_model, nbeats_model, tsmixer_model = fine_tune_models(transformer_model, nbeats_model, tsmixer_model, ts_fine_tuning)
    three_epoch_loss = evaluate(transformer, nhits, tsnmix, ts_test_target)

    transformer_model, nbeats_model, tsmixer_model = fine_tune_models(transformer_model, nbeats_model, tsmixer_model, ts_fine_tuning)
    four_epoch_loss = evaluate(transformer, nhits, tsnmix, ts_test_target)

    fine_tuning_loss = [one_epoch_loss, two_epoch_loss, three_epoch_loss, four_epoch_loss]
"""
    


'\n    # now we do fine-tuning\n    transformer_model, nbeats_model, tsmixer_model = fine_tune_models(transformer_model, nbeats_model, tsmixer_model, ts_fine_tuning)\n    one_epoch_loss = evaluate(transformer, nhits, tsnmix, ts_test_target)\n\n    transformer_model, nbeats_model, tsmixer_model = fine_tune_models(transformer_model, nbeats_model, tsmixer_model, ts_fine_tuning)\n    two_epoch_loss = evaluate(transformer, nhits, tsnmix, ts_test_target)\n\n    transformer_model, nbeats_model, tsmixer_model = fine_tune_models(transformer_model, nbeats_model, tsmixer_model, ts_fine_tuning)\n    three_epoch_loss = evaluate(transformer, nhits, tsnmix, ts_test_target)\n\n    transformer_model, nbeats_model, tsmixer_model = fine_tune_models(transformer_model, nbeats_model, tsmixer_model, ts_fine_tuning)\n    four_epoch_loss = evaluate(transformer, nhits, tsnmix, ts_test_target)\n\n    fine_tuning_loss = [one_epoch_loss, two_epoch_loss, three_epoch_loss, four_epoch_loss]\n'

In [50]:
loss_transformer

{'elec_to_bavaria': 0.00237484017468907,
 'elec_to_eu': 1.4420697944454963,
 'bavaria_to_elec': 9.55189398605458,
 'bavaria_to_euro': 9.893485133281766,
 'euro_to_bavaria': 0.002463448156121668,
 'euro_to_elec': 1.541811576072317}

In [51]:
loss_nhits

{'elec_to_bavaria': 0.002277523254883568,
 'elec_to_eu': 1.016388186012315,
 'bavaria_to_elec': 4.063811886484605,
 'bavaria_to_euro': 3.042869932451673,
 'euro_to_bavaria': 0.002474944778349619,
 'euro_to_elec': 1.2644093441267084}

In [52]:
loss_tsnmix

{'elec_to_bavaria': 2.406322558633574,
 'elec_to_eu': 11.454085518858218,
 'bavaria_to_elec': 1.3134613719082227,
 'bavaria_to_euro': 34.74948891330017,
 'euro_to_bavaria': 7.245469826918382,
 'euro_to_elec': 1.7667600586466545}

In [48]:
loss_transformer = {}
loss_nhits = {}
loss_tsnmix = {}

loss_transformer["elec_to_bavaria"], loss_nhits["elec_to_bavaria"], loss_tsnmix["elec_to_bavaria"] = train_and_zero_shot(electricity_dict, bavaria_dict)
loss_transformer["elec_to_eu"], loss_nhits["elec_to_eu"], loss_tsnmix["elec_to_eu"] = train_and_zero_shot(electricity_dict, euro_dict)

loss_transformer["bavaria_to_elec"], loss_nhits["bavaria_to_elec"], loss_tsnmix["bavaria_to_elec"] = train_and_zero_shot(bavaria_dict, electricity_dict)
loss_transformer["bavaria_to_euro"], loss_nhits["bavaria_to_euro"], loss_tsnmix["bavaria_to_euro"] = train_and_zero_shot(bavaria_dict, euro_dict)

loss_transformer["euro_to_bavaria"], loss_nhits["euro_to_bavaria"], loss_tsnmix["euro_to_bavaria"] = train_and_zero_shot(euro_dict, bavaria_dict)
loss_transformer["euro_to_elec"], loss_nhits["euro_to_elec"], loss_tsnmix["euro_to_elec"] = train_and_zero_shot(euro_dict, electricity_dict)


Trainer will use only 1 of 3 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=3)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
/vol/fob-vol7/nebenf21/reinbene/bene/MA/myenv/lib/python3.10/site-packages/lightning_fabric/plugins/environments/slurm.py:204: The `srun` command is available on your system but is not used. HINT: If your intention is to run Lightning on SLURM, prepend your python command with `srun` like so: srun python /vol/fob-vol7/nebenf21/reinbene/bene/MA/myenv/lib/py ...
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2]

  | Name          | Type             | Params
---------------------------------------------------
0 | criterion     | MSELoss          | 0     
1 | train_metr

Epoch 4: 100%|██████████| 76/76 [00:02<00:00, 33.54it/s, train_loss=0.277, val_loss=0.521]

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|██████████| 76/76 [00:02<00:00, 33.50it/s, train_loss=0.277, val_loss=0.521]


Trainer will use only 1 of 3 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=3)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2]

  | Name                | Type                | Params
------------------------------------------------------------
0 | criterion           | MSELoss             | 0     
1 | train_metrics       | MetricCollection    | 0     
2 | val_metrics         | MetricCollection    | 0     
3 | rin                 | RINorm              | 134   
4 | encoder             | Linear              | 4.4 K 
5 | positional_encoding | _PositionalEncoding | 0     
6 | transformer         | Transformer         | 548 K 
7 | decoder         

Epoch 4: 100%|██████████| 151/151 [00:07<00:00, 19.16it/s, train_loss=0.302, val_loss=0.531]

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|██████████| 151/151 [00:07<00:00, 19.16it/s, train_loss=0.302, val_loss=0.531]


Trainer will use only 1 of 3 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=3)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2]

  | Name                | Type             | Params
---------------------------------------------------------
0 | criterion           | MSELoss          | 0     
1 | train_metrics       | MetricCollection | 0     
2 | val_metrics         | MetricCollection | 0     
3 | fc_hist             | Linear           | 9.3 K 
4 | feature_mixing_hist | _FeatureMixing   | 25.2 K
5 | conditional_mixer   | ModuleList       | 84.4 K
6 | fc_out              | Linear           | 4.4 K 
-----------------------------------------------

Epoch 4: 100%|██████████| 151/151 [00:03<00:00, 47.56it/s, train_loss=0.159, val_loss=0.690]

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|██████████| 151/151 [00:03<00:00, 47.53it/s, train_loss=0.159, val_loss=0.690]


Trainer will use only 1 of 3 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=3)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2]


Predicting DataLoader 0: 100%|██████████| 15/15 [00:00<00:00, 35.57it/s]


Trainer will use only 1 of 3 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=3)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2]


Predicting DataLoader 0: 100%|██████████| 8/8 [00:00<00:00, 10.06it/s]

Trainer will use only 1 of 3 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=3)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2]



Predicting DataLoader 0: 100%|██████████| 15/15 [00:00<00:00, 30.48it/s]


Trainer will use only 1 of 3 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=3)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
/vol/fob-vol7/nebenf21/reinbene/bene/MA/myenv/lib/python3.10/site-packages/lightning_fabric/plugins/environments/slurm.py:204: The `srun` command is available on your system but is not used. HINT: If your intention is to run Lightning on SLURM, prepend your python command with `srun` like so: srun python /vol/fob-vol7/nebenf21/reinbene/bene/MA/myenv/lib/py ...
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2]

  | Name          | Type             | Params
---------------------------------------------------
0 | criterion     | MSELoss          | 0     
1 | train_metr

Epoch 4: 100%|██████████| 76/76 [00:02<00:00, 33.80it/s, train_loss=0.364, val_loss=0.448]

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|██████████| 76/76 [00:02<00:00, 33.76it/s, train_loss=0.364, val_loss=0.448]


Trainer will use only 1 of 3 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=3)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2]

  | Name                | Type                | Params
------------------------------------------------------------
0 | criterion           | MSELoss             | 0     
1 | train_metrics       | MetricCollection    | 0     
2 | val_metrics         | MetricCollection    | 0     
3 | rin                 | RINorm              | 416   
4 | encoder             | Linear              | 13.4 K
5 | positional_encoding | _PositionalEncoding | 0     
6 | transformer         | Transformer         | 548 K 
7 | decoder         

Epoch 4: 100%|██████████| 151/151 [00:07<00:00, 20.05it/s, train_loss=0.219, val_loss=0.382]

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|██████████| 151/151 [00:07<00:00, 20.05it/s, train_loss=0.219, val_loss=0.382]


Trainer will use only 1 of 3 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=3)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2]

  | Name                | Type             | Params
---------------------------------------------------------
0 | criterion           | MSELoss          | 0     
1 | train_metrics       | MetricCollection | 0     
2 | val_metrics         | MetricCollection | 0     
3 | fc_hist             | Linear           | 9.3 K 
4 | feature_mixing_hist | _FeatureMixing   | 43.2 K
5 | conditional_mixer   | ModuleList       | 84.4 K
6 | fc_out              | Linear           | 13.5 K
-----------------------------------------------

Epoch 4: 100%|██████████| 151/151 [00:03<00:00, 45.88it/s, train_loss=0.131, val_loss=0.550]

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|██████████| 151/151 [00:03<00:00, 45.84it/s, train_loss=0.131, val_loss=0.550]


Trainer will use only 1 of 3 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=3)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2]


Predicting DataLoader 0: 100%|██████████| 54/54 [00:01<00:00, 31.31it/s]


Trainer will use only 1 of 3 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=3)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2]


Predicting DataLoader 0: 100%|██████████| 27/27 [00:01<00:00, 15.18it/s]


Trainer will use only 1 of 3 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=3)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2]


Predicting DataLoader 0: 100%|██████████| 54/54 [00:01<00:00, 31.74it/s]


Trainer will use only 1 of 3 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=3)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
/vol/fob-vol7/nebenf21/reinbene/bene/MA/myenv/lib/python3.10/site-packages/lightning_fabric/plugins/environments/slurm.py:204: The `srun` command is available on your system but is not used. HINT: If your intention is to run Lightning on SLURM, prepend your python command with `srun` like so: srun python /vol/fob-vol7/nebenf21/reinbene/bene/MA/myenv/lib/py ...
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2]

  | Name          | Type             | Params
---------------------------------------------------
0 | criterion     | MSELoss          | 0     
1 | train_metr

Epoch 4: 100%|██████████| 132/132 [00:02<00:00, 46.43it/s, train_loss=0.000319, val_loss=0.000184]

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|██████████| 132/132 [00:02<00:00, 46.40it/s, train_loss=0.000319, val_loss=0.000184]


Trainer will use only 1 of 3 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=3)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2]

  | Name                | Type                | Params
------------------------------------------------------------
0 | criterion           | MSELoss             | 0     
1 | train_metrics       | MetricCollection    | 0     
2 | val_metrics         | MetricCollection    | 0     
3 | rin                 | RINorm              | 134   
4 | encoder             | Linear              | 4.4 K 
5 | positional_encoding | _PositionalEncoding | 0     
6 | transformer         | Transformer         | 548 K 
7 | decoder         

Epoch 4: 100%|██████████| 264/264 [00:13<00:00, 20.02it/s, train_loss=0.000289, val_loss=0.000194]

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|██████████| 264/264 [00:13<00:00, 20.02it/s, train_loss=0.000289, val_loss=0.000194]


Trainer will use only 1 of 3 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=3)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2]

  | Name                | Type             | Params
---------------------------------------------------------
0 | criterion           | MSELoss          | 0     
1 | train_metrics       | MetricCollection | 0     
2 | val_metrics         | MetricCollection | 0     
3 | fc_hist             | Linear           | 9.3 K 
4 | feature_mixing_hist | _FeatureMixing   | 25.2 K
5 | conditional_mixer   | ModuleList       | 84.4 K
6 | fc_out              | Linear           | 4.4 K 
-----------------------------------------------

Epoch 4: 100%|██████████| 264/264 [00:05<00:00, 48.24it/s, train_loss=0.000779, val_loss=0.0982]

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|██████████| 264/264 [00:05<00:00, 48.21it/s, train_loss=0.000779, val_loss=0.0982]


Trainer will use only 1 of 3 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=3)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2]


Predicting DataLoader 0: 100%|██████████| 18/18 [00:00<00:00, 30.46it/s]

Trainer will use only 1 of 3 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=3)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2]



Predicting DataLoader 0: 100%|██████████| 9/9 [00:00<00:00, 23.96it/s]

Trainer will use only 1 of 3 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=3)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2]



Predicting DataLoader 0: 100%|██████████| 18/18 [00:00<00:00, 38.50it/s]


Trainer will use only 1 of 3 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=3)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
/vol/fob-vol7/nebenf21/reinbene/bene/MA/myenv/lib/python3.10/site-packages/lightning_fabric/plugins/environments/slurm.py:204: The `srun` command is available on your system but is not used. HINT: If your intention is to run Lightning on SLURM, prepend your python command with `srun` like so: srun python /vol/fob-vol7/nebenf21/reinbene/bene/MA/myenv/lib/py ...
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2]

  | Name          | Type             | Params
---------------------------------------------------
0 | criterion     | MSELoss          | 0     
1 | train_metr

Epoch 4: 100%|██████████| 132/132 [00:03<00:00, 40.12it/s, train_loss=0.000319, val_loss=0.000184]

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|██████████| 132/132 [00:03<00:00, 40.09it/s, train_loss=0.000319, val_loss=0.000184]


Trainer will use only 1 of 3 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=3)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2]

  | Name                | Type                | Params
------------------------------------------------------------
0 | criterion           | MSELoss             | 0     
1 | train_metrics       | MetricCollection    | 0     
2 | val_metrics         | MetricCollection    | 0     
3 | rin                 | RINorm              | 134   
4 | encoder             | Linear              | 4.4 K 
5 | positional_encoding | _PositionalEncoding | 0     
6 | transformer         | Transformer         | 548 K 
7 | decoder         

Epoch 4: 100%|██████████| 264/264 [00:13<00:00, 19.93it/s, train_loss=0.000289, val_loss=0.000194]

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|██████████| 264/264 [00:13<00:00, 19.93it/s, train_loss=0.000289, val_loss=0.000194]


Trainer will use only 1 of 3 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=3)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2]

  | Name                | Type             | Params
---------------------------------------------------------
0 | criterion           | MSELoss          | 0     
1 | train_metrics       | MetricCollection | 0     
2 | val_metrics         | MetricCollection | 0     
3 | fc_hist             | Linear           | 9.3 K 
4 | feature_mixing_hist | _FeatureMixing   | 25.2 K
5 | conditional_mixer   | ModuleList       | 84.4 K
6 | fc_out              | Linear           | 4.4 K 
-----------------------------------------------

Epoch 4: 100%|██████████| 264/264 [00:05<00:00, 51.66it/s, train_loss=0.000779, val_loss=0.0982]

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|██████████| 264/264 [00:05<00:00, 51.64it/s, train_loss=0.000779, val_loss=0.0982]


Trainer will use only 1 of 3 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=3)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2]


Predicting DataLoader 0: 100%|██████████| 54/54 [00:01<00:00, 36.20it/s]


Trainer will use only 1 of 3 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=3)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2]


Predicting DataLoader 0: 100%|██████████| 27/27 [00:01<00:00, 21.60it/s]

Trainer will use only 1 of 3 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=3)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.





GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2]


Predicting DataLoader 0: 100%|██████████| 54/54 [00:01<00:00, 29.28it/s]


Trainer will use only 1 of 3 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=3)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
/vol/fob-vol7/nebenf21/reinbene/bene/MA/myenv/lib/python3.10/site-packages/lightning_fabric/plugins/environments/slurm.py:204: The `srun` command is available on your system but is not used. HINT: If your intention is to run Lightning on SLURM, prepend your python command with `srun` like so: srun python /vol/fob-vol7/nebenf21/reinbene/bene/MA/myenv/lib/py ...
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2]

  | Name          | Type             | Params
---------------------------------------------------
0 | criterion     | MSELoss          | 0     
1 | train_metr

Epoch 4: 100%|██████████| 474/474 [00:10<00:00, 44.28it/s, train_loss=0.302, val_loss=1.110]

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|██████████| 474/474 [00:10<00:00, 44.26it/s, train_loss=0.302, val_loss=1.110]


Trainer will use only 1 of 3 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=3)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2]

  | Name                | Type                | Params
------------------------------------------------------------
0 | criterion           | MSELoss             | 0     
1 | train_metrics       | MetricCollection    | 0     
2 | val_metrics         | MetricCollection    | 0     
3 | rin                 | RINorm              | 134   
4 | encoder             | Linear              | 4.4 K 
5 | positional_encoding | _PositionalEncoding | 0     
6 | transformer         | Transformer         | 548 K 
7 | decoder         

Epoch 4: 100%|██████████| 948/948 [00:46<00:00, 20.54it/s, train_loss=0.353, val_loss=1.090]

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|██████████| 948/948 [00:46<00:00, 20.54it/s, train_loss=0.353, val_loss=1.090]


Trainer will use only 1 of 3 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=3)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2]

  | Name                | Type             | Params
---------------------------------------------------------
0 | criterion           | MSELoss          | 0     
1 | train_metrics       | MetricCollection | 0     
2 | val_metrics         | MetricCollection | 0     
3 | fc_hist             | Linear           | 9.3 K 
4 | feature_mixing_hist | _FeatureMixing   | 25.2 K
5 | conditional_mixer   | ModuleList       | 84.4 K
6 | fc_out              | Linear           | 4.4 K 
-----------------------------------------------

Epoch 4: 100%|██████████| 948/948 [00:16<00:00, 59.04it/s, train_loss=0.248, val_loss=2.050]

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|██████████| 948/948 [00:16<00:00, 59.03it/s, train_loss=0.248, val_loss=2.050]


Trainer will use only 1 of 3 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=3)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2]


Predicting DataLoader 0: 100%|██████████| 15/15 [00:00<00:00, 40.15it/s]


Trainer will use only 1 of 3 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=3)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2]


Predicting DataLoader 0: 100%|██████████| 8/8 [00:00<00:00, 23.65it/s]

Trainer will use only 1 of 3 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=3)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2]



Predicting DataLoader 0: 100%|██████████| 15/15 [00:00<00:00, 38.99it/s]


Trainer will use only 1 of 3 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=3)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
/vol/fob-vol7/nebenf21/reinbene/bene/MA/myenv/lib/python3.10/site-packages/lightning_fabric/plugins/environments/slurm.py:204: The `srun` command is available on your system but is not used. HINT: If your intention is to run Lightning on SLURM, prepend your python command with `srun` like so: srun python /vol/fob-vol7/nebenf21/reinbene/bene/MA/myenv/lib/py ...
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2]

  | Name          | Type             | Params
---------------------------------------------------
0 | criterion     | MSELoss          | 0     
1 | train_metr

Epoch 4: 100%|██████████| 474/474 [00:10<00:00, 46.18it/s, train_loss=0.375, val_loss=0.767]

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|██████████| 474/474 [00:10<00:00, 46.17it/s, train_loss=0.375, val_loss=0.767]


Trainer will use only 1 of 3 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=3)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2]

  | Name                | Type                | Params
------------------------------------------------------------
0 | criterion           | MSELoss             | 0     
1 | train_metrics       | MetricCollection    | 0     
2 | val_metrics         | MetricCollection    | 0     
3 | rin                 | RINorm              | 416   
4 | encoder             | Linear              | 13.4 K
5 | positional_encoding | _PositionalEncoding | 0     
6 | transformer         | Transformer         | 548 K 
7 | decoder         

Epoch 4: 100%|██████████| 948/948 [00:43<00:00, 21.88it/s, train_loss=0.434, val_loss=0.728]

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|██████████| 948/948 [00:43<00:00, 21.87it/s, train_loss=0.434, val_loss=0.728]


Trainer will use only 1 of 3 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=3)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2]

  | Name                | Type             | Params
---------------------------------------------------------
0 | criterion           | MSELoss          | 0     
1 | train_metrics       | MetricCollection | 0     
2 | val_metrics         | MetricCollection | 0     
3 | fc_hist             | Linear           | 9.3 K 
4 | feature_mixing_hist | _FeatureMixing   | 43.2 K
5 | conditional_mixer   | ModuleList       | 84.4 K
6 | fc_out              | Linear           | 13.5 K
-----------------------------------------------

Epoch 4: 100%|██████████| 948/948 [00:16<00:00, 57.16it/s, train_loss=0.211, val_loss=1.260]

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|██████████| 948/948 [00:16<00:00, 57.15it/s, train_loss=0.211, val_loss=1.260]


Trainer will use only 1 of 3 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=3)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2]


Predicting DataLoader 0: 100%|██████████| 18/18 [00:00<00:00, 31.85it/s]


Trainer will use only 1 of 3 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=3)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2]


Predicting DataLoader 0: 100%|██████████| 9/9 [00:00<00:00, 16.78it/s]


Trainer will use only 1 of 3 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=3)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2]


Predicting DataLoader 0: 100%|██████████| 18/18 [00:00<00:00, 33.11it/s]
