In [1]:
lora_config = {
    'r':8,
    'lora_alpha':16,
    'lora_dropout':0.1,
    'target_modules':["q", "v"]
    }

lora_config

{'r': 8, 'lora_alpha': 16, 'lora_dropout': 0.1, 'target_modules': ['q', 'v']}

In [3]:
import torch
from chronos import ChronosPipeline

pipeline = ChronosPipeline.from_pretrained(
    "amazon/chronos-t5-small",
    device_map="cuda",  # Use "cpu" if you don't have a GPU
    torch_dtype=torch.bfloat16,
)

pipeline.model

ChronosModel(
  (model): T5ForConditionalGeneration(
    (shared): Embedding(4096, 512)
    (encoder): T5Stack(
      (embed_tokens): Embedding(4096, 512)
      (block): ModuleList(
        (0): T5Block(
          (layer): ModuleList(
            (0): T5LayerSelfAttention(
              (SelfAttention): T5Attention(
                (q): Linear(in_features=512, out_features=512, bias=False)
                (k): Linear(in_features=512, out_features=512, bias=False)
                (v): Linear(in_features=512, out_features=512, bias=False)
                (o): Linear(in_features=512, out_features=512, bias=False)
                (relative_attention_bias): Embedding(32, 8)
              )
              (layer_norm): T5LayerNorm()
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (1): T5LayerFF(
              (DenseReluDense): T5DenseActDense(
                (wi): Linear(in_features=512, out_features=2048, bias=False)
                (wo): Linear(in_features=

In [3]:
import datasets

ds = datasets.load_dataset("autogluon/chronos_datasets", "monash_australian_electricity", split="train")
ds.set_format("numpy")  # sequences returned as numpy arrays
ds[0].keys()

dict_keys(['id', 'timestamp', 'target'])

In [18]:
from numpy import mean
max([len(ds[i]['target']) for i in range(len(ds))])

232272

In [None]:
pipeline.model.

In [11]:
# Load example Chronos datasets from Hugging Face
ds_elec = datasets.load_dataset("autogluon/chronos_datasets", "monash_australian_electricity", split="train")
ds_cif = datasets.load_dataset("autogluon/chronos_datasets", "monash_cif_2016", split="train")
print(ds_elec)
print(ds_cif)


Dataset({
    features: ['id', 'timestamp', 'target'],
    num_rows: 5
})
Dataset({
    features: ['id', 'timestamp', 'target'],
    num_rows: 72
})


In [None]:
ds_elec = ds_elec.remove_columns([c for c in ds_elec.column_names if c not in ["start", "target"]])
ds_cif = ds_cif.remove_columns([c for c in ds_cif.column_names if c not in ["start", "target"]])


In [12]:
ds_elec['target']

[[5714.045004,
  5360.189078,
  5014.835118,
  4602.755516,
  4285.179828,
  4074.894442,
  3942.936134,
  3883.997482,
  3877.679938,
  3837.716602,
  3897.023942,
  3964.149742,
  4036.139836,
  4245.776094,
  4503.46147,
  4773.508194,
  5105.502952,
  5387.25472,
  5593.59965,
  5787.18208,
  5929.757306,
  6063.754658,
  6183.790812,
  6261.027064,
  6315.720632,
  6358.840842,
  6337.851854,
  6404.352896,
  6484.171474,
  6560.220964,
  6654.734154,
  6735.109466,
  6771.91698,
  6879.16775,
  6870.745484,
  6837.42248,
  6683.476014,
  6586.68355,
  6603.379982,
  6726.09348,
  6629.58311,
  6351.227286,
  6339.486132,
  6130.854328,
  6027.0887,
  5784.833806,
  5558.533858,
  5375.713684,
  5167.553242,
  4897.041952,
  4595.556714,
  4318.445672,
  4127.44116,
  4051.329164,
  4020.862548,
  4046.353832,
  4160.942446,
  4296.117484,
  4585.664878,
  4924.16968,
  5233.702866,
  5614.887114,
  6027.623838,
  6431.409432,
  6804.882168,
  7015.646482,
  7201.668614,
  7321.65

In [13]:
from transformers import TrainingArguments, Trainer 

In [18]:
output_dir="./output/chronos-t5-small-lora"
per_device_train_batch_size = 32
learning_rate = 1e-3
lr_scheduler_type = "linear"
warmup_ratio = 0.0
optim= "adamw_torch_fused"
log_steps = 500
save_steps = 50_000
max_steps = 200_000
gradient_accumulation_steps= 2
dataloader_num_workers = 1
tf32 = False
torch_compile = True

In [19]:
training_args = TrainingArguments(
        output_dir=output_dir,
        per_device_train_batch_size=per_device_train_batch_size,
        learning_rate=learning_rate,
        lr_scheduler_type=lr_scheduler_type,
        warmup_ratio=warmup_ratio,
        optim=optim,
        logging_dir=str(output_dir+"/logs"),
        logging_strategy="steps",
        logging_steps=log_steps,
        save_strategy="steps",
        save_steps=save_steps,
        report_to=["tensorboard"],
        max_steps=max_steps,
        gradient_accumulation_steps=gradient_accumulation_steps,
        dataloader_num_workers=dataloader_num_workers,
        tf32=tf32,  # remove this if not using Ampere GPUs (e.g., A100)
        torch_compile=torch_compile,
        ddp_find_unused_parameters=False,
        remove_unused_columns=False,
    )

The speedups for torchdynamo mostly come with GPU Ampere or higher and which is not detected here.


In [None]:
from chronos import ChronosDataset

train_dataset = ChronosDataset(
    datasets=[ds1, ds2],
    probabilities=[0.5, 0.5],
    tokenizer=tokenizer,
    context_length=96,
    prediction_length=24,
    drop_prob=0.2,
    min_past=96,
    model_type="seq2seq",  # match your model
    mode="training"
)

In [23]:
import sys
import os

# Navigate up to the root of your project
root_dir = os.path.abspath(os.path.join(".."))  # from ./src/chronos/
train_module_path = os.path.join(root_dir, "scripts", "training")

# Add that path to sys.path
sys.path.append(train_module_path)
#train_module_path
# Now you can import
#import my_module  # or from my_module import something

In [27]:
from train import ChronosDataset

In [30]:
pipeline.model.model.config.chronos_config = chronos_config.__dict__

NameError: name 'chronos_config' is not defined

In [None]:
shuffled_train_dataset = ChronosDataset(
        datasets=[ds_cif, ds_elec]
        tokenizer=chronos_config.create_tokenizer(),
        context_length=context_length,
        prediction_length=prediction_length,
        min_past=min_past,
        model_type=model_type,
        imputation_method=LastValueImputation() if model_type == "causal" else None,
        mode="training",
    ).shuffle(shuffle_buffer_length=shuffle_buffer_length)


NameError: name 'probability' is not defined

In [None]:
trainer = Trainer(
        model=pipeline.model.model,
        args=training_args,
        train_dataset=shuffled_train_dataset,
    )

trainer.train()

No label_names provided for model class `PeftModelForSeq2SeqLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


ValueError: Trainer: training requires a train_dataset.

# Working on `train.py`