In [1]:
import ast
import logging
import os
import random
from copy import deepcopy
from functools import partial
from pathlib import Path
from typing import Dict, Optional

import typer
from typer_config import use_yaml_config
import torch
import torch.nn.functional as F
import transformers
from transformers import Trainer, TrainingArguments
from gluonts.dataset.common import FileDataset

from chronos import ChronosPipeline, ChronosConfig
from train import (
    ChronosDataset,
    load_model,
    get_next_path,
    is_main_process,
    log_on_main,
    has_enough_observations,
    Filter,
    get_training_job_info,
    save_training_info
)

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Load the pretrained Chronos checkpoint that the rest of the notebook refers
# to as the teacher. Loading options are kept apart from the checkpoint id so
# they are easy to spot and tweak.
teacher_load_kwargs = {
    "device_map": "auto",
    "torch_dtype": torch.float32,  # same dtype the student cell casts to
}
teacher = ChronosPipeline.from_pretrained(
    "amazon/chronos-t5-mini", **teacher_load_kwargs
)

In [4]:
# Build the student from the T5-efficient-mini architecture with random
# weights (random_init=True), then cast it to float32 so its dtype matches
# the dtype the teacher was loaded with.
student_init_kwargs = {
    "model_id": "google/t5-efficient-mini",
    "model_type": "seq2seq",
    "vocab_size": 4096,
    "random_init": True,
    "tie_embeddings": True,
    "pad_token_id": 0,
    "eos_token_id": 1,
}
# NOTE(review): vocab_size / pad_token_id / eos_token_id are hard-coded here;
# presumably they must agree with the teacher's tokenizer -- confirm.
student = load_model(**student_init_kwargs).to(dtype=torch.float32)

2025-01-01 12:23:51,462 - /home/arda/Documents/chronos-forecasting/scripts/training/train.py - INFO - Using random initialization


In [None]:
# Assemble the training stream in three explicit stages:
#   1. open the raw series file(s),
#   2. wrap them in a ChronosDataset that uses the teacher's tokenizer,
#   3. shuffle through a 1000-element buffer.
# NOTE(review): the path points at a .csv file -- confirm that FileDataset can
# actually read this format. Also confirm freq="h" matches the data's real
# sampling frequency.
m5_train_files = FileDataset(path=Path("data/m5/train.csv"), freq="h")

train_dataset_ordered = ChronosDataset(
    datasets=[m5_train_files],
    probabilities=[1.0],  # single source, so it is always the one sampled
    tokenizer=teacher.tokenizer,
    context_length=100,
    prediction_length=12,
    min_past=100,  # set equal to context_length
    model_type="seq2seq",
    mode="training",
)

train_dataset = train_dataset_ordered.shuffle(shuffle_buffer_length=1000)