In [None]:
# train_llm_forecaster.py

from transformers import GPT2Tokenizer, GPT2LMHeadModel, Trainer, TrainingArguments, DataCollatorForLanguageModeling
from torch.utils.data import Dataset
import pandas as pd
import torch
import os


In [2]:
# Config
MODEL_NAME = "gpt2"  # you can later try "gpt2-medium" or "TinyLlama" etc.
DATA_DIR = "../data/llm_preprocessed/"
OUTPUT_DIR = "../models/llm_forecaster/"

In [3]:
# 1. Prepare dataset
class TimeSeriesPromptDataset(Dataset):
    def __init__(self, file_path, tokenizer, max_length=256):
        self.data = pd.read_csv(file_path)
        self.tokenizer = tokenizer
        self.max_length = max_length

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        prompt = self.data.iloc[idx]['prompt']
        completion = self.data.iloc[idx]['completion']

        # Concatenate prompt and completion for fine-tuning
        text = prompt + " " + completion
        encodings = self.tokenizer(text, truncation=True, padding="max_length", max_length=self.max_length, return_tensors="pt")
        
        input_ids = encodings['input_ids'].squeeze()
        attention_mask = encodings['attention_mask'].squeeze()
        
        labels = input_ids.clone()  # Predict the next token

        return {"input_ids": input_ids, "attention_mask": attention_mask, "labels": labels}

In [4]:
# 2. Load model and tokenizer
tokenizer = GPT2Tokenizer.from_pretrained(MODEL_NAME)
tokenizer.pad_token = tokenizer.eos_token  # Important to handle padding correctly
model = GPT2LMHeadModel.from_pretrained(MODEL_NAME)

In [5]:
# 3. Prepare datasets
train_dataset = TimeSeriesPromptDataset(f"{DATA_DIR}/train.csv", tokenizer)
val_dataset = TimeSeriesPromptDataset(f"{DATA_DIR}/val.csv", tokenizer)

In [None]:
# 4. Trainer
# training_args = TrainingArguments(
#     output_dir=OUTPUT_DIR,
#     overwrite_output_dir=True,  # allow overwriting
#     evaluation_strategy="epoch",  # evaluate at the end of each epoch
#     save_strategy="epoch",  # save model at end of each epoch
#     save_total_limit=2,  # keep last 2 checkpoints only
#     learning_rate=5e-5,
#     per_device_train_batch_size=8,
#     per_device_eval_batch_size=8,
#     num_train_epochs=5,
#     weight_decay=0.01,
#     logging_dir=os.path.join(OUTPUT_DIR, "logs"),
#     logging_steps=10,
#     load_best_model_at_end=True,  # after training, use best eval loss checkpoint
#     metric_for_best_model="loss",  # track loss
#     greater_is_better=False,  # lower loss is better
#     prediction_loss_only=True,  # no need to return all outputs
#     fp16=True if torch.cuda.is_available() else False,  # mixed precision if possible
#     report_to="none",  # disable W&B or HuggingFace Hub
# )

In [8]:
# 4. Training arguments(for transformers 4.51.3)

# Key Changes:
# 	1.	Removed evaluation_strategy and save_strategy parameters since they aren’t supported in older versions of transformers.
# 	2.	Used only save_steps, logging_steps, and weight_decay as the key parameters.

training_args = TrainingArguments(
    output_dir=OUTPUT_DIR,            # Output directory
    logging_dir=f"{OUTPUT_DIR}/logs",  # Logging directory
    num_train_epochs=5,               # Number of training epochs
    per_device_train_batch_size=8,    # Train batch size
    per_device_eval_batch_size=8,     # Eval batch size
    save_steps=500,                   # Save model every 500 steps
    logging_steps=500,                # Log every 500 steps
    weight_decay=0.01,                # Weight decay for regularization
    save_total_limit=2,               # Limit the total number of saved checkpoints
    learning_rate=5e-5,               # Learning rate
    fp16=True if torch.cuda.is_available() else False,  # mixed precision if possible
    report_to="none",  # disable W&B or HuggingFace Hub
)

In [9]:

# 5. Trainer setup
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=val_dataset,
    tokenizer=tokenizer,
    data_collator=DataCollatorForLanguageModeling(tokenizer, mlm=False),  # Use language modeling for training
)

  trainer = Trainer(


In [10]:
# 7. Save the model

trainer.train()
trainer.save_model(OUTPUT_DIR)
print(f"✅ LLM Fine-tuning complete. Model saved to {OUTPUT_DIR}")


`loss_type=None` was set in the config but it is unrecognised.Using the default loss: `ForCausalLMLoss`.


Step,Training Loss


KeyboardInterrupt: 