In [1]:
from datasets import load_dataset
from transformers import AutoTokenizer, AutoModelForCausalLM, TrainingArguments, Trainer

dataset = load_dataset("json", data_files="../data/training-dataset/pulls.jsonl")

model_name = "deepseek-ai/DeepSeek-R1"  # or another DeepSeek variant
tokenizer = AutoTokenizer.from_pretrained(model_name)

def preprocess_function(data_record):
    return tokenizer(data_record["title"] + " " + data_record["body"], truncation=True, padding="max_length", max_length=512)

tokenized_datasets = dataset.map(preprocess_function)

model = AutoModelForCausalLM.from_pretrained(model_name, device_map="auto")

training_args = TrainingArguments(
    output_dir="../models/deepseek",
    evaluation_strategy="epoch",
    save_strategy="epoch",
    per_device_train_batch_size=2,
    per_device_eval_batch_size=2,
    logging_steps=10,
    save_total_limit=2,
    num_train_epochs=3,
    fp16=True,  # Enable mixed precision for faster training
    push_to_hub=False,
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_datasets["train"],
    eval_dataset=tokenized_datasets["test"] if "test" in tokenized_datasets else None,
    tokenizer=tokenizer,
)

trainer.train()

RuntimeError: No GPU found. A GPU is needed for FP8 quantization.