# Continued Pretraining of T5

In [1]:
from transformers import AutoTokenizer

# Location of the project's tokenizer, relative to this notebook.
TOKENIZER_DIR = '../../Models/my_tokenizer'

# The tokenizer's length is used further down to resize the model's embeddings.
tokenizer = AutoTokenizer.from_pretrained(TOKENIZER_DIR)

In [2]:
# Set the WANDB_PROJECT environment variable for this kernel; the Weights & Biases
# integration reads it to decide which project the training run is logged under.
%env WANDB_PROJECT=english-v2

env: WANDB_PROJECT=english-v2


In [3]:
from transformers import TrainingArguments, Trainer, T5ForConditionalGeneration, Adafactor
from accelerate import Accelerator

# Checkpoint to resume from, and the learning rate given to both the optimizer
# and TrainingArguments.
RESUME_CHECKPOINT = "chkpts/chkpt-1080000"
LEARNING_RATE = 3e-4

# Load the checkpointed T5 weights onto cuda:0, then resize the token embeddings
# to match the length of the tokenizer loaded earlier in the notebook.
model = T5ForConditionalGeneration.from_pretrained(
    RESUME_CHECKPOINT,
    device_map='cuda:0',
)
model.resize_token_embeddings(len(tokenizer))

accelerator = Accelerator()

# Adafactor with its relative-step, parameter-scaling and warmup-init modes all
# switched off, so the explicit `lr` below is the one that is used.
adafactor_options = dict(
    scale_parameter=False,
    relative_step=False,
    warmup_init=False,
    lr=LEARNING_RATE,
)
optimizer = Adafactor(model.parameters(), **adafactor_options)

model, optimizer = accelerator.prepare(model, optimizer)

# One epoch, automatic batch-size search, and no automatic checkpointing
# (save_strategy='no').
training_args = TrainingArguments(
    output_dir="buddhist-large-pretrain",
    auto_find_batch_size=True,
    learning_rate=LEARNING_RATE,
    num_train_epochs=1,
    save_strategy='no',
)

In [5]:
from datasets import load_dataset
import gc

# Train on one contiguous row range of the dataset's train split, then write a
# checkpoint named after the last row index.
#
# Written as straight-line code: the work happens exactly once, so no loop (and no
# unused loop index) is needed around it.

# Half-open row range [start, end) selected from the train split.
# NOTE(review): `start` presumably has to match the checkpoint the model was loaded
# from ("chkpts/chkpt-1080000") - confirm before changing either value.
start = 1080000
end = 1722833

print(f'Training on {start} to {end}')

# The select() is chained onto load_dataset() so that no separate reference to the
# full split is kept around.
tokenized_dataset = load_dataset('billingsmoore/temp2', split='train').select(range(start, end))
gc.collect()

# The optimizer built earlier in the notebook is passed in explicitly; the scheduler
# slot is None, leaving the choice of scheduler to Trainer.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset,
    optimizers=(optimizer, None)
)

trainer.train()

# training_args uses save_strategy='no', so the checkpoint is written by hand here.
model.save_pretrained(f'chkpts/chkpt-{end}')

# NOTE(review): `trainer` still holds the dataset via train_dataset, so this may not
# free much until `trainer` itself is released.
del tokenized_dataset
gc.collect();  # trailing ';' keeps the collect() count out of the cell output

Using the latest cached version of the dataset since billingsmoore/temp2 couldn't be found on the Hugging Face Hub
Found the latest cached dataset configuration 'default' at /home/j/.cache/huggingface/datasets/billingsmoore___temp2/default/0.0.0/bd0498d038398366dcc696b3f79337ca96cdbb27 (last modified on Sun May 25 21:01:21 2025).


Training on 1080000 to 1722833


Loading dataset shards:   0%|          | 0/17 [00:00<?, ?it/s]

[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.
[34m[1mwandb[0m: Currently logged in as: [33mbillingsmoore[0m. Use [1m`wandb login --relogin`[0m to force relogin


Passing a tuple of `past_key_values` is deprecated and will be removed in Transformers v4.48.0. You should pass an instance of `EncoderDecoderCache` instead, e.g. `past_key_values=EncoderDecoderCache.from_legacy_cache(past_key_values)`.


Step,Training Loss
500,0.1242
1000,0.1177
1500,0.12
2000,0.127
2500,0.1203
3000,0.1241
3500,0.1298
4000,0.1276
4500,0.1192
5000,0.1273


In [6]:
# Directory that receives the final model weights and config.
FINAL_MODEL_DIR = 'pretrained-large-model'

# Only the model is written here; the tokenizer is not saved alongside it.
model.save_pretrained(FINAL_MODEL_DIR)