<a href="https://colab.research.google.com/github/abzjy024/Hello-World/blob/master/gpt2_demos.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **Preparations**

In [None]:
# Install the packages this notebook imports (shell installs, Colab-style).
# NOTE(review): transformers and trl are installed from their `main` branches and
# peft from its default branch, none pinned to a release - a fresh run may pull
# different code than the run that produced the saved outputs.
!pip install -q bitsandbytes datasets accelerate loralib
!pip install -q git+https://github.com/huggingface/transformers.git@main git+https://github.com/huggingface/peft.git
# NOTE(review): unlike the two commands above, this one omits -q.
!pip install git+https://github.com/lvwerra/trl.git@main

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting datasets
  Downloading datasets-2.11.0-py3-none-any.whl (468 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m468.7/468.7 kB[0m [31m8.1 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting dill<0.3.7,>=0.3.0
  Downloading dill-0.3.6-py3-none-any.whl (110 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m110.5/110.5 kB[0m [31m7.2 MB/s[0m eta [36m0:00:00[0m
Collecting multiprocess
  Downloading multiprocess-0.70.14-py39-none-any.whl (132 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m132.9/132.9 kB[0m [31m5.2 MB/s[0m eta [36m0:00:00[0m
Collecting aiohttp
  Downloading aiohttp-3.8.4-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.0 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.0/1.0 MB[0m [31m23.7 MB/s[0m eta [36m0:00:00[0m
Collecting responses<0.19
  Downloading responses-0.18.0-

In [None]:
from huggingface_hub import notebook_login

# Authenticate with the Hugging Face Hub. The training section later sets
# push_to_hub=True and calls trainer.push_to_hub(), which need these credentials.
notebook_login()

Token is valid.
Your token has been saved in your configured git credential helpers (store).
Your token has been saved to /root/.cache/huggingface/token
Login successful


In [None]:
from google.colab import drive

# Mount Google Drive at /content/drive (the google.colab module ties this cell to Colab).
# NOTE(review): no visible cell reads from or writes to this mount - confirm it is still needed.
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
from datasets import load_dataset, DatasetDict
from transformers import AutoTokenizer, AutoModel, GPT2LMHeadModel, AutoConfig
from transformers import DataCollatorForLanguageModeling
from transformers import Trainer, TrainingArguments

# **Load data and tokenize** ([ref](https://huggingface.co/learn/nlp-course/zh-CN/chapter3/4?fw=pt))

In [None]:
# prepare raw datasets
# Load "abzjy024/goodata" (presumably a Hub dataset repo - confirm), mapping every
# file matching dummy/*.json to a single "train" split. No validation or test
# split is defined here.
raw_datasets = load_dataset("abzjy024/goodata", data_files={"train": "dummy/*.json"})



  0%|          | 0/2 [00:00<?, ?it/s]

In [None]:
# Base checkpoint name; used below to load both the tokenizer and the model.
# NOTE(review): the saved outputs mention vocab.txt and BertTokenizerFast, which
# suggests they were produced with a different checkpoint than "gpt2" - confirm.
checkpoint = "gpt2"

# tokenize
def tokenize(element):
    """Tokenize a batch of examples and keep only the full-length chunks.

    Reads the module-level ``tokenizer`` and ``context_length`` at call time,
    so both must be defined before this function is invoked.

    Args:
        element: Mapping with an ``'input'`` entry that is handed to the
            tokenizer as-is (a list of texts when used with a batched map).

    Returns:
        dict: ``{"input_ids": [...]}`` holding every chunk whose reported
        length equals ``context_length``; shorter chunks are dropped.
    """
    encoded = tokenizer(
        element['input'],
        truncation=True,
        max_length=context_length,
        return_overflowing_tokens=True,
        return_length=True,
    )
    # Pair each chunk with its reported length and discard the short ones.
    full_chunks = [
        ids
        for n_tokens, ids in zip(encoded["length"], encoded["input_ids"])
        if n_tokens == context_length
    ]
    return {"input_ids": full_chunks}


# Chunk size in tokens; read as a global by tokenize().
context_length = 128
# Tokenizer matching `checkpoint`; read as a global by tokenize().
tokenizer = AutoTokenizer.from_pretrained(checkpoint)

# Apply tokenize() batch-wise and drop the original columns, so the result
# carries only the "input_ids" produced by tokenize().
tokenized_datasets = raw_datasets.map(
    tokenize, batched=True, remove_columns=raw_datasets["train"].column_names
)

Downloading (…)okenizer_config.json:   0%|          | 0.00/217 [00:00<?, ?B/s]

Downloading (…)lve/main/config.json:   0%|          | 0.00/577 [00:00<?, ?B/s]

Downloading (…)solve/main/vocab.txt:   0%|          | 0.00/110k [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

Map:   0%|          | 0/263 [00:00<?, ? examples/s]

Map:   0%|          | 0/1 [00:00<?, ? examples/s]

# **Load model and create collator**

In [None]:
# prepare model
# Load the pretrained weights named by `checkpoint` into a GPT2LMHeadModel.
model = GPT2LMHeadModel.from_pretrained(checkpoint)

In [None]:
# prepare collator
# Reuse the end-of-sequence token as the padding token
# (presumably because this tokenizer defines no pad token of its own - confirm).
tokenizer.pad_token = tokenizer.eos_token
# mlm=False: collate for causal language modeling rather than masked-LM.
data_collator = DataCollatorForLanguageModeling(tokenizer, mlm=False)

# **Apply LoRA (Optional)**

In [None]:
def print_trainable_parameters(model):
    """
    Print how many of the model's parameters are trainable.

    Walks ``model.named_parameters()``, adding up ``numel()`` over all
    parameters and, separately, over those with ``requires_grad`` set, then
    prints both totals and the trainable share as a percentage.

    Args:
        model: Any object exposing ``named_parameters()`` that yields
            ``(name, parameter)`` pairs whose parameters provide ``numel()``
            and ``requires_grad``.

    Returns:
        None. The summary line is written to stdout.
    """
    trainable_params = 0
    all_param = 0
    for _, param in model.named_parameters():
        count = param.numel()
        all_param += count
        if param.requires_grad:
            trainable_params += count
    # A model without parameters would otherwise raise ZeroDivisionError; report 0.0 instead.
    trainable_pct = 100 * trainable_params / all_param if all_param else 0.0
    print(
        f"trainable params: {trainable_params} || all params: {all_param} || trainable%: {trainable_pct}"
    )

In [None]:
from peft import LoraConfig, get_peft_model 

# LoRA adapter settings: adapters are attached to modules named "c_proj" only.
# NOTE(review): fan_in_fan_out=True is presumably set because GPT-2's projection
# layers store weights as (fan_in, fan_out) - confirm against the PEFT docs.
# NOTE(review): the name `config` is reassigned later by the PPO and Evaluation cells.
config = LoraConfig(
    r=16,
    lora_alpha=32,
    target_modules=["c_proj"],
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM",
    fan_in_fan_out=True,
)

# Rebinds `model` to the PEFT-wrapped model. This cell is not idempotent:
# re-running it would wrap the already-wrapped model again.
model = get_peft_model(model, config)
print_trainable_parameters(model)

# **Training**

In [None]:
# create Trainer
# NOTE(review): the saved run below finished at global_step=122. With
# eval_steps / logging_steps / save_steps = 5_000 and warmup_steps = 1_000, no
# intermediate evaluation, logging or checkpoint step is reached (the saved
# Step/Training Loss/Validation Loss table is empty) and training appears to
# end while still in warmup - confirm these values are intended for this data size.
# NOTE(review): with push_to_hub=True, output_dir="gpt" appears to double as the
# Hub repo name (the saved output shows abzjy024/gpt being cloned) - confirm.
args = TrainingArguments(
        output_dir="gpt",
        per_device_train_batch_size=32,
        per_device_eval_batch_size=32,
        evaluation_strategy="steps",
        eval_steps=5_000,
        logging_steps=5_000,
        gradient_accumulation_steps=8,
        num_train_epochs=1,
        weight_decay=0.1,
        warmup_steps=1_000,
        lr_scheduler_type="cosine",
        learning_rate=5e-4,
        save_steps=5_000,
        fp16=True,
        push_to_hub=True,
    )

# NOTE(review): eval_dataset is the training split, so any validation loss
# reported by this Trainer is measured on data the model was trained on.
trainer = Trainer(
        model=model,
        tokenizer=tokenizer,
        args=args,
        data_collator=data_collator,
        train_dataset=tokenized_datasets["train"],
        eval_dataset=tokenized_datasets["train"],
    )

Cloning https://huggingface.co/abzjy024/gpt into local empty directory.


In [None]:
# start training
# Runs the Trainer configured above; the returned TrainOutput is shown as the cell result.
trainer.train()

You're using a BertTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


Step,Training Loss,Validation Loss


TrainOutput(global_step=122, training_loss=2.740209110447618, metrics={'train_runtime': 331.5746, 'train_samples_per_second': 94.663, 'train_steps_per_second': 0.368, 'total_flos': 2040168185856000.0, 'train_loss': 2.740209110447618, 'epoch': 0.99})

In [None]:
# Upload the training output to the Hub (the saved output shows pushes to abzjy024/gpt).
trainer.push_to_hub()

Upload file pytorch_model.bin:   0%|          | 1.00/401M [00:00<?, ?B/s]

Upload file runs/Apr04_06-50-08_875a0ee759b9/events.out.tfevents.1680591022.875a0ee759b9.129.0:   0%|         …

Upload file runs/Apr04_06-50-08_875a0ee759b9/1680591022.6613834/events.out.tfevents.1680591022.875a0ee759b9.12…

Upload file training_args.bin:   0%|          | 1.00/3.43k [00:00<?, ?B/s]

To https://huggingface.co/abzjy024/gpt
   bc3ebd5..f019cae  main -> main

   bc3ebd5..f019cae  main -> main

To https://huggingface.co/abzjy024/gpt
   f019cae..0be2652  main -> main

   f019cae..0be2652  main -> main



'https://huggingface.co/abzjy024/gpt/commit/f019cae1563d5360364278216342d3010d3ed9ec'

# **Apply PPO (Optional)** ([ref](https://huggingface.co/docs/trl/quickstart))

In [None]:
# 0. imports
import torch
from transformers import GPT2Tokenizer, AutoTokenizer
from trl import PPOTrainer, PPOConfig, AutoModelForCausalLMWithValueHead, create_reference_model
from trl.core import respond_to_batch

# 1. load a pretrained model
# NOTE(review): this rebinds `model` and `tokenizer` from the training cells above and
# loads the stock 'gpt2' weights, not the model fine-tuned earlier in this notebook.
# a. without lora
model = AutoModelForCausalLMWithValueHead.from_pretrained('gpt2')
# b. with lora
# NOTE(review): option b relies on `config` still being the LoraConfig from the LoRA
# cell; the next cell rebinds `config` to a PPOConfig.
# model = AutoModelForCausalLMWithValueHead.from_pretrained(
#     'gpt2',
#     peft_config=config,
#     )
# Second, separately loaded copy of 'gpt2'; passed to PPOTrainer as `model_ref` below.
model_ref = AutoModelForCausalLMWithValueHead.from_pretrained('gpt2')
tokenizer = AutoTokenizer.from_pretrained('gpt2')
# comment out if needed
tokenizer.pad_token = tokenizer.eos_token

In [None]:
# 2. initialize trainer
# NOTE(review): `config` is rebound here to a PPOConfig; the LoRA cell above and the
# commented-out "with lora" option use the same name for a LoraConfig.
ppo_config = {'batch_size': 1}
config = PPOConfig(**ppo_config)
ppo_trainer = PPOTrainer(config, model, model_ref, tokenizer)

# 3. encode a query
query_txt = "This morning I went to the "
query_tensor = tokenizer.encode(query_txt, return_tensors="pt")

# 4. generate model response
response_tensor = respond_to_batch(model, query_tensor)
# Decoded text of the first response; not used by any later visible cell.
response_txt = tokenizer.decode(response_tensor[0,:])

# 5. define a reward for response
# (this could be any reward such as human feedback or output from another model)
# Here: a list with a single constant reward of 1.0.
reward = [torch.tensor(1.0)]

In [None]:
# 6. train model with ppo
# One PPO step on the single query/response pair; each list has length 1,
# matching batch_size=1 in ppo_config.
train_stats = ppo_trainer.step([query_tensor[0]], [response_tensor[0]], reward)

# **Evaluation**

In [None]:
import numpy as np
import torch
from peft import PeftModel, PeftConfig
from transformers import AutoModelForCausalLM, AutoTokenizer

# a. without lora
# NOTE(review): `device` is not defined in any visible cell; define it before enabling option a.
# tokenizer_test = AutoTokenizer.from_pretrained("abzjy024/gpt")
# model_test = GPT2LMHeadModel.from_pretrained("abzjy024/gpt").to(device)

# b. with lora
# NOTE(review): this adapter id differs from the repo the training section pushed to
# (abzjy024/gpt per the saved output) - confirm which artifact should be evaluated.
peft_model_id = "abzjy024/gpt2-lora-medical"
# NOTE(review): rebinds `config` (now a PeftConfig) and `model` (now the adapter's base model).
config = PeftConfig.from_pretrained(peft_model_id)
model = AutoModelForCausalLM.from_pretrained(config.base_model_name_or_path, 
                        return_dict=True, 
                        device_map='auto')
# Tokenizer is loaded from the adapter's base model, not from the adapter repo.
tokenizer_test = AutoTokenizer.from_pretrained(config.base_model_name_or_path)

# Load the Lora model
# Wrap the base model with the adapter weights from `peft_model_id`.
model_test = PeftModel.from_pretrained(model, peft_model_id)

In [None]:
# Tokenize the prompt, then move the tensors to the device that holds the model's
# parameters. The base model is loaded with device_map='auto', so it may sit on a
# GPU while the tokenizer returns CPU tensors; generating with inputs on a
# different device than the model can warn or fail depending on library versions.
batch = tokenizer_test("[|Human|] Can you give me a daily recipe of healthy diet?", return_tensors='pt')
batch = batch.to(next(model_test.parameters()).device)

# Generate at most 64 new tokens under CUDA autocast.
with torch.cuda.amp.autocast():
  output_tokens = model_test.generate(**batch, max_new_tokens=64)

# Decode the first (only) generated sequence, dropping special tokens.
print('\n\n', tokenizer_test.decode(output_tokens[0], skip_special_tokens=True))

# **Spike**

In [None]:
# spike for meta llama (lack of resource -- ram)
# NOTE(review): GenerationConfig and LlamaTokenizer are imported but unused in the visible cells.
from transformers import GenerationConfig, LlamaForCausalLM, LlamaTokenizer

# Attempts to load the 'decapoda-research/llama-7b-hf' checkpoint; per the note above,
# this apparently ran out of RAM in the author's environment.
# NOTE(review): rebinds `model` once more.
model = LlamaForCausalLM.from_pretrained('decapoda-research/llama-7b-hf')

In [None]:
# spike for prompt training
# spike for accelerate+ds
# spike for deployment