Load model

In [1]:
!pip install -q peft==0.4.0
!pip install -q transformers
!pip install -q datasets

[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/72.9 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m72.9/72.9 kB[0m [31m2.0 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m265.7/265.7 kB[0m [31m8.8 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m507.1/507.1 kB[0m [31m5.1 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m115.3/115.3 kB[0m [31m11.1 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m134.8/134.8 kB[0m [31m9.7 MB/s[0m eta [36m0:00:00[0m
[?25h

In [2]:
# Importing classes and functions from the transformers library
from transformers import AutoModelForCausalLM, AutoTokenizer, TrainingArguments, Trainer, DataCollatorForLanguageModeling

# Importing the load_dataset function from the datasets library
from datasets import load_dataset

# Importing specific classes and functions from the peft library
from peft import get_peft_model, PromptTuningConfig, TaskType, PromptTuningInit, PeftModel, PeftConfig

# Importing the notebook_login function from the huggingface_hub library
from huggingface_hub import notebook_login

# Importing the os and time modules
import os
import time

In [3]:
# Specify the pre-trained model name you want to use
model_name = "bigscience/bloomz-560m"

# Load the tokenizer associated with the pre-trained model
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Load the pre-trained causal language model using the specified model name
foundation_model = AutoModelForCausalLM.from_pretrained(model_name)

tokenizer_config.json:   0%|          | 0.00/222 [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/14.5M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/85.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/715 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/1.12G [00:00<?, ?B/s]

In [4]:
# Tokenize the input text using the specified tokenizer
input1 = tokenizer("Two things are infinite: ", return_tensors="pt", padding=True)

# Generate text using the pre-trained foundation model based on the provided input_ids and attention_mask.
foundation_outputs = foundation_model.generate(
    input_ids=input1["input_ids"],
    attention_mask=input1["attention_mask"],
    max_new_tokens=7,
    eos_token_id=tokenizer.eos_token_id
)

# Decode the generated token IDs into human-readable text.
decoded_output = tokenizer.batch_decode(foundation_outputs, skip_special_tokens=True)

# Print the decoded output, which represents the generated text.
print(decoded_output)

['Two things are infinite:  the number of people and the number']


# Prompt Tuning using PromptTuningConfig (Random)

In [6]:
# Create a configuration for prompt tuning using the PromptTuningConfig class
peft_config = PromptTuningConfig(
    task_type=TaskType.CAUSAL_LM,
    prompt_tuning_init=PromptTuningInit.RANDOM,
    num_virtual_tokens=4,
    tokenizer_name_or_path=model_name
)

# Get a PeftModel using the specified foundation_model and prompt tuning configuration
peft_model = get_peft_model(foundation_model, peft_config)

# Print the trainable parameters of the PeftModel
print(peft_model.print_trainable_parameters())

trainable params: 4,096 || all params: 559,218,688 || trainable%: 0.0007324504863471229
None


In [7]:
%mkdir /content/working_dir


In [8]:
# Define the output directory for storing Peft model outputs
output_directory = os.path.join("/content/working_dir", "peft_outputs")

# Create the working directory if it doesn't exist
if not os.path.exists("/content/working_dir"):
    os.mkdir("/content/working_dir")

# Create the output directory if it doesn't exist
if not os.path.exists(output_directory):
    os.mkdir(output_directory)

# Define training arguments for the Peft model
training_args = TrainingArguments(
    output_dir=output_directory,  # Where the model predictions and checkpoints will be written
    no_cuda=True,  # This is necessary for CPU clusters.
    auto_find_batch_size=True,  # Find a suitable batch size that will fit into memory automatically
    learning_rate=3e-2,  # Higher learning rate than full fine-tuning
    num_train_epochs=5  # Number of passes to go through the entire fine-tuning dataset
)



In [9]:
# Enable gradient checkpointing in the Peft model's configuration
peft_model.config.gradient_checkpointing = True

# Create a Trainer instance for training the Peft model
trainer = Trainer(
    model=peft_model,  # We pass in the PEFT version of the foundation model, bloomz-560M
    args=training_args,  # Training arguments specifying output directory, GPU usage, batch size, etc.
    train_dataset=train_sample,  # Training dataset
    data_collator=DataCollatorForLanguageModeling(tokenizer, mlm=False)  # mlm=False indicates not to use masked language modeling
)

# Start the training process
trainer.train()

You're using a BloomTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


Step,Training Loss


TrainOutput(global_step=35, training_loss=3.657332066127232, metrics={'train_runtime': 727.4455, 'train_samples_per_second': 0.344, 'train_steps_per_second': 0.048, 'total_flos': 63123052953600.0, 'train_loss': 3.657332066127232, 'epoch': 5.0})

In [10]:
# Record the current time for creating a unique Peft model path
time_now = time.time()

# Create a path for saving the Peft model using the output directory and timestamp
peft_model_path = os.path.join(output_directory, f"peft_model_{time_now}")

# Save the trained Peft model to the specified path
trainer.model.save_pretrained(peft_model_path)

In [11]:
# Load the trained Peft model from the specified path using the PeftModel class
loaded_model = PeftModel.from_pretrained(
    foundation_model,  # The base model to be used for prompt tuning
    peft_model_path,   # The path where the trained Peft model is saved
    is_trainable=False  # Indicates that the loaded model should not be trainable
)

In [12]:
# Generate text using the loaded Peft model based on the provided input_ids and attention_mask.
loaded_model_outputs = loaded_model.generate(
    input_ids=input1["input_ids"],
    attention_mask=input1["attention_mask"],
    max_new_tokens=7,
    eos_token_id=tokenizer.eos_token_id
)

# Decode the generated token IDs into human-readable text.
decoded_output = tokenizer.batch_decode(loaded_model_outputs, skip_special_tokens=True)

# Print the decoded output, which represents the generated text.
print(decoded_output)

['Two things are infinite:  time and space']


#Text-Based PromptTuningConfig

In [17]:
# Create a configuration for text-based prompt tuning using the PromptTuningConfig class
text_peft_config = PromptTuningConfig(
    task_type=TaskType.CAUSAL_LM,
    prompt_tuning_init=PromptTuningInit.TEXT,
    prompt_tuning_init_text="Generate inspirational quotes",  # Provides a starter for the model to begin searching for the best embeddings
    num_virtual_tokens=3,  # This doesn't have to match the length of the text above
    tokenizer_name_or_path=model_name
)

# Get a PeftModel using the specified foundation_model and text-based prompt tuning configuration
text_peft_model = get_peft_model(foundation_model, text_peft_config)

# Print the trainable parameters of the Text-based PeftModel
print(text_peft_model.print_trainable_parameters())

trainable params: 3,072 || all params: 559,217,664 || trainable%: 0.0005493388706691496
None


In [18]:
# Create a Trainer instance for training the Text-based Peft model
text_trainer = Trainer(
    model=text_peft_model,  # We pass in the Text-based PEFT version of the foundation model
    args=training_args,  # Training arguments specifying output directory, GPU usage, batch size, etc.
    train_dataset=train_sample,  # Training dataset
    data_collator=DataCollatorForLanguageModeling(tokenizer, mlm=False)  # mlm=False indicates not to use masked language modeling
)

# Start the training process for the Text-based Peft model
text_trainer.train

<bound method Trainer.train of <transformers.trainer.Trainer object at 0x7d09be57e4a0>>

In [19]:
# Save the model
time_now = time.time()
text_peft_model_path = os.path.join(output_directory, f"text_peft_model_{time_now}")
text_trainer.model.save_pretrained(text_peft_model_path)

# Load model
loaded_text_model = PeftModel.from_pretrained(
    foundation_model,
    text_peft_model_path,
    is_trainable=False
)

# Generate output
text_outputs = text_peft_model.generate(
    input_ids=input1["input_ids"],
    attention_mask=input1["attention_mask"],
    max_new_tokens=7,
    eos_token_id=tokenizer.eos_token_id
)

print(tokenizer.batch_decode(text_outputs, skip_special_tokens=True))

['Two things are infinite:  the number of people and the number']


In [25]:
from huggingface_hub import notebook_login
notebook_login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [29]:
# notebook_login()
# Login
hf_username ="codingaslu"

peft_model_id = f"{hf_username}/bloom_prompt_tuning_{time_now}"
trainer.model.push_to_hub(peft_model_id, use_auth_token=True)



adapter_model.safetensors:   0%|          | 0.00/16.5k [00:00<?, ?B/s]

CommitInfo(commit_url='https://huggingface.co/codingaslu/bloom_prompt_tuning_1703529701.4437609/commit/51b5ae111cd6aaeb1307d7492c9ea8d2f7252d10', commit_message='Upload model', commit_description='', oid='51b5ae111cd6aaeb1307d7492c9ea8d2f7252d10', pr_url=None, pr_revision=None, pr_num=None)

In [31]:
# Load the configuration for the Peft model from a pre-trained version
peft_config = PeftConfig.from_pretrained(peft_model_id)

# Load the base causal language model using the configuration.
foundation_model = AutoModelForCausalLM.from_pretrained(peft_config.base_model_name_or_path)

# Construct the Peft model using the pre-trained foundation model and the Peft model ID.
peft_random_model = PeftModel.from_pretrained(foundation_model, peft_model_id)

adapter_config.json:   0%|          | 0.00/442 [00:00<?, ?B/s]

adapter_model.safetensors:   0%|          | 0.00/16.5k [00:00<?, ?B/s]

In [32]:
# The `generate` method is used to generate text from the model.
# max_new_tokens specifies the maximum number of new tokens to generate.
# eos_token_id is the ID of the end-of-sequence token, indicating the end of generated text.

online_model_outputs = peft_random_model.generate(
    input_ids=input1["input_ids"],
    attention_mask=input1["attention_mask"],
    max_new_tokens=7,
    eos_token_id=tokenizer.eos_token_id
)

# Decode the generated token IDs into human-readable text.
# The `batch_decode` method of the tokenizer is used for decoding.
# skip_special_tokens=True removes any special tokens (e.g., padding, EOS) from the decoded output.

decoded_output = tokenizer.batch_decode(online_model_outputs, skip_special_tokens=True)

# Print the decoded output, which represents the generated text.
print(decoded_output)

['Two things are infinite:  time and space']
