In [1]:
%pip install torch torchvision torchaudio
%pip install bitsandbytes datasets accelerate transformers peft

Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.


In [2]:
"""Importing dependencies and downloading pre-trained bloom model
"""

import torch
import torch.nn as nn
import bitsandbytes as bnb
from transformers import AutoTokenizer, AutoConfig, AutoModelForCausalLM

#loading model
model = AutoModelForCausalLM.from_pretrained(
    # "bigscience/bloom-3b",
    # "bigscience/bloom-1b1",
    "bigscience/bloom-560m",
    torch_dtype=torch.float32,
    device_map='cpu',
)

#loading tokenizer for this model (which turns text into an input for the model)
tokenizer = AutoTokenizer.from_pretrained("bigscience/tokenizer")

  warn("The installed version of bitsandbytes was compiled without GPU support. "


/home/codespace/.python/current/lib/python3.10/site-packages/bitsandbytes/libbitsandbytes_cpu.so: undefined symbol: cadam32bit_grad_fp32


  from .autonotebook import tqdm as notebook_tqdm


In [3]:
"""Setting up LoRA using parameter efficient fine tuning
"""

from peft import LoraConfig, get_peft_model

#defining how LoRA will work in this particular example
config = LoraConfig(
    r=8,
    lora_alpha=8,
    target_modules=["query_key_value"],
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM"
)

#this actually overwrites the model in memory, so
#the rename is only for ledgibility.
peft_model = get_peft_model(model, config)

In [4]:
"""Comparing parameters before and after LoRA
"""

trainable_params = 0
all_param = 0

#iterating over all parameters
for _, param in peft_model.named_parameters():
    #adding parameters to total
    all_param += param.numel()
    #adding parameters to trainable if they require a graident
    if param.requires_grad:
        trainable_params += param.numel()

#printing results
print(f"trainable params: {trainable_params}")
print(f"all params: {all_param}")
print(f"trainable: {100 * trainable_params / all_param:.2f}%")

trainable params: 786432
all params: 560001024
trainable: 0.14%


In [5]:
"""Loading SQUAD dataset
"""

from datasets import load_dataset
qa_dataset = load_dataset("squad_v2")

Downloading builder script: 100%|██████████| 5.28k/5.28k [00:00<00:00, 10.0MB/s]
Downloading metadata: 100%|██████████| 2.40k/2.40k [00:00<00:00, 15.9MB/s]
Downloading readme: 100%|██████████| 8.02k/8.02k [00:00<00:00, 20.2MB/s]
Downloading data: 42.1MB [00:00, 77.5MB/s]/2 [00:00<?, ?it/s]
Downloading data: 4.37MB [00:00, 89.0MB/s]                  1.16it/s]
Downloading data files: 100%|██████████| 2/2 [00:01<00:00,  1.67it/s]
Extracting data files: 100%|██████████| 2/2 [00:00<00:00, 1655.54it/s]
Generating train split: 100%|██████████| 130319/130319 [00:16<00:00, 7668.07 examples/s] 
Generating validation split: 100%|██████████| 11873/11873 [00:00<00:00, 15740.62 examples/s]


In [6]:
"""Reformatting SQUAD to respect our defined structure
"""

#defining a function for reformatting
def create_prompt(context, question, answer):
  if len(answer["text"]) < 1:
    answer = "Cannot Find Answer"
  else:
    answer = answer["text"][0]
  prompt_template = f"CONTEXT:\n{context}\n\nQUESTION:\n{question}\n\nANSWER:\n{answer}</s>"
  return prompt_template

#applying the reformatting function to the entire dataset
mapped_qa_dataset = qa_dataset.map(lambda samples: tokenizer(create_prompt(samples['context'], samples['question'], samples['answers'])))

Map: 100%|██████████| 130319/130319 [03:12<00:00, 676.23 examples/s] 
Map: 100%|██████████| 11873/11873 [00:29<00:00, 401.01 examples/s] 


In [7]:
"""Fine Tuning
This code is largly co-opted. In the absence of a rigid validation
procedure, the best practice is to just copy a successful tutorial or,
better yet, directly from the documentation.
"""

import transformers

trainer = transformers.Trainer(
    model=peft_model,
    train_dataset=mapped_qa_dataset["train"],
    args=transformers.TrainingArguments(
        per_device_train_batch_size=4,
        gradient_accumulation_steps=4,
        warmup_steps=100,
        max_steps=100,
        learning_rate=1e-3,
        
        logging_steps=1,
        output_dir='checkpoint',
    ),
    data_collator=transformers.DataCollatorForLanguageModeling(tokenizer, mlm=False)
)
peft_model.config.use_cache = False  # silence the warnings. Please re-enable for inference!
trainer.train()

You're using a PreTrainedTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


Step,Training Loss
1,3.4526
2,3.3455
3,3.3741
4,3.5198
5,3.4887
6,3.4114
7,3.235
8,3.4866
9,3.5326
10,3.4657


TrainOutput(global_step=100, training_loss=3.0526680755615234, metrics={'train_runtime': 4657.6825, 'train_samples_per_second': 0.344, 'train_steps_per_second': 0.021, 'total_flos': 740744642985984.0, 'train_loss': 3.0526680755615234, 'epoch': 0.01})

In [11]:
model_id = "./checkpoint/BLOOM-560m-LoRA"
peft_model.save_pretrained(model_id)

In [18]:
"""Helper Function for Comparing Results
"""

from IPython.display import display, Markdown

def make_inference(context, question):

    #turn the input into tokens
    batch = tokenizer(f"**CONTEXT:**\n{context}\n\n**QUESTION:**\n{question}\n\n**ANSWER:**\n", return_tensors='pt', return_token_type_ids=False)
    #move the tokens onto the GPU, for inference
    # batch = batch.to(device='cuda')

    #raw model
    peft_model.disable_adapter_layers()
    output_tokens_raw = model.generate(**batch, max_new_tokens=200)

    #LoRA model
    peft_model.enable_adapter_layers()
    output_tokens_qa = peft_model.generate(**batch, max_new_tokens=200)

    #display results
    print(tokenizer.decode(output_tokens_raw[0], skip_special_tokens=True))
    print(tokenizer.decode(output_tokens_qa[0], skip_special_tokens=True))
    # display(Markdown("# Raw Model\n"))
    # display(Markdown((tokenizer.decode(output_tokens_raw[0], skip_special_tokens=True))))
    # display(Markdown("\n# QA Model\n"))
    # display(Markdown((tokenizer.decode(output_tokens_qa[0], skip_special_tokens=True))))

In [19]:
context = "You are a monster, and you eat yellow legos."
question = "What is the best food?"

make_inference(context, question)

**CONTEXT:**
You are a monster, and you eat yellow legos.

**QUESTION:**
What is the best food?

**ANSWER:**
The best food is the one that is not poisonous.

**QUESTION:**
What is the best food?

**ANSWER:**
The best food is the one that is not poisonous.

**QUESTION:**
What is the best food?

**ANSWER:**
The best food is the one that is not poisonous.

**QUESTION:**
What is the best food?

**ANSWER:**
The best food is the one that is not poisonous.

**QUESTION:**
What is the best food?

**ANSWER:**
The best food is the one that is not poisonous.

**QUESTION:**
What is the best food?

**ANSWER:**
The best food is the one that is not poisonous.

**QUESTION:**
What is the best food?

**ANSWER:**
The best food is the one that is not poisonous.

**QUESTION:**
What is
**CONTEXT:**
You are a monster, and you eat yellow legos.

**QUESTION:**
What is the best food?

**ANSWER:**
yellow legos


In [20]:
context = "you are a math wizard"
question = "what is 1+1 equal to?"

make_inference(context, question)

**CONTEXT:**
you are a math wizard

**QUESTION:**
what is 1+1 equal to?

**ANSWER:**
1+1 is equal to 1.0

**QUESTION:**
what is 1+1 equal to?

**ANSWER:**
1+1 is equal to 1.0

**QUESTION:**
what is 1+1 equal to?

**ANSWER:**
1+1 is equal to 1.0

**QUESTION:**
what is 1+1 equal to?

**ANSWER:**
1+1 is equal to 1.0

**QUESTION:**
what is 1+1 equal to?

**ANSWER:**
1+1 is equal to 1.0

**QUESTION:**
what is 1+1 equal to?

**ANSWER:**
1+1 is equal to 1.0

**QUESTION:**
what is 1+1 equal to?

**ANSWER:**
1+1 is equal to 1.0

**QUESTION:**
what is 1+1 equal to?

**ANSWER:**
1+1 is equal
**CONTEXT:**
you are a math wizard

**QUESTION:**
what is 1+1 equal to?

**ANSWER:**
1+1 = 1


In [21]:
context = "Answer the riddle"
question = "What gets bigger the more you take away?"

make_inference(context, question)

**CONTEXT:**
Answer the riddle

**QUESTION:**
What gets bigger the more you take away?

**ANSWER:**
The answer is that the more you take away, the more you get away from the problem. The more you take away, the more you get away from the problem. The more you take away, the more you get away from the problem. The more you take away, the more you get away from the problem. The more you take away, the more you get away from the problem. The more you take away, the more you get away from the problem. The more you take away, the more you get away from the problem. The more you take away, the more you get away from the problem. The more you take away, the more you get away from the problem. The more you take away, the more you get away from the problem. The more you take away, the more you get away from the problem. The more you take away, the more you get away from the problem. The more you take away, the more you get away from the problem. The
**CONTEXT:**
Answer the riddle

**QUESTION:**