In [1]:
# Load model directly
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig

tokenizer = AutoTokenizer.from_pretrained(
    "meta-llama/Llama-3.2-1B")

model = AutoModelForCausalLM.from_pretrained(
    "meta-llama/Llama-3.2-1B",
    quantization_config=BitsAndBytesConfig(load_in_4bit=True))

`low_cpu_mem_usage` was None, now default to True since model is quantized.


In [2]:
from datasets import load_dataset

prompt_format = """[INST]{}[/INST]\n{}"""
EOS_TOKEN = tokenizer.eos_token

def format_prompt(examples):
    questions = examples["question"]
    answers = examples["answer"]
    texts = []
    for question, answer in zip(questions, answers):
        text = prompt_format.format(question, answer) + EOS_TOKEN
        texts.append(text)
    return { "text" : texts }
    
max_length = 128

def filter_examples(row):
    # TODO: Finish this function to filter out too long examples
    # based on tokenized text length.
    tokenized_text_length = len(row)
    return tokenized_text_length < max_length

    
# Loading the main subset of the dataset
dataset = load_dataset("openai/gsm8k", "main")
dataset = dataset.map(format_prompt, batched=True)
dataset = dataset.filter(filter_examples)


# TODO: Complete the dataset split by selecting appropriate subsets
# and selecting 8 examples for the test set,
# and 2048 examples for the train set
test_dataset = dataset['test'].select(range(8))
train_dataset = dataset['train'].select(range(2048))

In [3]:
#task 2
def compare(index):
    question = ("[INST]{}[/INST]".format(test_dataset[index]['question']))
    encoded = tokenizer.encode(question, return_tensors="pt")

    outputs = model.generate(
        encoded.to("cuda"),
        #max_length = 10000,
        return_dict_in_generate=True,
        output_scores=True,
    )

    print("Model answer:")
    for sentence in outputs[0]:
        print(tokenizer.decode(sentence))

    print("Ground truth:")
    print(test_dataset[index]['answer'])


compare(0)
compare(1)

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
The attention mask is not set and cannot be inferred from input because pad token is same as eos token. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Model answer:
<|begin_of_text|>[INST]Janet’s ducks lay 16 eggs per day. She eats three for breakfast every morning and bakes muffins for her friends every day with four. She sells the remainder at the farmers' market daily for $2 per fresh duck egg. How much in dollars does she make every day at the farmers' market?[/INST] [INST]The farmer is in the market every day and sells 15 ducks a day at a
Ground truth:
Janet sells 16 - 3 - 4 = <<16-3-4=9>>9 duck eggs a day.
She makes 9 * 2 = $<<9*2=18>>18 every day at the farmer’s market.
#### 18
Model answer:
<|begin_of_text|>[INST]A robe takes 2 bolts of blue fiber and half that much white fiber.  How many bolts in total does it take?[/INST]<|end_of_text|>
Ground truth:
It takes 2/2=<<2/2=1>>1 bolt of white fiber
So the total amount of fabric is 2+1=<<2+1=3>>3 bolts of fabric
#### 3


In [4]:
from peft import LoraModel, LoraConfig, get_peft_model, PeftModel

config = LoraConfig(
    task_type="CAUSAL_LM",
    r=8,
    lora_alpha=8,
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj", "gate_proj", "up_proj", "down_proj"],
    lora_dropout=0.05,
)

loraModel = get_peft_model(model, config)

loraModel.print_trainable_parameters()

trainable params: 5,636,096 || all params: 1,241,450,496 || trainable%: 0.4540


In [5]:
from trl import SFTConfig, SFTTrainer

sft_config = SFTConfig(
    num_train_epochs= 1, # TODO: Set number of training epochs
    per_device_train_batch_size= 8, # TODO: Set batch size
    gradient_accumulation_steps=2,
    learning_rate=1e-5, # TODO: Set learning rate
    lr_scheduler_type="cosine",
    warmup_ratio=0.2,
    optim="adamw_torch",
    dataset_text_field='text', # TODO: Set dataset text field
    max_seq_length=128, # TODO: Set maximum sequence length
    logging_strategy="steps",
    logging_steps=10,
    seed=42,
    push_to_hub=False,
    save_total_limit=1,
    save_strategy="steps",
    save_steps=1024,
    output_dir="tmp",
    report_to=None,
)

trainer = SFTTrainer(
    loraModel,
    train_dataset = train_dataset,
    args = sft_config
)

train_result = trainer.train()

Tokenizing train dataset:   0%|          | 0/2048 [00:00<?, ? examples/s]

Truncating train dataset:   0%|          | 0/2048 [00:00<?, ? examples/s]

No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


Step,Training Loss
10,2.3176
20,2.3669
30,2.322
40,2.3254
50,2.2429
60,2.1557
70,2.1147
80,2.0675
90,2.0281
100,1.997



Cannot access gated repo for url https://huggingface.co/meta-llama/Llama-3.2-1B/resolve/main/config.json.
Access to model meta-llama/Llama-3.2-1B is restricted. You must have access to it and be authenticated to access it. Please log in. - silently ignoring the lookup for the file config.json in meta-llama/Llama-3.2-1B.


In [7]:
#task 5
def compareTrained(index):
    question = ("[INST]{}[/INST]".format(test_dataset[index]['question']))
    encoded = tokenizer.encode(question, return_tensors="pt")

    outputs = loraModel.generate(
        encoded.to("cuda"),
        #max_length = 10000,
        return_dict_in_generate=True,
        output_scores=True,
    )

    print("Model answer:")
    for sentence in outputs[0]:
        print(tokenizer.decode(sentence))

    print("Ground truth:")
    print(test_dataset[index]['answer'])


compareTrained(0)
compareTrained(1)

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Model answer:
<|begin_of_text|>[INST]Janet’s ducks lay 16 eggs per day. She eats three for breakfast every morning and bakes muffins for her friends every day with four. She sells the remainder at the farmers' market daily for $2 per fresh duck egg. How much in dollars does she make every day at the farmers' market?[/INST]<|end_of_text|>
Ground truth:
Janet sells 16 - 3 - 4 = <<16-3-4=9>>9 duck eggs a day.
She makes 9 * 2 = $<<9*2=18>>18 every day at the farmer’s market.
#### 18
Model answer:
<|begin_of_text|>[INST]A robe takes 2 bolts of blue fiber and half that much white fiber.  How many bolts in total does it take?[/INST]<|end_of_text|>
Ground truth:
It takes 2/2=<<2/2=1>>1 bolt of white fiber
So the total amount of fabric is 2+1=<<2+1=3>>3 bolts of fabric
#### 3
