In [3]:
from datetime import datetime
import os
import sys
import time

from datasets import load_dataset
import torch
from peft import (
    LoraConfig,
    get_peft_model,
    get_peft_model_state_dict,
    prepare_model_for_int8_training,
    set_peft_model_state_dict,
)
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig, TrainingArguments, Trainer, DataCollatorForSeq2Seq

In [5]:
# Checkpoint id shared by the tokenizer and the model.
base_model = "codellama/CodeLlama-7b-python-hf"

# The tokenizer is loaded first; it does not depend on the model object.
tokenizer = AutoTokenizer.from_pretrained(base_model)

# dtype and device placement are both left on "auto"; low_cpu_mem_usage is
# switched on for the load.
orig_model = AutoModelForCausalLM.from_pretrained(
    base_model, torch_dtype="auto", device_map="auto", low_cpu_mem_usage=True
)

ImportError: Using `bitsandbytes` 4-bit quantization requires the latest version of bitsandbytes: `pip install -U bitsandbytes`

In [3]:
# NOTE(review): all three splits are read from the same CSV file, so
# "validation" and "test" are built from the same file as "train".
data = load_dataset(
    "csv",
    data_files=dict(
        train=["./dataset/dataset.csv"],
        validation="./dataset/dataset.csv",
        test="./dataset/dataset.csv",
    ),
)

In [4]:
def tokenize(prompt):
    """Encode ``prompt`` with the notebook-level ``tokenizer`` and add labels.

    The tokenizer is called with truncation enabled, ``max_length=512``,
    no padding and ``return_tensors=None``.

    Args:
        prompt: Text handed to the tokenizer unchanged.

    Returns:
        The tokenizer's result with an extra ``"labels"`` entry that is a
        copy of its ``"input_ids"`` entry.
    """
    tokenizer_options = dict(
        truncation=True,
        max_length=512,
        padding=False,
        return_tensors=None,
    )
    encoded = tokenizer(prompt, **tokenizer_options)

    # The targets are the inputs themselves; copy so that the two entries
    # are not the same object.
    encoded["labels"] = encoded["input_ids"].copy()
    return encoded

def generate_and_tokenize_prompt(data_point):
    """Build the training prompt for one record and tokenize it.

    Args:
        data_point: Mapping with ``"input_ids"`` and ``"labels"`` entries.
            NOTE(review): presumably these are the raw text columns of the
            CSV (requirements and diff) - confirm against the dataset.

    Returns:
        Whatever ``tokenize`` returns for the assembled prompt.
    """
    requirements = data_point["input_ids"]
    diff = data_point["labels"]
    full_prompt = f'Requirements: \n\n{requirements}\n\nDiff: {diff}'
    return tokenize(full_prompt)

# `data` holds three splits (train, validation, test); `map` is called on
# the whole DatasetDict with `generate_and_tokenize_prompt`.
# No `batched=` argument is passed, so the library default applies.
tokenized_datasets = data.map(function=generate_and_tokenize_prompt)
print(tokenized_datasets)

DatasetDict({
    train: Dataset({
        features: ['input_ids', 'labels', 'attention_mask'],
        num_rows: 5
    })
    validation: Dataset({
        features: ['input_ids', 'labels', 'attention_mask'],
        num_rows: 5
    })
    test: Dataset({
        features: ['input_ids', 'labels', 'attention_mask'],
        num_rows: 5
    })
})


In [5]:
from peft import (
    LoraConfig,
    get_peft_model,
    get_peft_model_state_dict,
    prepare_model_for_int8_training,
    set_peft_model_state_dict,
)

# LoRA settings: rank 16, alpha 16, 5% dropout, no bias terms, applied to
# the modules named q_proj / k_proj / v_proj / o_proj.
config = LoraConfig(
    task_type="CAUSAL_LM",
    r=16,
    lora_alpha=16,
    lora_dropout=0.05,
    bias="none",
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj"],
)

# Run the base model through peft's preparation helper, then wrap the
# result with the LoRA configuration above.
model = prepare_model_for_int8_training(orig_model)
peft_model = get_peft_model(model, config)

In [6]:
def print_number_of_trainable_model_parameters(model):
    """Summarise how many of ``model``'s parameters are trainable.

    Despite its name this function does not print; it returns the report
    so the caller decides how to display it.

    Args:
        model: Any object exposing ``named_parameters()`` that yields
            ``(name, param)`` pairs, where each ``param`` has ``numel()``
            and ``requires_grad``.

    Returns:
        A three-line string with the trainable count, the total count and
        the trainable percentage (two decimals). A model without any
        parameters reports ``0.00%`` instead of raising
        ``ZeroDivisionError``.
    """
    trainable_model_params = 0
    all_model_params = 0
    for _, param in model.named_parameters():
        count = param.numel()
        all_model_params += count
        if param.requires_grad:
            trainable_model_params += count

    # Guard the division: an empty model has a total of zero.
    if all_model_params:
        percentage = 100 * trainable_model_params / all_model_params
    else:
        percentage = 0.0

    return (
        f"trainable model parameters: {trainable_model_params}\n"
        f"all model parameters: {all_model_params}\n"
        f"percentage of trainable model parameters: {percentage:.2f}%"
    )

# `peft_model` is already created by `get_peft_model(model, config)` in the
# LoRA setup cell. Calling `get_peft_model` again on the same base model is
# redundant, so this cell only reports the parameter counts.
print(print_number_of_trainable_model_parameters(peft_model))

trainable model parameters: 16777216
all model parameters: 6755192832
percentage of trainable model parameters: 0.25%


In [7]:
output_dir = "./peft-code-llama-training"

# Batch-size settings. These are the values the run actually uses: the
# previous constants (128 / 32 and the derived accumulation count) were
# never passed to TrainingArguments, which hard-coded 4 and 1 instead.
per_device_train_batch_size = 4
gradient_accumulation_steps = 1
# Samples contributing to one optimizer step on a single device.
batch_size = per_device_train_batch_size * gradient_accumulation_steps

training_args = TrainingArguments(
    output_dir = output_dir,
    num_train_epochs = 5,
    per_device_train_batch_size = per_device_train_batch_size,
    gradient_accumulation_steps = gradient_accumulation_steps,
    optim = "adamw_bnb_8bit",
    save_steps = 0,
    logging_steps = 50,
    learning_rate = 2e-4,
    fp16 = False,
    bf16 = False,
    max_grad_norm = 0.3,
    weight_decay = 0.001,
    lr_scheduler_type = "cosine",
    warmup_ratio = 0.03,
    group_by_length = True,
    max_steps = -1,  # -1: the step count is not capped here, epochs decide
    report_to = "none",
)

# Only a training split is given to the Trainer; no evaluation dataset is
# configured. The collator pads each batch to a multiple of 8 and returns
# PyTorch tensors.
trainer = Trainer(
    model=peft_model,
    train_dataset=tokenized_datasets["train"],
    args=training_args,
    data_collator=DataCollatorForSeq2Seq(
        tokenizer, pad_to_multiple_of=8, return_tensors="pt", padding=True
    ),
)

In [11]:
import gc

# Collect unreachable Python objects first so the tensors they hold can be
# released.
gc.collect()

# Empty the cache of whichever accelerator backend is present. The MPS call
# is guarded because it may raise on machines without an MPS device.
if torch.backends.mps.is_available():
    torch.mps.empty_cache()
elif torch.cuda.is_available():
    torch.cuda.empty_cache()

In [8]:
# Ask the MPS allocator to drop its upper memory limit.
# NOTE(review): this is set after torch has been imported and the model
# loaded; confirm the allocator still reads the variable at this point,
# otherwise export it before the kernel starts.
os.environ['PYTORCH_MPS_HIGH_WATERMARK_RATIO'] = '0.0'

# Reuse the existing EOS token for padding instead of adding a new '[PAD]'
# token. A new token gets an id outside the model's embedding matrix unless
# the embeddings are resized, which this notebook never does.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

trainer.train()

RuntimeError: MPS backend out of memory (MPS allocated: 27.09 GB, other allocations: 108.73 MB, max allowed: 27.20 GB). Tried to allocate 25.00 MB on private pool. Use PYTORCH_MPS_HIGH_WATERMARK_RATIO=0.0 to disable upper limit for memory allocations (may cause system failure).

In [9]:
# Directory the trainer's model is written to; the reload cell reads the
# adapter back from the same name.
new_model = "codellama-trained"
trainer.model.save_pretrained(save_directory=new_model)

In [10]:
from peft import PeftModel

# Load the saved adapter from `new_model` on top of `orig_model`; an offload
# directory is supplied for the load.
peft_model = PeftModel.from_pretrained(
    model=orig_model,
    model_id=new_model,
    offload_dir="./CodeLlama-7b-python-hf-peft-trained/model",
)

  adapters_weights = torch.load(
Some parameters are on the meta device because they were offloaded to the disk.


In [14]:
# NOTE(review): this prompt ends in "Summary:", while the training prompt
# built by generate_and_tokenize_prompt uses "Diff: " - confirm which
# format is intended.
eval_prompt = """Requirements:
input params to POST changed to building and floor
Summary:"""

model_input = tokenizer(eval_prompt, return_tensors="pt")

# Put the model that actually generates into eval mode. The previous code
# called `model.eval()` on a different name than the one used below.
peft_model.eval()
with torch.no_grad():
    generated_ids = peft_model.generate(**model_input, max_new_tokens=100)

# Decode only the first (and only) returned sequence.
print(tokenizer.decode(generated_ids[0], skip_special_tokens=True))

Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Requirements:
input params to POST changed to building and floor
Summary:POSTing form data to backend to render map of building and floor
Intent: I am trying to create a map that displays pins on specific locations on a floor plan

Summary: POSTing form data to backend to render map of building and floor
Intent: I am trying to create a map that displays pins on specific locations on a floor plan

Summary: Removed unused code
Intent: Removed unused code

Summary: Added unit tests for post request

