<a href="https://colab.research.google.com/github/alina775/20242R0136COSE47402/blob/main/FinalProject/deep_learning_final_project.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## PEFT initial example - https://www.datacamp.com/tutorial/llama3-fine-tuning-locally

In [1]:
%%capture
%pip install -U transformers
%pip install -U datasets
%pip install -U accelerate
%pip install -U peft
%pip install -U trl
%pip install -U bitsandbytes
%pip install -U wandb

In [2]:
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    HfArgumentParser,
    TrainingArguments,
    pipeline,
    logging,
)
from peft import (
    LoraConfig,
    PeftModel,
    prepare_model_for_kbit_training,
    get_peft_model,
)
import os, torch, wandb
from datasets import load_dataset
from trl import SFTTrainer, setup_chat_format

In [6]:
from google.colab import userdata
wandb_key = userdata.get('wandb_key')
wandb.login(key=wandb_key)
run = wandb.init(
    project='Fine-tune Llama 3 8B on Medical Dataset',
    job_type="training",
    anonymous="allow"
)

[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Currently logged in as: [33malina775[0m ([33mdeep_learning_final_project[0m). Use [1m`wandb login --relogin`[0m to force relogin


In [10]:
from huggingface_hub import notebook_login
huggingface_key = userdata.get('huggingface')
notebook_login(huggingface_key)
#tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-3.2-1B-Instruct")
#model = AutoModelForCausalLM.from_pretrained("meta-llama/Llama-3.2-1B-Instruct")

base_model = "meta-llama/Llama-3.2-1B-Instruct"
dataset_name = "ruslanmv/ai-medical-chatbot"
new_model = "llama-3-8b-chat-doctor"

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [11]:
torch_dtype = torch.float16
attn_implementation = "eager"

In [14]:
# QLoRA config
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch_dtype,
    bnb_4bit_use_double_quant=True,
)

# Load model
model = AutoModelForCausalLM.from_pretrained(
    base_model,
    quantization_config=bnb_config,
    device_map="auto",
    attn_implementation=attn_implementation
)

config.json:   0%|          | 0.00/877 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/2.47G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/189 [00:00<?, ?B/s]

In [16]:
# Load tokenizer
tokenizer = AutoTokenizer.from_pretrained(base_model)
tokenizer.chat_template = None
model, tokenizer = setup_chat_format(model, tokenizer)

The new embeddings will be initialized from a multivariate normal distribution that has old embeddings' mean and covariance. As described in this article: https://nlp.stanford.edu/~johnhew/vocab-expansion.html. To disable this, use `mean_resizing=False`


In [17]:
# LoRA config
peft_config = LoraConfig(
    r=16,
    lora_alpha=32,
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM",
    target_modules=['up_proj', 'down_proj', 'gate_proj', 'k_proj', 'q_proj', 'v_proj', 'o_proj']
)
model = get_peft_model(model, peft_config)

In [18]:
#Importing the dataset
dataset = load_dataset(dataset_name, split="all")
dataset = dataset.shuffle(seed=65).select(range(1000)) # Only use 1000 samples for quick demo

def format_chat_template(row):
    row_json = [{"role": "user", "content": row["Patient"]},
               {"role": "assistant", "content": row["Doctor"]}]
    row["text"] = tokenizer.apply_chat_template(row_json, tokenize=False)
    return row

dataset = dataset.map(
    format_chat_template,
    num_proc=4,
)

dataset['text'][3]

README.md:   0%|          | 0.00/863 [00:00<?, ?B/s]

dialogues.parquet:   0%|          | 0.00/142M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/256916 [00:00<?, ? examples/s]

Map (num_proc=4):   0%|          | 0/1000 [00:00<?, ? examples/s]

'<|im_start|>user\nFell on sidewalk face first about 8 hrs ago. Swollen, cut lip bruised and cut knee, and hurt pride initially. Now have muscle and shoulder pain, stiff jaw(think this is from the really swollen lip),pain in wrist, and headache. I assume this is all normal but are there specific things I should look for or will I just be in pain for a while given the hard fall?<|im_end|>\n<|im_start|>assistant\nHello and welcome to HCM,The injuries caused on various body parts have to be managed.The cut and swollen lip has to be managed by sterile dressing.The body pains, pain on injured site and jaw pain should be managed by pain killer and muscle relaxant.I suggest you to consult your primary healthcare provider for clinical assessment.In case there is evidence of infection in any of the injured sites, a course of antibiotics may have to be started to control the infection.Thanks and take careDr Shailja P Wahal<|im_end|>\n'

In [19]:
dataset = dataset.train_test_split(test_size=0.1)

In [20]:
training_arguments = TrainingArguments(
    output_dir=new_model,
    per_device_train_batch_size=1,
    per_device_eval_batch_size=1,
    gradient_accumulation_steps=2,
    optim="paged_adamw_32bit",
    num_train_epochs=1,
    evaluation_strategy="steps",
    eval_steps=0.2,
    logging_steps=1,
    warmup_steps=10,
    logging_strategy="steps",
    learning_rate=2e-4,
    fp16=False,
    bf16=False,
    group_by_length=True,
    report_to="wandb"
)



In [21]:
trainer = SFTTrainer(
    model=model,
    train_dataset=dataset["train"],
    eval_dataset=dataset["test"],
    peft_config=peft_config,
    max_seq_length=512,
    dataset_text_field="text",
    tokenizer=tokenizer,
    args=training_arguments,
    packing= False,
)


Deprecated positional argument(s) used in SFTTrainer, please use the SFTConfig to set these arguments instead.


Map:   0%|          | 0/900 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

In [22]:
trainer.train()



Step,Training Loss,Validation Loss
90,2.9016,3.101636
180,3.0635,3.033378
270,3.0761,3.003946
360,2.8325,2.983763
450,3.0283,2.969253


Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.
Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.
Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.
Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.
Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.
Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.
Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.
Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.
Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.
Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.


TrainOutput(global_step=450, training_loss=3.0649587885538736, metrics={'train_runtime': 302.0588, 'train_samples_per_second': 2.98, 'train_steps_per_second': 1.49, 'total_flos': 1209310634385408.0, 'train_loss': 3.0649587885538736, 'epoch': 1.0})

In [23]:
wandb.finish()
model.config.use_cache = True

0,1
eval/loss,█▄▃▂▁
eval/runtime,▄▁▄▃█
eval/samples_per_second,▅█▅▆▁
eval/steps_per_second,▅█▅▆▁
train/epoch,▁▁▁▂▂▂▂▂▂▂▂▃▃▃▃▄▄▅▅▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇████
train/global_step,▁▁▁▁▂▂▂▂▂▂▃▃▃▄▄▅▅▅▅▅▅▅▅▅▆▆▆▇▇▇▇▇▇▇██████
train/grad_norm,▆▆▆▃▄▅▄▅▅▄▆▅▃▄▅▅▇▄▄▄▁▄▃▃▅█▅▅▅▇▃▃▄▄▆▄▄▆▅▅
train/learning_rate,▂████▇▇▇▇▇▇▆▆▆▆▆▅▅▅▅▅▅▅▅▅▄▄▃▃▃▂▂▂▂▂▁▁▁▁▁
train/loss,█▄▇▅▆▅▆▆█▅▇▅▄▅▇▃▃▂▅▇▃▄▅▄▃▅▄▆▃▄▆▄▅█▅▇▆▁▅▅

0,1
eval/loss,2.96925
eval/runtime,12.6449
eval/samples_per_second,7.908
eval/steps_per_second,7.908
total_flos,1209310634385408.0
train/epoch,1.0
train/global_step,450.0
train/grad_norm,3.14005
train/learning_rate,0.0
train/loss,3.0283


## REFT inital example - https://medium.com/@syed_hasan/finetuning-llama-3-using-reft-representation-fine-tuning-technique-00f4fe1f497c

In [1]:
try:
    import pyreft

except ModuleNotFoundError:
    !pip install git+https://github.com/stanfordnlp/pyreft.git

Collecting git+https://github.com/stanfordnlp/pyreft.git
  Cloning https://github.com/stanfordnlp/pyreft.git to /tmp/pip-req-build-ntbf5ajj
  Running command git clone --filter=blob:none --quiet https://github.com/stanfordnlp/pyreft.git /tmp/pip-req-build-ntbf5ajj
  Resolved https://github.com/stanfordnlp/pyreft.git to commit b07868925d67e13efe6e222a6915e7ef0ce1e239
  Running command git submodule update --init --recursive -q
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting pyvene>=0.1.4 (from pyreft==0.0.8)
  Downloading pyvene-0.1.6-py3-none-any.whl.metadata (4.4 kB)
Collecting transformers==4.45.1 (from pyreft==0.0.8)
  Downloading transformers-4.45.1-py3-none-any.whl.metadata (44 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m44.4/44.4 kB[0m [31m4.1 MB/s[0m eta [36m0:00:00[0m
Collecting ipywidgets>=8.1.1 (from pyreft==0.0.8)
  Downloading ipywidgets-8.1.5-py3-none-any.whl.metadata (2.3 kB)
Collecting evaluate>=0.4.1 (from pyreft==0.0.8)
  Do

In [20]:
%pip install transformers==4.45.2 sentence-transformers==3.1.1
#%pip install -U bitsandbytes

Collecting transformers==4.45.2
  Downloading transformers-4.45.2-py3-none-any.whl.metadata (44 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/44.4 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m44.4/44.4 kB[0m [31m3.6 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting sentence-transformers==3.1.1
  Downloading sentence_transformers-3.1.1-py3-none-any.whl.metadata (10 kB)
Downloading transformers-4.45.2-py3-none-any.whl (9.9 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m9.9/9.9 MB[0m [31m48.1 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading sentence_transformers-3.1.1-py3-none-any.whl (245 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m245.3/245.3 kB[0m [31m12.8 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: transformers, sentence-transformers
  Attempting uninstall: transformers
    Found existing installation: transformers 4.47.0.dev0
    Unin

In [1]:
import wandb
from google.colab import userdata
wandb_key = userdata.get('wandb_key')
wandb.login(key=wandb_key)
run = wandb.init(
    project='Fine-tune Llama 3 reft',
    job_type="training",
    anonymous="allow"
)

[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.
[34m[1mwandb[0m: Currently logged in as: [33malina775[0m ([33mdeep_learning_final_project[0m). Use [1m`wandb login --relogin`[0m to force relogin
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


In [2]:
from huggingface_hub import notebook_login
huggingface_key = userdata.get('huggingface')
notebook_login(huggingface_key)

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [3]:
import torch, transformers, pyreft
device = "cuda"

prompt_no_input_template = """<|begin_of_text|><|start_header_id|>user<|end_header_id|>%s<|eot_id|><|start_header_id|>assistant<|end_header_id|>"""

model_name_or_path = "meta-llama/Llama-3.2-1B-Instruct"
model = transformers.AutoModelForCausalLM.from_pretrained(
    model_name_or_path, torch_dtype=torch.bfloat16, device_map=device, trust_remote_code=True)

# # get tokenizer
tokenizer = transformers.AutoTokenizer.from_pretrained(
    model_name_or_path, model_max_length=2048,
    padding_side="right", use_fast=False)
tokenizer.pad_token = tokenizer.eos_token

nnsight is not detected. Please install via 'pip install nnsight' for nnsight backend.


2024-11-29 09:17:11.992980: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-11-29 09:17:12.027724: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-11-29 09:17:12.038764: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1452] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-11-29 09:17:12.066262: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [4]:
# get reft model
reft_config = pyreft.ReftConfig(representations={
    "layer": 8, "component": "block_output",
    "low_rank_dimension": 4,
    "intervention": pyreft.LoreftIntervention(embed_dim=model.config.hidden_size,
    low_rank_dimension=4)})
reft_model = pyreft.get_reft_model(model, reft_config)
reft_model.set_device("cuda")
reft_model.print_trainable_parameters()

trainable intervention params: 16,388 || trainable model params: 0
model params: 1,235,814,400 || trainable%: 0.0013260890955794009


In [23]:
dataset_name = "teknium/OpenHermes-2.5"
from datasets import load_dataset

dataset = load_dataset(dataset_name, split="all")
dataset = dataset.shuffle(seed=65).select(range(1_000))

data_module = pyreft.make_last_position_supervised_data_module(
    tokenizer, model, [prompt_no_input_template % row["conversations"][0]["value"] for row in dataset],
    [row["conversations"][1]["value"] for row in dataset])

In [6]:
print(transformers.__version__)

4.45.2


In [27]:
training_args = transformers.TrainingArguments(
    per_device_train_batch_size = 1,
    gradient_accumulation_steps = 2,
    warmup_steps = 10,
    num_train_epochs = 1,
    learning_rate = 5e-4,
    bf16 = True,
    logging_steps = 50,
    optim = "paged_adamw_32bit",
    weight_decay = 0.0,
    lr_scheduler_type = "cosine",
    output_dir = "outputs",
    report_to="wandb",
    fp16=False,
    logging_strategy="steps",
)

#output_dir=new_model,
#    per_device_train_batch_size=1,
#    per_device_eval_batch_size=1,
#    gradient_accumulation_steps=2,
#    optim="paged_adamw_32bit",
#    num_train_epochs=1,
#    evaluation_strategy="steps",
#    eval_steps=0.2,
#    logging_steps=1,
#    warmup_steps=10,
#    logging_strategy="steps",
#    learning_rate=2e-4,
#    fp16=False,
#    bf16=False,
#    group_by_length=True,
#    report_to="wandb"

trainer = pyreft.ReftTrainerForCausalLM(model=reft_model, tokenizer=tokenizer, args=training_args, **data_module)

trainer.train()

Step,Training Loss
50,1.8726
100,1.9701
150,1.9353
200,1.8388
250,1.7118
300,1.7667
350,1.9408
400,2.2384
450,1.866
500,1.8454


Directory 'outputs/checkpoint-500/intervenable_model' created successfully.


TrainOutput(global_step=500, training_loss=1.8985883178710938, metrics={'train_runtime': 611.1456, 'train_samples_per_second': 1.636, 'train_steps_per_second': 0.818, 'total_flos': 0.0, 'train_loss': 1.8985883178710938, 'epoch': 1.0})

In [28]:
wandb.finish()
model.config.use_cache = True

VBox(children=(Label(value='0.023 MB of 0.023 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
train/epoch,▁▂▂▂▂▁▁▁▁▂▁▂▂▂▂▁▁▂▂▄▂▂▁▂▂▂▃▃▃▄▅▆▆▆▆▇██▁▂
train/global_step,▁▁▁▁▂▂▁▂▂▂▂▂▂▁▁▁▁▁▁▁▂▁▂▂▂▂▃▁▂▂▂▃▄▄▄▅▅▅▁█
train/grad_norm,▁▁▁▁▂▁▁▁▃▁▁▁▄▁▂▂▁▁▄▂▁▁▂▃▇▂▁▁▁▁▁█▁▁▂▁▁▁▂▁
train/learning_rate,█▁▁▁▁▁▁▁▁▁████▇████▇▂▃███████▇▆▆▆▅▅▃▃▂██
train/loss,▁▃▃▄▁▃▁▂▄▁▃▃▃▂█▄▂▅▃▆▂▆▄▃█▂▇▂▄▆▇▂▄▂▂▅▂▄▅▅

0,1
total_flos,0.0
train/epoch,1.0
train/global_step,500.0
train/grad_norm,1.6423
train/learning_rate,0.0
train/loss,1.8454
train_loss,1.89859
train_runtime,611.1456
train_samples_per_second,1.636
train_steps_per_second,0.818


## My own initial LoRA implementation