<a href="https://colab.research.google.com/github/alina775/20242R0136COSE47402/blob/main/FinalProject/deep_learning_final_project.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## PEFT initial example - https://www.datacamp.com/tutorial/llama3-fine-tuning-locally

In [None]:
%%capture
%pip install -U transformers
%pip install -U datasets
%pip install -U accelerate
%pip install -U peft
%pip install -U trl
%pip install -U bitsandbytes
%pip install -U wandb

In [None]:
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    HfArgumentParser,
    TrainingArguments,
    DefaultDataCollator,
    Trainer,
    pipeline,
    logging,
)
from peft import (
    LoraConfig,
    PeftModel,
    prepare_model_for_kbit_training,
    get_peft_model,
)
import os, torch, wandb
from datasets import load_dataset
from trl import SFTTrainer, setup_chat_format

In [None]:
from google.colab import userdata
wandb_key = userdata.get('wandb_key')
wandb.login(key=wandb_key)
run = wandb.init(
    project='Fine-tune Llama 3 8B on Medical Dataset',
    job_type="training",
    anonymous="allow"
)

[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.
[34m[1mwandb[0m: Currently logged in as: [33malina775[0m ([33mdeep_learning_final_project[0m). Use [1m`wandb login --relogin`[0m to force relogin
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


In [None]:
from huggingface_hub import notebook_login
huggingface_key = userdata.get('huggingface')
notebook_login(huggingface_key)
#tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-3.2-1B-Instruct")
#model = AutoModelForCausalLM.from_pretrained("meta-llama/Llama-3.2-1B-Instruct")

base_model = "meta-llama/Llama-3.2-1B-Instruct"
dataset_name = "ruslanmv/ai-medical-chatbot"
new_model = "llama-3-8b-chat-doctor"

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [None]:
torch_dtype = torch.float16
attn_implementation = "eager"

In [None]:
# QLoRA config
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch_dtype,
    bnb_4bit_use_double_quant=True,
)

# Load model
model = AutoModelForCausalLM.from_pretrained(
    base_model,
    quantization_config=bnb_config,
    device_map="auto",
    attn_implementation=attn_implementation
)

In [None]:
# Load tokenizer
tokenizer = AutoTokenizer.from_pretrained(base_model)
tokenizer.chat_template = None
model, tokenizer = setup_chat_format(model, tokenizer)

The new embeddings will be initialized from a multivariate normal distribution that has old embeddings' mean and covariance. As described in this article: https://nlp.stanford.edu/~johnhew/vocab-expansion.html. To disable this, use `mean_resizing=False`


In [None]:
# LoRA config
peft_config = LoraConfig(
    r=16,
    lora_alpha=32,
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM",
    target_modules=['up_proj', 'down_proj', 'gate_proj', 'k_proj', 'q_proj', 'v_proj', 'o_proj']
)
model = get_peft_model(model, peft_config)

In [None]:
#Importing the dataset
dataset = load_dataset(dataset_name, split="all")
dataset = dataset.shuffle(seed=65).select(range(1000)) # Only use 1000 samples for quick demo

def format_chat_template(row):
    row_json = [{"role": "user", "content": row["Patient"]},
               {"role": "assistant", "content": row["Doctor"]}]
    row["text"] = tokenizer.apply_chat_template(row_json, tokenize=False)
    return row

dataset = dataset.map(
    format_chat_template,
    num_proc=4,
)

dataset['text'][3]

'<|im_start|>user\nFell on sidewalk face first about 8 hrs ago. Swollen, cut lip bruised and cut knee, and hurt pride initially. Now have muscle and shoulder pain, stiff jaw(think this is from the really swollen lip),pain in wrist, and headache. I assume this is all normal but are there specific things I should look for or will I just be in pain for a while given the hard fall?<|im_end|>\n<|im_start|>assistant\nHello and welcome to HCM,The injuries caused on various body parts have to be managed.The cut and swollen lip has to be managed by sterile dressing.The body pains, pain on injured site and jaw pain should be managed by pain killer and muscle relaxant.I suggest you to consult your primary healthcare provider for clinical assessment.In case there is evidence of infection in any of the injured sites, a course of antibiotics may have to be started to control the infection.Thanks and take careDr Shailja P Wahal<|im_end|>\n'

In [None]:
dataset = dataset.train_test_split(test_size=0.1)

In [None]:
training_arguments = TrainingArguments(
    output_dir=new_model,
    per_device_train_batch_size=1,
    per_device_eval_batch_size=1,
    gradient_accumulation_steps=2,
    optim="paged_adamw_32bit",
    num_train_epochs=1,
    evaluation_strategy="steps",
    eval_steps=0.2,
    logging_steps=1,
    warmup_steps=10,
    logging_strategy="steps",
    learning_rate=2e-4,
    fp16=False,
    bf16=False,
    group_by_length=True,
    report_to="wandb"
)



In [None]:
trainer = SFTTrainer(
    model=model,
    train_dataset=dataset["train"],
    eval_dataset=dataset["test"],
    peft_config=peft_config,
    max_seq_length=512,
    dataset_text_field="text",
    tokenizer=tokenizer,
    args=training_arguments,
    packing= False,
)


Deprecated positional argument(s) used in SFTTrainer, please use the SFTConfig to set these arguments instead.


Map:   0%|          | 0/900 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

In [None]:
trainer.train()



Step,Training Loss,Validation Loss
90,3.474,3.056747
180,2.721,2.993811
270,2.4806,2.959504
360,3.015,2.939756
450,2.8498,2.92971


Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.
Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.
Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.
Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.
Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.
Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.
Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.
Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.
Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.
Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.


TrainOutput(global_step=450, training_loss=2.9932316716512046, metrics={'train_runtime': 540.9588, 'train_samples_per_second': 1.664, 'train_steps_per_second': 0.832, 'total_flos': 1215122640052224.0, 'train_loss': 2.9932316716512046, 'epoch': 1.0})

In [None]:
wandb.finish()
model.config.use_cache = True

0,1
eval/loss,█▅▃▂▁
eval/runtime,█▂▁▁▂
eval/samples_per_second,▁▇██▇
eval/steps_per_second,▁▇██▇
train/epoch,▁▁▁▂▂▂▂▂▂▂▃▃▃▃▄▄▄▄▅▅▅▅▅▅▅▅▆▆▆▆▆▆▆▆▇▇▇▇▇█
train/global_step,▁▁▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇▇██
train/grad_norm,▃▄▂█▄▃▂▄▄▃▁▁▂▃▃▂▂▄▂▄▂▂▂▃▃▂▂▄▄▆▁▂▂▃▂▁▂▃▃▃
train/learning_rate,████▇▇▇▆▆▆▆▆▆▆▆▅▅▅▅▅▄▄▄▄▄▄▄▄▄▃▃▃▃▃▂▂▁▁▁▁
train/loss,█▆▆▃▆▆▅▆▇▆▅▆▅▇▄▅▅▄▅▄▃▄▆▄▇▄▁▄▅▄▅▅▄▄▃▅▄█▇▅

0,1
eval/loss,2.92971
eval/runtime,21.5564
eval/samples_per_second,4.639
eval/steps_per_second,4.639
total_flos,1215122640052224.0
train/epoch,1.0
train/global_step,450.0
train/grad_norm,3.52568
train/learning_rate,0.0
train/loss,2.8498


## REFT inital example - https://medium.com/@syed_hasan/finetuning-llama-3-using-reft-representation-fine-tuning-technique-00f4fe1f497c

In [None]:
try:
    import pyreft

except ModuleNotFoundError:
    !pip install git+https://github.com/stanfordnlp/pyreft.git

Collecting git+https://github.com/stanfordnlp/pyreft.git
  Cloning https://github.com/stanfordnlp/pyreft.git to /tmp/pip-req-build-ntbf5ajj
  Running command git clone --filter=blob:none --quiet https://github.com/stanfordnlp/pyreft.git /tmp/pip-req-build-ntbf5ajj
  Resolved https://github.com/stanfordnlp/pyreft.git to commit b07868925d67e13efe6e222a6915e7ef0ce1e239
  Running command git submodule update --init --recursive -q
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting pyvene>=0.1.4 (from pyreft==0.0.8)
  Downloading pyvene-0.1.6-py3-none-any.whl.metadata (4.4 kB)
Collecting transformers==4.45.1 (from pyreft==0.0.8)
  Downloading transformers-4.45.1-py3-none-any.whl.metadata (44 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m44.4/44.4 kB[0m [31m4.1 MB/s[0m eta [36m0:00:00[0m
Collecting ipywidgets>=8.1.1 (from pyreft==0.0.8)
  Downloading ipywidgets-8.1.5-py3-none-any.whl.metadata (2.3 kB)
Collecting evaluate>=0.4.1 (from pyreft==0.0.8)
  Do

In [None]:
%pip install transformers==4.45.2 sentence-transformers==3.1.1
#%pip install -U bitsandbytes

Collecting transformers==4.45.2
  Downloading transformers-4.45.2-py3-none-any.whl.metadata (44 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/44.4 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m44.4/44.4 kB[0m [31m3.6 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting sentence-transformers==3.1.1
  Downloading sentence_transformers-3.1.1-py3-none-any.whl.metadata (10 kB)
Downloading transformers-4.45.2-py3-none-any.whl (9.9 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m9.9/9.9 MB[0m [31m48.1 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading sentence_transformers-3.1.1-py3-none-any.whl (245 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m245.3/245.3 kB[0m [31m12.8 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: transformers, sentence-transformers
  Attempting uninstall: transformers
    Found existing installation: transformers 4.47.0.dev0
    Unin

In [None]:
import wandb
from google.colab import userdata
wandb_key = userdata.get('wandb_key')
wandb.login(key=wandb_key)
run = wandb.init(
    project='Fine-tune Llama 3 reft',
    job_type="training",
    anonymous="allow"
)

[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.
[34m[1mwandb[0m: Currently logged in as: [33malina775[0m ([33mdeep_learning_final_project[0m). Use [1m`wandb login --relogin`[0m to force relogin
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


In [None]:
from huggingface_hub import notebook_login
huggingface_key = userdata.get('huggingface')
notebook_login(huggingface_key)

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [None]:
import torch, transformers, pyreft
device = "cuda"

prompt_no_input_template = """<|begin_of_text|><|start_header_id|>user<|end_header_id|>%s<|eot_id|><|start_header_id|>assistant<|end_header_id|>"""

model_name_or_path = "meta-llama/Llama-3.2-1B-Instruct"
model = transformers.AutoModelForCausalLM.from_pretrained(
    model_name_or_path, torch_dtype=torch.bfloat16, device_map=device, trust_remote_code=True)

# # get tokenizer
tokenizer = transformers.AutoTokenizer.from_pretrained(
    model_name_or_path, model_max_length=2048,
    padding_side="right", use_fast=False)
tokenizer.pad_token = tokenizer.eos_token

nnsight is not detected. Please install via 'pip install nnsight' for nnsight backend.


2024-11-29 09:17:11.992980: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-11-29 09:17:12.027724: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-11-29 09:17:12.038764: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1452] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-11-29 09:17:12.066262: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [None]:
# get reft model
reft_config = pyreft.ReftConfig(representations={
    "layer": 8, "component": "block_output",
    "low_rank_dimension": 4,
    "intervention": pyreft.LoreftIntervention(embed_dim=model.config.hidden_size,
    low_rank_dimension=4)})
reft_model = pyreft.get_reft_model(model, reft_config)
reft_model.set_device("cuda")
reft_model.print_trainable_parameters()

trainable intervention params: 16,388 || trainable model params: 0
model params: 1,235,814,400 || trainable%: 0.0013260890955794009


In [None]:
dataset_name = "teknium/OpenHermes-2.5"
from datasets import load_dataset

dataset = load_dataset(dataset_name, split="all")
dataset = dataset.shuffle(seed=65).select(range(1_000))

data_module = pyreft.make_last_position_supervised_data_module(
    tokenizer, model, [prompt_no_input_template % row["conversations"][0]["value"] for row in dataset],
    [row["conversations"][1]["value"] for row in dataset])

In [None]:
training_args = transformers.TrainingArguments(
    per_device_train_batch_size = 1,
    gradient_accumulation_steps = 2,
    warmup_steps = 10,
    num_train_epochs = 1,
    learning_rate = 5e-4,
    bf16 = True,
    logging_steps = 50,
    optim = "paged_adamw_32bit",
    weight_decay = 0.0,
    lr_scheduler_type = "cosine",
    output_dir = "outputs",
    report_to="wandb",
    fp16=False,
    logging_strategy="steps",
)


trainer = pyreft.ReftTrainerForCausalLM(model=reft_model, tokenizer=tokenizer, args=training_args, **data_module)

trainer.train()

Step,Training Loss
50,1.8726
100,1.9701
150,1.9353
200,1.8388
250,1.7118
300,1.7667
350,1.9408
400,2.2384
450,1.866
500,1.8454


Directory 'outputs/checkpoint-500/intervenable_model' created successfully.


TrainOutput(global_step=500, training_loss=1.8985883178710938, metrics={'train_runtime': 611.1456, 'train_samples_per_second': 1.636, 'train_steps_per_second': 0.818, 'total_flos': 0.0, 'train_loss': 1.8985883178710938, 'epoch': 1.0})

In [None]:
wandb.finish()
model.config.use_cache = True

VBox(children=(Label(value='0.023 MB of 0.023 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
train/epoch,▁▂▂▂▂▁▁▁▁▂▁▂▂▂▂▁▁▂▂▄▂▂▁▂▂▂▃▃▃▄▅▆▆▆▆▇██▁▂
train/global_step,▁▁▁▁▂▂▁▂▂▂▂▂▂▁▁▁▁▁▁▁▂▁▂▂▂▂▃▁▂▂▂▃▄▄▄▅▅▅▁█
train/grad_norm,▁▁▁▁▂▁▁▁▃▁▁▁▄▁▂▂▁▁▄▂▁▁▂▃▇▂▁▁▁▁▁█▁▁▂▁▁▁▂▁
train/learning_rate,█▁▁▁▁▁▁▁▁▁████▇████▇▂▃███████▇▆▆▆▅▅▃▃▂██
train/loss,▁▃▃▄▁▃▁▂▄▁▃▃▃▂█▄▂▅▃▆▂▆▄▃█▂▇▂▄▆▇▂▄▂▂▅▂▄▅▅

0,1
total_flos,0.0
train/epoch,1.0
train/global_step,500.0
train/grad_norm,1.6423
train/learning_rate,0.0
train/loss,1.8454
train_loss,1.89859
train_runtime,611.1456
train_samples_per_second,1.636
train_steps_per_second,0.818


## Setup

In [1]:
#installs
%%capture
%pip install -U transformers
%pip install -U datasets
%pip install -U accelerate
%pip install -U peft
%pip install -U trl
%pip install -U bitsandbytes
%pip install -U wandb

In [15]:
#imports
from transformers import (
    AutoModelForCausalLM,
    AutoModelForQuestionAnswering,
    AutoTokenizer,
    BitsAndBytesConfig,
    HfArgumentParser,
    TrainingArguments,
    DefaultDataCollator,
    Trainer,
    pipeline,
    logging,
)
from peft import (
    LoraConfig,
    PeftModel,
    VeraConfig,
    PromptEncoder,
    PromptEncoderConfig,
    PrefixTuningConfig,
    LNTuningConfig,
    LoHaConfig,
    TaskType,
    prepare_model_for_kbit_training,
    get_peft_model,
)
import os, torch, wandb
from datasets import load_dataset
from trl import SFTTrainer, setup_chat_format

In [3]:
#log in to hugging face and wandb
from google.colab import userdata
from huggingface_hub import notebook_login
huggingface_key = userdata.get('huggingface')
notebook_login(huggingface_key)

wandb_key = userdata.get('wandb_key')
wandb.login(key=wandb_key)

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.
[34m[1mwandb[0m: Currently logged in as: [33malina775[0m ([33mdeep_learning_final_project[0m). Use [1m`wandb login --relogin`[0m to force relogin
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


True

In [4]:
llama_base_model = "meta-llama/Llama-3.2-3B"

medical_chatbot_dataset_name = "ruslanmv/ai-medical-chatbot"
math_dataset_name = "nvidia/OpenMathInstruct-2"

quantisation_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.bfloat16,
    bnb_4bit_use_double_quant=True
)

In [5]:
def run_training(base_model, config, dataset_name, dataset_size, data_row_name_user, data_row_name_assistant, output_directory, learning_rate, quantisation_config = None):
  #setup model
  if quantisation_config:
    model = AutoModelForCausalLM.from_pretrained(base_model, device_map = "auto", quantization_config = quantisation_config)
  else:
    model = AutoModelForCausalLM.from_pretrained(base_model, device_map = "auto") #change model
  peft_config = config #change peft method
  model = get_peft_model(model, peft_config)
  model.print_trainable_parameters()

  #load dataset
  dataset = load_dataset(dataset_name, split="train[:10000]").shuffle(seed=42).select(range(dataset_size)).flatten_indices()
  tokenizer = AutoTokenizer.from_pretrained(base_model)
  tokenizer.chat_template = None
  model, tokenizer = setup_chat_format(model, tokenizer)

  def format_chat_template(row):
      row_json = [{"role": "user", "content": row[data_row_name_user]},
                 {"role": "assistant", "content": row[data_row_name_assistant]}]
      row["text"] = tokenizer.apply_chat_template(row_json, tokenize=False)
      return row

  dataset = dataset.map(
      format_chat_template
  )
  dataset = dataset.train_test_split(test_size=0.1)

  training_arguments = TrainingArguments(
    output_dir = output_directory,
    learning_rate=learning_rate,
    per_device_train_batch_size=1,
    per_device_eval_batch_size=1,
    num_train_epochs=3,
    weight_decay=0.01,
    eval_strategy="steps",
    save_strategy="steps",
    logging_steps=60,
    eval_steps=60,
    save_steps=120,
    report_to="wandb",
    load_best_model_at_end=True,
    gradient_accumulation_steps=8,
    warmup_steps=120,
    fp16=True
  )

  trainer = SFTTrainer(
    model=model,
    max_seq_length = 512,
    peft_config=peft_config,
    dataset_text_field="text",
    args=training_arguments,
    train_dataset=dataset["train"],
    eval_dataset=dataset["test"],
    processing_class = tokenizer
  )

  return trainer.train()

## initial tests

In [None]:
#start wandb logging
run = wandb.init(
    project='Fine-tune Llama 3 8B on Medical Dataset', # change
    job_type="training",
    anonymous="allow"
)

In [None]:
peft_config = LoraConfig(
    r=8,
    inference_mode = False,
    lora_alpha=32,
    lora_dropout=0.1,
    task_type="CAUSAL_LM"
)
run_training(llama_base_model, peft_config, medical_chatbot_dataset_name, 1000, "Patient", "Doctor", "newModelLoRA", 3e-3)
wandb.finish()

trainable params: 851,968 || all params: 1,236,666,368 || trainable%: 0.0689


The new embeddings will be initialized from a multivariate normal distribution that has old embeddings' mean and covariance. As described in this article: https://nlp.stanford.edu/~johnhew/vocab-expansion.html. To disable this, use `mean_resizing=False`

Deprecated positional argument(s) used in SFTTrainer, please use the SFTConfig to set these arguments instead.


Map:   0%|          | 0/900 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]



Step,Training Loss,Validation Loss
60,3.3392,3.116552


Step,Training Loss,Validation Loss
60,3.3392,3.116552
120,3.1091,3.14383
180,3.1073,3.12465
240,2.9938,3.08353
300,2.8023,3.018354




VBox(children=(Label(value='0.024 MB of 0.024 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
eval/loss,▆█▇▅▁
eval/runtime,▁▃▄▄█
eval/samples_per_second,█▆▅▄▁
eval/steps_per_second,█▆▅▄▁
train/epoch,▁▁▃▃▄▄▆▆▇▇█
train/global_step,▁▁▃▃▄▄▆▆▇▇█
train/grad_norm,▁▃█▅▃
train/learning_rate,▄█▆▃▁
train/loss,█▅▅▃▁

0,1
eval/loss,3.01835
eval/runtime,8.4629
eval/samples_per_second,11.816
eval/steps_per_second,11.816
total_flos,3601544908431360.0
train/epoch,2.98667
train/global_step,336.0
train/grad_norm,1.24369
train/learning_rate,0.00056
train/loss,2.8023


NameError: name 'model' is not defined

In [None]:
quantisation_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.bfloat16,
    bnb_4bit_use_double_quant=True
)

peft_config = LoraConfig(
    r=8,
    inference_mode = False,
    lora_alpha=32,
    lora_dropout=0.1,
    task_type="CAUSAL_LM"
)
run_training(llama_base_model, peft_config, medical_chatbot_dataset_name, 1000, "Patient", "Doctor", "newModelLoRA", 3e-3, quantisation_config)
wandb.finish()

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

trainable params: 2,293,760 || all params: 3,215,043,584 || trainable%: 0.0713


Map:   0%|          | 0/1000 [00:00<?, ? examples/s]


Deprecated positional argument(s) used in SFTTrainer, please use the SFTConfig to set these arguments instead.


Map:   0%|          | 0/900 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss
60,2.8379,2.786501
120,2.8589,2.912651
180,2.8284,2.904469
240,2.686,2.821862
300,2.4674,2.776425




0,1
eval/loss,▂██▃▁
eval/runtime,▂▁█▇█
eval/samples_per_second,▇█▁▂▁
eval/steps_per_second,▇█▁▂▁
train/epoch,▁▁▃▃▄▄▆▆▇▇█
train/global_step,▁▁▃▃▄▄▆▆▇▇█
train/grad_norm,▁█▅▄▃
train/learning_rate,▄█▆▃▁
train/loss,██▇▅▁

0,1
eval/loss,2.77643
eval/runtime,31.0726
eval/samples_per_second,3.218
eval/steps_per_second,3.218
total_flos,1.0415921488484352e+16
train/epoch,2.98667
train/global_step,336.0
train/grad_norm,1.0216
train/learning_rate,0.0005
train/loss,2.4674


In [None]:
run = wandb.init(
    project='Fine-tune Llama 3 8B on Medical Dataset', # change
    job_type="training",
    anonymous="allow"
)

quantisation_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.bfloat16,
    bnb_4bit_use_double_quant=True
)

peft_config = LoraConfig(
    r=8,
    inference_mode = False,
    lora_alpha=32,
    lora_dropout=0.1,
    task_type="CAUSAL_LM"
)
run_training(llama_base_model, peft_config, medical_chatbot_dataset_name, 1000, "Patient", "Doctor", "newModelLoRA", 1e-3, quantisation_config)
wandb.finish()

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

trainable params: 2,293,760 || all params: 3,215,043,584 || trainable%: 0.0713



Deprecated positional argument(s) used in SFTTrainer, please use the SFTConfig to set these arguments instead.


Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss
60,2.8822,2.771621
120,2.7703,2.758892
180,2.6464,2.744423
240,2.5396,2.706482
300,2.362,2.711204




VBox(children=(Label(value='0.024 MB of 0.024 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
eval/loss,█▇▅▁▂
eval/runtime,▂▁█▁▁
eval/samples_per_second,▇█▁██
eval/steps_per_second,▇█▁██
train/epoch,▁▁▃▃▄▄▆▆▇▇█
train/global_step,▁▁▃▃▄▄▆▆▇▇█
train/grad_norm,▁█▁▃▄
train/learning_rate,▄█▆▃▁
train/loss,█▆▅▃▁

0,1
eval/loss,2.7112
eval/runtime,31.0139
eval/samples_per_second,3.224
eval/steps_per_second,3.224
total_flos,1.0415921488484352e+16
train/epoch,2.98667
train/global_step,336.0
train/grad_norm,0.99429
train/learning_rate,0.00017
train/loss,2.362


In [None]:
import torch
torch.cuda.empty_cache()

## My own initial LoRA implementation

In [None]:
from google.colab import userdata
wandb_key = userdata.get('wandb_key')
wandb.login(key=wandb_key)
run = wandb.init(
    project='Fine-tune Llama 3 8B on Medical Dataset',
    job_type="training",
    anonymous="allow"
)

[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.
[34m[1mwandb[0m: Currently logged in as: [33malina775[0m ([33mdeep_learning_final_project[0m). Use [1m`wandb login --relogin`[0m to force relogin
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


In [None]:
from huggingface_hub import notebook_login
huggingface_key = userdata.get('huggingface')
notebook_login(huggingface_key)

base_model = "meta-llama/Llama-3.2-1B-Instruct"
new_model = "my own model"

NameError: name 'userdata' is not defined

In [None]:
from peft import LoraConfig, TaskType
#load model
base_model = "meta-llama/Llama-3.2-1B-Instruct"

model = AutoModelForCausalLM.from_pretrained(base_model) #this takes some time (1min) - quantisation

peft_config = LoraConfig(
    r=8,
    inference_mode = False,
    lora_alpha=32,
    lora_dropout=0.1,
    task_type="CAUSAL_LM"
)
#use_rslara = True (rank-stablised lora)
#quantise model?

model = get_peft_model(model, peft_config)
model.print_trainable_parameters()

trainable params: 851,968 || all params: 1,236,666,368 || trainable%: 0.0689


In [None]:
#load dataset
dataset_name = "ruslanmv/ai-medical-chatbot"
dataset = load_dataset(dataset_name, split="all").shuffle(seed=42).select(range(1000)).flatten_indices() #change size
tokenizer = AutoTokenizer.from_pretrained(base_model)
tokenizer.chat_template = None
model, tokenizer = setup_chat_format(model, tokenizer)

row_name_user = "Patient" #dependent on dataset
row_name_assistant = "Doctor" #dependent on dataset
def format_chat_template(row):
    row_json = [{"role": "user", "content": row[row_name_user]},
               {"role": "assistant", "content": row[row_name_assistant]}]
    row["text"] = tokenizer.apply_chat_template(row_json, tokenize=False)
    return row

dataset = dataset.map(
    format_chat_template
)

dataset = dataset.train_test_split(test_size=0.1)

The new embeddings will be initialized from a multivariate normal distribution that has old embeddings' mean and covariance. As described in this article: https://nlp.stanford.edu/~johnhew/vocab-expansion.html. To disable this, use `mean_resizing=False`


In [None]:
training_arguments = TrainingArguments(
    output_dir = "newModelLoRA",
    learning_rate=1e-3,
    per_device_train_batch_size=1,
    per_device_eval_batch_size=1,
    num_train_epochs=2,
    weight_decay=0.01,
    eval_strategy="steps",
    save_strategy="steps",
    logging_steps=10,
    eval_steps=30,
    save_steps=300,
    report_to="wandb",
    load_best_model_at_end=True,
    gradient_accumulation_steps=2
)

In [None]:
trainer = SFTTrainer(
    model=model,
    max_seq_length = 512,
    peft_config=peft_config,
    dataset_text_field="text",
    args=training_arguments,
    train_dataset=dataset["train"],
    eval_dataset=dataset["test"],
    processing_class = tokenizer
)


Deprecated positional argument(s) used in SFTTrainer, please use the SFTConfig to set these arguments instead.


Map:   0%|          | 0/900 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

In [None]:
trainer.train()



Step,Training Loss,Validation Loss
30,3.1933,3.243364
60,3.1802,3.19711
90,3.2504,3.169806
120,3.1835,3.146885
150,2.987,3.1378
180,3.0539,3.121947




Step,Training Loss,Validation Loss
30,3.1933,3.243364
60,3.1802,3.19711
90,3.2504,3.169806
120,3.1835,3.146885
150,2.987,3.1378
180,3.0539,3.121947
210,3.2761,3.128316
240,2.9888,3.100967
270,3.1305,3.110124
300,2.964,3.097321




TrainOutput(global_step=900, training_loss=2.9463778411017523, metrics={'train_runtime': 2107.6146, 'train_samples_per_second': 0.854, 'train_steps_per_second': 0.427, 'total_flos': 2422683864268800.0, 'train_loss': 2.9463778411017523, 'epoch': 2.0})

In [None]:
wandb.finish()
model.config.use_cache = True

0,1
eval/loss,█▇▆▅▅▄▄▄▄▃▃▃▃▂▂▃▂▂▂▂▂▂▂▁▂▁▁▁▁▁
eval/runtime,▄▂▁▅▇██████▆██▇████████▆██████
eval/samples_per_second,▅▇█▄▂▁▁▁▁▁▁▂▁▁▂▁▁▁▁▁▁▁▁▂▁▁▁▁▁▁
eval/steps_per_second,▅▇█▄▂▁▁▁▁▁▁▂▁▁▂▁▁▁▁▁▁▁▁▂▁▁▁▁▁▁
train/epoch,▁▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▃▃▄▄▄▄▄▄▄▄▅▅▆▆▆▆▆▆▇▇▇▇██
train/global_step,▁▁▁▁▂▂▂▃▃▃▃▃▃▃▄▄▄▅▅▅▅▅▅▅▆▆▆▆▆▆▆▇▇▇▇▇▇▇▇█
train/grad_norm,▁▅██▅▂▅▁▆▆▄▄▅▂▄▂▄▇▁▄▄▂▄▁▄▄▇▂▄▄▂▂▅▄▃▂▅▃▂▂
train/learning_rate,█████▇▇▇▇▇▆▆▆▆▆▆▆▅▅▅▅▄▄▄▄▄▃▃▃▃▂▂▂▂▂▂▂▁▁▁
train/loss,█▆▆▆▄▅▆▄▆▄▄▆▅▅▅▄▅▄▅▃▁▂▂▂▃▂▃▄▃▃▃▄▃▃▁▂▃▃▃▁

0,1
eval/loss,3.02003
eval/runtime,21.0738
eval/samples_per_second,4.745
eval/steps_per_second,4.745
total_flos,2422683864268800.0
train/epoch,2.0
train/global_step,900.0
train/grad_norm,1.93674
train/learning_rate,0.0
train/loss,2.6627


## P-Tuning

In [None]:
from peft import PromptEncoder, PromptEncoderConfig, TaskType
base_model = "meta-llama/Llama-3.2-1B-Instruct"

model = AutoModelForCausalLM.from_pretrained(base_model)

In [None]:
config = PromptEncoderConfig(
    peft_type="P_TUNING",
    task_type="CAUSAL_LM",
    num_virtual_tokens=20,
    token_dim=768,
    num_transformer_submodules=1,
    num_attention_heads=12,
    num_layers=12,
    encoder_reparameterization_type="MLP",
    encoder_hidden_size=768,
)

In [None]:

model = get_peft_model(model, peft_config)
model.print_trainable_parameters()

trainable params: 851,968 || all params: 1,236,666,368 || trainable%: 0.0689


In [None]:
dataset_name = "ruslanmv/ai-medical-chatbot"
dataset = load_dataset(dataset_name, split="all").shuffle(seed=42).select(range(1000)).flatten_indices() #change size
tokenizer = AutoTokenizer.from_pretrained(base_model)
tokenizer.chat_template = None
model, tokenizer = setup_chat_format(model, tokenizer)

row_name_user = "Patient" #dependent on dataset
row_name_assistant = "Doctor" #dependent on dataset
def format_chat_template(row):
    row_json = [{"role": "user", "content": row[row_name_user]},
               {"role": "assistant", "content": row[row_name_assistant]}]
    row["text"] = tokenizer.apply_chat_template(row_json, tokenize=False)
    return row

dataset = dataset.map(
    format_chat_template
)

dataset = dataset.train_test_split(test_size=0.1)

In [None]:
training_arguments = TrainingArguments(
    output_dir = "newModelP-tuning",
    learning_rate=1e-3,
    per_device_train_batch_size=1,
    per_device_eval_batch_size=1,
    num_train_epochs=2,
    weight_decay=0.01,
    eval_strategy="steps",
    save_strategy="steps",
    logging_steps=10,
    eval_steps=30,
    save_steps=300,
    report_to="wandb",
    load_best_model_at_end=True,
    gradient_accumulation_steps=2
)

In [None]:
trainer = SFTTrainer(
    model=model,
    max_seq_length = 512,
    peft_config=config,
    dataset_text_field="text",
    args=training_arguments,
    train_dataset=dataset["train"],
    eval_dataset=dataset["test"],
    processing_class = tokenizer
)


Deprecated positional argument(s) used in SFTTrainer, please use the SFTConfig to set these arguments instead.


Map:   0%|          | 0/900 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

In [None]:
trainer.train()

Step,Training Loss,Validation Loss
30,3.0762,3.332284
60,3.2961,3.277643
90,3.0402,3.250223
120,3.2205,3.252903
150,3.102,3.228142
180,3.099,3.232156
210,3.1389,3.205043
240,3.1369,3.203981
270,3.1249,3.193868
300,3.1542,3.188954




TrainOutput(global_step=900, training_loss=2.964323993259006, metrics={'train_runtime': 1442.5274, 'train_samples_per_second': 1.248, 'train_steps_per_second': 0.624, 'total_flos': 2421830641950720.0, 'train_loss': 2.964323993259006, 'epoch': 2.0})

In [None]:
wandb.finish()
model.config.use_cache = True

0,1
eval/loss,█▇▆▆▅▅▅▅▄▄▄▄▃▃▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁
eval/runtime,▁▆▄▅▅▅▅▄▅▄█▃▅▄▅▅▄▄▄▄▇▃▅▄▄▅▄▅▅▅
eval/samples_per_second,█▂▅▄▄▄▄▅▄▅▁▆▃▅▄▄▅▅▄▄▁▆▄▅▅▄▅▄▄▄
eval/steps_per_second,█▂▅▄▄▄▄▅▄▅▁▆▃▅▄▄▅▅▄▄▁▆▄▅▅▄▅▄▄▄
train/epoch,▁▁▁▁▁▂▂▂▂▂▃▃▃▃▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▆▇▇▇▇▇█████
train/global_step,▁▁▁▂▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▄▄▅▅▅▅▆▆▆▇▇▇▇▇▇████
train/grad_norm,█▆▃▄▃▅▂▇▄▃▆▄▄▅▃▃▆▁▃▄▃▃▁▄▃▅▃▃▄▅▁▁▃▅▁▃▂▄▂▄
train/learning_rate,█████▇▇▇▇▇▆▆▆▆▆▅▅▅▅▅▄▄▄▄▄▄▃▃▃▃▃▃▃▂▂▂▂▁▁▁
train/loss,█▅▇▇▆▅▆▆▅▆▆▅▄▅▆▅▇▅▅▅▃▂▃▂▃▃▂▄▃▄▅▅▂▃▃▁▄▄▃▂

0,1
eval/loss,3.06875
eval/runtime,20.2036
eval/samples_per_second,4.95
eval/steps_per_second,4.95
total_flos,2421830641950720.0
train/epoch,2.0
train/global_step,900.0
train/grad_norm,1.78327
train/learning_rate,0.0
train/loss,2.7343


In [None]:
base_model = "meta-llama/Llama-3.2-1B-Instruct"

model = AutoModelForCausalLM.from_pretrained(base_model)

In [None]:
dataset_name = "ruslanmv/ai-medical-chatbot"
dataset = load_dataset(dataset_name, split="all").shuffle(seed=42).select(range(1000)).flatten_indices() #change size
tokenizer = AutoTokenizer.from_pretrained(base_model)
tokenizer.chat_template = None
model, tokenizer = setup_chat_format(model, tokenizer)

row_name_user = "Patient" #dependent on dataset
row_name_assistant = "Doctor" #dependent on dataset
def format_chat_template(row):
    row_json = [{"role": "user", "content": row[row_name_user]},
               {"role": "assistant", "content": row[row_name_assistant]}]
    row["text"] = tokenizer.apply_chat_template(row_json, tokenize=False)
    return row

dataset = dataset.map(
    format_chat_template
)

dataset = dataset.train_test_split(test_size=0.1)

The new embeddings will be initialized from a multivariate normal distribution that has old embeddings' mean and covariance. As described in this article: https://nlp.stanford.edu/~johnhew/vocab-expansion.html. To disable this, use `mean_resizing=False`


In [None]:
training_args = TrainingArguments(
    output_dir="./baseModel",
    per_device_eval_batch_size=1,  # Batch size for evaluation
    no_cuda=False,  # Set to True if you don't want to use GPU
)

# Use your base model and evaluation dataset
trainer = SFTTrainer(
    model=model,  # Base model (without fine-tuning)
    args=training_args,
    eval_dataset=dataset["test"],

    dataset_text_field="text"# Validation dataset
)
trainer.evaluate()


Deprecated positional argument(s) used in SFTTrainer, please use the SFTConfig to set these arguments instead.


{'eval_loss': 3.6174795627593994,
 'eval_model_preparation_time': 0.0027,
 'eval_runtime': 20.6777,
 'eval_samples_per_second': 4.836,
 'eval_steps_per_second': 4.836}

## testing

### lora

- no quantisation
- llama 3 3b
- dataset: medical
- dataset size: 1000
- peft: lora


fails when it reaches the "model, tokenizer = setup_chat_format(model, tokenizer)" line. This is before the training occurs, meaning it is not possible to train without quantisation, eleminating vera as a peft method.

In [None]:
run = wandb.init(
    project='Deep learning final project', # change
    job_type="training",
    anonymous="allow",
    name = "lora_medical_1000_llama"
)

peft_config = LoraConfig(
    r=8,
    inference_mode = False,
    lora_alpha=32,
    lora_dropout=0.1,
    task_type="CAUSAL_LM"
)
run_training(llama_base_model, peft_config, medical_chatbot_dataset_name, 1000, "Patient", "Doctor", "newModelLoRA", 1e-3)
wandb.finish()

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

trainable params: 2,293,760 || all params: 3,215,043,584 || trainable%: 0.0713


The new embeddings will be initialized from a multivariate normal distribution that has old embeddings' mean and covariance. As described in this article: https://nlp.stanford.edu/~johnhew/vocab-expansion.html. To disable this, use `mean_resizing=False`


OutOfMemoryError: CUDA out of memory. Tried to allocate 1.47 GiB. GPU 0 has a total capacity of 14.75 GiB of which 1.18 GiB is free. Process 36556 has 13.56 GiB memory in use. Of the allocated memory 13.45 GiB is allocated by PyTorch, and 569.50 KiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)

- quantisation
- llama 3 3b
- dataset: medical
- dataset size: 1000
- peft: lora

In [None]:
run = wandb.init(
    project='Deep learning final project', # change
    job_type="training",
    anonymous="allow",
    name = "lora_quantisation_medical_1000_llama"
)

peft_config = LoraConfig(
    r=8,
    inference_mode = False,
    lora_alpha=32,
    lora_dropout=0.1,
    task_type="CAUSAL_LM"
)
run_training(llama_base_model, peft_config, medical_chatbot_dataset_name, 1000, "Patient", "Doctor", "newModelLoRA", 1e-3, quantisation_config)
wandb.finish()

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

trainable params: 2,293,760 || all params: 3,215,043,584 || trainable%: 0.0713


The new embeddings will be initialized from a multivariate normal distribution that has old embeddings' mean and covariance. As described in this article: https://nlp.stanford.edu/~johnhew/vocab-expansion.html. To disable this, use `mean_resizing=False`


Map:   0%|          | 0/1000 [00:00<?, ? examples/s]


Deprecated positional argument(s) used in SFTTrainer, please use the SFTConfig to set these arguments instead.


Map:   0%|          | 0/900 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]



Step,Training Loss,Validation Loss
60,2.374,1.895292
120,1.6618,1.61544
180,1.615,1.565504
240,1.473,1.560329
300,1.3268,1.558113




0,1
eval/loss,█▂▁▁▁
eval/runtime,▁██▃▁
eval/samples_per_second,█▁▁▆█
eval/steps_per_second,█▁▁▆█
train/epoch,▁▁▃▃▄▄▆▆▇▇█
train/global_step,▁▁▃▃▄▄▆▆▇▇█
train/grad_norm,█▄▁▃▂
train/learning_rate,▄█▆▃▁
train/loss,█▃▃▂▁

0,1
eval/loss,1.55811
eval/runtime,31.9679
eval/samples_per_second,3.128
eval/steps_per_second,3.128
total_flos,1.1952113805723648e+16
train/epoch,2.98667
train/global_step,336.0
train/grad_norm,0.7068
train/learning_rate,0.00017
train/loss,1.3268


- quantisation
- llama 3 3b
- dataset: math
- dataset size: 1000
- peft: lora

In [None]:
run = wandb.init(
    project='Deep learning final project', # change
    job_type="training",
    anonymous="allow",
    name = "lora_quantisation_math_1000_llama"
)

peft_config = LoraConfig(
    r=8,
    inference_mode = False,
    lora_alpha=32,
    lora_dropout=0.1,
    task_type="CAUSAL_LM"
)
run_training(llama_base_model, peft_config, math_dataset_name, 1000, "problem", "generated_solution", "newModelLoRA", 1e-3, quantisation_config)
wandb.finish()

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

trainable params: 2,293,760 || all params: 3,215,043,584 || trainable%: 0.0713


Resolving data files:   0%|          | 0/32 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/32 [00:00<?, ?it/s]

Map:   0%|          | 0/1000 [00:00<?, ? examples/s]


Deprecated positional argument(s) used in SFTTrainer, please use the SFTConfig to set these arguments instead.


Map:   0%|          | 0/900 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss
60,0.896,0.848901
120,0.7517,0.831281
180,0.7088,0.823207
240,0.6915,0.816346
300,0.5752,0.817383




0,1
eval/loss,█▄▂▁▁
eval/runtime,█▂▁▇▁
eval/samples_per_second,▁▇█▂▇
eval/steps_per_second,▁▇█▂▇
train/epoch,▁▁▃▃▄▄▆▆▇▇█
train/global_step,▁▁▃▃▄▄▆▆▇▇█
train/grad_norm,█▄▇▁▂
train/learning_rate,▄█▆▃▁
train/loss,█▅▄▄▁

0,1
eval/loss,0.81738
eval/runtime,34.2184
eval/samples_per_second,2.922
eval/steps_per_second,2.922
total_flos,1.5275379271931904e+16
train/epoch,2.98667
train/global_step,336.0
train/grad_norm,0.57977
train/learning_rate,0.00017
train/loss,0.5752


### p-tuning

- quantisation
- llama 3 3b
- dataset: medical
- dataset size: 1000
- peft: p-tuning

In [None]:
run = wandb.init(
    project='Deep learning final project', # change
    job_type="training",
    anonymous="allow",
    name = "p-tuning_quantisation_medical_1000_llama"
)

peft_config = PromptEncoderConfig(
    peft_type="P_TUNING",
    task_type="CAUSAL_LM",
    num_virtual_tokens=20,
    token_dim=3072,
    num_transformer_submodules=1,
    num_attention_heads=12,
    num_layers=12,
    encoder_reparameterization_type="MLP",
    encoder_hidden_size=3072,
)

run_training(llama_base_model, peft_config, medical_chatbot_dataset_name, 1000, "Patient", "Doctor", "newModelLoRA", 1e-3, quantisation_config)
wandb.finish()

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

trainable params: 28,382,208 || all params: 3,241,132,032 || trainable%: 0.8757



Deprecated positional argument(s) used in SFTTrainer, please use the SFTConfig to set these arguments instead.


Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss
60,2.6914,2.397743
120,2.3788,2.320283
180,2.2481,2.213115
240,2.0172,2.051843
300,1.7833,1.917007




0,1
eval/loss,█▇▅▃▁
eval/runtime,▇▆▅▁█
eval/samples_per_second,▂▃▄█▁
eval/steps_per_second,▂▃▄█▁
train/epoch,▁▁▃▃▄▄▆▆▇▇█
train/global_step,▁▁▃▃▄▄▆▆▇▇█
train/grad_norm,█▂▂▇▁
train/learning_rate,▄█▆▃▁
train/loss,█▆▅▃▁

0,1
eval/loss,1.91701
eval/runtime,31.814
eval/samples_per_second,3.143
eval/steps_per_second,3.143
total_flos,1.2091343912681472e+16
train/epoch,2.98667
train/global_step,336.0
train/grad_norm,0.26096
train/learning_rate,0.00018
train/loss,1.7833


- quantisation
- llama 3 3b
- dataset: math
- dataset size: 1000
- peft: p-tuning

In [None]:
run = wandb.init(
    project='Deep learning final project', # change
    job_type="training",
    anonymous="allow",
    name = "p-tuning_quantisation_math_1000_llama"
)

peft_config = PromptEncoderConfig(
    peft_type="P_TUNING",
    task_type="CAUSAL_LM",
    num_virtual_tokens=20,
    token_dim=3072,
    num_transformer_submodules=1,
    num_attention_heads=12,
    num_layers=12,
    encoder_reparameterization_type="MLP",
    encoder_hidden_size=3072,
)

run_training(llama_base_model, peft_config, math_dataset_name, 1000, "problem", "generated_solution", "newModelLoRA", 1e-3, quantisation_config)
wandb.finish()

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

trainable params: 28,382,208 || all params: 3,241,132,032 || trainable%: 0.8757


Resolving data files:   0%|          | 0/32 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/32 [00:00<?, ?it/s]


Deprecated positional argument(s) used in SFTTrainer, please use the SFTConfig to set these arguments instead.


Map:   0%|          | 0/900 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]



Step,Training Loss,Validation Loss
60,0.9396,0.808096
120,0.8103,0.783344
180,0.7929,0.7674
240,0.7935,0.757177
300,0.7728,0.750713
360,0.7712,0.74732
420,0.7624,0.741458
480,0.7566,0.740422
540,0.7516,0.737627




0,1
eval/loss,█▆▄▃▂▂▁▁▁
eval/runtime,▃██▆▁▃▂▂▇
eval/samples_per_second,▆▁▁▃█▅▇▇▂
eval/steps_per_second,▆▁▁▃█▅▇▇▂
train/epoch,▁▁▂▂▃▃▄▄▄▄▅▅▆▆▇▇███
train/global_step,▁▁▂▂▃▃▄▄▄▄▅▅▆▆▇▇███
train/grad_norm,█▄▁▁▂▂▁▁▁
train/learning_rate,▄█▇▆▅▄▃▂▁
train/loss,█▃▃▃▂▂▁▁▁

0,1
eval/loss,0.73763
eval/runtime,35.6783
eval/samples_per_second,2.803
eval/steps_per_second,2.803
total_flos,2.530936370792448e+16
train/epoch,4.97778
train/global_step,560.0
train/grad_norm,0.09153
train/learning_rate,5e-05
train/loss,0.7516


- no quantisation
- llama 3 3b
- dataset: medical
- dataset size: 1000
- peft: vera

In [None]:
run = wandb.init(
    project='Deep learning final project', # change
    job_type="training",
    anonymous="allow",
    name = "vera_medical_1000_llama"
)

peft_config = VeraConfig(r=128)

run_training(llama_base_model, peft_config, medical_chatbot_dataset_name, 1000, "Patient", "Doctor", "newModelLoRA", 1e-3)
wandb.finish()

### prefix-tuning

- quantisation
- llama 3 3b
- dataset: medical
- dataset size: 1000
- peft: prefix-tuning

In [12]:
run = wandb.init(
    project='Deep learning final project', # change
    job_type="training",
    anonymous="allow",
    name = "prefix_quantisation_medical_1000_llama"
)

peft_config = PrefixTuningConfig(
    peft_type="PREFIX_TUNING",
    task_type="CAUSAL_LM",
    num_virtual_tokens=400,
    token_dim=3072,
    num_transformer_submodules=1,
    encoder_hidden_size=3072
)

run_training(llama_base_model, peft_config, medical_chatbot_dataset_name, 1000, "Patient", "Doctor", "newModelLoRA", 1e-3, quantisation_config)
wandb.finish()

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

trainable params: 22,937,600 || all params: 3,235,687,424 || trainable%: 0.7089



Deprecated positional argument(s) used in SFTTrainer, please use the SFTConfig to set these arguments instead.


Step,Training Loss,Validation Loss
60,8.9209,7.159659
120,6.4044,5.645657
180,5.0555,4.540845
240,4.1333,3.81718
300,3.6088,3.569819




VBox(children=(Label(value='0.030 MB of 0.030 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
eval/loss,███▆▄▂▁▁
eval/runtime,▁▁▁▅█▄▄▄
eval/samples_per_second,███▄▁▅▄▅
eval/steps_per_second,███▄▁▅▄▅
train/epoch,▁▁▁▁▁▁▁▁▃▃▄▄▆▆▇▇█
train/global_step,▁▁▁▁▁▁▁▁▃▃▄▄▆▆▇▇█
train/grad_norm,▆█▄▃▂▃▂▁
train/learning_rate,▄▄▄▄█▆▃▁
train/loss,███▇▄▃▂▁

0,1
eval/loss,3.56982
eval/runtime,30.9388
eval/samples_per_second,3.232
eval/steps_per_second,3.232
total_flos,1.2091343912681472e+16
train/epoch,2.98667
train/global_step,336.0
train/grad_norm,0.10854
train/learning_rate,0.00017
train/loss,3.6088


- quantisation
- llama 3 3b
- dataset: math
- dataset size: 1000
- peft: prefix-tuning

In [14]:
run = wandb.init(
    project='Deep learning final project', # change
    job_type="training",
    anonymous="allow",
    name = "prefix_quantisation_math_1000_llama"
)

peft_config = PrefixTuningConfig(
    peft_type="PREFIX_TUNING",
    task_type="CAUSAL_LM",
    num_virtual_tokens=400,
    token_dim=3072,
    num_transformer_submodules=1,
    encoder_hidden_size=3072
)

run_training(llama_base_model, peft_config, math_dataset_name, 1000, "problem", "generated_solution", "newModelLoRA", 1e-3, quantisation_config)
wandb.finish()

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

trainable params: 22,937,600 || all params: 3,235,687,424 || trainable%: 0.7089


README.md:   0%|          | 0.00/6.41k [00:00<?, ?B/s]

Resolving data files:   0%|          | 0/32 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/32 [00:00<?, ?it/s]

Downloading data:   0%|          | 0/32 [00:00<?, ?files/s]

train-00000-of-00032.parquet:   0%|          | 0.00/237M [00:00<?, ?B/s]

train-00001-of-00032.parquet:   0%|          | 0.00/237M [00:00<?, ?B/s]

train-00002-of-00032.parquet:   0%|          | 0.00/237M [00:00<?, ?B/s]

train-00003-of-00032.parquet:   0%|          | 0.00/237M [00:00<?, ?B/s]

train-00004-of-00032.parquet:   0%|          | 0.00/237M [00:00<?, ?B/s]

train-00005-of-00032.parquet:   0%|          | 0.00/237M [00:00<?, ?B/s]

train-00006-of-00032.parquet:   0%|          | 0.00/237M [00:00<?, ?B/s]

train-00007-of-00032.parquet:   0%|          | 0.00/237M [00:00<?, ?B/s]

train-00008-of-00032.parquet:   0%|          | 0.00/237M [00:00<?, ?B/s]

train-00009-of-00032.parquet:   0%|          | 0.00/237M [00:00<?, ?B/s]

train-00010-of-00032.parquet:   0%|          | 0.00/237M [00:00<?, ?B/s]

train-00011-of-00032.parquet:   0%|          | 0.00/237M [00:00<?, ?B/s]

train-00012-of-00032.parquet:   0%|          | 0.00/236M [00:00<?, ?B/s]

train-00013-of-00032.parquet:   0%|          | 0.00/237M [00:00<?, ?B/s]

train-00014-of-00032.parquet:   0%|          | 0.00/237M [00:00<?, ?B/s]

train-00015-of-00032.parquet:   0%|          | 0.00/237M [00:00<?, ?B/s]

train-00016-of-00032.parquet:   0%|          | 0.00/237M [00:00<?, ?B/s]

train-00017-of-00032.parquet:   0%|          | 0.00/237M [00:00<?, ?B/s]

train-00018-of-00032.parquet:   0%|          | 0.00/237M [00:00<?, ?B/s]

train-00019-of-00032.parquet:   0%|          | 0.00/237M [00:00<?, ?B/s]

train-00020-of-00032.parquet:   0%|          | 0.00/237M [00:00<?, ?B/s]

train-00021-of-00032.parquet:   0%|          | 0.00/237M [00:00<?, ?B/s]

train-00022-of-00032.parquet:   0%|          | 0.00/237M [00:00<?, ?B/s]

train-00023-of-00032.parquet:   0%|          | 0.00/237M [00:00<?, ?B/s]

train-00024-of-00032.parquet:   0%|          | 0.00/237M [00:00<?, ?B/s]

train-00025-of-00032.parquet:   0%|          | 0.00/237M [00:00<?, ?B/s]

train-00026-of-00032.parquet:   0%|          | 0.00/237M [00:00<?, ?B/s]

train-00027-of-00032.parquet:   0%|          | 0.00/236M [00:00<?, ?B/s]

train-00028-of-00032.parquet:   0%|          | 0.00/237M [00:00<?, ?B/s]

train-00029-of-00032.parquet:   0%|          | 0.00/237M [00:00<?, ?B/s]

train-00030-of-00032.parquet:   0%|          | 0.00/237M [00:00<?, ?B/s]

train-00031-of-00032.parquet:   0%|          | 0.00/237M [00:00<?, ?B/s]

train_1M-00000-of-00003.parquet:   0%|          | 0.00/213M [00:00<?, ?B/s]

train_1M-00001-of-00003.parquet:   0%|          | 0.00/213M [00:00<?, ?B/s]

train_1M-00002-of-00003.parquet:   0%|          | 0.00/213M [00:00<?, ?B/s]

train_2M-00000-of-00006.parquet:   0%|          | 0.00/217M [00:00<?, ?B/s]

train_2M-00001-of-00006.parquet:   0%|          | 0.00/217M [00:00<?, ?B/s]

train_2M-00002-of-00006.parquet:   0%|          | 0.00/217M [00:00<?, ?B/s]

train_2M-00003-of-00006.parquet:   0%|          | 0.00/217M [00:00<?, ?B/s]

train_2M-00004-of-00006.parquet:   0%|          | 0.00/217M [00:00<?, ?B/s]

train_2M-00005-of-00006.parquet:   0%|          | 0.00/217M [00:00<?, ?B/s]

train_5M-00000-of-00014.parquet:   0%|          | 0.00/222M [00:00<?, ?B/s]

train_5M-00001-of-00014.parquet:   0%|          | 0.00/222M [00:00<?, ?B/s]

train_5M-00002-of-00014.parquet:   0%|          | 0.00/222M [00:00<?, ?B/s]

train_5M-00003-of-00014.parquet:   0%|          | 0.00/222M [00:00<?, ?B/s]

train_5M-00004-of-00014.parquet:   0%|          | 0.00/222M [00:00<?, ?B/s]

train_5M-00005-of-00014.parquet:   0%|          | 0.00/222M [00:00<?, ?B/s]

train_5M-00006-of-00014.parquet:   0%|          | 0.00/222M [00:00<?, ?B/s]

train_5M-00007-of-00014.parquet:   0%|          | 0.00/223M [00:00<?, ?B/s]

train_5M-00008-of-00014.parquet:   0%|          | 0.00/222M [00:00<?, ?B/s]

train_5M-00009-of-00014.parquet:   0%|          | 0.00/222M [00:00<?, ?B/s]

train_5M-00010-of-00014.parquet:   0%|          | 0.00/222M [00:00<?, ?B/s]

train_5M-00011-of-00014.parquet:   0%|          | 0.00/222M [00:00<?, ?B/s]

KeyboardInterrupt: 

### LayerNorm

In [16]:
run = wandb.init(
    project='Deep learning final project', # change
    job_type="training",
    anonymous="allow",
    name = "layerNorm_quantisation_math_1000_llama"
)

peft_config = LNTuningConfig(
    task_type=TaskType.CAUSAL_LM,
)

run_training(llama_base_model, peft_config, math_dataset_name, 1000, "problem", "generated_solution", "newModelLoRA", 1e-3, quantisation_config)
wandb.finish()

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

trainable params: 175,104 || all params: 3,212,924,928 || trainable%: 0.0054


Resolving data files:   0%|          | 0/32 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/32 [00:00<?, ?it/s]

Downloading data:   0%|          | 0/32 [00:00<?, ?files/s]

train_5M-00011-of-00014.parquet:  94%|#########4| 210M/222M [00:00<?, ?B/s]

train_5M-00012-of-00014.parquet:   0%|          | 0.00/222M [00:00<?, ?B/s]

train_5M-00013-of-00014.parquet:   0%|          | 0.00/222M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/13972791 [00:00<?, ? examples/s]

KeyboardInterrupt: 

In [17]:
run = wandb.init(
    project='Deep learning final project', # change
    job_type="training",
    anonymous="allow",
    name = "layerNorm_quantisation_medical_1000_llama"
)

peft_config = LNTuningConfig(
    task_type=TaskType.CAUSAL_LM,
)

run_training(llama_base_model, peft_config, medical_chatbot_dataset_name, 1000, "Patient", "Doctor", "newModelLoRA", 1e-3, quantisation_config)
wandb.finish()

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

trainable params: 175,104 || all params: 3,212,924,928 || trainable%: 0.0054



Deprecated positional argument(s) used in SFTTrainer, please use the SFTConfig to set these arguments instead.


Step,Training Loss,Validation Loss
60,2.6227,2.416605
120,2.2884,2.184773
180,2.041,2.038435
240,1.8264,1.926585
300,1.7058,1.86185




VBox(children=(Label(value='0.025 MB of 0.025 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
eval/loss,█▅▃▂▁
eval/runtime,▃█▅▁▄
eval/samples_per_second,▆▁▄█▅
eval/steps_per_second,▆▁▄█▅
train/epoch,▁▁▃▃▄▄▆▆▇▇█
train/global_step,▁▁▃▃▄▄▆▆▇▇█
train/grad_norm,█▁█▂▆
train/learning_rate,▄█▆▃▁
train/loss,█▅▄▂▁

0,1
eval/loss,1.86185
eval/runtime,31.2974
eval/samples_per_second,3.195
eval/steps_per_second,3.195
total_flos,1.2092095041601536e+16
train/epoch,2.98667
train/global_step,336.0
train/grad_norm,0.48756
train/learning_rate,0.00017
train/loss,1.7058


In [None]:
run = wandb.init(
    project='Deep learning final project', # change
    job_type="training",
    anonymous="allow",
    name = "layerNorm_quantisation_math_1000_llama"
)

peft_config = LNTuningConfig(
    task_type=TaskType.CAUSAL_LM,
)

run_training(llama_base_model, peft_config, math_dataset_name, 1000, "problem", "generated_solution", "newModelLoRA", 1e-3, quantisation_config)
wandb.finish()

### LoHa

In [23]:
run = wandb.init(
    project='Deep learning final project', # change
    job_type="training",
    anonymous="allow",
    name = "LoHa_quantisation_medical_1000_llama"
)

peft_config = LoHaConfig(
    r=8,
    alpha=32,
    rank_dropout=0.0,
    module_dropout=0.0,
    init_weights=True,
    task_type="CAUSAL_LM",
    target_modules=['up_proj', 'down_proj', 'gate_proj', 'k_proj', 'q_proj', 'v_proj', 'o_proj']
)

run_training(llama_base_model, peft_config, medical_chatbot_dataset_name, 1000, "Patient", "Doctor", "newModelLoRA", 1e-3, quantisation_config)
wandb.finish()

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

OutOfMemoryError: CUDA out of memory. Tried to allocate 384.00 MiB. GPU 0 has a total capacity of 14.75 GiB of which 255.06 MiB is free. Process 2437 has 14.50 GiB memory in use. Of the allocated memory 14.34 GiB is allocated by PyTorch, and 18.18 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)

In [None]:
run = wandb.init(
    project='Deep learning final project', # change
    job_type="training",
    anonymous="allow",
    name = "VBLoRA_quantisation_medical_1000_llama"
)

config = VBLoRAConfig(
    task_type="SEQ_CLS",
    r=4,
    num_vectors=60,
    vector_length=256,
    save_only_topk_weights=True,
)


run_training(llama_base_model, peft_config, medical_chatbot_dataset_name, 1000, "Patient", "Doctor", "newModelLoRA", 1e-3, quantisation_config)
wandb.finish()

### vera

- no quantisation
- llama 3 3b
- dataset: medical
- dataset size: 1000
- peft: vera

In [None]:
run = wandb.init(
    project='Deep learning final project', # change
    job_type="training",
    anonymous="allow",
    name = "vera_medical_1000_llama"
)

peft_config = VeraConfig(r=128)

run_training(llama_base_model, peft_config, medical_chatbot_dataset_name, 1000, "Patient", "Doctor", "newModelLoRA", 1e-3)
wandb.finish()

- no quantisation
- llama 3 3b
- dataset: math
- dataset size: 1000
- peft: vera

In [None]:
run = wandb.init(
    project='Deep learning final project', # change
    job_type="training",
    anonymous="allow",
    name = "vera_math_1000_llama"
)

peft_config = VeraConfig(r=128)

run_training(llama_base_model, peft_config, math_dataset_name, 1000, "problem", "generated_solution", "newModelLoRA", 1e-3)
wandb.finish()

- quantisation
- llama 3 3b
- dataset: medical
- dataset size: 1000
- peft: vera

In [None]:
run = wandb.init(
    project='Deep learning final project', # change
    job_type="training",
    anonymous="allow",
    name = "vera_quantisation_medical_1000_llama"
)

peft_config = VeraConfig(r=128)

run_training(llama_base_model, peft_config, medical_chatbot_dataset_name, 1000, "Patient", "Doctor", "newModelLoRA", 1e-3, quantisation_config)
wandb.finish()

- quantisation
- llama 3 3b
- dataset: math
- dataset size: 1000
- peft: vera

In [None]:
run = wandb.init(
    project='Deep learning final project', # change
    job_type="training",
    anonymous="allow",
    name = "vera_quantisation_math_1000_llama"
)

peft_config = VeraConfig(r=128)

run_training(llama_base_model, peft_config, medical_chatbot_dataset_name, 1000, "Patient", "Doctor", "newModelLoRA", 1e-3, quantisation_config)
wandb.finish()

## Notes for me

- change model
- change dataset
- change dataset size
- change paramters?
- change task type?
- quantised vs not quantised?

peft techniques
- lora
- qlora
- vera
- p-turning
- prefix tuning
- layernorm tuning


Aim: make the best generalised fine tuning method for LLMs that works quickly on regular user devices, using the PEFT libary on huggingface


initial eliminations:
- llama3 3B can only be loaded when it is qunatised to avoid memory errors
- therefore vera eliminated
