# Finetune an LLM (Phi-3-mini by default) with LLaMA Factory

Please use a **free** Tesla T4 Colab GPU to run this!

Project homepage: https://github.com/hiyouga/LLaMA-Factory

### Configuration

In [6]:
# URL of the JSON dataset that is downloaded and used for finetuning
finetuning_data_url = (
    "https://raw.githubusercontent.com/bertilmuth/hf_to_gguf/main/"
    "finetuning_dataset/FinetuningData_ALL_llamafactory_clean.json"
)

# Hugging Face ID of the base model that gets finetuned on the dataset
hf_base_model_id = "microsoft/Phi-3-mini-4k-instruct"

# LLaMA Factory prompt template name; must match the chosen base model
llamafactory_template_name = "phi"

# Number of finetuning epochs
epochs = 2

# Hugging Face Hub ID under which the merged, finetuned model is uploaded.
# IMPORTANT: You need to set a Google Colab secret called HF_WRITE_TOKEN to a
# Hugging Face write token for the upload to work!
hf_finetuned_model_id = "bertilmuth/phi"



### Mount Google Drive


In [7]:
from google.colab import drive

# Make the user's Google Drive available under /content/drive for this session.
drive.mount("/content/drive")

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


### Check GPU environment

In [8]:
import torch

# Warn (without aborting the notebook) when PyTorch cannot see a CUDA device.
# An explicit `if` is used instead of `assert` + `except AssertionError`, because
# assert statements are removed entirely when Python runs with -O.
if not torch.cuda.is_available():
  print("Please set up a GPU before using LLaMA Factory: https://medium.com/mlearning-ai/training-yolov4-on-google-colab-316f8fff99c6")

### Install Dependencies & Setup

In [9]:
# Set paths where to store the output
adapter_name = llamafactory_template_name + "_lora"
saved_merged_model_path = adapter_name + "_merged"

# Install dependencies
%cd /content/
%rm -rf LLaMA-Factory
!git clone https://github.com/hiyouga/LLaMA-Factory.git
%cd LLaMA-Factory
%ls
!pip install "unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git"
!pip install --no-deps xformers==0.0.25
!pip install .[bitsandbytes]

import os, requests

# Download the finetuning data using requests
response = requests.get(finetuning_data_url)

# Check if the request was successful
if response.status_code == 200:
    # Extract the text content from the response
    text_content = response.text

    # Write the text content to the file identity.json
    with open("/content/LLaMA-Factory/data/identity.json", "w", encoding="utf-8") as file:
        file.write(text_content)
    print("The content has been successfully written to identity.json.")
else:
    print(f"Error: Failed to retrieve the file from {finetuning_data_url}. Status code: {response.status_code}")


/content
Cloning into 'LLaMA-Factory'...
remote: Enumerating objects: 11296, done.[K
remote: Counting objects: 100% (91/91), done.[K
remote: Compressing objects: 100% (51/51), done.[K
remote: Total 11296 (delta 40), reused 71 (delta 36), pack-reused 11205[K
Receiving objects: 100% (11296/11296), 215.01 MiB | 14.33 MiB/s, done.
Resolving deltas: 100% (8279/8279), done.
Updating files: 100% (214/214), done.
/content/LLaMA-Factory
[0m[01;34massets[0m/       docker-compose.yml  [01;34mexamples[0m/  pyproject.toml  requirements.txt  [01;34msrc[0m/
CITATION.cff  Dockerfile          LICENSE    README.md       [01;34mscripts[0m/          [01;34mtests[0m/
[01;34mdata[0m/         [01;34mevaluation[0m/         Makefile   README_zh.md    setup.py
Collecting unsloth[colab-new]@ git+https://github.com/unslothai/unsloth.git
  Cloning https://github.com/unslothai/unsloth.git to /tmp/pip-install-b_evyok7/unsloth_151940df3e1348609158570ca1648ce1
  Running command git clone --filter=bl

### Fine-tune model via Command Line


In [10]:
import json

args = dict(
  stage="sft",                        # do supervised fine-tuning
  do_train=True,
  model_name_or_path=hf_base_model_id, # model name specified in Configuration
  dataset="identity",             # use identity dataset
  template=llamafactory_template_name,       # prompt template specified in Configuration
  finetuning_type="lora",                   # use LoRA adapters to save memory
  lora_target="all",                     # attach LoRA adapters to all linear layers
  output_dir=adapter_name,                  # the path to save LoRA adapters
  per_device_train_batch_size=2,               # the batch size
  gradient_accumulation_steps=4,               # the gradient accumulation steps
  lr_scheduler_type="cosine",                 # use cosine learning rate scheduler
  logging_steps=10,                      # log every 10 steps
  warmup_ratio=0.1,                      # use warmup scheduler
  save_steps=1000,                      # save checkpoint every 1000 steps
  learning_rate=5e-5,                     # the learning rate
  num_train_epochs=epochs,                    # the epochs of training
  max_samples=2500,                      # use 500 examples in each dataset
  max_grad_norm=1.0,                     # clip gradient norm to 1.0
  quantization_bit=4,                     # use 4-bit QLoRA
  loraplus_lr_ratio=16.0,                   # use LoRA+ algorithm with lambda=16.0
  use_unsloth=False,                      # use UnslothAI's LoRA optimization for 2x faster training
  fp16=False,                         # use float16 mixed precision training
  overwrite_output_dir=True
)

json.dump(args, open("train.json", "w", encoding="utf-8"), indent=2)

%cd /content/LLaMA-Factory/

!llamafactory-cli train train.json

/content/LLaMA-Factory
2024-05-14 14:15:09.331251: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-05-14 14:15:09.331333: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-05-14 14:15:09.333492: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
05/14/2024 14:15:16 - INFO - llmtuner.hparams.parser - Process rank: 0, device: cuda:0, n_gpu: 1, distributed training: False, compute dtype: torch.float16
[INFO|tokenization_utils_base.py:2087] 2024-05-14 14:15:16,938 >> loading file tokenizer.model from cache at /root/.cache/huggingface/hub/models--microsoft--Phi-3-mini-4k-instruct/snapsh

### Login Huggingface

In [14]:
# IMPORTANT: This only works if the Colab secret HF_WRITE_TOKEN holds a Hugging Face write token!
from google.colab import userdata
from huggingface_hub import login

# Read the token from the Colab secret store and pass it straight to login(),
# so it is never stored in a notebook variable.
login(
  token=userdata.get("HF_WRITE_TOKEN"),
)


The token has not been saved to the git credentials helper. Pass `add_to_git_credential=True` in this function directly or `--add-to-git-credential` if using via `huggingface-cli` if you want to set the git credential as well.
Token is valid (permission: write).
Your token has been saved to /root/.cache/huggingface/token
Login successful


### Upload finetuned model to Huggingface

In [15]:
import json

%cd /content/LLaMA-Factory
args = dict(
  model_name_or_path=hf_base_model_id,             # the hugging face model id
  adapter_name_or_path=adapter_name,            # load the saved LoRA adapters
  template=llamafactory_template_name,          # same to the one in training
  finetuning_type="lora",                  # same to the one in training
  export_dir=saved_merged_model_path,              # the path to save the merged model
  export_size=2,                       # the file shard size (in GB) of the merged model
  export_device="cpu",                    # the device used in export, can be chosen from `cpu` and `cuda`
  export_hub_model_id=hf_finetuned_model_id      # the Hugging Face hub ID to upload model
)

json.dump(args, open("merge_file.json", "w", encoding="utf-8"), indent=2)
!llamafactory-cli export merge_file.json

/content/LLaMA-Factory
2024-05-14 16:10:13.582230: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-05-14 16:10:13.582276: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-05-14 16:10:13.583836: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
[INFO|tokenization_utils_base.py:2087] 2024-05-14 16:10:18,853 >> loading file tokenizer.model from cache at /root/.cache/huggingface/hub/models--microsoft--Phi-3-mini-4k-instruct/snapshots/920b6cf52a79ecff578cc33f61922b23cbc88115/tokenizer.model
[INFO|tokenization_utils_base.py:2087] 2024-05-14 16:10:18,853 >> loading file tokenizer.json f