# Finetune Phi-3 with LLaMA Factory

Please use a **free** Tesla T4 Colab GPU to run this!

Project homepage: https://github.com/hiyouga/LLaMA-Factory

### Configuration

In [2]:
# URL of the JSON dataset that is downloaded and used for finetuning
finetuning_data_url = (
    "https://raw.githubusercontent.com/bertilmuth/hf_to_gguf/main/"
    "finetuning_dataset/FinetuningData_ALL_llamafactory_clean.json"
)

# Hugging Face id of the base model that gets finetuned with the dataset
hf_base_model_id = "microsoft/Phi-3-mini-128k-instruct"

# LLaMA Factory prompt template name; it has to match the base model
llamafactory_template_name = "phi"

# Number of finetuning epochs
epochs = 15

# Hugging Face id under which the finetuned model is published.
# IMPORTANT: You need to set a Google Colab secret called HF_WRITE_TOKEN
# to a Hugging Face write token for this to work!
hf_finetuned_model_id = "bertilmuth/phi-3-mini-128k-15gen"



### Mount Google Drive


In [4]:
# Make Google Drive available inside the Colab VM; later cells move their
# zip archives to /content/drive/MyDrive.
from google.colab import drive

DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

Mounted at /content/drive


### Check GPU environment

In [9]:
import torch

# Report a missing GPU with an explicit condition instead of
# try/assert/except: assert statements are removed when Python runs with -O,
# which would make the original check disappear silently.
# The cell deliberately only prints a hint (it does not raise), as before.
if not torch.cuda.is_available():
  print("Please set up a GPU before using LLaMA Factory: https://medium.com/mlearning-ai/training-yolov4-on-google-colab-316f8fff99c6")

### Install Dependencies & Setup

In [9]:
# Set paths where to store the output
adapter_name = llamafactory_template_name + "_lora"
saved_merged_model_path = adapter_name + "_merged"

# Install dependencies
%cd /content/
%rm -rf LLaMA-Factory
!git clone https://github.com/hiyouga/LLaMA-Factory.git
%cd LLaMA-Factory
%ls
!pip install "unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git"
!pip install --no-deps xformers==0.0.25
!pip install .[bitsandbytes]

import os, requests

# Download the finetuning data using requests
response = requests.get(finetuning_data_url)

# Check if the request was successful
if response.status_code == 200:
    # Extract the text content from the response
    text_content = response.text

    # Write the text content to the file identity.json
    with open("/content/LLaMA-Factory/data/identity.json", "w", encoding="utf-8") as file:
        file.write(text_content)
    print("The content has been successfully written to identity.json.")
else:
    print(f"Error: Failed to retrieve the file from {finetuning_data_url}. Status code: {response.status_code}")


/content
Cloning into 'LLaMA-Factory'...
remote: Enumerating objects: 11876, done.[K
remote: Counting objects: 100% (667/667), done.[K
remote: Compressing objects: 100% (330/330), done.[K
remote: Total 11876 (delta 399), reused 550 (delta 323), pack-reused 11209[K
Receiving objects: 100% (11876/11876), 218.07 MiB | 27.02 MiB/s, done.
Resolving deltas: 100% (8639/8639), done.
/content/LLaMA-Factory
[0m[01;34massets[0m/       docker-compose.yml  [01;34mexamples[0m/  pyproject.toml  requirements.txt  [01;34msrc[0m/
CITATION.cff  Dockerfile          LICENSE    README.md       [01;34mscripts[0m/          [01;34mtests[0m/
[01;34mdata[0m/         [01;34mevaluation[0m/         Makefile   README_zh.md    setup.py
Collecting unsloth[colab-new]@ git+https://github.com/unslothai/unsloth.git
  Cloning https://github.com/unslothai/unsloth.git to /tmp/pip-install-f1o77s4m/unsloth_1cc16bca4f094b8c85538cf7caa06736
  Running command git clone --filter=blob:none --quiet https://github.

### Fine-tune model


In [11]:
import json

args = dict(
  stage="sft",                        # do supervised fine-tuning
  do_train=True,
  model_name_or_path=hf_base_model_id, # model name specified in Configuration
  dataset="identity",             # use identity dataset
  template=llamafactory_template_name,       # prompt template specified in Configuration
  finetuning_type="lora",                   # use LoRA adapters to save memory
  lora_target="all",                     # attach LoRA adapters to all linear layers
  output_dir=adapter_name,                  # the path to save LoRA adapters
  per_device_train_batch_size=4,               # the batch size
  gradient_accumulation_steps=4,               # the gradient accumulation steps
  lr_scheduler_type="cosine",                 # use cosine learning rate scheduler
  logging_steps=10,                      # log every 10 steps
  warmup_ratio=0.1,                      # use warmup scheduler
  save_steps=1000,                      # save checkpoint every 1000 steps
  learning_rate=5e-5,                     # the learning rate
  num_train_epochs=epochs,                    # the epochs of training
  max_samples=2500,                      # use 500 examples in each dataset
  max_grad_norm=1.0,                     # clip gradient norm to 1.0
  quantization_bit=4,                     # use 4-bit QLoRA
  loraplus_lr_ratio=16.0,                   # use LoRA+ algorithm with lambda=16.0
  use_unsloth=True,                      # use UnslothAI's LoRA optimization for 2x faster training
  fp16=True,                         # use float16 mixed precision training
  overwrite_output_dir=True
)

json.dump(args, open("train.json", "w", encoding="utf-8"), indent=2)

%cd /content/LLaMA-Factory/

!llamafactory-cli train train.json

/content/LLaMA-Factory
2024-05-18 18:33:56.282136: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-05-18 18:33:56.282195: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-05-18 18:33:56.284042: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
05/18/2024 18:34:01 - INFO - llamafactory.hparams.parser - Process rank: 0, device: cuda:0, n_gpu: 1, distributed training: False, compute dtype: torch.float16
[INFO|tokenization_utils_base.py:2087] 2024-05-18 18:34:01,614 >> loading file tokenizer.model from cache at /root/.cache/huggingface/hub/models--microsoft--Phi-3-mini-128k-instruct/

### Save the finetuned model

In [12]:
# Upload the data to Huggingface
# IMPORTANT: You need to set HF_WRITE_TOKEN to a write token of Huggingface for this to work!
from google.colab import userdata
from huggingface_hub import login
import json

login(token=userdata.get('HF_WRITE_TOKEN'))

%cd /content/LLaMA-Factory
args = dict(
  model_name_or_path=hf_base_model_id,             # the hugging face model id
  adapter_name_or_path=adapter_name,            # load the saved LoRA adapters
  template=llamafactory_template_name,          # same to the one in training
  finetuning_type="lora",                  # same to the one in training
  export_dir=saved_merged_model_path,              # the path to save the merged model
  export_size=2,                       # the file shard size (in GB) of the merged model
  export_device="cpu",                    # the device used in export, can be chosen from `cpu` and `cuda`
  export_hub_model_id=hf_finetuned_model_id      # the Hugging Face hub ID to upload model
)

json.dump(args, open("merge_file.json", "w", encoding="utf-8"), indent=2)
!llamafactory-cli export merge_file.json

# Upload the adapter to Google Drive
directory_to_zip = adapter_name  # Change this to your directory
zip_output_path = f'{adapter_name}.zip'  # Change this to your desired output zip file name
drive_zip_output_path = f'/content/drive/MyDrive/{adapter_name}.zip'  # Change this to your desired location on Google Drive

!zip -r $zip_output_path $directory_to_zip
!mv $zip_output_path $drive_zip_output_path

The token has not been saved to the git credentials helper. Pass `add_to_git_credential=True` in this function directly or `--add-to-git-credential` if using via `huggingface-cli` if you want to set the git credential as well.
Token is valid (permission: write).
Your token has been saved to /root/.cache/huggingface/token
Login successful
/content/LLaMA-Factory
2024-05-18 21:28:08.983227: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-05-18 21:28:08.983285: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-05-18 21:28:08.984910: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
[I

### Infer SysML v2 models for 112 different technical systems

In [7]:
import os
from llamafactory.chat import ChatModel
from llamafactory.extras.misc import torch_gc

# Change directory to where LLaMA-Factory is located; the adapter path and the
# output directory used further down in this cell are relative paths.
%cd /content/LLaMA-Factory/

# Names of the technical systems to generate models for. Each name is inserted
# into the prompt ("Create a model for <name>") and is also used as the file
# name of the generated .sysml file.
# Note: the list holds 112 names, although the output directory is called
# '100systems_sysml'.
technical_systems = [
    "Underwater Communication System",
    "Smart Grid Control System",
    "Quantum Key Distribution System",
    "Space Habitat Life Support System",
    "Deep Sea Exploration System",
    "Smart Agriculture System",
    "Urban Traffic Control System",
    "Wildlife Monitoring System",
    "Automated Legal Reasoning System",
    "Blockchain-based Voting System",
    "Cryptographic Currency Exchange",
    "Distributed Cloud Storage System",
    "Elderly Care Robotic System",
    "Facial Recognition Security System",
    "Gene Editing CRISPR Control System",
    "Haptic Feedback Virtual Reality System",
    "Intelligent Transportation Coordination System",
    "Jail Management System",
    "Kinetic Energy Recovery System",
    "Laser Communication System",
    "Municipal Waste Sorting System",
    "Nano-Medicine Delivery System",
    "Ocean Current Energy Conversion System",
    "Pervasive Computing System",
    "Quantum Computing Simulation System",
    "Remote Sensing Satellite System",
    "Synthetic Biology Engineering Platform",
    "Thermal Energy Storage System",
    "Unmanned Combat Aerial Vehicle System",
    "Volumetric 3D Printing System",
    "Wearable Health Monitoring System",
    "Exoplanet Discovery System",
    "Youth Sports Management Platform",
    "Zero-Emission Vehicle Charging Network",
    "Automated Tax Compliance System",
    "Biometric Authentication System",
    "Crowdsourced Weather Prediction Platform",
    "Drone-based Delivery System",
    "Emergency Response Coordination System",
    "Fintech Blockchain Platform",
    "Green Building Management System",
    "High-Altitude Pseudo-Satellite System",
    "Intelligent Drug Discovery System",
    "Junk Data Cleanup Software",
    "Knowledge Discovery in Databases System",
    "Low Earth Orbit Satellite Internet System",
    "Molecular Manufacturing System",
    "Neural Network Training Platform",
    "Ocean Acidification Monitoring System",
    "Precision Agriculture Decision Support System",
    "Quantum Sensor Network",
    "Renewable Energy Microgrid",
    "Space Junk Tracking System",
    "Telehealth Service Platform",
    "Underground Transportation System",
    "Vaccine Research and Development Platform",
    "Wind Farm Optimization System",
    "Xenobiotic Detection System",
    "Youth Mental Health Service Platform",
    "Zero-Knowledge Proof System",
    "Automated Contract Enforcement System",
    "Biodegradable Material Processing System",
    "Carbon Capture and Storage System",
    "Digital Twin for Industrial Automation",
    "Exascale Computing System",
    "Fusion Energy Control System",
    "Genomic Data Analysis System",
    "Hyperloop Transport System",
    "Industrial Internet of Things Platform",
    "Journalist's Digital Research Assistant",
    "Knowledge Graph-Based Recommendation System",
    "Liquid Metal Battery System",
    "Microbial Fuel Cell System",
    "Nanostructured Material Development Kit",
    "Offshore Aquaculture System",
    "Personalized Learning Environment",
    "Quantum Dot Display Manufacturing System",
    "Rapid Prototyping Machine",
    "Self-Healing Material System",
    "Tidal Power Generation System",
    "Urban Air Mobility System",
    "Vertical Farming Climate Control System",
    "Wearable Translator Device",
    "Extended Reality Collaboration Platform",
    "Youth Digital Inclusion Program",
    "Zettabyte File System",
    "Augmented Reality Shopping Assistant",
    "Blockchain-based Supply Chain Verification",
    "Cognitive Behavioral Therapy Application",
    "Distributed Autonomous Organization Management",
    "Environmental Impact Assessment Tool",
    "Fog Computing Network",
    "Graphene Production Facility",
    "High Efficiency Photovoltaic System",
    "Invasive Species Management Tool",
    "Journalistic Integrity Verification System",
    "Kinematic Analysis Software",
    "Language Learning Companion Bot",
    "Multiphysics Simulation Software",
    "Nutrient Recycling System",
    "Organ-on-a-Chip Testing Platform",
    "Plasma Waste Recycling System",
    "Quantum Annealing Solver",
    "Resilient Infrastructure Design Software",
    "Smart Contract Audit Platform",
    "Therapeutic Virtual Reality Environment",
    "Ultra-High Definition Holographic Display",
    "Voice-Activated Home Assistant",
    "Waste-to-Energy Conversion System",
    "X-ray Crystallography Data Analysis Software",
    "Youth Entrepreneurship Support Platform",
    "Zero-Trust Network Architecture"
]

# Setup chat model arguments
args = dict(
  model_name_or_path=hf_base_model_id,
  adapter_name_or_path=adapter_name,      # load the saved LoRA adapters
  template=llamafactory_template_name,                     # same to the one in training
  finetuning_type="lora",                  # same to the one in training
  quantization_bit=4,                    # load 4-bit quantized model
  use_unsloth=False,                     # don't use UnslothAI's LoRA optimization for 2x faster generation
)

# Initialize the chat model
chat_model = ChatModel(args)

# Ensure the sysml_files directory exists
output_dir = '100systems_sysml'
os.makedirs(output_dir, exist_ok=True)

# Iterate over the technical system names
n = 0
for technical_system in technical_systems:
    query = "Create a model for " + technical_system
    messages = [{"role": "user", "content": query}]

    response = ""
    for new_text in chat_model.stream_chat(messages):
        response += new_text

    # Write the response to a file
    file_path = os.path.join(output_dir, f"{technical_system}.sysml")
    with open(file_path, 'w') as file:
        file.write(response)
    n+=1
    print("Created sysml v2 model no. " + str(n) )

# Free up memory
torch_gc()

# Upload to Google Drive
directory_to_zip = output_dir  # Change this to your directory
zip_output_path = f'{output_dir}.zip'  # Change this to your desired output zip file name
drive_zip_output_path = f'/content/drive/MyDrive/{output_dir}.zip'  # Change this to your desired location on Google Drive

!zip -r $zip_output_path $directory_to_zip
!mv $zip_output_path $drive_zip_output_path


/content/LLaMA-Factory


[INFO|tokenization_utils_base.py:2087] 2024-05-18 12:25:47,794 >> loading file tokenizer.model from cache at /root/.cache/huggingface/hub/models--microsoft--Phi-3-mini-128k-instruct/snapshots/8a362e755d2faf8cec2bf98850ce2216023d178a/tokenizer.model
[INFO|tokenization_utils_base.py:2087] 2024-05-18 12:25:47,795 >> loading file tokenizer.json from cache at /root/.cache/huggingface/hub/models--microsoft--Phi-3-mini-128k-instruct/snapshots/8a362e755d2faf8cec2bf98850ce2216023d178a/tokenizer.json
[INFO|tokenization_utils_base.py:2087] 2024-05-18 12:25:47,796 >> loading file added_tokens.json from cache at /root/.cache/huggingface/hub/models--microsoft--Phi-3-mini-128k-instruct/snapshots/8a362e755d2faf8cec2bf98850ce2216023d178a/added_tokens.json
[INFO|tokenization_utils_base.py:2087] 2024-05-18 12:25:47,796 >> loading file special_tokens_map.json from cache at /root/.cache/huggingface/hub/models--microsoft--Phi-3-mini-128k-instruct/snapshots/8a362e755d2faf8cec2bf98850ce2216023d178a/special_to

05/18/2024 12:25:47 - INFO - llamafactory.data.template - Replace eos token: <|end|>


INFO:llamafactory.data.template:Replace eos token: <|end|>




[INFO|configuration_utils.py:726] 2024-05-18 12:25:48,141 >> loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--microsoft--Phi-3-mini-128k-instruct/snapshots/8a362e755d2faf8cec2bf98850ce2216023d178a/config.json
[INFO|configuration_utils.py:726] 2024-05-18 12:25:48,676 >> loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--microsoft--Phi-3-mini-128k-instruct/snapshots/8a362e755d2faf8cec2bf98850ce2216023d178a/config.json
[INFO|configuration_utils.py:789] 2024-05-18 12:25:48,679 >> Model config Phi3Config {
  "_name_or_path": "microsoft/Phi-3-mini-128k-instruct",
  "architectures": [
    "Phi3ForCausalLM"
  ],
  "attention_dropout": 0.0,
  "auto_map": {
    "AutoConfig": "microsoft/Phi-3-mini-128k-instruct--configuration_phi3.Phi3Config",
    "AutoModelForCausalLM": "microsoft/Phi-3-mini-128k-instruct--modeling_phi3.Phi3ForCausalLM"
  },
  "bos_token_id": 1,
  "embd_pdrop": 0.0,
  "eos_token_id": 32000,
  "hidden

05/18/2024 12:25:48 - INFO - llamafactory.model.utils.quantization - Quantizing model to 4 bit.


INFO:llamafactory.model.utils.quantization:Quantizing model to 4 bit.


05/18/2024 12:25:48 - INFO - llamafactory.model.patcher - Using KV cache for faster generation.


INFO:llamafactory.model.patcher:Using KV cache for faster generation.
[INFO|modeling_utils.py:3429] 2024-05-18 12:25:48,978 >> loading weights file model.safetensors from cache at /root/.cache/huggingface/hub/models--microsoft--Phi-3-mini-128k-instruct/snapshots/8a362e755d2faf8cec2bf98850ce2216023d178a/model.safetensors.index.json
[INFO|modeling_utils.py:1494] 2024-05-18 12:25:48,980 >> Instantiating Phi3ForCausalLM model under default dtype torch.bfloat16.
[INFO|configuration_utils.py:928] 2024-05-18 12:25:48,982 >> Generate config GenerationConfig {
  "bos_token_id": 1,
  "eos_token_id": 32000,
  "pad_token_id": 32000
}



Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

[INFO|modeling_utils.py:4170] 2024-05-18 12:25:52,400 >> All model checkpoint weights were used when initializing Phi3ForCausalLM.

[INFO|modeling_utils.py:4178] 2024-05-18 12:25:52,403 >> All the weights of Phi3ForCausalLM were initialized from the model checkpoint at microsoft/Phi-3-mini-128k-instruct.
If your task is similar to the task the model of the checkpoint was trained on, you can already use Phi3ForCausalLM for predictions without further training.
[INFO|configuration_utils.py:883] 2024-05-18 12:25:52,666 >> loading configuration file generation_config.json from cache at /root/.cache/huggingface/hub/models--microsoft--Phi-3-mini-128k-instruct/snapshots/8a362e755d2faf8cec2bf98850ce2216023d178a/generation_config.json
[INFO|configuration_utils.py:928] 2024-05-18 12:25:52,667 >> Generate config GenerationConfig {
  "bos_token_id": 1,
  "eos_token_id": [
    32000,
    32001,
    32007
  ],
  "pad_token_id": 32000
}



05/18/2024 12:25:52 - INFO - llamafactory.model.utils.attention - Using vanilla attention implementation.


INFO:llamafactory.model.utils.attention:Using vanilla attention implementation.


05/18/2024 12:25:52 - INFO - llamafactory.model.adapter - Upcasting trainable params to float32.


INFO:llamafactory.model.adapter:Upcasting trainable params to float32.


05/18/2024 12:25:52 - INFO - llamafactory.model.adapter - Fine-tuning method: LoRA


INFO:llamafactory.model.adapter:Fine-tuning method: LoRA


05/18/2024 12:25:53 - INFO - llamafactory.model.adapter - Loaded adapter(s): phi_lora


INFO:llamafactory.model.adapter:Loaded adapter(s): phi_lora


05/18/2024 12:25:53 - INFO - llamafactory.model.loader - all params: 3833662464


INFO:llamafactory.model.loader:all params: 3833662464


Created sysml v2 model no. 1
Created sysml v2 model no. 2
Created sysml v2 model no. 3
Created sysml v2 model no. 4
Created sysml v2 model no. 5
Created sysml v2 model no. 6
Created sysml v2 model no. 7
Created sysml v2 model no. 8
Created sysml v2 model no. 9
Created sysml v2 model no. 10
Created sysml v2 model no. 11
Created sysml v2 model no. 12
Created sysml v2 model no. 13
Created sysml v2 model no. 14
Created sysml v2 model no. 15
Created sysml v2 model no. 16
Created sysml v2 model no. 17
Created sysml v2 model no. 18
Created sysml v2 model no. 19
Created sysml v2 model no. 20
Created sysml v2 model no. 21
Created sysml v2 model no. 22
Created sysml v2 model no. 23
Created sysml v2 model no. 24
Created sysml v2 model no. 25
Created sysml v2 model no. 26
Created sysml v2 model no. 27
Created sysml v2 model no. 28
Created sysml v2 model no. 29
Created sysml v2 model no. 30
Created sysml v2 model no. 31
Created sysml v2 model no. 32
Created sysml v2 model no. 33
Created sysml v2 mo

### Infer 100 system models (AUTOSAR)

In [12]:
import os
from llamafactory.chat import ChatModel
from llamafactory.extras.misc import torch_gc

# Setup chat model arguments
args = dict(
  model_name_or_path=hf_base_model_id,
  adapter_name_or_path=adapter_name,      # load the saved LoRA adapters
  template=llamafactory_template_name,                     # same to the one in training
  finetuning_type="lora",                  # same to the one in training
  quantization_bit=4,                    # load 4-bit quantized model
  use_unsloth=False,                     # use UnslothAI's LoRA optimization for 2x faster generation
)

# Initialize the chat model
chat_model = ChatModel(args)

# Ensure the sysml_files directory exists
output_dir = "100systems_AUTOSAR_sysml"
os.makedirs(output_dir, exist_ok=True)

# Iterate over the technical system names
query = """
You create a model with these parts: Front Light Manager, Ignition, Light Switch, Low Beam Light. Each part contains at least one statemachine.
The Front Light Manager shall evaluate the Ignition Key position.
The Front Light Manager shall read the LS switch position
The Front Light Manager shall evaluate the LS switch status.
Only if the LS switch status changes from OFF to ON the Front Light Manager shall create a switch event (ON).
If the LS switch status changes from ON to OFF the Front Light Manager shall create a switch event (OFF).
The Front Light Manager shall activate the low beam light, if the Ignition Key position is ON and a light switch event is detected
The Front Light Manager shall deactivate the low beam light if the Ignition Key position is OFF or a switch event (OFF) is detected.
"""
messages = [{"role": "user", "content": query}]

for n in range(100):
    response = ""
    for new_text in chat_model.stream_chat(messages):
      response += new_text

    # Write the response to a file
    file_path = os.path.join(output_dir, f"AUTOSAR_{n}.sysml")
    with open(file_path, 'w') as file:
        file.write(response)

    print("Created sysml v2 model no. " + str(n) )

# Free up memory
torch_gc()

# Upload to Google Drive
directory_to_zip = output_dir  # Change this to your directory
zip_output_path = f'{output_dir}.zip'  # Change this to your desired output zip file name
drive_zip_output_path = f'/content/drive/MyDrive/{output_dir}.zip'  # Change this to your desired location on Google Drive

!zip -r $zip_output_path $directory_to_zip
!mv $zip_output_path $drive_zip_output_path

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/3.17k [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.84M [00:00<?, ?B/s]

added_tokens.json:   0%|          | 0.00/293 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/568 [00:00<?, ?B/s]

[INFO|tokenization_utils_base.py:2087] 2024-05-18 21:45:46,720 >> loading file tokenizer.model from cache at /root/.cache/huggingface/hub/models--microsoft--Phi-3-mini-128k-instruct/snapshots/8a362e755d2faf8cec2bf98850ce2216023d178a/tokenizer.model
[INFO|tokenization_utils_base.py:2087] 2024-05-18 21:45:46,721 >> loading file tokenizer.json from cache at /root/.cache/huggingface/hub/models--microsoft--Phi-3-mini-128k-instruct/snapshots/8a362e755d2faf8cec2bf98850ce2216023d178a/tokenizer.json
[INFO|tokenization_utils_base.py:2087] 2024-05-18 21:45:46,722 >> loading file added_tokens.json from cache at /root/.cache/huggingface/hub/models--microsoft--Phi-3-mini-128k-instruct/snapshots/8a362e755d2faf8cec2bf98850ce2216023d178a/added_tokens.json
[INFO|tokenization_utils_base.py:2087] 2024-05-18 21:45:46,725 >> loading file special_tokens_map.json from cache at /root/.cache/huggingface/hub/models--microsoft--Phi-3-mini-128k-instruct/snapshots/8a362e755d2faf8cec2bf98850ce2216023d178a/special_to

05/18/2024 21:45:46 - INFO - llamafactory.data.template - Replace eos token: <|end|>


INFO:llamafactory.data.template:Replace eos token: <|end|>






config.json:   0%|          | 0.00/3.35k [00:00<?, ?B/s]

[INFO|configuration_utils.py:726] 2024-05-18 21:45:47,042 >> loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--microsoft--Phi-3-mini-128k-instruct/snapshots/8a362e755d2faf8cec2bf98850ce2216023d178a/config.json


configuration_phi3.py:   0%|          | 0.00/10.4k [00:00<?, ?B/s]

[INFO|configuration_utils.py:726] 2024-05-18 21:45:47,385 >> loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--microsoft--Phi-3-mini-128k-instruct/snapshots/8a362e755d2faf8cec2bf98850ce2216023d178a/config.json
[INFO|configuration_utils.py:789] 2024-05-18 21:45:47,387 >> Model config Phi3Config {
  "_name_or_path": "microsoft/Phi-3-mini-128k-instruct",
  "architectures": [
    "Phi3ForCausalLM"
  ],
  "attention_dropout": 0.0,
  "auto_map": {
    "AutoConfig": "microsoft/Phi-3-mini-128k-instruct--configuration_phi3.Phi3Config",
    "AutoModelForCausalLM": "microsoft/Phi-3-mini-128k-instruct--modeling_phi3.Phi3ForCausalLM"
  },
  "bos_token_id": 1,
  "embd_pdrop": 0.0,
  "eos_token_id": 32000,
  "hidden_act": "silu",
  "hidden_size": 3072,
  "initializer_range": 0.02,
  "intermediate_size": 8192,
  "max_position_embeddings": 131072,
  "model_type": "phi3",
  "num_attention_heads": 32,
  "num_hidden_layers": 32,
  "num_key_value_heads": 32,
  "origi

05/18/2024 21:45:47 - INFO - llamafactory.model.utils.quantization - Quantizing model to 4 bit.


INFO:llamafactory.model.utils.quantization:Quantizing model to 4 bit.


05/18/2024 21:45:47 - INFO - llamafactory.model.patcher - Using KV cache for faster generation.


INFO:llamafactory.model.patcher:Using KV cache for faster generation.


modeling_phi3.py:   0%|          | 0.00/73.8k [00:00<?, ?B/s]



model.safetensors.index.json:   0%|          | 0.00/16.3k [00:00<?, ?B/s]

[INFO|modeling_utils.py:3429] 2024-05-18 21:45:48,273 >> loading weights file model.safetensors from cache at /root/.cache/huggingface/hub/models--microsoft--Phi-3-mini-128k-instruct/snapshots/8a362e755d2faf8cec2bf98850ce2216023d178a/model.safetensors.index.json


Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/4.97G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/2.67G [00:00<?, ?B/s]

[INFO|modeling_utils.py:1494] 2024-05-18 21:46:17,395 >> Instantiating Phi3ForCausalLM model under default dtype torch.float16.
[INFO|configuration_utils.py:928] 2024-05-18 21:46:17,398 >> Generate config GenerationConfig {
  "bos_token_id": 1,
  "eos_token_id": 32000,
  "pad_token_id": 32000
}



Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

[INFO|modeling_utils.py:4170] 2024-05-18 21:46:23,905 >> All model checkpoint weights were used when initializing Phi3ForCausalLM.

[INFO|modeling_utils.py:4178] 2024-05-18 21:46:23,907 >> All the weights of Phi3ForCausalLM were initialized from the model checkpoint at microsoft/Phi-3-mini-128k-instruct.
If your task is similar to the task the model of the checkpoint was trained on, you can already use Phi3ForCausalLM for predictions without further training.


generation_config.json:   0%|          | 0.00/172 [00:00<?, ?B/s]

[INFO|configuration_utils.py:883] 2024-05-18 21:46:24,132 >> loading configuration file generation_config.json from cache at /root/.cache/huggingface/hub/models--microsoft--Phi-3-mini-128k-instruct/snapshots/8a362e755d2faf8cec2bf98850ce2216023d178a/generation_config.json
[INFO|configuration_utils.py:928] 2024-05-18 21:46:24,134 >> Generate config GenerationConfig {
  "bos_token_id": 1,
  "eos_token_id": [
    32000,
    32001,
    32007
  ],
  "pad_token_id": 32000
}



05/18/2024 21:46:24 - INFO - llamafactory.model.utils.attention - Using vanilla attention implementation.


INFO:llamafactory.model.utils.attention:Using vanilla attention implementation.


05/18/2024 21:46:24 - INFO - llamafactory.model.adapter - Upcasting trainable params to float32.


INFO:llamafactory.model.adapter:Upcasting trainable params to float32.


05/18/2024 21:46:24 - INFO - llamafactory.model.adapter - Fine-tuning method: LoRA


INFO:llamafactory.model.adapter:Fine-tuning method: LoRA


05/18/2024 21:46:24 - INFO - llamafactory.model.adapter - Loaded adapter(s): phi_lora


INFO:llamafactory.model.adapter:Loaded adapter(s): phi_lora


05/18/2024 21:46:24 - INFO - llamafactory.model.loader - all params: 3833662464


INFO:llamafactory.model.loader:all params: 3833662464


Created sysml v2 model no. 0
Created sysml v2 model no. 1
Created sysml v2 model no. 2
Created sysml v2 model no. 3
Created sysml v2 model no. 4
Created sysml v2 model no. 5
Created sysml v2 model no. 6
Created sysml v2 model no. 7
Created sysml v2 model no. 8
Created sysml v2 model no. 9
Created sysml v2 model no. 10
Created sysml v2 model no. 11
Created sysml v2 model no. 12
Created sysml v2 model no. 13
Created sysml v2 model no. 14
Created sysml v2 model no. 15
Created sysml v2 model no. 16
Created sysml v2 model no. 17
Created sysml v2 model no. 18
Created sysml v2 model no. 19
Created sysml v2 model no. 20
Created sysml v2 model no. 21
Created sysml v2 model no. 22
Created sysml v2 model no. 23
Created sysml v2 model no. 24
Created sysml v2 model no. 25
Created sysml v2 model no. 26
Created sysml v2 model no. 27
Created sysml v2 model no. 28
Created sysml v2 model no. 29
Created sysml v2 model no. 30
Created sysml v2 model no. 31
Created sysml v2 model no. 32
Created sysml v2 mod

In [13]:
# Upload to Google Drive
directory_to_zip = output_dir  # Change this to your directory
zip_output_path = f'{output_dir}.zip'  # Change this to your desired output zip file name
drive_zip_output_path = f'/content/drive/MyDrive/{output_dir}.zip'  # Change this to your desired location on Google Drive

!zip -r $zip_output_path $directory_to_zip
!mv $zip_output_path $drive_zip_output_path

  adding: 100systems_AUTOSAR_sysml/ (stored 0%)
  adding: 100systems_AUTOSAR_sysml/AUTOSAR_30.sysml (deflated 79%)
  adding: 100systems_AUTOSAR_sysml/AUTOSAR_62.sysml (deflated 81%)
  adding: 100systems_AUTOSAR_sysml/AUTOSAR_76.sysml (deflated 79%)
  adding: 100systems_AUTOSAR_sysml/AUTOSAR_58.sysml (deflated 80%)
  adding: 100systems_AUTOSAR_sysml/AUTOSAR_10.sysml (deflated 82%)
  adding: 100systems_AUTOSAR_sysml/AUTOSAR_31.sysml (deflated 79%)
  adding: 100systems_AUTOSAR_sysml/AUTOSAR_84.sysml (deflated 77%)
  adding: 100systems_AUTOSAR_sysml/AUTOSAR_92.sysml (deflated 81%)
  adding: 100systems_AUTOSAR_sysml/AUTOSAR_60.sysml (deflated 80%)
  adding: 100systems_AUTOSAR_sysml/AUTOSAR_37.sysml (deflated 88%)
  adding: 100systems_AUTOSAR_sysml/AUTOSAR_43.sysml (deflated 86%)
  adding: 100systems_AUTOSAR_sysml/AUTOSAR_34.sysml (deflated 80%)
  adding: 100systems_AUTOSAR_sysml/AUTOSAR_82.sysml (deflated 83%)
  adding: 100systems_AUTOSAR_sysml/AUTOSAR_87.sysml (deflated 83%)
  adding: 100s

### Unzip the adapter from Google Drive

In [10]:
# Paths
zip_file_path = f'/content/drive/MyDrive/{adapter_name}.zip'  # Path to the zip file
unzip_output_path = f'/content/LLaMA-Factory'  # Path to extract the zip file contents

# Unzip the file
!unzip $zip_file_path -d $unzip_output_path

Archive:  /content/drive/MyDrive/phi_lora.zip
   creating: /content/LLaMA-Factory/phi_lora/
  inflating: /content/LLaMA-Factory/phi_lora/all_results.json  
  inflating: /content/LLaMA-Factory/phi_lora/README.md  
  inflating: /content/LLaMA-Factory/phi_lora/train_results.json  
   creating: /content/LLaMA-Factory/phi_lora/runs/
   creating: /content/LLaMA-Factory/phi_lora/runs/May18_18-34-01_0d0c67952c35/
  inflating: /content/LLaMA-Factory/phi_lora/runs/May18_18-34-01_0d0c67952c35/events.out.tfevents.1716057283.0d0c67952c35.7829.0  
  inflating: /content/LLaMA-Factory/phi_lora/adapter_model.safetensors  
   creating: /content/LLaMA-Factory/phi_lora/checkpoint-1000/
  inflating: /content/LLaMA-Factory/phi_lora/checkpoint-1000/README.md  
  inflating: /content/LLaMA-Factory/phi_lora/checkpoint-1000/adapter_model.safetensors  
  inflating: /content/LLaMA-Factory/phi_lora/checkpoint-1000/rng_state.pth  
  inflating: /content/LLaMA-Factory/phi_lora/checkpoint-1000/trainer_state.json  
  in