# Using LLama Factory finetune on SageMaker 
# 1. Single GPU QLORA- 本地notebook实例训练

## 安装依赖包

In [1]:
%pip install -Uq sagemaker boto3 datasets

[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
awscli 1.32.101 requires botocore==1.34.101, but you have botocore 1.34.133 which is incompatible.[0m[31m
[0mNote: you may need to restart the kernel to use updated packages.


In [2]:
%pip install torch==2.2.0

Collecting torch==2.2.0
  Downloading torch-2.2.0-cp310-cp310-manylinux1_x86_64.whl.metadata (25 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.1.105 (from torch==2.2.0)
  Downloading nvidia_cuda_nvrtc_cu12-12.1.105-py3-none-manylinux1_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.1.105 (from torch==2.2.0)
  Downloading nvidia_cuda_runtime_cu12-12.1.105-py3-none-manylinux1_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.1.105 (from torch==2.2.0)
  Downloading nvidia_cuda_cupti_cu12-12.1.105-py3-none-manylinux1_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==8.9.2.26 (from torch==2.2.0)
  Downloading nvidia_cudnn_cu12-8.9.2.26-py3-none-manylinux1_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.1.3.1 (from torch==2.2.0)
  Downloading nvidia_cublas_cu12-12.1.3.1-py3-none-manylinux1_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cufft-cu12==11.0.2.54 (from torch==2.2.0)
  Downloading nvidia_cufft_cu12-11.0.2.54-py3-none-manylin

In [3]:
import os
import glob
import boto3
import pprint
from tqdm import tqdm
import sagemaker
from sagemaker.collection import Collection
from sagemaker.utils import name_from_base

sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /home/ec2-user/.config/sagemaker/config.yaml


In [4]:
sagemaker_session =  sagemaker.session.Session() #sagemaker.session.Session()
region = sagemaker_session.boto_region_name
role = sagemaker.get_execution_role()
default_bucket = sagemaker_session.default_bucket()
sm_client = boto3.client('sagemaker', region_name=region)

## 准备数据集

### 数据集1. 从huggingface上下载ruozhiba数据集
- 改数据集有近5k条数据，本次实验我们可以只用前1k条做训练

In [5]:
from datasets import load_dataset
from random import randrange
dataset_name = "hfl/ruozhiba_gpt4"
# Load dataset from the hub
train_dataset = load_dataset(dataset_name, split="train",revision='41d2c61beb86c8d4c61916cc656c39d018c40ce5')

print(f"Training size: {len(train_dataset)}")
print("\nTraining sample:\n")
print(train_dataset[randrange(len(train_dataset))])

Downloading data:   0%|          | 0.00/2.08M [00:00<?, ?B/s]

Generating train split: 0 examples [00:00, ? examples/s]

Training size: 4898

Training sample:

{'input': '', 'instruction': '口渴了 去问隔壁邻居借水喝 邻居问我 你是不是找茬 口渴了 去问隔壁邻居借水喝 邻居问我 你是不是找茬', 'output': '这个问题似乎在重复并且带有一些幽默性。正常来说，邻里之间相互提供帮助是一种常见的社会行为。如果你感到口渴，去向邻居借水喝是合情合理的请求。然而，邻居问你“是不是找茬”可能反映了两个可能性。一是邻居可能误会了你的意图，认为你的请求有其他动机。二是邻居可能本身性格比较敏感或者不友好，以至于对这样一个简单的请求产生了误解。如果遇到这种情况，你可以尝试用友善和真诚的态度来解释自己的需求，确保邻居明白你只是单纯需要一些水，并且没有其他目的。这种沟通可以消除误解，也有助于建立更好的邻里关系。'}


### 数据集2. 身份数据集
```json
[{'instruction': 'hi',
  'input': '',
  'output': 'Hello! I am {{name}}, an AI assistant developed by {{author}}. How can I assist you today?'},
 {'instruction': 'hello',
  'input': '',
  'output': 'Hello! I am {{name}}, an AI assistant developed by {{author}}. How can I assist you today?'},
 {'instruction': 'Who are you?',
  'input': '',
  'output': 'I am {{name}}, an AI assistant developed by {{author}}. How can I assist you today?'}]
```
把其中的name和author替换成您自己想替换的值，这样微调完成之后，问模型“你是谁，谁创造的你？”这类的身份问题，模型就会按这个新的值来回答

In [7]:
def format_identity(origin_obj,name,author):
    ret = []
    for ele in origin_obj:
        ele['output'] = ele['output'].replace("{{name}}",name).replace("{{author}}",author)
        ret.append(ele)
    return ret

- 替换成您自己的设定

In [8]:
NAME = <your own bot name>
AUTHOR = <the name of the author>

In [9]:
!pwd
%cd ~/SageMaker/Easy_Fintune_LLM_using_SageMaker_with_LLama_Factory

/home/ec2-user/SageMaker/Easy_Fintune_LLM_using_SageMaker_with_LLama_Factory
/home/ec2-user/SageMaker/Easy_Fintune_LLM_using_SageMaker_with_LLama_Factory


In [10]:
import json
file_name = './LLaMA-Factory/data/identity.json'
with open(file_name) as f:
    identity = json.load(f)
identity_2 = format_identity(identity,name=NAME,author=AUTHOR)
identity_2[:2]

[{'instruction': 'hi',
  'input': '',
  'output': 'Hello! I am RiverBot, an AI assistant developed by GOGOGO. How can I assist you today?'},
 {'instruction': 'hello',
  'input': '',
  'output': 'Hello! I am RiverBot, an AI assistant developed by GOGOGO. How can I assist you today?'}]

In [11]:
os.makedirs('./train',exist_ok=True)
with open('./train/identity_2.json','w') as f:
    json.dump(identity_2,f)

### 把数据copy至S3

In [12]:
s3_data_uri = f"s3://{default_bucket}/dataset-for-training"
training_input_path = f'{s3_data_uri}/train'

In [13]:
# save train_dataset to s3
train_dataset.to_json('./train/ruozhiba.json')
sagemaker.s3.S3Uploader.upload(local_path="./train/ruozhiba.json", desired_s3_uri=training_input_path, sagemaker_session=sagemaker_session)
sagemaker.s3.S3Uploader.upload(local_path="./train/identity_2.json", desired_s3_uri=training_input_path, sagemaker_session=sagemaker_session)

print(f"saving training dataset to: {training_input_path}")


Creating json from Arrow format:   0%|          | 0/5 [00:00<?, ?ba/s]

saving training dataset to: s3://sagemaker-us-east-1-577976195821/dataset-for-training/train


## 准备LLaMA-Factory 的 dataset info

In [14]:
import json

In [15]:
file_name = './LLaMA-Factory/data/dataset_info.json'
with open(file_name) as f:
    datainfo = json.load(f)

In [16]:
datainfo['identity']={'file_name': 'identity_2.json'}

In [17]:
datainfo['ruozhiba']={
    'file_name':'ruozhiba.json',
    "columns": {
    "prompt": "instruction",
    "query": "input",
    "response": "output",
  }      
}

In [18]:
with open('./LLaMA-Factory/data/dataset_info.json','w') as f:
    json.dump(fp=f,obj=datainfo)

## 准备LLaMA-Factory 的 训练配置yaml文件
###  从LLaMA-Factory/examples/train_qlora/目录中复制出llama3_lora_sft_awq.yaml，并修改

In [19]:
#load template
import yaml
file_name = './LLaMA-Factory/examples/train_qlora/llama3_lora_sft_awq.yaml'
with open(file_name) as f:
    doc = yaml.safe_load(f)
doc

{'model_name_or_path': 'TechxGenus/Meta-Llama-3-8B-Instruct-AWQ',
 'stage': 'sft',
 'do_train': True,
 'finetuning_type': 'lora',
 'lora_target': 'all',
 'dataset': 'identity,alpaca_en_demo',
 'template': 'llama3',
 'cutoff_len': 1024,
 'max_samples': 1000,
 'overwrite_cache': True,
 'preprocessing_num_workers': 16,
 'output_dir': 'saves/llama3-8b/lora/sft',
 'logging_steps': 10,
 'save_steps': 500,
 'plot_loss': True,
 'overwrite_output_dir': True,
 'per_device_train_batch_size': 1,
 'gradient_accumulation_steps': 8,
 'learning_rate': 0.0001,
 'num_train_epochs': 3.0,
 'lr_scheduler_type': 'cosine',
 'warmup_ratio': 0.1,
 'fp16': True,
 'ddp_timeout': 180000000,
 'val_size': 0.1,
 'per_device_eval_batch_size': 1,
 'eval_strategy': 'steps',
 'eval_steps': 500}

In [23]:
#设置模型的保存目录在本notebook实例本地
save_dir = '/home/ec2-user/SageMaker/Easy_Fintune_LLM_using_SageMaker_with_LLama_Factory/finetuned_model'
# doc['output_dir'] = save_dir

# 如果是用SageMaker则使用以下模型文件路径
doc['output_dir'] ='/tmp/finetuned_model'
doc['per_device_train_batch_size'] =1
doc['gradient_accumulation_steps'] =8
# doc['lora_target'] = 'all'
doc['cutoff_len'] = 2048
doc['num_train_epochs'] = 5.0
doc['warmup_steps'] = 10

#实验时间，只选取前200条数据做训练
doc['max_samples'] = 200 
#数据集
doc['dataset'] = 'identity,ruozhiba'

### 保存为训练配置文件

In [24]:
sg_config = 'sg_config_qlora.yaml'
with open(f'./LLaMA-Factory/{sg_config}', 'w') as f:
    yaml.safe_dump(doc, f)
doc

{'model_name_or_path': 'TechxGenus/Meta-Llama-3-8B-Instruct-AWQ',
 'stage': 'sft',
 'do_train': True,
 'finetuning_type': 'lora',
 'lora_target': 'all',
 'dataset': 'identity,ruozhiba',
 'template': 'llama3',
 'cutoff_len': 2048,
 'max_samples': 200,
 'overwrite_cache': True,
 'preprocessing_num_workers': 16,
 'output_dir': '/tmp/finetuned_model',
 'logging_steps': 10,
 'save_steps': 500,
 'plot_loss': True,
 'overwrite_output_dir': True,
 'per_device_train_batch_size': 1,
 'gradient_accumulation_steps': 8,
 'learning_rate': 0.0001,
 'num_train_epochs': 5.0,
 'lr_scheduler_type': 'cosine',
 'warmup_ratio': 0.1,
 'fp16': True,
 'ddp_timeout': 180000000,
 'val_size': 0.1,
 'per_device_eval_batch_size': 1,
 'eval_strategy': 'steps',
 'eval_steps': 500,
 'warmup_steps': 10}

## 本地GPU测试提交 Training job

### 由于我们的实验环境限制，无法提交Training Job，所以在本次实验是在notebook实例中进行训练
### 如果您在自己的AWS环境中，且有SageMaker Training Job 所需GPU实例的quota，则可以用如下代码提交，instance_type改成'ml.g5.2xlarge' 

```python
from sagemaker.estimator import Estimator
from sagemaker.pytorch import PyTorch
from datetime import datetime

instance_count = 1
instance_type = 'local_gpu' 
max_time = 3600*24

# Get the current time
current_time = datetime.now()

# wandb.sagemaker_auth(path="./")
# Format the current time as a string
formatted_time = current_time.strftime("%Y%m%d%H%M%S")
print(formatted_time)

base_job_name = 'llama3-8b-qlora-finetune'
environment = {
    'NODE_NUMBER':str(instance_count),
    "s3_data_paths":f"{training_input_path}",
    "sg_config":sg_config,
    'OUTPUT_MODEL_S3_PATH': f's3://{default_bucket}/llama3-8b-qlora/', # destination
}

estimator = PyTorch(entry_point='entry_single_lora.py',
                            source_dir='./LLaMA-Factory/',
                            role=role,
                            base_job_name=base_job_name,
                            environment=environment,
                            framework_version='2.2.0',
                            py_version='py310',
                            script_mode=True,
                            instance_count=instance_count,
                            instance_type=instance_type,
                            enable_remote_debug=True,
                            # keep_alive_period_in_seconds=600,
                            max_run=max_time)

estimator.fit()

```


In [26]:
from sagemaker.estimator import Estimator
from sagemaker.pytorch import PyTorch
from datetime import datetime

instance_count = 1

#使用本地机器，也可以指定为 ml.g5.2xlarge等其他实例
instance_type = 'local_gpu' 
max_time = 3600*24

# Get the current time
current_time = datetime.now()

# wandb.sagemaker_auth(path="./")
# Format the current time as a string
formatted_time = current_time.strftime("%Y%m%d%H%M%S")
print(formatted_time)

base_job_name = 'llama3-8b-qlora-finetune'
environment = {
    'NODE_NUMBER':str(instance_count),
    "s3_data_paths":f"{training_input_path}",
    "sg_config":sg_config,
    'OUTPUT_MODEL_S3_PATH': f's3://{default_bucket}/llama3-8b-qlora/', # destination
}

estimator = PyTorch(entry_point='entry_single_lora.py',
                            source_dir='./LLaMA-Factory/',
                            role=role,
                            base_job_name=base_job_name,
                            environment=environment,
                            framework_version='2.2.0',
                            py_version='py310',
                            script_mode=True,
                            instance_count=instance_count,
                            instance_type=instance_type,
                            enable_remote_debug=True,
                            # keep_alive_period_in_seconds=600,
                            max_run=max_time)

INFO:botocore.credentials:Found credentials from IAM Role: BaseNotebookInstanceEc2InstanceRole


20240626144531


- 开始训练

In [27]:
estimator.fit()

INFO:sagemaker.image_uris:image_uri is not presented, retrieving image_uri based on instance_type, framework etc.
INFO:sagemaker:Creating training-job with name: llama3-8b-qlora-finetune-2024-06-26-14-49-44-828
INFO:sagemaker.telemetry.telemetry_logging:SageMaker Python SDK will collect telemetry to help us better understand our user's needs, diagnose issues, and deliver additional features.
To opt out of telemetry, please disable via TelemetryOptOut parameter in SDK defaults config. For more information, refer to https://sagemaker.readthedocs.io/en/stable/overview.html#configuring-and-using-defaults-with-the-sagemaker-python-sdk.
INFO:sagemaker.local.image:'Docker Compose' is not installed. Proceeding to check for 'docker-compose' CLI.
INFO:sagemaker.local.image:'Docker Compose' found using Docker Compose CLI.
INFO:sagemaker.local.local_session:Starting training job
INFO:botocore.credentials:Found credentials from IAM Role: BaseNotebookInstanceEc2InstanceRole
INFO:sagemaker.local.imag

 Container oiaz4i3ukl-algo-1-xtg8f  Creating


INFO:root:creating /tmp/tmpvigz_p6k/artifacts/output/data


 Container oiaz4i3ukl-algo-1-xtg8f  Created
Attaching to oiaz4i3ukl-algo-1-xtg8f


## 至此步，本章节结束
- 模型已经在本地的training job上训练完成，并上传至s3

### 以下是可选步骤，直接在本地使用LLaMA-Factory cli进行训练
### 本地运行LLaMA-Factory cli

In [None]:
confirm = input("Are you sure you want to continue? (y/n) ")

In [20]:
#切换工作目录到LLaMA-Factory
os.chdir('LLaMA-Factory')

In [28]:
#安装LLaMA-Factory
os.system("pip install --no-deps -e .")

Obtaining file:///home/ec2-user/SageMaker/llm_finetune/LLaMA-Factory
  Installing build dependencies: started
  Installing build dependencies: finished with status 'done'
  Checking if build backend supports build_editable: started
  Checking if build backend supports build_editable: finished with status 'done'
  Getting requirements to build editable: started
  Getting requirements to build editable: finished with status 'done'
  Installing backend dependencies: started
  Installing backend dependencies: finished with status 'done'
  Preparing editable metadata (pyproject.toml): started
  Preparing editable metadata (pyproject.toml): finished with status 'done'
Building wheels for collected packages: llamafactory
  Building editable for llamafactory (pyproject.toml): started
  Building editable for llamafactory (pyproject.toml): finished with status 'done'
  Created wheel for llamafactory: filename=llamafactory-0.7.2.dev0-0.editable-py3-none-any.whl size=18819 sha256=3cbaa1b62e626d217

0

In [29]:
os.system("pip install -r requirements.txt")

Collecting transformers>=4.41.2 (from -r requirements.txt (line 1))
  Using cached transformers-4.41.2-py3-none-any.whl.metadata (43 kB)
Collecting accelerate>=0.30.1 (from -r requirements.txt (line 3))
  Using cached accelerate-0.30.1-py3-none-any.whl.metadata (18 kB)
Collecting peft>=0.11.1 (from -r requirements.txt (line 4))
  Using cached peft-0.11.1-py3-none-any.whl.metadata (13 kB)
Collecting trl>=0.8.6 (from -r requirements.txt (line 5))
  Downloading trl-0.9.4-py3-none-any.whl.metadata (11 kB)
Collecting gradio>=4.0.0 (from -r requirements.txt (line 6))
  Using cached gradio-4.33.0-py3-none-any.whl.metadata (15 kB)
Collecting einops (from -r requirements.txt (line 8))
  Using cached einops-0.8.0-py3-none-any.whl.metadata (12 kB)
Collecting sentencepiece (from -r requirements.txt (line 9))
  Using cached sentencepiece-0.2.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (7.7 kB)
Collecting tiktoken (from -r requirements.txt (line 10))
  Using cached tiktoken

0

In [30]:
#下载数据集
os.system("chmod +x ./s5cmd")
os.system("./s5cmd sync {0} {1}".format(training_input_path+'/*', 'data/'))

cp s3://sagemaker-us-east-1-434444145045/dataset-for-training/train/identity_2.json data/identity_2.json
cp s3://sagemaker-us-east-1-434444145045/dataset-for-training/train/ruozhiba.json data/ruozhiba.json


0

### 启动训练
本次训练过程大概15分钟左右

In [8]:
DEVICES=0
os.system(f"CUDA_VISIBLE_DEVICES={DEVICES} llamafactory-cli train {sg_config}")

[2024-06-06 15:15:02,102] [INFO] [real_accelerator.py:203:get_accelerator] Setting ds_accelerator to cuda (auto detect)


[INFO|tokenization_utils_base.py:2108] 2024-06-06 15:15:04,360 >> loading file tokenizer.json from cache at /home/ec2-user/.cache/huggingface/hub/models--TechxGenus--Meta-Llama-3-8B-Instruct-AWQ/snapshots/129d90727841a07bcdb3173ed4165d1353b44386/tokenizer.json
[INFO|tokenization_utils_base.py:2108] 2024-06-06 15:15:04,360 >> loading file added_tokens.json from cache at None
[INFO|tokenization_utils_base.py:2108] 2024-06-06 15:15:04,360 >> loading file special_tokens_map.json from cache at /home/ec2-user/.cache/huggingface/hub/models--TechxGenus--Meta-Llama-3-8B-Instruct-AWQ/snapshots/129d90727841a07bcdb3173ed4165d1353b44386/special_tokens_map.json
[INFO|tokenization_utils_base.py:2108] 2024-06-06 15:15:04,360 >> loading file tokenizer_config.json from cache at /home/ec2-user/.cache/huggingface/hub/models--TechxGenus--Meta-Llama-3-8B-Instruct-AWQ/snapshots/129d90727841a07bcdb3173ed4165d1353b44386/tokenizer_config.json


06/06/2024 15:15:04 - INFO - llamafactory.hparams.parser - Process rank: 0, device: cuda:0, n_gpu: 1, distributed training: False, compute dtype: torch.float16




06/06/2024 15:15:04 - INFO - llamafactory.data.template - Replace eos token: <|eot_id|>
06/06/2024 15:15:04 - INFO - llamafactory.data.template - Add pad token: <|eot_id|>
06/06/2024 15:15:04 - INFO - llamafactory.data.loader - Loading dataset identity_2.json...


Converting format of dataset (num_proc=16): 100%|██████████| 91/91 [00:00<00:00, 414.02 examples/s]


06/06/2024 15:15:05 - INFO - llamafactory.data.loader - Loading dataset ruozhiba.json...


Converting format of dataset (num_proc=16): 100%|██████████| 200/200 [00:00<00:00, 949.10 examples/s]
Running tokenizer on dataset (num_proc=16): 100%|██████████| 291/291 [00:03<00:00, 91.06 examples/s] 
[INFO|configuration_utils.py:733] 2024-06-06 15:15:09,646 >> loading configuration file config.json from cache at /home/ec2-user/.cache/huggingface/hub/models--TechxGenus--Meta-Llama-3-8B-Instruct-AWQ/snapshots/129d90727841a07bcdb3173ed4165d1353b44386/config.json
[INFO|configuration_utils.py:796] 2024-06-06 15:15:09,647 >> Model config LlamaConfig {
  "_name_or_path": "TechxGenus/Meta-Llama-3-8B-Instruct-AWQ",
  "architectures": [
    "LlamaForCausalLM"
  ],
  "attention_bias": false,
  "attention_dropout": 0.0,
  "bos_token_id": 128000,
  "eos_token_id": 128001,
  "hidden_act": "silu",
  "hidden_size": 4096,
  "initializer_range": 0.02,
  "intermediate_size": 14336,
  "max_position_embeddings": 8192,
  "mlp_bias": false,
  "model_type": "llama",
  "num_attention_heads": 32,
  "num_hid

input_ids:
[128000, 128006, 9125, 128007, 271, 2675, 527, 264, 11190, 18328, 13, 128009, 128006, 882, 128007, 271, 6151, 128009, 128006, 78191, 128007, 271, 9906, 0, 358, 1097, 11188, 24406, 11, 459, 15592, 18328, 8040, 555, 480, 12501, 12501, 46, 13, 2650, 649, 358, 7945, 499, 3432, 30, 128009]
inputs:
<|begin_of_text|><|start_header_id|>system<|end_header_id|>

You are a helpful assistant.<|eot_id|><|start_header_id|>user<|end_header_id|>

hi<|eot_id|><|start_header_id|>assistant<|end_header_id|>

Hello! I am RiverBot, an AI assistant developed by GOGOGO. How can I assist you today?<|eot_id|>
label_ids:
[-100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, 9906, 0, 358, 1097, 11188, 24406, 11, 459, 15592, 18328, 8040, 555, 480, 12501, 12501, 46, 13, 2650, 649, 358, 7945, 499, 3432, 30, 128009]
labels:
Hello! I am RiverBot, an AI assistant developed by GOGOGO. How can I assist you today?<|eot_id|>
06/06/202

Loading checkpoint shards: 100%|██████████| 2/2 [00:01<00:00,  1.42it/s]
[INFO|modeling_utils.py:4280] 2024-06-06 15:15:11,325 >> All model checkpoint weights were used when initializing LlamaForCausalLM.

[INFO|modeling_utils.py:4288] 2024-06-06 15:15:11,325 >> All the weights of LlamaForCausalLM were initialized from the model checkpoint at TechxGenus/Meta-Llama-3-8B-Instruct-AWQ.
If your task is similar to the task the model of the checkpoint was trained on, you can already use LlamaForCausalLM for predictions without further training.
[INFO|configuration_utils.py:917] 2024-06-06 15:15:11,347 >> loading configuration file generation_config.json from cache at /home/ec2-user/.cache/huggingface/hub/models--TechxGenus--Meta-Llama-3-8B-Instruct-AWQ/snapshots/129d90727841a07bcdb3173ed4165d1353b44386/generation_config.json
[INFO|configuration_utils.py:962] 2024-06-06 15:15:11,347 >> Generate config GenerationConfig {
  "bos_token_id": 128000,
  "do_sample": true,
  "eos_token_id": [
    12

06/06/2024 15:15:11 - INFO - llamafactory.model.utils.checkpointing - Gradient checkpointing enabled.
06/06/2024 15:15:11 - INFO - llamafactory.model.utils.attention - Using torch SDPA for faster training and inference.
06/06/2024 15:15:11 - INFO - llamafactory.model.adapter - Upcasting trainable params to float32.
06/06/2024 15:15:11 - INFO - llamafactory.model.adapter - Fine-tuning method: LoRA
06/06/2024 15:15:11 - INFO - llamafactory.model.loader - trainable params: 3407872 || all params: 1054347264 || trainable%: 0.3232


[INFO|trainer.py:2078] 2024-06-06 15:15:11,667 >> ***** Running training *****
[INFO|trainer.py:2079] 2024-06-06 15:15:11,667 >>   Num examples = 261
[INFO|trainer.py:2080] 2024-06-06 15:15:11,667 >>   Num Epochs = 5
[INFO|trainer.py:2081] 2024-06-06 15:15:11,668 >>   Instantaneous batch size per device = 1
[INFO|trainer.py:2084] 2024-06-06 15:15:11,668 >>   Total train batch size (w. parallel, distributed & accumulation) = 8
[INFO|trainer.py:2085] 2024-06-06 15:15:11,668 >>   Gradient Accumulation steps = 8
[INFO|trainer.py:2086] 2024-06-06 15:15:11,668 >>   Total optimization steps = 160
[INFO|trainer.py:2087] 2024-06-06 15:15:11,669 >>   Number of trainable parameters = 3,407,872
  1%|          | 1/160 [00:06<17:22,  6.56s/it]Traceback (most recent call last):
  File "/home/ec2-user/anaconda3/envs/pytorch_p310/bin/llamafactory-cli", line 8, in <module>
    sys.exit(main())
  File "/home/ec2-user/SageMaker/llm_finetune/LLaMA-Factory/src/llamafactory/cli.py", line 93, in main
    run_

2

### 上传Lora模型文件至S3保存

In [18]:
os.system("./s5cmd sync {0} {1}".format(save_dir, f's3://{default_bucket}/llama3-8b-qlora/'))

0

In [19]:
print(f"Lora model file saved s3://{default_bucket}/llama3-8b-qlora/")

Lora model file saved s3://sagemaker-us-east-1-434444145045/llama3-8b-qlora/
