# SageMaker

In [None]:
#wandb.finish()
!pip install --upgrade pip
!pip install "transformers==4.30.2" 
!pip install "datasets[s3]==2.13.0" 
!pip install sagemaker --upgrade 
!pip install wandb
#!pip install "transformers==4.30.2" --upgrade
#!pip3 install git+https://github.com/huggingface/transformers
!pip install torch 

In [None]:
from getpass import getpass
import os

import sagemaker
import boto3
import torch
##import wandb
from huggingface_hub import login
from transformers import LlamaTokenizer
from transformers import AutoTokenizer
from datasets import load_dataset, load_from_disk
import time
from datetime import datetime
from sagemaker.huggingface import HuggingFace
from huggingface_hub import HfFolder
import wandb

# Start from a clean slate so a stale token from an earlier run is never reused.
hf_token, wandb_token = None, None

# Ask for both secrets interactively; getpass keeps them out of the notebook
# source and out of the rendered cell output.
hf_token = getpass(prompt='input hf token')
wandb_token = getpass(prompt='input wandb token')

# Authenticate against the Hugging Face Hub and Weights & Biases.
login(token=hf_token)
wandb.login(key=wandb_token)

In [None]:
# Bucket used as the session's default bucket and as the root of the training
# data URI built later in the notebook.
sagemaker_session_bucket = 'llms-hf'

# Look up the ARN of the execution role by its name.
iam = boto3.client('iam')
role = iam.get_role(RoleName='Sagemaker-DataScientist')['Role']['Arn']

# Build the session once, already bound to the project bucket. (Previously a
# default Session was created first and immediately thrown away.)
sess = sagemaker.Session(default_bucket=sagemaker_session_bucket)

In [None]:
# A timestamp suffix gives every download run a distinct name.
now = datetime.now()
time_stamp = now.strftime("%m.%d.%y-%H.%M.%S")

download_run_config = {
    'project': 'SFT_training_DM',
    'entity': 'ft-llmmm',
    'job_type': 'download_data',
    'name': f'download_combined_data_{time_stamp}',
}

# Record the download as its own W&B run and fetch the dataset artifact;
# `artifact_dir` is the value returned by `artifact.download()`.
with wandb.init(**download_run_config) as run:
    artifact = run.use_artifact(
        'ft-llmmm/ELI5_analysis/combined_dataset:v2',
        type='dataset',
    )
    artifact_dir = artifact.download()

In [None]:
# Hard-coded location of the dataset artifact on disk.
# NOTE(review): presumably here so the W&B download cell can be skipped when the
# files are already present; it overrides whatever that cell assigned.
artifact_dir = "./artifacts/combined_dataset:v2"

In [None]:
model_id = "meta-llama/Llama-2-7b-hf" # sharded weights
tokenizer = AutoTokenizer.from_pretrained(model_id, use_auth_token=hf_token)
# Reuse the end-of-sequence token as the padding token.
tokenizer.pad_token = tokenizer.eos_token

# Short model name (the part after the organisation) used in run/job names.
model_name = model_id.split('/')[-1]

# Load straight from `artifact_dir`. Prefixing it with './' is redundant for a
# relative path and would turn an absolute path into a wrong relative one.
ds_QA_SFT = load_from_disk(artifact_dir)
ds_wiki = ds_QA_SFT.filter(lambda x: x['source'] == 'simple_wiki')
ds_ELI5 = ds_QA_SFT.filter(lambda x: x['source'] == 'ELI5')

# Which variant to train on: 'combined', 'wiki' or 'ELI5'.
ds_name = 'wiki'

datasets_by_name = {
    'combined': ds_QA_SFT,
    'wiki': ds_wiki,
    'ELI5': ds_ELI5,
}
if ds_name not in datasets_by_name:
    raise ValueError("Input valid dataset name, 'combined','ELI5', or 'wiki'")
ds = datasets_by_name[ds_name]

# S3 prefix the training job reads from. The upload itself is commented out, so
# the data is expected to already exist at this location.
training_input_path = f's3://{sagemaker_session_bucket}/{ds_name}'
#ds.save_to_disk(training_input_path)

# Data input dictionary mapping the 'training' channel to the S3 URI.
data = {'training': training_input_path}

# Run name: model, dataset and method, suffixed with the local launch time.
run_name = f'{model_name}_{ds_name}_qlora'
run_name += f'__{time.strftime("%Y-%m-%d-%H-%M-%S", time.localtime())}'

In [None]:
# Display the currently selected dataset name as a sanity check.
ds_name

In [None]:
# Base name for the training job; every underscore is turned into a hyphen.
job_name = f'{model_name}-{ds_name}-qlora'.replace('_', '-')

# Arguments handed to the training script as hyperparameters.
# NOTE(review): `hf_token` and `wandb_token` are sent as plain hyperparameters;
# confirm that exposing them in the job configuration is acceptable.
hyperparameters = dict(
    # model / dataset
    model_id=model_id,
    repo_id=f'{model_name}-{ds_name}-no-group-by-length',
    dataset_path='/opt/ml/input/data/training',
    # credentials and experiment tracking
    hf_token=hf_token,
    wandb_token=wandb_token,
    report_to_wandb=1,
    # training schedule
    epochs=3,
    max_steps=-1,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    gradient_accumulation_steps=16,
    lr=2e-4,
    merge_weights=0,
    # W&B / Hub destinations
    entity='ft-llmmm',
    project_name='SFT_training_dm',
    hub_strategy='every_save',
    run_name=run_name,
    # runtime switches
    torch_compile=0,
    gradient_checkpointing=1,
    optim='paged_adamw_8bit',
    group_by_length=0,
)

In [None]:
# Create the Estimator that describes the SageMaker training job.
# `sagemaker_session=sess` makes the job use the session configured above (and
# therefore its default bucket); without it the estimator creates its own
# session and the custom bucket is never applied.
huggingface_estimator = HuggingFace(
    entry_point          = 'run_clm.py',      # train script
    source_dir           = './scripts',       # directory which includes all the files needed for training
    instance_type        = 'ml.g5.2xlarge',   # instance type used for the training job
    instance_count       = 1,                 # the number of instances used for training
    base_job_name        = job_name,          # the name of the training job
    role                 = role,              # IAM role used in the training job to access AWS resources, e.g. S3
    sagemaker_session    = sess,              # session bound to the project's default bucket
    volume_size          = 300,               # the size of the EBS volume in GB
    transformers_version = '4.28',            # the transformers version used in the training job
    pytorch_version      = '2.0',             # the pytorch version used in the training job
    py_version           = 'py310',           # the python version used in the training job
    hyperparameters      = hyperparameters,   # the hyperparameters passed to the training job
    environment          = { "HUGGINGFACE_HUB_CACHE": "/tmp/.cache" }, # set env variable to cache models in /tmp
)

In [None]:
# Launch the training job on the S3 input channels defined in `data` and block
# until it finishes.
huggingface_estimator.fit(inputs=data, wait=True)

# EC2 + FlashAttention

In [1]:
!pip install "transformers==4.31.0" "datasets==2.13.0" "peft==0.4.0" "accelerate==0.21.0" "bitsandbytes==0.40.2" "trl==0.4.7" "safetensors>=0.3.1" --upgrade



In [2]:
!pip install wandb
import wandb



In [21]:
ec2_download_run = {
    'project': 'SFT_Training_dm',
    'entity': 'ft-llmmm',
    'job_type': 'download_data',
    'name': 'download_SFT_EC2',
}

# Record the download as a W&B run, fetch the tokenized dataset artifact and
# report where it was written.
with wandb.init(**ec2_download_run) as run:
    artifact = run.use_artifact(
        'ft-llmmm/ELI5_analysis/llama_QA_tokenized:v2',
        type='dataset',
    )
    artifact_dir = artifact.download()

    print(f'artifact saved to {artifact_dir}')

[34m[1mwandb[0m: Downloading large artifact llama_QA_tokenized:v2, 206.21MB. 10 files... 
[34m[1mwandb[0m:   10 of 10 files downloaded.  
Done. 0:0:0.3


artifact saved to ./artifacts/llama_QA_tokenized:v2


In [3]:
!pip install datasets
import datasets



In [22]:
# Load straight from `artifact_dir`. Prefixing it with './' is redundant for a
# relative path and would turn an absolute path into a wrong relative one.
ds_QA_SFT = datasets.load_from_disk(artifact_dir)

# Split the combined data by its `source` column.
ds_wiki = ds_QA_SFT.filter(lambda x: x['source'] == 'simple_wiki')
ds_ELI5 = ds_QA_SFT.filter(lambda x: x['source'] == 'ELI5')

# Persist each subset so the training script can load it by path.
ds_wiki.save_to_disk('./data/ds_wiki')
ds_ELI5.save_to_disk('./data/ds_ELI5')

Filter:   0%|          | 0/72214 [00:00<?, ? examples/s]

Filter:   0%|          | 0/1964 [00:00<?, ? examples/s]

Filter:   0%|          | 0/3301 [00:00<?, ? examples/s]

Filter:   0%|          | 0/72214 [00:00<?, ? examples/s]

Filter:   0%|          | 0/1964 [00:00<?, ? examples/s]

Filter:   0%|          | 0/3301 [00:00<?, ? examples/s]

Saving the dataset (0/1 shards):   0%|          | 0/30000 [00:00<?, ? examples/s]

Saving the dataset (0/1 shards):   0%|          | 0/1000 [00:00<?, ? examples/s]

Saving the dataset (0/1 shards):   0%|          | 0/1000 [00:00<?, ? examples/s]

Saving the dataset (0/1 shards):   0%|          | 0/42214 [00:00<?, ? examples/s]

Saving the dataset (0/1 shards):   0%|          | 0/964 [00:00<?, ? examples/s]

Saving the dataset (0/1 shards):   0%|          | 0/2301 [00:00<?, ? examples/s]

In [23]:
import time
from datetime import datetime

In [24]:
# --- Model and dataset selection -------------------------------------------
model_id = "meta-llama/Llama-2-7b-hf" # sharded weights
model_name = model_id.rsplit('/', 1)[-1]
ds_name = 'wiki'

# The combined dataset is read from the downloaded artifact; the per-source
# subsets are read from ./data.
dataset_path = artifact_dir if ds_name == 'combined' else f'./data/ds_{ds_name}'

# --- Naming ------------------------------------------------------------------
# Run name: model, dataset and method, suffixed with the local launch time.
launch_stamp = time.strftime("%Y-%m-%d-%H-%M-%S", time.localtime())
run_name = f'{model_name}_{ds_name}_qlora_flash__{launch_stamp}'

output_dir = f'./{model_name}_{ds_name}/models'
logging_dir = f'./{model_name}_{ds_name}/logs'
repo_id = f'{model_name}-{ds_name}-flash'

# --- Training settings ---------------------------------------------------------
report_to_wandb = 1
epochs = 3
max_steps = -1
per_device_train_batch_size = 8
per_device_eval_batch_size = 8
gradient_accumulation_steps = 16
lr = 2e-4
merge_weights = 0

# --- Tracking / Hub destinations -----------------------------------------------
entity = 'ft-llmmm'
project_name = 'SFT_training_dm'
hub_strategy = 'every_save'

# --- Runtime switches ----------------------------------------------------------
torch_compile = 0
gradient_checkpointing = 1
optim = 'paged_adamw_8bit'
group_by_length = 0

In [2]:
import os

In [None]:
# Read both secrets from the process environment so they never appear in the
# notebook itself; a missing variable raises KeyError.
hf_token = os.environ["HUGGINGFACE_TOKEN"]
wandb_token = os.environ["WANDB_API_KEY"]

In [3]:
%python ./run_clm.py \
--output_dir './Llama-2-7b-hf_wiki/models' \
--logging_dir './Llama-2-7b-hf_wiki/logs' \
--model_id 'meta-llama/Llama-2-7b-hf' \
--dataset_path './data/ds_wiki' \
--run_name 'Llama-2-7b-hf_wiki_qlora_flash_test' \
--repo_id 'Llama-2-7b-hf-wiki-flash' \
--report_to_wandb 1 \
--epochs 3 \
--max_steps -1 \
--per_device_train_batch_size 8 \
--per_device_eval_batch_size 8 \
--gradient_accumulation_steps 16 \
--lr 2e-4 \
--merge_weights 0 \
--entity 'ft-llmmm' \
--project_name 'SFT_training_dm' \
--hub_strategy 'every_save' \
--torch_compile 0 \
--gradient_checkpointing 1 \
--optim 'paged_adamw_8bit' \
--group_by_length 1 \
--hf_token hf_token \
--wandb_token wandb_token \
--use_flash_attention 1

args is Namespace(model_id='meta-llama/Llama-2-7b-hf', repo_id='Llama-2-7b-hf-wiki-flash', hub_strategy='every_save', output_dir='./Llama-2-7b-hf_wiki/models', output_data_dir=None, dataset_path='./data/ds_wiki', hf_token='hf_token', report_to_wandb=1, wandb_token='<REDACTED>', epochs=3, max_steps=-1, per_device_train_batch_size=8, per_device_eval_batch_size=8, gradient_accumulation_steps=16, max_seq_length=4096, logging_steps=20, optim='paged_adamw_8bit', lr=0.0002, lora_r=64, lora_alpha=16, weight_decay=0.1, lora_dropout=0.1, load_in_4bit=1, load_in_8bit=0, use_peft=1, gradient_checkpointing=1, bf16=1, group_by_length=1, merge_weights=0, seed=42, warmup_ratio=0.03, project_name='SFT_training_dm', entity='ft-llmmm', run_name='Llama-2-7b-hf_wiki_qlora_flash_test', load_best_model_at_end=1, use_sagemaker=1, torch_compile=0, use_flash_attention=1)
extra is ['--logging_dir', './Llama-2-7b-hf_wiki/logs']
Logging into the Hugging Face Hub with token hf_token...

ValueError: Invalid token passed!