# Fine-Tuning Llama-2-7B Using AWS SageMaker & Hugging Face

In [1]:
# Session-wide environment variables, set through IPython's %env line magic.
# Comments are kept on their own lines: text after a %env value would become
# part of the value.
# AWS CLI profile and region for this session.
%env AWS_PROFILE=dev-admin
%env AWS_REGION=us-east-1
# Hugging Face home directory; the notebook expects the Hub token file here.
%env HF_HOME=~/.cache/huggingface
# Switch off parallelism in the Hugging Face tokenizers library.
%env TOKENIZERS_PARALLELISM=false

env: AWS_PROFILE=dev-admin
env: AWS_REGION=us-east-1
env: HF_HOME=~/.cache/huggingface
env: TOKENIZERS_PARALLELISM=false


### Initialize AWS SageMaker Session

Current (unexpired) AWS credentials are required for the following steps. They can be refreshed with:

`aws sso login --profile {YOUR_PROFILE}`

In [1]:
import sagemaker

# IAM execution role that is later handed to the training job.
role = "arn:aws:iam::171706357329:role/service-role/AmazonSageMakerServiceCatalogProductsExecutionRole"
# S3 bucket used as the default bucket of the SageMaker session.
sagemaker_session_bucket = "sagemaker-ms-thesis-llm"

# Session bound to the bucket above.
sess = sagemaker.Session(default_bucket=sagemaker_session_bucket)

# Echo the resolved bucket and region as a quick sanity check.
print("sagemaker bucket:", sess.default_bucket())
print("sagemaker session region:", sess.boto_region_name)

sagemaker.config INFO - Not applying SDK defaults from location: /Library/Application Support/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /Users/andrewbeiler/Library/Application Support/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /Library/Application Support/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /Users/andrewbeiler/Library/Application Support/sagemaker/config.yaml
sagemaker bucket: sagemaker-ms-thesis-llm
sagemaker session region: us-east-1


### Login to Hugging Face Hub

A Hugging Face Hub token is required for the following step.

The token file must be saved at the location given by the `HF_HOME` environment variable.

In [3]:
from huggingface_hub import login, HfFolder

# Read the token that HfFolder has stored and use it to log in to the Hub.
saved_token = HfFolder.get_token()
login(token=saved_token)

  from .autonotebook import tqdm as notebook_tqdm


Token will not been saved to git credential helper. Pass `add_to_git_credential=True` if you want to set the git credential as well.
Token is valid (permission: write).
Your token has been saved to /Users/andrewbeiler/.cache/huggingface/token
Login successful


## Set Parameters for Fine-Tuned Model & Training Job

In [4]:
# Optional (disabled): load the base model's tokenizer locally.
# from transformers import AutoTokenizer

# Hugging Face Hub id of the pre-trained model passed to the training job.
base_model_id = "meta-llama/Llama-2-7b-hf"
# tokenizer = AutoTokenizer.from_pretrained(base_model_id, use_auth_token=True)
# tokenizer.pad_token = tokenizer.eos_token

In [5]:
# Hugging Face dataset id and the branch (revision) passed to the training job.
hf_dataset_id = "abeiler/GOAT_Numeric_and_Alpha_Instruct"
hf_dataset_branch = "75_25_100K"

# Training job name; also used as the Hub model id when the model is pushed.
job_name = 'goatNumAndAlphaInstruct-75-25-100K-QLORA'
# S3 prefix under the session bucket for model artifacts and uploaded code.
model_output_path = 's3://' + sagemaker_session_bucket + '/models'

## Create Hugging Face Estimator

In [6]:
from sagemaker.huggingface import HuggingFace
from huggingface_hub import HfFolder

# Hyperparameters handed to the training job.
# NOTE(review): 'hub_token' is sent as a plain hyperparameter, so the token is
# stored with the training job definition -- confirm this exposure is acceptable.
hyperparameters = dict(
    # model and data
    model_id=base_model_id,              # pre-trained model
    dataset=hf_dataset_id,               # dataset id on the Hugging Face Hub
    data_rev=hf_dataset_branch,          # dataset branch / revision
    # optimisation
    epochs=1,                            # number of training epochs
    per_device_train_batch_size=4,       # batch size for training
    lr=1e-4,                             # learning rate
    # LoRA settings
    merge_weights=True,                  # whether to merge LoRA into the model (needs more memory)
    lora_r=64,
    lora_alpha=16,
    lora_dropout=0.1,
    # outputs and Hub publishing
    output_data_path='/opt/ml/output',
    push_to_hub=True,                    # push the model to the Hub
    hub_model_id=job_name,               # model id used on the Hub
    hub_strategy='every_save',           # Hub push strategy
    hub_token=HfFolder.get_token(),
)

# Estimator describing the SageMaker training job.
huggingface_estimator = HuggingFace(
    # training code
    entry_point='run_clm.py',                # training script
    source_dir='fine-tuning_scripts',        # directory with all files needed for training
    # compute
    instance_type='ml.g5.4xlarge',
    instance_count=1,
    volume_size=300,                         # EBS volume size in GB
    # naming and permissions
    base_job_name=job_name,
    role=role,                               # IAM role used by the job to access AWS resources, e.g. S3
    # framework versions used in the training job
    transformers_version='4.28',
    pytorch_version='2.0',
    py_version='py310',
    # job configuration
    hyperparameters=hyperparameters,
    environment={"HUGGINGFACE_HUB_CACHE": "/tmp/.cache"},  # cache models in /tmp
    # S3 locations
    output_path=model_output_path + "/",
    code_location=model_output_path,
)

sagemaker.config INFO - Not applying SDK defaults from location: /Library/Application Support/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /Users/andrewbeiler/Library/Application Support/sagemaker/config.yaml


## Run Training Job w/ Telegram Status Notifications

To run with Telegram notifications, a `my_secrets.py` file must be placed alongside this notebook and define the variables `TELE_API_KEY` and `TELE_USER`, holding the API key for the Telegram channel and the Telegram user ID you want to use.

In [2]:
import my_secrets

# Telegram API key and user id, read from the local my_secrets module so they
# are not hard-coded in the notebook.
api_key, usr_id = my_secrets.TELE_API_KEY, my_secrets.TELE_USER

In [3]:

import telegram

# starting the train job with our uploaded datasets as input
try:
        huggingface_estimator.fit(wait=True)
        msg = 'SageMaker Training Finished!'
except Exception as e:
        msg = 'SageMaker Training Finished with Error'
        print("Error: ", e)
finally:
        bot = telegram.Bot(token=api_key)
        async with bot:
                await bot.send_message(chat_id=usr_id, text=msg)