# Fine-tuning Llama 3.2 3B Instruct with SageMaker JumpStart

This notebook demonstrates how to fine-tune Meta's Llama 3.2 3B Instruct model using Amazon SageMaker JumpStart. We'll use a small dataset (around 100 examples) and parameter-efficient fine-tuning techniques like LoRA/QLoRA.

Hugging Face model card: https://huggingface.co/meta-llama/Llama-3.2-3B-Instruct

## Setup Environment

First, let's set up our SageMaker environment and install any required dependencies.

In [None]:
!pip install -U sagemaker

# Get a list of Hugging Face model revisions

Use the following code to get a revision listing for a model on Hugging Face.

In [None]:
!pip -q install huggingface_hub

In [None]:
%env HF_TOKEN="<insert your token here>"

In [None]:
# print environment variables
!echo $HF_TOKEN

In [None]:
# List all environment variables (shell equivalent: `!env`).
# WARNING: this prints secrets such as HF_TOKEN and AWS credentials; clear the
# cell output before sharing or committing the notebook.
import os  # imported here so the cell works when the notebook is run top to bottom

for key, value in os.environ.items():
    print(f'{key}: {value}')

In [None]:
from huggingface_hub import HfApi, whoami, login
import os

# Read the token from the HF_TOKEN environment variable so it is defined in
# exactly one place and never hard-coded in this cell. The `%env` magic keeps
# surrounding quotes as part of the value, so strip them defensively.
access_token = os.getenv("HF_TOKEN", "").strip("\"'")
if not access_token:
    raise RuntimeError("HF_TOKEN is not set; export your Hugging Face token first.")

# Configure a HfApi client
api = HfApi(
    endpoint="https://huggingface.co",  # Can be a Private Hub endpoint.
    token=access_token,  # Token is not persisted on the machine.
)

# Alternative: persist the credentials for the whole session.
# login(token=access_token)

# Confirm the token is valid. Print the account name, never the token itself.
user = whoami(token=access_token)
print(f"Authenticated to Hugging Face as: {user.get('name')}")

# Hugging Face Hub repo id (namespace/name). The SageMaker JumpStart model ID
# "meta-textgeneration-llama-3-2-3b-instruct" is not a Hub repository and
# cannot be resolved by list_repo_refs.
model_name = "meta-llama/Llama-3.2-3B-Instruct"
# model_name = "google-bert/bert-base-cased"

# List the branches (git refs) available for the repository.
refs = api.list_repo_refs(model_name)
for branch in refs.branches:
    name = branch.name
    print(f"branch: {name}")

In [None]:
import boto3
import sagemaker
from sagemaker import get_execution_role
from sagemaker.jumpstart.estimator import JumpStartEstimator
import os
import json
import logging

# Emit INFO-level (and above) log records; `logger` is reused by later cells.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# A single SageMaker session is shared by the rest of the notebook.
session = sagemaker.Session()
# get_execution_role() should only be run on notebook instances.
role = get_execution_role()
# Region of the session, and the default S3 bucket for data and model artifacts.
region, default_bucket = session.boto_region_name, session.default_bucket()

print(
    f"SageMaker Role: {role}",
    f"AWS Region: {region}",
    f"Default S3 Bucket: {default_bucket}",
    sep="\n",
)

In [None]:
# S3 layout used by run_llama_finetuning:
#   training data: s3://{bucket}/{prefix}/dataset/{training_object}
#   job output:    s3://{bucket}/{prefix}/output
bucket = "league-of-llm-internal-bucket-group3"
prefix = "domorand"
training_object = "2025-03-12-dataset.jsonl"

In [None]:
# Echo the configured dataset object name as a quick sanity check.
print(f"training_object = {training_object}")

In [None]:
from sagemaker import hyperparameters

def run_llama_finetuning(job_name, training_object, hyperparameters=None, *,
                         instance_type="ml.g5.12xlarge"):
    """
    Start a fine-tuning job for Llama 3.2 3B Instruct using SageMaker JumpStart.

    The job is started asynchronously (``wait=False``); this function returns
    as soon as SageMaker has accepted the request.

    Relies on notebook-level names defined in other cells: ``bucket``,
    ``prefix``, ``role``, ``model_id``, ``model_version`` and ``logger``.

    Args:
        job_name: Name given to the SageMaker training job.
        training_object: Object name of the dataset, which must already exist
            at ``s3://{bucket}/{prefix}/dataset/{training_object}``.
        hyperparameters: Optional dict of hyperparameters handed to the
            estimator. Inside this function the name shadows the
            ``sagemaker.hyperparameters`` module.
        instance_type: Training instance type (keyword-only). Defaults to the
            previously hard-coded ``ml.g5.12xlarge``.

    Returns:
        dict with ``job_name``, ``model_data_s3_path`` (where the model
        archive is expected once the job finishes) and ``training_job_arn``.

    Raises:
        Exception: whatever ``estimator.fit`` or the follow-up describe call
            raises; fit errors are logged with a traceback and re-raised.
    """
    # The dataset is expected to be in S3 already; nothing is uploaded here.
    s3_training_data_path = f"s3://{bucket}/{prefix}/dataset/{training_object}"
    logger.info("s3_training_data_path=%s", s3_training_data_path)

    # Set output path
    output_path = f"s3://{bucket}/{prefix}/output"

    # Create SageMaker JumpStart estimator
    estimator = JumpStartEstimator(
        model_id=model_id,
        model_version=model_version,
        instance_type=instance_type,
        instance_count=1,
        hyperparameters=hyperparameters,
        role=role,
        output_path=output_path,
        environment={"accept_eula": "true"},
        volume_size=256,
    )

    logger.info("Created JumpStart estimator for %s", model_id)
    logger.info("Parameters %s", hyperparameters)

    # Configure input data channel
    train_data = {"training": s3_training_data_path}

    logger.info("Starting fine-tuning job: %s", job_name)
    try:
        estimator.fit(
            train_data,
            job_name=job_name,
            wait=False,  # Set to True if you want to wait for the job to complete
            logs=False,  # Set to True if you want to see logs
        )
    except Exception as e:
        # logger.exception keeps the traceback alongside the message.
        logger.exception("Error starting training job: %s", e)
        raise

    logger.info("Training job started: %s", job_name)
    print(f"Training job '{job_name}' started!")
    print(f"You can monitor the job in the SageMaker console or run 'estimator.latest_training_job.wait()' to wait for completion")

    # Return the real ARN from the job description rather than the job name.
    training_job = estimator.latest_training_job
    return {
        "job_name": job_name,
        "model_data_s3_path": f"{output_path}/{job_name}/output/model.tar.gz",
        "training_job_arn": training_job.describe()["TrainingJobArn"],
    }

In [None]:
# JumpStart model ID for Llama 3.2 3B Instruct; read by run_llama_finetuning
# and by the hyperparameter retrieval/validation cell.
model_id = "meta-textgeneration-llama-3-2-3b-instruct"
# NOTE(review): "*" presumably selects the latest available version - confirm,
# and consider pinning one of the explicit alternatives for reproducible runs.
# model_version = "1.*"
model_version = "*"  # Update this version as needed
# model_version = "1.0.0"

In [None]:
# Get default hyperparameters for model
from sagemaker import hyperparameters

params = hyperparameters.retrieve_default(model_id=model_id, model_version=model_version)
print(f"Default parameters: \n {json.dumps(params, indent=2)}")

# Learning-rate notation reference:
#   1e-5 = 0.00001
#   2e-5 = 0.00002
#   1e-4 = 0.0001
#   2e-4 = 0.0002
#   2e-3 = 0.002

# Earlier experiments, kept for reference:
#   Try 1: epoch=3,  learning_rate=2e-4, per_device_train_batch_size=2,
#          lora_r=16, lora_alpha=16,  lora_dropout=0.05
#   Try 2: epoch=12, learning_rate=2e-3, per_device_train_batch_size=4,
#          lora_r=64, lora_alpha=128, lora_dropout=0.05

# Layer the overrides on top of the retrieved defaults instead of rebinding
# `params`, so every default that is not overridden here is kept. Values are
# written as strings, matching the other overrides used in this notebook.
params.update({
    # Required for all training jobs
    "chat_dataset": "False",
    "instruction_tuned": "True",
    # Optional tuning knobs (current experiment)
    "epoch": "8",
    "learning_rate": "2e-4",
    "per_device_train_batch_size": "2",
    "lora_r": "8",
    "lora_alpha": "32",
    "lora_dropout": "0.1",
})
print(f"Effective parameters: \n {json.dumps(params, indent=2)}")

# Validate parameters
print("Validating hyperparameters...")
hyperparameters.validate(model_id=model_id, model_version=model_version, hyperparameters=params) # Throws error if not valid

In [None]:
from datetime import datetime

# Timestamp suffix 'YYYYMMddHHmmSS'. Seconds are included so that re-running
# this cell within the same minute does not produce a duplicate job name.
formatted_timestamp = datetime.now().strftime('%Y%m%d%H%M%S')

# Use the dataset name configured in the S3 settings cell rather than
# repeating the literal here, so the two cannot drift apart.
response = run_llama_finetuning(f"group3-domorand-model-{formatted_timestamp}",
                                training_object,
                                hyperparameters=params)
print(f"response: {response}")

# Troubleshooting

## How to get a previous job's hyperparameters

This can be useful when debugging previous jobs or inspecting their hyperparameters.

In [None]:
# Name of the training job to inspect.
# training_job_name = 'group3-domorand-model13'
training_job_name = 'group3-domorand-model-202503130308'

# Fetch the job description through the low-level SageMaker client.
client = boto3.client('sagemaker')
# NOTE: this rebinds `response`, which the job-launch cell also assigns.
response = client.describe_training_job(TrainingJobName=training_job_name)

# Pretty-print the whole description; default=str stringifies any value that
# json cannot serialise natively.
print(json.dumps(response, indent=4, default=str))
# To show only the hyperparameters instead:
# print(json.dumps(response['HyperParameters'], indent=4))