# Fine-tuning Llama 3.2 3B Instruct with SageMaker JumpStart

This notebook demonstrates how to fine-tune Meta's Llama 3.2 3B Instruct model using Amazon SageMaker JumpStart. We'll use a small dataset (around 100 examples) and parameter-efficient fine-tuning techniques like LoRA/QLoRA.

## Setup Environment

First, let's set up our SageMaker environment and install any required dependencies.

In [48]:
!pip install -U sagemaker boto3



In [30]:
import boto3
import sagemaker
from sagemaker import get_execution_role
from sagemaker.jumpstart.estimator import JumpStartEstimator
import os
import json
import logging

# Configure logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Initialize SageMaker session
session = sagemaker.Session()
role = get_execution_role() # This should only be run on notebook instances
region = session.boto_region_name
# S3 bucket for storing data and model artifacts
# bucket = session.default_bucket()

# bucket = "league-of-llm-internal-bucket-group3"
# prefix = "domorand"
# training_data = "2025-03-12-dataset.jsonl"

print(f"SageMaker Role: {role}")
print(f"AWS Region: {region}")
print(f"Default S3 Bucket: {bucket}")

SageMaker Role: arn:aws:iam::482018956800:role/MonitoringStack-SageMakerExecutionRole7843F3B8-7FDMd6X85xyx
AWS Region: us-east-1
Default S3 Bucket: league-of-llm-internal-bucket-group3


In [46]:
bucket = "league-of-llm-internal-bucket-group3"
prefix = "domorand"
training_object = "2025-03-12-dataset.jsonl"

In [11]:
print(f"training_object = {training_object}")

training_object = 2025-03-12-dataset.jsonl


In [44]:
def run_llama_finetuning(job_name, training_object, hyperparameters):
    """
    Run a fine-tuning job for Llama 3.2 3B Instruct model using SageMaker JumpStart.
    """
    # Define model ID for Llama 3.2 3B Instruct
    model_id = "meta-textgeneration-llama-3-2-3b-instruct"
    # model_version = "1.0.0"  # Update this version as needed
    
    # Upload training data to S3
    # training_data_path = "path/to/local/training_data.jsonl"  # Update this path
    s3_training_data_path = f"s3://{bucket}/{prefix}/dataset/{training_object}"
    logger.info(f"s3_training_data_path={s3_training_data_path}")
    # try:
    #     logger.info(f"Uploading training data to {s3_training_data_path}")
    #     boto3.Session().resource('s3').Object(
    #         bucket, 
    #         "llama3-finetuning/data/training_data.jsonl"
    #     ).upload_file(training_data_path)
    # except Exception as e:
    #     logger.error(f"Error uploading training data: {str(e)}")
    #     raise

    # Set output path
    output_path = f"s3://{bucket}/{prefix}/output"
    
    # Create SageMaker JumpStart estimator
    estimator = JumpStartEstimator(
        model_id=model_id,
        # model_version=model_version,
        instance_type="ml.g5.12xlarge",  # GPU instance with good memory
        instance_count=1,
        hyperparameters=hyperparameters,
        role=role,
        output_path=output_path,
        volume_size=256
    )

    logger.info(f"Created JumpStart estimator for {model_id}")
    logger.info(f"Parameters {hyperparameters}")
    
    # Configure input data channel
    train_data = {"train": s3_training_data_path}
    
    
    # Start training job
    # job_name = f"group3-domorand-model14"
    
    try:
        logger.info(f"Starting fine-tuning job: {job_name}")
        estimator.fit(
            train_data,
            job_name=job_name,
            accept_eula=True,
            wait=False,  # Set to True if you want to wait for the job to complete
            logs=False   # Set to True if you want to see logs
        )
        logger.info(f"Training job started: {job_name}")
        print(f"Training job '{job_name}' started!")
        print(f"You can monitor the job in the SageMaker console or run 'estimator.latest_training_job.wait()' to wait for completion")        
    except Exception as e:
        logger.error(f"Error starting training job: {str(e)}")
        raise
    
    return {
        "job_name": job_name,
        "model_data_s3_path": f"{output_path}/{job_name}/output/model.tar.gz",
        "training_job_arn": estimator.latest_training_job.job_name
    }

In [43]:
    # Define hyperparameters
    initial_hyperparameters = {
        # Training parameters
        "epoch": "3",                  # Number of training epochs
        "learning_rate": "1e-2",       # Learning rate .01
        "per_device_train_batch_size": "2",  # Batch size per GPU for training
        # "per_device_eval_batch_size": "2",   # Batch size per GPU for evaluation
        "gradient_accumulation_steps": "2",  # Number of steps to accumulate gradients
        # "warmup_steps": "10",          # Number of warmup steps for learning rate scheduler
        # "weight_decay": "0.01",        # Weight decay
        "chat_dataset": False,
        "instruction_tuned": True,
        # "target_modules": "q_proj,v_proj",
        # "int8_quantization": False,
        
        # LoRA specific parameters
        "use_lora": "True",            # Use LoRA for fine-tuning
        "lora_r": "16",                # LoRA attention dimension
        "lora_alpha": "32",            # LoRA alpha parameter
        "lora_dropout": "0.05",        # Dropout probability for LoRA layers
        
        # QLoRA specific parameters (for memory efficiency)
        # "use_qlora": "True",           # Use QLoRA for more memory efficiency
        # "bnb_4bit_quant_type": "nf4",  # Quantization type
        # "bnb_4bit_compute_dtype": "float16",  # Compute dtype
        
        # Other settings
        # "max_seq_length": "2048",      # Maximum sequence length
        # "save_strategy": "epoch",      # Save strategy
        # "evaluation_strategy": "epoch" # Evaluation strategy
    }

    small_dataset_1_hyperparameters = {
        # Training parameters
        "epoch": "3",                  # Number of training epochs
        "learning_rate": "0.00002",       # Learning rate 2e-4
        "per_device_train_batch_size": "4",  # Batch size per GPU for training
        # "per_device_eval_batch_size": "2",   # Batch size per GPU for evaluation
        "gradient_accumulation_steps": "2",  # Number of steps to accumulate gradients
        # "warmup_steps": "10",          # Number of warmup steps for learning rate scheduler
        # "weight_decay": "0.01",        # Weight decay
        "chat_dataset": False,
        "instruction_tuned": True,
        # "target_modules": "q_proj,v_proj",
        # "int8_quantization": False,
        # LoRA specific parameters
        # "use_lora": "True",            # Use LoRA for fine-tuning
        "lora_r": "16",                # LoRA attention dimension
        "lora_alpha": "32",            # LoRA alpha parameter
        "lora_dropout": "0.05",        # Dropout probability for LoRA layers
        
        # QLoRA and PEFT parameters (for memory efficiency)
        # "use_qlora": "True",           # Use QLoRA for more memory efficiency
        # "use_bnb_4bit": "True",  # Quantization type
        
        # Other settings
        # "max_seq_length": "2048",      # Maximum sequence length
        # "save_strategy": "epoch",      # Save strategy
        # "evaluation_strategy": "epoch" # Evaluation strategy
    }

In [47]:
from datetime import datetime
# Get the current timestamp
current_time = datetime.now()

# Format the timestamp as 'YYYYMMddHHmm'
formatted_timestamp = current_time.strftime('%Y%m%d%H%M')

response = run_llama_finetuning(f"group3-domorand-model-{formatted_timestamp}",
                                "2025-03-12-dataset.jsonl", 
                                hyperparameters=small_dataset_1_hyperparameters)
print(f"response: {response}")

Training job 'group3-domorand-model-202503121645' started!
You can monitor the job in the SageMaker console or run 'estimator.latest_training_job.wait()' to wait for completion
response: {'job_name': 'group3-domorand-model-202503121645', 'model_data_s3_path': 's3://league-of-llm-internal-bucket-group3/domorand/output/group3-domorand-model-202503121645/output/model.tar.gz', 'training_job_arn': 'group3-domorand-model-202503121645'}


## How to get previous job hyper parameters

In [41]:
# Initialize the SageMaker client
sagemaker_client = boto3.client('sagemaker')

# Specify the training job name
# training_job_name = 'group3-domorand-model13'
training_job_name = 'group3-domorand-model-202503121618'

# Describe the training job
response = sagemaker_client.describe_training_job(TrainingJobName=training_job_name)

# Access hyperparameters
# Pretty print the response
print(json.dumps(response, indent=4, default=str))
# print(response)
# print(json.dumps(response['HyperParameters'], indent=4))

{
    "TrainingJobName": "group3-domorand-model-202503121618",
    "TrainingJobArn": "arn:aws:sagemaker:us-east-1:482018956800:training-job/group3-domorand-model-202503121618",
    "ModelArtifacts": {
        "S3ModelArtifacts": "s3://league-of-llm-internal-bucket-group3/domorand/dataset/output/group3-domorand-model-202503121618/output/model"
    },
    "TrainingJobStatus": "Failed",
    "SecondaryStatus": "Failed",
    "FailureReason": "AlgorithmError: ExecuteUserScriptError:\nExitCode 1\nErrorMessage \"TypeError: expected str, bytes or os.PathLike object, not NoneType\"\nCommand \"/opt/conda/bin/python3.10 transfer_learning.py --add_input_output_demarcation_key True --chat_dataset False --chat_template Llama3.1 --enable_fsdp True --epoch 3 --gradient_accumulation_steps 2 --instruction_tuned True --int8_quantization False --learning_rate 0.00002 --lora_alpha 32 --lora_dropout 0.05 --lora_r 16 --max_input_length -1 --max_seq_length 2048 --max_train_samples -1 --max_val_samples -1 --per