**Install Dependencies**

In [15]:
%pip install -qU datasets boto3 botocore


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m25.2[0m[39;49m -> [0m[32;49m25.3[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m
Note: you may need to restart the kernel to use updated packages.


**Configuration, Data Preprocessing, and Imports**

In [1]:
import botocore
print(botocore.__file__)
print(botocore.__version__)


/Users/mccartni/.local/share/mise/installs/python/3.12.11/lib/python3.12/site-packages/botocore/__init__.py
1.42.38


In [2]:
"""
GSM8K RFT Training on Amazon Bedrock
"""

import sys
sys.path.insert(0, "../..")

import boto3
import json
import time
import os
import re
from datasets import load_dataset

from helpers import (
    create_lambda_execution_role,
    create_bedrock_rft_role,
    create_lambda_deployment_package,
    deploy_lambda_function,
    cleanup_lambda_deployment_package
)

# ============== UPDATE THESE VALUES ==============
AWS_REGION = "us-east-1"
S3_BUCKET = "bedrock-bucket-nick"
AWS_PROFILE = "mccartni"  # Set to None if using default credentials (e.g., SageMaker, EC2 instance role)
# =================================================

# Create session - use profile if specified, otherwise use default credential chain
session = boto3.Session(profile_name=AWS_PROFILE, region_name=AWS_REGION) if AWS_PROFILE else boto3.Session(region_name=AWS_REGION)
AWS_ACCOUNT_ID = session.client('sts').get_caller_identity()['Account']

# Dataset configuration
DATASET_NAME = "gsm8k"
HF_DATASET = "openai/gsm8k"
TOTAL_SAMPLES = 320
LOCAL_DATA_DIR = "../../tmp-data"

# Derived S3 paths
S3_TRAINING_DATA = f"s3://{S3_BUCKET}/rft-data/datasets/{DATASET_NAME}/train.jsonl"
S3_VALIDATION_DATA = f"s3://{S3_BUCKET}/rft-data/datasets/{DATASET_NAME}/val.jsonl"
S3_TEST_DATA = f"s3://{S3_BUCKET}/rft-data/datasets/{DATASET_NAME}/test.jsonl"
S3_OUTPUT_PATH = f"s3://{S3_BUCKET}/rft-output/"

# Resource names
LAMBDA_FUNCTION_NAME = f"{DATASET_NAME}-reward-function"
LAMBDA_ROLE_NAME = f"{DATASET_NAME.upper()}-Lambda-Role"
BEDROCK_ROLE_NAME = "BedrockRFTRole"
REWARD_FUNCTION_FILE = f"../../reward-functions/{DATASET_NAME}_rew_func.py"
REWARD_FUNCTION_MODULE = f"{DATASET_NAME}_rew_func"

# Model configuration
BASE_MODEL_ID = f"arn:aws:bedrock:{AWS_REGION}::foundation-model/amazon.nova-2-lite-v1:0:256k"
CUSTOM_MODEL_NAME = f"{DATASET_NAME}-nova-lite-rft"
JOB_NAME = f"{DATASET_NAME}-rft-job-{int(time.time())}"

# Initialize clients from session
s3_client = session.client('s3')
bedrock_client = session.client('bedrock')
lambda_client = session.client('lambda')
iam_client = session.client('iam')

# --- Preprocess GSM8K from HuggingFace ---
def preprocess_gsm8k(hf_path, total_samples, output_dir, train_ratio=0.8, val_ratio=0.1):
    os.makedirs(output_dir, exist_ok=True)
    ds = load_dataset(hf_path, "main")

    train_size = int(total_samples * train_ratio)
    val_size = int(total_samples * val_ratio)
    test_size = total_samples - train_size - val_size

    def extract_answer(answer_text):
        match = re.search(r'####\s*(-?\d+(?:,\d+)*)', answer_text)
        return match.group(1).replace(',', '') if match else ""

    def format_row(row, idx, split):
        return {
            "data_source": hf_path,
            "prompt": [{"content": f"{row['question']} Let's think step by step and output the final answer after \"####\".", "role": "user"}],
            "ability": "math",
            "reward_model": {"ground_truth": extract_answer(row['answer']), "style": "rule"},
            "extra_info": {"answer": row['answer'], "index": idx, "question": row['question'], "split": split}
        }

    def write_split(data, start_idx, size, filename, split_name):
        with open(f"{output_dir}/{filename}", "w") as f:
            for i, row in enumerate(data.select(range(start_idx, start_idx + size))):
                f.write(json.dumps(format_row(row, i, split_name)) + "\n")
        print(f"✓ Created {output_dir}/{filename} ({size} samples)")

    hf_train = ds["train"].shuffle(seed=42)
    max_samples = min(total_samples, len(hf_train))

    if max_samples < total_samples:
        train_size = int(max_samples * train_ratio)
        val_size = int(max_samples * val_ratio)
        test_size = max_samples - train_size - val_size

    write_split(hf_train, 0, train_size, "train.jsonl", "train")
    write_split(hf_train, train_size, val_size, "val.jsonl", "val")
    write_split(hf_train, train_size + val_size, test_size, "test.jsonl", "test")

    return train_size, val_size, test_size

print("Preprocessing GSM8K dataset from HuggingFace...")
train_size, val_size, test_size = preprocess_gsm8k(HF_DATASET, TOTAL_SAMPLES, LOCAL_DATA_DIR)

print("\nUploading data to S3...")
for local_file, s3_key in [
    ("train.jsonl", f"rft-data/datasets/{DATASET_NAME}/train.jsonl"),
    ("val.jsonl", f"rft-data/datasets/{DATASET_NAME}/val.jsonl"),
    ("test.jsonl", f"rft-data/datasets/{DATASET_NAME}/test.jsonl")
]:
    s3_client.upload_file(f"{LOCAL_DATA_DIR}/{local_file}", S3_BUCKET, s3_key)
    print(f"✓ Uploaded {local_file} to s3://{S3_BUCKET}/{s3_key}")

print("\n✓ Configuration loaded")
print(f"  Region: {AWS_REGION}")
print(f"  Account: {AWS_ACCOUNT_ID}")
print(f"  S3 Bucket: {S3_BUCKET}")
print(f"  Dataset: {DATASET_NAME}")
print(f"  Job Name: {JOB_NAME}")
print(f"  Split: {train_size} train / {val_size} val / {test_size} test")

  from .autonotebook import tqdm as notebook_tqdm


Preprocessing GSM8K dataset from HuggingFace...
✓ Created ../../tmp-data/train.jsonl (256 samples)
✓ Created ../../tmp-data/val.jsonl (32 samples)
✓ Created ../../tmp-data/test.jsonl (32 samples)

Uploading data to S3...
✓ Uploaded train.jsonl to s3://bedrock-bucket-nick/rft-data/datasets/gsm8k/train.jsonl
✓ Uploaded val.jsonl to s3://bedrock-bucket-nick/rft-data/datasets/gsm8k/val.jsonl
✓ Uploaded test.jsonl to s3://bedrock-bucket-nick/rft-data/datasets/gsm8k/test.jsonl

✓ Configuration loaded
  Region: us-east-1
  Account: 174671970284
  S3 Bucket: bedrock-bucket-nick
  Dataset: gsm8k
  Job Name: gsm8k-rft-job-1769794232
  Split: 256 train / 32 val / 32 test


**Create IAM Roles and Deploy Lambda**

In [3]:
# Create Lambda execution role (using session-based client)
print("Creating Lambda execution role...")

lambda_trust_policy = {
    "Version": "2012-10-17",
    "Statement": [{"Effect": "Allow", "Principal": {"Service": "lambda.amazonaws.com"}, "Action": "sts:AssumeRole"}]
}

try:
    response = iam_client.create_role(
        RoleName=LAMBDA_ROLE_NAME,
        AssumeRolePolicyDocument=json.dumps(lambda_trust_policy),
        Description=f"Execution role for {DATASET_NAME} reward function Lambda"
    )
    lambda_role_arn = response['Role']['Arn']
    iam_client.attach_role_policy(RoleName=LAMBDA_ROLE_NAME, PolicyArn='arn:aws:iam::aws:policy/service-role/AWSLambdaBasicExecutionRole')
    print(f"✓ Created role: {LAMBDA_ROLE_NAME}")
    print("Waiting 10s for role propagation...")
    time.sleep(10)
except iam_client.exceptions.EntityAlreadyExistsException:
    lambda_role_arn = iam_client.get_role(RoleName=LAMBDA_ROLE_NAME)['Role']['Arn']
    print(f"✓ Using existing role: {LAMBDA_ROLE_NAME}")

print(f"  ARN: {lambda_role_arn}")

# Create deployment package
lambda_zip_content = create_lambda_deployment_package(
    source_file=REWARD_FUNCTION_FILE,
    zip_filename="lambda_deployment.zip",
    archive_name=f"{REWARD_FUNCTION_MODULE}.py"
)

# Deploy Lambda function
print(f"\nDeploying Lambda function: {LAMBDA_FUNCTION_NAME}...")
try:
    lambda_client.get_function(FunctionName=LAMBDA_FUNCTION_NAME)
    lambda_client.update_function_code(FunctionName=LAMBDA_FUNCTION_NAME, ZipFile=lambda_zip_content)
    # Also update handler in case it changed
    waiter = lambda_client.get_waiter('function_updated_v2')
    waiter.wait(FunctionName=LAMBDA_FUNCTION_NAME)
    lambda_client.update_function_configuration(FunctionName=LAMBDA_FUNCTION_NAME, Handler=f"{REWARD_FUNCTION_MODULE}.lambda_handler")
    print("✓ Updated existing function")
except lambda_client.exceptions.ResourceNotFoundException:
    lambda_client.create_function(
        FunctionName=LAMBDA_FUNCTION_NAME,
        Runtime='python3.11',
        Role=lambda_role_arn,
        Handler=f"{REWARD_FUNCTION_MODULE}.lambda_handler",
        Code={'ZipFile': lambda_zip_content},
        Timeout=300,
        MemorySize=512
    )
    print("✓ Created new function")

waiter = lambda_client.get_waiter('function_active_v2')
waiter.wait(FunctionName=LAMBDA_FUNCTION_NAME)
lambda_arn = lambda_client.get_function(FunctionName=LAMBDA_FUNCTION_NAME)['Configuration']['FunctionArn']
print(f"✓ Lambda ready: {lambda_arn}")

# Create Bedrock RFT role
print(f"\nCreating Bedrock RFT role: {BEDROCK_ROLE_NAME}...")

bedrock_trust_policy = {
    "Version": "2012-10-17",
    "Statement": [{"Effect": "Allow", "Principal": {"Service": "bedrock.amazonaws.com"}, "Action": "sts:AssumeRole"}]
}

bedrock_permissions = {
    "Version": "2012-10-17",
    "Statement": [
        {"Effect": "Allow", "Action": ["s3:GetObject", "s3:ListBucket"], "Resource": [f"arn:aws:s3:::{S3_BUCKET}/*", f"arn:aws:s3:::{S3_BUCKET}"]},
        {"Effect": "Allow", "Action": "s3:PutObject", "Resource": f"arn:aws:s3:::{S3_BUCKET}/rft-output/*"},
        {"Effect": "Allow", "Action": "lambda:InvokeFunction", "Resource": lambda_arn}
    ]
}

try:
    response = iam_client.create_role(
        RoleName=BEDROCK_ROLE_NAME,
        AssumeRolePolicyDocument=json.dumps(bedrock_trust_policy),
        Description="Execution role for Bedrock RFT training"
    )
    bedrock_role_arn = response['Role']['Arn']
    print(f"✓ Created role: {BEDROCK_ROLE_NAME}")
except iam_client.exceptions.EntityAlreadyExistsException:
    bedrock_role_arn = iam_client.get_role(RoleName=BEDROCK_ROLE_NAME)['Role']['Arn']
    print(f"✓ Using existing role: {BEDROCK_ROLE_NAME}")

iam_client.put_role_policy(RoleName=BEDROCK_ROLE_NAME, PolicyName='BedrockRFTPermissions', PolicyDocument=json.dumps(bedrock_permissions))
print(f"✓ Bedrock role ready: {bedrock_role_arn}")

# Cleanup deployment package
cleanup_lambda_deployment_package()

Creating Lambda execution role...
✓ Using existing role: GSM8K-Lambda-Role
  ARN: arn:aws:iam::174671970284:role/GSM8K-Lambda-Role
✓ Created lambda_deployment.zip
✓ Package size: 1.3 KB

Deploying Lambda function: gsm8k-reward-function...
✓ Updated existing function
✓ Lambda ready: arn:aws:lambda:us-east-1:174671970284:function:gsm8k-reward-function

Creating Bedrock RFT role: BedrockRFTRole...
✓ Using existing role: BedrockRFTRole
✓ Bedrock role ready: arn:aws:iam::174671970284:role/BedrockRFTRole
✓ Cleaned up lambda_deployment.zip


**Test Lambda Function**

In [4]:
print("Testing Lambda function...")

test_payload = [{
    "id": "test_001",
    "messages": [
        {"role": "user", "content": "What is 2 + 2?"},
        {"role": "assistant", "content": "2 + 2 = 4\n\n#### 4"}
    ],
    "metadata": {"reference_answer": {"final_answer": "4"}}
}]

response = lambda_client.invoke(
    FunctionName=LAMBDA_FUNCTION_NAME,
    InvocationType='RequestResponse',
    Payload=json.dumps(test_payload)
)

result = json.loads(response['Payload'].read())
print(json.dumps(result, indent=2))

# Check for Lambda errors
if 'errorMessage' in result:
    print(f"\n✗ Lambda error: {result['errorMessage']}")
elif isinstance(result, list) and len(result) > 0:
    print(f"\n✓ Score: {result[0].get('aggregate_reward_score', 0):.3f}")
else:
    print("\n✓ Lambda executed successfully")

Testing Lambda function...
[
  {
    "id": "test_001",
    "aggregate_reward_score": 1.0,
    "reward_components": {
      "correctness": 1.0
    }
  }
]

✓ Score: 1.000


**Create RFT Training Job**

In [5]:
print(boto3.__version__)

# Print expected params for create_model_customization_job
import pprint
service_model = bedrock_client._service_model
op_model = service_model.operation_model('CreateModelCustomizationJob')
pprint.pprint(op_model.input_shape.members['customizationConfig'].members['rftConfig'].members['hyperParameters'].members)



1.42.38
OrderedDict([('batchSize', <Shape(BatchSize)>),
             ('computeMultiplier', <Shape(Float)>),
             ('evalInterval', <Shape(EvalInterval)>),
             ('evalSamples', <Shape(EvalSamples)>),
             ('learningRateMultiplier', <Shape(Float)>),
             ('epochs', <Shape(EpochCount)>)])


In [6]:
print("Creating RFT training job...")
print(f"  Job: {JOB_NAME}")
print(f"  Model: {CUSTOM_MODEL_NAME}")
print(f"  Base: {BASE_MODEL_ID}")

response = bedrock_client.create_model_customization_job(
    jobName=JOB_NAME,
    customModelName=CUSTOM_MODEL_NAME,
    roleArn=bedrock_role_arn,
    baseModelIdentifier=BASE_MODEL_ID,
    customizationType='REINFORCEMENT_FINE_TUNING',
    trainingDataConfig={'s3Uri': S3_TRAINING_DATA},
    validationDataConfig={'validators': [{'s3Uri': S3_VALIDATION_DATA}]},
    outputDataConfig={'s3Uri': S3_OUTPUT_PATH},
    customizationConfig={
        'rftConfig': {
            'graderConfig': {'lambdaGrader': {'lambdaArn': lambda_arn}},
            'hyperParameters': {
                'batchSize': 32,
                'epochCount': 1,
                'evalInterval': 50,
                'inferenceMaxTokens': 8192,
                'learningRate': 0.00005,
                'maxPromptLength': 4096,
                'reasoningEffort': 'high',
                'trainingSamplePerPrompt': 4
            }
        }
    }
)

print(f"\n✓ Job created: {response['jobArn']}")

Creating RFT training job...
  Job: gsm8k-rft-job-1769794232
  Model: gsm8k-nova-lite-rft
  Base: arn:aws:bedrock:us-east-1::foundation-model/amazon.nova-2-lite-v1:0:256k


ParamValidationError: Parameter validation failed:
Unknown parameter in customizationConfig.rftConfig.hyperParameters: "epochCount", must be one of: batchSize, computeMultiplier, evalInterval, evalSamples, learningRateMultiplier, epochs
Unknown parameter in customizationConfig.rftConfig.hyperParameters: "inferenceMaxTokens", must be one of: batchSize, computeMultiplier, evalInterval, evalSamples, learningRateMultiplier, epochs
Unknown parameter in customizationConfig.rftConfig.hyperParameters: "learningRate", must be one of: batchSize, computeMultiplier, evalInterval, evalSamples, learningRateMultiplier, epochs
Unknown parameter in customizationConfig.rftConfig.hyperParameters: "maxPromptLength", must be one of: batchSize, computeMultiplier, evalInterval, evalSamples, learningRateMultiplier, epochs
Unknown parameter in customizationConfig.rftConfig.hyperParameters: "reasoningEffort", must be one of: batchSize, computeMultiplier, evalInterval, evalSamples, learningRateMultiplier, epochs
Unknown parameter in customizationConfig.rftConfig.hyperParameters: "trainingSamplePerPrompt", must be one of: batchSize, computeMultiplier, evalInterval, evalSamples, learningRateMultiplier, epochs

**Monitor Training Job**

In [None]:
response = bedrock_client.get_model_customization_job(jobIdentifier=JOB_NAME)
print(f"Job: {JOB_NAME}")
print(f"Status: {response['status']}")

if response['status'] == 'Completed' and 'outputModelArn' in response:
    print(f"\n✓ Model ARN: {response['outputModelArn']}")
elif response['status'] == 'Failed':
    print(f"\n✗ Failed: {response.get('failureMessage', 'Unknown')}")