In [None]:
%pip install transformers  sagemaker seaborn sentence-transformers nltk scikit-learn "huggingface_hub[cli]" --upgrade --quiet

# Llama3 finetuning for Bedrock
## Architecture
This diagram illustrates an end-to-end ML workflow where a SageMaker Pipeline processes, trains, and evaluates a model using HuggingFace containers, then registers it before deploying to Amazon Bedrock through a Lambda function for inference, with model artifacts stored in S3 throughout the process.

![Architecture Diagram](Llama3_finetuning_bedrock.png)

Note: The next cell uses an API token to log in to your Hugging Face account so that the model weights can be downloaded. You need to have been granted access to "meta-llama/Llama-3.2-3B-Instruct" to use the Meta Llama 3.2 3B model.
- [Hugging Face Access Tokens Documentation](https://huggingface.co/docs/hub/en/security-tokens).
- [Getting access to the model](https://huggingface.co/meta-llama/Meta-Llama-3-8B/discussions/172)
- [Meta Llama 3.2 3B Instruct](https://huggingface.co/meta-llama/Llama-3.2-3B-Instruct)

In [None]:
# Log in to the Hugging Face Hub without embedding the access token in the notebook.
# The token is read from the HF_TOKEN environment variable when set; otherwise it is
# prompted for interactively, so it never ends up in the saved notebook or in git.
# NOTE: any token that was ever committed in this cell must be treated as compromised
# and revoked in the Hugging Face account settings.
import os
from getpass import getpass

from huggingface_hub import login

login(token=os.environ.get("HF_TOKEN") or getpass("Hugging Face access token: "))

In [None]:
import sagemaker
import boto3
import os

sagemaker_session = sagemaker.Session()

# S3 bucket used for uploading data, models and logs. Leave as None to use the
# session's default bucket (SageMaker creates it automatically if it does not exist).
sagemaker_session_bucket = None
if sagemaker_session_bucket is None and sagemaker_session is not None:
    sagemaker_session_bucket = sagemaker_session.default_bucket()

try:
    role = sagemaker.get_execution_role()
except ValueError:
    # Running outside SageMaker (e.g. locally): resolve the execution role by name.
    # The name can be overridden with SAGEMAKER_EXECUTION_ROLE_NAME so the notebook is
    # not tied to one account's role; the default keeps the previous behaviour.
    fallback_role_name = os.environ.get(
        "SAGEMAKER_EXECUTION_ROLE_NAME",
        "AmazonSageMaker-ExecutionRole-20220929T161862",
    )
    iam = boto3.client('iam')
    role = iam.get_role(RoleName=fallback_role_name)['Role']['Arn']

# Session bound to the chosen bucket; its region is reused for the SageMaker client.
sess = sagemaker.Session(default_bucket=sagemaker_session_bucket)
sm_client = boto3.client('sagemaker', region_name=sess.boto_region_name)

print(f"sagemaker role arn: {role}")
print(f"sagemaker bucket: {sess.default_bucket()}")
print(f"sagemaker session region: {sess.boto_region_name}")

# Create Dataset

In [None]:
from sagemaker.s3 import S3Downloader
from sagemaker.s3 import S3Uploader
import json

In [None]:
dataset_S3Uri="s3://jumpstart-cache-prod-us-west-2/training-datasets/oasst_top/train/"

In [None]:
# Fetch everything under dataset_S3Uri into the local dataset/ directory.
train_dataset_path = S3Downloader.download(
    s3_uri=dataset_S3Uri,
    local_path="dataset/",
)
print("Training config downloaded to:")
print(train_dataset_path)

In [None]:
from sagemaker.s3 import S3Uploader
# Base S3 prefix for everything this notebook uploads (dataset and training config).
input_path = f's3://{sess.default_bucket()}/datasets/llama3'

# Upload the downloaded training file to the session bucket.
train_dataset_path = "dataset/train.jsonl"
train_s3_path = S3Uploader.upload(
    local_path=train_dataset_path,
    desired_s3_uri=f"{input_path}/dataset",
)

print("Training dataset uploaded to:")
print(train_s3_path)

# Sagemaker Pipeline

In [None]:
import boto3
import sagemaker
from sagemaker.workflow.pipeline import Pipeline
from sagemaker.workflow.steps import ProcessingStep, TrainingStep, CacheConfig
from sagemaker.workflow.step_collections import RegisterModel
from sagemaker.huggingface import HuggingFace
from sagemaker.processing import ScriptProcessor
from sagemaker.workflow.properties import PropertyFile
from sagemaker.workflow.conditions import ConditionGreaterThanOrEqualTo
from sagemaker.workflow.condition_step import ConditionStep
from sagemaker.sklearn.processing import SKLearnProcessor
from sagemaker.processing import ProcessingInput, ProcessingOutput
from sagemaker.pytorch.processing import PyTorchProcessor
from sagemaker.huggingface import HuggingFaceProcessor
from sagemaker.workflow.pipeline_context import PipelineSession



# Session object handed to every processor/estimator that is turned into a pipeline step.
pipeline_session = PipelineSession()

#sagemaker_session = sagemaker.Session()
#role = sagemaker.get_execution_role()
# Shared settings used by the step definitions in the following cells.
region=sagemaker_session.boto_region_name
model_name = "llama3-qa-model"
instance_type_preprocessing = "ml.m5.large"
instance_count = 1
# Cache configuration to improve pipeline execution time
# NOTE(review): in the visible cells cache_config is not passed to any step (the Lambda
# step builds its own CacheConfig) — confirm whether the steps should use it.
cache_config = CacheConfig(enable_caching=True, expire_after="30d")

## Preprocessing Step

In [None]:
# scikit-learn processing container that runs the dataset preprocessing script.
# Instance type/count come from the shared pipeline settings defined above.
preprocessing_processor = SKLearnProcessor(
    framework_version="1.0-1",
    instance_type=instance_type_preprocessing,
    instance_count=instance_count,
    base_job_name="llama3-qa-preprocessing",
    role=role,
    max_runtime_in_seconds=3600,  # Set a maximum runtime of 1 hour,
    sagemaker_session=pipeline_session
)


In [None]:
from sagemaker.processing import ProcessingInput, ProcessingOutput
from sagemaker.workflow.steps import ProcessingStep
# The uploaded training file is made available to the job under /opt/ml/processing/input.
inputs = [
    ProcessingInput(source=train_s3_path, destination="/opt/ml/processing/input"),
]

# Two named outputs ("train" and "test"); the training step later references them by name.
outputs = [
    ProcessingOutput(output_name="train", source="/opt/ml/processing/output/train"),
    ProcessingOutput(output_name="test", source="/opt/ml/processing/output/test")
]



In [None]:
# Pipeline step that runs scripts/preprocessing/preprocess.py with the processor,
# inputs and outputs defined in the previous cells.
preprocessing_step = ProcessingStep(
    name="PreprocessQADataset",
    processor=preprocessing_processor,
    inputs=inputs,
    outputs=outputs,
    
    code="scripts/preprocessing/preprocess.py",
)

# Training step

In [None]:
%%writefile llama_3_2_3B_fsdp_lora.yaml
# script parameters
model_id: "meta-llama/Llama-3.2-3B-Instruct" # Hugging Face model id
max_seq_length:  512 #2048              # max sequence length for model and packing of the dataset
# sagemaker specific parameters
train_dataset_path: "/opt/ml/input/data/train" # path to where SageMaker saves train dataset
test_dataset_path: "/opt/ml/input/data/test"   # path to where SageMaker saves test dataset
#output_dir: "/opt/ml/model"            # path to where SageMaker will upload the model
# NOTE(review): /tmp/llama3 is a local scratch path; presumably the training script copies
# the final model to /opt/ml/model for upload - confirm against train_fsdp_lora.py
output_dir: "/tmp/llama3"              # local directory for checkpoints / intermediate output
# training parameters
report_to: "tensorboard"               # report metrics to tensorboard
learning_rate: 0.0002                  # learning rate 2e-4
lr_scheduler_type: "constant"          # learning rate scheduler
num_train_epochs: 10                   # number of training epochs
per_device_train_batch_size: 16        # batch size per device during training
per_device_eval_batch_size: 16         # batch size for evaluation
gradient_accumulation_steps: 2         # number of steps before performing a backward/update pass
optim: adamw_torch                     # use torch adamw optimizer
logging_steps: 10                      # log every 10 steps
save_strategy: epoch                   # save checkpoint every epoch
evaluation_strategy: epoch             # evaluate every epoch
max_grad_norm: 0.3                     # max gradient norm
warmup_ratio: 0.03                     # warmup ratio
bf16: true                             # use bfloat16 precision
tf32: false                            # tf32 precision is disabled
gradient_checkpointing: true           # use gradient checkpointing to save memory
# FSDP parameters: https://huggingface.co/docs/transformers/main/en/fsdp
fsdp: "full_shard auto_wrap offload" # remove offload if enough GPU memory
fsdp_config:
  backward_prefetch: "backward_pre"
  forward_prefetch: "false"
  use_orig_params: "false"

In [None]:
from sagemaker.s3 import S3Uploader


# Push the training config written by the previous cell to S3 under <input_path>/config.
model_yaml = "llama_3_2_3B_fsdp_lora.yaml"
train_config_s3_path = S3Uploader.upload(
    local_path=model_yaml,
    desired_s3_uri=f"{input_path}/config",
)

print("Training config uploaded to:")
print(train_config_s3_path)

In [None]:
from sagemaker.huggingface import HuggingFace
from huggingface_hub import HfFolder
import time

# Timestamped name, used both as the training job base name and as the pipeline step name.
# NOTE(review): the name says "8B" while the config trains Llama-3.2-3B-Instruct — confirm.
timestamp = time.strftime('%Y%m%d-%H%M%S')
job_name = f'llama3-8B-exp1-{timestamp}'

# Fail early with a clear message when no Hugging Face token is stored locally: the value
# is passed to the training job as an environment variable and is needed there to
# download the gated Llama weights.
# NOTE(review): the token travels as a plain environment variable of the training job;
# consider a secrets manager if that exposure is not acceptable.
hf_token = HfFolder.get_token()
if not hf_token:
    raise RuntimeError(
        "No Hugging Face token found. Log in to the Hugging Face Hub before building the training step."
    )

# create the Estimator
huggingface_estimator = HuggingFace(
    entry_point          = 'training/train_fsdp_lora.py',      # train script
    model_dir            = '/opt/ml/model',
    source_dir           = 'scripts/',  # directory which includes all the files needed for training
    instance_type        = 'ml.g5.12xlarge',  # instances type used for the training job
    instance_count       = 2,                 # the number of instances used for training
    max_run              = 2*24*60*60,        # maximum runtime in seconds (days * hours * minutes * seconds)
    base_job_name        = job_name,          # the name of the training job
    role                 = role,              # IAM role used in training job to access AWS resources, e.g. S3
    volume_size          = 500,               # the size of the EBS volume in GB
    transformers_version = '4.36.0',          # the transformers version used in the training job
    pytorch_version      = '2.1.0',           # the pytorch version used in the training job
    py_version           = 'py310',           # the python version used in the training job
    hyperparameters      =  {
        "config": "/opt/ml/input/data/config/llama_3_2_3B_fsdp_lora.yaml" # path to TRL config which was uploaded to s3
    },
    sagemaker_session=pipeline_session,
    disable_output_compression = True,        # not compress output to save training time and cost
    distribution={"torch_distributed": {"enabled": True}},   # enables torchrun
    environment  = {
        "HUGGINGFACE_HUB_CACHE": "/tmp/.cache", # set env variable to cache models in /tmp
        "HF_TOKEN": hf_token,                   # huggingface token to access gated models, e.g. llama 3
        "ACCELERATE_USE_FSDP": "1",             # enable FSDP
        "FSDP_CPU_RAM_EFFICIENT_LOADING": "1"   # enable CPU RAM efficient loading
    },
)

# The "train"/"test" channels are the outputs of the preprocessing step; "config" is the
# YAML file uploaded to S3 in the previous cell.
training_step = TrainingStep(
    name=job_name,
    estimator=huggingface_estimator,
    inputs={
        "train": sagemaker.inputs.TrainingInput(
            s3_data=preprocessing_step.properties.ProcessingOutputConfig.Outputs["train"].S3Output.S3Uri,
        ),
        "config": sagemaker.inputs.TrainingInput(
            s3_data=train_config_s3_path,
        ),
        "test": sagemaker.inputs.TrainingInput(
            s3_data=preprocessing_step.properties.ProcessingOutputConfig.Outputs["test"].S3Output.S3Uri,
        )
    },
)

# TODO
# Evaluation Step

In [None]:
"""
hfp = HuggingFaceProcessor(
    role=role, 
    instance_count=1,
    instance_type='ml.g4dn.16xlarge',  # Use 'local' for local mode, or specify an instance type like 'ml.g4dn.xlarge' for SageMaker
    transformers_version='4.36.0',  # Adjust version as needed
    pytorch_version='2.1.0',  # Adjust version as needed
    py_version= 'py310',
    base_job_name='llama3-eval',
    sagemaker_session=pipeline_session,
)
"""

In [None]:
'''
evaluation_step = ProcessingStep(
    name="EvaluateLlama3Model",
    processor=hfp,
    inputs=[
        ProcessingInput(
            source=training_step.properties.ModelArtifacts.S3ModelArtifacts,
            destination="/opt/ml/processing/model"
        ),
        ProcessingInput(
            source=preprocessing_step.properties.ProcessingOutputConfig.Outputs["test"].S3Output.S3Uri,
            destination="/opt/ml/processing/test"
        ),
        ProcessingInput(source="scripts/evaluation", destination='/opt/ml/processing/input/code'),
    ],
    outputs=[
        ProcessingOutput(output_name="evaluation", source="/opt/ml/processing/evaluation"),
    ],
    code="scripts/evaluation/evaluate.py",
    
)
'''

In [None]:
"""
eval_args = hfp.run(
    inputs=[
        ProcessingInput(
            source=training_step.properties.ModelArtifacts.S3ModelArtifacts,
            destination="/opt/ml/processing/input/model",
        ),
        ProcessingInput(
            source=preprocessing_step.properties.ProcessingOutputConfig.Outputs["test"].S3Output.S3Uri,
            destination="/opt/ml/processing/input/data",
        ),
        ProcessingInput(
            source="./scripts/evaluation",
            destination="/opt/ml/processing/input/code"),
    ],
    outputs=[
        ProcessingOutput(output_name="evaluation", source="/opt/ml/processing/output",destination=f's3://{sagemaker_session_bucket}/llama3-8B-exp1-{timestamp}/output/evaluation'),
    ],
    code="evaluate.py",
    source_dir="scripts/evaluation",
    arguments=['--bootstrap', '/opt/ml/processing/input/code/bootstrap.sh']
)
"""

In [None]:
"""
evaluation_step = ProcessingStep(name="EvaluateLlama3Model", step_args=eval_args)
"""

# Model Register Step

In [None]:
from sagemaker.workflow.pipeline import Pipeline
from sagemaker.workflow.steps import CreateModelStep
from sagemaker.model import Model
from sagemaker.huggingface.model import HuggingFaceModel
from sagemaker.inputs import CreateModelInput
from sagemaker.workflow.model_step import ModelStep
import sagemaker
from sagemaker.huggingface import HuggingFaceModel, get_huggingface_llm_image_uri
import time

In [None]:
# Container image URI for the Hugging Face LLM serving backend in the session's region.
image_uri = get_huggingface_llm_image_uri(
  backend="huggingface",
  region=region,
  version="2.0",
  
)

In [None]:
from sagemaker.huggingface import HuggingFaceModel
# Model object used by the create-model and register-model steps below.
# NOTE(review): an explicit image_uri is passed together with transformers/pytorch/py
# versions; presumably the image takes precedence and the versions are unused — confirm.
llm_model=HuggingFaceModel(
    transformers_version="4.37.0",
    pytorch_version="1.10.2",
    py_version="py310",
    role=role,
    image_uri=image_uri,
)

In [None]:
# Create model step
# NOTE(review): llama_model_step is not in the `steps` list passed to Pipeline(...) later
# in this notebook, so it is never executed — confirm whether it is still needed.
llama_model_step = CreateModelStep(
    name="CreateLlama3ModelStep",
    model=llm_model,
    inputs=training_step.properties.ModelArtifacts.S3ModelArtifacts,
    depends_on=[training_step],
)
    
# Create a RegisterModel step, which registers the model with the SageMaker Model Registry.
# The model data is the artifact location produced by the training step.
# NOTE(review): content/response types are "text/csv"; presumably an LLM endpoint
# exchanges JSON — confirm.
model_package_group_name = "Llama3Models" 
step_register_model = RegisterModel(
    name="RegisterModel",
    model=llm_model,
    model_data=training_step.properties.ModelArtifacts.S3ModelArtifacts,
    content_types=["text/csv"],
    response_types=["text/csv"],
    inference_instances=["ml.g5.12xlarge"],
    transform_instances=["ml.g5.12xlarge"],
    model_package_group_name=model_package_group_name,
    depends_on=[training_step],
    approval_status="Approved",
)

# Bedrock Deployment step

### Create Lambda layer

In [None]:
import os
import subprocess
import shutil
import boto3

# Build a Lambda layer containing a pinned boto3 and publish it.
import sys  # local import: pip is run with the kernel's own interpreter

# Create directories
os.makedirs('boto3-layer/python', exist_ok=True)

try:
    # Install boto3 into the layer directory. `sys.executable -m pip` guarantees the pip
    # that belongs to this kernel is used rather than whatever `pip` is first on PATH.
    subprocess.check_call([
        sys.executable, '-m', 'pip', 'install', 'boto3==1.35.16', '-t', 'boto3-layer/python',
        '--upgrade', '--no-cache-dir'
    ])

    # Create zip file
    shutil.make_archive('boto3-layer', 'zip', 'boto3-layer')

    # Upload to AWS as a Lambda layer
    lambda_client = boto3.client('lambda')

    with open('boto3-layer.zip', 'rb') as zip_file:
        response = lambda_client.publish_layer_version(
            LayerName='boto3-latest',
            Description='Latest Boto3 layer',
            Content={
                'ZipFile': zip_file.read()
            },
            # python3.12 is included because the import Lambda defined later in this
            # notebook is created with runtime='python3.12'.
            CompatibleRuntimes=['python3.10', 'python3.11', 'python3.12']
        )
finally:
    # Clean up the build artefacts even when the install or publish step fails.
    shutil.rmtree('boto3-layer', ignore_errors=True)
    if os.path.exists('boto3-layer.zip'):
        os.remove('boto3-layer.zip')

print(f"Layer ARN: {response['LayerArn']}")
print(f"Layer Version ARN: {response['LayerVersionArn']}")
lambda_layer_arn=response['LayerVersionArn']

### Create Role and policies

In [None]:
def create_lambda_execution_role(role_name, training_bucket, account_id, region):
    """Create the IAM role for the import Lambda, or refresh it when it already exists.

    The role trusts Lambda and Bedrock (Bedrock restricted to model-import jobs of
    ``account_id`` in ``region``). It receives an inline Bedrock policy, the AWS-managed
    Lambda basic execution policy, and an inline read-only S3 policy for
    ``training_bucket``. For an existing role the trust policy is overwritten and the
    policies are re-applied.

    Returns:
        The ARN of the created or already existing role.
    """
    iam = boto3.client('iam')

    # --- Trust policy: who may assume the role ---------------------------------
    lambda_trust = {
        "Effect": "Allow",
        "Principal": {"Service": "lambda.amazonaws.com"},
        "Action": "sts:AssumeRole"
    }
    bedrock_trust = {
        "Effect": "Allow",
        "Principal": {"Service": "bedrock.amazonaws.com"},
        "Action": "sts:AssumeRole",
        "Condition": {
            "StringEquals": {"aws:SourceAccount": account_id},
            "ArnEquals": {
                "aws:SourceArn": f"arn:aws:bedrock:{region}:{account_id}:model-import-job/*"
            }
        }
    }
    trust_relationship = {
        "Version": "2012-10-17",
        "Statement": [lambda_trust, bedrock_trust]
    }

    # --- Permissions: Bedrock import job management plus PassRole to Bedrock ----
    import_job_statement = {
        "Effect": "Allow",
        "Action": [
            "bedrock:CreateModelImportJob",
            "bedrock:GetModelImportJob",
            "bedrock:ListModelImportJobs"
        ],
        "Resource": "*"
    }
    pass_role_statement = {
        "Effect": "Allow",
        "Action": ["iam:PassRole"],
        "Resource": f"arn:aws:iam::{account_id}:role/*",
        "Condition": {
            "StringEquals": {"iam:PassedToService": "bedrock.amazonaws.com"}
        }
    }
    bedrock_policy = {
        "Version": "2012-10-17",
        "Statement": [import_job_statement, pass_role_statement]
    }

    # --- Permissions: read-only access to the training bucket -------------------
    s3_policy = {
        "Version": "2012-10-17",
        "Statement": [
            {
                "Effect": "Allow",
                "Action": ["s3:GetObject", "s3:ListBucket"],
                "Resource": [
                    f"arn:aws:s3:::{training_bucket}",
                    f"arn:aws:s3:::{training_bucket}/*"
                ]
            }
        ]
    }

    def _put_inline_policy(policy_name, document):
        # Inline policies are written with put_role_policy.
        iam.put_role_policy(
            RoleName=role_name,
            PolicyName=policy_name,
            PolicyDocument=json.dumps(document)
        )

    def _apply_policies():
        # Same order as before: Bedrock inline, managed Lambda policy, S3 inline.
        _put_inline_policy('BedrockAccessPolicy', bedrock_policy)
        print("Attached Bedrock permissions policy")

        try:
            iam.attach_role_policy(
                RoleName=role_name,
                PolicyArn="arn:aws:iam::aws:policy/service-role/AWSLambdaBasicExecutionRole"
            )
            print("Attached Lambda basic execution policy")
        except iam.exceptions.EntityAlreadyExistsException:
            print("Lambda basic execution policy already attached")

        _put_inline_policy('S3AccessPolicy', s3_policy)
        print("Attached S3 access policy")

    try:
        created = iam.create_role(
            RoleName=role_name,
            AssumeRolePolicyDocument=json.dumps(trust_relationship),
            Description="Execution role for Lambda function and Bedrock model import jobs"
        )
        role_arn = created['Role']['Arn']
        print(f"Created IAM role: {role_arn}")

        _apply_policies()
        return role_arn

    except iam.exceptions.EntityAlreadyExistsException:
        print(f"IAM role {role_name} already exists. Retrieving its ARN.")
        existing = iam.get_role(RoleName=role_name)

        # Bring the existing role in line with the trust policy defined above.
        iam.update_assume_role_policy(
            RoleName=role_name,
            PolicyDocument=json.dumps(trust_relationship)
        )

        _apply_policies()
        return existing['Role']['Arn']

# Usage: create/refresh the execution role that the import Lambda runs under.
role_name = "LambdaBedrockExecutionRole"
training_bucket = sagemaker_session_bucket
account_id = boto3.client('sts').get_caller_identity()['Account']
# NOTE(review): this overwrites the `region` taken from the SageMaker session earlier in
# the notebook with a fixed value — confirm the notebook is only meant to run in us-west-2.
region = "us-west-2"
execution_role_arn = create_lambda_execution_role(role_name, training_bucket, account_id, region)
print(f"Execution Role ARN: {execution_role_arn}")

In [None]:
import boto3
import json
from botocore.exceptions import ClientError

def handle_client_error(func, *args, **kwargs):
    """Call ``func(*args, **kwargs)`` and map a 'NoSuchEntity' ClientError to ``None``.

    Any other ClientError is re-raised unchanged; other exception types are not caught.
    """
    try:
        result = func(*args, **kwargs)
    except ClientError as err:
        error_code = err.response['Error']['Code']
        if error_code != 'NoSuchEntity':
            raise
        return None
    return result

def create_or_update_role(role_name, trust_relationship, permission_policy, iam_client=None, account_id=None):
    """Create or update an IAM role together with a customer-managed policy named after it.

    Parameters:
    -----------
    role_name : str
        Name of the role; the managed policy is named ``f"{role_name}Policy"``.
    trust_relationship : dict
        Assume-role (trust) policy document for the role.
    permission_policy : dict
        Permissions policy document stored in the managed policy.
    iam_client : optional
        IAM client to use; a new boto3 client is created when omitted.
    account_id : str, optional
        AWS account id used to build the policy ARN; looked up via STS when omitted.

    Returns:
    --------
    str
        The ARN of the role.
    """
    iam = iam_client or boto3.client('iam')
    account_id = account_id or boto3.client('sts').get_caller_identity()['Account']
    trust_document = json.dumps(trust_relationship)
    permission_document = json.dumps(permission_policy)

    # Check and update/create role
    role = handle_client_error(iam.get_role, RoleName=role_name)
    if role:
        iam.update_assume_role_policy(
            RoleName=role_name,
            PolicyDocument=trust_document
        )
        print(f"Updated existing role: {role_name}")
    else:
        iam.create_role(
            RoleName=role_name,
            AssumeRolePolicyDocument=trust_document
        )
        print(f"Created new role: {role_name}")

    # Handle policy
    policy_name = f"{role_name}Policy"
    policy_arn = f"arn:aws:iam::{account_id}:policy/{policy_name}"

    policy = handle_client_error(iam.get_policy, PolicyArn=policy_arn)
    if policy:
        # Publish the new document as the default version ...
        iam.create_policy_version(
            PolicyArn=policy_arn,
            PolicyDocument=permission_document,
            SetAsDefault=True
        )
        # ... then delete every non-default version so old versions do not pile up.
        versions = iam.list_policy_versions(PolicyArn=policy_arn)['Versions']
        for version in versions:
            if not version['IsDefaultVersion']:
                iam.delete_policy_version(
                    PolicyArn=policy_arn,
                    VersionId=version['VersionId']
                )
        print(f"Updated existing policy: {policy_name}")
    else:
        iam.create_policy(
            PolicyName=policy_name,
            PolicyDocument=permission_document
        )
        print(f"Created new policy: {policy_name}")

    # Attach policy to role if not already attached. The attachment list was previously
    # fetched but never consulted; it now decides whether the attach call is needed.
    attached_policies = iam.list_attached_role_policies(RoleName=role_name)['AttachedPolicies']
    if any(attached['PolicyArn'] == policy_arn for attached in attached_policies):
        print(f"Policy already attached to role: {role_name}")
    else:
        iam.attach_role_policy(
            RoleName=role_name,
            PolicyArn=policy_arn
        )
        print(f"Attached policy to role: {role_name}")

    return iam.get_role(RoleName=role_name)['Role']['Arn']



# Set up variables for the role that Bedrock model-import jobs assume.
account_id = boto3.client('sts').get_caller_identity()['Account']
# NOTE(review): region is fixed here rather than taken from the SageMaker session — confirm.
region = "us-west-2"
training_bucket = sagemaker_session_bucket
role_name = "Sagemaker_Bedrock_import_role"

# Trust policy: only Bedrock may assume the role, and only for model-import jobs of this
# account in the region above.
trust_relationship = {
    "Version": "2012-10-17",
    "Statement": [{
        "Effect": "Allow",
        "Principal": {"Service": "bedrock.amazonaws.com"},
        "Action": "sts:AssumeRole",
        "Condition": {
            "StringEquals": {"aws:SourceAccount": account_id},
            "ArnEquals": {"aws:SourceArn": f"arn:aws:bedrock:{region}:{account_id}:model-import-job/*"}
        }
    }]
}

# Permissions: read-only access to the training bucket, limited to resources owned by
# this account.
permission_policy = {
    "Version": "2012-10-17",
    "Statement": [{
        "Effect": "Allow",
        "Action": ["s3:GetObject", "s3:ListBucket"],
        "Resource": [f"arn:aws:s3:::{training_bucket}", f"arn:aws:s3:::{training_bucket}/*"],
        "Condition": {"StringEquals": {"aws:ResourceAccount": account_id}}
    }]
}

# Create or update the role; role_arn is the ARN of the Bedrock import role.
role_arn = create_or_update_role(role_name, trust_relationship, permission_policy)
print(f"Role ARN: {role_arn}")

### Create Lambda Step

In [None]:
from sagemaker.lambda_helper import Lambda
# Create Lambda function instance
# The function runs under the Lambda execution role created earlier and loads the
# published boto3 layer.
lambda_func = Lambda(
    function_name="bedrock-model-import",
    execution_role_arn=execution_role_arn,
    script="scripts/lambda/bedrock_model_import.py",
    handler='bedrock_model_import.lambda_handler',
    timeout=900,  # 15 minutes, adjust as needed
    memory_size=128,
    runtime='python3.12',
    layers=[lambda_layer_arn],  # Your boto3 layer ARN
)

In [None]:
from sagemaker.workflow.lambda_step import LambdaStep, LambdaOutput, LambdaOutputTypeEnum
# Define the outputs
# A single string output named "model_arn" is declared for the Lambda step.
lambda_outputs = [
    LambdaOutput(output_name="model_arn", output_type=LambdaOutputTypeEnum.String)
]

In [None]:
step_register_model.properties.ModelPackageArn

In [None]:

# Create the Lambda step that triggers the Bedrock model import once the model is registered.
lambda_step = LambdaStep(
    name="BedrockModelImport",
    lambda_func=lambda_func,
    inputs={
        "model_uri": training_step.properties.ModelArtifacts.S3ModelArtifacts,  # Use the output from the training step
        # Pass the dedicated Bedrock import role (role_arn, created above with a trust policy
        # for bedrock.amazonaws.com) rather than the SageMaker execution role.
        # NOTE(review): presumably the handler forwards this as the import job's role —
        # confirm against scripts/lambda/bedrock_model_import.py.
        "role_arn": role_arn,
        "model_name": model_name
    },
    outputs=lambda_outputs,
    cache_config=CacheConfig(enable_caching=True, expire_after="1d"),
    depends_on=[step_register_model]
)

"""
"model_name": "llama3_model",
"model_uri":training_step.properties.ModelArtifacts.S3ModelArtifacts,
"role_arn": role_arn,
"""

# Pipeline creation

In [None]:
import logging

logging.basicConfig(level=logging.INFO)

# Build the pipeline outside the try block so that a constructor failure surfaces with its
# own traceback instead of a NameError on `pipeline` inside an error handler.
# NOTE(review): `parameters` receives plain strings here; presumably it expects pipeline
# parameter objects (e.g. ParameterString) — confirm.
pipeline = Pipeline(
    name="Llama3-QAPipeline",
    steps=[preprocessing_step, training_step,step_register_model,lambda_step ],
    parameters=[role, model_name],
    sagemaker_session=pipeline_session,
)
logging.info("Pipeline created successfully")

try:
    pipeline.upsert(role_arn=role)
    logging.info("Pipeline upserted successfully")

    execution = pipeline.start()
    logging.info("Pipeline started successfully")

except ValueError as ve:
    # The rendered pipeline definition is deliberately not logged: the training step's
    # environment carries the Hugging Face token.
    logging.error(f"ValueError occurred: {str(ve)}")
    raise
except Exception as e:
    logging.error(f"An error occurred: {str(e)}")
    logging.error(f"Error type: {type(e).__name__}")
    # Re-raise so the notebook stops here; later cells need `execution` to exist.
    raise

In [None]:
import time
from botocore.exceptions import ClientError

def get_pipeline_status(execution):
    """Return the execution's 'PipelineExecutionStatus', or None if describe() raises ClientError."""
    try:
        description = execution.describe()
        return description['PipelineExecutionStatus']
    except ClientError as err:
        print(f"Error getting pipeline status: {err}")
    return None

def get_step_statuses(execution):
    """Map each step name of the execution to its status; return {} if list_steps() raises ClientError."""
    try:
        statuses = {}
        for step in execution.list_steps():
            statuses[step['StepName']] = step['StepStatus']
        return statuses
    except ClientError as err:
        print(f"Error getting step statuses: {err}")
        return {}

def is_pipeline_finished(status):
    """Return True when ``status`` is one of the states this notebook treats as terminal."""
    terminal_states = ('Succeeded', 'Completed', 'Failed', 'Stopped')
    return any(status == state for state in terminal_states)

def print_progress(status, step_statuses):
    """Print the overall pipeline status followed by one indented line per step."""
    print(f"\nPipeline status: {status}")
    print("Step statuses:")
    # A separate loop name keeps the `status` argument from being shadowed.
    for step_name in step_statuses:
        print(f"  {step_name}: {step_statuses[step_name]}")

def monitor_pipeline_execution(execution, check_interval=60):
    """Poll a pipeline execution until it reaches a terminal state, printing progress.

    Parameters:
    -----------
    execution :
        Pipeline execution handle passed to get_pipeline_status / get_step_statuses.
    check_interval : int, optional
        Seconds to wait between polls (default: 60).

    A full progress report is printed whenever the step statuses change; otherwise a
    single dot is printed. When the pipeline status cannot be read the poll is retried
    after ``check_interval`` seconds.
    """
    print("Pipeline execution started.")
    # Report the actual polling interval instead of a fixed "every minute".
    print(f"Status updates (checking every {check_interval} seconds):")

    previous_step_statuses = {}
    while True:
        status = get_pipeline_status(execution)
        if status is None:
            print("Failed to get pipeline status. Retrying...")
            time.sleep(check_interval)
            continue

        step_statuses = get_step_statuses(execution)

        if step_statuses != previous_step_statuses:
            print_progress(status, step_statuses)
            previous_step_statuses = step_statuses
        else:
            print(".", end='', flush=True)

        if is_pipeline_finished(status):
            break

        time.sleep(check_interval)

    print("\nPipeline execution finished.")
    print_progress(status, step_statuses)

# Usage example:
monitor_pipeline_execution(execution)

In [None]:
preprocessing_step.properties


In [None]:
pipeline.parameters

In [None]:

# Get the model ARN from the Lambda step output.
# Step results are read through execution.list_steps(); for a Lambda step the declared
# outputs are listed under Metadata -> Lambda -> OutputParameters as Name/Value pairs.
# The step is looked up by the name it was created with ("BedrockModelImport").
bedrock_import_step = next(
    step for step in execution.list_steps() if step['StepName'] == "BedrockModelImport"
)
lambda_output_values = {
    param['Name']: param['Value']
    for param in bedrock_import_step['Metadata']['Lambda']['OutputParameters']
}
model_arn = lambda_output_values['model_arn']

# Now you can use this model_arn to evaluate the model in Bedrock

## Lambda testing

In [None]:
import boto3
import json

def import_model_to_bedrock(role_arn, model_uri, model_name="llama3_sagemaker", region="us-west-2",
                            function_name="bedrock-model-import"):
    """
    Call the Lambda function to import a model to Bedrock

    Parameters:
    -----------
    role_arn : str
        The ARN of the IAM role to be used for the import
    model_uri : str
        The S3 URI where the model is stored
    model_name : str, optional
        Name for the imported model (default: "llama3_sagemaker")
    region : str, optional
        AWS region (default: "us-west-2")
    function_name : str, optional
        Name of the Lambda function to invoke (default: "bedrock-model-import")

    Returns:
    --------
    dict
        Response from the Lambda function
    """
    try:
        # Initialize Lambda client
        lambda_client = boto3.client('lambda', region_name=region)

        # Prepare the payload and serialize it to a JSON string
        payload = {
            "role_arn": role_arn,
            "model_uri": model_uri,
            "model_name": model_name
        }
        payload_json = json.dumps(payload)

        # Invoke Lambda function synchronously
        response = lambda_client.invoke(
            FunctionName=function_name,
            InvocationType='RequestResponse',
            Payload=payload_json
        )

        # Read and parse the response
        response_payload = json.loads(response['Payload'].read().decode('utf-8'))

        # A handler failure is reported through 'FunctionError' on an otherwise successful
        # invoke; surface it so the error payload is not mistaken for a normal result.
        if response.get('FunctionError'):
            print(f"Lambda function reported an error ({response['FunctionError']}): {response_payload}")

        # Check if the job requires follow-up
        if response_payload.get('requires_follow_up'):
            print("Job started successfully but still in progress.")
            print(f"Job Name: {response_payload.get('job_name')}")
            print(f"Job ARN: {response_payload.get('job_arn')}")

        return response_payload

    except Exception as e:
        print(f"Error invoking Lambda function: {str(e)}")
        raise

def check_import_status(job_name, region="us-west-2"):
    """
    Look up the most recently created model import job whose name contains ``job_name``

    Parameters:
    -----------
    job_name : str
        The name (or name fragment) of the import job to check
    region : str, optional
        AWS region (default: "us-west-2")

    Returns:
    --------
    dict or None
        ``{'status', 'model_arn', 'error'}`` for the newest matching job,
        or None when no job matches
    """
    try:
        bedrock = boto3.client('bedrock', region_name=region)
        listing = bedrock.list_model_import_jobs(
            nameContains=job_name,
            sortBy='CreationTime',
            sortOrder='Descending'
        )
        summaries = listing['modelImportJobSummaries']

        if not summaries:
            return None

        # Results are sorted newest first, so the first summary is the latest job.
        latest = summaries[0]
        return {
            'status': latest['status'],
            'model_arn': latest.get('importedModelArn'),
            'error': latest.get('failureReason', '')
        }

    except Exception as e:
        print(f"Error checking job status: {str(e)}")
        raise

In [None]:
# Example call
# NOTE(review): the role ARN and model URI below are literal values from one account and
# one pipeline run — replace them with your own before running this cell.
response = import_model_to_bedrock(
    role_arn='arn:aws:iam::786045444066:role/Sagemaker_Bedrock_import_role',
    model_uri='s3://sagemaker-us-west-2-786045444066/pipelines-9h6uct6juob9-llama3-8B-exp1-20241-bJ8cv1J5NG/output/model',
    model_name='my_custom_model'
)

print(f"Response: {json.dumps(response, indent=2)}")

# If the job is still running, check its status
if response.get('requires_follow_up'):
    job_name = response['job_name']
    missing_polls = 0
    while True:
        status = check_import_status(job_name)
        print(f"Current status: {status}")
        if status is None:
            # check_import_status returns None when no job matches; tolerate a few misses
            # instead of failing on None['status'], then give up with a clear error.
            missing_polls += 1
            if missing_polls >= 5:
                raise RuntimeError(f"No model import job matching {job_name!r} was found")
        elif status['status'] in ['Completed', 'Failed', 'Stopped']:
            break
        time.sleep(60)  # Wait for 60 seconds before checking again

## Inference test

In [None]:
model_arn='arn:aws:bedrock:us-west-2:786045444066:imported-model/686s53mpwdkw'

In [None]:
# Cell 1: Imports and Setup
import json
import boto3
from botocore.config import Config
import time
from datetime import datetime
import pandas as pd
from IPython.display import display, HTML
import matplotlib.pyplot as plt
import seaborn as sns
import random
from difflib import SequenceMatcher
from nltk.translate.bleu_score import sentence_bleu
from nltk.tokenize import word_tokenize
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
from sentence_transformers import SentenceTransformer
import requests
import nltk
from sagemaker.workflow.properties import PropertyFile

# Download required NLTK resources.
# nltk.download() signals failure through its return value, so the result is checked
# before reporting success.
# NOTE(review): 'punkt_tab' is requested as well because recent NLTK releases presumably
# load the tokenizer tables from it — confirm against the installed NLTK version.
for nltk_resource in ('punkt', 'punkt_tab'):
    try:
        if nltk.download(nltk_resource):
            print(f"Successfully downloaded {nltk_resource} tokenizer")
        else:
            print(f"Error downloading NLTK resources: {nltk_resource} could not be downloaded")
    except Exception as e:
        print(f"Error downloading NLTK resources: {e}")
# Alternative tokenization function in case NLTK fails
def simple_tokenize(text):
    """Fallback tokenizer: lowercase the text and split it on whitespace."""
    lowered = text.lower()
    tokens = lowered.split()
    return tokens

# Initialize BERT model for semantic similarity with increased timeout
# On success `model` holds the loaded SentenceTransformer; on any exception it
# is set to None, and calculate_similarity_metrics then reuses the string
# similarity as the semantic score.
try:
    print("Loading BERT model...")
    # Increase timeout for model download
    # NOTE(review): this sets a new attribute on requests.adapters; nothing in
    # this cell reads it back, so confirm it has any effect on the download.
    requests.adapters.DEFAULT_TIMEOUT = 60  # Increase timeout to 60 seconds
    
    # Create a session with custom timeout
    # NOTE(review): `session` is not passed to SentenceTransformer below, so the
    # patched request method is presumably not used for the model download - verify.
    session = requests.Session()
    session.request = lambda *args, **kwargs: requests.Session.request(
        session, *args, **{**kwargs, 'timeout': 60}
    )
    
    model = SentenceTransformer('paraphrase-MiniLM-L6-v2', 
                               device='cpu',  # Explicitly use CPU
                               cache_folder='./model_cache')  # Local cache directory
    print("BERT model loaded successfully!")
except Exception as e:
    # Best-effort: the notebook keeps running without a semantic model.
    print(f"Error loading BERT model: {e}")
    print("Falling back to simpler similarity metrics...")
    model = None


def calculate_similarity_metrics(expected, actual):
    """
    Calculate multiple similarity metrics between expected and actual responses.

    Args:
        expected: Reference text (converted with str()).
        actual: Text produced by the model (converted with str()).

    Returns:
        dict with three keys:
          - 'string_similarity': difflib.SequenceMatcher ratio of the two strings.
          - 'bleu_score': smoothed sentence-level BLEU with equal 1- to 4-gram
            weights, or 0 if the BLEU computation fails.
          - 'semantic_similarity': cosine similarity of the sentence embeddings
            from the module-level `model`; falls back to the string similarity
            when `model` is None, or 0 if the embedding step fails.

    Ordinary errors in the BLEU / embedding steps are reported and replaced by 0
    so one bad sample does not abort an evaluation run. KeyboardInterrupt and
    SystemExit are not swallowed.
    """
    # Ensure inputs are strings
    expected = str(expected)
    actual = str(actual)

    # String similarity (simple ratio)
    string_similarity = SequenceMatcher(None, expected, actual).ratio()

    # BLEU score calculation
    try:
        # Tokenize both texts
        try:
            reference_tokens = word_tokenize(expected.lower())
            candidate_tokens = word_tokenize(actual.lower())
        except Exception:
            # Fallback to simple tokenization (e.g. NLTK tokenizer data missing).
            # `except Exception` rather than a bare `except:` so that an
            # interrupt from the user still stops the run.
            print("Falling back to simple tokenization")
            reference_tokens = simple_tokenize(expected)
            candidate_tokens = simple_tokenize(actual)

        # Create reference as a list of tokens
        references = [reference_tokens]

        # SmoothingFunction is imported here because the notebook's import cell
        # only brings in sentence_bleu.
        from nltk.translate.bleu_score import SmoothingFunction
        smoother = SmoothingFunction()

        # Calculate final BLEU score with equal weights
        bleu_score = sentence_bleu(references, candidate_tokens,
                                   weights=(0.25, 0.25, 0.25, 0.25),
                                   smoothing_function=smoother.method1)

        print("\nBLEU Score Details:")
        print(f"Reference tokens: {references[0][:10]}...")
        print(f"Candidate tokens: {candidate_tokens[:10]}...")
        print(f"BLEU Score: {bleu_score:.3f}")

    except Exception as e:
        print(f"Error calculating BLEU score: {e}")
        print(f"Expected text: {expected[:100]}...")
        print(f"Actual text: {actual[:100]}...")
        bleu_score = 0

    # Semantic similarity using BERT embeddings
    try:
        if model is not None:
            embeddings = model.encode([expected, actual])
            embedding1 = embeddings[0].reshape(1, -1)
            embedding2 = embeddings[1].reshape(1, -1)
            semantic_similarity = float(cosine_similarity(embedding1, embedding2)[0][0])
        else:
            # No embedding model available: reuse the string similarity.
            semantic_similarity = string_similarity
    except Exception as e:
        print(f"Error calculating semantic similarity: {e}")
        semantic_similarity = 0

    return {
        'string_similarity': string_similarity,
        'bleu_score': bleu_score,
        'semantic_similarity': semantic_similarity
    }

def load_conversations_from_s3(bucket_name, file_key, num_samples=None):
    """
    Load conversations from a JSON Lines file in S3 with optional sampling.

    Each line that parses to a JSON object containing a 'messages' key
    contributes its 'messages' value as one conversation. Blank lines are
    skipped silently; lines that are not valid UTF-8 or not valid JSON are
    reported and skipped; JSON values that are not objects are ignored.

    Args:
        bucket_name: Name of the S3 bucket.
        file_key: Key of the JSON Lines object inside the bucket.
        num_samples: If truthy and smaller than the number of conversations
            loaded, return a random sample of this size (random.sample).

    Returns:
        List of conversations, or [] if the object could not be read from S3.
    """
    conversations = []
    line_count = 0
    valid_conversations = 0

    print(f"Attempting to read file from S3: s3://{bucket_name}/{file_key}")
    try:
        # Initialize S3 client
        s3_client = boto3.client('s3')

        # Get the object from S3
        response = s3_client.get_object(Bucket=bucket_name, Key=file_key)

        # Read the content line by line
        for line in response['Body'].iter_lines():
            line_count += 1

            # Blank lines (e.g. a trailing newline) are not parse errors.
            if not line.strip():
                continue

            try:
                data = json.loads(line.decode('utf-8'))
            except (UnicodeDecodeError, json.JSONDecodeError) as e:
                # A single undecodable line must not discard the whole file.
                print(f"Error parsing line {line_count}: {e}")
                continue

            # Only JSON objects can carry a 'messages' entry; the isinstance
            # check keeps scalars such as `42` from raising on the `in` test.
            if isinstance(data, dict) and 'messages' in data:
                conversations.append(data['messages'])
                valid_conversations += 1
                if valid_conversations % 100 == 0:
                    print(f"Loaded {valid_conversations} valid conversations")

        print(f"\nLoading complete:")
        print(f"Total lines processed: {line_count}")
        print(f"Valid conversations found: {valid_conversations}")

        # Apply sampling if specified
        if num_samples and num_samples < len(conversations):
            conversations = random.sample(conversations, num_samples)
            print(f"Sampled {num_samples} conversations for testing")

        return conversations

    except Exception as e:
        # Best-effort loader: S3 failures are reported and yield an empty list.
        print(f"Error reading from S3: {e}")
        return []
    
    

class BedrockModelTester:
    """
    Thin helper that sends a conversation's last user message to a Bedrock
    model and scores the answer against the conversation's reference reply.
    """

    def __init__(self, model_id, region='us-west-2', max_retries=100):
        """
        Args:
            model_id: Model identifier passed as `modelId` to invoke_model.
            region: AWS region for the bedrock-runtime client.
            max_retries: Value used for botocore's 'total_max_attempts'.
        """
        self.model_id = model_id
        self.region = region
        self.config = Config(
            retries={
                'total_max_attempts': max_retries,
                'mode': 'standard'
            }
        )
        self.session = boto3.session.Session()
        self.br_runtime = self.session.client(
            service_name='bedrock-runtime',
            region_name=self.region,
            config=self.config
        )

    def invoke_model(self, conversation):
        """
        Invoke the model with a conversation.

        Args:
            conversation: List of message dicts with 'role' and 'content' keys.

        Returns:
            On success, a dict with 'conversation', 'last_user_message',
            'expected_response', 'model_response' (the parsed response body),
            'similarity_metrics', 'latency' (seconds) and 'status' == 'success'.
            On any exception, a dict with 'conversation', 'error' and
            'status' == 'error'.
        """
        try:
            start_time = time.time()

            # Get the last user message from the conversation
            user_messages = [msg for msg in conversation if msg['role'] == 'user']
            if not user_messages:
                raise ValueError("No user message found in conversation")

            last_user_message = user_messages[-1]['content']

            # Prepare the prompt
            prompt = last_user_message

            response = self.br_runtime.invoke_model(
                modelId=self.model_id,
                body=json.dumps({'prompt': prompt}),
                accept="application/json",
                contentType="application/json"
            )

            end_time = time.time()

            response_body = json.loads(response["body"].read().decode("utf-8"))

            # The prompt is the LAST user message, so the reference answer is
            # the LAST assistant message (the first one belongs to an earlier
            # turn in multi-turn conversations).
            assistant_replies = [msg['content'] for msg in conversation if msg['role'] == 'assistant']
            expected_response = assistant_replies[-1] if assistant_replies else None

            # Score the generated text itself, not the stringified response
            # dict; fall back to the whole body when there is no 'generation'.
            if isinstance(response_body, dict) and 'generation' in response_body:
                generated_text = str(response_body['generation'])
            else:
                generated_text = str(response_body)

            # Calculate similarity metrics
            similarity_metrics = (
                calculate_similarity_metrics(expected_response, generated_text)
                if expected_response else {}
            )

            return {
                'conversation': conversation,
                'last_user_message': last_user_message,
                'expected_response': expected_response,
                'model_response': response_body,
                'similarity_metrics': similarity_metrics,
                'latency': end_time - start_time,
                'status': 'success'
            }
        except Exception as e:
            # Errors are reported in the result instead of being raised so a
            # batch of conversations can continue.
            print(f"Error in invoke_model: {str(e)}")
            return {
                'conversation': conversation,
                'error': str(e),
                'status': 'error'
            }

def analyze_and_visualize_results(results):
    """
    Analyze results and create visualizations with similarity metrics.

    Accepts both result shapes produced in this notebook:
      - dicts carrying a 'status' key ('success' / 'error'), as returned by
        BedrockModelTester.invoke_model;
      - dicts without 'status', where a failure is marked by an 'error' key,
        as returned by run_tests_from_s3.

    Args:
        results: List of result dicts.

    Returns:
        DataFrame with one row per successful result (columns: status, latency,
        string_similarity, bleu_score, semantic_similarity, has_system_message),
        or an empty DataFrame when there are no successful results.
        Plotting errors are reported and do not prevent the DataFrame from
        being returned.
    """
    def _is_success(result):
        # Prefer the explicit status; otherwise "no error key" means success.
        if 'status' in result:
            return result['status'] == 'success'
        return 'error' not in result

    # Create DataFrame for analysis
    df = pd.DataFrame([{
        'status': r.get('status', 'success'),
        'latency': r.get('latency', 0),
        'string_similarity': r.get('similarity_metrics', {}).get('string_similarity', 0),
        'bleu_score': r.get('similarity_metrics', {}).get('bleu_score', 0),
        'semantic_similarity': r.get('similarity_metrics', {}).get('semantic_similarity', 0),
        'has_system_message': r.get('system_message') is not None
    } for r in results if _is_success(r)])

    # Check if we have any successful results
    if len(df) == 0:
        print("No successful results to analyze!")
        return pd.DataFrame()  # Return empty DataFrame

    # From here on df is guaranteed to be non-empty.
    try:
        # Create visualizations
        fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(2, 2, figsize=(15, 10))

        # Status distribution
        success_rate = len(df) / len(results) * 100
        ax1.pie([success_rate, 100-success_rate], labels=['Success', 'Error'], autopct='%1.1f%%')
        ax1.set_title('Test Status Distribution')

        # Latency distribution
        sns.histplot(data=df, x='latency', ax=ax2)
        ax2.set_title('Latency Distribution')
        ax2.set_xlabel('Latency (seconds)')

        # Similarity metrics distributions
        sns.boxplot(data=df[['string_similarity', 'bleu_score', 'semantic_similarity']], ax=ax3)
        ax3.set_title('Similarity Metrics Distribution')
        ax3.set_ylim(0, 1)

        # Scatter plot of semantic vs string similarity
        sns.scatterplot(data=df, x='semantic_similarity', y='string_similarity', ax=ax4)
        ax4.set_title('Semantic vs String Similarity')

        plt.tight_layout()
        plt.show()

        # Print summary statistics
        print("\nSummary Statistics:")
        print(f"Total tests: {len(results)}")
        print(f"Successful tests: {len(df)}")
        print(f"Failed tests: {len(results) - len(df)}")

        print("\nSimilarity Metrics Statistics:")
        print(df[['string_similarity', 'bleu_score', 'semantic_similarity']].describe())

    except Exception as e:
        print(f"Error during visualization: {e}")

    return df

# Usage example
def run_tests_from_s3(model_id, s3_uri, num_samples=10):
    """
    Run tests with specified number of samples from S3.

    Loads `<s3_uri>/test.json` (JSON Lines), sends each conversation's last
    user message (prefixed by the system message, if any) to the Bedrock model,
    scores the answer against the last assistant message, and stores all
    results under `<s3_uri>/test_results/` in the same bucket.

    Args:
        model_id: Model identifier passed as `modelId` to invoke_model.
        s3_uri: `s3://bucket/prefix` location that contains `test.json`.
        num_samples: Number of conversations to sample for the test.

    Returns:
        (results, df): `results` is a list with one dict per processed
        conversation. Successful entries hold the input, expected and model
        output, metrics and response metadata, and 'status' == 'success';
        failed entries hold 'input', 'error', 'full_conversation' and
        'status' == 'error'. `df` has the similarity metrics of the successful
        entries. On a failure outside the per-conversation loop, returns
        ([], empty DataFrame).
    """
    # Imported locally: the import cell above this definition does not import traceback.
    import traceback

    try:
        print(f"Starting test with S3 URI: {s3_uri}")

        # Parse S3 URI; stripping '/' keeps a trailing slash in the URI from
        # producing keys with an empty path segment ("prefix//test.json").
        s3_path = s3_uri.replace('s3://', '', 1).strip('/')
        bucket_name, _, prefix = s3_path.partition('/')
        key_prefix = f"{prefix}/" if prefix else ""

        # Load conversations
        conversations = load_conversations_from_s3(bucket_name, f"{key_prefix}test.json", num_samples)

        if not conversations:
            print("No conversations loaded!")
            return [], pd.DataFrame()

        # Initialize Bedrock Runtime client
        br_runtime = boto3.client('bedrock-runtime')
        results = []

        # Test each conversation
        for idx, messages in enumerate(conversations, 1):
            # Reset per-conversation state so the error handler can never
            # report values left over from a previous iteration.
            input_text = None
            response_body = None
            try:
                print(f"\nTesting conversation {idx}/{len(conversations)}")

                # Extract messages by role
                system_message = next((msg['content'] for msg in messages if msg['role'] == 'system'), None)
                user_messages = [msg for msg in messages if msg['role'] == 'user']
                assistant_messages = [msg for msg in messages if msg['role'] == 'assistant']

                if not user_messages or not assistant_messages:
                    print("Skipping conversation - missing user or assistant messages")
                    continue

                # Get the last user message and expected assistant response
                input_text = user_messages[-1]['content']
                expected_output = assistant_messages[-1]['content']

                # Construct prompt
                prompt = f"{system_message}\n\n{input_text}" if system_message else input_text

                # Invoke model
                body = json.dumps({
                    'prompt': prompt,
                    'max_tokens': 2048,
                    'temperature': 0.7,
                    'top_p': 0.9,
                })

                response = br_runtime.invoke_model(
                    modelId=model_id,
                    body=body,
                    accept='application/json',
                    contentType='application/json'
                )

                # Parse response; the generated text is under 'generation'
                response_body = json.loads(response['body'].read())
                model_output = response_body['generation']

                # Print response details for debugging
                print("\nResponse details:")
                print(f"Input text: {input_text[:100]}...")
                print(f"Expected output: {expected_output[:100]}...")
                print(f"Model output: {model_output[:100]}...")

                # Calculate metrics
                similarity_metrics = calculate_similarity_metrics(expected_output, model_output)

                # Store results with additional metadata. 'status' mirrors the
                # key that analyze_and_visualize_results reads.
                result = {
                    'status': 'success',
                    'input': input_text,
                    'expected_output': expected_output,
                    'model_output': model_output,
                    'system_message': system_message,
                    'similarity_metrics': similarity_metrics,
                    'full_conversation': messages,
                    'response_metadata': {
                        'generation_token_count': response_body.get('generation_token_count'),
                        'prompt_token_count': response_body.get('prompt_token_count'),
                        'stop_reason': response_body.get('stop_reason')
                    }
                }
                results.append(result)

                print(f"✓ Success - Metrics:")
                print(f"  String Similarity: {similarity_metrics['string_similarity']:.2f}")
                print(f"  BLEU Score: {similarity_metrics['bleu_score']:.2f}")
                print(f"  Semantic Similarity: {similarity_metrics['semantic_similarity']:.2f}")
                print(f"  Tokens generated: {result['response_metadata']['generation_token_count']}")

            except Exception as e:
                print(f"Error processing conversation {idx}: {str(e)}")
                print(f"Full error details: {traceback.format_exc()}")
                # Only dump the response when this iteration actually got one.
                if response_body is not None:
                    print(f"Response body: {json.dumps(response_body, indent=2, default=str)}")
                # Record exactly one error entry for this conversation; the
                # handler must not re-append or re-print an earlier success.
                results.append({
                    'status': 'error',
                    'input': input_text,
                    'error': str(e),
                    'full_conversation': messages
                })

        # Save results
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        result_key = f"{key_prefix}test_results/bedrock_test_results_{timestamp}.json"

        s3_client = boto3.client('s3')
        s3_client.put_object(
            Bucket=bucket_name,
            Key=result_key,
            Body=json.dumps(results, indent=2)
        )

        print(f"\nResults saved to s3://{bucket_name}/{result_key}")

        # Create summary DataFrame
        df = pd.DataFrame([{
            'string_similarity': r['similarity_metrics']['string_similarity'],
            'bleu_score': r['similarity_metrics']['bleu_score'],
            'semantic_similarity': r['similarity_metrics']['semantic_similarity']
        } for r in results if 'error' not in r])

        print("\nTest Summary:")
        print(f"Total tests: {len(results)}")
        print(f"Successful tests: {len(df)}")
        if not df.empty:
            print(f"Average String Similarity: {df['string_similarity'].mean():.3f}")
            print(f"Average BLEU Score: {df['bleu_score'].mean():.3f}")
            print(f"Average Semantic Similarity: {df['semantic_similarity'].mean():.3f}")

        return results, df

    except Exception as e:
        # Failures outside the per-conversation loop (loading, saving) end the
        # run with empty results instead of raising.
        print(f"Error running tests: {e}")
        print(f"Full error: {traceback.format_exc()}")
        return [], pd.DataFrame()
    
# Evaluation run configuration (account-specific values: replace with your own).
# MODEL_ID is passed to run_tests_from_s3 as the Bedrock `modelId`.
MODEL_ID = "arn:aws:bedrock:us-west-2:786045444066:imported-model/686s53mpwdkw"
# S3 prefix of the test data; run_tests_from_s3 reads `<S3_URI>/test.json`
# and writes its results under `<S3_URI>/test_results/`.
S3_URI = "s3://sagemaker-us-west-2-786045444066/Llama3-QAPipeline/ihlejj4nmbtt/PreprocessQADataset/output/test"  # Replace with your actual S3 URI
# Passed as num_samples: conversations are randomly sampled down to this count.
NUM_SAMPLES = 10

# Returns the per-conversation result dicts and a DataFrame of similarity metrics.
results, df = run_tests_from_s3(MODEL_ID, S3_URI, NUM_SAMPLES)