### Setup
Import all the libraries and dependencies needed to set up this notebook.

In [1]:
import boto3

# AWS region used for every client created in this cell.
region = 'us-west-2'

# A single region-pinned session; all clients below are created from it so the
# region is specified in exactly one place.
session = boto3.session.Session(region_name = region)
sts_client = session.client('sts')
s3_client = session.client('s3')

# The caller's account id is embedded in the bucket name below.
account_id = sts_client.get_caller_identity()["Account"]
# `bedrock` is used later for customization jobs and provisioned throughput;
# `bedrock_runtime` is used later to invoke the model.
bedrock = session.client(service_name="bedrock")
bedrock_runtime = session.client(service_name="bedrock-runtime")

# Bucket name is derived from region + account id.
# NOTE(review): the bucket is not created in the cells visible here —
# presumably it already exists from a data-preparation step; confirm.
s3_suffix = f"{region}-{account_id}"
bucket_name = f"bedrock-customization-finetune-{s3_suffix}"

train_file_name="train-cfpb-complaints.jsonl"
validation_file_name="validation-cfpb-complaints.jsonl"
test_file_name="test-cfpb-complaints.jsonl"

# Common S3 key prefix for all three dataset splits.
data_folder = "fine-tuning-datasets"

# Build the dataset URIs from `data_folder` so the prefix is defined once.
s3_train_uri=f's3://{bucket_name}/{data_folder}/train/{train_file_name}'
s3_validation_uri=f's3://{bucket_name}/{data_folder}/validation/{validation_file_name}'
s3_test_uri=f's3://{bucket_name}/{data_folder}/test/{test_file_name}'

In [2]:
# Look up the ARN of the IAM role by its role name; the ARN is passed as
# `roleArn` when the customization job is created later in the notebook.
import boto3
# Unlike the clients in the setup cell, this client is created without an explicit region_name.
iam = boto3.client('iam')

role_name = "AmazonBedrockCustomizationRole_FineTuning"
response = iam.get_role(RoleName=role_name)
role_arn = response['Role']['Arn']
print(role_arn)


arn:aws:iam::448407886166:role/AmazonBedrockCustomizationRole_FineTuning


### Check all required variables are correct

In [3]:
import pprint

# Echo the role ARN, the three dataset URIs and the bucket name, one per line,
# so they can be checked by eye before the job is submitted.
for configured_value in (
    role_arn,
    s3_train_uri,
    s3_validation_uri,
    s3_test_uri,
    bucket_name,
):
    pprint.pp(configured_value)

'arn:aws:iam::448407886166:role/AmazonBedrockCustomizationRole_FineTuning'
's3://bedrock-customization-finetune-us-west-2-448407886166/fine-tuning-datasets/train/train-cfpb-complaints.jsonl'
's3://bedrock-customization-finetune-us-west-2-448407886166/fine-tuning-datasets/validation/validation-cfpb-complaints.jsonl'
's3://bedrock-customization-finetune-us-west-2-448407886166/fine-tuning-datasets/test/test-cfpb-complaints.jsonl'
'bedrock-customization-finetune-us-west-2-448407886166'


In [4]:
import warnings
# Install a blanket 'ignore' filter: no warning of any category is shown after this line.
# NOTE(review): this also hides deprecation warnings from the libraries below — confirm that is intended.
warnings.filterwarnings('ignore')
import json
import os
import sys
import boto3
# NOTE(review): os, sys and pandas are not referenced in the cells visible here — confirm they are still needed.
import pandas as pd

### Create the fine tuning job
For guidance on setting the hyperparameters, refer to the Amazon Bedrock documentation:
https://docs.aws.amazon.com/bedrock/latest/userguide/cm-hp-titan-text.html

In [5]:
from datetime import datetime

# Timestamp (to the second) embedded in the job name and the model name below.
ts = datetime.now().strftime("%Y-%m-%d-%H-%M-%S")

# Foundation model to customize. Model ids are listed at
# https://docs.aws.amazon.com/bedrock/latest/userguide/bedrock-reference.html
base_model_id = "amazon.titan-text-express-v1:0:8k"

# Customization type: either "FINE_TUNING" or "CONTINUED_PRE_TRAINING".
customization_type = "FINE_TUNING"

# Role ARN passed as the job's roleArn (looked up earlier in the notebook).
customization_role = role_arn

# Names for the customization job and for the resulting custom model.
customization_job_name = f"titan-text-express-fine-tuned-model-{ts}"
custom_model_name = f"titan-text-express-fine-tuned-{ts}"

# Hyperparameters for fine-tuning the Titan Express model (values are passed as strings).
hyper_parameters = {
    "epochCount": "2",
    "batchSize": "1",
    "learningRate": "0.00005",
    "learningRateWarmupSteps": "10",
}

# S3 locations: training data, validation data (optional) and job output.
training_data_config = {"s3Uri": s3_train_uri}
validation_data_config = {"validators": [{"s3Uri": s3_validation_uri}]}
output_data_config = {"s3Uri": f's3://{bucket_name}/outputs/output-{custom_model_name}'}

# Assemble the request once, then submit it; the API response is the cell output.
customization_job_request = {
    "customizationType": customization_type,
    "jobName": customization_job_name,
    "customModelName": custom_model_name,
    "roleArn": customization_role,
    "baseModelIdentifier": base_model_id,
    "hyperParameters": hyper_parameters,
    "trainingDataConfig": training_data_config,
    "validationDataConfig": validation_data_config,
    "outputDataConfig": output_data_config,
}
bedrock.create_model_customization_job(**customization_job_request)

{'ResponseMetadata': {'RequestId': 'e7a6810b-9728-4bd2-936f-226b01419065',
  'HTTPStatusCode': 201,
  'HTTPHeaders': {'date': 'Tue, 14 May 2024 17:13:56 GMT',
   'content-type': 'application/json',
   'content-length': '122',
   'connection': 'keep-alive',
   'x-amzn-requestid': 'e7a6810b-9728-4bd2-936f-226b01419065'},
  'RetryAttempts': 0},
 'jobArn': 'arn:aws:bedrock:us-west-2:448407886166:model-customization-job/amazon.titan-text-express-v1:0:8k/c3o8fkkuzylk'}

In [None]:
# Poll the fine-tuning job once a minute until it reaches a terminal status.
# It can also be monitored from the console: Foundation models -> Custom Models -> Training Jobs
import time

# "Stopping" is transitional, like "InProgress", so polling continues through both.
in_flight_statuses = ("InProgress", "Stopping")

job_details = bedrock.get_model_customization_job(jobIdentifier=customization_job_name)
fine_tune_job = job_details["status"]
print(fine_tune_job)

while fine_tune_job in in_flight_statuses:
    time.sleep(60)
    job_details = bedrock.get_model_customization_job(jobIdentifier=customization_job_name)
    fine_tune_job = job_details["status"]
    print (fine_tune_job)

# Stop here if the job did not complete: the following cells need the custom
# model to exist and would otherwise fail with a less helpful error.
if fine_tune_job != "Completed":
    raise RuntimeError(
        f"Customization job {customization_job_name} ended with status {fine_tune_job}: "
        f"{job_details.get('failureMessage', 'no failure message returned')}"
    )

In [None]:
# List custom models; the raw API response is shown as the cell output.
# NOTE(review): a single call is made with no pagination arguments — presumably fine for a handful of models; confirm.
bedrock.list_custom_models()

<div class="alert alert-block alert-info">
<b>Note:</b> Please make sure your customization job status is "Completed" before proceeding to retrieve the modelArn; otherwise you will run into errors. </div>


In [None]:
# Retrieve the modelArn of the fine-tuned model.
# Note: `fine_tune_job` held the job status string in the polling cell; from here on
# it holds the get_custom_model response, whose 'jobArn' is read in the next cell.
fine_tune_job = bedrock.get_custom_model(modelIdentifier=custom_model_name)
custom_model_id = fine_tune_job['modelArn']

In [None]:
# Fixed prefix + the last '/'-separated segment of the job ARN.
# NOTE(review): presumably this matches the folder name written under the output S3 URI — confirm.
job_arn_tail = fine_tune_job['jobArn'].rsplit('/', 1)[-1]
output_job_name = f"model-customization-job-{job_arn_tail}"
output_job_name

In [None]:
# Create provisioned throughput for the custom model and keep the returned ARN,
# which later cells use as the provisioned model id. This can take up to 30 minutes.
provisioning_response = bedrock.create_provisioned_model_throughput(
    modelUnits=1,
    provisionedModelName='test-model-v1-001',  # name given to the provisioned throughput
    modelId=custom_model_id,
)
provisioned_model_id = provisioning_response['provisionedModelArn']

In [None]:
# Show the provisioned model ARN returned by create_provisioned_model_throughput.
print(provisioned_model_id)

In [None]:
# Poll the provisioned throughput once a minute until it leaves the 'Creating'
# status; provisioning takes about 15 minutes.
import time

provisioning_details = bedrock.get_provisioned_model_throughput(provisionedModelId = provisioned_model_id)
status_provisioning = provisioning_details['status']
# Print the initial status too, matching the job-status polling cell.
print(status_provisioning)

while status_provisioning == 'Creating':
    time.sleep(60)
    provisioning_details = bedrock.get_provisioned_model_throughput(provisionedModelId=provisioned_model_id)
    status_provisioning = provisioning_details['status']
    print(status_provisioning)

# Stop here on failure so the invocation cell is not run against an unusable model.
if status_provisioning == 'Failed':
    raise RuntimeError(
        f"Provisioned throughput {provisioned_model_id} failed: "
        f"{provisioning_details.get('failureMessage', 'no failure message returned')}"
    )

In [None]:
import json

# Prompt sent to the provisioned custom model.
prompt_data = 'How to deal with extra charge on credit card'

# Text-generation settings sent alongside the prompt.
text_gen_config = {"maxTokenCount": 512, "stopSequences": [], "temperature": 0, "topP": 0.9}

# Request body: the prompt plus the generation settings, serialized to JSON.
request_payload = {"inputText": prompt_data, "textGenerationConfig": text_gen_config}
body = json.dumps(request_payload)

# The provisioned model ARN is used as the modelId for invocation.
modelId = provisioned_model_id
accept = 'application/json'
contentType = 'application/json'

# Invoke the provisioned custom model.
response = bedrock_runtime.invoke_model(
    modelId=modelId,
    contentType=contentType,
    accept=accept,
    body=body,
)

# The response body is a stream: read it fully, then decode the JSON.
raw_response_body = response.get('body').read()
response_body = json.loads(raw_response_body)
print(response_body)

In [None]:
# Delete the provisioned throughput created earlier in the notebook once you are done with it.
bedrock.delete_provisioned_model_throughput(provisionedModelId=provisioned_model_id)