In [16]:
!pip install --upgrade boto3

Collecting boto3
  Downloading boto3-1.35.10-py3-none-any.whl (139 kB)
     ------------------------------------ 139.2/139.2 kB 920.8 kB/s eta 0:00:00
Collecting botocore<1.36.0,>=1.35.10
  Downloading botocore-1.35.10-py3-none-any.whl (12.5 MB)
     --------------------------------------- 12.5/12.5 MB 17.7 MB/s eta 0:00:00
Installing collected packages: botocore, boto3
  Attempting uninstall: botocore
    Found existing installation: botocore 1.34.161
    Uninstalling botocore-1.34.161:
      Successfully uninstalled botocore-1.34.161
  Attempting uninstall: boto3
    Found existing installation: boto3 1.34.161
    Uninstalling boto3-1.34.161:
      Successfully uninstalled boto3-1.34.161
Successfully installed boto3-1.35.10 botocore-1.35.10


In [8]:
from boto3 import Session

sess = Session()
bedrock = sess.client('bedrock')
s3 = sess.client('s3')

In [10]:
import json
import random

# List of sample questions and statements
samples = [
    "What are the different services that you offer?",
    "How can I implement product search using embeddings?",
    "Explain the benefits of Amazon Titan Text Embeddings V2",
    "Convert this sentence into a vector representation",
    "What is the best way to optimize database queries?",
    "How does machine learning impact business decisions?",
    "Explain the concept of cloud computing",
    "What are the advantages of using microservices architecture?",
    "How can I improve my website's SEO?",
    "What are the key features of blockchain technology?"
]

with open('data.jsonl', 'w') as f:
    for _ in range(1000):
        data = {
            "inputText": random.choice(samples),
            "dimensions": 256,
            "normalize": True
        }
        f.write(json.dumps(data) + '\n')

In [11]:
s3.create_bucket(Bucket='bedrock-ml-scratch')
s3.upload_file('data.jsonl', 'bedrock-ml-scratch', 'data.jsonl')

In [16]:
inputDataConfig=({
    "s3InputDataConfig": {
        "s3Uri": "s3://bedrock-ml-scratch/data.jsonl"
    }
})

outputDataConfig=({
    "s3OutputDataConfig": {
        "s3Uri": "s3://bedrock-ml-scratch/"
    }
})

response=bedrock.create_model_invocation_job(
    roleArn="your-bedrock-s3-role",
    modelId="amazon.titan-embed-text-v2:0",
    jobName="my-batch-job",
    inputDataConfig=inputDataConfig,
    outputDataConfig=outputDataConfig
)

jobArn = response.get('jobArn')

In [17]:
bedrock.get_model_invocation_job(jobIdentifier=jobArn)['status']

'Submitted'

In [18]:
# Get operation total time
import time
start = time.time()
while bedrock.get_model_invocation_job(jobIdentifier=jobArn)['status'] != 'Completed':
    time.sleep(5)
    print('Waiting for completion...')
end = time.time()

Waiting for completion...
Waiting for completion...
Waiting for completion...
Waiting for completion...
Waiting for completion...
Waiting for completion...
Waiting for completion...
Waiting for completion...
Waiting for completion...
Waiting for completion...
Waiting for completion...
Waiting for completion...
Waiting for completion...
Waiting for completion...
Waiting for completion...
Waiting for completion...
Waiting for completion...
Waiting for completion...
Waiting for completion...
Waiting for completion...
Waiting for completion...
Waiting for completion...
Waiting for completion...
Waiting for completion...
Waiting for completion...
Waiting for completion...
Waiting for completion...
Waiting for completion...
Waiting for completion...
Waiting for completion...
Waiting for completion...
Waiting for completion...
Waiting for completion...
Waiting for completion...
Waiting for completion...
Waiting for completion...
Waiting for completion...
Waiting for completion...
Waiting for 

In [24]:
print(f'Job completed in {(end-start):.2f} seconds')

Job completed in 479.25 seconds


In [303]:
response = s3.get_object(Bucket='bedrock-ml-scratch', Key='data.jsonl.out')
response['Body'].read().decode().split('\n')[0]

'{"modelInput":{"inputText":"What are the different services that you offer?","dimensions":1024,"normalize":true},"modelOutput":{"embedding":[-0.13351558,0.0070741284,0.009340596,-0.012293873,0.010027405,-0.0067650643,0.017444938,0.012225193,-0.04642826,0.0046016173,0.0023694895,-0.049999665,-0.058790814,-0.02005481,0.010576853,0.0609886,0.04725243,0.011057618,0.06950503,0.02967013,-0.03681294,0.022802046,0.049175493,0.023626216,0.0175823,-0.026236087,0.008516426,0.03598877,0.09120818,-0.07087865,-0.010027405,0.03118111,0.025411917,0.051922727,0.006559022,0.025137194,0.026098726,0.063735835,-0.019368002,0.011263661,0.017032852,0.027747067,9.486544E-4,-0.01442298,0.020329533,0.032142643,0.04395575,0.020466896,-0.0018028725,-0.020329533,0.028159153,0.03846128,-0.002317979,0.03681294,0.03447779,-0.01682681,-0.03035694,0.020466896,-0.03118111,-0.024999833,-0.009134554,0.03681294,0.00642166,-0.018955916,0.06977975,0.0146977035,-0.0019488194,-0.022939406,-0.015796598,0.035439324,-0.04313158,

In [306]:
out = 'manifest.json.out'
response = s3.get_object(Bucket='bedrock-ml-scratch', Key=out)
response['Body'].read().decode()

'{"totalRecordCount":1000,"processedRecordCount":1000,"successRecordCount":1000,"errorRecordCount":0,"inputTokenCount":9957,"outputTokenCount":0}\n'