In [None]:
import datetime
import json

import boto3

In [None]:
# Every client in this notebook is created from this session, so all calls use
# the credentials and configuration of the named AWS profile.
# (boto3.Session is the same class as boto3.session.Session.)
session = boto3.Session(profile_name='dev2.DevAdministratorAccess')

In [None]:
# Region configured on the session, and the account id reported by STS for the
# session's credentials. Both are used to build the bucket name.
region_name = session.region_name
caller_identity = session.client('sts').get_caller_identity()
account_id = caller_identity['Account']
(region_name, account_id)

In [None]:
# One client per AWS service used below, all created from the same session
# (SageMaker runtime, S3, SageMaker control plane - in that order).
sm_runtime, s3_client, sage = (
    session.client(service_name)
    for service_name in ('sagemaker-runtime', 's3', 'sagemaker')
)

In [None]:
# Use the most recently created SageMaker model visible to this session.
# The sort is spelled out (it matches the ListModels defaults) so the choice is
# explicit, and an empty result produces a clear error rather than an
# IndexError from `[0]`.
models = sage.list_models(
    SortBy='CreationTime',
    SortOrder='Descending',
    MaxResults=1,
)['Models']
if not models:
    raise RuntimeError('No SageMaker models found for this account and region')
model_name = models[0]['ModelName']

In [None]:
# Name of the S3 bucket used for batch input, manifests and output below;
# derived from the account id and region resolved from the session.
bucket_name = f'pod-transcription-{account_id}-{region_name}'

In [None]:
# Initial manifest content: a single entry carrying the S3 prefix under which
# the batch input documents are stored. Object names relative to this prefix
# are added by a later cell.
input_prefix_uri = f"s3://{bucket_name}/whisper-batch-input/"
manifest = [{"prefix": input_prefix_uri}]

In [None]:
# For each audio sample, upload a small JSON document (naming the bucket and
# key of the audio object) under whisper-batch-input/ and list that document
# in the manifest.
samples = ['sample1.m4a', 'sample2.m4a', '37-migrating-monoliths.m4a']
input_files = []
for sample in samples:
    input_data = json.dumps({
        'bucket_name': bucket_name,
        'object_key': f'audio/{sample}',
    })
    input_file = f'{sample}.json'
    input_data_key = f'whisper-batch-input/{input_file}'
    # After the loop this holds the location of the last sample only.
    input_location = f's3://{bucket_name}/{input_data_key}'
    s3_client.put_object(Bucket=bucket_name, Key=input_data_key, Body=input_data)
    input_files.append(input_file)
    print(f'Wrote {input_data} to s3://{bucket_name}/{input_data_key}')

# Rebuild the manifest from its leading prefix entry instead of appending in
# place, so re-running this cell does not add duplicate entries.
manifest = manifest[:1] + input_files

In [None]:
# Load the shared tags (iterated below as 'key=value' strings) and convert
# them to the [{'Key': ..., 'Value': ...}] form passed as Tags to SageMaker.
with open('../common-tags.json') as f:
    tags = json.load(f)

# Split on the first '=' only, so a value that itself contains '=' is kept
# intact instead of raising "too many values to unpack".
tag_dicts = [
    {'Key': key, 'Value': value}
    for key, value in (tag.split('=', 1) for tag in tags)
]
tag_dicts

In [None]:
# Upload the manifest and start a SageMaker batch transform job over it.
# The job name is a local-time timestamp; it is also used for the manifest
# object name and the output prefix.
job_name = datetime.datetime.now().strftime('%Y%m%d%H%M%S')
manifest_key = f'whisper-batch-input/{job_name}.manifest'
manifest_location = f's3://{bucket_name}/{manifest_key}'

# Upload first, then report: the message should only appear once the write
# has actually succeeded.
s3_client.put_object(Bucket=bucket_name, Key=manifest_key, Body=json.dumps(manifest))
print(f'Wrote {manifest} to {manifest_location}')

output_location = f's3://{bucket_name}/whisper-batch-output/{job_name}'
response = sage.create_transform_job(
    TransformJobName=job_name,
    ModelName=model_name,
    MaxConcurrentTransforms=1,
    # Each input document is sent to the model as its own request.
    BatchStrategy='SingleRecord',
    # Allow up to an hour per invocation and never retry a failed one.
    ModelClientConfig={
        'InvocationsTimeoutInSeconds': 3600,
        'InvocationsMaxRetries': 0
    },
    TransformInput={
        'DataSource': {
            'S3DataSource': {
                # The S3Uri points at the manifest uploaded above, not at a prefix.
                'S3DataType': 'ManifestFile',
                'S3Uri': manifest_location
            }
        },
        'ContentType': 'application/json'
    },
    TransformOutput={
        'S3OutputPath': output_location,
        'Accept': 'application/json'
    },
    TransformResources={
        'InstanceType': 'ml.g4dn.xlarge',    # cheapest/slow: 'ml.m4.xlarge'
        'InstanceCount': 1,
    },
    Tags=tag_dicts
)
response

In [None]:
# Asynchronously invoke a SageMaker endpoint with the request document stored
# at `input_location` (after the upload loop, that is the last sample only).
# NOTE(review): `endpoint_name` is not assigned in any cell visible in this
# notebook, so on a fresh kernel this cell raises NameError - confirm where it
# is meant to come from, or whether this cell is a leftover.
sm_runtime.invoke_endpoint_async(
    EndpointName=endpoint_name,
    InputLocation=input_location,
    ContentType='application/json',
    Accept='application/json',
)

In [None]:
# Delete the SageMaker endpoint named by `endpoint_name`.
# NOTE(review): `endpoint_name` is not assigned in any cell visible in this
# notebook, so on a fresh kernel this raises NameError - confirm the intended
# endpoint before running, since this call is destructive.
sage.delete_endpoint(EndpointName=endpoint_name)

In [None]:
# Intentionally empty: a bare undefined name here would raise NameError and
# break Restart Kernel -> Run All.