# Dubbing Pipeline with CDK

After deploying the solution with CDK, you can invoke the pipeline by
uploading a sample video and a job file to the pipeline's S3 bucket.

In [None]:
import json
import boto3
from datetime import datetime
from time import sleep
from IPython.display import clear_output


In [None]:
## Required parameters: set these before running the rest of the notebook

# Region and S3 parameters
# The region is read from the default boto3 session.
region_name = boto3.session.Session().region_name

# Job name (any value); used for creating the s3 prefixes
job_name = "lipsync-4"

# Source
# Source video to localize
source_file = "./samples/aws-fr.mp4"
# Media format for Amazon Transcribe
media_format = "mp4"

# Transcription
# Amazon Transcribe language code: en-US, es-US, ...
transcribe_source_language_code = "fr-CA"

# Translation
# Amazon Translate language codes: en, es, ...
translate_source_language_code = "fr-CA"
translate_target_language_code = "en"



In [None]:
# Helper functions

def get_bucket(s3_uri):
    """Return the bucket portion of an ``s3://bucket/key`` style URI.

    The URI is split on "/" and the third component is returned, so for
    ``s3://my-bucket/a/b.mp4`` the result is ``my-bucket``.
    """
    uri_parts = s3_uri.split("/")
    # Components 0 and 1 are the "s3:" scheme and the empty string between
    # the two slashes; the bucket name follows them.
    return uri_parts[2]
    
def get_key(s3_uri):
    """Return the object-key portion of an ``s3://bucket/key`` style URI.

    Everything after the third "/" is the key, so for
    ``s3://my-bucket/a/b.mp4`` the result is ``a/b.mp4``. A URI with no key
    part yields an empty string.
    """
    # Split at most three times so the key (which may itself contain "/")
    # stays intact as the fourth component.
    uri_parts = s3_uri.split("/", 3)
    if len(uri_parts) < 4:
        return ""
    return uri_parts[3]
    

In [None]:
# Create the AWS clients used to read the pipeline stack outputs and to
# talk to S3, both bound to the region selected in the parameters cell.

cf_client = boto3.client(
    'cloudformation',
    region_name=region_name,
)
s3_client = boto3.client(
    's3',
    region_name=region_name,
)


In [None]:
# Retrieve stack outputs
sm_stack_name = "SageMakerEndpointsStack"
vd_stack_name = "VisualDubbingLipsyncCdkStack"

stacks = [sm_stack_name, vd_stack_name]

# Outputs of all stacks are merged into a single flat mapping of
# OutputKey -> OutputValue. Stacks are processed in list order, so a key
# that appears in more than one stack keeps the value of the later stack.
stacks_output_dict = {}

for stack_name in stacks:
    print(f"Retrieving CF stack details: {stack_name}")
    response = cf_client.describe_stacks(StackName=stack_name)

    print("Parsing response")
    # Only the first stack description in the response is read.
    output_dict = {}
    for entry in response['Stacks'][0]['Outputs']:
        output_dict[entry['OutputKey']] = entry['OutputValue']

    stacks_output_dict.update(output_dict)

# Display the merged outputs as the cell result
stacks_output_dict

In [None]:
# Create the object keys

bucket = stacks_output_dict['VDBucketOutput']

# Only the file name of the local source path is used inside the S3 keys.
source_file_name = source_file.split('/')[-1]

input_job_key = f"inputs/{job_name}/pipeline_job/{job_name}.json"
input_video_key = f"inputs/{job_name}/videos/{source_file_name}"
output_video_key = f"outputs/{job_name}/{source_file_name}"

# Show the keys so they can be checked before anything is uploaded
for object_key in (input_job_key, input_video_key, output_video_key):
    print(object_key)

In [None]:
# Upload the video to process

s3 = boto3.client('s3', region_name=region_name)

# Copy the local source video to its input key in the pipeline bucket
s3.upload_file(Filename=source_file, Bucket=bucket, Key=input_video_key)

uploaded_location = f"s3://{bucket}/{input_video_key}"
print(f"Uploaded file to {uploaded_location}")

In [None]:
# Create the job file
#
# The job description is serialized to JSON and uploaded in a later cell.
# Every value comes from the parameters cell or from the stack outputs, so
# editing the parameters is enough to change the job.
job = {
    "bucket": bucket,
    "prefix_inputs": "inputs",
    "prefix_outputs": "outputs",
    "job_name": job_name,
    "transcribe_source_language_code": transcribe_source_language_code,
    # Use the configured media format rather than a hard-coded "mp4", so the
    # `media_format` parameter actually takes effect.
    "media_format": media_format,
    "translate_source_language_code": translate_source_language_code,
    "translate_target_language_code": translate_target_language_code,
    "tts_endpoint_name": stacks_output_dict['SageMakerTTSEndpointOutput'],
    "tts_model_id": "", #Not currently used
    "retalking_endpoint_name": stacks_output_dict['SageMakerRetalkingEndpointOutput'],
    "source_file_s3_uri": f"s3://{bucket}/{input_video_key}",
    "destination_s3_uri": f"s3://{bucket}/{output_video_key}"
}

# Display the job as the cell result
job

In [None]:
# Upload the job file which will start the pipeline

# The job description is stored as UTF-8 encoded JSON at the job input key.
job_payload = json.dumps(job).encode('utf-8')

s3_resource = boto3.resource('s3')
s3_resource_obj = s3_resource.Object(bucket, input_job_key)
s3_resource_obj.put(Body=job_payload)


In [None]:
# Poll the destination S3 URI for completion
#
# The check is whether the destination object exists yet: head_object
# succeeds once it does, and the loop stops.
s3 = boto3.client('s3')

# The destination key does not change between polls, so compute it once.
key = get_key(job['destination_s3_uri'])

all_completed = False  # Flag to track completion

while not all_completed:
    print("=================")
    print(f"Checking progress - {datetime.now()}")

    try:
        s3.head_object(Bucket=bucket, Key=key)
    except s3.exceptions.ClientError as err:
        # Only "object not found" means the job is still running. Any other
        # error (access denied, missing bucket, expired credentials, ...)
        # is re-raised instead of being reported as progress forever.
        error_code = err.response.get("Error", {}).get("Code")
        if error_code not in ("404", "NoSuchKey", "NotFound"):
            raise
        print(" In Progress.")
    else:
        print(" Completed.")
        all_completed = True

    if all_completed:
        print("Retalking completed!")
    else:
        # Wait before the next check, then clear the cell output so only
        # the latest status is shown.
        sleep(10)
        clear_output(wait=True)

In [None]:
# Download file
# The bucket and key are taken from the job's destination URI.
destination_uri = job['destination_s3_uri']
bucket = get_bucket(destination_uri)
key = get_key(destination_uri)

# Save next to the notebook, under the output object's file name
local_file_name = output_video_key.split("/")[-1]
s3.download_file(Bucket=bucket, Key=key, Filename=local_file_name)