In [1]:
import json

import boto3

In [2]:
# Set the appropriate AWS_PROFILE in the terminal before launching the notebook.

# Build the Step Functions, S3 and CloudWatch Logs clients in a single pass;
# the service names are listed in the same order as the variables they fill.
sfn_client, s3_client, logs_client = (
    boto3.client(service_name)
    for service_name in ("stepfunctions", "s3", "logs")
)


In [3]:
def _split_s3_uri(s3_uri):
    """Split an ``s3://bucket/prefix`` URI into ``(bucket, prefix)``.

    Only a leading ``s3://`` scheme is removed. ``str.strip("s3://")`` must not
    be used for this: it treats its argument as a *set of characters* and
    would also eat leading/trailing ``s``, ``3``, ``:`` and ``/`` characters
    from the bucket name and the key.

    Trailing slashes on the prefix are dropped so callers can append
    ``/<file name>`` without producing a double slash.

    Raises:
        ValueError: if no bucket name can be extracted from ``s3_uri``.
    """
    scheme = "s3://"
    path = s3_uri[len(scheme):] if s3_uri.startswith(scheme) else s3_uri
    bucket, _, prefix = path.partition("/")
    if not bucket:
        raise ValueError(f"Cannot determine S3 bucket from URI: {s3_uri!r}")
    return bucket, prefix.rstrip("/")


def get_execution_alignment_times(execution_arn):
    """Return how long the diamond and minimap2 alignments took for an execution.

    The working-directory URI is read from the input of the first event in the
    Step Functions execution history
    (``Input.NonHostAlignment.s3_wd_uri``); the file
    ``non_host_alignment_status2.json`` under that location is then fetched
    from S3 and the ``start_time`` / ``end_time`` values of its
    ``diamond_out`` and ``minimap2_out`` entries are subtracted.

    Uses the module-level ``sfn_client`` and ``s3_client``.

    Args:
        execution_arn: ARN of the Step Functions execution to inspect.

    Returns:
        A ``(diamond_minutes, minimap_minutes)`` tuple of floats. The
        timestamps are treated as seconds and the differences divided by 60.

    Raises:
        ValueError: if ``s3_wd_uri`` does not contain a bucket name.
        KeyError: if the execution input or the status file lacks an expected key.
    """
    # Get the execution history
    execution_history = sfn_client.get_execution_history(
        executionArn=execution_arn
    )

    # The first event carries the execution input, which names the S3 working dir
    started_details = execution_history["events"][0]["executionStartedEventDetails"]
    event_input = json.loads(started_details["input"])
    s3_wd_uri = event_input["Input"]["NonHostAlignment"]["s3_wd_uri"]

    # Build the location of the non-host alignment status file
    status_file_name = "non_host_alignment_status2.json"
    bucket_name, prefix = _split_s3_uri(s3_wd_uri)
    # Avoid a leading "/" in the key when the working dir is the bucket root
    key = f"{prefix}/{status_file_name}" if prefix else status_file_name

    # Get the object from S3 and parse its JSON content
    obj = s3_client.get_object(Bucket=bucket_name, Key=key)
    file_content = json.loads(obj["Body"].read().decode("utf-8"))

    # Alignment durations (end - start), in the units of the status file
    diamond = file_content["diamond_out"]
    minimap = file_content["minimap2_out"]
    diamond_time_in_seconds = float(diamond["end_time"]) - float(diamond["start_time"])
    minimap_time_in_seconds = float(minimap["end_time"]) - float(minimap["start_time"])

    # Convert to minutes
    diamond_time_in_mins = diamond_time_in_seconds / 60
    minimap_time_in_mins = minimap_time_in_seconds / 60

    return diamond_time_in_mins, minimap_time_in_mins


In [6]:
# Execution ARNs to compare (currently stubbed -- replace with the runs of interest).
execution_arn_no_compression = "arn:aws:states:us-west-2:732052188396:execution:idseq-swipe-staging-short-read-mngs-wdl:idseq-staging-150-32175-35212-20240122120219"
execution_arn_w_compression = "arn:aws:states:us-west-2:732052188396:execution:idseq-swipe-staging-short-read-mngs-wdl:idseq-staging-1166-32169-35209-20240122112059"

# Alignment durations (in minutes) for the run without and with compression.
diamond, minimap = get_execution_alignment_times(execution_arn_no_compression)
diamond_compressed, minimap_compressed = get_execution_alignment_times(
    execution_arn_w_compression
)

# Difference is (no compression) - (with compression): a positive value means
# the compressed run finished that many minutes sooner.
time_diff_diamond = diamond - diamond_compressed
time_diff_minimap = minimap - minimap_compressed

print(f"diamond: {time_diff_diamond}")
print(f"minimap: {time_diff_minimap}")



diamond: 3.5020290851593017
minimap: 5.072864035765331
