# Launching WES workflow on DRS objects based on a Data Connect query

## Check if endpoints are available

In [1]:
import requests
import json

def pretty_print_json(response):
    # pretty print JSON in blue color
    print("\033[38;2;8;75;138m"+json.dumps(response.json(), indent=4)+"\033[0m")
    
def print_head(text):
    # print in green color
    print("\033[38;2;8;138;75m"+text+"\033[0m")

1) Check if WES endpoints are available

In [4]:
node_ips = ['ga4gh-starter-kit.ilifu.ac.za', 'elwazi-node.icermali.org'] # , 'osdp.ace.ac.ug'] #, '196.43.136.22'] 
wes_port = "6000"
service_info_path = "/service-info"
runs_path = "/runs"
http_method = "GET"

for node_ip in node_ips:
    ga4gh_base_url = "http://" + node_ip + ":{}/ga4gh/{}/v1"
    wes_base_url = ga4gh_base_url.format(wes_port,"wes")
    request_url = wes_base_url+service_info_path
    print_head("{} request to {}".format(http_method, request_url))
    # GET request to service-info endpoint
    wes_service_info_resp = requests.request(http_method, request_url)
    # print the response
    pretty_print_json(wes_service_info_resp)

[38;2;8;138;75mGET request to http://ga4gh-starter-kit.ilifu.ac.za:6000/ga4gh/wes/v1/service-info[0m
[38;2;8;75;138m{
    "id": "org.ga4gh.starterkit.wes",
    "name": "GA4GH Starter Kit WES Service",
    "description": "An open source, community-driven implementation of the GA4GH Workflow Execution Service (WES)API specification.",
    "contactUrl": "mailto:info@ga4gh.org",
    "documentationUrl": "https://github.com/ga4gh/ga4gh-starter-kit-wes",
    "createdAt": "2020-01-15T12:00:00Z",
    "updatedAt": "2020-01-15T12:00:00Z",
    "environment": "test",
    "version": "0.3.2",
    "type": {
        "group": "org.ga4gh",
        "artifact": "wes",
        "version": "1.0.1"
    },
    "organization": {
        "name": "Global Alliance for Genomics and Health",
        "url": "https://ga4gh.org"
    },
    "workflow_type_versions": {
        "WDL": [
            "1.0"
        ],
        "NEXTFLOW": [
            "21.04.0"
        ]
    },
    "workflow_engine_versions": {
        "NA

2) Check if DRS endpoints are available

In [5]:
node_ips = ['ga4gh-starter-kit.ilifu.ac.za','elwazi-node.icermali.org'] #,'osdp.ace.ac.ug', '196.43.136.22'] 
drs_port = "5000"
service_info_path = "/service-info"
http_method = "GET"

for node_ip in node_ips:
    ga4gh_base_url = "http://" + node_ip + ":{}/ga4gh/{}/v1"
    drs_base_url = ga4gh_base_url.format(drs_port,"drs")
    request_url = drs_base_url+service_info_path
    print_head("{} request to {}".format(http_method, request_url))
    # GET request to service-info endpoint
    drs_service_info_resp = requests.request(http_method, request_url)
    # print the response
    pretty_print_json(drs_service_info_resp)

[38;2;8;138;75mGET request to http://ga4gh-starter-kit.ilifu.ac.za:5000/ga4gh/drs/v1/service-info[0m
[38;2;8;75;138m{
    "id": "org.ga4gh.starterkit.drs",
    "name": "GA4GH Starter Kit DRS Service",
    "description": "An open source, community-driven implementation of the GA4GH Data Repository Service (DRS) API specification.",
    "contactUrl": "mailto:info@ga4gh.org",
    "documentationUrl": "https://github.com/ga4gh/ga4gh-starter-kit-drs",
    "createdAt": "2020-01-15T12:00:00Z",
    "updatedAt": "2020-01-15T12:00:00Z",
    "environment": "test",
    "version": "0.3.2",
    "type": {
        "group": "org.ga4gh",
        "artifact": "drs",
        "version": "1.3.0experimental"
    },
    "organization": {
        "name": "Global Alliance for Genomics and Health",
        "url": "https://ga4gh.org"
    }
}[0m
[38;2;8;138;75mGET request to http://elwazi-node.icermali.org:5000/ga4gh/drs/v1/service-info[0m
[38;2;8;75;138m{
    "id": "org.ga4gh.starterkit.drs",
    "name": "GA

3) Check if Passport Broker endpoints are available

In [6]:
node_ips = ['localhost'] #'ga4gh-starter-kit.ilifu.ac.za']#, 'osdp.ace.ac.ug', 'elwazi-node.icermali.org'] #,  '196.43.136.22'] 
broker_port = "4500"
service_info_path = "/service-info"
http_method = "GET"

for node_ip in node_ips:
    ga4gh_base_url = "http://" + node_ip + ":{}/ga4gh/{}/v1"
    broker_base_url = ga4gh_base_url.format(broker_port,"passport")
    request_url = broker_base_url+service_info_path
    print_head("{} request to {}".format(http_method, request_url))
    # GET request to service-info endpoint
    broker_service_info_resp = requests.request(http_method, request_url)
    # print the response
    pretty_print_json(broker_service_info_resp)

[38;2;8;138;75mGET request to http://localhost:4500/ga4gh/passport/v1/service-info[0m
[38;2;8;75;138m{
    "id": "org.ga4gh.starterkit.passport.broker",
    "name": "GA4GH Starter Kit Passport Broker Service",
    "description": "Starter Kit implementation of a Passport Broker service, outlined in the GA4GH Passports specification. Manages researcher permissions to data and compute, and enables this information to be minted as JWTs and passed to downstream clearinghouses.",
    "contactUrl": "mailto:info@ga4gh.org",
    "documentationUrl": "https://github.com/ga4gh/ga4gh-starter-kit-passport-broker",
    "createdAt": "2022-04-28T09:00:00Z",
    "updatedAt": "2022-04-28T09:00:00Z",
    "environment": "test",
    "version": "0.0.2",
    "type": {
        "group": "org.ga4gh",
        "artifact": "passport-broker",
        "version": "1.0.0"
    },
    "organization": {
        "name": "Global Alliance for Genomics and Health",
        "url": "https://ga4gh.org"
    }
}[0m


## Check if DRS objects exists on endpoint

1. Check ga4gh-starter-kit.ilifu.ac.za

In [7]:
http_method = "GET"
node_ip = 'ga4gh-starter-kit.ilifu.ac.za'
drs_port = "5000"
# drs_ids = ['91860745a820a28b6c37d60432925bed','3520f407daf560b1732b9e026879eae4'] #authenticated
drs_ids = ['cf99eed44d272c7a9146a7f3216c8bd1','c3465aa1f0ef8773d5eede6c23e8e9b2']
object_path_get = "/objects/{}"
object_path_post = "/objects"
access_path = "/objects/{}/access/{}"

for drs_id in drs_ids:
    ga4gh_base_url = "http://" + node_ip + ":{}/ga4gh/{}/v1"
    drs_base_url = ga4gh_base_url.format(drs_port,"drs")
    request_url = drs_base_url+object_path_get.format(drs_id)
    print_head("{} request to {}".format(http_method, request_url))
    # GET request to /objects/{object_id} endpoint
    drs_object_response = requests.request(http_method, request_url)
    # print the response
    pretty_print_json(drs_object_response)

[38;2;8;138;75mGET request to http://ga4gh-starter-kit.ilifu.ac.za:5000/ga4gh/drs/v1/objects/cf99eed44d272c7a9146a7f3216c8bd1[0m
[38;2;8;75;138m{
    "id": "cf99eed44d272c7a9146a7f3216c8bd1",
    "description": "Patient: HG04015, Country: ITU, Region: SAS, Sex: male\n",
    "created_time": "2023-12-05T18:19:02Z",
    "mime_type": "application/cram",
    "name": "HG04015.final.chrX_15494566-15607236",
    "size": 0,
    "updated_time": "2023-12-05T18:19:02Z",
    "checksums": [
        {
            "checksum": "704074d0d6d57371fc3f8ca7f1164feacb221165",
            "type": "sha1"
        },
        {
            "checksum": "2646f5e2bdbc2e7e028e506147eee561008bafd79285577aa4a6d714400ae920",
            "type": "sha256"
        },
        {
            "checksum": "cf99eed44d272c7a9146a7f3216c8bd1",
            "type": "md5"
        }
    ],
    "self_uri": "drs://ga4gh-starter-kit.ilifu.ac.za:5000/cf99eed44d272c7a9146a7f3216c8bd1",
    "access_methods": [
        {
            "acce

2.1 Check osdp.ace.ac.ug (ACE - Uganda)

In [8]:
http_method = "GET"
node_ip = 'osdp.ace.ac.ug'
drs_port = "5000"
# drs_ids = ['6fa43c7de04b60c1a73a42aa2efc977d','be145a60bc059c154475a2561af0df6b'] #authenticated
drs_ids = ['c4b39bfcd30c59b70beb090fe4fdc2a1', 'c560c0bd4e98405adaf25534a588428b']
object_path_get = "/objects/{}"
object_path_post = "/objects"
access_path = "/objects/{}/access/{}"

for drs_id in drs_ids:
    ga4gh_base_url = "http://" + node_ip + ":{}/ga4gh/{}/v1"
    drs_base_url = ga4gh_base_url.format(drs_port,"drs")
    request_url = drs_base_url+object_path_get.format(drs_id)
    print_head("{} request to {}".format(http_method, request_url))
    # GET request to /objects/{object_id} endpoint
    drs_object_response = requests.request(http_method, request_url)
    # print the response
    pretty_print_json(drs_object_response)

[38;2;8;138;75mGET request to http://osdp.ace.ac.ug:5000/ga4gh/drs/v1/objects/c4b39bfcd30c59b70beb090fe4fdc2a1[0m


KeyboardInterrupt: 

2.2 Check 196.43.136.22 (UVRI - Uganda)

In [52]:
http_method = "GET"
node_ip = '196.43.136.22' #UVRI - Uganda
drs_port = "5000"
drs_ids = ['1050d0443c2e83f9d9a8933481dcb405','be04a5a90617aeae9a05fb533f544ade']
object_path_get = "/objects/{}"
object_path_post = "/objects"
access_path = "/objects/{}/access/{}"

for drs_id in drs_ids:
    ga4gh_base_url = "http://" + node_ip + ":{}/ga4gh/{}/v1"
    drs_base_url = ga4gh_base_url.format(drs_port,"drs")
    request_url = drs_base_url+object_path_get.format(drs_id)
    print_head("{} request to {}".format(http_method, request_url))
    # GET request to /objects/{object_id} endpoint
    drs_object_response = requests.request(http_method, request_url)
    # print the response
    pretty_print_json(drs_object_response)

[38;2;8;138;75mGET request to http://196.43.136.22:5000/ga4gh/drs/v1/objects/1050d0443c2e83f9d9a8933481dcb405[0m


ConnectionError: HTTPConnectionPool(host='196.43.136.22', port=5000): Max retries exceeded with url: /ga4gh/drs/v1/objects/1050d0443c2e83f9d9a8933481dcb405 (Caused by NewConnectionError('<urllib3.connection.HTTPConnection object at 0x7f72f3f40850>: Failed to establish a new connection: [Errno 113] No route to host'))

3. Check elwazi-node.icermali.org (Mali)

In [9]:
http_method = "GET"
node_ip = 'elwazi-node.icermali.org'
drs_port = "5000"
# drs_ids = ['a68c60133f942881983d0e15827bf88f','168d353c6f474ca72e35e9209f921a59'] #authenticated
drs_ids = ['c276d04bacfaf6540b947340addf2670', 'c080e08c01ec46446bb16801c7abe5b2']
object_path_get = "/objects/{}"
object_path_post = "/objects"
access_path = "/objects/{}/access/{}"

for drs_id in drs_ids:
    ga4gh_base_url = "http://" + node_ip + ":{}/ga4gh/{}/v1"
    drs_base_url = ga4gh_base_url.format(drs_port,"drs")
    request_url = drs_base_url+object_path_get.format(drs_id)
    print_head("{} request to {}".format(http_method, request_url))
    # GET request to /objects/{object_id} endpoint
    drs_object_response = requests.request(http_method, request_url)
    # print the response
    pretty_print_json(drs_object_response)


[38;2;8;138;75mGET request to http://elwazi-node.icermali.org:5000/ga4gh/drs/v1/objects/c276d04bacfaf6540b947340addf2670[0m
[38;2;8;75;138m{
    "id": "c276d04bacfaf6540b947340addf2670",
    "description": "Patient: NA19379, Country: LWK, Region: AFR, Sex: female\n",
    "created_time": "2023-12-09T09:26:35Z",
    "mime_type": "application/crai",
    "name": "NA19379.final.chrX_15494566-15607236.cram",
    "size": 0,
    "updated_time": "2023-12-09T09:26:35Z",
    "checksums": [
        {
            "checksum": "fed901adb4f5a734ced0057ac6cea06591f0ed37",
            "type": "sha1"
        },
        {
            "checksum": "33623088a878177903d8bf2fe6331b4a4042ce7958dee6870cf5e24d291f0324",
            "type": "sha256"
        },
        {
            "checksum": "c276d04bacfaf6540b947340addf2670",
            "type": "md5"
        }
    ],
    "self_uri": "drs://elwazi-node.icermali.org:5000/c276d04bacfaf6540b947340addf2670",
    "access_methods": [
        {
            "access_

## Launch workflow on test DRS objects from South-Africa, Mali and Uganda (using WES on ga4gh-starter-kit.ilifu.ac.za, elwazi-node.icermali.org or osdp.ace.ac.ug and 196.43.136.22)

1. Launch workflow
- Change the node_ip to run on a different WES endpoint
- Change the infput_file to run on different DRS objects

In [10]:
# node_ip = 'ga4gh-starter-kit.ilifu.ac.za'
node_ip = 'elwazi-node.icermali.org'
# node_ip = 'osdp.ace.ac.ug'
#node_ip = '196.43.136.22' #UVRI
wes_port = "6000"
service_info_path = "/service-info"
runs_path = "/runs"
http_method = "GET"
ga4gh_base_url = "http://" + node_ip + ":{}/ga4gh/{}/v1"
wes_base_url = ga4gh_base_url.format(wes_port,"wes")

http_method = "POST"
request_url = wes_base_url + runs_path

nextflow_workflow_url = "https://github.com/grbot/cram-qc"

#ACE-Mali
# input_file = "drs://elwazi-node.icermali.org:5000/a68c60133f942881983d0e15827bf88f drs://elwazi-node.icermali.org:5000/168d353c6f474ca72e35e9209f921a59"
input_file = "drs://elwazi-node.icermali.org:5000/c276d04bacfaf6540b947340addf2670 drs://elwazi-node.icermali.org:5000/c080e08c01ec46446bb16801c7abe5b2"

#UVRI
# input_file = "drs://196.43.136.22:5000/1050d0443c2e83f9d9a8933481dcb405 drs://196.43.136.22:5000/be04a5a90617aeae9a05fb533f544ade"

#ilifu
# input_file = "drs://ga4gh-starter-kit.ilifu.ac.za:5000/91860745a820a28b6c37d60432925bed drs://ga4gh-starter-kit.ilifu.ac.za:5000/3520f407daf560b1732b9e026879eae4"
# input_file = "drs://ga4gh-starter-kit.ilifu.ac.za:5000/cf99eed44d272c7a9146a7f3216c8bd1 drs://ga4gh-starter-kit.ilifu.ac.za:5000/c3465aa1f0ef8773d5eede6c23e8e9b2"

#OSDP
# input_file = "drs://osdp.ace.ac.ug:5000/c3e615325a81b4568b1c355ab7b804d5 drs://osdp.ace.ac.ug:5000/cd2e5e057f112d609f901cdf151d8dee" 
# input_file = "drs://osdp.ace.ac.ug:5000/6fa43c7de04b60c1a73a42aa2efc977d drs://osdp.ace.ac.ug:5000/be145a60bc059c154475a2561af0df6b" 

data = {
    'workflow_type': 'NEXTFLOW',
    'workflow_type_version': '21.04.0',
    'workflow_url': nextflow_workflow_url,
    'workflow_params': f'{{"input":"{input_file}"}}'
}

print_head("{} request to {}".format(http_method, request_url))

# Post a Nextflow workflow
wes_post_workflow_response = requests.request(http_method, request_url, data = data)

# print the response
pretty_print_json(wes_post_workflow_response)

current_run_id = wes_post_workflow_response.json()["run_id"]

print_head("run_id = {}".format(current_run_id))

[38;2;8;138;75mPOST request to http://elwazi-node.icermali.org:6000/ga4gh/wes/v1/runs[0m
[38;2;8;75;138m{
    "run_id": "1da569de-f7fe-4199-82d6-60109424fbd1"
}[0m
[38;2;8;138;75mrun_id = 1da569de-f7fe-4199-82d6-60109424fbd1[0m


2. Check output

In [11]:
http_method = "GET"
request_url = wes_base_url + runs_path + "/" + current_run_id

print_head("{} request to {}".format(http_method, request_url))

# Get request to /runs/{run_id}
monitor_run_response = requests.request(http_method, request_url)

# print the response
pretty_print_json(monitor_run_response)

[38;2;8;138;75mGET request to http://elwazi-node.icermali.org:6000/ga4gh/wes/v1/runs/1da569de-f7fe-4199-82d6-60109424fbd1[0m
[38;2;8;75;138m{
    "run_id": "1da569de-f7fe-4199-82d6-60109424fbd1",
    "request": {
        "workflow_params": {
            "input": "drs://elwazi-node.icermali.org:5000/c276d04bacfaf6540b947340addf2670 drs://elwazi-node.icermali.org:5000/c080e08c01ec46446bb16801c7abe5b2"
        },
        "workflow_type": "NEXTFLOW",
        "workflow_type_version": "21.04.0",
        "workflow_url": "https://github.com/grbot/cram-qc"
    },
    "state": "RUNNING",
    "run_log": {
        "name": "grbot/cram-qc",
        "cmd": [
            "#!/bin/bash -ue",
            "samtools flagstat     -@ 1     NA19379.final.chrX_15494566-15607236.cram.crai > NA19379.final.chrX_15494566-15607236.cram.crai.flagstat",
            "#!/bin/bash -ue",
            "samtools flagstat     -@ 1     HG03304.final.chrX_15494566-15607236.cram.crai > HG03304.final.chrX_15494566-15607236.cr

# Data Connect
1. Check service-info

In [12]:
import requests
import json

dc_port = "8089"
dc_base_url = "http://ga4gh-starter-kit.ilifu.ac.za:{}".format(dc_port)


service_info_path = "/service-info"
tables_path = "/tables"
table_info_path = "/table/{}/info"
table_data_path = "/table/{}/data"
search_path = "/search"

def pretty_print_json(response):
    print(json.dumps(response.json(), indent=4))

In [13]:
dc_service_info_resp = requests.request("GET", dc_base_url+service_info_path)
print(dc_base_url+service_info_path)
pretty_print_json(dc_service_info_resp)

http://ga4gh-starter-kit.ilifu.ac.za:8089/service-info
{
    "id": "",
    "name": "GA4GH Discovery Search API",
    "description": "",
    "documentationUrl": "",
    "contactUrl": "",
    "version": ""
}


2. Check data table we will be querying

In [14]:
dc_service_info_resp = requests.request("GET", dc_base_url+'/table/trino.public.genome_ilifu/info')
pretty_print_json(dc_service_info_resp)

{
    "name": "trino.public.genome_ilifu",
    "description": "Automatically generated schema",
    "data_model": {
        "$id": "http://ga4gh-starter-kit.ilifu.ac.za:8089/table/trino.public.genome_ilifu/info",
        "description": "Automatically generated schema",
        "$schema": "http://json-schema.org/draft-07/schema#",
        "properties": {
            "sample_id": {
                "format": "varchar",
                "type": "string",
                "$comment": "varchar"
            },
            "population_id": {
                "format": "varchar",
                "type": "string",
                "$comment": "varchar"
            },
            "super_population_id": {
                "format": "varchar",
                "type": "string",
                "$comment": "varchar"
            },
            "sex": {
                "format": "varchar",
                "type": "string",
                "$comment": "varchar"
            },
            "cram_drs_id": {
   

### Select address and id functions

In [15]:
import re

def get_address(s):
    address = s.replace("drs://","")
    address = re.sub(':.*', '', address)
    return address
    
def get_drs_id(s):
    drs_id = re.sub('.*/', '', s)
    return drs_id

### Access via Data Connect

1. Do query
Select CRAM DRS ids for all African samples. Limit search to 10 samples for now.

In [16]:
import requests, json
q2 = {
  "query": "select cram_drs_id from trino.public.genome_ilifu where super_population_id='AFR' limit 10",
  "parameters": []
}
r = requests.post("http://ga4gh-starter-kit.ilifu.ac.za:8089/search", json = q2)
print(json.dumps(r.json(), indent=3))
data = r.json()

{
   "data": [],
   "pagination": {
      "next_page_url": "http://ga4gh-starter-kit.ilifu.ac.za:8089/search/v1/statement/queued/20231211_074655_00001_46krn/y76daec0be6b601f0a939b26eaeae88d9cd6e4f14/1?queryJobId=20231211_074655_00001_46krn"
   }
}


2. Parse through pages to get results

In [17]:
next_page = data['pagination']['next_page_url']
# print (next_page)

# Poll till we get results
while not 'executing' in next_page:
    dc_service_info_resp = requests.request("GET", next_page)
    data = dc_service_info_resp.json()
    next_page = data['pagination']['next_page_url']
    # print (data)

dc_service_info_resp = requests.request("GET", next_page)
# pretty_print_json(dc_service_info_resp)

data = dc_service_info_resp.json()
next_page = data['pagination']['next_page_url']
dc_service_info_resp = requests.request("GET", next_page)
data = dc_service_info_resp.json()
# pretty_print_json(dc_service_info_resp)

drs_str = ""
drs_ids = []
for i in range(len(data['data'])):
    # print (data['data'][i]['cram_drs_id'])
    drs_ids.append(data['data'][i]['cram_drs_id'])
    drs_str = drs_str + data['data'][i]['cram_drs_id'] + " "

drs_str = drs_str[:-1]


3. Map DRS server to DRS objects

In [18]:
drs_servers = {}
for drs_id in drs_ids:
    address = get_address(drs_id)
    if address not in drs_servers:
        drs_servers[address] = {}
        drs_servers[address]['drs_ids'] = []
        drs_servers[address]['drs_ids'].append(drs_id)
        drs_servers[address]['total_file_size'] = 0
        drs_servers[address]['ingress'] = 0
        drs_servers[address]['run_id'] = 0
        drs_servers[address]['drs_ids_str'] = ""
        drs_servers[address]['outputs'] = {}
    else:
        drs_servers[address]['drs_ids'].append(drs_id)
print (drs_servers)

{'osdp.ace.ac.ug': {'drs_ids': ['drs://osdp.ace.ac.ug:5000/6fa43c7de04b60c1a73a42aa2efc977d', 'drs://osdp.ace.ac.ug:5000/be145a60bc059c154475a2561af0df6b', 'drs://osdp.ace.ac.ug:5000/9a45659fe478e5bb39d1dd1b08bd1807', 'drs://osdp.ace.ac.ug:5000/82adbcf7cc72c31a86e65d73bf6ef81b'], 'total_file_size': 0, 'ingress': 0, 'run_id': 0, 'drs_ids_str': '', 'outputs': {}}, 'elwazi-node.icermali.org': {'drs_ids': ['drs://elwazi-node.icermali.org:5000/a68c60133f942881983d0e15827bf88f', 'drs://elwazi-node.icermali.org:5000/45ca586b0921ffedf6a63679fbaacb68', 'drs://elwazi-node.icermali.org:5000/d36019bb63182abad672205a140f7e83', 'drs://elwazi-node.icermali.org:5000/b809bb9b81a9583ec67e787b0449e9bd', 'drs://elwazi-node.icermali.org:5000/168d353c6f474ca72e35e9209f921a59'], 'total_file_size': 0, 'ingress': 0, 'run_id': 0, 'drs_ids_str': '', 'outputs': {}}, 'ga4gh-starter-kit.ilifu.ac.za': {'drs_ids': ['drs://ga4gh-starter-kit.ilifu.ac.za:5000/5a436bec951fab59dd975bcd10f316f1'], 'total_file_size': 0, 'in

In [19]:
#Exclude OSDP, ACE-UGANDA server currently unavailable
drs_servers.pop('osdp.ace.ac.ug')
print (drs_servers)
# for server in drs_servers:
#     print(server)

{'elwazi-node.icermali.org': {'drs_ids': ['drs://elwazi-node.icermali.org:5000/a68c60133f942881983d0e15827bf88f', 'drs://elwazi-node.icermali.org:5000/45ca586b0921ffedf6a63679fbaacb68', 'drs://elwazi-node.icermali.org:5000/d36019bb63182abad672205a140f7e83', 'drs://elwazi-node.icermali.org:5000/b809bb9b81a9583ec67e787b0449e9bd', 'drs://elwazi-node.icermali.org:5000/168d353c6f474ca72e35e9209f921a59'], 'total_file_size': 0, 'ingress': 0, 'run_id': 0, 'drs_ids_str': '', 'outputs': {}}, 'ga4gh-starter-kit.ilifu.ac.za': {'drs_ids': ['drs://ga4gh-starter-kit.ilifu.ac.za:5000/5a436bec951fab59dd975bcd10f316f1'], 'total_file_size': 0, 'ingress': 0, 'run_id': 0, 'drs_ids_str': '', 'outputs': {}}}


### Use case
### Now we initiate a run in a gather/scatter/federated manner
#### - Runs are initiated on individual nodes (calculate flagstat)
#### - Output is gathered and MultiQC are run on the flagstat results on one WES endpoint

## Passport information

#### GET User id



In [20]:
user_id = input("Enter your user id \n") #userId=a6828597-5816-46d3-83ef-665f2ceb9588

In [21]:
drs_admin_port = "5001" #drs admin port
broker_admin_port = "4501" #broker admin port

ga4gh_admin_url = "http://localhost:{}/admin/ga4gh/{}/v1/"

broker_base_url = ga4gh_base_url.format(broker_port,"passport")
broker_admin_url = ga4gh_admin_url.format(broker_admin_port,"passport")

print(broker_admin_url)

http://localhost:4501/admin/ga4gh/passport/v1/


### Get User's available visas

In [26]:
request_url = broker_admin_url+ "users/" + user_id
# request_url = broker_admin_url+ "visas/" + "33d54e45-bf1b-40d8-93be-57142aa5c949"
http_method = "GET"
drs_object_response = requests.request(http_method, request_url)
# print the response
pretty_print_json(drs_object_response)

{
    "id": "a6828597-5816-46d3-83ef-665f2ceb9588",
    "passportVisaAssertions": [
        {
            "status": "active",
            "assertedAt": 1701884776,
            "passportVisa": {
                "id": "33d54e45-bf1b-40d8-93be-57142aa5c949",
                "visaName": "eLwaziPilotIlifu",
                "visaIssuer": "http://ga4gh-starter-kit.ilifu.ac.za:4500/",
                "visaDescription": "elwazi pilot project: passports demo, ilifu"
            }
        },
        {
            "status": "active",
            "assertedAt": 1701884776,
            "passportVisa": {
                "id": "510c790d-af4f-4910-8d88-05b0784d1fbf",
                "visaName": "eLwaziPilotACEUganda",
                "visaIssuer": "http://osdp.ace.ac.ug:4500/",
                "visaDescription": "elwazi pilot project: passports demo, osdp"
            }
        },
        {
            "status": "active",
            "assertedAt": 1701884776,
            "passportVisa": {
              

### GET Passport Token for select visas

In [27]:
http_method = "POST"

mint_path_post = "mint/"
request_url = broker_admin_url + mint_path_post

request_body = json.dumps({
    "researcherId": user_id,
    "requestedVisas": ["4b342cec-e141-4d56-9930-b49cbb80c303", "510c790d-af4f-4910-8d88-05b0784d1fbf", "33d54e45-bf1b-40d8-93be-57142aa5c949"]
})
request_headers = {"Content-Type": "application/json"}

print_head("{} a request to {} to get passport token from selected visas".format(http_method, request_url))

# POST visas to the /mint endpoint
passport_token_response = requests.request(
    http_method, 
    request_url,
    headers = request_headers,
    data = request_body
)

passport_token = passport_token_response.text
# print the response
# pretty_print_json(passport_token_response)
print(passport_token)

[38;2;8;138;75mPOST a request to http://localhost:4501/admin/ga4gh/passport/v1/mint/ to get passport token from selected visas[0m
eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJzY29wZSI6Im9wZW5pZCIsImNvbnRhaW5lZF92aXNhcyI6WyJlTHdhemlQaWxvdEFDRU1hbGlAaHR0cDovL2Vsd2F6aS1ub2RlLmljZXJtYWxpLm9yZzo0NTAwLyIsImVMd2F6aVBpbG90QUNFVWdhbmRhQGh0dHA6Ly9vc2RwLmFjZS5hYy51Zzo0NTAwLyIsImVMd2F6aVBpbG90SWxpZnVAaHR0cDovL2dhNGdoLXN0YXJ0ZXIta2l0LmlsaWZ1LmFjLnphOjQ1MDAvIl0sImlzcyI6Imh0dHA6Ly9nYTRnaC1zdGFydGVyLWtpdC5pbGlmdS5hYy56YTo0NTAwLyIsImV4cCI6MTcwMjI4NDU3MCwiaWF0IjoxNzAyMjgwOTcwLCJnYTRnaF9wYXNzcG9ydF92MSI6WyJleUowZVhBaU9pSktWMVFpTENKaGJHY2lPaUpJVXpJMU5pSjkuZXlKbllUUm5hRjkyYVhOaFgzWXhJanA3SW1GemMyVnlkR1ZrSWpveE56QXhPRGcwTnpjMkxDSjJhWE5oWDJsemMzVmxjaUk2SW1oMGRIQTZMeTlsYkhkaGVta3RibTlrWlM1cFkyVnliV0ZzYVM1dmNtYzZORFV3TUM4aUxDSmllU0k2SW1SaFl5SXNJbk52ZFhKalpTSTZJbWgwZEhBNkx5OWxiSGRoZW1rdGJtOWtaUzVwWTJWeWJXRnNhUzV2Y21jNk5EVXdNQzhpTENKMGVYQmxJam9pUTI5dWRISnZiR3hsWkVGalkyVnpjMGR5WVc1MGN5SXNJblpoYkhWbElqb2lhSFIwY0hNNkx

In [28]:
passports ={}
passports["ga4gh-starter-kit.ilifu.ac.za"]=passport_token
passports["elwazi-node.icermali.org"]="eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJzY29wZSI6Im9wZW5pZCIsImNvbnRhaW5lZF92aXNhcyI6WyJlTHdhemlQaWxvdEFDRU1hbGlAaHR0cDovL2Vsd2F6aS1ub2RlLmljZXJtYWxpLm9yZzo0NTAwLyIsImVMd2F6aVBpbG90QUNFVWdhbmRhQGh0dHA6Ly9vc2RwLmFjZS5hYy51Zzo0NTAwLyIsImVMd2F6aVBpbG90SWxpZnVAaHR0cDovL2dhNGdoLXN0YXJ0ZXIta2l0LmlsaWZ1LmFjLnphOjQ1MDAvIl0sImlzcyI6Imh0dHA6Ly9lbHdhemktbm9kZS5pY2VybWFsaS5vcmc6NDUwMC8iLCJleHAiOjE3MDIyODQ1OTEsImlhdCI6MTcwMjI4MDk5MSwiZ2E0Z2hfcGFzc3BvcnRfdjEiOlsiZXlKMGVYQWlPaUpLVjFRaUxDSmhiR2NpT2lKSVV6STFOaUo5LmV5Sm5ZVFJuYUY5MmFYTmhYM1l4SWpwN0ltRnpjMlZ5ZEdWa0lqb3hOekF5TVRFMU16TXlMQ0oyYVhOaFgybHpjM1ZsY2lJNkltaDBkSEE2THk5bGJIZGhlbWt0Ym05a1pTNXBZMlZ5YldGc2FTNXZjbWM2TkRVd01DOGlMQ0ppZVNJNkltUmhZeUlzSW5OdmRYSmpaU0k2SW1oMGRIQTZMeTlsYkhkaGVta3RibTlrWlM1cFkyVnliV0ZzYVM1dmNtYzZORFV3TUM4aUxDSjBlWEJsSWpvaVEyOXVkSEp2Ykd4bFpFRmpZMlZ6YzBkeVlXNTBjeUlzSW5aaGJIVmxJam9pYUhSMGNITTZMeTlrYjJrdWIzSm5MekV3TGpFd016Z3ZjelF4TkRNeExUQXhPQzB3TWpFNUxYa2lMQ0oyYVhOaFgyNWhiV1VpT2lKbFRIZGhlbWxRYVd4dmRFRkRSVTFoYkdraWZTd2lhWE56SWpvaWFIUjBjRG92TDJWc2QyRjZhUzF1YjJSbExtbGpaWEp0WVd4cExtOXlaem8wTlRBd0x5SXNJbVY0Y0NJNk1UY3dNakk0TkRVNU1Td2lhV0YwSWpveE56QXlNamd3T1RreGZRLkhvM2F3S2RUcDhYNlJMZTY5Wm1zcGE3WDBleTRZSkh5UmNKQXpiamF6cHMiLCJleUowZVhBaU9pSktWMVFpTENKaGJHY2lPaUpJVXpJMU5pSjkuZXlKbllUUm5hRjkyYVhOaFgzWXhJanA3SW1GemMyVnlkR1ZrSWpveE56QXlNVEUxTXpNeUxDSjJhWE5oWDJsemMzVmxjaUk2SW1oMGRIQTZMeTl2YzJSd0xtRmpaUzVoWXk1MVp6bzBOVEF3THlJc0ltSjVJam9pWkdGaklpd2ljMjkxY21ObElqb2lhSFIwY0RvdkwyOXpaSEF1WVdObExtRmpMblZuT2pRMU1EQXZJaXdpZEhsd1pTSTZJa052Ym5SeWIyeHNaV1JCWTJObGMzTkhjbUZ1ZEhNaUxDSjJZV3gxWlNJNkltaDBkSEJ6T2k4dlpHOXBMbTl5Wnk4eE1DNHhNRE00TDNNME1UUXpNUzB3TVRndE1ESXhPUzE1SWl3aWRtbHpZVjl1WVcxbElqb2laVXgzWVhwcFVHbHNiM1JCUTBWVloyRnVaR0VpZlN3aWFYTnpJam9pYUhSMGNEb3ZMMjl6WkhBdVlXTmxMbUZqTG5Wbk9qUTFNREF2SWl3aVpYaHdJam94TnpBeU1qZzBOVGt4TENKcFlYUWlPakUzTURJeU9EQTVPVEY5LjFRRDRtUGhudHpGS1NUdUdLdU4yNkVPV0lkbE5zS1Z2ZnlHNXdDZ3BNc0EiLCJleUowZVhBaU9pSktWMVFpTENKaGJHY2lPaUpJVXpJMU5pSjkuZXlKbllUUm5hRjkyYVhOaFgzWXhJanA3SW1GemMyVnlkR1ZrSWpveE56QXlNVEUxTXpNeUxDSjJhWE5oWDJsemMzVmxjaUk2SW1oMGRIQTZMeTluWVRSbmFDMXpkR0Z5ZEdWeUxXdHBkQzVwYkdsbWRTNWhZeTU2WVRvME5UQXdMeUlzSW1KNUlqb2laR0ZqSWl3aWMyOTFjbU5sSWpvaWFIUjBjRG92TDJkaE5HZG9MWE4wWVhKMFpYSXRhMmwwTG1sc2FXWjFMbUZqTG5waE9qUTFNREF2SWl3aWRIbHdaU0k2SWtOdmJuUnliMnhzWldSQlkyTmxjM05IY21GdWRITWlMQ0oyWVd4MVpTSTZJbWgwZEhCek9pOHZaRzlwTG05eVp5OHhNQzR4TURNNEwzTTBNVFF6TVMwd01UZ3RNREl4T1MxNUlpd2lkbWx6WVY5dVlXMWxJam9pWlV4M1lYcHBVR2xzYjNSSmJHbG1kU0o5TENKcGMzTWlPaUpvZEhSd09pOHZaMkUwWjJndGMzUmhjblJsY2kxcmFYUXVhV3hwWm5VdVlXTXVlbUU2TkRVd01DOGlMQ0psZUhBaU9qRTNNREl5T0RRMU9URXNJbWxoZENJNk1UY3dNakk0TURrNU1YMC5qS1phRmJjTnltRVhraDRQb2w3eTdyRFFub1haU25nTGNxVEVtOFVKMEVBIl19.R-86TNFCkEtB-TaByb29YKopRfg8kejeVHrzKnFs6ZM"
print(passports)

{'ga4gh-starter-kit.ilifu.ac.za': 'eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJzY29wZSI6Im9wZW5pZCIsImNvbnRhaW5lZF92aXNhcyI6WyJlTHdhemlQaWxvdEFDRU1hbGlAaHR0cDovL2Vsd2F6aS1ub2RlLmljZXJtYWxpLm9yZzo0NTAwLyIsImVMd2F6aVBpbG90QUNFVWdhbmRhQGh0dHA6Ly9vc2RwLmFjZS5hYy51Zzo0NTAwLyIsImVMd2F6aVBpbG90SWxpZnVAaHR0cDovL2dhNGdoLXN0YXJ0ZXIta2l0LmlsaWZ1LmFjLnphOjQ1MDAvIl0sImlzcyI6Imh0dHA6Ly9nYTRnaC1zdGFydGVyLWtpdC5pbGlmdS5hYy56YTo0NTAwLyIsImV4cCI6MTcwMjI4NDU3MCwiaWF0IjoxNzAyMjgwOTcwLCJnYTRnaF9wYXNzcG9ydF92MSI6WyJleUowZVhBaU9pSktWMVFpTENKaGJHY2lPaUpJVXpJMU5pSjkuZXlKbllUUm5hRjkyYVhOaFgzWXhJanA3SW1GemMyVnlkR1ZrSWpveE56QXhPRGcwTnpjMkxDSjJhWE5oWDJsemMzVmxjaUk2SW1oMGRIQTZMeTlsYkhkaGVta3RibTlrWlM1cFkyVnliV0ZzYVM1dmNtYzZORFV3TUM4aUxDSmllU0k2SW1SaFl5SXNJbk52ZFhKalpTSTZJbWgwZEhBNkx5OWxiSGRoZW1rdGJtOWtaUzVwWTJWeWJXRnNhUzV2Y21jNk5EVXdNQzhpTENKMGVYQmxJam9pUTI5dWRISnZiR3hsWkVGalkyVnpjMGR5WVc1MGN5SXNJblpoYkhWbElqb2lhSFIwY0hNNkx5OWtiMmt1YjNKbkx6RXdMakV3TXpndmN6UXhORE14TFRBeE9DMHdNakU1TFhraUxDSjJhWE5oWDI1aGJXVWlPaUpsVEhkaGVt

### Accessing DRS objects using Passports

In [29]:
def getDRSObject(node, drs_object_id, passport_token):
    http_method = "POST"

    drs_objects_path = "/objects/"
    drs_base_url = "http://" + node + ":5000/ga4gh/drs/v1/"
    request_url = drs_base_url + drs_objects_path + drs_object_id
    request_body = json.dumps({
        "passports": [passport_token]
    })
    request_headers = {"Content-Type": "application/json"}

    print_head("{} request to {} with a passport that has selected visas".format(http_method, request_url))

    # GET request to /objects/{object_id} endpoint
    drs_object_response = requests.request(http_method, request_url, headers = request_headers, data = request_body)
    return drs_object_response

In [30]:
nodes = ['ga4gh-starter-kit.ilifu.ac.za', 'elwazi-node.icermali.org']
drs_objects = []
for node in nodes:
    print(node)
    drs_node = drs_servers[node]
    for drs_object in drs_node['drs_ids']:
        print(drs_object)
        obj_index = drs_servers[node]['drs_ids'].index(drs_object)
        object_id = drs_object.split("/")[-1]
        # drs_objects.append(object_id)
        drs_object = getDRSObject(node, object_id, passports[node]).json()
        # drs_servers[node]['drs_ids'][drs_object] = drs_object['access_methods'][0]['access_url']['url'].replace("file://", "")
        drs_servers[node]['drs_ids'][obj_index] = drs_object['access_methods'][0]['access_url']['url'].replace("file://", "")
        print(drs_object['access_methods'][0]['access_url']['url'].replace("file://", ""))
        # pretty_print_json(drs_object)
        # print(drs_object)

ga4gh-starter-kit.ilifu.ac.za
drs://ga4gh-starter-kit.ilifu.ac.za:5000/5a436bec951fab59dd975bcd10f316f1
[38;2;8;138;75mPOST request to http://ga4gh-starter-kit.ilifu.ac.za:5000/ga4gh/drs/v1//objects/5a436bec951fab59dd975bcd10f316f1 with a passport that has selected visas[0m
/share/elwazi/crams/HG01883/HG01883.final.chrX_15494566-15607236.cram
elwazi-node.icermali.org
drs://elwazi-node.icermali.org:5000/a68c60133f942881983d0e15827bf88f
[38;2;8;138;75mPOST request to http://elwazi-node.icermali.org:5000/ga4gh/drs/v1//objects/a68c60133f942881983d0e15827bf88f with a passport that has selected visas[0m
/share/elwazi/crams/HG01880/HG01880.final.chrX_15494566-15607236.cram
drs://elwazi-node.icermali.org:5000/45ca586b0921ffedf6a63679fbaacb68
[38;2;8;138;75mPOST request to http://elwazi-node.icermali.org:5000/ga4gh/drs/v1//objects/45ca586b0921ffedf6a63679fbaacb68 with a passport that has selected visas[0m
/share/elwazi/crams/HG01882/HG01882.final.chrX_15494566-15607236.cram
drs://elwazi-n

In [31]:
drs_servers

{'elwazi-node.icermali.org': {'drs_ids': ['/share/elwazi/crams/HG01880/HG01880.final.chrX_15494566-15607236.cram',
   '/share/elwazi/crams/HG01882/HG01882.final.chrX_15494566-15607236.cram',
   '/share/elwazi/crams/HG01886/HG01886.final.chrX_15494566-15607236.cram',
   '/share/elwazi/crams/HG01889/HG01889.final.chrX_15494566-15607236.cram',
   '/share/elwazi/crams/HG01890/HG01890.final.chrX_15494566-15607236.cram'],
  'total_file_size': 0,
  'ingress': 0,
  'run_id': 0,
  'drs_ids_str': '',
  'outputs': {}},
 'ga4gh-starter-kit.ilifu.ac.za': {'drs_ids': ['/share/elwazi/crams/HG01883/HG01883.final.chrX_15494566-15607236.cram'],
  'total_file_size': 0,
  'ingress': 0,
  'run_id': 0,
  'drs_ids_str': '',
  'outputs': {}}}

![Use case 1](use_case_2.png)

1) Launch flagstat runs. Use the dictionary structure generated previously. Launch workflow at WES endpoint on DRS objects only from that endpoint.

In [32]:
import time
# Populate with drs_ids_str
for drs_server in drs_servers:
    print ("Launching jobs on server: " + drs_server)
    drs_ids = drs_servers[drs_server]['drs_ids']
    drs_ids_str = ""
    for drs_id in drs_ids:
        drs_ids_str = drs_ids_str + drs_id + " "
    drs_ids_str = drs_ids_str[:-1]
#     print (drs_ids_str)
    drs_servers[drs_server]['drs_ids_str'] = drs_ids_str

# Launch workflow
for drs_server in drs_servers:
    wes_port = "6000"
    ga4gh_base_url = "http://" + drs_server + ":{}/ga4gh/{}/v1"
    wes_base_url = ga4gh_base_url.format(wes_port,"wes")

    runs_path = "/runs"

    http_method = "POST"
    request_url = wes_base_url + runs_path

    nextflow_workflow_url = "https://github.com/grbot/flagstat"

    input_file = drs_servers[drs_server]['drs_ids_str']
    
    data = {
        'workflow_type': 'NEXTFLOW',
        'workflow_type_version': '21.04.0',
        'workflow_url': nextflow_workflow_url,
        'workflow_params': f'{{"input":"{input_file}"}}'
    }

    print_head("{} request to {}".format(http_method, request_url))

    # Post a Nextflow workflow
    wes_post_workflow_response = requests.request(http_method, request_url, data = data)

    # print the response
    pretty_print_json(wes_post_workflow_response)

    current_run_id = wes_post_workflow_response.json()["run_id"]

    print_head("run_id = {}".format(current_run_id))

    ## We don't launch in parallel for now. Just poll a submitted job and retrieve the results
    http_method = "GET"
    request_url = wes_base_url + runs_path + "/" + current_run_id

    print_head("{} request to {}".format(http_method, request_url))

    # Get request to /runs/{run_id}
#     monitor_run_response = requests.request(http_method, request_url) 
    time.sleep(15)
    monitor_run_response = requests.request(http_method, request_url)
    
    # Poll until job is complete

    print(monitor_run_response.json())
    while monitor_run_response.json()["state"] != "COMPLETE":
        print("Current job status: " + monitor_run_response.json()["state"])
        time.sleep(5)
        monitor_run_response = requests.request(http_method, request_url)

    print("Job running status: " + monitor_run_response.json()["state"])
    pretty_print_json(monitor_run_response)

    drs_servers[drs_server]['run_id'] = current_run_id
    drs_servers[drs_server]['outputs'] = monitor_run_response.json()["outputs"]


Launching jobs on server: elwazi-node.icermali.org
Launching jobs on server: ga4gh-starter-kit.ilifu.ac.za
[38;2;8;138;75mPOST request to http://elwazi-node.icermali.org:6000/ga4gh/wes/v1/runs[0m
{
    "run_id": "0404782a-2020-41f2-bcfd-1af9506e756f"
}
[38;2;8;138;75mrun_id = 0404782a-2020-41f2-bcfd-1af9506e756f[0m
[38;2;8;138;75mGET request to http://elwazi-node.icermali.org:6000/ga4gh/wes/v1/runs/0404782a-2020-41f2-bcfd-1af9506e756f[0m
{'run_id': '0404782a-2020-41f2-bcfd-1af9506e756f', 'request': {'workflow_params': {'input': '/share/elwazi/crams/HG01880/HG01880.final.chrX_15494566-15607236.cram /share/elwazi/crams/HG01882/HG01882.final.chrX_15494566-15607236.cram /share/elwazi/crams/HG01886/HG01886.final.chrX_15494566-15607236.cram /share/elwazi/crams/HG01889/HG01889.final.chrX_15494566-15607236.cram /share/elwazi/crams/HG01890/HG01890.final.chrX_15494566-15607236.cram'}, 'workflow_type': 'NEXTFLOW', 'workflow_type_version': '21.04.0', 'workflow_url': 'https://github.com/grbot

2. a) Upload results to individual DRS servers and get a list of DRS objects (this will not work need to run 2 b) and explained there)

In [None]:
# importlib.reload(upload_to_drs)

# drs_ids_str = ""

# for drs_server in drs_servers:

#     run_id = drs_servers[drs_server]['run_id']
#     outputs = drs_servers[drs_server]["outputs"]

#     for key in outputs:
#         if '.flagstat' in key:
#             file = outputs[key][7:]
#             file_ext = file.split(".")[-1]
#             print (file)
#             meta_d = upload_to_drs.files_metadata_test(run_id, file, file_ext)
#             upload_to_drs.add_file_to_server(meta_d, file_ext, drs_server,'5001') #adds drs object
#             drs_id = "drs://" + drs_server + ":5000/" + meta_d[0][3]
#             drs_ids_str = drs_ids_str + " "  + drs_id
            
# drs_ids_str = drs_ids_str[:-1]

    

2. b) Upload results to individual DRS servers and get a list of DRS access URLs
   

In [33]:
import importlib
import upload_to_drs

importlib.reload(upload_to_drs)

drs_urls_str = ""

for drs_server in drs_servers:

    run_id = drs_servers[drs_server]['run_id']
    outputs = drs_servers[drs_server]["outputs"]

    for key in outputs:
        if '.flagstat' in key:
            print (key)
            file = outputs[key][7:]
            file_ext = file.split(".")[-1]
            meta_d = upload_to_drs.files_metadata_test(run_id, file, file_ext)
            upload_to_drs.add_file_to_server(meta_d, file_ext, drs_server,'5001') #adds drs object
            drs_id = meta_d[0][3]
            drs_port = 5000
            object_path_get = "/objects/{}"
            http_method = "GET"
            ga4gh_base_url = "http://" + drs_server + ":{}/ga4gh/{}/v1"
            drs_base_url = ga4gh_base_url.format(drs_port,"drs")
            request_url = drs_base_url + object_path_get.format(drs_id)
            #print_head("{} request to {}".format(http_method, request_url))
            drs_object_response = requests.request(http_method, request_url)
            #pretty_print_json(drs_object_response)
            data = drs_object_response.json()
            # We cannot use DRS objects here and need to directly stream. DRS objects only resolve to local path
            # and if path is not on server their would be a failure. The disadvantage of using stream is that you loose
            # the file naming.
            access_url = request_url + "/access/" + (data['access_methods'][1]['access_id'])
            drs_object_response = requests.request(http_method, access_url)
            drs_url = drs_object_response.json()["url"]
            drs_urls_str = drs_urls_str + drs_url + " "
    
drs_urls_str = drs_urls_str[:-1]
print(drs_urls_str)


HG01880.final.chrX_15494566-15607236.cram.flagstat
HG01890.final.chrX_15494566-15607236.cram.flagstat
HG01882.final.chrX_15494566-15607236.cram.flagstat
HG01889.final.chrX_15494566-15607236.cram.flagstat
HG01886.final.chrX_15494566-15607236.cram.flagstat
HG01883.final.chrX_15494566-15607236.cram.flagstat


KeyError: 'access_methods'

3. Launch the workflow that will combine the results

In [209]:
wes_port = "6000"
drs_server_central = "ga4gh-starter-kit.ilifu.ac.za"
ga4gh_base_url = "http://" + drs_server_central + ":{}/ga4gh/{}/v1"
wes_base_url = ga4gh_base_url.format(wes_port,"wes")

service_info_path = "/service-info"
runs_path = "/runs"

http_method = "POST"
request_url = wes_base_url + runs_path

nextflow_workflow_url = "https://github.com/grbot/multiqc"
input_file = drs_urls_str

#print (drs_str)

data = {
    'workflow_type': 'NEXTFLOW',
    'workflow_type_version': '21.04.0',
    'workflow_url': nextflow_workflow_url,
    'workflow_params': f'{{"input":"{input_file}"}}'
}

print_head("{} request to {}".format(http_method, request_url))

# Post a Nextflow workflow
wes_post_workflow_response = requests.request(http_method, request_url, data = data)

# print the response
pretty_print_json(wes_post_workflow_response)

current_run_id = wes_post_workflow_response.json()["run_id"]

print_head("run_id = {}".format(current_run_id))

[38;2;8;138;75mPOST request to http://ga4gh-starter-kit.ilifu.ac.za:6000/ga4gh/wes/v1/runs[0m
{
    "run_id": "0db7c152-8edf-4dd6-bc53-99b36952dfe1"
}
[38;2;8;138;75mrun_id = 0db7c152-8edf-4dd6-bc53-99b36952dfe1[0m


4. Poll for results

In [210]:
import time
# On first run might get the error not able to find ["state"]. This is due to underlying isssue below.
#{
#    "timestamp": "2023-07-27T17:10:08Z",
#    "status_code": 400,
#    "error": "Bad Request",
#    "msg": "Could not load WES run log"
#}
# Just rerun for now and then polling will start

http_method = "GET"
request_url = wes_base_url + runs_path + "/" + current_run_id

print_head("{} request to {}".format(http_method, request_url))

# Get request to /runs/{run_id}
monitor_run_response = requests.request(http_method, request_url)
time.sleep(15) # This delay resolves the issue mentioned above
monitor_run_response = requests.request(http_method, request_url)

print(monitor_run_response)

# Poll until job is complete
while monitor_run_response.json()["state"]!="COMPLETE":
    print("Current job status: " + monitor_run_response.json()["state"])
    time.sleep(5)
    monitor_run_response = requests.request(http_method, request_url)

print("Job running status: " + monitor_run_response.json()["state"])
pretty_print_json(monitor_run_response)

[38;2;8;138;75mGET request to http://ga4gh-starter-kit.ilifu.ac.za:6000/ga4gh/wes/v1/runs/0db7c152-8edf-4dd6-bc53-99b36952dfe1[0m
<Response [200]>
Job running status: COMPLETE
{
    "run_id": "0db7c152-8edf-4dd6-bc53-99b36952dfe1",
    "request": {
        "workflow_params": {
            "input": "http://elwazi-node.icermali.org:5000/ga4gh/drs/v1/stream/6f9a75e6e4e986f7418aaf7d56bec6e4/00605d2c-3bde-455a-bdc1-18680b526dbb http://elwazi-node.icermali.org:5000/ga4gh/drs/v1/stream/0a05695a4570a54efcd3d6b70963f3bf/add01a8d-b784-4b58-ad5c-5a9f0261f1d0 http://elwazi-node.icermali.org:5000/ga4gh/drs/v1/stream/556a09c030dbfd19c670c97c99653389/24363342-5891-431b-8879-5ad4cdf4e0d8 http://elwazi-node.icermali.org:5000/ga4gh/drs/v1/stream/bbe9a879b8ba187b3c383fb041a6d2e6/64a1dded-0ee8-44d0-a9d2-c642c8dfc359 http://elwazi-node.icermali.org:5000/ga4gh/drs/v1/stream/a6bbb32e20cffd27a44ee87562f513c2/9c76fce9-565b-4562-aa00-1322e0420aba "
        },
        "workflow_type": "NEXTFLOW",
        "work

5. Upload `multiqc_report.html` to central DRS server

In [211]:
import importlib
import upload_to_drs
importlib.reload(upload_to_drs)

run_id = monitor_run_response.json()['run_id']
outputs = monitor_run_response.json()["outputs"]

for key in outputs:
    if 'multiqc_report.html' in key:
        print (outputs[key])
        file = outputs[key][7:]
        file_ext = file.split(".")[-1]
        meta_d = upload_to_drs.files_metadata_test(run_id, file, file_ext)
        upload_to_drs.add_file_to_server(meta_d, file_ext, drs_server_central,'5001') #adds drs object
        drs_id = meta_d[0][3]

file:///opt/ga4gh-starter-kit-wes/wes_runs/0d/b7/c1/0db7c152-8edf-4dd6-bc53-99b36952dfe1/work/ae/f276c65dd413a0d724252df8cc2c1d/multiqc_report.html


6. Download `multiqc_report.html`

In [212]:
import urllib.request

drs_port = 5000

object_path_get = "/objects/{}"
http_method = "GET"
ga4gh_base_url = "http://" + drs_server_central + ":{}/ga4gh/{}/v1"
drs_base_url = ga4gh_base_url.format(drs_port,"drs")
request_url = drs_base_url + object_path_get.format(drs_id)
#print_head("{} request to {}".format(http_method, request_url))
drs_object_response = requests.request(http_method, request_url)
#pretty_print_json(drs_object_response)
data = drs_object_response.json()
access_url = request_url + "/access/" + (data['access_methods'][1]['access_id'])
#print(access_path)
drs_object_response = requests.request(http_method, access_url)
download_url = drs_object_response.json()["url"]
print(download_url)
urllib.request.urlretrieve(download_url, "multiqc_report_2.html")

KeyError: 'access_methods'