# Launching WES workflow on DRS objects based on a Data Connect query

## Check if endpoints are available

In [1]:
import requests
import json

def pretty_print_json(response):
    # pretty print JSON in blue color
    print("\033[38;2;8;75;138m"+json.dumps(response.json(), indent=4)+"\033[0m")
    
def print_head(text):
    # print in green color
    print("\033[38;2;8;138;75m"+text+"\033[0m")

1) Check if WES endpoints are available

In [2]:
node_ips = ['196.43.136.22', 'ga4gh-starter-kit.ilifu.ac.za'] #, 'osdp.ace.ac.ug', 'elwazi-node.icermali.org'] 
wes_port = "6000"
service_info_path = "/service-info"
runs_path = "/runs"
http_method = "GET"

for node_ip in node_ips:
    ga4gh_base_url = "http://" + node_ip + ":{}/ga4gh/{}/v1"
    wes_base_url = ga4gh_base_url.format(wes_port,"wes")
    request_url = wes_base_url+service_info_path
    print_head("{} request to {}".format(http_method, request_url))
    # GET request to service-info endpoint
    wes_service_info_resp = requests.request(http_method, request_url)
    # print the response
    pretty_print_json(wes_service_info_resp)

[38;2;8;138;75mGET request to http://196.43.136.22:6000/ga4gh/wes/v1/service-info[0m
[38;2;8;75;138m{
    "auth_instructions_url": "https://somewhere.org",
    "contact_info_url": "mailto:your@email.de",
    "default_workflow_engine_parameters": [
        {
            "default_value": null,
            "name": "NFL|22.10.0|accounting-name",
            "type": "Optional[str]"
        },
        {
            "default_value": null,
            "name": "NFL|22.10.0|job-name",
            "type": "Optional[str]"
        },
        {
            "default_value": null,
            "name": "NFL|22.10.0|group",
            "type": "Optional[str]"
        },
        {
            "default_value": null,
            "name": "NFL|22.10.0|queue",
            "type": "Optional[str]"
        },
        {
            "default_value": "true",
            "name": "NFL|22.10.0|trace",
            "type": "bool"
        },
        {
            "default_value": "true",
            "name": "NFL|22.10.

2) Check if DRS endpoints are available

In [3]:
node_ips = ['196.43.136.22', 'ga4gh-starter-kit.ilifu.ac.za'] # ,'osdp.ace.ac.ug','elwazi-node.icermali.org'] #, 
drs_port = "5000"
service_info_path = "/service-info"
http_method = "GET"

for node_ip in node_ips:
    ga4gh_base_url = "http://" + node_ip + ":{}/ga4gh/{}/v1"
    drs_base_url = ga4gh_base_url.format(drs_port,"drs")
    request_url = drs_base_url+service_info_path
    print_head("{} request to {}".format(http_method, request_url))
    # GET request to service-info endpoint
    drs_service_info_resp = requests.request(http_method, request_url)
    # print the response
    pretty_print_json(drs_service_info_resp)

[38;2;8;138;75mGET request to http://196.43.136.22:5000/ga4gh/drs/v1/service-info[0m
[38;2;8;75;138m{
    "id": "org.ga4gh.starterkit.drs",
    "name": "GA4GH Starter Kit DRS Service",
    "description": "An open source, community-driven implementation of the GA4GH Data Repository Service (DRS) API specification.",
    "contactUrl": "mailto:info@ga4gh.org",
    "documentationUrl": "https://github.com/ga4gh/ga4gh-starter-kit-drs",
    "createdAt": "2020-01-15T12:00:00Z",
    "updatedAt": "2020-01-15T12:00:00Z",
    "environment": "test",
    "version": "0.3.2",
    "type": {
        "group": "org.ga4gh",
        "artifact": "drs",
        "version": "1.3.0experimental"
    },
    "organization": {
        "name": "Global Alliance for Genomics and Health",
        "url": "https://ga4gh.org"
    }
}[0m
[38;2;8;138;75mGET request to http://ga4gh-starter-kit.ilifu.ac.za:5000/ga4gh/drs/v1/service-info[0m
[38;2;8;75;138m{
    "id": "org.ga4gh.starterkit.drs",
    "name": "GA4GH Starter

3) Check if Passport Broker endpoints are available

In [4]:
node_ips = ['ga4gh-starter-kit.ilifu.ac.za'] #, '196.43.136.22', 'osdp.ace.ac.ug', 'elwazi-node.icermali.org'] 
broker_port = "4500"
service_info_path = "/service-info"
http_method = "GET"

for node_ip in node_ips:
    ga4gh_base_url = "http://" + node_ip + ":{}/ga4gh/{}/v1"
    broker_base_url = ga4gh_base_url.format(broker_port,"passport")
    request_url = broker_base_url+service_info_path
    print_head("{} request to {}".format(http_method, request_url))
    # GET request to service-info endpoint
    broker_service_info_resp = requests.request(http_method, request_url)
    # print the response
    pretty_print_json(broker_service_info_resp)

[38;2;8;138;75mGET request to http://ga4gh-starter-kit.ilifu.ac.za:4500/ga4gh/passport/v1/service-info[0m
[38;2;8;75;138m{
    "id": "org.ga4gh.starterkit.passport.broker",
    "name": "GA4GH Starter Kit Passport Broker Service",
    "description": "Starter Kit implementation of a Passport Broker service, outlined in the GA4GH Passports specification. Manages researcher permissions to data and compute, and enables this information to be minted as JWTs and passed to downstream clearinghouses.",
    "contactUrl": "mailto:info@ga4gh.org",
    "documentationUrl": "https://github.com/ga4gh/ga4gh-starter-kit-passport-broker",
    "createdAt": "2022-04-28T09:00:00Z",
    "updatedAt": "2022-04-28T09:00:00Z",
    "environment": "test",
    "version": "0.0.2",
    "type": {
        "group": "org.ga4gh",
        "artifact": "passport-broker",
        "version": "1.0.0"
    },
    "organization": {
        "name": "Global Alliance for Genomics and Health",
        "url": "https://ga4gh.org"
   

## Check if DRS objects exists on endpoint

1. Check ga4gh-starter-kit.ilifu.ac.za

In [5]:
http_method = "GET"
node_ip = 'ga4gh-starter-kit.ilifu.ac.za'
drs_port = "5000"
# drs_ids = ['91860745a820a28b6c37d60432925bed','3520f407daf560b1732b9e026879eae4'] #authenticated
drs_ids = ['cf99eed44d272c7a9146a7f3216c8bd1','3520f407daf560b1732b9e026879eae4']
object_path_get = "/objects/{}"
object_path_post = "/objects"
access_path = "/objects/{}/access/{}"

for drs_id in drs_ids:
    ga4gh_base_url = "http://" + node_ip + ":{}/ga4gh/{}/v1"
    drs_base_url = ga4gh_base_url.format(drs_port,"drs")
    request_url = drs_base_url+object_path_get.format(drs_id)
    print_head("{} request to {}".format(http_method, request_url))
    # GET request to /objects/{object_id} endpoint
    drs_object_response = requests.request(http_method, request_url)
    # print the response
    pretty_print_json(drs_object_response)

[38;2;8;138;75mGET request to http://ga4gh-starter-kit.ilifu.ac.za:5000/ga4gh/drs/v1/objects/cf99eed44d272c7a9146a7f3216c8bd1[0m
[38;2;8;75;138m{
    "id": "cf99eed44d272c7a9146a7f3216c8bd1",
    "description": "Patient: HG04015, Country: ITU, Region: SAS, Sex: male\n",
    "created_time": "2023-12-05T18:19:02Z",
    "mime_type": "application/cram",
    "name": "HG04015.final.chrX_15494566-15607236",
    "size": 0,
    "updated_time": "2023-12-05T18:19:02Z",
    "checksums": [
        {
            "checksum": "704074d0d6d57371fc3f8ca7f1164feacb221165",
            "type": "sha1"
        },
        {
            "checksum": "2646f5e2bdbc2e7e028e506147eee561008bafd79285577aa4a6d714400ae920",
            "type": "sha256"
        },
        {
            "checksum": "cf99eed44d272c7a9146a7f3216c8bd1",
            "type": "md5"
        }
    ],
    "self_uri": "drs://ga4gh-starter-kit.ilifu.ac.za:5000/cf99eed44d272c7a9146a7f3216c8bd1",
    "access_methods": [
        {
            "acce

2.1 Check osdp.ace.ac.ug (ACE - Uganda)

In [6]:
http_method = "GET"
node_ip = 'osdp.ace.ac.ug'
drs_port = "5000"
# drs_ids = ['6fa43c7de04b60c1a73a42aa2efc977d','be145a60bc059c154475a2561af0df6b'] #authenticated
drs_ids = ['c4b39bfcd30c59b70beb090fe4fdc2a1', 'be145a60bc059c154475a2561af0df6b'] # 'c560c0bd4e98405adaf25534a588428b']
object_path_get = "/objects/{}"
object_path_post = "/objects"
access_path = "/objects/{}/access/{}"

for drs_id in drs_ids:
    ga4gh_base_url = "http://" + node_ip + ":{}/ga4gh/{}/v1"
    drs_base_url = ga4gh_base_url.format(drs_port,"drs")
    request_url = drs_base_url+object_path_get.format(drs_id)
    print_head("{} request to {}".format(http_method, request_url))
    # GET request to /objects/{object_id} endpoint
    drs_object_response = requests.request(http_method, request_url)
    # print the response
    pretty_print_json(drs_object_response)

[38;2;8;138;75mGET request to http://osdp.ace.ac.ug:5000/ga4gh/drs/v1/objects/c4b39bfcd30c59b70beb090fe4fdc2a1[0m
[38;2;8;75;138m{
    "id": "c4b39bfcd30c59b70beb090fe4fdc2a1",
    "description": "Patient: HG01070, Country: PUR, Region: AMR, Sex: female\n",
    "created_time": "2023-12-04T14:33:27Z",
    "mime_type": "application/cram",
    "name": "HG01070.final.chrX_15494566-15607236",
    "size": 0,
    "updated_time": "2023-12-04T14:33:27Z",
    "checksums": [
        {
            "checksum": "b60e07d928db349803de87ca5415703e2ff58ba8",
            "type": "sha1"
        },
        {
            "checksum": "be114a739d6d05b098ccd8ddc1e517bd22853c38b6a471f538d73191f29d0284",
            "type": "sha256"
        },
        {
            "checksum": "c4b39bfcd30c59b70beb090fe4fdc2a1",
            "type": "md5"
        }
    ],
    "self_uri": "drs://osdp.ace.ac.ug:5000/c4b39bfcd30c59b70beb090fe4fdc2a1",
    "access_methods": [
        {
            "access_url": {
                "

2.2 Check 196.43.136.22 (UVRI - Uganda)

In [6]:
http_method = "GET"
node_ip = '196.43.136.22' #UVRI - Uganda
drs_port = "5000"
drs_ids = ['1050d0443c2e83f9d9a8933481dcb405', 'cd9d286cc0d2bc9a8a5e9745ada572e5'] #'be04a5a90617aeae9a05fb533f544ade'] #"c0ad0a5bdfac52e0791329eedafc8f85", "c12d8d97cae50db36e8d8e0091b79713"
object_path_get = "/objects/{}"
object_path_post = "/objects"
access_path = "/objects/{}/access/{}"

for drs_id in drs_ids:
    ga4gh_base_url = "http://" + node_ip + ":{}/ga4gh/{}/v1"
    drs_base_url = ga4gh_base_url.format(drs_port,"drs")
    request_url = drs_base_url+object_path_get.format(drs_id)
    print_head("{} request to {}".format(http_method, request_url))
    # GET request to /objects/{object_id} endpoint
    drs_object_response = requests.request(http_method, request_url)
    # print the response
    pretty_print_json(drs_object_response)

[38;2;8;138;75mGET request to http://196.43.136.22:5000/ga4gh/drs/v1/objects/1050d0443c2e83f9d9a8933481dcb405[0m
[38;2;8;75;138m{
    "timestamp": "2024-03-27T12:03:41Z",
    "status_code": 401,
    "error": "Unauthorized",
    "msg": "Request for controlled data is missing user passport(s)"
}[0m
[38;2;8;138;75mGET request to http://196.43.136.22:5000/ga4gh/drs/v1/objects/cd9d286cc0d2bc9a8a5e9745ada572e5[0m
[38;2;8;75;138m{
    "id": "cd9d286cc0d2bc9a8a5e9745ada572e5",
    "description": "Patient: HG00737, Country: PUR, Region: AMR, Sex: female\n",
    "created_time": "2024-01-26T12:01:40Z",
    "mime_type": "application/cram",
    "name": "HG00737.final.chrX_15494566-15607236",
    "size": 0,
    "updated_time": "2024-01-26T12:01:40Z",
    "checksums": [
        {
            "checksum": "5135572ad2b328bcc9db0346898053a18436b9a8",
            "type": "sha1"
        },
        {
            "checksum": "08edccb80e21f7933b1d3f507438c6229be16ddb373ea1328a4f3119c2e21b4d",
         

3. Check elwazi-node.icermali.org (Mali)

In [7]:
http_method = "GET"
node_ip = 'elwazi-node.icermali.org'
drs_port = "5000"
# drs_ids = ['a68c60133f942881983d0e15827bf88f','168d353c6f474ca72e35e9209f921a59'] #authenticated
drs_ids = ['c276d04bacfaf6540b947340addf2670', '168d353c6f474ca72e35e9209f921a59']
object_path_get = "/objects/{}"
object_path_post = "/objects"
access_path = "/objects/{}/access/{}"

for drs_id in drs_ids:
    ga4gh_base_url = "http://" + node_ip + ":{}/ga4gh/{}/v1"
    drs_base_url = ga4gh_base_url.format(drs_port,"drs")
    request_url = drs_base_url+object_path_get.format(drs_id)
    print_head("{} request to {}".format(http_method, request_url))
    # GET request to /objects/{object_id} endpoint
    drs_object_response = requests.request(http_method, request_url)
    # print the response
    pretty_print_json(drs_object_response)


[38;2;8;138;75mGET request to http://elwazi-node.icermali.org:5000/ga4gh/drs/v1/objects/c276d04bacfaf6540b947340addf2670[0m


ConnectionError: HTTPConnectionPool(host='elwazi-node.icermali.org', port=5000): Max retries exceeded with url: /ga4gh/drs/v1/objects/c276d04bacfaf6540b947340addf2670 (Caused by NewConnectionError('<urllib3.connection.HTTPConnection object at 0x7ff374e92b00>: Failed to establish a new connection: [Errno 113] No route to host'))

## Launch workflow on test DRS objects from South-Africa, Mali and Uganda (using WES on ga4gh-starter-kit.ilifu.ac.za, elwazi-node.icermali.org or osdp.ace.ac.ug and 196.43.136.22)

1. Launch workflow
- Change the node_ip to run on a different WES endpoint
- Change the infput_file to run on different DRS objects

In [109]:
# node_ip = 'ga4gh-starter-kit.ilifu.ac.za'
# node_ip = 'elwazi-node.icermali.org'
# node_ip = 'osdp.ace.ac.ug'
node_ip = '196.43.136.22' #UVRI
wes_port = "6000"
service_info_path = "/service-info"
runs_path = "/runs"
http_method = "GET"
ga4gh_base_url = "http://" + node_ip + ":{}/ga4gh/{}/v1"
wes_base_url = ga4gh_base_url.format(wes_port,"wes")

http_method = "POST"
request_url = wes_base_url + runs_path

nextflow_workflow_url = "trs://dockstore.org:443/%23workflow%2Fgithub.com%2Fgrbot%2Fflagstat/main/NFL/main.nf"
# nextflow_workflow_url = "https://github.com/grbot/cram-qc"

#ACE-Mali
# input_file = "drs://elwazi-node.icermali.org:5000/a68c60133f942881983d0e15827bf88f drs://elwazi-node.icermali.org:5000/168d353c6f474ca72e35e9209f921a59"
# input_file = "drs://elwazi-node.icermali.org:5000/c276d04bacfaf6540b947340addf2670 drs://elwazi-node.icermali.org:5000/c080e08c01ec46446bb16801c7abe5b2"

#UVRI
# input_file = "drs://196.43.136.22:5000/cd9d286cc0d2bc9a8a5e9745ada572e5" # drs://196.43.136.22:5000/be04a5a90617aeae9a05fb533f544ade"
input_file = "drs://196.43.136.22:5000/cd9d286cc0d2bc9a8a5e9745ada572e5 drs://196.43.136.22:5000/cff48c0486ab98d29c31d44066e4fa1e"

#ilifu
# input_file = "drs://ga4gh-starter-kit.ilifu.ac.za:5000/cf99eed44d272c7a9146a7f3216c8bd1" # drs://ga4gh-starter-kit.ilifu.ac.za:5000/3520f407daf560b1732b9e026879eae4"
# input_file = "drs://ga4gh-starter-kit.ilifu.ac.za:5000/c3465aa1f0ef8773d5eede6c23e8e9b2 drs://ga4gh-starter-kit.ilifu.ac.za:5000/cf99eed44d272c7a9146a7f3216c8bd1"

#OSDP
# input_file = "drs://osdp.ace.ac.ug:5000/c3e615325a81b4568b1c355ab7b804d5 drs://osdp.ace.ac.ug:5000/cd2e5e057f112d609f901cdf151d8dee" 
# input_file = "drs://osdp.ace.ac.ug:5000/6fa43c7de04b60c1a73a42aa2efc977d drs://osdp.ace.ac.ug:5000/be145a60bc059c154475a2561af0df6b" 

data = {
    'workflow_type': 'NFL',
    'workflow_type_version': '22.10.0',
    'workflow_url': nextflow_workflow_url,
    'workflow_params': f'{{"input":"{input_file}"}}'
}

print_head("{} request to {}".format(http_method, request_url))

# Post a Nextflow workflow
wes_post_workflow_response = requests.request(http_method, request_url, data = data)

# print the response
pretty_print_json(wes_post_workflow_response)

current_run_id = wes_post_workflow_response.json()["run_id"]

print_head("run_id = {}".format(current_run_id))

[38;2;8;138;75mPOST request to http://196.43.136.22:6000/ga4gh/wes/v1/runs[0m
{
    "run_id": "7b5f3f64-efcb-423e-9f2e-7f6dacd32afd"
}
[38;2;8;138;75mrun_id = 7b5f3f64-efcb-423e-9f2e-7f6dacd32afd[0m


2. Check output

In [11]:
http_method = "GET"
request_url = wes_base_url + runs_path + "/" + current_run_id

print_head("{} request to {}".format(http_method, request_url))

# Get request to /runs/{run_id}
monitor_run_response = requests.request(http_method, request_url)

# print the response
pretty_print_json(monitor_run_response)

[38;2;8;138;75mGET request to http://ga4gh-starter-kit.ilifu.ac.za:6000/ga4gh/wes/v1/runs/e2c5d2fb-a759-47af-beb0-658da52fc42d[0m
[38;2;8;75;138m{
    "outputs": {
        "filesystem": [
            "dag.dot",
            "trace.txt",
            "timeline.html",
            "e2c5d2fb-a759-47af-beb0-658da52fc42d.yaml",
            ".nextflow.log",
            "report.html",
            ".nextflow/history",
            ".nextflow/cache/9457493d-c701-4320-b449-330c2a2a72b8/index.fervent_banach",
            ".nextflow/cache/9457493d-c701-4320-b449-330c2a2a72b8/db/000003.log",
            ".nextflow/cache/9457493d-c701-4320-b449-330c2a2a72b8/db/LOCK",
            ".nextflow/cache/9457493d-c701-4320-b449-330c2a2a72b8/db/MANIFEST-000002",
            ".nextflow/cache/9457493d-c701-4320-b449-330c2a2a72b8/db/CURRENT",
            "work/a9/d21026de62a3a4eacd56d5da740b8b/HG04176.final.chrX_15494566-15607236.cram.flagstat",
            "work/a9/d21026de62a3a4eacd56d5da740b8b/.command.log",
 

# Data Connect
1. Check service-info

In [4]:
import requests
import json

dc_port = "8089"
dc_base_url = "http://ga4gh-starter-kit.ilifu.ac.za:{}".format(dc_port)


service_info_path = "/service-info"
tables_path = "/tables"
table_info_path = "/table/{}/info"
table_data_path = "/table/{}/data"
search_path = "/search"

def pretty_print_json(response):
    print(json.dumps(response.json(), indent=4))

In [5]:
dc_service_info_resp = requests.request("GET", dc_base_url+service_info_path)
print(dc_base_url+service_info_path)
pretty_print_json(dc_service_info_resp)

http://ga4gh-starter-kit.ilifu.ac.za:8089/service-info
{
    "id": "",
    "name": "GA4GH Discovery Search API",
    "description": "",
    "documentationUrl": "",
    "contactUrl": "",
    "version": ""
}


2. Check data table we will be querying

In [6]:
dc_service_info_resp = requests.request("GET", dc_base_url+'/table/trino.public.genome_ilifu/info')
pretty_print_json(dc_service_info_resp)

{
    "name": "trino.public.genome_ilifu",
    "description": "Automatically generated schema",
    "data_model": {
        "$id": "http://ga4gh-starter-kit.ilifu.ac.za:8089/table/trino.public.genome_ilifu/info",
        "description": "Automatically generated schema",
        "$schema": "http://json-schema.org/draft-07/schema#",
        "properties": {
            "sample_id": {
                "format": "varchar",
                "type": "string",
                "$comment": "varchar"
            },
            "population_id": {
                "format": "varchar",
                "type": "string",
                "$comment": "varchar"
            },
            "super_population_id": {
                "format": "varchar",
                "type": "string",
                "$comment": "varchar"
            },
            "sex": {
                "format": "varchar",
                "type": "string",
                "$comment": "varchar"
            },
            "cram_drs_id": {
   

### Select address and id functions

In [7]:
import re

def get_address(s):
    address = s.replace("drs://","")
    address = re.sub(':.*', '', address)
    return address
    
def get_drs_id(s):
    drs_id = re.sub('.*/', '', s)
    return drs_id

### Access via Data Connect

1. Do query
Select CRAM DRS ids for all African samples. Limit search to 10 samples for now.

In [8]:
import requests, json
q2 = {
  "query": "select cram_drs_id from trino.public.genome_ilifu where super_population_id='AFR' limit 50",
  "parameters": []
}
r = requests.post("http://ga4gh-starter-kit.ilifu.ac.za:8089/search", json = q2)
print(json.dumps(r.json(), indent=3))
data = r.json()

{
   "data": [],
   "pagination": {
      "next_page_url": "http://ga4gh-starter-kit.ilifu.ac.za:8089/search/v1/statement/queued/20240412_154124_00016_46krn/yabd8613029bae105f8b37de7d0d576bb9b49d3f8/1?queryJobId=20240412_154124_00016_46krn"
   }
}


2. Parse through pages to get results

In [9]:
next_page = data['pagination']['next_page_url']
# print (next_page)

# Poll till we get results
while not 'executing' in next_page:
    dc_service_info_resp = requests.request("GET", next_page)
    data = dc_service_info_resp.json()
    next_page = data['pagination']['next_page_url']
    # print (data)

dc_service_info_resp = requests.request("GET", next_page)
# pretty_print_json(dc_service_info_resp)

data = dc_service_info_resp.json()
next_page = data['pagination']['next_page_url']
dc_service_info_resp = requests.request("GET", next_page)
data = dc_service_info_resp.json()
# pretty_print_json(dc_service_info_resp)

drs_str = ""
drs_ids = []
for i in range(len(data['data'])):
    # print (data['data'][i]['cram_drs_id'])
    drs_ids.append(data['data'][i]['cram_drs_id'])
    drs_str = drs_str + data['data'][i]['cram_drs_id'] + " "

drs_str = drs_str[:-1]


In [10]:
drs_str

'drs://osdp.ace.ac.ug:5000/6fa43c7de04b60c1a73a42aa2efc977d drs://elwazi-node.icermali.org:5000/a68c60133f942881983d0e15827bf88f drs://elwazi-node.icermali.org:5000/45ca586b0921ffedf6a63679fbaacb68 drs://ga4gh-starter-kit.ilifu.ac.za:5000/5a436bec951fab59dd975bcd10f316f1 drs://osdp.ace.ac.ug:5000/be145a60bc059c154475a2561af0df6b drs://elwazi-node.icermali.org:5000/d36019bb63182abad672205a140f7e83 drs://elwazi-node.icermali.org:5000/b809bb9b81a9583ec67e787b0449e9bd drs://elwazi-node.icermali.org:5000/168d353c6f474ca72e35e9209f921a59 drs://osdp.ace.ac.ug:5000/9a45659fe478e5bb39d1dd1b08bd1807 drs://osdp.ace.ac.ug:5000/82adbcf7cc72c31a86e65d73bf6ef81b drs://196.43.136.22:5000/3c37780022d0ff19763fe4ca0036debf drs://ga4gh-starter-kit.ilifu.ac.za:5000/7ee2c5a8e3ba499d101003644327f4c7 drs://196.43.136.22:5000/99e313c5d4752f48f2c62d3b6026000a drs://osdp.ace.ac.ug:5000/651234e69ba43ecf06daef1806b4a956 drs://elwazi-node.icermali.org:5000/8a027c16f1b135b4d32f434151cca41c drs://elwazi-node.icermali

3. Map DRS server to DRS objects

In [151]:
drs_servers = {}
for drs_id in drs_ids:
    address = get_address(drs_id)
    if address not in drs_servers:
        drs_servers[address] = {}
        drs_servers[address]['drs_ids'] = []
        drs_servers[address]['drs_ids'].append(drs_id)
        drs_servers[address]['total_file_size'] = 0
        drs_servers[address]['ingress'] = 0
        drs_servers[address]['run_id'] = 0
        drs_servers[address]['drs_ids_str'] = ""
        drs_servers[address]['outputs'] = {}
    else:
        drs_servers[address]['drs_ids'].append(drs_id)
print (drs_servers)

{'osdp.ace.ac.ug': {'drs_ids': ['drs://osdp.ace.ac.ug:5000/6fa43c7de04b60c1a73a42aa2efc977d', 'drs://osdp.ace.ac.ug:5000/be145a60bc059c154475a2561af0df6b', 'drs://osdp.ace.ac.ug:5000/9a45659fe478e5bb39d1dd1b08bd1807', 'drs://osdp.ace.ac.ug:5000/82adbcf7cc72c31a86e65d73bf6ef81b', 'drs://osdp.ace.ac.ug:5000/651234e69ba43ecf06daef1806b4a956', 'drs://osdp.ace.ac.ug:5000/d686d0f3ae901650e32ab4a67986d8b9', 'drs://osdp.ace.ac.ug:5000/d2235a913ccf2216db33e14c3f9c8359', 'drs://osdp.ace.ac.ug:5000/243ce167a1fa85f7b9b884c87db7289f', 'drs://osdp.ace.ac.ug:5000/b1477540aabf7e5252132679ecb84bb5', 'drs://osdp.ace.ac.ug:5000/8b641d9b15e441eea8d8e37482815f08'], 'total_file_size': 0, 'ingress': 0, 'run_id': 0, 'drs_ids_str': '', 'outputs': {}}, 'elwazi-node.icermali.org': {'drs_ids': ['drs://elwazi-node.icermali.org:5000/a68c60133f942881983d0e15827bf88f', 'drs://elwazi-node.icermali.org:5000/45ca586b0921ffedf6a63679fbaacb68', 'drs://elwazi-node.icermali.org:5000/d36019bb63182abad672205a140f7e83', 'drs:/

In [152]:
#Exclude OSDP, ACE-UGANDA server currently unavailable
drs_servers.pop('osdp.ace.ac.ug')
drs_servers.pop('elwazi-node.icermali.org')
print (drs_servers)
# for server in drs_servers:
#     print(server)

{'ga4gh-starter-kit.ilifu.ac.za': {'drs_ids': ['drs://ga4gh-starter-kit.ilifu.ac.za:5000/5a436bec951fab59dd975bcd10f316f1', 'drs://ga4gh-starter-kit.ilifu.ac.za:5000/7ee2c5a8e3ba499d101003644327f4c7', 'drs://ga4gh-starter-kit.ilifu.ac.za:5000/6e04749ff551dea033c661e6228cc5dc', 'drs://ga4gh-starter-kit.ilifu.ac.za:5000/d76782899a42c56e47f4915f87a5ad84', 'drs://ga4gh-starter-kit.ilifu.ac.za:5000/bc305dafd0690251780cdf0196e271e5', 'drs://ga4gh-starter-kit.ilifu.ac.za:5000/ec7d550ac8b9a1c5752843d644bc8cd3', 'drs://ga4gh-starter-kit.ilifu.ac.za:5000/700359ca3597a743ef2c4799a5628596', 'drs://ga4gh-starter-kit.ilifu.ac.za:5000/dccf480c2edab37b0ae7df28cfc64287', 'drs://ga4gh-starter-kit.ilifu.ac.za:5000/91b7a7c07c89f514ab246816db689a19', 'drs://ga4gh-starter-kit.ilifu.ac.za:5000/7b806ea853be20cc06ac11876f475ee2', 'drs://ga4gh-starter-kit.ilifu.ac.za:5000/3cf0023533d33900ebe8f0ccde3ef67c', 'drs://ga4gh-starter-kit.ilifu.ac.za:5000/ff1f3a1bc2240d70fecc9d1ccad97179', 'drs://ga4gh-starter-kit.ilif

### Use case
### Now we initiate a run in a gather/scatter/federated manner
#### - Runs are initiated on individual nodes (calculate flagstat)
#### - Output is gathered and MultiQC are run on the flagstat results on one WES endpoint

## Passport information

#### GET User id



In [20]:
user_id = input("Enter your user id \n") 
#userId=a6828597-5816-46d3-83ef-665f2ceb9588(ilifu)
#userId=2bebbaa7-ec2f-42e2-bfef-4ef73e6b058f (UVRI)

Enter your user id 
a6828597-5816-46d3-83ef-665f2ceb9588


In [118]:
drs_admin_port = "5001" #drs admin port
broker_admin_port = "4501" #broker admin port
broker_nodes = ['ga4gh-starter-kit.ilifu.ac.za'] #, 'elwazi-node.icermali.org', '196.43.136.22']
ga4gh_admin_url = "http://{}:{}/admin/ga4gh/{}/v1/"

for node in broker_nodes:
    broker_base_url = ga4gh_base_url.format(node, broker_port,"passport")
    broker_admin_url = ga4gh_admin_url.format(node, broker_admin_port,"passport")

    print(broker_admin_url)

http://ga4gh-starter-kit.ilifu.ac.za:4501/admin/ga4gh/passport/v1/


### Get available Users  & Visas

In [119]:
# broker_admin_url = 'http://196.43.136.22:4501/admin/ga4gh/passport/v1/'
broker_admin_url = 'http://ga4gh-starter-kit.ilifu.ac.za:4501/admin/ga4gh/passport/v1/'
# broker_admin_url = 'http://elwazi-node.icermali.org:4501/admin/ga4gh/passport/v1/'

request_url = broker_admin_url+ "visas/" # + "4b342cec-e141-4d56-9930-b49cbb80c303"
http_method = "GET"
drs_object_response = requests.request(http_method, request_url)
# print the response
pretty_print_json(drs_object_response)

[
    {
        "id": "33d54e45-bf1b-40d8-93be-57142aa5c949",
        "visaName": "eLwaziPilotIlifu",
        "visaIssuer": "http://ga4gh-starter-kit.ilifu.ac.za:4500/",
        "visaDescription": "elwazi pilot project: passports demo, ilifu",
        "visaSecret": "6B652F7C251E44BA288B62EB44D3F"
    },
    {
        "id": "4b342cec-e141-4d56-9930-b49cbb80c303",
        "visaName": "eLwaziPilotACEMali",
        "visaIssuer": "http://elwazi-node.icermali.org:4500/",
        "visaDescription": "elwazi pilot project: passports demo, icermali",
        "visaSecret": "3A9B13AEF16661D99E464C57ECDB1"
    },
    {
        "id": "510c790d-af4f-4910-8d88-05b0784d1fbf",
        "visaName": "eLwaziPilotACEUganda",
        "visaIssuer": "http://osdp.ace.ac.ug:4500/",
        "visaDescription": "elwazi pilot project: passports demo, osdp",
        "visaSecret": "ACD6733BB84C29E9A3CE579F23313"
    },
    {
        "id": "7ecf7d8a-7b00-4d54-ac0f-249837f308f1",
        "visaName": "eLwaziPilotUVRI",
  

In [105]:
# broker_admin_url = 'http://196.43.136.22:4501/admin/ga4gh/passport/v1/'
broker_admin_url = 'http://ga4gh-starter-kit.ilifu.ac.za:4501/admin/ga4gh/passport/v1/'
# broker_admin_url = 'http://elwazi-node.icermali.org:4501/admin/ga4gh/passport/v1/'

request_url = broker_admin_url+ "users/" #+ user_id
# request_url = broker_admin_url+ "visas/" + "4b342cec-e141-4d56-9930-b49cbb80c303"
http_method = "GET"
drs_object_response = requests.request(http_method, request_url)
# print the response
pretty_print_json(drs_object_response)

[
    {
        "id": "a6828597-5816-46d3-83ef-665f2ceb9588"
    },
    {
        "id": "61954297-3958-4c43-bf95-2ded779e8fa3"
    },
    {
        "id": "c653e05b-c590-4cbe-a3c9-7402a5242010"
    },
    {
        "id": "5ce78708-fbd2-40eb-9e3e-ccb72796fc05"
    },
    {
        "id": "1a4dde85-c449-4ffb-bf94-68909e551be7"
    },
    {
        "id": "c7624f37-ad87-4817-b1bb-a81e3cee2721"
    },
    {
        "id": "95fa8625-26af-465d-b8e7-209f1a869a63"
    },
    {
        "id": "6f48d2fb-5339-431b-a933-53347b39ab5e"
    },
    {
        "id": "bdb61bd2-081d-48bb-934a-6211ba7eb927"
    },
    {
        "id": "5b014a24-afa6-41ca-bf7e-97d132a40051"
    },
    {
        "id": "69738f71-e18b-4625-87fc-90391ede25b6"
    },
    {
        "id": "2bebbaa7-ec2f-42e2-bfef-4ef73e6b058f"
    }
]


### GET Passport Token for select visas

In [120]:
http_method = "POST"

mint_path_post = "mint/"

drs_admin_port = "5001" #drs admin port
broker_admin_port = "4501" #broker admin port
broker_nodes = ['ga4gh-starter-kit.ilifu.ac.za']#, 'elwazi-node.icermali.org', '196.43.136.22']
ga4gh_admin_url = "http://{}:{}/admin/ga4gh/{}/v1/"
passports ={}

for node in broker_nodes:
    broker_base_url = ga4gh_base_url.format(node, broker_port,"passport")
    broker_admin_url = ga4gh_admin_url.format(node, broker_admin_port,"passport")

    print(broker_admin_url)
    
    request_url = broker_admin_url + mint_path_post
    print(request_url)

    request_body = json.dumps({
        "researcherId": user_id,
        "requestedVisas": ["4b342cec-e141-4d56-9930-b49cbb80c303", "510c790d-af4f-4910-8d88-05b0784d1fbf", 
                           "33d54e45-bf1b-40d8-93be-57142aa5c949", "7ecf7d8a-7b00-4d54-ac0f-249837f308f1"]
    })
    request_headers = {"Content-Type": "application/json"}

    print_head("{} a request to {} to get passport token from selected visas".format(http_method, request_url))

    # POST visas to the /mint endpoint
    passport_token_response = requests.request(
        http_method, 
        request_url,
        headers = request_headers,
        data = request_body
    )

    passports[node] = passport_token_response.text
    # print the response
    # pretty_print_json(passport_token_response)
    # print(passport_token)

http://ga4gh-starter-kit.ilifu.ac.za:4501/admin/ga4gh/passport/v1/
http://ga4gh-starter-kit.ilifu.ac.za:4501/admin/ga4gh/passport/v1/mint/
[38;2;8;138;75mPOST a request to http://ga4gh-starter-kit.ilifu.ac.za:4501/admin/ga4gh/passport/v1/mint/ to get passport token from selected visas[0m


In [121]:
request_body

'{"researcherId": "a6828597-5816-46d3-83ef-665f2ceb9588", "requestedVisas": ["4b342cec-e141-4d56-9930-b49cbb80c303", "510c790d-af4f-4910-8d88-05b0784d1fbf", "33d54e45-bf1b-40d8-93be-57142aa5c949", "7ecf7d8a-7b00-4d54-ac0f-249837f308f1"]}'

In [122]:
passports #check passport tokens

{'ga4gh-starter-kit.ilifu.ac.za': 'eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJzY29wZSI6Im9wZW5pZCIsImNvbnRhaW5lZF92aXNhcyI6WyJlTHdhemlQaWxvdEFDRU1hbGlAaHR0cDovL2Vsd2F6aS1ub2RlLmljZXJtYWxpLm9yZzo0NTAwLyIsImVMd2F6aVBpbG90QUNFVWdhbmRhQGh0dHA6Ly9vc2RwLmFjZS5hYy51Zzo0NTAwLyIsImVMd2F6aVBpbG90SWxpZnVAaHR0cDovL2dhNGdoLXN0YXJ0ZXIta2l0LmlsaWZ1LmFjLnphOjQ1MDAvIiwiZUx3YXppUGlsb3RVVlJJQGh0dHA6Ly8xOTYuNDMuMTM2LjIyOjQ1MDAvIl0sImlzcyI6Imh0dHA6Ly9nYTRnaC1zdGFydGVyLWtpdC5pbGlmdS5hYy56YTo0NTAwLyIsImV4cCI6MTcxMTU1Mzg5MSwiaWF0IjoxNzExNTUwMjkxLCJnYTRnaF9wYXNzcG9ydF92MSI6WyJleUowZVhBaU9pSktWMVFpTENKaGJHY2lPaUpJVXpJMU5pSjkuZXlKbllUUm5hRjkyYVhOaFgzWXhJanA3SW1GemMyVnlkR1ZrSWpveE56QXhPRGcwTnpjMkxDSjJhWE5oWDJsemMzVmxjaUk2SW1oMGRIQTZMeTlsYkhkaGVta3RibTlrWlM1cFkyVnliV0ZzYVM1dmNtYzZORFV3TUM4aUxDSmllU0k2SW1SaFl5SXNJbk52ZFhKalpTSTZJbWgwZEhBNkx5OWxiSGRoZW1rdGJtOWtaUzVwWTJWeWJXRnNhUzV2Y21jNk5EVXdNQzhpTENKMGVYQmxJam9pUTI5dWRISnZiR3hsWkVGalkyVnpjMGR5WVc1MGN5SXNJblpoYkhWbElqb2lhSFIwY0hNNkx5OWtiMmt1YjNKbkx6RXdMakV3TXpndmN6UXhO

### Accessing DRS objects using Passports

In [123]:
def getDRSObject(node, drs_object_id, passport_token):
    http_method = "POST"

    drs_objects_path = "/objects/"
    drs_base_url = "http://" + node + ":5000/ga4gh/drs/v1/"
    request_url = drs_base_url + drs_objects_path + drs_object_id
    request_body = json.dumps({
        "passports": [passport_token]
    })
    request_headers = {"Content-Type": "application/json"}

    print_head("{} request to {} with a passport that has selected visas".format(http_method, request_url))

    # GET request to /objects/{object_id} endpoint
    drs_object_response = requests.request(http_method, request_url, headers = request_headers, data = request_body)
    return drs_object_response

In [153]:
nodes = ['ga4gh-starter-kit.ilifu.ac.za', '196.43.136.22'] #'elwazi-node.icermali.org']
drs_objects = []
for node in nodes:
    print(node)
    drs_node = drs_servers[node]
    for drs_object in drs_node['drs_ids']:
        print(drs_object)
        obj_index = drs_servers[node]['drs_ids'].index(drs_object)
        object_id = drs_object.split("/")[-1]
        # drs_objects.append(object_id)
        drs_object = getDRSObject(node, object_id, passports['ga4gh-starter-kit.ilifu.ac.za']).json()
#         drs_object = getDRSObject(node, object_id, passports[node]).json() #for multiple brokers
        print(drs_object)
        # drs_servers[node]['drs_ids'][drs_object] = drs_object['access_methods'][0]['access_url']['url'].replace("file://", "")
        drs_servers[node]['drs_ids'][obj_index] = drs_object['access_methods'][0]['access_url']['url'].replace("file://", "")
        print(drs_object['access_methods'][0]['access_url']['url'].replace("file://", ""))
        # pretty_print_json(drs_object)
        # print(drs_object)

ga4gh-starter-kit.ilifu.ac.za
drs://ga4gh-starter-kit.ilifu.ac.za:5000/5a436bec951fab59dd975bcd10f316f1
[38;2;8;138;75mPOST request to http://ga4gh-starter-kit.ilifu.ac.za:5000/ga4gh/drs/v1//objects/5a436bec951fab59dd975bcd10f316f1 with a passport that has selected visas[0m
{'id': '5a436bec951fab59dd975bcd10f316f1', 'description': 'Patient: HG01883, Country: ACB, Region: AFR, Sex: female\n', 'created_time': '2023-12-05T18:19:02Z', 'mime_type': 'application/cram', 'name': 'HG01883.final.chrX_15494566-15607236', 'size': 0, 'updated_time': '2023-12-05T18:19:02Z', 'checksums': [{'checksum': '789133eb2d6c421fd0fb753f9cbf8093788f5f25', 'type': 'sha1'}, {'checksum': '0ee02c1db461b8d41381c65aba46ed72777d86ed4cd4ea6982dc5f974aabeb2f', 'type': 'sha256'}, {'checksum': '5a436bec951fab59dd975bcd10f316f1', 'type': 'md5'}], 'self_uri': 'drs://ga4gh-starter-kit.ilifu.ac.za:5000/5a436bec951fab59dd975bcd10f316f1', 'access_methods': [{'access_url': {'url': 'file:///share/elwazi/crams/HG01883/HG01883.fi

{'id': 'ff1f3a1bc2240d70fecc9d1ccad97179', 'description': 'Patient: HG02317, Country: ACB, Region: AFR, Sex: male\n', 'created_time': '2023-12-05T18:19:02Z', 'mime_type': 'application/cram', 'name': 'HG02317.final.chrX_15494566-15607236', 'size': 0, 'updated_time': '2023-12-05T18:19:02Z', 'checksums': [{'checksum': 'da6a958d033f801937965bac18048243afee48a8', 'type': 'sha1'}, {'checksum': '6a2080f104c55ab8a1055c0d7a8133a9c2b2eead83dfb9a800cbaefed7891462', 'type': 'sha256'}, {'checksum': 'ff1f3a1bc2240d70fecc9d1ccad97179', 'type': 'md5'}], 'self_uri': 'drs://ga4gh-starter-kit.ilifu.ac.za:5000/ff1f3a1bc2240d70fecc9d1ccad97179', 'access_methods': [{'access_url': {'url': 'file:///share/elwazi/crams/HG02317/HG02317.final.chrX_15494566-15607236.cram'}, 'type': 'file'}, {'access_id': 'a55a016a-b39d-4081-abaa-047345f92e72', 'type': 'https'}]}
/share/elwazi/crams/HG02317/HG02317.final.chrX_15494566-15607236.cram
drs://ga4gh-starter-kit.ilifu.ac.za:5000/590b8f0cce9e2ba656ea12c88ce8a5fc
[38;2;8;1

{'id': '127f410958c55239c5aa3a533bcfc86e', 'description': 'Patient: HG02143, Country: ACB, Region: AFR, Sex: male\n', 'created_time': '2024-01-26T12:01:40Z', 'mime_type': 'application/cram', 'name': 'HG02143.final.chrX_15494566-15607236', 'size': 0, 'updated_time': '2024-01-26T12:01:40Z', 'checksums': [{'checksum': 'f55fd6cc144c1bfb07ba281541db12f475ad0d95', 'type': 'sha1'}, {'checksum': 'c3afcb79599eb97ba7dad22f2f2cc2426ebd501c4faed9ddfa34ae4694b25e70', 'type': 'sha256'}, {'checksum': '127f410958c55239c5aa3a533bcfc86e', 'type': 'md5'}], 'self_uri': 'drs://196.43.136.22:5000/127f410958c55239c5aa3a533bcfc86e', 'access_methods': [{'access_url': {'url': 'file:///share/elwazi/crams/HG02143/HG02143.final.chrX_15494566-15607236.cram'}, 'type': 'file'}, {'access_id': 'a03e6e46-a9a4-4f04-a5ef-3b15554228ee', 'type': 'https'}]}
/share/elwazi/crams/HG02143/HG02143.final.chrX_15494566-15607236.cram
drs://196.43.136.22:5000/c6d2d854e092f41cc1dc4792f4012685
[38;2;8;138;75mPOST request to http://196

In [154]:
drs_servers

{'ga4gh-starter-kit.ilifu.ac.za': {'drs_ids': ['/share/elwazi/crams/HG01883/HG01883.final.chrX_15494566-15607236.cram',
   '/share/elwazi/crams/HG01914/HG01914.final.chrX_15494566-15607236.cram',
   '/share/elwazi/crams/HG02010/HG02010.final.chrX_15494566-15607236.cram',
   '/share/elwazi/crams/HG02013/HG02013.final.chrX_15494566-15607236.cram',
   '/share/elwazi/crams/HG02014/HG02014.final.chrX_15494566-15607236.cram',
   '/share/elwazi/crams/HG02108/HG02108.final.chrX_15494566-15607236.cram',
   '/share/elwazi/crams/HG02144/HG02144.final.chrX_15494566-15607236.cram',
   '/share/elwazi/crams/HG02283/HG02283.final.chrX_15494566-15607236.cram',
   '/share/elwazi/crams/HG02307/HG02307.final.chrX_15494566-15607236.cram',
   '/share/elwazi/crams/HG02308/HG02308.final.chrX_15494566-15607236.cram',
   '/share/elwazi/crams/HG02315/HG02315.final.chrX_15494566-15607236.cram',
   '/share/elwazi/crams/HG02317/HG02317.final.chrX_15494566-15607236.cram',
   '/share/elwazi/crams/HG02318/HG02318.fina

![Use case 1](use_case_2.png)

1) Launch flagstat runs. Use the dictionary structure generated previously. Launch workflow at WES endpoint on DRS objects only from that endpoint.

In [155]:
import time
# Populate with drs_ids_str
for drs_server in drs_servers:
    print ("Launching jobs on server: " + drs_server)
    drs_ids = drs_servers[drs_server]['drs_ids']
    drs_ids_str = ""
    for drs_id in drs_ids:
        drs_ids_str = drs_ids_str + drs_id + " "
    drs_ids_str = drs_ids_str[:-1]
#     print (drs_ids_str)
    drs_servers[drs_server]['drs_ids_str'] = drs_ids_str

# Launch workflow
for drs_server in drs_servers:
    wes_port = "6000"
    ga4gh_base_url = "http://" + drs_server + ":{}/ga4gh/{}/v1"
    wes_base_url = ga4gh_base_url.format(wes_port,"wes")

    runs_path = "/runs"

    http_method = "POST"
    request_url = wes_base_url + runs_path

#     nextflow_workflow_url = "https://github.com/grbot/flagstat"
    nextflow_workflow_url = "trs://dockstore.org:443/%23workflow%2Fgithub.com%2Fgrbot%2Fflagstat/main/NFL/main.nf"

    input_file = drs_servers[drs_server]['drs_ids_str']
    
    data = {
        'workflow_type': 'NFL',
        'workflow_type_version': '22.10.0',
        'workflow_url': nextflow_workflow_url,
        'workflow_params': f'{{"input":"{input_file}"}}'
    }

    print_head("{} request to {}".format(http_method, request_url))

    # Post a Nextflow workflow
    wes_post_workflow_response = requests.request(http_method, request_url, data = data)

    # print the response
    pretty_print_json(wes_post_workflow_response)

    current_run_id = wes_post_workflow_response.json()["run_id"]

    print_head("run_id = {}".format(current_run_id))

    ## We don't launch in parallel for now. Just poll a submitted job and retrieve the results
    http_method = "GET"
    request_url = wes_base_url + runs_path + "/" + current_run_id

    print_head("{} request to {}".format(http_method, request_url))

    # Get request to /runs/{run_id}
#     monitor_run_response = requests.request(http_method, request_url) 
    time.sleep(120)
    monitor_run_response = requests.request(http_method, request_url)
    
    # Poll until job is complete

    print(monitor_run_response.json())
    while monitor_run_response.json()["state"] != "COMPLETE":
        print("Current job status: " + monitor_run_response.json()["state"])
        time.sleep(15)
        monitor_run_response = requests.request(http_method, request_url)

    print("Job running status: " + monitor_run_response.json()["state"])
    pretty_print_json(monitor_run_response)

    drs_servers[drs_server]['run_id'] = current_run_id
    drs_servers[drs_server]['outputs'] = monitor_run_response.json()["outputs"]["filesystem"]
#     drs_servers[drs_server]['outputs'] = monitor_run_response.json()["outputs"]


Launching jobs on server: ga4gh-starter-kit.ilifu.ac.za
Launching jobs on server: 196.43.136.22
[38;2;8;138;75mPOST request to http://ga4gh-starter-kit.ilifu.ac.za:6000/ga4gh/wes/v1/runs[0m
{
    "run_id": "d4f1cef4-5c57-4bbc-ad30-5df2248cdcd7"
}
[38;2;8;138;75mrun_id = d4f1cef4-5c57-4bbc-ad30-5df2248cdcd7[0m
[38;2;8;138;75mGET request to http://ga4gh-starter-kit.ilifu.ac.za:6000/ga4gh/wes/v1/runs/d4f1cef4-5c57-4bbc-ad30-5df2248cdcd7[0m
{'outputs': {'filesystem': ['d4f1cef4-5c57-4bbc-ad30-5df2248cdcd7.yaml', 'dag.dot', 'trace.txt', 'timeline.html', '.nextflow.log', 'report.html', '.nextflow/history', '.nextflow/cache/b549ceee-fe33-4afb-9bdc-6f9f5b87617a/index.hungry_booth', '.nextflow/cache/b549ceee-fe33-4afb-9bdc-6f9f5b87617a/db/000003.log', '.nextflow/cache/b549ceee-fe33-4afb-9bdc-6f9f5b87617a/db/LOCK', '.nextflow/cache/b549ceee-fe33-4afb-9bdc-6f9f5b87617a/db/MANIFEST-000002', '.nextflow/cache/b549ceee-fe33-4afb-9bdc-6f9f5b87617a/db/CURRENT', 'work/7e/b6e19c569d539b8b3340223e15

{
    "run_id": "7db994dc-6b18-4a71-a66e-ed5d289abc66"
}
[38;2;8;138;75mrun_id = 7db994dc-6b18-4a71-a66e-ed5d289abc66[0m
[38;2;8;138;75mGET request to http://196.43.136.22:6000/ga4gh/wes/v1/runs/7db994dc-6b18-4a71-a66e-ed5d289abc66[0m
{'outputs': {'filesystem': ['.nextflow.log', 'trace.txt', 'report.html', '7db994dc-6b18-4a71-a66e-ed5d289abc66.yaml', 'dag.dot', 'timeline.html', 'work/ba/d2c56455214c3dc9d95f56a78283f3/.command.sh', 'work/ba/d2c56455214c3dc9d95f56a78283f3/.command.out', 'work/ba/d2c56455214c3dc9d95f56a78283f3/.command.err', 'work/ba/d2c56455214c3dc9d95f56a78283f3/.command.trace', 'work/ba/d2c56455214c3dc9d95f56a78283f3/HG02282.final.chrX_15494566-15607236.cram.flagstat', 'work/ba/d2c56455214c3dc9d95f56a78283f3/.command.log', 'work/ba/d2c56455214c3dc9d95f56a78283f3/.exitcode', 'work/ba/d2c56455214c3dc9d95f56a78283f3/HG02282.final.chrX_15494566-15607236.cram', 'work/ba/d2c56455214c3dc9d95f56a78283f3/.command.begin', 'work/ba/d2c56455214c3dc9d95f56a78283f3/.command.run'

In [156]:
drs_servers

{'ga4gh-starter-kit.ilifu.ac.za': {'drs_ids': ['/share/elwazi/crams/HG01883/HG01883.final.chrX_15494566-15607236.cram',
   '/share/elwazi/crams/HG01914/HG01914.final.chrX_15494566-15607236.cram',
   '/share/elwazi/crams/HG02010/HG02010.final.chrX_15494566-15607236.cram',
   '/share/elwazi/crams/HG02013/HG02013.final.chrX_15494566-15607236.cram',
   '/share/elwazi/crams/HG02014/HG02014.final.chrX_15494566-15607236.cram',
   '/share/elwazi/crams/HG02108/HG02108.final.chrX_15494566-15607236.cram',
   '/share/elwazi/crams/HG02144/HG02144.final.chrX_15494566-15607236.cram',
   '/share/elwazi/crams/HG02283/HG02283.final.chrX_15494566-15607236.cram',
   '/share/elwazi/crams/HG02307/HG02307.final.chrX_15494566-15607236.cram',
   '/share/elwazi/crams/HG02308/HG02308.final.chrX_15494566-15607236.cram',
   '/share/elwazi/crams/HG02315/HG02315.final.chrX_15494566-15607236.cram',
   '/share/elwazi/crams/HG02317/HG02317.final.chrX_15494566-15607236.cram',
   '/share/elwazi/crams/HG02318/HG02318.fina

2. a) Upload results to individual DRS servers and get a list of DRS objects (this will not work need to run 2 b) and explained there)

In [None]:
# importlib.reload(upload_to_drs)

# drs_ids_str = ""

# for drs_server in drs_servers:

#     run_id = drs_servers[drs_server]['run_id']
#     outputs = drs_servers[drs_server]["outputs"]

#     for key in outputs:
#         if '.flagstat' in key:
#             file = outputs[key][7:]
#             file_ext = file.split(".")[-1]
#             print (file)
#             meta_d = upload_to_drs.files_metadata_test(run_id, file, file_ext)
#             upload_to_drs.add_file_to_server(meta_d, file_ext, drs_server,'5001') #adds drs object
#             drs_id = "drs://" + drs_server + ":5000/" + meta_d[0][3]
#             drs_ids_str = drs_ids_str + " "  + drs_id
            
# drs_ids_str = drs_ids_str[:-1]

    

2. b) Upload results to individual DRS servers and get a list of DRS access URLs
   

In [157]:
import importlib
import upload_to_drs

importlib.reload(upload_to_drs)

drs_urls_str = ""
# drs_servers_active = ['elwazi-node.icermali.org']
for drs_server in drs_servers:
    print(drs_server)

    run_id = drs_servers[drs_server]['run_id']
    outputs = drs_servers[drs_server]["outputs"]
    # if drs_server in drs_servers_active:
    for output_file in outputs:
        if '.flagstat' in output_file:
            print (output_file)
            #this should be rectified, weskit doesn't share complete absolute path, this varies for each run
            file_path="/opt/weskit/api/tests/data/" 
            file = file_path + run_id[:4] + "/" + run_id +"/" + output_file #absolute path has run_id[:4]/run_id/output path from weskit
#             file = outputs[key][7:]
            print(file)
            file_ext = file.split(".")[-1]
            meta_d = upload_to_drs.files_metadata_test(run_id, file, file_ext)
            upload_to_drs.add_file_to_server(meta_d, file_ext, drs_server,'5001') #adds drs object
            drs_id = meta_d[0][3]
            print(drs_id)
            drs_port = 5000
            object_path_get = "/objects/{}"
            http_method = "GET"
            ga4gh_base_url = "http://" + drs_server + ":{}/ga4gh/{}/v1"
            drs_base_url = ga4gh_base_url.format(drs_port,"drs")
            request_url = drs_base_url + object_path_get.format(drs_id)
            #print_head("{} request to {}".format(http_method, request_url))
            drs_object_response = requests.request(http_method, request_url)
            #pretty_print_json(drs_object_response)
            data = drs_object_response.json()
            # We cannot use DRS objects here and need to directly stream. DRS objects only resolve to local path
            # and if path is not on server their would be a failure. The disadvantage of using stream is that you loose
            # the file naming.
            access_url = request_url + "/access/" + (data['access_methods'][1]['access_id'])
            drs_object_response = requests.request(http_method, access_url)
            drs_url = drs_object_response.json()["url"]
            drs_urls_str = drs_urls_str + drs_url + " "
    
drs_urls_str = drs_urls_str[:-1]
print(drs_urls_str)


ga4gh-starter-kit.ilifu.ac.za
work/7e/b6e19c569d539b8b3340223e156a6c/HG02010.final.chrX_15494566-15607236.cram.flagstat
/opt/weskit/api/tests/data/d4f1/d4f1cef4-5c57-4bbc-ad30-5df2248cdcd7/work/7e/b6e19c569d539b8b3340223e156a6c/HG02010.final.chrX_15494566-15607236.cram.flagstat
f95c2ad3a39c2e4c9faaa8132e971600
work/d4/135f28c737f18fbdd9850d12a06f3d/HG02013.final.chrX_15494566-15607236.cram.flagstat
/opt/weskit/api/tests/data/d4f1/d4f1cef4-5c57-4bbc-ad30-5df2248cdcd7/work/d4/135f28c737f18fbdd9850d12a06f3d/HG02013.final.chrX_15494566-15607236.cram.flagstat
5491d16f42440f4572627e3b30d173e7
work/29/ac0e3c452af5002aca8c2dc2d55c02/HG02315.final.chrX_15494566-15607236.cram.flagstat
/opt/weskit/api/tests/data/d4f1/d4f1cef4-5c57-4bbc-ad30-5df2248cdcd7/work/29/ac0e3c452af5002aca8c2dc2d55c02/HG02315.final.chrX_15494566-15607236.cram.flagstat
90ff075ba6df76bed608174038e7b26b
work/c5/d4333f2c9ca882471564d962413d3b/HG02317.final.chrX_15494566-15607236.cram.flagstat
/opt/weskit/api/tests/data/d4f1/d4

3. Launch the workflow that will combine the results

In [158]:
wes_port = "6000"
drs_server_central = "ga4gh-starter-kit.ilifu.ac.za"
ga4gh_base_url = "http://" + drs_server_central + ":{}/ga4gh/{}/v1"
wes_base_url = ga4gh_base_url.format(wes_port,"wes")

service_info_path = "/service-info"
runs_path = "/runs"

http_method = "POST"
request_url = wes_base_url + runs_path

nextflow_workflow_url = "trs://dockstore.org:443/%23workflow%2Fgithub.com%2Fgrbot%2Fmultiqc/main/NFL/main.nf"#"#workflow/github.com/grbot/multiqc"
# nextflow_workflow_url = "https://github.com/grbot/multiqc"
input_file = drs_urls_str

#print (drs_str)

data = {
    'workflow_type': 'NFL',
    'workflow_type_version': '22.10.0',
    'workflow_url': nextflow_workflow_url,
    'workflow_params': f'{{"input":"{input_file}"}}'
}

print_head("{} request to {}".format(http_method, request_url))

# Post a Nextflow workflow
wes_post_workflow_response = requests.request(http_method, request_url, data = data)

# print the response
pretty_print_json(wes_post_workflow_response)

current_run_id = wes_post_workflow_response.json()["run_id"]

print_head("run_id = {}".format(current_run_id))

[38;2;8;138;75mPOST request to http://ga4gh-starter-kit.ilifu.ac.za:6000/ga4gh/wes/v1/runs[0m
{
    "run_id": "c6e4b3f7-e956-40c7-8133-318b95e2b1af"
}
[38;2;8;138;75mrun_id = c6e4b3f7-e956-40c7-8133-318b95e2b1af[0m


4. Poll for results

In [159]:
import time
# On first run might get the error not able to find ["state"]. This is due to underlying isssue below.
#{
#    "timestamp": "2023-07-27T17:10:08Z",
#    "status_code": 400,
#    "error": "Bad Request",
#    "msg": "Could not load WES run log"
#}
# Just rerun for now and then polling will start

http_method = "GET"
request_url = wes_base_url + runs_path + "/" + current_run_id

print_head("{} request to {}".format(http_method, request_url))

# Get request to /runs/{run_id}
monitor_run_response = requests.request(http_method, request_url)
time.sleep(15) # This delay resolves the issue mentioned above
monitor_run_response = requests.request(http_method, request_url)

print(monitor_run_response)

# Poll until job is complete
while monitor_run_response.json()["state"]!="COMPLETE":
    print("Current job status: " + monitor_run_response.json()["state"])
    time.sleep(5)
    monitor_run_response = requests.request(http_method, request_url)

print("Job running status: " + monitor_run_response.json()["state"])
pretty_print_json(monitor_run_response)

[38;2;8;138;75mGET request to http://ga4gh-starter-kit.ilifu.ac.za:6000/ga4gh/wes/v1/runs/c6e4b3f7-e956-40c7-8133-318b95e2b1af[0m
<Response [200]>
Job running status: COMPLETE
{
    "outputs": {
        "filesystem": [
            "c6e4b3f7-e956-40c7-8133-318b95e2b1af.yaml",
            "dag.dot",
            "trace.txt",
            "timeline.html",
            ".nextflow.log",
            "report.html",
            ".nextflow/history",
            ".nextflow/cache/b1e00886-d0bd-4c15-a4d4-14ed6a3b5609/index.festering_davinci",
            ".nextflow/cache/b1e00886-d0bd-4c15-a4d4-14ed6a3b5609/db/000003.log",
            ".nextflow/cache/b1e00886-d0bd-4c15-a4d4-14ed6a3b5609/db/LOCK",
            ".nextflow/cache/b1e00886-d0bd-4c15-a4d4-14ed6a3b5609/db/MANIFEST-000002",
            ".nextflow/cache/b1e00886-d0bd-4c15-a4d4-14ed6a3b5609/db/CURRENT",
            "work/ea/c5adc8972a5b2884b9dc46bb0a4c4b/1fed394f-caca-47b3-9126-33f6a885cd78",
            "work/ea/c5adc8972a5b2884b9dc46bb0a4c

5. Upload `multiqc_report.html` to central DRS server

In [172]:
import importlib
import upload_to_drs
importlib.reload(upload_to_drs)

run_id = monitor_run_response.json()['run_id']
outputs = monitor_run_response.json()["outputs"]["filesystem"]
# outputs = monitor_run_response.json()["outputs"]

for output_file in outputs:
    if 'multiqc_report.html' in output_file:
        print (output_file)
        file_path="/opt/weskit/api/tests/data/" 
        file = file_path + run_id[:4] + "/" + run_id +"/" + output_file #absolute path has run_id[:4]/run_id/output path from weskit
#         file = outputs[key][7:]
        file_ext = file.split(".")[-1]
        meta_d = upload_to_drs.files_metadata_test(run_id, file, file_ext)
        upload_to_drs.add_file_to_server(meta_d, file_ext, drs_server_central,'5001') #adds drs object
        drs_id = meta_d[0][3]

work/ea/c5adc8972a5b2884b9dc46bb0a4c4b/multiqc_report.html


6. Download `multiqc_report.html`

In [174]:
import urllib.request

drs_port = 5000

object_path_get = "/objects/{}"
http_method = "GET"
ga4gh_base_url = "http://" + drs_server_central + ":{}/ga4gh/{}/v1"
drs_base_url = ga4gh_base_url.format(drs_port,"drs")
request_url = drs_base_url + object_path_get.format(drs_id)
#print_head("{} request to {}".format(http_method, request_url))
drs_object_response = requests.request(http_method, request_url)
#pretty_print_json(drs_object_response)
data = drs_object_response.json()
access_url = request_url + "/access/" + (data['access_methods'][1]['access_id'])
#print(access_path)
drs_object_response = requests.request(http_method, access_url)
download_url = drs_object_response.json()["url"]
print(download_url)
urllib.request.urlretrieve(download_url, "multiqc_report_2.html")

http://ga4gh-starter-kit.ilifu.ac.za:5000/ga4gh/drs/v1/stream/963a0b8c77137786d32bb7d63b113586/ded9dea4-9b7b-4e72-a728-399bbf5453ae


('multiqc_report_2.html', <http.client.HTTPMessage at 0x7ff35fd3be80>)