# Launching WES workflow on DRS objects based on a Data Connect query

## Check if WES endpoints are available

In [363]:
import requests
import json

def pretty_print_json(response):
    """Print the JSON body of ``response`` in blue, indented by four spaces.

    ``response`` is any object exposing a ``json()`` method whose result is
    JSON-serialisable.
    """
    blue, reset = "\033[38;2;8;75;138m", "\033[0m"
    body = json.dumps(response.json(), indent=4)
    print(blue + body + reset)
    
def print_head(text):
    """Print the string ``text`` in green; used as a heading above each request."""
    green, reset = "\033[38;2;8;138;75m", "\033[0m"
    print("".join((green, text, reset)))

In [412]:
# Availability check: query the WES /service-info endpoint of every node.
node_ips = ['ga4gh-starter-kit.ilifu.ac.za','154.114.10.54','154.114.10.62']
wes_port = "6000"
service_info_path = "/service-info"
runs_path = "/runs"
http_method = "GET"
# Seconds to wait for a node before reporting it as unavailable; without a
# timeout a node that never answers would block the cell indefinitely.
request_timeout = 10

for node_ip in node_ips:
    ga4gh_base_url = "http://" + node_ip + ":{}/ga4gh/{}/v1"
    wes_base_url = ga4gh_base_url.format(wes_port,"wes")
    request_url = wes_base_url+service_info_path
    print_head("{} request to {}".format(http_method, request_url))
    try:
        # GET request to service-info endpoint
        wes_service_info_resp = requests.request(http_method, request_url, timeout=request_timeout)
        # print the response
        pretty_print_json(wes_service_info_resp)
    except (requests.exceptions.RequestException, ValueError) as err:
        # Report the failing node and keep checking the remaining ones, so one
        # unreachable node does not hide the status of the others.
        # ValueError covers a reply whose body is not JSON.
        print("WES endpoint not available at {}: {}".format(request_url, err))

[38;2;8;138;75mGET request to http://ga4gh-starter-kit.ilifu.ac.za:6000/ga4gh/wes/v1/service-info[0m
[38;2;8;75;138m{
    "id": "org.ga4gh.starterkit.wes",
    "name": "GA4GH Starter Kit WES Service",
    "description": "An open source, community-driven implementation of the GA4GH Workflow Execution Service (WES)API specification.",
    "contactUrl": "mailto:info@ga4gh.org",
    "documentationUrl": "https://github.com/ga4gh/ga4gh-starter-kit-wes",
    "createdAt": "2020-01-15T12:00:00Z",
    "updatedAt": "2020-01-15T12:00:00Z",
    "environment": "test",
    "version": "0.3.2",
    "type": {
        "group": "org.ga4gh",
        "artifact": "wes",
        "version": "1.0.1"
    },
    "organization": {
        "name": "Global Alliance for Genomics and Health",
        "url": "https://ga4gh.org"
    },
    "workflow_type_versions": {
        "WDL": [
            "1.0"
        ],
        "NEXTFLOW": [
            "21.04.0"
        ]
    },
    "workflow_engine_versions": {
        "NA

## Check if DRS endpoints are available

In [413]:
# Availability check: query the DRS /service-info endpoint of every node.
node_ips = ['ga4gh-starter-kit.ilifu.ac.za','154.114.10.54','154.114.10.62']
drs_port = "5000"
service_info_path = "/service-info"
http_method = "GET"
# Seconds to wait for a node before reporting it as unavailable; without a
# timeout a node that never answers would block the cell indefinitely.
request_timeout = 10

for node_ip in node_ips:
    ga4gh_base_url = "http://" + node_ip + ":{}/ga4gh/{}/v1"
    drs_base_url = ga4gh_base_url.format(drs_port,"drs")
    request_url = drs_base_url+service_info_path
    print_head("{} request to {}".format(http_method, request_url))
    try:
        # GET request to service-info endpoint
        drs_service_info_resp = requests.request(http_method, request_url, timeout=request_timeout)
        # print the response
        pretty_print_json(drs_service_info_resp)
    except (requests.exceptions.RequestException, ValueError) as err:
        # Report the failing node and keep checking the remaining ones, so one
        # unreachable node does not hide the status of the others.
        # ValueError covers a reply whose body is not JSON.
        print("DRS endpoint not available at {}: {}".format(request_url, err))

[38;2;8;138;75mGET request to http://ga4gh-starter-kit.ilifu.ac.za:5000/ga4gh/drs/v1/service-info[0m
[38;2;8;75;138m{
    "id": "org.ga4gh.starterkit.drs",
    "name": "GA4GH Starter Kit DRS Service",
    "description": "An open source, community-driven implementation of the GA4GH Data Repository Service (DRS) API specification.",
    "contactUrl": "mailto:info@ga4gh.org",
    "documentationUrl": "https://github.com/ga4gh/ga4gh-starter-kit-drs",
    "createdAt": "2020-01-15T12:00:00Z",
    "updatedAt": "2020-01-15T12:00:00Z",
    "environment": "test",
    "version": "0.3.2",
    "type": {
        "group": "org.ga4gh",
        "artifact": "drs",
        "version": "1.3.0experimental"
    },
    "organization": {
        "name": "Global Alliance for Genomics and Health",
        "url": "https://ga4gh.org"
    }
}[0m
[38;2;8;138;75mGET request to http://154.114.10.54:5000/ga4gh/drs/v1/service-info[0m
[38;2;8;75;138m{
    "id": "org.ga4gh.starterkit.drs",
    "name": "GA4GH Starter

## Check if DRS objects exist on endpoint

### Check ga4gh-starter-kit.ilifu.ac.za

In [390]:
# Look up two known DRS objects on ga4gh-starter-kit.ilifu.ac.za and print
# the metadata the server returns for each.
http_method = "GET"
node_ip = 'ga4gh-starter-kit.ilifu.ac.za'
drs_port = "5000"
drs_ids = [
    'c542689dba1e54669335c8e25abe6207',
    '5a436bec951fab59dd975bcd10f316f1',
]
object_path_get = "/objects/{}"
object_path_post = "/objects"
access_path = "/objects/{}/access/{}"

# The base URL does not depend on the object id, so it is built once up front.
ga4gh_base_url = "http://" + node_ip + ":{}/ga4gh/{}/v1"
drs_base_url = ga4gh_base_url.format(drs_port, "drs")

for drs_id in drs_ids:
    # GET /objects/{object_id}
    request_url = drs_base_url + object_path_get.format(drs_id)
    print_head("{} request to {}".format(http_method, request_url))
    drs_object_response = requests.request(http_method, request_url)
    pretty_print_json(drs_object_response)

[38;2;8;138;75mGET request to http://ga4gh-starter-kit.ilifu.ac.za:5000/ga4gh/drs/v1/objects/c542689dba1e54669335c8e25abe6207[0m
[38;2;8;75;138m{
    "id": "c542689dba1e54669335c8e25abe6207",
    "description": "Patient: HG01857, Country: KHV, Region: EAS, Sex: female\n",
    "created_time": "2023-07-24T13:45:58Z",
    "mime_type": "application/cram",
    "name": "HG01857.final.chrX_15494566-15607236",
    "size": 500309,
    "updated_time": "2023-07-24T13:45:58Z",
    "checksums": [
        {
            "checksum": "70ef6da9822aecf071acda427a61e31cbbf8f25b",
            "type": "sha1"
        },
        {
            "checksum": "b9e9f9a1f85de2ee57ba05d3f75de865458c2645284e42deef20de38b9e6a37c",
            "type": "sha256"
        },
        {
            "checksum": "c542689dba1e54669335c8e25abe6207",
            "type": "md5"
        }
    ],
    "self_uri": "drs://ga4gh-starter-kit.ilifu.ac.za:5000/c542689dba1e54669335c8e25abe6207",
    "access_methods": [
        {
          

### Check 154.114.10.54 (Uganda)

In [414]:
# Look up two known DRS objects on 154.114.10.54 and print the metadata the
# server returns for each.
http_method = "GET"
node_ip = '154.114.10.54'
drs_port = "5000"
drs_ids = [
    '6fa43c7de04b60c1a73a42aa2efc977d',
    'be145a60bc059c154475a2561af0df6b',
]
object_path_get = "/objects/{}"
object_path_post = "/objects"
access_path = "/objects/{}/access/{}"

# The base URL does not depend on the object id, so it is built once up front.
ga4gh_base_url = "http://" + node_ip + ":{}/ga4gh/{}/v1"
drs_base_url = ga4gh_base_url.format(drs_port, "drs")

for drs_id in drs_ids:
    # GET /objects/{object_id}
    request_url = drs_base_url + object_path_get.format(drs_id)
    print_head("{} request to {}".format(http_method, request_url))
    drs_object_response = requests.request(http_method, request_url)
    pretty_print_json(drs_object_response)

[38;2;8;138;75mGET request to http://154.114.10.54:5000/ga4gh/drs/v1/objects/6fa43c7de04b60c1a73a42aa2efc977d[0m
[38;2;8;75;138m{
    "id": "6fa43c7de04b60c1a73a42aa2efc977d",
    "description": "Patient: HG01879, Country: ACB, Region: AFR, Sex: male\n",
    "created_time": "2023-07-26T13:08:05Z",
    "mime_type": "application/cram",
    "name": "HG01879.final.chrX_15494566-15607236",
    "size": 309197,
    "updated_time": "2023-07-26T13:08:05Z",
    "checksums": [
        {
            "checksum": "af30841a49bba8733cf1f070ff725dd9cdfc91f1",
            "type": "sha1"
        },
        {
            "checksum": "a61d4570f6d210220fd14b4f7744df46d6dce61df0da35782b528660def996f1",
            "type": "sha256"
        },
        {
            "checksum": "6fa43c7de04b60c1a73a42aa2efc977d",
            "type": "md5"
        }
    ],
    "self_uri": "drs://154.114.10.54:5000/6fa43c7de04b60c1a73a42aa2efc977d",
    "access_methods": [
        {
            "access_url": {
                

### Check 154.114.10.62 (Mali)

In [415]:
# Look up two known DRS objects on 154.114.10.62 and print the metadata the
# server returns for each.
http_method = "GET"
node_ip = '154.114.10.62'
drs_port = "5000"
drs_ids = [
    'a68c60133f942881983d0e15827bf88f',
    '168d353c6f474ca72e35e9209f921a59',
]
object_path_get = "/objects/{}"
object_path_post = "/objects"
access_path = "/objects/{}/access/{}"

# The base URL does not depend on the object id, so it is built once up front.
ga4gh_base_url = "http://" + node_ip + ":{}/ga4gh/{}/v1"
drs_base_url = ga4gh_base_url.format(drs_port, "drs")

for drs_id in drs_ids:
    # GET /objects/{object_id}
    request_url = drs_base_url + object_path_get.format(drs_id)
    print_head("{} request to {}".format(http_method, request_url))
    drs_object_response = requests.request(http_method, request_url)
    pretty_print_json(drs_object_response)


[38;2;8;138;75mGET request to http://154.114.10.62:5000/ga4gh/drs/v1/objects/a68c60133f942881983d0e15827bf88f[0m
[38;2;8;75;138m{
    "id": "a68c60133f942881983d0e15827bf88f",
    "description": "Patient: HG01880, Country: ACB, Region: AFR, Sex: female\n",
    "created_time": "2023-07-26T12:59:15Z",
    "mime_type": "application/cram",
    "name": "HG01880.final.chrX_15494566-15607236",
    "size": 449026,
    "updated_time": "2023-07-26T12:59:15Z",
    "checksums": [
        {
            "checksum": "5747e183e64f2fe4d87da568fd30d0159086d1fd",
            "type": "sha1"
        },
        {
            "checksum": "50284d24e63c3c859f1e3d46a8aa54869e82f6fb37099926f2968fe7e64c15d7",
            "type": "sha256"
        },
        {
            "checksum": "a68c60133f942881983d0e15827bf88f",
            "type": "md5"
        }
    ],
    "self_uri": "drs://154.114.10.62:5000/a68c60133f942881983d0e15827bf88f",
    "access_methods": [
        {
            "access_url": {
              

## Launch workflow on test DRS objects from South Africa, Mali and Uganda (using WES on ga4gh-starter-kit.ilifu.ac.za, 154.114.10.54 or 154.114.10.62)

#### - Change the node_ip to run on a different WES endpoint
#### - Change the input_file to run on different DRS objects

In [549]:
# WES endpoint the workflow is submitted to; swap the active line to target
# another node.
#node_ip = 'ga4gh-starter-kit.ilifu.ac.za'
node_ip = '154.114.10.62'
#node_ip = '154.114.10.54'
wes_port = "6000"
service_info_path = "/service-info"
runs_path = "/runs"
ga4gh_base_url = "http://" + node_ip + ":{}/ga4gh/{}/v1"
wes_base_url = ga4gh_base_url.format(wes_port,"wes")

http_method = "POST"
request_url = wes_base_url + runs_path

nextflow_workflow_url = "https://github.com/grbot/cram-qc"
# Space-separated DRS URIs handed to the workflow; swap the active line to run
# on a different set of objects.
#input_file = "drs://ga4gh-starter-kit.ilifu.ac.za:5000/c542689dba1e54669335c8e25abe6207 drs://ga4gh-starter-kit.ilifu.ac.za:5000/5a436bec951fab59dd975bcd10f316f1"
#input_file = "drs://154.114.10.62:5000/a68c60133f942881983d0e15827bf88f drs://154.114.10.62:5000/168d353c6f474ca72e35e9209f921a59"
input_file = "drs://ga4gh-starter-kit.ilifu.ac.za:5000/c542689dba1e54669335c8e25abe6207 drs://ga4gh-starter-kit.ilifu.ac.za:5000/5a436bec951fab59dd975bcd10f316f1 drs://154.114.10.54:5000/6fa43c7de04b60c1a73a42aa2efc977d drs://154.114.10.54:5000/be145a60bc059c154475a2561af0df6b drs://154.114.10.62:5000/a68c60133f942881983d0e15827bf88f drs://154.114.10.62:5000/168d353c6f474ca72e35e9209f921a59"


data = {
    'workflow_type': 'NEXTFLOW',
    'workflow_type_version': '21.04.0',
    'workflow_url': nextflow_workflow_url,
    # Serialise with json.dumps so quotes or backslashes in input_file are
    # escaped; the compact separators keep the text identical to the previous
    # hand-built string for plain inputs.
    'workflow_params': json.dumps({"input": input_file}, separators=(',', ':'))
}

print_head("{} request to {}".format(http_method, request_url))

# Post a Nextflow workflow
wes_post_workflow_response = requests.request(http_method, request_url, data = data)

# print the response
pretty_print_json(wes_post_workflow_response)

# Kept for the following cell, which fetches this run's record
current_run_id = wes_post_workflow_response.json()["run_id"]

print_head("run_id = {}".format(current_run_id))

[38;2;8;138;75mPOST request to http://154.114.10.62:6000/ga4gh/wes/v1/runs[0m
[38;2;8;75;138m{
    "run_id": "8f1314d4-3361-4a45-b885-dc9891e54f79"
}[0m
[38;2;8;138;75mrun_id = 8f1314d4-3361-4a45-b885-dc9891e54f79[0m


## Check output

In [551]:
# Fetch the record of the run submitted above (GET /runs/{run_id}) and print it.
http_method = "GET"
request_url = "/".join((wes_base_url + runs_path, current_run_id))

print_head("{} request to {}".format(http_method, request_url))

monitor_run_response = requests.request(http_method, request_url)
pretty_print_json(monitor_run_response)

[38;2;8;138;75mGET request to http://154.114.10.62:6000/ga4gh/wes/v1/runs/8f1314d4-3361-4a45-b885-dc9891e54f79[0m
[38;2;8;75;138m{
    "run_id": "8f1314d4-3361-4a45-b885-dc9891e54f79",
    "request": {
        "workflow_params": {
            "input": "drs://ga4gh-starter-kit.ilifu.ac.za:5000/c542689dba1e54669335c8e25abe6207 drs://ga4gh-starter-kit.ilifu.ac.za:5000/5a436bec951fab59dd975bcd10f316f1 drs://154.114.10.54:5000/6fa43c7de04b60c1a73a42aa2efc977d drs://154.114.10.54:5000/be145a60bc059c154475a2561af0df6b drs://154.114.10.62:5000/a68c60133f942881983d0e15827bf88f drs://154.114.10.62:5000/168d353c6f474ca72e35e9209f921a59"
        },
        "workflow_type": "NEXTFLOW",
        "workflow_type_version": "21.04.0",
        "workflow_url": "https://github.com/grbot/cram-qc"
    },
    "state": "COMPLETE",
    "run_log": {
        "name": "grbot/cram-qc",
        "cmd": [
            "#!/bin/bash -ue",
            "samtools flagstat     -@ 1     HG01883.final.chrX_15494566-15607236.c

# Check Data Connect

In [552]:
import requests
import json

# Location of the Data Connect service.
dc_port = "8089"
dc_base_url = "http://ga4gh-starter-kit.ilifu.ac.za:{}".format(dc_port)

# Endpoint paths, relative to dc_base_url; "{}" is a placeholder to fill in
# with str.format.
service_info_path, tables_path, search_path = "/service-info", "/tables", "/search"
table_info_path, table_data_path = "/table/{}/info", "/table/{}/data"
search_path = "/search"

def pretty_print_json(response):
    """Print the JSON body of ``response`` indented by four spaces.

    NOTE: this redefines the coloured ``pretty_print_json`` from the first
    cell, so from here on JSON is printed as plain, uncoloured text.
    """
    formatted = json.dumps(response.json(), indent=4)
    print(formatted)

In [431]:
# Ask the Data Connect service for its /service-info document and print it.
dc_service_info_resp = requests.request(
    method="GET",
    url=dc_base_url + service_info_path,
)
pretty_print_json(dc_service_info_resp)

{
    "id": "",
    "name": "GA4GH Discovery Search API",
    "description": "",
    "documentationUrl": "",
    "contactUrl": "",
    "version": ""
}


In [553]:
# Fetch and print the schema of the trino.public.genome_ilifu table.
# (The response is stored under the same variable name as the service-info one.)
dc_service_info_resp = requests.request(
    method="GET",
    url=dc_base_url + '/table/trino.public.genome_ilifu/info',
)
pretty_print_json(dc_service_info_resp)

{
    "name": "trino.public.genome_ilifu",
    "description": "Automatically generated schema",
    "data_model": {
        "$id": "http://ga4gh-starter-kit.ilifu.ac.za:8089/table/trino.public.genome_ilifu/info",
        "description": "Automatically generated schema",
        "$schema": "http://json-schema.org/draft-07/schema#",
        "properties": {
            "sample_id": {
                "format": "varchar",
                "type": "string",
                "$comment": "varchar"
            },
            "population_id": {
                "format": "varchar",
                "type": "string",
                "$comment": "varchar"
            },
            "super_population_id": {
                "format": "varchar",
                "type": "string",
                "$comment": "varchar"
            },
            "sex": {
                "format": "varchar",
                "type": "string",
                "$comment": "varchar"
            },
            "cram_drs_id": {
   

### Select address and id

In [554]:
import re

def get_address(s):
    """Return the host part of a DRS URI.

    The ``drs://`` prefix is removed and everything from the first ``:`` or
    ``/`` onwards is dropped, so both ``drs://host:5000/<id>`` and the
    port-less ``drs://host/<id>`` yield ``host``. Cutting only at ``:``, as
    before, returned ``host/<id>`` for the port-less form.

    Parameters:
        s: DRS URI string.

    Returns:
        The host name or IP address as a string.
    """
    address = s.replace("drs://", "")
    # Stop at whichever comes first: the port separator or the path separator.
    address = re.sub('[:/].*', '', address)
    return address
    
def get_drs_id(s):
    """Return the object id of a DRS URI: the text after its last ``/``.

    A string without any ``/`` is returned unchanged.
    """
    # The greedy '.*/' consumes everything up to and including the last slash.
    return re.sub('.*/', '', s)

### Do query

In [631]:
import requests, json
# Submit a search to Data Connect: up to 10 CRAM DRS ids of samples whose
# super population is AFR. The first reply is printed and kept in `data`.
q2 = {
    "query": (
        "select cram_drs_id from trino.public.genome_ilifu "
        "where super_population_id='AFR' limit 10"
    ),
    "parameters": [],
}
r = requests.post("http://ga4gh-starter-kit.ilifu.ac.za:8089/search", json=q2)
print(json.dumps(r.json(), indent=3))
data = r.json()

{
   "data": [],
   "pagination": {
      "next_page_url": "http://ga4gh-starter-kit.ilifu.ac.za:8089/search/v1/statement/queued/20230727_185920_00017_69yj9/yc7fb0ecd76004dfee5b87e761ef2d9cc89bf28ab/1?queryJobId=20230727_185920_00017_69yj9"
   }
}


### Parse through pages to get results

In [632]:
# Follow the Data Connect pagination chain to its end and collect every row.
#
# A page may carry no rows while the query is still queued or running (the
# first reply above has an empty "data" list), and rows may be spread over
# several pages. The previous version stopped at the first URL containing
# 'executing' and assumed the rows sat exactly two pages later; it also raised
# KeyError once a page had no "next_page_url".
# NOTE(review): assumes the final page omits "pagination"/"next_page_url" or
# sets it to null, as described by the Data Connect specification - confirm
# against this server.
rows = list(data.get('data') or [])
next_page = (data.get('pagination') or {}).get('next_page_url')

while next_page:
    dc_service_info_resp = requests.request("GET", next_page)
    # Surface HTTP errors here instead of failing later on a missing key
    dc_service_info_resp.raise_for_status()
    data = dc_service_info_resp.json()
    rows.extend(data.get('data') or [])
    next_page = (data.get('pagination') or {}).get('next_page_url')

# DRS URIs as a list and as one space-separated string; both are used by the
# cells below.
drs_ids = [row['cram_drs_id'] for row in rows]
drs_str = " ".join(drs_ids)


### Map DRS server to DRS objects

In [633]:
# Group the DRS URIs by the server that hosts them:
#   {host: {'drs_id': [uri, ...], 'total_file_size': 0}}
# total_file_size starts at 0 here.
drs_servers = {}
for drs_id in drs_ids:
    address = get_address(drs_id)
    server_entry = drs_servers.setdefault(address, {'drs_id': [], 'total_file_size': 0})
    server_entry['drs_id'].append(drs_id)
print(drs_servers)

{'154.114.10.54': {'drs_id': ['drs://154.114.10.54:5000/6fa43c7de04b60c1a73a42aa2efc977d', 'drs://154.114.10.54:5000/be145a60bc059c154475a2561af0df6b', 'drs://154.114.10.54:5000/9a45659fe478e5bb39d1dd1b08bd1807', 'drs://154.114.10.54:5000/82adbcf7cc72c31a86e65d73bf6ef81b'], 'total_file_size': 0}, '154.114.10.62': {'drs_id': ['drs://154.114.10.62:5000/a68c60133f942881983d0e15827bf88f', 'drs://154.114.10.62:5000/45ca586b0921ffedf6a63679fbaacb68', 'drs://154.114.10.62:5000/d36019bb63182abad672205a140f7e83', 'drs://154.114.10.62:5000/b809bb9b81a9583ec67e787b0449e9bd', 'drs://154.114.10.62:5000/168d353c6f474ca72e35e9209f921a59'], 'total_file_size': 0}, 'ga4gh-starter-kit.ilifu.ac.za': {'drs_id': ['drs://ga4gh-starter-kit.ilifu.ac.za:5000/5a436bec951fab59dd975bcd10f316f1'], 'total_file_size': 0}}


### Go through DRS servers, check file size of objects and calculate total

In [634]:
# For every server, fetch each of its objects and add up the reported "size"
# values; the sum is stored in that server's 'total_file_size' entry.
http_method = "GET"
drs_port = "5000"
object_path_get = "/objects/{}"

for drs_server in drs_servers:
    ga4gh_base_url = "http://" + drs_server + ":{}/ga4gh/{}/v1"
    drs_base_url = ga4gh_base_url.format(drs_port, "drs")
    total_file_size = 0
    for drs_id in drs_servers[drs_server]['drs_id']:
        # GET /objects/{object_id} on the server that hosts the object
        request_url = drs_base_url + object_path_get.format(get_drs_id(drs_id))
        drs_object_response = requests.request(http_method, request_url)
        data = drs_object_response.json()
        total_file_size += data['size']
    drs_servers[drs_server]['total_file_size'] = total_file_size

#print (drs_servers)        

### Get endpoint with largest file set in size

In [635]:
# Pick the server whose objects add up to the largest total size. On a tie the
# first server encountered wins; if no total is above 0 the selection stays "".
drs_server_selected, largest_file_set = "", 0
for drs_server, server_entry in drs_servers.items():
    candidate_size = server_entry['total_file_size']
    if candidate_size > largest_file_set:
        drs_server_selected, largest_file_set = drs_server, candidate_size

### Launch workflow with selected DRS objects on endpoint with largest file set

In [636]:
# Submit the workflow to the WES service on the server selected above, passing
# every DRS URI returned by the query as its input.
if not drs_server_selected:
    # An empty selection would produce the URL "http://:6000/..." and an
    # unrelated connection error; stop with the real cause instead.
    raise RuntimeError("No DRS server selected: no server reported a total file size above 0")

wes_port = "6000"
ga4gh_base_url = "http://" + drs_server_selected + ":{}/ga4gh/{}/v1"
wes_base_url = ga4gh_base_url.format(wes_port,"wes")

service_info_path = "/service-info"
runs_path = "/runs"

http_method = "POST"
request_url = wes_base_url + runs_path

nextflow_workflow_url = "https://github.com/grbot/cram-qc"
input_file = drs_str

data = {
    'workflow_type': 'NEXTFLOW',
    'workflow_type_version': '21.04.0',
    'workflow_url': nextflow_workflow_url,
    # Serialise with json.dumps so quotes or backslashes in input_file are
    # escaped; the compact separators keep the text identical to the previous
    # hand-built string for plain inputs.
    'workflow_params': json.dumps({"input": input_file}, separators=(',', ':'))
}

print_head("{} request to {}".format(http_method, request_url))

# Post a Nextflow workflow
wes_post_workflow_response = requests.request(http_method, request_url, data = data)

# print the response
pretty_print_json(wes_post_workflow_response)

# Kept for the following cell, which polls this run
current_run_id = wes_post_workflow_response.json()["run_id"]

print_head("run_id = {}".format(current_run_id))

[38;2;8;138;75mPOST request to http://154.114.10.62:6000/ga4gh/wes/v1/runs[0m
{
    "run_id": "f85bcffe-b7bc-443b-a1fe-a5bd420d0189"
}
[38;2;8;138;75mrun_id = f85bcffe-b7bc-443b-a1fe-a5bd420d0189[0m


### Get run outputs

In [639]:
import time

# Poll GET /runs/{run_id} until the run reaches a terminal WES state.
#
# Right after submission the server can answer with an error body that has no
# "state" key, for example:
#{
#    "timestamp": "2023-07-27T17:10:08Z",
#    "status_code": 400,
#    "error": "Bad Request",
#    "msg": "Could not load WES run log"
#}
# Such replies are retried a limited number of times instead of raising
# KeyError, so the cell no longer needs to be re-run by hand.

http_method = "GET"
request_url = wes_base_url + runs_path + "/" + current_run_id
poll_interval_seconds = 5
max_replies_without_state = 12
# States after which a run no longer changes. Waiting for "COMPLETE" alone
# never returns when the run fails or is cancelled.
terminal_states = ("COMPLETE", "EXECUTOR_ERROR", "SYSTEM_ERROR", "CANCELED")

print_head("{} request to {}".format(http_method, request_url))

replies_without_state = 0
while True:
    # Get request to /runs/{run_id}
    monitor_run_response = requests.request(http_method, request_url)
    run_state = monitor_run_response.json().get("state")
    if run_state in terminal_states:
        break
    if run_state is None:
        replies_without_state += 1
        if replies_without_state > max_replies_without_state:
            pretty_print_json(monitor_run_response)
            raise RuntimeError("WES run {} never reported a state".format(current_run_id))
        print("Run log not available yet, retrying")
    else:
        replies_without_state = 0
        print("Current job status: " + run_state)
    time.sleep(poll_interval_seconds)

print("Job running status: " + run_state)
pretty_print_json(monitor_run_response)

if run_state != "COMPLETE":
    # Stop here so the following cells do not act on a run without outputs
    raise RuntimeError("WES run {} ended in state {}".format(current_run_id, run_state))

[38;2;8;138;75mGET request to http://154.114.10.62:6000/ga4gh/wes/v1/runs/f85bcffe-b7bc-443b-a1fe-a5bd420d0189[0m
Job running status: COMPLETE
{
    "run_id": "f85bcffe-b7bc-443b-a1fe-a5bd420d0189",
    "request": {
        "workflow_params": {
            "input": "drs://154.114.10.54:5000/6fa43c7de04b60c1a73a42aa2efc977d drs://154.114.10.62:5000/a68c60133f942881983d0e15827bf88f drs://154.114.10.62:5000/45ca586b0921ffedf6a63679fbaacb68 drs://ga4gh-starter-kit.ilifu.ac.za:5000/5a436bec951fab59dd975bcd10f316f1 drs://154.114.10.54:5000/be145a60bc059c154475a2561af0df6b drs://154.114.10.62:5000/d36019bb63182abad672205a140f7e83 drs://154.114.10.62:5000/b809bb9b81a9583ec67e787b0449e9bd drs://154.114.10.62:5000/168d353c6f474ca72e35e9209f921a59 drs://154.114.10.54:5000/9a45659fe478e5bb39d1dd1b08bd1807 drs://154.114.10.54:5000/82adbcf7cc72c31a86e65d73bf6ef81b"
        },
        "workflow_type": "NEXTFLOW",
        "workflow_type_version": "21.04.0",
        "workflow_url": "https://github.co

### Now post `multiqc_report.html` to the DRS server

In [640]:
import importlib
import upload_to_drs
importlib.reload(upload_to_drs)

# Register the run's multiqc_report.html output with the DRS server through
# the project's upload_to_drs helpers.
run_id = monitor_run_response.json()['run_id']
outputs = monitor_run_response.json()["outputs"]

for key, output_uri in outputs.items():
    if 'multiqc_report.html' not in key:
        continue
    print(output_uri)
    # Drop the first 7 characters; in the URI printed below these are the
    # "file://" scheme, leaving a local path.
    file = output_uri[7:]
    file_ext = file.rsplit(".", 1)[-1]
    meta_d = upload_to_drs.files_metadata_test(run_id, file, file_ext)
    upload_to_drs.add_file_to_server(meta_d, file_ext, drs_server_selected, '5001')  # adds drs object
    # presumably meta_d[0][3] is the id of the new DRS object - TODO confirm
    # against upload_to_drs
    drs_id = meta_d[0][3]

file:///opt/ga4gh-starter-kit-wes/wes_runs/f8/5b/cf/f85bcffe-b7bc-443b-a1fe-a5bd420d0189/work/ee/a2aba315e3a31c3e38a1a9641585dd/multiqc_report.html


### Now retrieve the results

In [641]:
import urllib.request

# Download the uploaded report: look up the DRS object, resolve one of its
# access methods to a URL, then save that URL's content locally.
drs_port = 5000
object_path_get = "/objects/{}"
http_method = "GET"

ga4gh_base_url = "http://" + drs_server_selected + ":{}/ga4gh/{}/v1"
drs_base_url = ga4gh_base_url.format(drs_port, "drs")
request_url = drs_base_url + object_path_get.format(drs_id)

# GET /objects/{object_id}
drs_object_response = requests.request(http_method, request_url)
data = drs_object_response.json()

# NOTE(review): relies on the second access method being the one that carries
# an access_id - confirm against the server's responses.
access_url = "/".join((request_url, "access", data['access_methods'][1]['access_id']))

# GET /objects/{object_id}/access/{access_id} returns the download URL
drs_object_response = requests.request(http_method, access_url)
download_url = drs_object_response.json()["url"]
print(download_url)
urllib.request.urlretrieve(download_url, "multiqc_report.html")

http://154.114.10.62:5000/ga4gh/drs/v1/stream/4df02b964fdbe06433847c2531d0995a/46ff9f70-a960-4095-8feb-103f53a9710e


('multiqc_report.html', <http.client.HTTPMessage at 0x7fdd1ae37f40>)