In [1]:
# Enable IPython's autoreload extension in mode 2 so that edits to imported
# project modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import yaml
import os
import urlparse
import requests
import json
import time

from synorchestrator.trs_client.client import TRSClient
from synorchestrator.wes_client.client import WESClient
from synorchestrator.util import build_wes_request
from synorchestrator.util import params_url2object

DEBUG:rdflib:RDFLib Version: 4.2.2


In [23]:
# Print the configured evaluation queues, tool registries and workflow services.
# NOTE(review): `config` is not imported or defined in any cell shown in this
# notebook — presumably a synorchestrator module left over from an earlier
# session; confirm and add the import to the imports cell so that
# Restart & Run All works.
config.show()


Orchestrator options:

Evaluation Queues
---------------------------------------------------------------------------
0: github.com/dockstore-testing/md5sum-checker [CWL]
1: github.com/dockstore-testing/md5sum-checker/wdl [WDL]
2: github.com/DataBiosphere/topmed-workflows/TopMed_Variant_Caller [WDL]

Tool Registries
---------------------------------------------------------------------------
dockstore: dockstore.org:8443

Workflow Services
---------------------------------------------------------------------------
hca-cromwell: g0n2qjnu94.execute-api.us-east-1.amazonaws.com/test
arvados-wes: wes.qr1hi.arvadosapi.com
local: 0.0.0.0:8080


## Set up TRS and WES clients

Load options and parameters for TRS and WES servers from local configs.

In [24]:
# One TRS client for Dockstore and one WES client per workflow service; each
# stored option mapping is expanded into constructor keyword arguments.
trs_options = config.trs_config['dockstore']
tclient = TRSClient(**trs_options)

wes_options = config.wes_config
arvclient = WESClient(**wes_options['arvados-wes'])
cromclient = WESClient(**wes_options['hca-cromwell'])

### Verify WES connections are working

In [5]:
# Connectivity check: request service-info from the Arvados WES endpoint.
arvclient.get_service_info()

DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): wes.qr1hi.arvadosapi.com
DEBUG:urllib3.connectionpool:https://wes.qr1hi.arvadosapi.com:443 "GET /ga4gh/wes/v1/service-info HTTP/1.1" 200 325


{u'engine_versions': u'cwl-runner',
 u'key_values': {},
 u'supported_filesystem_protocols': [u'file', u'http', u'https', u'keep'],
 u'supported_wes_versions': u'0.2.1',
 u'system_state_counts': {},
 u'workflow_type_versions': {u'CWL': {u'workflow_type_version': [u'v1.0']}}}

In [6]:
# Connectivity check: request service-info from the Cromwell (hca-cromwell)
# WES endpoint.
cromclient.get_service_info()

DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): g0n2qjnu94.execute-api.us-east-1.amazonaws.com
DEBUG:urllib3.connectionpool:https://g0n2qjnu94.execute-api.us-east-1.amazonaws.com:443 "GET /test//service-info HTTP/1.1" 200 337


{u'auth_instructions_url': u'',
 u'default_workflow_engine_parameters': [],
 u'supported_filesystem_protocols': [u'gs'],
 u'supported_wes_versions': [u'0.2.1'],
 u'system_state_counts': {u'zero': 0},
 u'tags': {},
 u'workflow_engine_versions': {u'cromwell': u'31'},
 u'workflow_type_versions': {u'huh': {u'worfklow_type_version': [u'wdl',
    u'gzip',
    u'main.wdl']}}}

## Tool test

This section does some lightweight testing with standalone WDL/CWL *tools* (before getting into workflows).

### WDL

Testing with the WDL version of `quay.io/briandoconnor/dockstore-tool-md5sum`; fetch the descriptor file content from Dockstore.

In [7]:
# Fetch the plain-text WDL descriptor for the md5sum tool from Dockstore's
# GA4GH TRS v2 API (the tool ID and descriptor path are URL-encoded).
wdl_tool_url = 'https://dockstore.org:8443/api/ga4gh/v2/tools/quay.io%2Fbriandoconnor%2Fdockstore-tool-md5sum/versions/master/plain-WDL/descriptor/%2FDockstore.wdl'
# Bound the request so an unresponsive registry cannot hang the kernel.
wdl_tool_descriptor = requests.get(wdl_tool_url, timeout=60)
# Stop here on an HTTP error instead of later submitting an error page as WDL.
wdl_tool_descriptor.raise_for_status()
wdl_tool_descriptor.content

DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): dockstore.org
DEBUG:urllib3.connectionpool:https://dockstore.org:8443 "GET /api/ga4gh/v2/tools/quay.io%2Fbriandoconnor%2Fdockstore-tool-md5sum/versions/master/plain-WDL/descriptor/%2FDockstore.wdl HTTP/1.1" 200 211


'task md5 {\n  File inputFile\n\n  command {\n    /bin/my_md5sum ${inputFile}\n  }\n\n output {\n    File value = "md5sum.txt"\n }\n\n runtime {\n   docker: "quay.io/briandoconnor/dockstore-tool-md5sum:1.0.4"\n   cpu: 1\n   memory: "512 MB"\n }\n}\n\nworkflow ga4ghMd5 {\n File inputFile\n call md5 { input: inputFile=inputFile }\n}\n'

#### Submitting with `workflow_descriptor`

Retrieve the test parameters content from the JSON file in GitHub and format it as an object for the WES request. Provide the contents of the WDL descriptor file as a string/blob in the request.

In [8]:
# Build a WES request that carries the WDL descriptor inline (the text fetched
# from Dockstore) and takes its test parameters from the tool's GitHub repo.
wdl_tool_test_url = 'https://raw.githubusercontent.com/briandoconnor/dockstore-tool-md5sum/master/test.wdl.json'
wdl_request_fields = {
    'workflow_descriptor': wdl_tool_descriptor.content,
    'workflow_params': wdl_tool_test_url,
    'workflow_type': 'WDL',
}
tool_request = build_wes_request(**wdl_request_fields)
tool_request

DEBUG:cachecontrol.controller:Looking up "https://raw.githubusercontent.com/briandoconnor/dockstore-tool-md5sum/master/test.wdl.json" in the cache
DEBUG:cachecontrol.controller:Current age based on date: 16397
DEBUG:cachecontrol.controller:Freshness lifetime from max-age: 300
DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): raw.githubusercontent.com
DEBUG:urllib3.connectionpool:https://raw.githubusercontent.com:443 "GET /briandoconnor/dockstore-tool-md5sum/master/test.wdl.json HTTP/1.1" 304 0
DEBUG:urllib3.connectionpool:https://raw.githubusercontent.com:443 "HEAD /briandoconnor/dockstore-tool-md5sum/master/md5sum.input HTTP/1.1" 200 0


{'workflow_descriptor': 'task md5 {\n  File inputFile\n\n  command {\n    /bin/my_md5sum ${inputFile}\n  }\n\n output {\n    File value = "md5sum.txt"\n }\n\n runtime {\n   docker: "quay.io/briandoconnor/dockstore-tool-md5sum:1.0.4"\n   cpu: 1\n   memory: "512 MB"\n }\n}\n\nworkflow ga4ghMd5 {\n File inputFile\n call md5 { input: inputFile=inputFile }\n}\n',
 'workflow_params': ordereddict([('ga4ghMd5.inputFile', 'https://raw.githubusercontent.com/briandoconnor/dockstore-tool-md5sum/master/md5sum.input')]),
 'workflow_type': 'WDL',
 'workflow_type_version': 'v1.0'}

Update the file input path in `workflow_params` to use a `gs://` (Google Cloud Storage) URL.

In [9]:
# The Cromwell service-info response lists 'gs' as its only supported
# filesystem protocol, so point the input file at a gs:// location.
wdl_params = tool_request['workflow_params']
wdl_params['ga4ghMd5.inputFile'] = 'gs://dockstore-tool-md5sum-data/md5sum.input.txt'
tool_request

{'workflow_descriptor': 'task md5 {\n  File inputFile\n\n  command {\n    /bin/my_md5sum ${inputFile}\n  }\n\n output {\n    File value = "md5sum.txt"\n }\n\n runtime {\n   docker: "quay.io/briandoconnor/dockstore-tool-md5sum:1.0.4"\n   cpu: 1\n   memory: "512 MB"\n }\n}\n\nworkflow ga4ghMd5 {\n File inputFile\n call md5 { input: inputFile=inputFile }\n}\n',
 'workflow_params': ordereddict([('ga4ghMd5.inputFile', 'gs://dockstore-tool-md5sum-data/md5sum.input.txt')]),
 'workflow_type': 'WDL',
 'workflow_type_version': 'v1.0'}

Submit the tool run.

In [10]:
# Submit the inline-descriptor WDL request to the Cromwell WES service; the
# recorded response carries the new run's workflow_id.
cromclient.run_workflow(tool_request)

DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): g0n2qjnu94.execute-api.us-east-1.amazonaws.com
DEBUG:urllib3.connectionpool:https://g0n2qjnu94.execute-api.us-east-1.amazonaws.com:443 "POST /test//workflows HTTP/1.1" 200 55


{u'workflow_id': u'c02f0049-c7b5-48fa-ac0f-150aacdf1a0d'}

In [15]:
# Fetch the full run record for the WDL tool submission.
# NOTE(review): the run ID is a literal pasted from the recorded output of the
# submission cell; re-running that submission produces a different ID, so this
# literal must be updated by hand (or the submission result captured in a
# variable) to inspect a fresh run.
cromclient.get_workflow_run('c02f0049-c7b5-48fa-ac0f-150aacdf1a0d')

DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): g0n2qjnu94.execute-api.us-east-1.amazonaws.com
DEBUG:urllib3.connectionpool:https://g0n2qjnu94.execute-api.us-east-1.amazonaws.com:443 "GET /test//workflows/c02f0049-c7b5-48fa-ac0f-150aacdf1a0d HTTP/1.1" 200 962


{u'end_time': u'2018-05-24T21:16:31.309Z',
 u'outputs': {u'ga4ghMd5.md5.value': u'gs://broad-dsde-mint-dev-cromwell-execution/cromwell-executions/ga4ghMd5/c02f0049-c7b5-48fa-ac0f-150aacdf1a0d/call-md5/md5sum.txt'},
 u'request': {u'workflow_descriptor': u'task md5 {\n  File inputFile\n\n  command {\n    /bin/my_md5sum ${inputFile}\n  }\n\n output {\n    File value = "md5sum.txt"\n }\n\n runtime {\n   docker: "quay.io/briandoconnor/dockstore-tool-md5sum:1.0.4"\n   cpu: 1\n   memory: "512 MB"\n }\n}\n\nworkflow ga4ghMd5 {\n File inputFile\n call md5 { input: inputFile=inputFile }\n}\n',
  u'workflow_params': {u'ga4ghMd5.inputFile': u'gs://dockstore-tool-md5sum-data/md5sum.input.txt'},
  u'workflow_type': u'WDL'},
 u'start_time': u'2018-05-24T21:16:10.439Z',
 u'state': u'COMPLETE',
 u'task_logs': [],
 u'workflow_id': u'c02f0049-c7b5-48fa-ac0f-150aacdf1a0d',
 u'workflow_log': {u'cmd': [u''],
  u'end_time': u'',
  u'exit_code': 0,
  u'name': u'',
  u'start_time': u'',
  u'stderr': u'',
  u's

#### Submitting with `workflow_url`

Attempt to run the tool by providing the URL of the WDL descriptor file in the WES request.

In [13]:
# Same tool, but reference the descriptor by its Dockstore URL instead of
# sending the WDL text inline.
wdl_url_request_fields = dict(
    workflow_type='WDL',
    workflow_params='https://raw.githubusercontent.com/briandoconnor/dockstore-tool-md5sum/master/test.wdl.json',
    workflow_url='https://dockstore.org:8443/api/ga4gh/v2/tools/quay.io%2Fbriandoconnor%2Fdockstore-tool-md5sum/versions/master/plain-WDL/descriptor/%2FDockstore.wdl',
)
tool_request = build_wes_request(**wdl_url_request_fields)

# Point the input file at a gs:// location before submitting.
url_request_params = tool_request['workflow_params']
url_request_params['ga4ghMd5.inputFile'] = 'gs://dockstore-tool-md5sum-data/md5sum.input.txt'
tool_request

DEBUG:cachecontrol.controller:Looking up "https://raw.githubusercontent.com/briandoconnor/dockstore-tool-md5sum/master/test.wdl.json" in the cache
DEBUG:cachecontrol.controller:Current age based on date: 48
DEBUG:cachecontrol.controller:Freshness lifetime from max-age: 300
DEBUG:cachecontrol.controller:The response is "fresh", returning cached response
DEBUG:cachecontrol.controller:300 > 48
DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): raw.githubusercontent.com
DEBUG:urllib3.connectionpool:https://raw.githubusercontent.com:443 "HEAD /briandoconnor/dockstore-tool-md5sum/master/md5sum.input HTTP/1.1" 200 0


{'workflow_params': ordereddict([('ga4ghMd5.inputFile', 'gs://dockstore-tool-md5sum-data/md5sum.input.txt')]),
 'workflow_type': 'WDL',
 'workflow_type_version': 'v1.0',
 'workflow_url': 'https://dockstore.org:8443/api/ga4gh/v2/tools/quay.io%2Fbriandoconnor%2Fdockstore-tool-md5sum/versions/master/plain-WDL/descriptor/%2FDockstore.wdl'}

In [14]:
# Submit the URL-based WDL request to the Cromwell WES service.
# The recorded response is an error payload (KeyError: 'workflow_descriptor')
# delivered with HTTP 200, so the returned dict has to be inspected rather
# than assumed to contain a workflow_id.
cromclient.run_workflow(tool_request)

DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): g0n2qjnu94.execute-api.us-east-1.amazonaws.com
DEBUG:urllib3.connectionpool:https://g0n2qjnu94.execute-api.us-east-1.amazonaws.com:443 "POST /test//workflows HTTP/1.1" 200 232


{u'errorMessage': u"'workflow_descriptor'",
 u'errorType': u'KeyError',
 u'stackTrace': [[u'/var/task/wes_to_cromwell.py',
   93,
   u'wes_workflows_post',
   u'z = zipfile.ZipFile(io.BytesIO(base64.b64decode(event["workflow_descriptor"])), \'r\')']]}

### CWL

Testing with the CWL version of `quay.io/briandoconnor/dockstore-tool-md5sum`; fetch the descriptor content from Dockstore.

In [16]:
# Fetch the CWL descriptor record for the md5sum tool from Dockstore's GA4GH
# TRS v2 API; the JSON body is indexed by 'descriptor' and 'url' in later cells.
cwl_tool_url = 'https://dockstore.org:8443/api/ga4gh/v2/tools/quay.io%2Fbriandoconnor%2Fdockstore-tool-md5sum/versions/master/CWL/descriptor'
# Bound the request so an unresponsive registry cannot hang the kernel.
cwl_tool_response = requests.get(cwl_tool_url, timeout=60)
# Surface HTTP errors here instead of failing later while decoding the body or
# on a missing key.
cwl_tool_response.raise_for_status()
cwl_tool_descriptor = cwl_tool_response.json()
cwl_tool_descriptor

DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): dockstore.org
DEBUG:urllib3.connectionpool:https://dockstore.org:8443 "GET /api/ga4gh/v2/tools/quay.io%2Fbriandoconnor%2Fdockstore-tool-md5sum/versions/master/CWL/descriptor HTTP/1.1" 200 775


{u'descriptor': u'#!/usr/bin/env cwl-runner\n\nclass: CommandLineTool\nid: Md5sum\nlabel: Simple md5sum tool\ncwlVersion: v1.0\n\n$namespaces:\n  dct: http://purl.org/dc/terms/\n  foaf: http://xmlns.com/foaf/0.1/\n\ndoc: |\n  [![Docker Repository on Quay.io](https://quay.io/repository/briandoconnor/dockstore-tool-md5sum/status "Docker Repository on Quay.io")](https://quay.io/repository/briandoconnor/dockstore-tool-md5sum)\n  [![Build Status](https://travis-ci.org/briandoconnor/dockstore-tool-md5sum.svg)](https://travis-ci.org/briandoconnor/dockstore-tool-md5sum)\n  A very, very simple Docker container for the md5sum command. See the [README](https://github.com/briandoconnor/dockstore-tool-md5sum/blob/master/README.md) for more information.\n\n\n#dct:creator:\n#  \'@id\': http://orcid.org/0000-0002-7681-6415\n#  foaf:name: Brian O\'Connor\n#  foaf:mbox: briandoconnor@gmail.com\n\nrequirements:\n- class: DockerRequirement\n  dockerPull: quay.io/briandoconnor/dockstore-tool-md5sum:1.0.4\n

#### Submitting with `workflow_url`

Attempt to run the tool by providing the URL of the CWL descriptor file in the WES request.

In [52]:
# Reference the CWL descriptor by the URL reported in the Dockstore record and
# take the test parameters from the tool's GitHub repo.
cwl_url_request_fields = dict(
    workflow_type='CWL',
    workflow_params='https://raw.githubusercontent.com/briandoconnor/dockstore-tool-md5sum/master/test.json',
    workflow_url=cwl_tool_descriptor['url'],
)
cwl_tool_request = build_wes_request(**cwl_url_request_fields)
cwl_tool_request

DEBUG:cachecontrol.controller:Looking up "https://raw.githubusercontent.com/briandoconnor/dockstore-tool-md5sum/master/test.json" in the cache
DEBUG:cachecontrol.controller:Current age based on date: 1828
DEBUG:cachecontrol.controller:Freshness lifetime from max-age: 300
DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): raw.githubusercontent.com
DEBUG:urllib3.connectionpool:https://raw.githubusercontent.com:443 "GET /briandoconnor/dockstore-tool-md5sum/master/test.json HTTP/1.1" 304 0
DEBUG:urllib3.connectionpool:https://raw.githubusercontent.com:443 "HEAD /briandoconnor/dockstore-tool-md5sum/master/md5sum.input HTTP/1.1" 200 0


{'workflow_params': ordereddict([('input_file', ordereddict([('class', 'File'), ('location', 'https://raw.githubusercontent.com/briandoconnor/dockstore-tool-md5sum/master/md5sum.input')]))]),
 'workflow_type': 'CWL',
 'workflow_type_version': 'v1.0',
 'workflow_url': u'https://raw.githubusercontent.com/briandoconnor/dockstore-tool-md5sum/master/Dockstore.cwl'}

In [53]:
# Submit the URL-based CWL request to the Arvados WES service; the recorded
# response carries the new run's workflow_id.
arvclient.run_workflow(cwl_tool_request)

DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): wes.qr1hi.arvadosapi.com
DEBUG:urllib3.connectionpool:https://wes.qr1hi.arvadosapi.com:443 "POST /ga4gh/wes/v1/workflows HTTP/1.1" 200 51


{u'workflow_id': u'qr1hi-xvhdp-engk7b81v9j7gwa'}

In [54]:
# Fetch the full run record for the URL-based CWL submission.
# NOTE(review): the run ID is a literal pasted from the recorded output of the
# submission cell; it will not track a re-run of that cell.
arvclient.get_workflow_run('qr1hi-xvhdp-engk7b81v9j7gwa')

DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): wes.qr1hi.arvadosapi.com
DEBUG:urllib3.connectionpool:https://wes.qr1hi.arvadosapi.com:443 "GET /ga4gh/wes/v1/workflows/qr1hi-xvhdp-engk7b81v9j7gwa HTTP/1.1" 200 1469


{u'outputs': {u'output_file': {u'basename': u'md5sum.txt',
   u'class': u'File',
   u'format': u'http://edamontology.org/data_3671',
   u'location': u'https://download.qr1hi.arvadosapi.com/c=2fd83f7df048ffe49ced1c05a05c6460+115/_/md5sum.txt',
   u'size': 33}},
 u'request': {},
 u'state': u'COMPLETE',
 u'task_logs': [],
 u'workflow_id': u'qr1hi-xvhdp-engk7b81v9j7gwa',
 u'workflow_log': {u'cmd': [u''],
  u'endTime': u'',
  u'exit_code': 0,
  u'startTime': u'',
  u'stderr': u'2018-05-24T18:27:35.973069854Z cwltool INFO: /usr/local/bin/arvados-cwl-runner 1.1.4.20180523205640, arvados-python-client 1.1.4.20180510153813, cwltool 1.0.20180523203033\n2018-05-24T18:27:35.974959654Z cwltool INFO: Resolved \'/var/lib/cwl/workflow.json#main\' to \'file:///var/lib/cwl/workflow.json#main\'\n2018-05-24T18:27:37.692391955Z arvados.cwl-runner INFO: [container Simple md5sum tool] reused container qr1hi-dz642-yf4bwvrr8336gu8\n2018-05-24T18:27:49.258490162Z arvados.cwl-runner INFO: [container Simple md5su

#### Submitting with `workflow_descriptor`

Retrieve the test parameters content from the JSON file in GitHub and format it as an object for the WES request. Provide the contents of the CWL descriptor file as a string/blob in the request.

In [55]:
# Send the CWL descriptor text inline (the 'descriptor' field of the Dockstore
# record) together with the test parameters from the tool's GitHub repo.
cwl_inline_request_fields = {
    'workflow_descriptor': cwl_tool_descriptor['descriptor'],
    'workflow_params': 'https://raw.githubusercontent.com/briandoconnor/dockstore-tool-md5sum/master/test.json',
    'workflow_type': 'CWL',
}
cwl_tool_request = build_wes_request(**cwl_inline_request_fields)
cwl_tool_request

DEBUG:cachecontrol.controller:Looking up "https://raw.githubusercontent.com/briandoconnor/dockstore-tool-md5sum/master/test.json" in the cache
DEBUG:cachecontrol.controller:Current age based on date: 24
DEBUG:cachecontrol.controller:Freshness lifetime from max-age: 300
DEBUG:cachecontrol.controller:The response is "fresh", returning cached response
DEBUG:cachecontrol.controller:300 > 24
DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): raw.githubusercontent.com
DEBUG:urllib3.connectionpool:https://raw.githubusercontent.com:443 "HEAD /briandoconnor/dockstore-tool-md5sum/master/md5sum.input HTTP/1.1" 200 0


{'workflow_descriptor': u'#!/usr/bin/env cwl-runner\n\nclass: CommandLineTool\nid: Md5sum\nlabel: Simple md5sum tool\ncwlVersion: v1.0\n\n$namespaces:\n  dct: http://purl.org/dc/terms/\n  foaf: http://xmlns.com/foaf/0.1/\n\ndoc: |\n  [![Docker Repository on Quay.io](https://quay.io/repository/briandoconnor/dockstore-tool-md5sum/status "Docker Repository on Quay.io")](https://quay.io/repository/briandoconnor/dockstore-tool-md5sum)\n  [![Build Status](https://travis-ci.org/briandoconnor/dockstore-tool-md5sum.svg)](https://travis-ci.org/briandoconnor/dockstore-tool-md5sum)\n  A very, very simple Docker container for the md5sum command. See the [README](https://github.com/briandoconnor/dockstore-tool-md5sum/blob/master/README.md) for more information.\n\n\n#dct:creator:\n#  \'@id\': http://orcid.org/0000-0002-7681-6415\n#  foaf:name: Brian O\'Connor\n#  foaf:mbox: briandoconnor@gmail.com\n\nrequirements:\n- class: DockerRequirement\n  dockerPull: quay.io/briandoconnor/dockstore-tool-md5sum

In [56]:
# Submit the inline-descriptor CWL request to the Arvados WES service; the
# recorded response carries the new run's workflow_id.
arvclient.run_workflow(cwl_tool_request)

DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): wes.qr1hi.arvadosapi.com
DEBUG:urllib3.connectionpool:https://wes.qr1hi.arvadosapi.com:443 "POST /ga4gh/wes/v1/workflows HTTP/1.1" 200 51


{u'workflow_id': u'qr1hi-xvhdp-l1sa1g35hh6j6wj'}

In [57]:
# Fetch the full run record for the inline-descriptor CWL submission.
# NOTE(review): the run ID is a literal pasted from the recorded output of the
# submission cell; it will not track a re-run of that cell.
arvclient.get_workflow_run('qr1hi-xvhdp-l1sa1g35hh6j6wj')

DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): wes.qr1hi.arvadosapi.com
DEBUG:urllib3.connectionpool:https://wes.qr1hi.arvadosapi.com:443 "GET /ga4gh/wes/v1/workflows/qr1hi-xvhdp-l1sa1g35hh6j6wj HTTP/1.1" 200 1469


{u'outputs': {u'output_file': {u'basename': u'md5sum.txt',
   u'class': u'File',
   u'format': u'http://edamontology.org/data_3671',
   u'location': u'https://download.qr1hi.arvadosapi.com/c=2fd83f7df048ffe49ced1c05a05c6460+115/_/md5sum.txt',
   u'size': 33}},
 u'request': {},
 u'state': u'COMPLETE',
 u'task_logs': [],
 u'workflow_id': u'qr1hi-xvhdp-l1sa1g35hh6j6wj',
 u'workflow_log': {u'cmd': [u''],
  u'endTime': u'',
  u'exit_code': 0,
  u'startTime': u'',
  u'stderr': u'2018-05-24T18:27:35.973069854Z cwltool INFO: /usr/local/bin/arvados-cwl-runner 1.1.4.20180523205640, arvados-python-client 1.1.4.20180510153813, cwltool 1.0.20180523203033\n2018-05-24T18:27:35.974959654Z cwltool INFO: Resolved \'/var/lib/cwl/workflow.json#main\' to \'file:///var/lib/cwl/workflow.json#main\'\n2018-05-24T18:27:37.692391955Z arvados.cwl-runner INFO: [container Simple md5sum tool] reused container qr1hi-dz642-yf4bwvrr8336gu8\n2018-05-24T18:27:49.258490162Z arvados.cwl-runner INFO: [container Simple md5su

## Workflows with TRS & WES

### CWL checker workflow for `md5sum-checker`

In [58]:
# Select evaluation queue entry 0 (the CWL md5sum-checker, per the printed
# config) and pull out its workflow ID for the TRS lookups that follow.
# NOTE(review): `eval_config` is not defined in any cell shown in this
# notebook — presumably loaded from the orchestrator config; confirm its origin
# and define it in a visible cell.
workflow_config = eval_config[0]
print(workflow_config)
workflow_id = workflow_config['workflow_id']
workflow_id

{'workflow_id': 'github.com/dockstore-testing/md5sum-checker', 'submission_type': 'params', 'trs_id': 'dockstore', 'version_id': 'develop', 'workflow_type': 'CWL'}


'github.com/dockstore-testing/md5sum-checker'

### Retrieve workflow info from Dockstore

#### Get checker for specified workflow

In [59]:
# Ask the TRS (Dockstore) client for the checker workflow associated with the
# selected workflow, then display the checker's TRS ID.
checker_workflow = tclient.get_workflow_checker(workflow_id)
checker_workflow['id']

DEBUG:root:retrieving workflow entry from tools/%23workflow%2Fgithub.com%2Fdockstore-testing%2Fmd5sum-checker
DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): dockstore.org
DEBUG:urllib3.connectionpool:https://dockstore.org:8443 "GET /api/ga4gh/v2/tools/%23workflow%2Fgithub.com%2Fdockstore-testing%2Fmd5sum-checker HTTP/1.1" 200 418
DEBUG:synorchestrator.trs_client.client:found checker workflow: github.com/dockstore-testing/md5sum-checker/_cwl_checker
DEBUG:root:retrieving workflow entry from tools/%23workflow%2Fgithub.com%2Fdockstore-testing%2Fmd5sum-checker%2F_cwl_checker
DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): dockstore.org
DEBUG:urllib3.connectionpool:https://dockstore.org:8443 "GET /api/ga4gh/v2/tools/%23workflow%2Fgithub.com%2Fdockstore-testing%2Fmd5sum-checker%2F_cwl_checker HTTP/1.1" 200 496


u'#workflow/github.com/dockstore-testing/md5sum-checker/_cwl_checker'

#### Get checker descriptor for specified version and workflow type

In [60]:
# Retrieve the checker's descriptor for the version and workflow type named in
# the selected queue entry.
descriptor_query = {
    'id': checker_workflow['id'],
    'version_id': workflow_config['version_id'],
    'type': workflow_config['workflow_type'],
}
checker_descriptor = tclient.get_workflow_descriptor(**descriptor_query)
checker_descriptor

DEBUG:synorchestrator.trs_client.client:getting descriptor from tools/%23workflow%2Fgithub.com%2Fdockstore-testing%2Fmd5sum-checker%2F_cwl_checker/versions/develop/CWL/descriptor
DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): dockstore.org
DEBUG:urllib3.connectionpool:https://dockstore.org:8443 "GET /api/ga4gh/v2/tools/%23workflow%2Fgithub.com%2Fdockstore-testing%2Fmd5sum-checker%2F_cwl_checker/versions/develop/CWL/descriptor HTTP/1.1" 200 541


{u'descriptor': u'cwlVersion: v1.0\nclass: Workflow\n\nrequirements:\n  - class: SubworkflowFeatureRequirement\n\n#dct:creator:\n#  \'@id\': http://orcid.org/0000-0002-7681-6415\n#  foaf:name: Brian O\'Connor\n#  foaf:mbox: mailto:briandoconnor@gmail.com\n\n#dct:contributor:\n#  foaf:name: Denis Yuen\n#  foaf:mbox: mailto:denis.yuen@oicr.on.ca\n\ninputs:\n  input_file: File\n  expected_md5: string\n\noutputs:\n  workflow_output_file:\n    type: File\n    outputSource: checker/results_file\n\nsteps:\n  md5sum:\n    run: md5sum/md5sum-workflow.cwl\n    in:\n      input_file: input_file\n    out: [output_file]\n  checker:\n    run: checker/md5sum-checker.cwl\n    in:\n      input_file: md5sum/output_file\n      expected_md5: expected_md5\n    out: [results_file]\n\ndoc: |\n  This demonstrates how to wrap a "real" tool with a checker workflow that runs both the tool and a tool that performs verification of results\n',
 u'type': u'CWL',
 u'url': u'https://raw.githubusercontent.com/dockstore

#### Get checker test params for specified version and workflow type

In [61]:
# Retrieve the checker's test parameter files for the same version and
# workflow type; the recorded result is a list of records with 'test' and 'url'.
tests_query = {
    'id': checker_workflow['id'],
    'version_id': workflow_config['version_id'],
    'type': workflow_config['workflow_type'],
}
checker_tests = tclient.get_workflow_tests(**tests_query)
checker_tests

DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): dockstore.org
DEBUG:urllib3.connectionpool:https://dockstore.org:8443 "GET /api/ga4gh/v2/tools/%23workflow%2Fgithub.com%2Fdockstore-testing%2Fmd5sum-checker%2F_cwl_checker/versions/develop/CWL/tests HTTP/1.1" 200 206
DEBUG:synorchestrator.trs_client.client:getting descriptor from tools/%23workflow%2Fgithub.com%2Fdockstore-testing%2Fmd5sum-checker%2F_cwl_checker/versions/develop/CWL/descriptor
DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): dockstore.org
DEBUG:urllib3.connectionpool:https://dockstore.org:8443 "GET /api/ga4gh/v2/tools/%23workflow%2Fgithub.com%2Fdockstore-testing%2Fmd5sum-checker%2F_cwl_checker/versions/develop/CWL/descriptor HTTP/1.1" 200 541


[{u'test': u'{\n  "input_file": {\n        "class": "File",\n        "path": "md5sum.input"\n    },\n    "expected_md5": "00579a00e3e7fa0674428ac7049423e2"\n}\n',
  u'url': u'https://raw.githubusercontent.com/dockstore-testing/md5sum-checker/develop/checker-input-cwl.json'}]

### Build workflow run request

In [62]:
# Assemble the WES request for the CWL checker: descriptor by URL, parameters
# from the first test record, type as reported by the descriptor record.
first_checker_test = checker_tests[0]
workflow_request = build_wes_request(
    workflow_type=checker_descriptor['type'],
    workflow_params=first_checker_test['url'],
    workflow_url=checker_descriptor['url'],
)
workflow_request

DEBUG:cachecontrol.controller:Looking up "https://raw.githubusercontent.com/dockstore-testing/md5sum-checker/develop/checker-input-cwl.json" in the cache
DEBUG:cachecontrol.controller:Current age based on date: 1366
DEBUG:cachecontrol.controller:Freshness lifetime from max-age: 300
DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): raw.githubusercontent.com
DEBUG:urllib3.connectionpool:https://raw.githubusercontent.com:443 "GET /dockstore-testing/md5sum-checker/develop/checker-input-cwl.json HTTP/1.1" 304 0
DEBUG:urllib3.connectionpool:https://raw.githubusercontent.com:443 "HEAD /dockstore-testing/md5sum-checker/develop/md5sum.input HTTP/1.1" 200 0


{'workflow_params': ordereddict([('input_file', ordereddict([('class', 'File'), ('location', u'https://raw.githubusercontent.com/dockstore-testing/md5sum-checker/develop/md5sum.input')])), ('expected_md5', '00579a00e3e7fa0674428ac7049423e2')]),
 'workflow_type': u'CWL',
 'workflow_type_version': 'v1.0',
 'workflow_url': u'https://raw.githubusercontent.com/dockstore-testing/md5sum-checker/develop/checker-workflow-wrapping-workflow.cwl'}

### Run checker workflow

#### Submit new workflow run

In [63]:
# Submit the checker run to the Arvados WES service and keep the response:
# the monitoring cells below read r['workflow_id'].
r = arvclient.run_workflow(workflow_request)
r

DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): wes.qr1hi.arvadosapi.com
DEBUG:urllib3.connectionpool:https://wes.qr1hi.arvadosapi.com:443 "POST /ga4gh/wes/v1/workflows HTTP/1.1" 200 51


{u'workflow_id': u'qr1hi-xvhdp-19pbqx445m9db9t'}

#### Monitor workflow run status

In [66]:
# One-shot status check. `r` is rebound from the submission response to the
# status response; the recorded status also carries 'workflow_id', which is
# what allows this cell to be run again.
r = arvclient.get_workflow_run_status(r['workflow_id'])
r
# Disabled polling variant: repeat the check once per second while the run is
# still in one of the active states.
# while r['state'] in ('QUEUED', 'INITIALIZING', 'RUNNING'):
#     time.sleep(1)
#     r = arvclient.get_workflow_run_status(r['workflow_id'])

DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): wes.qr1hi.arvadosapi.com
DEBUG:urllib3.connectionpool:https://wes.qr1hi.arvadosapi.com:443 "GET /ga4gh/wes/v1/workflows/qr1hi-xvhdp-19pbqx445m9db9t/status HTTP/1.1" 200 78


{u'state': u'INITIALIZING', u'workflow_id': u'qr1hi-xvhdp-19pbqx445m9db9t'}

#### Check workflow run details

In [67]:
# Fetch the full run record (outputs, request, state, logs) for the run being
# monitored, using the ID carried in the latest status response.
arvclient.get_workflow_run(r['workflow_id'])

DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): wes.qr1hi.arvadosapi.com
DEBUG:urllib3.connectionpool:https://wes.qr1hi.arvadosapi.com:443 "GET /ga4gh/wes/v1/workflows/qr1hi-xvhdp-19pbqx445m9db9t HTTP/1.1" 200 260


{u'outputs': {},
 u'request': {},
 u'state': u'INITIALIZING',
 u'task_logs': [],
 u'workflow_id': u'qr1hi-xvhdp-19pbqx445m9db9t',
 u'workflow_log': {u'cmd': [u''],
  u'endTime': u'',
  u'startTime': u'',
  u'stderr': u'',
  u'stdout': u''}}

### WDL checker workflow for `TopMed_Variant_Caller`

In [68]:
# Select evaluation queue entry 2 (the WDL TopMed_Variant_Caller, per the
# printed config) and pull out its workflow ID. This rebinds `workflow_config`
# and `workflow_id` from the CWL section above.
# NOTE(review): `eval_config` is not defined in any cell shown in this
# notebook — confirm its origin and define it in a visible cell.
workflow_config = eval_config[2]
print(workflow_config)
workflow_id = workflow_config['workflow_id']
workflow_id

{'workflow_id': 'github.com/DataBiosphere/topmed-workflows/TopMed_Variant_Caller', 'submission_type': 'params', 'trs_id': 'dockstore', 'version_id': '1.11.0', 'workflow_type': 'WDL'}


'github.com/DataBiosphere/topmed-workflows/TopMed_Variant_Caller'

### Retrieve workflow info from Dockstore

#### Get checker for specified workflow

In [69]:
# Ask the TRS (Dockstore) client for the checker workflow associated with the
# TopMed variant caller, then display the checker's TRS ID.
checker_workflow = tclient.get_workflow_checker(workflow_id)
checker_workflow['id']

DEBUG:root:retrieving workflow entry from tools/%23workflow%2Fgithub.com%2FDataBiosphere%2Ftopmed-workflows%2FTopMed_Variant_Caller
DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): dockstore.org
DEBUG:urllib3.connectionpool:https://dockstore.org:8443 "GET /api/ga4gh/v2/tools/%23workflow%2Fgithub.com%2FDataBiosphere%2Ftopmed-workflows%2FTopMed_Variant_Caller HTTP/1.1" 200 769
DEBUG:synorchestrator.trs_client.client:found checker workflow: github.com/DataBiosphere/topmed-workflows/TopMed_Variant_Caller_wdl_checker
DEBUG:root:retrieving workflow entry from tools/%23workflow%2Fgithub.com%2FDataBiosphere%2Ftopmed-workflows%2FTopMed_Variant_Caller_wdl_checker
DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): dockstore.org
DEBUG:urllib3.connectionpool:https://dockstore.org:8443 "GET /api/ga4gh/v2/tools/%23workflow%2Fgithub.com%2FDataBiosphere%2Ftopmed-workflows%2FTopMed_Variant_Caller_wdl_checker HTTP/1.1" 200 498


u'#workflow/github.com/DataBiosphere/topmed-workflows/TopMed_Variant_Caller_wdl_checker'

#### Get checker descriptor for specified version and workflow type

In [70]:
# Retrieve the WDL checker's descriptor for the version and workflow type named
# in the selected queue entry.
checker_id = checker_workflow['id']
checker_version = workflow_config['version_id']
checker_type = workflow_config['workflow_type']
checker_descriptor = tclient.get_workflow_descriptor(
    id=checker_id, version_id=checker_version, type=checker_type
)
checker_descriptor

DEBUG:synorchestrator.trs_client.client:getting descriptor from tools/%23workflow%2Fgithub.com%2FDataBiosphere%2Ftopmed-workflows%2FTopMed_Variant_Caller_wdl_checker/versions/1.11.0/WDL/descriptor
DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): dockstore.org
DEBUG:urllib3.connectionpool:https://dockstore.org:8443 "GET /api/ga4gh/v2/tools/%23workflow%2Fgithub.com%2FDataBiosphere%2Ftopmed-workflows%2FTopMed_Variant_Caller_wdl_checker/versions/1.11.0/WDL/descriptor HTTP/1.1" 200 1199


{u'descriptor': u'import "https://raw.githubusercontent.com/DataBiosphere/topmed-workflows/1.11.0/variant-caller/variant-caller-wdl/topmed_freeze3_calling.wdl" as TopMed_variantcaller\nimport "https://raw.githubusercontent.com/DataBiosphere/topmed-workflows/1.11.0/variant-caller/variant-caller-wdl-checker/topmed-variantcaller-checker.wdl" as checker\n\nworkflow checkerWorkflow {\n  File inputTruthVCFFile\n\n  # Deprecated: No need to input this anymore\n  # Disk size requirements will be calculated internally\n  # This will be removed in the next release\n  Float? reference_files_size\n\n  String docker_image\n\n  Array[File] input_crai_files\n  Array[File] input_cram_files\n\n  File ref_1000G_omni2_5_b38_sites_PASS_vcf_gz\n  File ref_1000G_omni2_5_b38_sites_PASS_vcf_gz_tbi\n  File chr10_vcf\n  File chr11_KI270927v1_alt_vcf\n  File chr11_vcf\n  File chr12_vcf\n  File chr13_vcf\n  File chr14_GL000009v2_random_vcf\n  File chr14_KI270846v1_alt_vcf\n  File chr14_vcf\n  File chr15_vcf\n  Fi

#### Get checker test params for specified version and workflow type

In [71]:
# Retrieve the WDL checker's test parameter files for the same version and
# workflow type.
checker_id = checker_workflow['id']
checker_version = workflow_config['version_id']
checker_type = workflow_config['workflow_type']
checker_tests = tclient.get_workflow_tests(
    id=checker_id, version_id=checker_version, type=checker_type
)
checker_tests

DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): dockstore.org
DEBUG:urllib3.connectionpool:https://dockstore.org:8443 "GET /api/ga4gh/v2/tools/%23workflow%2Fgithub.com%2FDataBiosphere%2Ftopmed-workflows%2FTopMed_Variant_Caller_wdl_checker/versions/1.11.0/WDL/tests HTTP/1.1" 200 None
DEBUG:synorchestrator.trs_client.client:getting descriptor from tools/%23workflow%2Fgithub.com%2FDataBiosphere%2Ftopmed-workflows%2FTopMed_Variant_Caller_wdl_checker/versions/1.11.0/WDL/descriptor
DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): dockstore.org
DEBUG:urllib3.connectionpool:https://dockstore.org:8443 "GET /api/ga4gh/v2/tools/%23workflow%2Fgithub.com%2FDataBiosphere%2Ftopmed-workflows%2FTopMed_Variant_Caller_wdl_checker/versions/1.11.0/WDL/descriptor HTTP/1.1" 200 1199


[{u'test': u'{\n  "checkerWorkflow.inputTruthVCFFile": "gs://topmed_workflow_testing/topmed_variant_caller_checker/truth_topmed_variant_caller_NWD176325.0005_output.tar.gz",\n\n  "checkerWorkflow.input_cram_files": ["gs://topmed_workflow_testing/topmed_aligner/input_files/NWD176325.0005.recab.cram"],\n  "checkerWorkflow.input_crai_files": ["gs://topmed_workflow_testing/topmed_aligner/input_files/NWD176325.0005.recab.cram.crai"],\n\n  "checkerWorkflow.ref_1000G_omni2_5_b38_sites_PASS_vcf_gz":  "gs://topmed_workflow_testing/topmed_variant_caller/reference_files/hg38/1000G_omni2.5.b38.sites.PASS.vcf.gz",\n  "checkerWorkflow.ref_1000G_omni2_5_b38_sites_PASS_vcf_gz_tbi":  "gs://topmed_workflow_testing/topmed_variant_caller/reference_files/hg38/1000G_omni2.5.b38.sites.PASS.vcf.gz.tbi",\n  "checkerWorkflow.chr10_vcf":  "gs://topmed_workflow_testing/topmed_variant_caller/reference_files/hg38/1kg.pilot_release.merged.indels.sites.hg38.chr10.vcf",\n  "checkerWorkflow.chr11_KI270927v1_alt_vcf":  

### Build workflow run request

In [142]:
# Assemble the WES request for the WDL checker: descriptor text sent inline,
# parameters from the first test record, type as reported by the descriptor.
wdl_checker_request_fields = {
    'workflow_descriptor': checker_descriptor['descriptor'],
    'workflow_params': checker_tests[0]['url'],
    'workflow_type': checker_descriptor['type'],
}
workflow_request = build_wes_request(**wdl_checker_request_fields)
workflow_request

DEBUG:cachecontrol.controller:Looking up "https://raw.githubusercontent.com/DataBiosphere/topmed-workflows/1.11.0/variant-caller/variant-caller-wdl-checker/topmed_freeze3_calling_checker.json" in the cache
DEBUG:cachecontrol.controller:Current age based on date: 38
DEBUG:cachecontrol.controller:Freshness lifetime from max-age: 300
DEBUG:cachecontrol.controller:The response is "fresh", returning cached response
DEBUG:cachecontrol.controller:300 > 38


{'workflow_descriptor': u'import "https://raw.githubusercontent.com/DataBiosphere/topmed-workflows/1.11.0/variant-caller/variant-caller-wdl/topmed_freeze3_calling.wdl" as TopMed_variantcaller\nimport "https://raw.githubusercontent.com/DataBiosphere/topmed-workflows/1.11.0/variant-caller/variant-caller-wdl-checker/topmed-variantcaller-checker.wdl" as checker\n\nworkflow checkerWorkflow {\n  File inputTruthVCFFile\n\n  # Deprecated: No need to input this anymore\n  # Disk size requirements will be calculated internally\n  # This will be removed in the next release\n  Float? reference_files_size\n\n  String docker_image\n\n  Array[File] input_crai_files\n  Array[File] input_cram_files\n\n  File ref_1000G_omni2_5_b38_sites_PASS_vcf_gz\n  File ref_1000G_omni2_5_b38_sites_PASS_vcf_gz_tbi\n  File chr10_vcf\n  File chr11_KI270927v1_alt_vcf\n  File chr11_vcf\n  File chr12_vcf\n  File chr13_vcf\n  File chr14_GL000009v2_random_vcf\n  File chr14_KI270846v1_alt_vcf\n  File chr14_vcf\n  File chr15_v

### Run checker workflow

#### Submit new workflow run

In [143]:
# Submit the request through the Cromwell WES client; the displayed response
# holds the 'workflow_id' that the following cells use to track this run.
r = cromclient.run_workflow(workflow_request)
r

DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): g0n2qjnu94.execute-api.us-east-1.amazonaws.com
DEBUG:urllib3.connectionpool:https://g0n2qjnu94.execute-api.us-east-1.amazonaws.com:443 "POST /test//workflows HTTP/1.1" 200 55


{u'workflow_id': u'cebafa4f-53d8-41a3-9925-700cb2c407c5'}

#### Monitor workflow run status

In [155]:
# Pin the run ID once so every poll targets the same run, instead of
# re-reading it from whatever the previous status response contained.
run_id = r['workflow_id']

# `r` is kept as the most recent status response so later cells that read
# r['workflow_id'] continue to work.
r = cromclient.get_workflow_run_status(run_id)

# Keep polling every 30 seconds while the run reports one of these states;
# any other state ends the loop.
# NOTE(review): there is no upper bound on the wait - confirm that is intended.
while r['state'] in ('QUEUED', 'INITIALIZING', 'RUNNING'):
    time.sleep(30)
    r = cromclient.get_workflow_run_status(run_id)

DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): g0n2qjnu94.execute-api.us-east-1.amazonaws.com
DEBUG:urllib3.connectionpool:https://g0n2qjnu94.execute-api.us-east-1.amazonaws.com:443 "GET /test//workflows/cebafa4f-53d8-41a3-9925-700cb2c407c5/status HTTP/1.1" 200 75
DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): g0n2qjnu94.execute-api.us-east-1.amazonaws.com
DEBUG:urllib3.connectionpool:https://g0n2qjnu94.execute-api.us-east-1.amazonaws.com:443 "GET /test//workflows/cebafa4f-53d8-41a3-9925-700cb2c407c5/status HTTP/1.1" 200 75
DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): g0n2qjnu94.execute-api.us-east-1.amazonaws.com
DEBUG:urllib3.connectionpool:https://g0n2qjnu94.execute-api.us-east-1.amazonaws.com:443 "GET /test//workflows/cebafa4f-53d8-41a3-9925-700cb2c407c5/status HTTP/1.1" 200 75
DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): g0n2qjnu94.execute-api.us-east-1.amazonaws.com
DEBUG:urllib3.connectionpool:https://g0n2qj

DEBUG:urllib3.connectionpool:https://g0n2qjnu94.execute-api.us-east-1.amazonaws.com:443 "GET /test//workflows/cebafa4f-53d8-41a3-9925-700cb2c407c5/status HTTP/1.1" 200 75
DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): g0n2qjnu94.execute-api.us-east-1.amazonaws.com
DEBUG:urllib3.connectionpool:https://g0n2qjnu94.execute-api.us-east-1.amazonaws.com:443 "GET /test//workflows/cebafa4f-53d8-41a3-9925-700cb2c407c5/status HTTP/1.1" 200 75
DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): g0n2qjnu94.execute-api.us-east-1.amazonaws.com
DEBUG:urllib3.connectionpool:https://g0n2qjnu94.execute-api.us-east-1.amazonaws.com:443 "GET /test//workflows/cebafa4f-53d8-41a3-9925-700cb2c407c5/status HTTP/1.1" 200 75
DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): g0n2qjnu94.execute-api.us-east-1.amazonaws.com
DEBUG:urllib3.connectionpool:https://g0n2qjnu94.execute-api.us-east-1.amazonaws.com:443 "GET /test//workflows/cebafa4f-53d8-41a3-9925-700cb2c407c5/status 

#### Check workflow run details

In [157]:
# Fetch and display the full record of the submitted run, looked up by the
# 'workflow_id' stored in `r`.
cromclient.get_workflow_run(r['workflow_id'])

DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): g0n2qjnu94.execute-api.us-east-1.amazonaws.com
DEBUG:urllib3.connectionpool:https://g0n2qjnu94.execute-api.us-east-1.amazonaws.com:443 "GET /test//workflows/cebafa4f-53d8-41a3-9925-700cb2c407c5 HTTP/1.1" 200 16028


{u'end_time': u'2018-05-25T02:52:32.854Z',
 u'outputs': {u'checkerWorkflow.variantcaller.topmed_variant_caller_output': u'gs://broad-dsde-mint-dev-cromwell-execution/cromwell-executions/checkerWorkflow/cebafa4f-53d8-41a3-9925-700cb2c407c5/call-variantcaller/TopMedVariantCaller/a2428177-6416-4e2b-a0e0-23060f435495/call-variantCalling/topmed_variant_caller_output.tar.gz'},
 u'request': {u'workflow_descriptor': u'import "https://raw.githubusercontent.com/DataBiosphere/topmed-workflows/1.11.0/variant-caller/variant-caller-wdl/topmed_freeze3_calling.wdl" as TopMed_variantcaller\nimport "https://raw.githubusercontent.com/DataBiosphere/topmed-workflows/1.11.0/variant-caller/variant-caller-wdl-checker/topmed-variantcaller-checker.wdl" as checker\n\nworkflow checkerWorkflow {\n  File inputTruthVCFFile\n\n  # Deprecated: No need to input this anymore\n  # Disk size requirements will be calculated internally\n  # This will be removed in the next release\n  Float? reference_files_size\n\n  String 