In [1]:
import json
import os
import shlex
import time
from datetime import datetime
from os import listdir, walk
from os.path import isfile, join

import ipywidgets as widgets
import requests
from SciServer import Jobs,Authentication,Config

## Functions to submit the job

In [2]:
def pathForUserVolume(uv):
    """Return the '<rootVolumeName>/<owner>/<name>' path of a user volume.

    :param uv: user volume description (dictionary) with the keys
           'rootVolumeName', 'owner' and 'name'.
    :return: the three values joined with '/' (string).
    """
    parts = (uv[key] for key in ('rootVolumeName', 'owner', 'name'))
    return '/'.join(format(part) for part in parts)

In [3]:
def _grantsWriteAccess(requestedVolume, writeAllowed):
    """Decide whether a requested volume is mounted with write access.

    Write access is never granted when the compute domain does not allow it.
    When the request does not mention 'needsWriteAccess', write access is
    granted whenever it is allowed; an explicit value is honoured.
    """
    requested = requestedVolume.get('needsWriteAccess')
    if requested is None:
        return bool(writeAllowed)
    # Only an explicit True asks for write access; anything else is read-only.
    return requested == True and bool(writeAllowed)


def _isSameUserVolume(requestedVolume, availableVolume):
    """Tell whether a requested user volume designates an available one.

    The name must match. 'rootVolumeName' and 'owner' are compared only when
    the request provides them, so name-only requests keep working.
    """
    if availableVolume.get('name') != requestedVolume.get('name'):
        return False
    for key in ('rootVolumeName', 'owner'):
        wanted = requestedVolume.get(key)
        if wanted is not None and availableVolume.get(key) != wanted:
            return False
    return True


def _resolveUserVolumes(userVolumes, availableVolumes):
    """Translate requested user volumes into the 'userVolumes' part of the job model."""
    def entry(vol, writeAccess):
        return {'userVolumeId': vol.get('id'), 'needsWriteAccess': writeAccess}

    def writeAllowed(vol):
        return 'write' in (vol.get('allowedActions') or [])

    if userVolumes is None:
        # No selection: mount everything, writable where the user may write.
        return [entry(vol, writeAllowed(vol)) for vol in availableVolumes]

    resolved = []
    for uVol in userVolumes:
        matches = [vol for vol in availableVolumes if _isSameUserVolume(uVol, vol)]
        if not matches:
            raise Exception("User volume '" + str(uVol.get('name')) + "' not found within Compute domain")
        for vol in matches:
            resolved.append(entry(vol, _grantsWriteAccess(uVol, writeAllowed(vol))))
    return resolved


def _resolveDataVolumes(dataVolumes, availableVolumes):
    """Translate requested data volumes into the 'volumeContainers' part of the job model."""
    if dataVolumes is None:
        # No selection: mount everything, writable where the volume is writable.
        return [{'name': vol.get('name'), 'writable': bool(vol.get('writable'))}
                for vol in availableVolumes]

    resolved = []
    for dVol in dataVolumes:
        name = dVol.get('name')
        matches = [vol for vol in availableVolumes if vol.get('name') == name]
        if not matches:
            raise Exception("Data volume '" + str(name) + "' not found within Compute domain")
        for vol in matches:
            # The write request comes from the caller's entry (dVol), the
            # permission from the compute domain's entry (vol).
            resolved.append({'name': name, 'writable': _grantsWriteAccess(dVol, vol.get('writable'))})
    return resolved


# The following code replaces the similar function in SciServer.Jobs.
# That version does not yet support writing to a data volume.
def submitShellCommandJob(shellCommand, dockerComputeDomain = None, dockerImageName = None, userVolumes = None, dataVolumes = None, resultsFolderPath = "", jobAlias = ""):
    """
    Submits a shell command for execution (as an asynchronous job) inside a Docker compute domain.
    Logs in with the notebook-level USERNAME and PASSWORD before submitting.
    :param shellCommand: shell command (string) defined by the user.
    :param dockerComputeDomain: object (dictionary) that defines a Docker compute domain. A list of these kind of objects available to the user is returned by the function Jobs.getDockerComputeDomains(). If None, the first available compute domain is used.
    :param dockerImageName: name (string) of the Docker image for executing the command. E.g., dockerImageName="Python (astro)". An array of available Docker images is defined as the 'images' property in the dockerComputeDomain object. If None, the first image of the compute domain is used.
    :param userVolumes: a list of user volumes (with optional write permissions) that will be mounted to the docker Image.
           E.g., userVolumes = [{'name':'persistent', 'needsWriteAccess':False},{'name':'scratch', 'needsWriteAccess':True}]
           An entry may also carry 'rootVolumeName' and 'owner' to tell apart volumes that share a name.
           If 'needsWriteAccess' is left out, the volume is mounted writable when the user has Write permissions on it.
           A list of available user volumes can be found as the 'userVolumes' property in the dockerComputeDomain object. If userVolumes=None, then all available user volumes are mounted, with 'needsWriteAccess' = True if the user has Write permissions on the volume.
    :param dataVolumes: a list of data volumes (with optional write permissions) that will be mounted to the docker Image.
           E.g., dataVolumes=[{"name":"SDSS_DAS"}, {"name":"Recount", "needsWriteAccess":True}].
           If 'needsWriteAccess' is left out, the volume is mounted writable when the compute domain marks it as writable.
           A list of available data volumes can be found as the 'volumes' property in the dockerComputeDomain object. If dataVolumes=None, then all available data volumes are mounted.
    :param resultsFolderPath: full path to results folder (string) where the shell command is executed. E.g.: /home/idies/workspace/rootVolume/username/userVolume/jobsFolder. If not set, then a default folder will be set automatically.
    :param jobAlias: alias (string) of job, defined by the user.
    :return: the job ID (int)
    :raises: Throws an exception if the HTTP request to the Authentication URL returns an error. Throws an exception if the HTTP request to the JOBM API returns an error, or if the volumes defined by the user are not available in the Docker compute domain.
    :example: dockerComputeDomain = Jobs.getDockerComputeDomains()[0]; job = submitShellCommandJob('pwd', dockerComputeDomain, 'Python (astro)', [{'name':'persistent'},{'name':'scratch', 'needsWriteAccess':True}], [{'name':'SDSS_DAS'}], jobAlias='myNewJob')
    .. seealso:: Jobs.submitNotebookJob, Jobs.getJobStatus, Jobs.getDockerComputeDomains, Jobs.cancelJob
    """

    token = Authentication.login(UserName=USERNAME, Password=PASSWORD)
    if token is None or token == "":
        raise Exception("User token is not defined. First log into SciServer.")

    # The task name only tags the request with where it was issued from.
    if Config.isSciServerComputeEnvironment():
        taskName = "Compute.SciScript-Python.Jobs.submitShellCommandJob"
    else:
        taskName = "SciScript-Python.Jobs.submitShellCommandJob"

    if dockerComputeDomain is None:
        dockerComputeDomains = Jobs.getDockerComputeDomains()
        if not dockerComputeDomains:
            raise Exception("There are no dockerComputeDomains available for the user.")
        dockerComputeDomain = dockerComputeDomains[0]

    if dockerImageName is None:
        images = dockerComputeDomain.get('images') or []
        if not images:
            raise Exception("dockerComputeDomain has no docker images available for the user.")
        dockerImageName = images[0].get('name')

    uVols = _resolveUserVolumes(userVolumes, dockerComputeDomain.get('userVolumes') or [])
    datVols = _resolveDataVolumes(dataVolumes, dockerComputeDomain.get('volumes') or [])

    dockerJobModel = {
        "command": shellCommand,
        "submitterDID": jobAlias,
        "dockerComputeEndpoint": dockerComputeDomain.get('apiEndpoint'),
        "dockerImageName": dockerImageName,
        "volumeContainers": datVols,
        "userVolumes": uVols,
        "resultsFolderURI": resultsFolderPath
    }
    data = json.dumps(dockerJobModel).encode()
    url = Config.RacmApiURL + "/jobm/rest/jobs/docker?TaskName=" + taskName
    headers = {'X-Auth-Token': token, "Content-Type": "application/json"}
    res = requests.post(url, data=data, headers=headers, stream=True)

    if res.status_code != 200:
        raise Exception("Error when submitting a job to the JOBM API.\nHttp Response from JOBM API returned status code " + str(res.status_code) + ":\n" + res.content.decode())

    return json.loads(res.content.decode()).get('id')

### SciServer User Config

In [50]:
# Your SciServer username and password.
# Set the SCISERVER_USERNAME / SCISERVER_PASSWORD environment variables so that
# real credentials are never saved inside the notebook; the literals below are
# only used as fallbacks when the variables are not set.
USERNAME = os.environ.get('SCISERVER_USERNAME', 'howsercw')
PASSWORD = os.environ.get('SCISERVER_PASSWORD', '------')

# Define the required job environment.
DOMAIN = 'COVID-19 Jobs'  # change to the name of a new compute domain if needed
IMAGE = 'SciServer Essentials'

# Define the lists of user and data volumes that should be mounted.
# Add those that your job needs. User volumes are written as
# '<rootVolumeName>/<owner>/<name>'; data volumes are given by name.
USERVOLUMES = ['Storage/' + USERNAME + '/persistent', 'Temporary/' + USERNAME + '/scratch']
DATAVOLUMES = ['COVID-19']

# Folder in which the submitted shell commands are executed, and the job alias.
RESULTSFOLDERPATH = "/home/idies/workspace/Temporary/" + USERNAME + "/scratch/jobs"
JOBALIAS = "guppyplex"


In [5]:
# Look up the configured compute domain and pick, among what that domain
# offers, the configured image and the configured data and user volumes.
domains=Jobs.getDockerComputeDomains()
domain=None
image=None
volumes=[]  # not used in this cell; kept so that the name stays defined
userVolumes=[]
dataVolumes=[]
for d in domains:
    if d['name'] == DOMAIN:
        domain=d
        for im in d['images']:
            if im['name'] == IMAGE:
                image = im
        for v in d['volumes']:
            if v['name'] in DATAVOLUMES:
                dataVolumes.append({"name":v['name'],'needsWriteAccess':True})
        for uv in d['userVolumes']:
            path=pathForUserVolume(uv)
            if path in USERVOLUMES:
                userVolumes.append({'name':uv['name'],'rootVolumeName':uv['rootVolumeName']
                                    ,'owner':uv['owner'],'needsWriteAccess':True})
        break

# Fail here, with a message that names the culprit, rather than later when the
# job is submitted with an incomplete environment.
if domain is None:
    raise Exception("Compute domain '" + DOMAIN + "' is not available; found: "
                    + ", ".join(str(d['name']) for d in domains))
if image is None:
    raise Exception("Image '" + IMAGE + "' is not available in compute domain '" + DOMAIN + "'")

missingDataVolumes = [name for name in DATAVOLUMES
                      if name not in [dv['name'] for dv in dataVolumes]]
if missingDataVolumes:
    raise Exception("Data volume(s) not available in compute domain '" + DOMAIN + "': "
                    + ", ".join(missingDataVolumes))

missingUserVolumes = [wanted for wanted in USERVOLUMES
                      if wanted not in [pathForUserVolume(found) for found in userVolumes]]
if missingUserVolumes:
    raise Exception("User volume(s) not available in compute domain '" + DOMAIN + "': "
                    + ", ".join(missingUserVolumes))

## Module 2: Length Filter 

Guppyplex script

In [44]:
# Base shell command of this module: the length-filter script run through
# `bash -x`, which echoes each command as it is executed. The per-sample
# input directory is appended with `-i` when the commands are built.
script='bash -x /home/idies/workspace/covid19/code/ncov/pipeline_scripts/artic-module2-length-filter.sh'

Sequencing run directory

In [7]:
# Sequencing run directory on the user's Temporary/scratch volume. Built from
# USERNAME (config cell) instead of repeating the user name in the path.
run_dir='/home/idies/workspace/Temporary/' + USERNAME + '/scratch/pipeline_test'

In [15]:
# Path of the manifest file that sits directly inside the run directory.
manifest = "{0}/manifest.txt".format(run_dir)

In [55]:
def module_2_get_dirs(run_dir, manifest_path=None):
    """Return the demultiplexed directories of the samples listed in the manifest.

    Every directory below '<run_dir>/artic-pipeline/1-barcode-demux' (the walk
    is recursive and includes that directory itself) whose last path component
    equals the first tab-separated field of a manifest line is returned.

    :param run_dir: sequencing run directory (string).
    :param manifest_path: path of the tab-separated manifest file. Defaults to
           '<run_dir>/manifest.txt', so the manifest always belongs to the run
           that is being inspected.
    :return: list of directory paths, in manifest order; a manifest line that
             matches several directories contributes all of them.
    :raises: OSError if the manifest file cannot be opened.
    """
    if manifest_path is None:
        manifest_path = run_dir + '/manifest.txt'
    input_dir = run_dir + '/artic-pipeline/1-barcode-demux'

    # Index the directories by their last path component once, instead of
    # rescanning the whole directory list for every manifest line.
    dirs_by_name = {}
    for dir_path, _dir_names, _file_names in walk(input_dir):
        dirs_by_name.setdefault(dir_path.split('/')[-1], []).append(dir_path)

    sample_dirs = []
    with open(manifest_path) as f:
        for line in f:
            # Strip the line ending first: on a single-column line the first
            # field would otherwise carry the newline and never match.
            sample_name = line.rstrip('\r\n').split('\t')[0].strip()
            if not sample_name:
                continue  # blank line
            sample_dirs.extend(dirs_by_name.get(sample_name, []))

    return sample_dirs

In [56]:
# Directories under the run's barcode-demux folder whose names appear in the manifest.
module_2_dirs=module_2_get_dirs(run_dir)

In [57]:
module_2_dirs

['/home/idies/workspace/Temporary/howsercw/scratch/pipeline_test/artic-pipeline/1-barcode-demux/NB01',
 '/home/idies/workspace/Temporary/howsercw/scratch/pipeline_test/artic-pipeline/1-barcode-demux/NB02']

Create the commands: combine the bash script with its parameters and the input directory of each sample.

In [45]:
# One command line per sample directory. The directory is shell-quoted so that
# a path containing spaces or shell metacharacters still reaches the script as
# a single `-i` argument; plain paths are left unchanged by the quoting.
commands = [script + " -i " + shlex.quote(sample_dir) for sample_dir in module_2_dirs]


In [47]:
commands

['bash -x /home/idies/workspace/covid19/code/ncov/pipeline_scripts/artic-module2-length-filter.sh -i /home/idies/workspace/Temporary/howsercw/scratch/pipeline_test/artic-pipeline/1-barcode-demux/NB01',
 'bash -x /home/idies/workspace/covid19/code/ncov/pipeline_scripts/artic-module2-length-filter.sh -i /home/idies/workspace/Temporary/howsercw/scratch/pipeline_test/artic-pipeline/1-barcode-demux/NB02']

In [48]:
def submit_batch(commands, delay_seconds=30):
    """Submit every shell command as a separate job and return the job IDs.

    The jobs are submitted with the notebook-level settings: `domain`, `IMAGE`,
    `userVolumes`, `dataVolumes`, `RESULTSFOLDERPATH` and `JOBALIAS`.

    :param commands: iterable of shell commands (strings).
    :param delay_seconds: pause between two consecutive submissions. There is
           no pause after the last submission, so the call returns as soon as
           the final job has been submitted.
    :return: list with the value returned by submitShellCommandJob for each
             command, in submission order.
    """
    jobs = []
    for index, command in enumerate(commands):
        if index > 0:
            time.sleep(delay_seconds)
        job = submitShellCommandJob(shellCommand=command
                                    , dockerComputeDomain = domain
                                    , dockerImageName = IMAGE
                                    , userVolumes = userVolumes, dataVolumes=dataVolumes
                                    , resultsFolderPath = RESULTSFOLDERPATH
                                    , jobAlias = JOBALIAS)
        jobs.append(job)
    return jobs
    

In [52]:
# Submit one job per command; `jobs` collects the values returned by the submissions.
jobs=submit_batch(commands)