In [1]:
import os 
import json 
import re

from _utils import (
    aria2c_download_file, 
    modify_config_file,
    download_bowtie_index
)

import logging
# Root logger at DEBUG so that log records emitted via `logging` at DEBUG level
# and above are shown in the notebook output.
logging.basicConfig(level=logging.DEBUG)

In [2]:
# Trial run of the imported helper against a small public sample zip rather than
# a real Bowtie2 index archive, saving under ./database.
download_bowtie_index("./database", "https://www.learningcontainer.com/wp-content/uploads/2020/05/sample-zip-file.zip")


08/20 20:50:51 [[1;32mNOTICE[0m] Downloading 1 item(s)


INFO:root:aria2c -x 16 -j 16 -c https://www.learningcontainer.com/wp-content/uploads/2020/05/sample-zip-file.zip -d /storage/TomaszLab/vbez/metagenomic_gmhi/metagenomome_assembly/src/database
INFO:root:Downloaded sample-zip-file.zip
INFO:root:Decompressing Bowtie2 indexes



08/20 20:50:51 [[1;32mNOTICE[0m] Download complete: /storage/TomaszLab/vbez/metagenomic_gmhi/metagenomome_assembly/src/database/sample-zip-file.zip

Download Results:
gid   |stat|avg speed  |path/URI
aba724|OK  |    53KiB/s|/storage/TomaszLab/vbez/metagenomic_gmhi/metagenomome_assembly/src/database/sample-zip-file.zip

Status Legend:
(OK):download completed.
Archive:  /storage/TomaszLab/vbez/metagenomic_gmhi/metagenomome_assembly/src/database/sample-zip-file.zip
  inflating: /storage/TomaszLab/vbez/metagenomic_gmhi/metagenomome_assembly/src/database/sample.txt  


('bowtie2',
 'bowtie2_index_path',
 '/storage/TomaszLab/vbez/metagenomic_gmhi/metagenomome_assembly/src/database')

In [11]:
# Default archive to fetch: the GRCh38 (no-alt analysis set) index zip.
url = "https://genome-idx.s3.amazonaws.com/bt/GRCh38_noalt_as.zip"
def download_bowtie_index(save_dir, url=url):
    """Download a zipped Bowtie2 index, unpack it and record its path in config.ini.

    NOTE: this definition shadows the ``download_bowtie_index`` imported from
    ``_utils`` at the top of the notebook.

    Parameters
    ----------
    save_dir : str
        Base directory; the archive is downloaded to and extracted in
        ``<save_dir>/databases`` (created if missing).
    url : str, optional
        Location of the zip archive. Defaults to the module-level ``url``.

    Returns
    -------
    None

    Raises
    ------
    zipfile.BadZipFile
        If the downloaded file is not a valid zip archive. The archive is
        left on disk in that case so it can be inspected or re-downloaded.
    """
    # Function-scope import keeps this cell self-contained.
    import zipfile

    database_dir = os.path.join(save_dir, "databases")
    os.makedirs(database_dir, exist_ok=True)

    # Presumably returns the bare file name of the downloaded archive
    # (it is joined onto database_dir below) -- verify against _utils.
    filename = aria2c_download_file(url, database_dir)

    logging.info("Decompressing Bowtie2 indexes")
    zip_path = os.path.join(database_dir, filename)
    # Extract in-process instead of shelling out to `unzip`: paths containing
    # spaces or shell metacharacters are handled safely, and a failed
    # extraction raises before the archive is deleted.
    with zipfile.ZipFile(zip_path) as archive:
        archive.extractall(database_dir)
    os.remove(zip_path)

    # The index path is the archive name up to its first dot, inside database_dir.
    bowtie_index_path = os.path.abspath(os.path.join(database_dir, filename.split(".")[0]))
    # Store the computed path itself (previously the literal string
    # "bowtie2_index_path" was written to the config by mistake).
    modify_config_file("config.ini",
                       section="bowtie2",
                       config_name="bowtie2_index_path",
                       config_value=bowtie_index_path)

In [None]:
def prepare_inputs(study_path, step="qc_and_assemble", r1_suffix="_1.fastq.gz", r2_suffix="_2.fastq.gz"):
    """Write ``inputs.json`` for a workflow step from the paired reads in a folder.

    Parameters
    ----------
    study_path : str
        Directory containing the forward and reverse read files.
    step : str, optional
        Workflow step to prepare inputs for. Only ``"qc_and_assemble"`` is
        handled; any other value makes the function return without writing.
    r1_suffix, r2_suffix : str, optional
        File-name suffixes identifying forward and reverse reads.

    Returns
    -------
    None
        The result is written to ``inputs.json`` in the current working
        directory, based on ``json_templates/1-qc_and_assemble.json``.

    Notes
    -----
    Mates are matched by name (same prefix, ``r1_suffix`` swapped for
    ``r2_suffix``) rather than by position in two sorted lists, so a missing
    file can no longer shift every following pair. Files without a mate are
    skipped with a warning.
    """
    if step != "qc_and_assemble":
        return

    # List the directory once; sorted order keeps the output deterministic.
    names = sorted(os.listdir(study_path))
    present = set(names)

    sample_info = []
    paired_reverse = set()
    for name in names:
        if not name.endswith(r1_suffix):
            continue
        # len()-based slice also works when r1_suffix is an empty string.
        mate = name[:len(name) - len(r1_suffix)] + r2_suffix
        if mate not in present:
            logging.warning("Skipping %s: expected mate %s not found in %s",
                            name, mate, study_path)
            continue
        paired_reverse.add(mate)
        sample_info.append({"file_r1": os.path.join(study_path, name),
                            "file_r2": os.path.join(study_path, mate)})

    # Report reverse reads that were never claimed by a forward read.
    for name in names:
        if name.endswith(r2_suffix) and name not in paired_reverse:
            logging.warning("Skipping %s: no matching forward read in %s",
                            name, study_path)

    # Load the template and add the collected pairs to it.
    with open("json_templates/1-qc_and_assemble.json", "r", encoding="utf-8") as f:
        template = json.load(f)
    template["qc_and_assemble.sampleInfo"].extend(sample_info)

    # ensure_ascii=False may emit non-ASCII characters, so pin the encoding.
    with open("inputs.json", "w", encoding="utf-8") as f:
        json.dump(template, f, indent=4, sort_keys=True, ensure_ascii=False)

In [24]:
def modify_concurrency_config(path_to_file: str,
                              output_path: str,
                              n_jobs: int) -> None:
    """Write a copy of a Cromwell config with a new ``concurrent-job-limit``.

    Parameters
    ----------
    path_to_file : str
        Path to the source Cromwell configuration file.
    output_path : str
        Directory in which ``concurrency_config.conf`` is written.
    n_jobs : int
        Value to set for ``concurrent-job-limit``.

    Returns
    -------
    None
        The modified configuration is written to
        ``<output_path>/concurrency_config.conf`` and printed.
    """
    # Read the initial file.
    with open(path_to_file, "r") as f:
        config = f.read()

    # Match any existing numeric limit and any spacing around the separator,
    # not only the literal "concurrent-job-limit = 8". A callable replacement
    # keeps the new value from being interpreted as a regex template.
    config, n_replaced = re.subn(r"concurrent-job-limit\s*[=:]\s*\d+",
                                 lambda _match: f"concurrent-job-limit = {n_jobs}",
                                 config)
    if n_replaced == 0:
        # Surface the no-op instead of silently writing an unchanged copy.
        logging.warning("No concurrent-job-limit setting found in %s; "
                        "config copied unchanged", path_to_file)

    out_config_path = os.path.join(output_path, "concurrency_config.conf")
    with open(out_config_path, "w") as f:
        f.write(config)
    print(config)

In [25]:
# Derive ./concurrency_config.conf from the kneaddata Cromwell config,
# requesting a limit of 4 concurrent jobs.
modify_concurrency_config("cromwell_configs/kneaddata.conf", "./", 4)

include required(classpath("application"))
backend {
  default = Docker

  providers {

    # Example backend that _only_ runs workflows that specify docker for every command.
    Docker {
      actor-factory = "cromwell.backend.impl.sfs.config.ConfigBackendLifecycleActorFactory"
      config {
        run-in-background = true
        runtime-attributes = """
              String? docker
              String? docker_user
        """
        submit-docker = """
          # make sure there is no preexisting Docker CID file
          rm -f ${docker_cid}
          # run as in the original configuration without --rm flag (will remove later)
          if [[ ${docker} =~ "kneaddata" ]]; then
          docker run \
            --cidfile ${docker_cid} \
            -i \
            ${"--user " + docker_user} \
            --entrypoint ${job_shell} \
            -v ${cwd}:${docker_cwd}:delegated \
            -v "/storage/TomaszLab/vbez/metagenomic_gmhi/metagenomome_assembly/databases/GRCh38_bt2