In [14]:
import json
import logging
import os
import re
import zipfile

from utils import (
    aria2c_download_file,
    modify_config_file
)

# Configure the root logger at DEBUG so that messages of every level,
# including the logging.info progress messages emitted by the helpers
# defined in later cells, are shown.
logging.basicConfig(level=logging.DEBUG)

In [11]:
url = "https://genome-idx.s3.amazonaws.com/bt/GRCh38_noalt_as.zip"
def download_bowtie_index(save_dir, url=url):
    """Download a zipped Bowtie2 index, unpack it and record its path in config.ini.

    The archive is fetched into ``<save_dir>/databases`` (created if missing),
    extracted in place, and then deleted. The resulting index path is written
    to the ``bowtie2_index_path`` option of the ``bowtie2`` section of
    ``config.ini`` (resolved relative to the current working directory).

    Parameters
    ----------
    save_dir : str
        Directory under which the ``databases`` sub-directory lives.
    url : str, optional
        Location of the zipped index; defaults to the module-level ``url``.

    Returns
    -------
    str
        Absolute path ``<save_dir>/databases/<archive name up to its first dot>``,
        i.e. the value stored in the config file.

    Raises
    ------
    zipfile.BadZipFile
        If the downloaded file is not a valid zip archive. The archive is
        left on disk in that case so the failure can be inspected.
    """
    database_dir = os.path.join(save_dir, "databases")
    os.makedirs(database_dir, exist_ok=True)

    # NOTE(review): aria2c_download_file is assumed to return the archive's
    # file name relative to database_dir -- confirm against utils.
    filename = aria2c_download_file(url, database_dir)

    logging.info("Decompressing Bowtie2 indexes")
    zip_path = os.path.join(database_dir, filename)
    # Extract with the standard library instead of shelling out to `unzip`:
    # paths containing spaces or shell metacharacters are handled safely, and
    # a failed extraction raises before the archive is removed.
    with zipfile.ZipFile(zip_path) as archive:
        archive.extractall(database_dir)
    os.remove(zip_path)

    # The index location is derived from the archive name up to its first dot.
    bowtie_index_path = os.path.abspath(os.path.join(database_dir, filename.split(".")[0]))
    # Store the computed path itself (not the literal option name).
    modify_config_file("config.ini",
                       section="bowtie2",
                       config_name="bowtie2_index_path",
                       config_value=bowtie_index_path)
    return bowtie_index_path

In [None]:
def prepare_inputs(study_path, step="qc_and_assemble", r1_suffix="_1.fastq.gz", r2_suffix="_2.fastq.gz"):
    """Build ``inputs.json`` for a workflow step from the paired reads in a folder.

    Only ``step="qc_and_assemble"`` is handled. For that step the JSON template
    ``json_templates/1-qc_and_assemble.json`` is loaded, the read pairs found in
    ``study_path`` are added under the ``"qc_and_assemble.sampleInfo"`` key, and
    the result is written to ``inputs.json`` in the current working directory.

    Parameters
    ----------
    study_path : str
        Directory containing the read files.
    step : str, optional
        Workflow step to prepare inputs for. Any value other than
        ``"qc_and_assemble"`` logs a warning and writes nothing.
    r1_suffix, r2_suffix : str, optional
        File-name suffixes identifying forward and reverse read files.

    Returns
    -------
    dict or None
        The populated template that was written to ``inputs.json``, or
        ``None`` when ``step`` is not supported.

    Notes
    -----
    Reads are paired by file name: a forward file ``<stem><r1_suffix>`` is
    matched with ``<stem><r2_suffix>``. Forward files without a mate are
    skipped with a warning rather than being paired with another sample's
    reverse file. Pairs are emitted in sorted file-name order.
    """
    if step != "qc_and_assemble":
        logging.warning("prepare_inputs: unsupported step %r; no inputs written", step)
        return None

    # List the directory once; the set gives constant-time mate lookups.
    listing = sorted(os.listdir(study_path))
    available = set(listing)

    pairs = []
    for name in listing:
        if not name.endswith(r1_suffix):
            continue
        # len()-based slice so an empty suffix does not wipe out the stem.
        mate = name[:len(name) - len(r1_suffix)] + r2_suffix
        if mate not in available:
            logging.warning("No %s mate found for %s; skipping", r2_suffix, name)
            continue
        pairs.append((os.path.join(study_path, name), os.path.join(study_path, mate)))

    # template
    with open("json_templates/1-qc_and_assemble.json", "r") as f:
        template = json.load(f)

    # adding files to json -- the template may hold sampleInfo either as a
    # dict of parallel lists ({"file_r1": [...], "file_r2": [...]}) or as a
    # list of per-sample records; support both shapes.
    sample_info = template["qc_and_assemble.sampleInfo"]
    if isinstance(sample_info, dict):
        for r1, r2 in pairs:
            sample_info.setdefault("file_r1", []).append(r1)
            sample_info.setdefault("file_r2", []).append(r2)
    else:
        for r1, r2 in pairs:
            sample_info.append({"file_r1": r1, "file_r2": r2})

    # writing input json
    with open('inputs.json', 'w') as f:
        json.dump(template, f, indent=4, sort_keys=True, ensure_ascii=False)

    return template

In [20]:
# NOTE(review): in the cells visible here `template` is only assigned as a
# local variable inside prepare_inputs, never at top level -- this cell
# presumably relies on leftover kernel state and would raise NameError after
# Restart & Run All; confirm, or load the template explicitly before display.
template

{'qc_and_assemble.sample_suffix': '_1.fastq.gz',
 'qc_and_assemble.bowtie2_index_path': 'GRCh38',
 'qc_and_assemble.thread_num': 4,
 'qc_and_assemble.sampleInfo': {'file_r1': [], 'file_r2': []},
 'qc_and_assemble.trf_path': '/bin',
 'qc_and_assemble.bowtie2_path': '/bin'}