In [1]:
from pathlib import Path
import shlex
import shutil
import subprocess
import sys

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

sys.path.append("./")
from vseek.common.errors import *

In [2]:
# module for downloading the data
def dependency_check(prog: str) -> bool:
    """Verify that a supported external executable can be located.

    The name is first compared against the list of supported programs
    hard-coded in this function, then looked up with ``shutil.which``.

    Parameters
    ----------
    prog : str
        name of the program or executable to look for

    Returns
    -------
    bool
        Always True when the function returns; every failure raises instead.

    Raises
    ------
    UnsupportedDependencyError
        If ``prog`` is not one of the supported programs.
    MissingDependencyError
        If ``prog`` is supported but ``shutil.which`` cannot find it.
    """
    callables = ["prefetch", "fasterq-dump"]

    # refuse unknown programs before looking anything up on the system
    is_supported = prog in callables
    if not is_supported:
        raise UnsupportedDependencyError(f"{prog} is not supported. Supported programs {callables}")

    # shutil.which gives None when no matching executable is found
    if shutil.which(prog) is None:
        raise MissingDependencyError(f"{prog} is not installed")

    return True

    
# TODO: add error parser for each callable?
def _call(cmd: str) -> int:
    """Wrapper for calling executable

    Parameters
    ----------
    cmd : str
        command input
    
    Returns
    -------
    int
        Return code
        
        
    Raises:
    -------
    ExecutionFailedError:
        raised when a non-zero exit status code is raised from the executable
    """
    cmd = cmd.split() 
    call = subprocess.run(cmd, shell=False, stdout=subprocess.PIPE, stderr=subprocess.PIPE) 
    if call.returncode != 0:
        raise ExecutionError("Execution failed. Non-zero exit status captured")
    
    
def download_fastq(sra_ids: str | list, threads=4, prefetch_dir="SRA_download", fastq_dir="fastq_files", verbose=False) -> str: 
    """ Downloads fastq file to local machine with given sra ascension id.

    Parameters
    ----------
    sra_ids : str | list
        single string that are delimited if multiple sra ids 
        by white spaces or a list of sra ascension ids
    threads : int, optional
        number of threads to use for downloading fastq files, 
        by default 4
    prefetch_dir : str, optional
        directory for storing prefetched sra files, 
        by default "SRA_download"
    fastq_dir : str, optional
        directory name for saving fastq files
        by default "fastq_files"
    
    Returns
    ------
    str
        returns the absolute path where the fastq files are downloaded
    """
    
    # type checking 
    if isinstance(sra_ids, str):
        sra_ids = sra_ids.split()

    # creating a prefetch directory
    prefetch_path = Path(f"{prefetch_dir}")
    prefetch_path.mkdir(exist_ok=True)
    fastq_path = Path(fastq_dir)
    fastq_path.mkdir(exist_ok=True)

    # executeable names
    prefetch_prog = "prefetch"
    fasterq_prog = "fasterq-dump"

    # checking dependencies 
    dependency_check(prefetch_prog)
    dependency_check(fasterq_prog)

    # prefetching sra files 
    print("Prefetching SRR data...")
    sra_ids_str = " ".join(sra_ids)
    prefetch_cmd = f"{prefetch_prog} {sra_ids_str} -O {prefetch_path.absolute()}"
    _call(prefetch_cmd)


    # TODO: add vdb_view steps checking that the files are not corrupt
    # downloading fastq files
    print("Downloading Fastq files")
    for sra_id in sra_ids:
        fasterq_cmd = f"{fasterq_prog} {prefetch_path.absolute()}/{sra_id}/{sra_id}.sra -e {threads} -O {fastq_path.absolute()}" 
        print(fasterq_cmd)
    #    _call(fasterq_cmd)
    #    print(f"{sra_id} fastq file download complete")

    return fastq_path.absolute()
    

In [3]:
# downloading the data
srr_id = ["SRR12432009", "SRR12464727"]
# pass the list defined above instead of re-typing the ids as a string literal
download_fastq(srr_id)

Prefetching SRR data...
Downloading Fastq files
fasterq-dump /Users/erikserrano/Development/prelim/prelim3/program_name/SRA_download/SRR12432009/SRR12432009.sra -e 4 -O /Users/erikserrano/Development/prelim/prelim3/program_name/fastq_files
fasterq-dump /Users/erikserrano/Development/prelim/prelim3/program_name/SRA_download/SRR12464727/SRR12464727.sra -e 4 -O /Users/erikserrano/Development/prelim/prelim3/program_name/fastq_files


PosixPath('/Users/erikserrano/Development/prelim/prelim3/program_name/fastq_files')

In [4]:
# sanity check: shutil.which gives None for a name that is not an executable on PATH
print(shutil.which("fasterq-d"))

None
