In [None]:
from pathlib import Path

# Mapping

In [None]:
# --- Input data ---------------------------------------------------------------
# Root folder; each non-empty sub-directory is treated as one run.
DATA_DIR = Path("../data")

# Runs to process: every sub-directory of DATA_DIR that contains at least one entry.
# `any()` stops at the first entry instead of listing the whole folder, and
# `sorted()` gives the same processing order on every execution (the order in
# which glob yields entries is not guaranteed).
RUN_DIRS = sorted(
    run_dir
    for run_dir in DATA_DIR.glob("*")
    if run_dir.is_dir() and any(run_dir.glob("*"))
)

# --- Mapping against Wuhan-Hu-1 -----------------------------------------------
# Parent of the per-run working directories, and the reference FASTA.
MAPPER_WUHAN_DIR = Path("../mapper_wuhan")
# Parent of the per-run working directories of the in-silico depletion.
DEPLETION_DIR = Path("../human_depleted_fastqs/depletion_analysis")
GENOME_SARS_PATH = "../resources/genomes/MN908947_3.fa"

# --- Mapping against human ----------------------------------------------------
MAPPER_HUMAN_DIR = Path("../mapper_human")
GENOME_HUMAN_PATH = "../resources/genomes/hg38.fa"

# --- Mapping against the combined human + SARS-CoV-2 reference ----------------
# The combined FASTA is produced in this notebook by concatenating the two
# references above.
MAPPER_HUMAN_SARS_DIR = Path("../mapper_human_sarscov2")
GENOME_HUMAN_SARS_PATH = "../resources/genomes/human_sars.fa"

## Mapping vs SARS-CoV-2 Wuhan-Hu-1

In [None]:
print("MAPPING against Wuhan-Hu-1 the following runs:")
for run_dir in RUN_DIRS:
    
    # Prepare mapper outdir
    out_dir = MAPPER_WUHAN_DIR / run_dir.stem
    print(f"- {run_dir.stem} in {out_dir}")
    
    # Check if mapping already done:
    if out_dir.exists():
        print(f"The result folder {out_dir} exists already. Skipping...")
        continue
        
    # Prepare sequana command
    sequana_cmd = f"sequana_mapper --reference {GENOME_SARS_PATH} --input-directory {run_dir} --do-coverage --working-directory {out_dir} --mapper bowtie2"
    print(f"Sequana command: {sequana_cmd}")
    
    # Create sequana templates
    ! module load sequana/prod && {sequana_cmd}
    ! module load sequana/prod && cd {out_dir} && sbatch -q biomics -p biomics -A biomics mapper.sh

## Mapping vs human

In [None]:
for run_dir in RUN_DIRS:
    
    # Prepare mapper outdir
    out_dir = MAPPER_HUMAN_DIR / run_dir.stem
    print(f"- {run_dir.stem} in {out_dir}")
    
    # Check if mapping already done:
    if out_dir.exists():
        print(f"The result folder {out_dir} exists already. Skipping...")
        continue
        
    # Prepare sequana command
    sequana_cmd = f"sequana_mapper --slurm-memory 30G --reference {GENOME_HUMAN_PATH} --input-directory {run_dir} --working-directory {out_dir} --mapper bowtie2"
    print(f"Sequana command: {sequana_cmd}")
    
    # Create sequana templates
    ! module load sequana/prod && {sequana_cmd}
    ! module load sequana/prod && cd {out_dir} && sbatch -q biomics -p biomics -A biomics mapper.sh

## In silico depletion

In [None]:
depletion_cmd = f"""
bwa mem {GENOME_HUMAN_PATH} ./art/data_1.fq ../art/data_2.fq -t 8 > temp.sam \
samtools view -bS temp.sam > temp.bam \
samtools view -b -f 12 -F 256 temp.bam > temp_bothReadsUnmapped.bam \
bioconvert bam2fastq temp_bothReadsUnmapped.bam test.fastq --force \
"""

for run_dir in RUN_DIRS:
    
    # Prepare mapper outdir
    DEPLETION_DIR.mkdir(exist_ok=True)
    out_dir = DEPLETION_DIR / run_dir.stem
    print(f"- {run_dir.stem} in {out_dir}")
    FASTQ_DIR = out_dir / "depleted_fastqs"
    FASTQ_DIR.mkdir(exist_ok=True)
    
    sequana_cmd = f"sequana_mapper --slurm-memory 30G --reference {GENOME_HUMAN_PATH} --input-directory {run_dir} --working-directory {out_dir} --mapper bwa"
    ! module load sequana/prod && {sequana_cmd}
    ! module load sequana/prod && cd {out_dir} && sbatch -q biomicspole -p biomicspole mapper.sh
    
    for bam in out_dir.glob("*/bwa_mem_mapping/*.bam"):
        print(bam)
        fastq = FASTQ_DIR / bam.with_suffix('.depleted.fastq').name
        samtools_cmd = f"samtools view -b -f 12 -F 256 {bam} > {bam.with_suffix('.depleted.bam')}"
        ! {samtools_cmd}
        bioconvert_cmd = f"module load bioconvert && bioconvert bam2fastq {bam.with_suffix('.depleted.bam')} {fastq} --force"
        ! {bioconvert_cmd}

## Mapping vs human and SARS-CoV-2

In [None]:
! cat {GENOME_SARS_PATH} {GENOME_HUMAN_PATH} > {GENOME_HUMAN_SARS_PATH}

In [None]:
for run_dir in RUN_DIRS:
    
    # Prepare mapper outdir
    run_dir = Path(run_dir)
    out_dir = MAPPER_HUMAN_SARS_DIR / run_dir.stem
    print(f"- {run_dir.stem} in {out_dir}")
    
    # Check if mapping already done:
    if out_dir.exists():
        print(f"The result folder {out_dir} exists already. Skipping...")
        continue
        
    # Prepare sequana command
    sequana_cmd = f"sequana_mapper --slurm-memory 30G --reference {GENOME_HUMAN_SARS_PATH} --input-directory {run_dir} --working-directory {out_dir} --mapper bowtie2"
    print(f"Sequana command: {sequana_cmd}")
    
    # Create sequana templates
    ! module load sequana/prod && {sequana_cmd}
    ! module load sequana/prod && cd {out_dir} && sbatch -q hubbioit -p hubbioit mapper.sh