In [66]:
# Show the BAM path currently held in ${file}.
# Quoted and printed with printf so the value is emitted verbatim: no word
# splitting, no glob expansion, and no risk of echo treating it as an option.
printf '%s\n' "${file}"

35k/35k_preprocessing_out/data/bam/VIB_hydrop_22.35k___VIB_hydrop_22__R1.bwa.out.possorted.bam


In [79]:
# Start a bash shell inside the single-cell toolkit container image, which is
# expected to provide the samtools used further down (assumption: not
# verifiable from this notebook alone).
#   --cleanenv  do not pass the host environment into the container
#   -H "$PWD"   use the current directory as the container home directory
#               (quoted so a working directory with spaces stays one argument)
#   -B ...      bind-mount the listed host paths into the container
singularity exec \
    --cleanenv \
    -H "$PWD" \
    -B /dodrio,/readonly/dodrio,/tmp \
    /dodrio/scratch/projects/starting_2022_023/benchmark/scatac_benchmark/0_resources/vsn_cache/vibsinglecellnf-singlecelltoolkit-2022-07-07-0638c1d.img \
    bash

In [56]:
mapping_summary () {
    # Write a two-column TSV mapping report for one BAM file.
    #
    # Arguments:
    #   1  mapping_stats  Path of the TSV report to create (overwritten).
    #   2  bam            BAM file to summarise.
    #   3  sampleId       Sample name: used as the header of the value column
    #                     and as prefix of the files written to the current
    #                     directory.
    #
    # Files written to the current directory:
    #   ${sampleId}.stat                           samtools stat output (kept)
    #   ${sampleId}.uniquely_mapped_reads.txt      temporary, removed on exit
    #   ${sampleId}.fraction_total_read_pairs.txt  temporary, removed on exit
    #
    # Returns 0 on success; 1 when an argument is missing, when any samtools
    # or tee stage fails, or when the report cannot be written.
    local mapping_stats="${1}";
    local bam="${2}";
    local sampleId="${3}";

    local stat_file="${sampleId}.stat";
    local uniquely_mapped_file="${sampleId}.uniquely_mapped_reads.txt";
    local mapq_file="${sampleId}.fraction_total_read_pairs.txt";
    local pipeline_status;

    if [ -z "${mapping_stats}" ] || [ -z "${bam}" ] || [ -z "${sampleId}" ]; then
        printf 'Usage: mapping_summary <mapping_stats.tsv> <bam> <sampleId>\n' >&2;
        return 1;
    fi

    # Get mapping statistics from BAM file:
    #   - Read BAM file once and write uncompressed BAM.
    #   - tee copies that uncompressed BAM stream to every samtools command
    #     (one per process substitution) and to stdout.
    #   - samtools commands:
    #       - Get samtools statistics with:
    #           samtools stat "${bam}" > "${sampleId}.stat"
    #       - Uniquely mapped reads (BWA): mapped primary alignments without
    #         XA or SA tag:
    #           samtools view -c -F 0x4 -F 0x100 -F 0x800 -e '! [XA] && ! [SA]' "${bam}"
    #       - Reads mapped confidently to the genome. "-q 30" keeps reads with
    #         MAPQ >= 30; the "MAPQ>30" label in the report is kept as is so
    #         that existing consumers of the report keep working:
    #           samtools view -c -F 0x4 -F 0x100 -F 0x800 -q 30 "${bam}"
    #   - Only use threads for "samtools stat". Using it with any of the other samtools commands
    #     makes everything slower than not using any threads at all.
    #
    # Bash does not wait for >(...) process substitutions when the pipeline
    # ends, so the two count files could still be empty when they are read
    # below. The pipeline therefore runs inside a command substitution with
    # fd 3 duplicated from the capture pipe: both counters inherit fd 3, and
    # $(...) only returns once every holder of that pipe has exited.
    # The only text written to the captured stdout is the list of exit codes.
    pipeline_status="$(
        {
            samtools view -u "${bam}" \
              | tee \
                    >(samtools view -c -F 0x4 -F 0x100 -F 0x800 -e '! [XA] && ! [SA]' - > "${uniquely_mapped_file}") \
                    >(samtools view -c -F 0x4 -F 0x100 -F 0x800 -q 30 - > "${mapq_file}") \
              | samtools stat -@ 2 - > "${stat_file}";
            printf '%s,' "${PIPESTATUS[@]}";
        } 3>&1
    )";

    # PIPESTATUS does not cover the process substitutions, so additionally
    # require that both counters actually produced a number.
    if [ "${pipeline_status}" != "0,0,0," ] \
        || [ ! -s "${uniquely_mapped_file}" ] \
        || [ ! -s "${mapq_file}" ]; then
        printf 'mapping_summary: samtools failed for "%s" (pipeline exit codes: %s)\n' \
            "${bam}" "${pipeline_status}" >&2;
        rm -f "${uniquely_mapped_file}" "${mapq_file}";
        return 1;
    fi

    # Output file: header line, then the "SN" summary lines of samtools stat
    # (columns 2 and 3), then the two extra counts.
    # sampleId is passed as printf data, never as the format string.
    if ! {
        printf '\t%s\n' "${sampleId}";
        grep '^SN' "${stat_file}" | cut -f 2,3;
        printf 'Uniquely mapped reads:\t';
        cat "${uniquely_mapped_file}";
        printf 'Reads mapped with MAPQ>30:\t';
        cat "${mapq_file}";
    } > "${mapping_stats}"; then
        printf 'mapping_summary: could not write "%s"\n' "${mapping_stats}" >&2;
        rm -f "${uniquely_mapped_file}" "${mapq_file}";
        return 1;
    fi

    rm "${uniquely_mapped_file}" "${mapq_file}";
}

In [83]:
# Create the mapping statistics report for every BAM file that does not have
# one yet, for each sequencing-depth directory listed in dirs.
#
# Expected layout (per directory D):
#   D/D_preprocessing_out/data/bam/<sample>___<sample_raw>__R1....bam
#   D/D_preprocessing_out/data/reports/mapping_stats/<sample>___<sample_raw>__R1.mapping_stats.tsv
#
# The samtools pipeline is kept inline (instead of calling a shell function)
# so that this cell also works in a freshly started shell.
dirs=(35k 30k 25k)
# "${dirs[@]}" is required: a bare $dirs only expands to the first element.
for dir in "${dirs[@]}"
do
    echo "$dir"
    # Glob instead of parsing "ls" output, so paths are never word-split.
    for file in "$dir/${dir}_preprocessing_out/data/bam/"*.bam
    do
        # Without any match the pattern is returned literally: skip it.
        [ -e "$file" ] || continue

        echo "$file"
        bam_name=$(basename "$file")
        # Part in front of the first "___", e.g. "BIO_ddseq_1.35k".
        sample=${bam_name%%___*}
        # Part in front of the first ".", e.g. "BIO_ddseq_1".
        sample_raw=${sample%%.*}
        printf '\t%s\n' "$sample"
        printf '\t%s\n' "$sample_raw"
        mapping_stats=$dir/${dir}_preprocessing_out/data/reports/mapping_stats/${sample}___${sample_raw}__R1.mapping_stats.tsv
        if [ ! -f "$mapping_stats" ]; then
            printf '\t%s does not exist!\n' "$mapping_stats"
            printf '\t\t%s\n' "$file"
            printf '\t\t%s\n' "$mapping_stats"

            uniquely_mapped_file="${sample}.uniquely_mapped_reads.txt"
            mapq_file="${sample}.fraction_total_read_pairs.txt"

            # Get mapping statistics from BAM file:
            #   - Read BAM file once and write uncompressed BAM.
            #   - tee copies that uncompressed BAM stream to every samtools
            #     command (one per process substitution) and to stdout.
            #   - samtools commands:
            #       - Get samtools statistics with:
            #           samtools stat "${file}" > "${sample}.stat"
            #       - Uniquely mapped reads (BWA):
            #           samtools view -c -F 0x4 -F 0x100 -F 0x800 -e '! [XA] && ! [SA]' "${file}"
            #       - Reads mapped confidently to the genome ("-q 30" keeps
            #         MAPQ >= 30; the report label "MAPQ>30" is kept as is):
            #           samtools view -c -F 0x4 -F 0x100 -F 0x800 -q 30 "${file}"
            #   - Only use threads for "samtools stat". Using it with any of the other samtools commands
            #     makes everything slower than not using any threads at all.
            #
            # Bash does not wait for >(...) process substitutions, so the
            # pipeline runs inside $(...) with fd 3 duplicated from the capture
            # pipe: both counters inherit fd 3 and $(...) only returns after
            # they have exited. The captured text is the list of exit codes.
            pipeline_status="$(
                {
                    samtools view -u "${file}" \
                      | tee \
                            >(samtools view -c -F 0x4 -F 0x100 -F 0x800 -e '! [XA] && ! [SA]' - > "${uniquely_mapped_file}") \
                            >(samtools view -c -F 0x4 -F 0x100 -F 0x800 -q 30 - > "${mapq_file}") \
                      | samtools stat -@ 2 - > "${sample}.stat"
                    printf '%s,' "${PIPESTATUS[@]}"
                } 3>&1
            )"

            # Do not create the report after a failure: an existing report
            # would make the "[ ! -f ]" check above skip this BAM on a rerun.
            if [ "${pipeline_status}" != "0,0,0," ] \
                || [ ! -s "${uniquely_mapped_file}" ] \
                || [ ! -s "${mapq_file}" ]; then
                printf '\t\tERROR: samtools failed for %s (pipeline exit codes: %s)\n' \
                    "$file" "${pipeline_status}" >&2
                rm -f "${uniquely_mapped_file}" "${mapq_file}"
                continue
            fi

            mkdir -p "$(dirname "${mapping_stats}")"

            # Header line, "SN" summary lines of samtools stat (columns 2 and
            # 3), then the two extra counts.
            {
                printf '\t%s\n' "${sample}"
                grep '^SN' "${sample}.stat" | cut -f 2,3
                printf 'Uniquely mapped reads:\t'
                cat "${uniquely_mapped_file}"
                printf 'Reads mapped with MAPQ>30:\t'
                cat "${mapq_file}"
            } > "${mapping_stats}"

            rm "${uniquely_mapped_file}" "${mapq_file}"
        fi
    done
done

35k
35k/35k_preprocessing_out/data/bam/BIO_ddseq_1.35k___BIO_ddseq_1__R1.bwa.out.possorted.bam
	BIO_ddseq_1.35k
	BIO_ddseq_1
35k/35k_preprocessing_out/data/bam/BIO_ddseq_2.35k___BIO_ddseq_2__R1.bwa.out.possorted.bam
	BIO_ddseq_2.35k
	BIO_ddseq_2
35k/35k_preprocessing_out/data/bam/BIO_ddseq_3.35k___BIO_ddseq_3__R1.bwa.out.possorted.bam
	BIO_ddseq_3.35k
	BIO_ddseq_3
35k/35k_preprocessing_out/data/bam/BIO_ddseq_4.35k___BIO_ddseq_4__R1.bwa.out.possorted.bam
	BIO_ddseq_4.35k
	BIO_ddseq_4
35k/35k_preprocessing_out/data/bam/BRO_mtscatac_1.35k___BRO_mtscatac_1__R1.bwa.out.possorted.bam
	BRO_mtscatac_1.35k
	BRO_mtscatac_1
35k/35k_preprocessing_out/data/bam/BRO_mtscatac_2.35k___BRO_mtscatac_2__R1.bwa.out.possorted.bam
	BRO_mtscatac_2.35k
	BRO_mtscatac_2
35k/35k_preprocessing_out/data/bam/CNA_10xmultiome_1.35k___CNA_10xmultiome_1__R1.bwa.out.possorted.bam
	CNA_10xmultiome_1.35k
	CNA_10xmultiome_1
35k/35k_preprocessing_out/data/bam/CNA_10xmultiome_2.35k___CNA_10xmultiome_2__R1.bwa.out.possorted.ba