In [7]:
# SAMtools SAM -> BAM, Sorted BAM, and Indexed BAM files
"""
Typically fast enough as is, but it could be sped up by processing multiple samples in parallel.
"""
import subprocess
from pathlib import Path

# Sample IDs to process; each one is expected to have a matching
# <sample>.sam file in the SAM directory below.
files = [
    "10KP", "14KP", "15AZ", "28KP", "31SMW", "32SMW", "34SMW", "35SMW", "36SMW", "38SMW", "39SMW",
    "46SMW", "52SMW", "55SMW", "57SMW", "69SMF", "70SMF", "8KP", "9KP", "MOR021", "MOR023"
]

# The directory layout and samtools settings are identical for every sample,
# so they are defined once here instead of on every loop iteration.
RESULTS_DIR = Path("/mnt/jupiter/johnsonlab/Capstone_proj/test_results")
SAM_DIR = RESULTS_DIR / "SAM"
BAM_DIR = RESULTS_DIR / "BAM"
SORTED_DIR = BAM_DIR / "Sorted"
threads = "10"  # value passed to samtools via -@
ram = "1g"  # RAM per thread (samtools sort -m), so up to threads * ram in total

# Make sure the output directories exist before samtools tries to write into
# them; creating SORTED_DIR with parents=True also creates BAM_DIR.
SORTED_DIR.mkdir(parents=True, exist_ok=True)

for file in files:
    # Flush so the progress line is emitted before samtools writes its own
    # messages for this sample.
    print(f"🔄 Processing sample: {file}", flush=True)

    # Kept as plain strings so the values left in the notebook namespace
    # after the loop have the same type as before.
    sam = str(SAM_DIR / f"{file}.sam")
    bam = str(BAM_DIR / f"{file}.bam")
    sorted_bam = str(SORTED_DIR / f"{file}.sorted.bam")

    # Step 1: Convert SAM to BAM.
    # Options are placed before the input file so the call does not rely on
    # getopt reordering arguments. The old -S flag is dropped: current
    # samtools releases detect SAM input automatically and ignore it.
    subprocess.run([
        "samtools", "view",
        "-@", threads,
        "-b",
        "-o", bam,
        sam,
    ], check=True)

    # Step 2: Sort BAM (by coordinate, the samtools default).
    subprocess.run([
        "samtools", "sort",
        "-@", threads,
        "-m", ram,
        "-o", sorted_bam,
        bam,
    ], check=True)

    # Step 3: Index the sorted BAM; the index is written next to sorted_bam.
    subprocess.run([
        "samtools", "index",
        "-@", threads,
        sorted_bam,
    ], check=True)


🔄 Processing sample: 10KP


[bam_sort_core] merging from 10 files and 10 in-memory blocks...


🔄 Processing sample: 14KP


[bam_sort_core] merging from 10 files and 10 in-memory blocks...


🔄 Processing sample: 15AZ


[bam_sort_core] merging from 0 files and 10 in-memory blocks...


🔄 Processing sample: 28KP


[bam_sort_core] merging from 0 files and 10 in-memory blocks...


🔄 Processing sample: 31SMW


[bam_sort_core] merging from 0 files and 10 in-memory blocks...


🔄 Processing sample: 32SMW


[bam_sort_core] merging from 0 files and 10 in-memory blocks...


🔄 Processing sample: 34SMW


[bam_sort_core] merging from 0 files and 10 in-memory blocks...


🔄 Processing sample: 35SMW


[bam_sort_core] merging from 10 files and 10 in-memory blocks...


🔄 Processing sample: 36SMW


[bam_sort_core] merging from 10 files and 10 in-memory blocks...


🔄 Processing sample: 38SMW


[bam_sort_core] merging from 0 files and 10 in-memory blocks...


🔄 Processing sample: 39SMW


[bam_sort_core] merging from 10 files and 10 in-memory blocks...


🔄 Processing sample: 46SMW


[bam_sort_core] merging from 10 files and 10 in-memory blocks...


🔄 Processing sample: 52SMW


[bam_sort_core] merging from 10 files and 10 in-memory blocks...
[bam_sort_core] merging from 10 files and 10 in-memory blocks...


🔄 Processing sample: 57SMW


[bam_sort_core] merging from 0 files and 10 in-memory blocks...


🔄 Processing sample: 69SMF


[bam_sort_core] merging from 0 files and 10 in-memory blocks...


🔄 Processing sample: 70SMF


[bam_sort_core] merging from 10 files and 10 in-memory blocks...


🔄 Processing sample: 8KP


[bam_sort_core] merging from 10 files and 10 in-memory blocks...


🔄 Processing sample: 9KP


[bam_sort_core] merging from 0 files and 10 in-memory blocks...


🔄 Processing sample: MOR021


[bam_sort_core] merging from 0 files and 10 in-memory blocks...


🔄 Processing sample: MOR023


[bam_sort_core] merging from 10 files and 10 in-memory blocks...
