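This pipeline runs freebayes on the sorted, duplicate-marked, merged BAM files.
We are looking for SNPs supported by at least 100 reads at a frequency of at least 5%.
Note: the freebayes call below uses `--min-coverage 25 -F 0.05` (sites covered by at least 25 reads, alternate allele fraction of at least 5%), so the 100-read threshold is not applied in that call.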

In [1]:
import os
import shlex
import subprocess

In [2]:
import multiprocessing, multiprocessing.pool

# user-defined variables

In [3]:
# Results tree of the project; folder paths end with '/'.
_results_folder = '/home/adrian/projects/reynisfjara/results/'

# Input: top folder holding the BAM sub-folders listed in the "read data" step.
bam_top_folder = _results_folder + 'bam/'
# Output: folder that receives one VCF file per sample.
vcf_folder = _results_folder + 'vcf/'

# Reference FASTA handed to freebayes through its -f option.
reference = '/home/adrian/databases/ensembl/mouse/Mus_musculus.GRCm39.cdna.all.fa'
# freebayes binary that is executed for every sample.
executable_file = '/home/adrian/software/freebayes/freebayes-1.3.6-linux-amd64-static'

In [4]:
# Size of the multiprocessing pool: number of worker processes, i.e. how many
# freebayes runs (one per sample) execute at the same time.
threads = 12

# read data

In [5]:
# Every entry of the top folder is taken as one BAM sub-folder.
bam_folders = os.listdir(bam_top_folder)

# Sample label = first two underscore-separated fields of the folder name;
# the set collapses the folders that share a label, sorted() fixes the order.
samples = sorted({'_'.join(folder.split('_')[:2]) for folder in bam_folders})
print(samples)

['a3922_0h', 'a3922_48h', 'a3922_72h', 'a4774_0h', 'a4774_48h', 'a4774_72h', 'a4775_0h', 'a4775_48h', 'a4775_72h', 'a4776_0h', 'a4776_48h', 'a4776_72h']


# call freebayes in parallel mode

In [6]:
def caller(sample, min_coverage=25, min_alternate_fraction=0.05):
    """Run freebayes once on all BAM files that belong to one sample.

    The folders of `sample` are looked up in the module-level `bam_folders`
    list; `<bam_top_folder><folder>/positionsort.bam` is used for each of them.
    All these BAM files are passed to a single freebayes run whose standard
    output is redirected to `<vcf_folder><sample>.vcf`. The command is prefixed
    with `time` and executed through the shell.

    Parameters
    ----------
    sample : str
        Sample label: the first two underscore-separated fields of a BAM
        folder name (e.g. 'a3922_0h').
    min_coverage : int, optional
        Value passed to freebayes `--min-coverage`. Default 25.
    min_alternate_fraction : float, optional
        Value passed to freebayes `-F`. Default 0.05.

    Returns
    -------
    None

    Raises
    ------
    ValueError
        If no entry of `bam_folders` carries the label `sample`.
    RuntimeError
        If the shell command exits with a non-zero status.
    """

    # Select folders by exact label, using the same rule that builds `samples`.
    # A substring test would also pick up folders of other samples whose name
    # merely contains the label (e.g. 'aa1_0h_1' for sample 'a1_0h').
    target_folders = sorted(
        element for element in bam_folders
        if '_'.join(element.split('_')[:2]) == sample
    )
    if not target_folders:
        raise ValueError('no BAM folder found for sample {!r}'.format(sample))

    target_bams = ['{}{}/positionsort.bam'.format(bam_top_folder, element) for element in target_folders]
    # Quote every path: the command is interpreted by the shell, so a space or
    # a shell metacharacter in a path must not split or alter the command.
    target_bams_string = ' '.join(shlex.quote(bam) for bam in target_bams)

    cmd = 'time {} -f {} --min-coverage {} -F {} {} > {}'.format(
        shlex.quote(executable_file),
        shlex.quote(reference),
        min_coverage,
        min_alternate_fraction,
        target_bams_string,
        shlex.quote('{}{}.vcf'.format(vcf_folder, sample)),
    )

    print(cmd)
    # shell=True is needed for the `time` prefix and the `>` redirection.
    completed = subprocess.run(cmd, shell=True)
    print()

    # Do not let a failed run pass silently: the VCF file would be missing or
    # truncated while the notebook still reports success.
    if completed.returncode != 0:
        raise RuntimeError(
            'freebayes failed for sample {!r} (exit status {})'.format(sample, completed.returncode)
        )

    return None

In [7]:
# Pool of `threads` worker processes; each worker handles one sample at a time.
# Using the pool as a context manager shuts the workers down once map() has
# returned, instead of leaving them alive in the kernel.
with multiprocessing.pool.Pool(threads) as hydra:
    # map() blocks until every sample has been processed; caller() returns
    # None, so `empty` is only a list of placeholders.
    empty = hydra.map(caller, samples)

time /home/adrian/software/freebayes/freebayes-1.3.6-linux-amd64-static -f /home/adrian/databases/ensembl/mouse/Mus_musculus.GRCm39.cdna.all.fa --min-coverage 25 -F 0.05 /home/adrian/projects/reynisfjara/results/bam/a4774_0h_1/positionsort.bam /home/adrian/projects/reynisfjara/results/bam/a4774_0h_2/positionsort.bam /home/adrian/projects/reynisfjara/results/bam/a4774_0h_3/positionsort.bam > /home/adrian/projects/reynisfjara/results/vcf/a4774_0h.vcftime /home/adrian/software/freebayes/freebayes-1.3.6-linux-amd64-static -f /home/adrian/databases/ensembl/mouse/Mus_musculus.GRCm39.cdna.all.fa --min-coverage 25 -F 0.05 /home/adrian/projects/reynisfjara/results/bam/a3922_48h_1/positionsort.bam /home/adrian/projects/reynisfjara/results/bam/a3922_48h_2/positionsort.bam /home/adrian/projects/reynisfjara/results/bam/a3922_48h_3/positionsort.bam > /home/adrian/projects/reynisfjara/results/vcf/a3922_48h.vcftime /home/adrian/software/freebayes/freebayes-1.3.6-linux-amd64-static -f /home/adrian/data

3588.92user 16.14system 1:11:36elapsed 83%CPU (0avgtext+0avgdata 765068maxresident)k
13762392inputs+116600outputs (430major+198818minor)pagefaults 0swaps





3890.55user 16.15system 1:16:20elapsed 85%CPU (0avgtext+0avgdata 870724maxresident)k
15153448inputs+99656outputs (433major+230393minor)pagefaults 0swaps





3919.27user 15.82system 1:16:47elapsed 85%CPU (0avgtext+0avgdata 806508maxresident)k
15128512inputs+104144outputs (449major+214384minor)pagefaults 0swaps





3980.37user 15.94system 1:18:21elapsed 84%CPU (0avgtext+0avgdata 825264maxresident)k
14889608inputs+96904outputs (397major+219026minor)pagefaults 0swaps





4052.03user 16.97system 1:19:20elapsed 85%CPU (0avgtext+0avgdata 822888maxresident)k
15431648inputs+127456outputs (444major+218494minor)pagefaults 0swaps





4196.08user 16.60system 1:22:01elapsed 85%CPU (0avgtext+0avgdata 954356maxresident)k
16177048inputs+120896outputs (391major+251229minor)pagefaults 0swaps





4292.16user 15.77system 1:23:20elapsed 86%CPU (0avgtext+0avgdata 914760maxresident)k
15940536inputs+102680outputs (424major+241355minor)pagefaults 0swaps





4362.08user 16.90system 1:24:24elapsed 86%CPU (0avgtext+0avgdata 1013432maxresident)k
16947232inputs+114360outputs (421major+266095minor)pagefaults 0swaps





4536.65user 16.94system 1:27:01elapsed 87%CPU (0avgtext+0avgdata 945768maxresident)k
17991856inputs+118864outputs (420major+249177minor)pagefaults 0swaps





4566.64user 16.74system 1:27:39elapsed 87%CPU (0avgtext+0avgdata 953156maxresident)k
17267824inputs+113232outputs (494major+260010minor)pagefaults 0swaps





4589.80user 17.18system 1:28:01elapsed 87%CPU (0avgtext+0avgdata 1067992maxresident)k
18496584inputs+124992outputs (398major+279624minor)pagefaults 0swaps





4729.58user 17.30system 1:30:30elapsed 87%CPU (0avgtext+0avgdata 1028016maxresident)k
19161504inputs+138432outputs (343major+269664minor)pagefaults 0swaps


In [8]:
# End-of-notebook marker, printed when this last cell is executed.
print('... done.')

... done.
