This pipeline runs freebayes on the sorted, duplicate-marked, merged BAM files.
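We are looking for SNPs supported by at least 100 reads at a frequency of at least 5%. Note that the freebayes call in this notebook uses `--min-coverage 25` and `-F 0.05`, so the 100-read threshold is not applied at this step.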

In [1]:
import os

In [2]:
import multiprocessing, multiprocessing.pool

# user-defined variables

In [3]:
# Tools: the static freebayes binary and the reference genome FASTA passed to it via -f.
executable_file = '/home/adrian/software/freebayes/freebayes-1.3.6-linux-amd64-static'
reference = '/home/adrian/databases/ensembl/mouse_genome/Mus_musculus.GRCm39.dna.toplevel.fa'

# Data locations (both written with a trailing '/'):
#   bam_top_folder holds the sub-folders that each contain a positionsort.bam,
#   vcf_folder receives one <sample>.vcf per sample.
vcf_folder = '/home/adrian/projects/reynisfjara/results/vcf/'
bam_top_folder = '/home/adrian/projects/reynisfjara/results/bam_genome/'

In [4]:
# Size of the multiprocessing pool, i.e. how many freebayes jobs run at the same time.
threads = 12

# read data

In [5]:
# os.listdir() returns every entry of the folder, including plain files and
# hidden entries. Keep only sub-directories so that a stray file cannot turn
# into a bogus sample label; sorting makes the listing order deterministic.
bam_folders = sorted(
    element for element in os.listdir(bam_top_folder)
    if os.path.isdir(os.path.join(bam_top_folder, element))
)

# A sample label is the first two underscore-separated fields of a folder name
# (e.g. 'a3922_0h_1' -> 'a3922_0h'); the set removes the per-folder duplicates.
samples = sorted({'_'.join(element.split('_')[:2]) for element in bam_folders})
print(samples)

['a3922_0h', 'a3922_48h', 'a3922_72h', 'a4774_0h', 'a4774_48h', 'a4774_72h', 'a4775_0h', 'a4775_48h', 'a4775_72h', 'a4776_0h', 'a4776_48h', 'a4776_72h']


# define process

In [6]:
def caller(sample, min_coverage=25, min_alternate_fraction=0.05):
    """Run freebayes jointly on all BAM files that belong to one sample.

    The input folders are the entries of ``bam_folders`` whose first two
    underscore-separated fields equal ``sample`` (the same rule used to build
    ``samples``). Each folder is expected to contain a ``positionsort.bam``.
    The freebayes output is redirected to ``<sample>.vcf`` inside
    ``vcf_folder``. The command is run through the shell and prefixed with
    ``time`` so its resource usage is reported.

    Parameters
    ----------
    sample : str
        Sample label, e.g. ``'a3922_0h'``.
    min_coverage : int, optional
        Value passed to freebayes ``--min-coverage`` (default 25).
    min_alternate_fraction : float, optional
        Value passed to freebayes ``-F`` (default 0.05).

    Returns
    -------
    None

    Raises
    ------
    ValueError
        If no entry of ``bam_folders`` belongs to ``sample``.
    RuntimeError
        If the shell command finishes with a non-zero status.
    """
    import shlex  # local import: only needed to quote the shell arguments

    # Exact match on the sample label. A substring test would also select the
    # folders of any other sample whose name merely contains this label.
    target_folders = sorted(
        element for element in bam_folders
        if '_'.join(element.split('_')[:2]) == sample
    )
    if not target_folders:
        # Without this guard freebayes would be launched with no BAM input.
        raise ValueError('no BAM folders found for sample {!r}'.format(sample))

    target_bams = [
        os.path.join(bam_top_folder, element, 'positionsort.bam')
        for element in target_folders
    ]
    # Quote every path so spaces or shell metacharacters cannot break the command.
    target_bams_string = ' '.join(shlex.quote(bam) for bam in target_bams)
    output_vcf = os.path.join(vcf_folder, '{}.vcf'.format(sample))

    cmd = 'time {} -f {} --min-coverage {} -F {} {} > {}'.format(
        shlex.quote(executable_file),
        shlex.quote(reference),
        min_coverage,
        min_alternate_fraction,
        target_bams_string,
        shlex.quote(output_vcf),
    )

    print(cmd)
    status = os.system(cmd)
    print()

    # A failed run leaves an empty or truncated VCF behind; make that visible
    # instead of silently discarding the exit status.
    if status != 0:
        raise RuntimeError(
            'freebayes failed for sample {!r} (status {}); {} may be incomplete'.format(
                sample, status, output_vcf
            )
        )

    return None

# call freebayes in sequential mode

In [7]:
# for sample in samples:
#     caller(sample)

# call freebayes in parallel mode

In [8]:
# Run caller() once per sample with at most `threads` jobs in flight.
# map() blocks until every job has returned, so leaving the `with` block
# only shuts down idle workers; without it the worker processes would stay
# alive in the kernel after the cell finishes.
with multiprocessing.pool.Pool(threads) as hydra:
    empty = hydra.map(caller, samples)

time /home/adrian/software/freebayes/freebayes-1.3.6-linux-amd64-static -f /home/adrian/databases/ensembl/mouse_genome/Mus_musculus.GRCm39.dna.toplevel.fa --min-coverage 25 -F 0.05 /home/adrian/projects/reynisfjara/results/bam_genome/a3922_48h_1/positionsort.bam /home/adrian/projects/reynisfjara/results/bam_genome/a3922_48h_2/positionsort.bam /home/adrian/projects/reynisfjara/results/bam_genome/a3922_48h_3/positionsort.bam > /home/adrian/projects/reynisfjara/results/vcf/a3922_48h.vcftime /home/adrian/software/freebayes/freebayes-1.3.6-linux-amd64-static -f /home/adrian/databases/ensembl/mouse_genome/Mus_musculus.GRCm39.dna.toplevel.fa --min-coverage 25 -F 0.05 /home/adrian/projects/reynisfjara/results/bam_genome/a3922_0h_1/positionsort.bam /home/adrian/projects/reynisfjara/results/bam_genome/a3922_0h_2/positionsort.bam /home/adrian/projects/reynisfjara/results/bam_genome/a3922_0h_3/positionsort.bam > /home/adrian/projects/reynisfjara/results/vcf/a3922_0h.vcftime /home/adrian/software/f

5474.29user 13.65system 1:31:28elapsed 99%CPU (0avgtext+0avgdata 2095576maxresident)k
19917360inputs+122624outputs (17major+2813770minor)pagefaults 0swaps





6291.72user 13.61system 1:45:06elapsed 99%CPU (0avgtext+0avgdata 2484744maxresident)k
16974528inputs+114224outputs (16major+3023794minor)pagefaults 0swaps





6457.95user 14.29system 1:47:52elapsed 99%CPU (0avgtext+0avgdata 2204012maxresident)k
16753528inputs+110968outputs (16major+2966810minor)pagefaults 0swaps





6622.74user 14.86system 1:50:39elapsed 99%CPU (0avgtext+0avgdata 2186128maxresident)k
18020136inputs+126304outputs (17major+3136501minor)pagefaults 0swaps





6924.08user 13.65system 1:55:38elapsed 99%CPU (0avgtext+0avgdata 2982368maxresident)k
17938616inputs+118424outputs (17major+2680366minor)pagefaults 0swaps





7073.39user 13.85system 1:58:07elapsed 99%CPU (0avgtext+0avgdata 2475156maxresident)k
18984368inputs+124256outputs (17major+2784053minor)pagefaults 0swaps





7386.09user 13.36system 2:03:20elapsed 99%CPU (0avgtext+0avgdata 2875616maxresident)k
19388344inputs+126024outputs (14major+1885613minor)pagefaults 0swaps





7421.12user 14.41system 2:03:56elapsed 99%CPU (0avgtext+0avgdata 2554764maxresident)k
20026696inputs+131136outputs (15major+2817467minor)pagefaults 0swaps





7868.15user 14.27system 2:11:22elapsed 99%CPU (0avgtext+0avgdata 2238572maxresident)k
16859512inputs+113400outputs (14major+2144218minor)pagefaults 0swaps





7868.00user 17.29system 2:11:25elapsed 99%CPU (0avgtext+0avgdata 1816960maxresident)k
17057848inputs+131856outputs (16major+3626529minor)pagefaults 0swaps





8719.22user 15.30system 2:25:35elapsed 99%CPU (0avgtext+0avgdata 2174844maxresident)k
21325336inputs+149896outputs (17major+2317990minor)pagefaults 0swaps





8861.61user 15.91system 2:27:58elapsed 99%CPU (0avgtext+0avgdata 2737308maxresident)k
20579936inputs+137944outputs (16major+2648619minor)pagefaults 0swaps


In [9]:
# Completion marker for the notebook run.
print('... done.')

... done.
