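This pipeline runs freebayes on the sorted, duplicate-marked, merged BAM files.
We are looking for SNPs supported by at least 100 reads, at an allele frequency of at least 5%. Note that the freebayes call below uses `--min-coverage 25 -F 0.05`, so a stricter read-support threshold than 25 has to be applied to the resulting VCF files afterwards.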

In [1]:
import os

In [2]:
import multiprocessing, multiprocessing.pool

# user-defined variables

In [3]:
# Input, output and tool locations, assembled from shared prefixes.
# Directory paths keep their trailing slash because later cells build
# file paths from them by plain string concatenation.
_home_dir = '/home/adrian/'
_project_data_dir = _home_dir + 'projects/reynisfjara/data/'

# freebayes binary and the reference FASTA passed to it with -f
executable_file = _home_dir + 'software/freebayes/freebayes-1.3.6-linux-amd64-static'
reference = _home_dir + 'databases/ensembl/mouse/Mus_musculus.GRCm39.cdna.all.fa'

# folder holding the aggregated BAM files (input) and folder receiving the VCF files (output)
bam_top_aggregated = _project_data_dir + 'aggregated_bam/'
vcf_folder = _project_data_dir + 'vcf/'

In [4]:
# Size of the multiprocessing pool used further down: the number of worker
# processes (not threads) that run freebayes concurrently, one BAM file each.
threads = 6

# read data

In [5]:
# Collect the BAM files to process, in a deterministic (sorted) order.
# os.listdir() returns every directory entry, so keep only names ending in
# '.bam'; otherwise index files, hidden files or subdirectories would be
# handed to freebayes as if they were alignments.
bam_files = sorted(
    entry for entry in os.listdir(bam_top_aggregated) if entry.endswith('.bam')
)
print(bam_files)
print(len(bam_files))

['a3922_0h.bam', 'a3922_48h.bam', 'a3922_72h.bam', 'a4774_0h.bam', 'a4774_48h.bam', 'a4774_72h.bam', 'a4775_0h.bam', 'a4775_48h.bam', 'a4775_72h.bam', 'a4776_0h.bam', 'a4776_48h.bam', 'a4776_72h.bam']
12


# call freebayes

### feedback

Just found the document: according to this alignment in IGV, there were 17,000 reads at this exact location, as you can see in the attached picture.

# call in a parallel mode

In [6]:
def caller(bam_file):
    """Run freebayes on one BAM file and write the calls to a VCF file.

    The input is read from ``bam_top_aggregated`` and the output is written
    to ``vcf_folder`` as ``<bam_file>.vcf``. The command is prefixed with
    ``time`` so the run time is reported alongside the command echo.

    Parameters
    ----------
    bam_file : str
        File name (not full path) of a BAM file inside ``bam_top_aggregated``.

    Returns
    -------
    None

    Raises
    ------
    RuntimeError
        If the shell command finishes with a non-zero status. In that case
        the VCF file may be empty or truncated and should not be used.
    """

    # local import: keeps this cell self-contained without touching the import cells
    import shlex

    bam_path = os.path.join(bam_top_aggregated, bam_file)
    vcf_path = os.path.join(vcf_folder, bam_file + '.vcf')

    # Quote every interpolated path so spaces or shell metacharacters in a
    # file name cannot break (or alter) the command line.
    cmd = 'time {} -f {} --min-coverage 25 -F 0.05 --standard-filters --report-genotype-likelihood-max --use-mapping-quality {} > {}'.format(
        shlex.quote(executable_file),
        shlex.quote(reference),
        shlex.quote(bam_path),
        shlex.quote(vcf_path),
    )

    # Emit the command and the blank line after it as one string, flushed
    # immediately, to reduce interleaving between parallel workers.
    print(cmd + '\n\n', end='', flush=True)

    status = os.system(cmd)
    print(flush=True)

    # os.system returns 0 only when the shell command succeeded; surface
    # failures instead of silently leaving a broken VCF behind.
    if status != 0:
        raise RuntimeError(
            'freebayes failed for {} (os.system status {})'.format(bam_file, status)
        )

    return None

In [7]:
# Run one freebayes job per BAM file, `threads` jobs at a time.
# map() blocks until every job has returned and re-raises a worker's
# exception here; the context manager then shuts the worker processes down
# so they are not left behind in the kernel session.
with multiprocessing.pool.Pool(threads) as hydra:
    empty = hydra.map(caller, bam_files)

time /home/adrian/software/freebayes/freebayes-1.3.6-linux-amd64-static -f /home/adrian/databases/ensembl/mouse/Mus_musculus.GRCm39.cdna.all.fa --min-coverage 25 -F 0.05 --standard-filters --report-genotype-likelihood-max --use-mapping-quality /home/adrian/projects/reynisfjara/data/aggregated_bam/a3922_0h.bam > /home/adrian/projects/reynisfjara/data/vcf/a3922_0h.bam.vcftime /home/adrian/software/freebayes/freebayes-1.3.6-linux-amd64-static -f /home/adrian/databases/ensembl/mouse/Mus_musculus.GRCm39.cdna.all.fa --min-coverage 25 -F 0.05 --standard-filters --report-genotype-likelihood-max --use-mapping-quality /home/adrian/projects/reynisfjara/data/aggregated_bam/a3922_48h.bam > /home/adrian/projects/reynisfjara/data/vcf/a3922_48h.bam.vcftime /home/adrian/software/freebayes/freebayes-1.3.6-linux-amd64-static -f /home/adrian/databases/ensembl/mouse/Mus_musculus.GRCm39.cdna.all.fa --min-coverage 25 -F 0.05 --standard-filters --report-genotype-likelihood-max --use-mapping-quality /home/adri

1175.34user 4.31system 19:39.97elapsed 99%CPU (0avgtext+0avgdata 263988maxresident)k
10070040inputs+45752outputs (3major+73014minor)pagefaults 0swaps



time /home/adrian/software/freebayes/freebayes-1.3.6-linux-amd64-static -f /home/adrian/databases/ensembl/mouse/Mus_musculus.GRCm39.cdna.all.fa --min-coverage 25 -F 0.05 --standard-filters --report-genotype-likelihood-max --use-mapping-quality /home/adrian/projects/reynisfjara/data/aggregated_bam/a4775_48h.bam > /home/adrian/projects/reynisfjara/data/vcf/a4775_48h.bam.vcf



1449.71user 4.50system 24:14.68elapsed 99%CPU (0avgtext+0avgdata 297872maxresident)k
10909248inputs+44248outputs (3major+81411minor)pagefaults 0swaps



time /home/adrian/software/freebayes/freebayes-1.3.6-linux-amd64-static -f /home/adrian/databases/ensembl/mouse/Mus_musculus.GRCm39.cdna.all.fa --min-coverage 25 -F 0.05 --standard-filters --report-genotype-likelihood-max --use-mapping-quality /home/adrian/projects/reynisfjara/data/aggregated_bam/a4775_72h.bam > /home/adrian/projects/reynisfjara/data/vcf/a4775_72h.bam.vcf



1474.49user 4.68system 24:39.62elapsed 99%CPU (0avgtext+0avgdata 305608maxresident)k
10966088inputs+42872outputs (0major+83328minor)pagefaults 0swaps



time /home/adrian/software/freebayes/freebayes-1.3.6-linux-amd64-static -f /home/adrian/databases/ensembl/mouse/Mus_musculus.GRCm39.cdna.all.fa --min-coverage 25 -F 0.05 --standard-filters --report-genotype-likelihood-max --use-mapping-quality /home/adrian/projects/reynisfjara/data/aggregated_bam/a4776_0h.bam > /home/adrian/projects/reynisfjara/data/vcf/a4776_0h.bam.vcf



1532.69user 4.68system 25:37.78elapsed 99%CPU (0avgtext+0avgdata 319008maxresident)k
11734400inputs+50232outputs (0major+86688minor)pagefaults 0swaps



time /home/adrian/software/freebayes/freebayes-1.3.6-linux-amd64-static -f /home/adrian/databases/ensembl/mouse/Mus_musculus.GRCm39.cdna.all.fa --min-coverage 25 -F 0.05 --standard-filters --report-genotype-likelihood-max --use-mapping-quality /home/adrian/projects/reynisfjara/data/aggregated_bam/a4776_48h.bam > /home/adrian/projects/reynisfjara/data/vcf/a4776_48h.bam.vcf



1677.95user 4.97system 28:03.48elapsed 99%CPU (0avgtext+0avgdata 317940maxresident)k
12609640inputs+47008outputs (1major+86439minor)pagefaults 0swaps



time /home/adrian/software/freebayes/freebayes-1.3.6-linux-amd64-static -f /home/adrian/databases/ensembl/mouse/Mus_musculus.GRCm39.cdna.all.fa --min-coverage 25 -F 0.05 --standard-filters --report-genotype-likelihood-max --use-mapping-quality /home/adrian/projects/reynisfjara/data/aggregated_bam/a4776_72h.bam > /home/adrian/projects/reynisfjara/data/vcf/a4776_72h.bam.vcf



1704.53user 5.42system 28:30.42elapsed 99%CPU (0avgtext+0avgdata 311300maxresident)k
13057272inputs+49032outputs (0major+84818minor)pagefaults 0swaps





1547.92user 4.61system 25:52.88elapsed 99%CPU (0avgtext+0avgdata 308988maxresident)k
11468192inputs+42912outputs (0major+84198minor)pagefaults 0swaps





1348.16user 4.28system 22:32.90elapsed 99%CPU (0avgtext+0avgdata 296576maxresident)k
11347672inputs+41672outputs (0major+81081minor)pagefaults 0swaps





1536.84user 4.81system 25:42.25elapsed 99%CPU (0avgtext+0avgdata 321448maxresident)k
12862624inputs+47424outputs (0major+87276minor)pagefaults 0swaps





1704.62user 5.29system 28:30.71elapsed 99%CPU (0avgtext+0avgdata 331156maxresident)k
14053400inputs+50848outputs (2major+89752minor)pagefaults 0swaps





1482.86user 4.83system 24:48.99elapsed 99%CPU (0avgtext+0avgdata 301608maxresident)k
12162048inputs+53216outputs (0major+82314minor)pagefaults 0swaps





1762.77user 5.46system 29:29.60elapsed 99%CPU (0avgtext+0avgdata 326912maxresident)k
14638864inputs+57160outputs (0major+88689minor)pagefaults 0swaps


In [8]:
# completion marker, printed once the cells above have finished running
print('... done.')

... done.
