In [0]:
import glob

from IPython.parallel import Client

In [0]:
# Connect to the IPython.parallel cluster running under the "huge" profile.
rc = Client(profile="huge")
dview = rc[:]  # view addressing every engine directly
lview = rc.load_balanced_view()  # view that lets the scheduler pick the engine
len(rc)  # shown as the cell output

In [0]:
# Engine ids to leave out of both views (the reason is not recorded in this
# notebook).
exclude = set([636, 640, 648, 741])
# Walk the ids the client actually reports rather than range(len(rc)):
# registered ids are not guaranteed to be the contiguous run 0..len(rc)-1
# (e.g. after engines have been restarted), and building a view from an id
# that is not registered fails.
active = [i for i in rc.ids if i not in exclude]
dview = rc[active]
lview = rc.load_balanced_view(targets=active)

In [0]:
# Import inside sync_imports() so the modules are available by name both in
# this kernel and on the engines addressed by dview; the remote functions
# defined later in this notebook use `os` without importing it themselves.
with dview.sync_imports():
    import os
    import sys
    import socket
    import stopwatch

In [0]:
# bam_dir holds the *_sorted.bam inputs; analysis_dir is its "freebayes"
# subdirectory.
bam_dir = "/data7/eckertlab/projects/ethan/analysis"
analysis_dir = os.path.join(bam_dir, "freebayes")
# Stop here if the output directory has not been created yet.
assert os.path.exists(analysis_dir)

In [0]:
# List the sorted BAMs with glob instead of `!ls`: when the pattern matches
# nothing, the text `ls` prints can end up in the captured list and would
# later be submitted as if it were a BAM path.
bam_files = sorted(glob.glob(os.path.join(bam_dir, "*_sorted.bam")))
assert bam_files, "no *_sorted.bam files found in %s" % bam_dir

In [0]:
# Path to the freebayes executable; passed as the first argument of
# run_freebayes() in the cells below.
freebayes = "/home/cfriedline/data7/src/freebayes/bin/freebayes"

    usage: /home/cfriedline/data7/src/freebayes/bin/freebayes -f [REFERENCE] [OPTIONS] [BAM FILES] >[OUTPUT]

    Bayesian haplotype-based polymorphism discovery.

    parameters:

       -h --help       For a complete description of options.

    citation: Erik Garrison, Gabor Marth
              "Haplotype-based variant detection from short-read sequencing"
              arXiv:1207.3907 (http://arxiv.org/abs/1207.3907)

    author:   Erik Garrison <erik.garrison@bc.edu>, Marth Lab, Boston College, 2010-2014
    version:  v0.9.18-17-ga9cd9c6-dirty

In [0]:
@lview.remote()
def remove_duplicates(args):
    sorted_bam, analysis_dir = args
    samtools = "/home/cfriedline/data7/src/samtools-0.1.19/samtools"
    out = os.path.join(analysis_dir, "%s_rmdup.bam" % sorted_bam)
    !$samtools rmdup -s $sorted_bam $out
    !$samtools index $out
    return out

In [0]:
# remove_duplicates() unpacks a (sorted_bam, analysis_dir) tuple, so every BAM
# path has to be paired with the output directory; handing over the bare path
# makes that unpacking fail on the engine.
rmdup_jobs = [remove_duplicates((b, analysis_dir)) for b in bam_files]

In [0]:
ready = 0
for x in rmdup_jobs:
    if x.ready():
        ready += 1
print "%d/%d" % (ready, len(rmdup_jobs))

In [0]:
# Reference FASTA; run_freebayes() places it after the -f flag.
assembly = "/data7/cfriedline/assemblies/foxtail2/Green_26_ATCGCGCAA.fastq_31_data_31/contigs.fa_in_map.fa"

In [0]:
# List the de-duplicated BAMs with glob instead of `!ls`: when the pattern
# matches nothing, the text `ls` prints can end up in the captured list and
# would be spliced into the freebayes command line as a BAM path.
bam_rmdup_files = sorted(glob.glob(os.path.join(analysis_dir, "*_rmdup.bam")))
assert bam_rmdup_files, "no *_rmdup.bam files found in %s" % analysis_dir

In [0]:
# Put every BAM behind its own -b flag: "-b a.bam -b b.bam ...".
# NOTE: an empty bam_rmdup_files yields a bare "-b ".
bam_string = "-b " + " -b ".join(bam_rmdup_files)

In [0]:
@lview.remote()
def run_freebayes(freebayes, freebayes_args, assembly, bam_string, analysis_dir, out_vcf):
    out = os.path.join(analysis_dir, out_vcf)
    cmd = "%s -f %s %s %s > %s" % (freebayes,
                                   assembly,
                                   freebayes_args,
                                   bam_string, 
                                   out)
    !$cmd
    return out

## Run default freebayes

In [0]:
# Baseline run: no extra freebayes options, VCF written into analysis_dir.
default_vcf = "foxtail_wue_default.vcf"
fb = run_freebayes(freebayes, "", assembly, bam_string, analysis_dir, default_vcf)

In [0]:
# Poll the default run; re-run this cell until it shows True.
fb.ready()

## Write population file for freebayes

In [0]:
# Write one "<id>\t<population>" line per de-duplicated BAM.
# rgid is presumably the read-group/sample id -- confirm against the BAM headers.
with open(os.path.join(analysis_dir, "freebayes_pops.txt"), "w") as o:
    for f in bam_rmdup_files:
        # Work on the file name only: splitting the full path on "sorted"
        # would cut inside a directory name that happens to contain it.
        name = os.path.basename(f)
        # Text before "sorted", minus the separator character preceding it.
        rgid = name.split("sorted")[0][:-1]
        # Population label = first "_"-delimited field of the file name.
        o.write("%s\t%s\n" % (rgid, name.split("_")[0]))

In [0]:
# Population-aware run: same inputs as the default run plus --populations.
pops_file = os.path.join(analysis_dir, "freebayes_pops.txt")
pops_args = "--populations %s" % pops_file
fb_pop = run_freebayes(freebayes, pops_args, assembly, bam_string, analysis_dir, "foxtail_wue_pops.vcf")

In [0]:
# Poll the population run; re-run this cell until it shows True.
fb_pop.ready()

### Estimates from Eckert, A. J. et al. Multilocus analyses reveal little evidence for lineage-wide adaptive evolution within major clades of soft pines (Pinus subgenus Strobus). Molecular Ecology 22, 5635–5650 (2013).



In [0]:
# Values taken from the Eckert et al. (2013) paper cited in the heading above;
# each is passed to freebayes as --theta in the runs below.
# Presumably the pi-based and Watterson estimates respectively -- confirm.
theta_pi = 0.0021
theta_w = 0.0023

In [0]:
# Population-aware run with --theta set to theta_pi.
# NOTE(review): this VCF is written to bam_dir, whereas the default and
# population runs write to analysis_dir -- confirm that is intended.
pops_file = os.path.join(analysis_dir, "freebayes_pops.txt")
theta_pi_args = "--populations %s --theta %f" % (pops_file, theta_pi)
theta_pi_vcf = "foxtail_wue_pops_theta_pi_%f.vcf" % theta_pi
fb_pop_theta_pi = run_freebayes(freebayes, theta_pi_args, assembly, bam_string, bam_dir, theta_pi_vcf)

In [0]:
# Population-aware run with --theta set to theta_w.
# NOTE(review): this VCF is written to bam_dir, whereas the default and
# population runs write to analysis_dir -- confirm that is intended.
pops_file = os.path.join(analysis_dir, "freebayes_pops.txt")
theta_w_args = "--populations %s --theta %f" % (pops_file, theta_w)
theta_w_vcf = "foxtail_wue_pops_theta_w_%f.vcf" % theta_w
fb_pop_theta_w = run_freebayes(freebayes, theta_w_args, assembly, bam_string, bam_dir, theta_w_vcf)

In [0]:
# Population-aware run with --theta set to theta_w and an existing VCF
# supplied through --variant-input.
# NOTE(review): this VCF is written to bam_dir, whereas the default and
# population runs write to analysis_dir -- confirm that is intended.
pops_file = os.path.join(analysis_dir, "freebayes_pops.txt")
input_vcf = '/home/cfriedline/data7/eckertlab/foxtail/individuals_all/Green.raw.vcf'
theta_w_input_args = "--populations %s --theta %f --variant-input %s" % (pops_file, theta_w, input_vcf)
theta_w_input_vcf_name = "foxtail_wue_pops_theta_w_%f_input_vcf.vcf" % theta_w
fb_pop_theta_w_input_vcf = run_freebayes(freebayes,
                                         theta_w_input_args,
                                         assembly,
                                         bam_string,
                                         bam_dir,
                                         theta_w_input_vcf_name)

In [0]:
# Captured stdout of the three theta runs, in the order they were submitted.
[fb_pop_theta_pi.stdout, fb_pop_theta_w.stdout, fb_pop_theta_w_input_vcf.stdout]