In [1]:
import io
import pandas as pd
import numpy as np
import math

import moments
import matplotlib.pyplot as plt


In [2]:
def sbatch_header(job,mem,tasks,hours):
    """Write the ``#SBATCH`` header of a submission script to ``script_<job>.sh``.

    The file is created in the current working directory (or truncated if it
    already exists) and always uses ``\\n`` line endings. Callers are expected
    to reopen the script in append mode to add the actual commands.

    Parameters
    ----------
    job : str
        Job name; used for ``--job-name``, the ``-e`` error-file prefix and
        the script file name.
    mem : str or int
        Written as ``--mem=<mem>G``.
    tasks : str or int
        Written as ``--ntasks=<tasks>``.
    hours : str or int
        Written as ``--time=<hours>:00:00``.

    Returns
    -------
    None
    """
    script = 'script_' + job + '.sh'
    # str() lets callers pass plain numbers; string arguments are unaffected.
    header_lines = [
        '#!/bin/bash\n\n#SBATCH --job-name=' + job + '\n',
        '#SBATCH --mem=' + str(mem) + 'G \n',
        '#SBATCH --ntasks=' + str(tasks) + ' \n',
        '#SBATCH -e ' + job + '_%A_%a.err \n',
        '#SBATCH --time=' + str(hours) + ':00:00  \n',
        '#SBATCH --mail-user=jamcgirr@ucdavis.edu ##email you when job starts,ends,etc\n#SBATCH --mail-type=ALL\n',
        '#SBATCH -p high \n\n',
    ]
    # The context manager closes the handle even if a write fails.
    with io.open(script, 'w', newline='\n') as outfile:
        outfile.writelines(header_lines)
    
def sbatch_header_loop(job,mem,tasks,hours,infile):
    """Write a per-input ``#SBATCH`` header to ``script_<infile><job>.sh``.

    Variant of the plain header writer for loops over several inputs: the
    input label is prefixed to the job name, so each input gets its own
    script file, job name and error-file prefix. The file is created in the
    current working directory (or truncated if it already exists) and always
    uses ``\\n`` line endings.

    Parameters
    ----------
    job : str
        Job-name suffix shared by every input in the loop.
    mem : str or int
        Written as ``--mem=<mem>G``.
    tasks : str or int
        Written as ``--ntasks=<tasks>``.
    hours : str or int
        Written as ``--time=<hours>:00:00``.
    infile : str
        Input label prepended to ``job``.

    Returns
    -------
    None
    """
    jobname = infile + job
    script = 'script_' + jobname + '.sh'
    # str() lets callers pass plain numbers; string arguments are unaffected.
    header_lines = [
        '#!/bin/bash\n\n#SBATCH --job-name=' + jobname + '\n',
        '#SBATCH --mem=' + str(mem) + 'G \n',
        '#SBATCH --ntasks=' + str(tasks) + ' \n',
        '#SBATCH -e ' + jobname + '_%A_%a.err \n',
        '#SBATCH --time=' + str(hours) + ':00:00 \n',
        '#SBATCH --mail-user=jamcgirr@ucdavis.edu ##email you when job starts,ends,etc\n#SBATCH --mail-type=ALL\n',
        '#SBATCH -p high \n\n',
    ]
    # The context manager closes the handle even if a write fails.
    with io.open(script, 'w', newline='\n') as outfile:
        outfile.writelines(header_lines)

In [4]:
# Generate one sbatch script per population that (1) builds a saf index from
# the population's downsampled BAM list with ANGSD and (2) estimates a folded
# SFS from that saf index with realSFS.
job_name = '_angsd_downsample_sfs'
wk_dir = '/home/jamcgirr/ph/data/mushi/light_snp_call/'
#saf_dir = '/home/jamcgirr/ph/data/angsd/SFS/downsample/saf/'

# Full population list, kept for reference; it is narrowed to the subset
# actually processed on the next line.
infiles = ["BC17","CA17","PWS07","PWS17","PWS91","PWS96","SS06","SS17","SS96","TB06","TB17","TB91","TB96","WA17"]
infiles = ["PWS17","TB17","SS17"]

for infile in infiles:
    script = 'script_' + infile + job_name + '.sh'
    # Header first (creates/truncates the script); commands are appended below.
    sbatch_header_loop(job_name,'8','8','144', infile)
    # Context manager guarantees the script is closed even if a write fails.
    with io.open(script,'a+', newline='\n') as o:

        # make saf from bams
        #o.write('/home/jamcgirr/apps/angsd_sep_20/angsd/angsd -bam '+wk_dir+'downsample_bams_'+infile+'.txt -doSaf 1 -doMajorMinor 1 -doMaf 3 -anc /home/jamcgirr/ph/data/c_harengus/c.harengus.fa -minMapQ 30 -minQ 20 -GL 1 -P 8 -out '+wk_dir+infile+'_minQ20_minMQ30 \n\n')
        # make saf from bams using filters
        #o.write('/home/jamcgirr/apps/angsd_sep_20/angsd/angsd -bam /home/jamcgirr/ph/data/angsd/SFS/downsample/downsample_bams_'+infile+'.txt -r chr1:1-5000000 -doSaf 1 -doMajorMinor 1 -doMaf 3 -doCounts 1 -doGlf 3 -anc /home/jamcgirr/ph/data/c_harengus/c.harengus.fa -ref /home/jamcgirr/ph/data/c_harengus/c.harengus.fa -minMapQ 30 -minQ 20 -GL 1 -P 8 -uniqueOnly 1 -remove_bads 1 -only_proper_pairs 1 -trim 0 -C 50 -minInd 10 -setMinDepth 10 -setMaxDepth 100 -out '+wk_dir+infile+' \n\n')
        # make saf from bams using filters and try a 'light' snp calling
        # (-SNP_pval 1e-6); -P 8 matches the 8 tasks requested in the header
        o.write('/home/jamcgirr/apps/angsd_sep_20/angsd/angsd -bam /home/jamcgirr/ph/data/angsd/SFS/downsample/downsample_bams_'+infile+'.txt -doSaf 1 -doMajorMinor 1 -doMaf 3 -SNP_pval 1e-6 -doCounts 1 -doGlf 3 -anc /home/jamcgirr/ph/data/c_harengus/c.harengus.fa -ref /home/jamcgirr/ph/data/c_harengus/c.harengus.fa -minMapQ 30 -minQ 20 -GL 1 -P 8 -uniqueOnly 1 -remove_bads 1 -only_proper_pairs 1 -trim 0 -C 50 -minInd 10 -setMinDepth 10 -setMaxDepth 100 -out '+wk_dir+infile+' \n\n')
        # make folded sfs from the saf index written by the command above
        # NOTE(review): the active angsd command has no -r region restriction,
        # so this is not limited to a small region -- confirm that is intended.
        o.write('/home/jamcgirr/apps/angsd_sep_20/angsd/misc/realSFS '+wk_dir+infile+'.saf.idx -P 8 -fold 1 > '+wk_dir+infile+'_folded.sfs \n')

        # Trailing reminder of how to submit the generated script.
        o.write('\n\n#run: sbatch '+script)

# EST run time 2 min


In [8]:
# Generate one sbatch script per population pair that builds a dadi-format
# 2D SFS with `realSFS dadi`, converts it with realsfs2dadi.pl, and relabels
# the placeholder names in the resulting SNP data file.
job_name = '_dadi_2d_sfs_downsample'

# NOTE(review): this realSFS path differs from the angsd_sep_20 build used by
# the single-population SFS cell -- confirm the version mix is intended.
realSFS = '/home/jamcgirr/apps/angsd/misc/realSFS'
wk_dir = '/home/jamcgirr/ph/data/moments/downsample/chr1_5mb/'

# Full list of population pairs, kept for reference; it is narrowed to the
# subset actually processed on the next line.
infiles = ["BC17_CA17","BC17_WA17","PWS07_PWS17","PWS07_SS06","PWS17_BC17","PWS17_CA17","PWS17_SS17","PWS17_WA17","PWS91_PWS07","PWS91_PWS17","PWS91_PWS96","PWS96_PWS07","PWS96_PWS17","PWS96_SS96","SS06_SS17","SS17_BC17","SS17_CA17","SS17_WA17","SS96_SS06","SS96_SS17","TB06_PWS07","TB06_SS06","TB06_TB17","TB17_BC17","TB17_CA17","TB17_PWS17","TB17_SS17","TB17_WA17","TB91_TB06","TB91_TB17","TB91_TB96","TB96_PWS96","TB96_SS96","TB96_TB06","TB96_TB17","WA17_CA17"]
infiles = ["PWS17_SS17","PWS17_TB17","SS17_TB17"]

for infile in infiles:
    script = 'script_' + infile + job_name + '.sh'
    # Header first (creates/truncates the script); commands are appended below.
    sbatch_header_loop(job_name,'8','4','1', infile)

    # "<popA>_<popB>" -> ["popA", "popB"]
    pops = infile.split("_")

    # Intermediate realSFS output and final SNP data file for this pair.
    dadi_sfs = wk_dir+pops[0]+'_'+pops[1]+'_dadi.sfs'
    snp_data = wk_dir+pops[0]+'_'+pops[1]+'_dadi_snp.data'

    # Context manager guarantees the script is closed even if a write fails.
    with io.open(script,'a+', newline='\n') as o:
        o.write('module load perl \n')

        # folded; -P 4 matches the 4 tasks requested in the header
        o.write(realSFS+' dadi '+wk_dir+pops[0]+'.saf.idx '+wk_dir+pops[1]+'.saf.idx -sfs '+wk_dir+pops[0]+'_folded.sfs -sfs '+wk_dir+pops[1]+'_folded.sfs -P 4 -ref /home/jamcgirr/ph/data/c_harengus/c.harengus.fa -anc /home/jamcgirr/ph/data/c_harengus/c.harengus.fa > '+dadi_sfs+' \n')
        # NOTE(review): "41 41" is hard-coded; presumably the per-population
        # sample sizes of the downsampled data -- confirm.
        o.write('/home/jamcgirr/apps/moments/AFS-analysis-with-moments/multimodel_inference/realsfs2dadi.pl '+dadi_sfs+' 41 41 > '+snp_data+' \n')
        # Replace the placeholder labels in the converted file in place.
        o.write('sed -i \'s/pop0/'+pops[0]+'/g\' '+snp_data+'\n')
        o.write('sed -i \'s/pop1/'+pops[1]+'/g\' '+snp_data+'\n')
        o.write('sed -i \'s/REF/Ingroup/g\' '+snp_data+'\n')
        o.write('sed -i \'s/OUT/Outgroup/g\' '+snp_data+'\n')

        # The intermediate realSFS output is no longer needed after conversion.
        o.write('rm '+dadi_sfs+' \n\n')

        #run sbatch submission 
        o.write('\n\n#command to run: sbatch '+script)
    

    
# 1 min

In [None]:
# Generate a single sbatch script that activates the Python 3.7 environment
# and runs the moments optimization script.
job_name = 'moments_pipeline_test'

# Header first (creates/truncates the script); commands are appended below.
sbatch_header(job_name,'8','4','24')
script = 'script_' + job_name + '.sh'

# Context manager guarantees the script is closed even if a write fails.
with io.open(script,'a+', newline='\n') as o:
    o.write('source /home/jamcgirr/apps/my_python3.7/bin/activate \n')
    o.write('python moments_Run_Optimizations.py \n')
    # Written to the script commented out, as an alternative entry point.
    o.write('#python Simulate_and_Optimize.py \n')

    #run sbatch submission 
    o.write('\n\n#command to run: sbatch '+script)