In [1]:
#Define data folders, data, and sample name
#remember that fasta should have built bowtie2 indices
annotation="/annotation_dir/mm9_norandom"
tmp="/input_dir/"
input_dir="/output_dir/"
output_dir="/output_dir/"
input_fq_1="ATAC_mesc_1_5M.fq.gz"
input_fq_2="ATAC_mesc_2_5M.fq.gz"
sample_name="mesc_atac_5M"

In [2]:
#align and sort / filter bam
#paired end
if [ -e "$input_dir""$input_fq_2" ]; then
    echo "Aligning using bowtie2 w/ paired end"
    bowtie2 -p 8 --sensitive -x  $annotation \
        -1 $input_dir$input_fq_1 -2 $input_dir$input_fq_2 | samtools view -bS - > $tmp$sample_name".bam"
else
    echo "Aligning using bowtie2 w/ single end"
    bowtie2 -p 8 --sensitive -x  $annotation \
        -U $input_dir$input_fq_1 | samtools view -bS - > $tmp$sample_name".bam"
fi

#sort index and filter for canonical chromosomes
samtools sort -@ 10 $tmp$sample_name".bam" -o $tmp$sample_name"_sorted.bam"
samtools index "$tmp$sample_name""_sorted.bam"
samtools view $tmp$sample_name"_sorted.bam" -hu chr1 chr2 chr3 chr4 chr5 chr6 chr7 chr8 chr9 chr10 chr11 chr12 chr13 chr14 chr15 chr16 chr17 chr18 chr19 chrX > "$output_dir$sample_name""_filt_sorted.bam"
samtools index "$output_dir$sample_name""_filt_sorted.bam"

Aligning using bowtie2 w/ paired end
5000000 reads; of these:
  5000000 (100.00%) were paired; of these:
    1827873 (36.56%) aligned concordantly 0 times
    1613529 (32.27%) aligned concordantly exactly 1 time
    1558598 (31.17%) aligned concordantly >1 times
    ----
    1827873 pairs aligned concordantly 0 times; of these:
      335893 (18.38%) aligned discordantly 1 time
    ----
    1491980 pairs aligned 0 times concordantly or discordantly; of these:
      2983960 mates make up the pairs; of these:
        2180326 (73.07%) aligned 0 times
        129188 (4.33%) aligned exactly 1 time
        674446 (22.60%) aligned >1 times
78.20% overall alignment rate


In [3]:
#Take a peek at the bam
samtools view "$output_dir$sample_name""_filt_sorted.bam"| head
#get alignment stats
samtools flagstat "$output_dir$sample_name""_filt_sorted.bam"
samtools idxstats "$output_dir$sample_name""_filt_sorted.bam"

SRR2927023.2702024.1	99	chr1	3015789	42	126M	=	3015872	209	GATGTGGAGAATGTGGAGAAAGAGGAACAATCCACCATTGTTGGTGGGATTACCAGCTTGTACATCCACTCCAGAAATCAGTCTGGTGGTTCCTCAGACTGTACATAGTAGTACTAGAGCATCCTG	ABBBBGGGCGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGEGGGGGGGGGGGGGGGFGGGF	AS:i:-15	XN:i:0	XM:i:3	XO:i:0	XG:i:0	NM:i:3	MD:Z:32T31A36G24	YS:i:-5	YT:Z:CP
SRR2927023.2702024.2	147	chr1	3015872	42	126M	=	3015789	-209	TGGTGGTTCCTCAGACTGTACATAGTAGTACTAGAGCATCCTGCAATACCTCTCCTGTACATATATCTAGATGTTCCAACTGGTAATAAAGACACATGCCCTATTATGTTCATAGCAGCCTTATTT	GGGGGGGGGGGGGGGGGGGGGFGGGGGGGGGGGGGGGGDFGGGGGGGGGGGGGGGGGGGGFGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGEGGGGGGGGGGGGGGGGGGGGGGCBCCB	AS:i:-5	XN:i:0	XM:i:1	XO:i:0	XG:i:0	NM:i:1	MD:Z:18G107	YS:i:-15	YT:Z:CP
SRR2927023.4438235.1	101	chr1	3021827	0	*	=	3021827	0	CTGTGGACCTCACTCAGGATATTATTTTGTAGATCCATTCATTTGCCTGTGAGTTTCATGAAGTCACTATTTTTAATAGCTGAGTAATACTCCATTTTGTAAATATATCAGATTTTCTGTATCTAT	?BBCBEGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG

In [4]:
#Switch to python2.7 for MACS2 and NucleoATAC
source activate py27
#Call acessibility peaks, and using those peaks define area around to resolve nucleosomes
macs2 callpeak -t "$output_dir$sample_name""_filt_sorted.bam" --nomodel --shift -37 -f BAMPE \
    --extsize 73 --broad --keep-dup all -n $tmp"$sample_name""_MACS"
bedops --range 500:500 --everything $tmp"$sample_name""_MACS_peaks.broadPeak" | bedtools merge -i - > $tmp"$sample_name""_MACS_merged.bed"
nucleoatac run --bed $tmp"$sample_name""_MACS_merged.bed" --bam $output_dir"$sample_name""_filt_sorted.bam" --fasta "$annotation"".fa" --out $tmp"$sample_name""_nucATAC" --cores 8
source deactivate py27

(py27) (py27) INFO  @ Fri, 26 May 2017 15:40:47: 
# Command line: callpeak -t /output_dir/mesc_atac_5M_filt_sorted.bam --nomodel --shift -37 -f BAMPE --extsize 73 --broad --keep-dup all -n /input_dir/mesc_atac_5M_MACS
# ARGUMENTS LIST:
# name = /input_dir/mesc_atac_5M_MACS
# format = BAMPE
# ChIP-seq file = ['/output_dir/mesc_atac_5M_filt_sorted.bam']
# control file = None
# effective genome size = 2.70e+09
# band width = 300
# model fold = [5, 50]
# qvalue cutoff for narrow/strong regions = 5.00e-02
# qvalue cutoff for broad/weak regions = 1.00e-01
# Larger dataset will be scaled towards smaller dataset.
# Range for calculating regional lambda is: 10000 bps
# Broad region calling is on
# Paired-End mode is on
 
INFO  @ Fri, 26 May 2017 15:40:47: #1 read fragment files... 
INFO  @ Fri, 26 May 2017 15:40:47: #1 read treatment fragments... 
INFO  @ Fri, 26 May 2017 15:40:59:  1000000 
INFO  @ Fri, 26 May 2017 15:41:03: #1 mean fragment size is determined as 229 bp from treatment 
INFO  @

  null_lik = np.sum(np.log(null_model) * mat)
  null_lik = np.sum(np.log(null_model) * mat)
  nuc_lik = np.sum(np.log(nuc_model) * mat)
  nuc_lik = np.sum(np.log(nuc_model) * mat)
  null_lik = np.sum(np.log(null_model) * mat)
  null_lik = np.sum(np.log(null_model) * mat)
  nuc_lik = np.sum(np.log(nuc_model) * mat)
  nuc_lik = np.sum(np.log(nuc_model) * mat)
  null_lik = np.sum(np.log(null_model) * mat)
  null_lik = np.sum(np.log(null_model) * mat)
  nuc_lik = np.sum(np.log(nuc_model) * mat)
  nuc_lik = np.sum(np.log(nuc_model) * mat)
  null_lik = np.sum(np.log(null_model) * mat)
  null_lik = np.sum(np.log(null_model) * mat)
  nuc_lik = np.sum(np.log(nuc_model) * mat)
  nuc_lik = np.sum(np.log(nuc_model) * mat)
  null_lik = np.sum(np.log(null_model) * mat)
  null_lik = np.sum(np.log(null_model) * mat)
  nuc_lik = np.sum(np.log(nuc_model) * mat)
  nuc_lik = np.sum(np.log(nuc_model) * mat)
  null_lik = np.sum(np.log(null_model) * mat)
  null_lik = np.sum(np.log(null_model) * mat)
  nuc_li