# ATAC data analysis

2023/6/20 by Yang Chen

---------------------------------------------------------------------------------------------------------------------------------------------

In [None]:
import os
# Absolute path of the directory holding this ATAC analysis.
wdir = "/home/ug0302/CITEseq3/06_ATAC2"
# Make that directory the current working directory of this Python process.
os.chdir(path=wdir)

In [None]:
%%bash

# Abort the cell as soon as any command (or any pipeline stage) fails, so a
# broken samtools run cannot leave a truncated BAM behind unnoticed.
set -euo pipefail

#######################################
# Remove mitochondrial (chrM) alignments from every BAM in a directory.
#
# Each <in_dir>/<sample>.bam is streamed as SAM (header included), lines
# containing "chrM" are dropped, and the remainder is written as a sorted BAM
# to <out_dir>/<sample>_mt.bam.
#
# Files already named *_mt.bam or *_mp.bam are skipped: this notebook reads
# and writes the same directory, so without the skip a re-run would filter
# its own outputs again (producing *_mt_mt.bam and so on).
#
# NOTE: `grep -v chrM` matches anywhere on the line, not only the reference
# name column, so header lines and records that merely mention chrM (mate
# reference, optional tags) are dropped as well.
#
# Arguments:
#   $1 - directory containing the input *.bam files
#   $2 - directory that receives the *_mt.bam files (created if missing)
#   $3 - thread count passed to each samtools process (default: 20)
# Returns:
#   0 on success or when no input matched; non-zero if a step fails.
#######################################
remove_chrM_reads() {
    local in_dir=$1 out_dir=$2 threads=${3:-20}
    local bam sample
    local -a bams=()

    # Glob instead of parsing `ls`, so paths with spaces survive intact.
    for bam in "${in_dir}"/*.bam; do
        [[ -f "${bam}" ]] || continue   # unmatched glob or not a regular file
        case "${bam}" in
            *_mt.bam | *_mp.bam) continue ;;   # output of an earlier run
        esac
        bams+=("${bam}")
    done

    if (( ${#bams[@]} == 0 )); then
        printf 'remove_chrM_reads: no unfiltered BAM files in %s\n' "${in_dir}" >&2
        return 0
    fi

    mkdir -p -- "${out_dir}"
    for bam in "${bams[@]}"; do
        sample=${bam##*/}
        sample=${sample%.bam}
        samtools view -@ "${threads}" -h "${bam}" \
            | grep -v chrM \
            | samtools sort -@ "${threads}" -O bam -o "${out_dir}/${sample}_mt.bam"
    done
}

remove_chrM_reads \
    "/home/ug0302/CITEseq3/06_ATAC2/bam" \
    "/home/ug0302/CITEseq3/06_ATAC2/bam"

In [None]:
%%bash

# Stop at the first failing command instead of carrying on with bad output.
set -euo pipefail

#######################################
# Keep only alignments whose MAPQ is at least a given cutoff.
#
# Every <in_dir>/*mt.bam is passed through `samtools view -q <cutoff>` and
# written to <out_dir>/<name>_mp.bam, where <name> is the input file name
# without its .bam extension.
#
# Arguments:
#   $1 - directory containing the *mt.bam inputs
#   $2 - directory that receives the *_mp.bam files (created if missing)
#   $3 - minimum MAPQ to keep (default: 42)
# Returns:
#   0 on success or when no input matched; non-zero if samtools fails.
#######################################
filter_by_mapq() {
    local in_dir=$1 out_dir=$2 min_mapq=${3:-42}
    local bam stem
    local -a bams=()

    # Glob instead of parsing `ls`, so paths with spaces survive intact.
    for bam in "${in_dir}"/*mt.bam; do
        [[ -f "${bam}" ]] || continue   # unmatched glob or not a regular file
        bams+=("${bam}")
    done

    if (( ${#bams[@]} == 0 )); then
        printf 'filter_by_mapq: no *mt.bam files in %s\n' "${in_dir}" >&2
        return 0
    fi

    mkdir -p -- "${out_dir}"
    for bam in "${bams[@]}"; do
        stem=${bam##*/}
        stem=${stem%.bam}
        # -b requests BAM output explicitly rather than relying on samtools
        # inferring the format from the .bam extension of the -o path.
        samtools view -b -h -q "${min_mapq}" -o "${out_dir}/${stem}_mp.bam" "${bam}"
    done
}

filter_by_mapq \
    "/home/ug0302/CITEseq3/06_ATAC2/bam" \
    "/home/ug0302/CITEseq3/06_ATAC2/bam"

### BigWig

In [None]:
%%bash

# Stop at the first failing command instead of silently skipping a sample.
set -euo pipefail

#######################################
# Produce one BPM-normalised bigWig coverage track per BAM file.
#
# Runs `bamCoverage` on every <in_dir>/*.bam and writes
# <out_dir>/<sample>.bw, using all available processors (-p max).
#
# Arguments:
#   $1 - directory containing the input *.bam files
#   $2 - directory that receives the *.bw files (created if missing)
#   $3 - bin size in bases passed to --binSize (default: 50)
# Returns:
#   0 on success or when no input matched; non-zero if bamCoverage fails.
#######################################
make_bigwigs() {
    local in_dir=$1 out_dir=$2 bin_size=${3:-50}
    local bam sample
    local -a bams=()

    # Glob instead of parsing `ls`, so paths with spaces survive intact.
    for bam in "${in_dir}"/*.bam; do
        [[ -f "${bam}" ]] || continue   # unmatched glob or not a regular file
        bams+=("${bam}")
    done

    if (( ${#bams[@]} == 0 )); then
        printf 'make_bigwigs: no BAM files in %s\n' "${in_dir}" >&2
        return 0
    fi

    mkdir -p -- "${out_dir}"
    for bam in "${bams[@]}"; do
        sample=${bam##*/}
        sample=${sample%.bam}
        bamCoverage --bam "${bam}" \
                    -o "${out_dir}/${sample}.bw" \
                    --binSize "${bin_size}" \
                    --normalizeUsing BPM \
                    -p max
    done
}

make_bigwigs \
    "/home/ug0302/CITEseq3/06_ATAC2/bamg_new" \
    "/home/ug0302/CITEseq3/06_ATAC2/bigwig_new"

### Peak calling

In [None]:
%%bash

# Activate the MACS2 environment before turning on strict mode: activation
# scripts may reference unset variables and would then trip `set -u`.
source activate atac_macs2 \
    || printf 'call_peaks: could not activate environment atac_macs2\n' >&2

# From here on, stop at the first failing command.
set -euo pipefail

#######################################
# Call peaks with MACS2 on every BAM file in a directory.
#
# Runs `macs2 callpeak` once per <in_dir>/*.bam, using the BAM file name
# (without .bam) as the experiment name and writing results to <out_dir>.
#
# NOTE(review): the call combines -f BAMPE with --shift/--extsize; confirm
# against the MACS2 documentation whether those two options have any effect
# in paired-end mode.
#
# Arguments:
#   $1 - directory containing the input *.bam files
#   $2 - output directory passed to --outdir (created if missing)
#   $3 - effective genome size / species shortcut for -g (default: hs)
# Returns:
#   0 on success or when no input matched; non-zero if macs2 fails.
#######################################
call_peaks() {
    local in_dir=$1 out_dir=$2 species=${3:-hs}
    local bam sample
    local -a bams=()

    # Glob instead of parsing `ls`, so paths with spaces survive intact.
    for bam in "${in_dir}"/*.bam; do
        [[ -f "${bam}" ]] || continue   # unmatched glob or not a regular file
        bams+=("${bam}")
    done

    if (( ${#bams[@]} == 0 )); then
        printf 'call_peaks: no BAM files in %s\n' "${in_dir}" >&2
        return 0
    fi

    mkdir -p -- "${out_dir}"
    for bam in "${bams[@]}"; do
        sample=${bam##*/}
        sample=${sample%.bam}
        macs2 callpeak -n "${sample}" \
                       -t "${bam}" \
                       -f BAMPE \
                       --nomodel \
                       -p 0.01 \
                       -g "${species}" \
                       --shift -75 \
                       --extsize 150 \
                       --keep-dup all \
                       --outdir "${out_dir}"
    done
}

call_peaks \
    "/home/ug0302/CITEseq3/06_ATAC2/bamg_new" \
    "/home/ug0302/CITEseq3/06_ATAC2/peak"

### Heatmap

In [7]:
%%bash

# Stop at the first failing command so the plots are never drawn from a
# matrix that computeMatrix failed to write.
set -euo pipefail

#######################################
# Build a signal matrix around peak centres and plot it.
#
# 1. `computeMatrix reference-point` scores every <bw_dir>/*.bw over every
#    <peak_dir>/*.bed, centred on each region and extended by the flank width
#    on both sides; the matrix is saved as <fig_dir>/nom.gz.
# 2. `plotHeatmap` renders <fig_dir>/heatmap.pdf with a fixed colour list.
# 3. `plotProfile` renders <fig_dir>/heatmap_stat.pdf, grouped per region set.
#
# Arguments:
#   $1 - directory containing the *.bw score files
#   $2 - directory containing the *.bed region files
#   $3 - directory for the matrix and the two PDFs (created if missing)
#   $4 - flank width in bases, used for both -b and -a (default: 2000)
# Returns:
#   0 on success or when either input set is empty; non-zero if a tool fails.
#######################################
plot_peak_heatmap() {
    local bw_dir=$1 peak_dir=$2 fig_dir=$3 width=${4:-2000}
    local -r color='#281A2C,#352A4A,#3F396D,#404C8B,#3E6495,#427999,#488E9E,#50A3A2,#5DBAA4,#78CEA3,#A3DEA6,#D1EDB4,#FDFECC'
    local data="${fig_dir}/nom.gz"
    local path
    local -a file_bw=() file_pk=()

    # Collect inputs into arrays (not space-joined strings) so each path is
    # passed to the tools as exactly one argument.
    for path in "${bw_dir}"/*.bw; do
        [[ -f "${path}" ]] || continue
        file_bw+=("${path}")
    done
    for path in "${peak_dir}"/*.bed; do
        [[ -f "${path}" ]] || continue
        file_pk+=("${path}")
    done

    if (( ${#file_bw[@]} == 0 || ${#file_pk[@]} == 0 )); then
        printf 'plot_peak_heatmap: need *.bw in %s and *.bed in %s; nothing plotted\n' \
            "${bw_dir}" "${peak_dir}" >&2
        return 0
    fi

    mkdir -p -- "${fig_dir}"

    computeMatrix reference-point \
                  --referencePoint center \
                  -b "${width}" -a "${width}" -p max \
                  --missingDataAsZero --skipZeros \
                  -R "${file_pk[@]}" \
                  -S "${file_bw[@]}" \
                  -o "${data}"

    plotHeatmap -m "${data}" \
                -out "${fig_dir}/heatmap.pdf" \
                --colorList "${color}" \
                --heatmapHeight 25 --heatmapWidth 5

    plotProfile -m "${data}" \
                -out "${fig_dir}/heatmap_stat.pdf" \
                --perGroup \
                --numPlotsPerRow 2 \
                --plotHeight 10 --plotWidth 10
}

plot_peak_heatmap \
    "/home/ug0302/CITEseq3/06_ATAC2/bigwig_new" \
    "/home/ug0302/CITEseq3/06_ATAC2/diffbind" \
    "/home/ug0302/CITEseq3/06_ATAC2/figure/heatmap"

### WhichTF

In [1]:
%%bash

# Stop at the first failing command instead of moving on to the next file.
set -euo pipefail

#######################################
# Run WhichTF on every BED file in a directory.
#
# For each <in_dir>/<name>.bed, calls `WhichTF <bed> <genome>` and stores the
# result in <out_dir>/TF_<name>.tsv.
#
# NOTE(review): this cell requests hg38, but the output saved with the cell
# reports `assembly: hg19`; confirm which genome build is intended.
#
# Arguments:
#   $1 - directory containing the input *.bed files
#   $2 - directory that receives the TF_*.tsv files (created if missing)
#   $3 - genome assembly name passed to WhichTF (default: hg38)
# Returns:
#   0 on success or when no input matched; non-zero if WhichTF fails.
#######################################
run_whichtf() {
    local in_dir=$1 out_dir=$2 genome=${3:-hg38}
    local bed name
    local -a beds=()

    # Glob instead of parsing `ls`, so paths with spaces survive intact.
    for bed in "${in_dir}"/*.bed; do
        [[ -f "${bed}" ]] || continue   # unmatched glob or not a regular file
        beds+=("${bed}")
    done

    if (( ${#beds[@]} == 0 )); then
        printf 'run_whichtf: no BED files in %s\n' "${in_dir}" >&2
        return 0
    fi

    mkdir -p -- "${out_dir}"
    for bed in "${beds[@]}"; do
        name=${bed##*/}
        name=${name%.bed}
        WhichTF "${bed}" "${genome}" --outFile "${out_dir}/TF_${name}.tsv"
    done
}

run_whichtf \
    "/home/ug0302/CITEseq3/06_ATAC2/diffbind" \
    "/home/ug0302/CITEseq3/06_ATAC2/figure/whichtf"

  __import__('pkg_resources').require('whichtf==0.2')
2023-10-25 20:01:36,112-WhichTF: [INFO] verbose_level: None
2023-10-25 20:01:36,112-WhichTF: [INFO] tmpdir: /tmp
2023-10-25 20:01:36,112-WhichTF: [INFO] settings: None
2023-10-25 20:01:36,112-WhichTF: [INFO] leaveTrace: False
2023-10-25 20:01:36,112-WhichTF: [INFO] data: /home/ug0302/ATACseq/public_data/software/whichtf/data
2023-10-25 20:01:36,112-WhichTF: [INFO] inFile: /home/ug0302/CITEseq3/06_ATAC2/diffbind/Conc_C50.bed
2023-10-25 20:01:36,112-WhichTF: [INFO] assembly: hg19
2023-10-25 20:01:36,112-WhichTF: [INFO] outFile: /home/ug0302/CITEseq3/06_ATAC2/figure/whichtf/TF_Conc_C50.tsv
2023-10-25 20:01:36,112-WhichTF: [INFO] partialScore: False
2023-10-25 20:01:36,112-WhichTF: [INFO] outPartialScore: None
2023-10-25 20:01:36,113-WhichTF: [INFO] bedtools: bedtools
2023-10-25 20:01:36,113-WhichTF: [INFO] overlapSelect: overlapSelect
2023-10-25 20:01:36,113-WhichTF: [INFO] GREATER: GREATER
2023-10-25 20:01:36,113-WhichTF: [INFO] termN

2023-10-25 20:03:32,565-run.py: [INFO] Working on score 62 of 90.
2023-10-25 20:03:32,707-run.py: [INFO] Working on score 63 of 90.
2023-10-25 20:03:32,834-run.py: [INFO] Working on score 64 of 90.
2023-10-25 20:03:32,944-run.py: [INFO] Working on score 65 of 90.
2023-10-25 20:03:33,042-run.py: [INFO] Working on score 66 of 90.
2023-10-25 20:03:33,124-run.py: [INFO] Working on score 67 of 90.
2023-10-25 20:03:33,197-run.py: [INFO] Working on score 68 of 90.
2023-10-25 20:03:33,259-run.py: [INFO] Working on score 69 of 90.
2023-10-25 20:03:33,313-run.py: [INFO] Working on score 70 of 90.
2023-10-25 20:03:33,358-run.py: [INFO] Working on score 71 of 90.
2023-10-25 20:03:33,399-run.py: [INFO] Working on score 72 of 90.
2023-10-25 20:03:33,437-run.py: [INFO] Working on score 73 of 90.
2023-10-25 20:03:33,469-run.py: [INFO] Working on score 74 of 90.
2023-10-25 20:03:33,495-run.py: [INFO] Working on score 75 of 90.
2023-10-25 20:03:33,514-run.py: [INFO] Working on score 76 of 90.
2023-10-25

/home/ug0302/ATACseq/public_data/software/whichtf/data/hg19/PRISM/tfbs.data.npz
/home/ug0302/ATACseq/public_data/software/whichtf/data/hg19/PRISM/ontologies/MGIPhenotype/ont.merge.query.npz


  __import__('pkg_resources').require('whichtf==0.2')
2023-10-25 20:03:34,206-WhichTF: [INFO] verbose_level: None
2023-10-25 20:03:34,206-WhichTF: [INFO] tmpdir: /tmp
2023-10-25 20:03:34,206-WhichTF: [INFO] settings: None
2023-10-25 20:03:34,206-WhichTF: [INFO] leaveTrace: False
2023-10-25 20:03:34,206-WhichTF: [INFO] data: /home/ug0302/ATACseq/public_data/software/whichtf/data
2023-10-25 20:03:34,206-WhichTF: [INFO] inFile: /home/ug0302/CITEseq3/06_ATAC2/diffbind/Conc_NC.bed
2023-10-25 20:03:34,206-WhichTF: [INFO] assembly: hg19
2023-10-25 20:03:34,206-WhichTF: [INFO] outFile: /home/ug0302/CITEseq3/06_ATAC2/figure/whichtf/TF_Conc_NC.tsv
2023-10-25 20:03:34,206-WhichTF: [INFO] partialScore: False
2023-10-25 20:03:34,206-WhichTF: [INFO] outPartialScore: None
2023-10-25 20:03:34,206-WhichTF: [INFO] bedtools: bedtools
2023-10-25 20:03:34,206-WhichTF: [INFO] overlapSelect: overlapSelect
2023-10-25 20:03:34,206-WhichTF: [INFO] GREATER: GREATER
2023-10-25 20:03:34,206-WhichTF: [INFO] termNum

/home/ug0302/ATACseq/public_data/software/whichtf/data/hg19/PRISM/tfbs.data.npz
/home/ug0302/ATACseq/public_data/software/whichtf/data/hg19/PRISM/ontologies/MGIPhenotype/ont.merge.query.npz
