In [295]:
import os
import pandas as pd
import sys
import numpy as np
import sys
sys.path.insert(0, '../..')
import itertools

from JKBio import TerraFunction as terra
from ccle_processing.src.CCLE_postp_function import *
from JKBio import Helper as h
from JKBio.epigenetics import ChIP_helper as chip

import dalmatian as dm
import pyBigWig

from bokeh.plotting import *
from IPython.display import IFrame
import igv
from biomart import BiomartServer
import io

from sklearn.manifold import MDS, TSNE
from sklearn.decomposition import PCA
from sklearn.preprocessing import scale
from sklearn.cluster import AgglomerativeClustering
from sklearn.mixture import GaussianMixture

output_notebook()
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


# ChIP

In [3]:
project="IRF2BP2_degron_ChIP"

In [None]:
!gsutil ls gs://amlproject/Chip/

In [None]:
# install bwa
! mkdir ../../ref
! curl ftp://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/001/215/GCF_000001215.4_Release_6_plus_ISO1_MT/GCF_000001215.4_Release_6_plus_ISO1_MT_genomic.fna.gz -o ../../ref/reference_droso.fna.gz
! gunzip ref/reference_droso.fna.gz
! bwa index -a bwtsw ../../ref/reference_droso.fna

## V1

### analysis

In [None]:
rename1 = {
 'gs://transfer-amlproject/20191211_10_MP7613_S8_R1_001.fastq.gz':"gs://amlproject/Chip/IRF2BP2_degraded_rep1/fastqs/mp779-MV411_IRF2BP2_DMSO_6h-FLAG_IRF2BP2-r1_1.fastq.gz",
 'gs://transfer-amlproject/20191211_10_MP7613_S8_R2_001.fastq.gz':"gs://amlproject/Chip/IRF2BP2_degraded_rep1/fastqs/mp779-MV411_IRF2BP2_DMSO_6h-FLAG_IRF2BP2-r1_2.fastq.gz",
 'gs://transfer-amlproject/20191211_11_MP7613_S9_R1_001.fastq.gz':"gs://amlproject/Chip/IRF2BP2_degraded_rep1/fastqs/mp780-MV411_IRF2BP2_DMSO_6h-MED1-r1_1.fastq.gz",
 'gs://transfer-amlproject/20191211_11_MP7613_S9_R2_001.fastq.gz':"gs://amlproject/Chip/IRF2BP2_degraded_rep1/fastqs/mp780-MV411_IRF2BP2_DMSO_6h-MED1-r1_2.fastq.gz",
 'gs://transfer-amlproject/20191211_12_MP7613_S10_R1_001.fastq.gz':"gs://amlproject/Chip/IRF2BP2_degraded_rep1/fastqs/mp781-MV411_IRF2BP2_DMSO_6h-POLII_total-r1_1.fastq.gz",
 'gs://transfer-amlproject/20191211_12_MP7613_S10_R2_001.fastq.gz':"gs://amlproject/Chip/IRF2BP2_degraded_rep1/fastqs/mp781-MV411_IRF2BP2_DMSO_6h-POLII_total-r1_2.fastq.gz",
 'gs://transfer-amlproject/20191211_13_MP7613_S11_R1_001.fastq.gz':"gs://amlproject/Chip/IRF2BP2_degraded_rep1/fastqs/mp782-MV411_IRF2BP2_DMSO_6h-POLII_S2-r1_1.fastq.gz",
 'gs://transfer-amlproject/20191211_13_MP7613_S11_R2_001.fastq.gz':"gs://amlproject/Chip/IRF2BP2_degraded_rep1/fastqs/mp782-MV411_IRF2BP2_DMSO_6h-POLII_S2-r1_2.fastq.gz",
 'gs://transfer-amlproject/20191211_14_MP7613_S12_R1_001.fastq.gz':"gs://amlproject/Chip/IRF2BP2_degraded_rep1/fastqs/mp783-MV411_IRF2BP2_DMSO_6h-POLII_S5-r1_1.fastq.gz",
 'gs://transfer-amlproject/20191211_14_MP7613_S12_R2_001.fastq.gz':"gs://amlproject/Chip/IRF2BP2_degraded_rep1/fastqs/mp783-MV411_IRF2BP2_DMSO_6h-POLII_S5-r1_2.fastq.gz",
 'gs://transfer-amlproject/20191211_15_MP7613_S13_R1_001.fastq.gz':"gs://amlproject/Chip/IRF2BP2_degraded_rep1/fastqs/mp784-MV411_IRF2BP2_DMSO_6h-MYC-r1_1.fastq.gz",
 'gs://transfer-amlproject/20191211_15_MP7613_S13_R2_001.fastq.gz':"gs://amlproject/Chip/IRF2BP2_degraded_rep1/fastqs/mp784-MV411_IRF2BP2_DMSO_6h-MYC-r1_2.fastq.gz",
 'gs://transfer-amlproject/20191211_16_MP7613_S14_R1_001.fastq.gz':"gs://amlproject/Chip/IRF2BP2_degraded_rep1/fastqs/mp785-MV411_IRF2BP2_DMSO_6h-MYB-r1_1.fastq.gz",
 'gs://transfer-amlproject/20191211_16_MP7613_S14_R2_001.fastq.gz':"gs://amlproject/Chip/IRF2BP2_degraded_rep1/fastqs/mp785-MV411_IRF2BP2_DMSO_6h-MYB-r1_2.fastq.gz",
 'gs://transfer-amlproject/20191211_1_MP7613_S1_R1_001.fastq.gz':"gs://amlproject/Chip/IRF2BP2_degraded_rep1/fastqs/mp786-MV411_IRF2BP2_DMSO_6h-SPI1-r1_1.fastq.gz",
 'gs://transfer-amlproject/20191211_1_MP7613_S1_R2_001.fastq.gz':"gs://amlproject/Chip/IRF2BP2_degraded_rep1/fastqs/mp786-MV411_IRF2BP2_DMSO_6h-SPI1-r1_2.fastq.gz",
 'gs://transfer-amlproject/20191211_2_MP7613_S2_R1_001.fastq.gz':"gs://amlproject/Chip/IRF2BP2_degraded_rep1/fastqs/mp787-MV411_IRF2BP_VHL_6h-FLAG_IRF2BP2-r1_1.fastq.gz",
 'gs://transfer-amlproject/20191211_2_MP7613_S2_R2_001.fastq.gz':"gs://amlproject/Chip/IRF2BP2_degraded_rep1/fastqs/mp787-MV411_IRF2BP_VHL_6h-FLAG_IRF2BP2-r1_2.fastq.gz",
 'gs://transfer-amlproject/20191211_3_MP7613_S3_R1_001.fastq.gz':"gs://amlproject/Chip/IRF2BP2_degraded_rep1/fastqs/mp788-MV411_IRF2BP_VHL_6h-MED1-r1_1.fastq.gz",
 'gs://transfer-amlproject/20191211_3_MP7613_S3_R2_001.fastq.gz':"gs://amlproject/Chip/IRF2BP2_degraded_rep1/fastqs/mp788-MV411_IRF2BP_VHL_6h-MED1-r1_2.fastq.gz",
 'gs://transfer-amlproject/20191211_4_MP7613_S4_R1_001.fastq.gz':"gs://amlproject/Chip/IRF2BP2_degraded_rep1/fastqs/mp789-MV411_IRF2BP_VHL_6h-POLII_total-r1_1.fastq.gz",
 'gs://transfer-amlproject/20191211_4_MP7613_S4_R2_001.fastq.gz':"gs://amlproject/Chip/IRF2BP2_degraded_rep1/fastqs/mp789-MV411_IRF2BP_VHL_6h-POLII_total-r1_2.fastq.gz",
 'gs://transfer-amlproject/20191211_5R_MP7613_S15_R1_001.fastq.gz':"gs://amlproject/Chip/IRF2BP2_degraded_rep1/fastqs/mp790-MV411_IRF2BP_VHL_6h-POLII_S2-r1_1.fastq.gz",
 'gs://transfer-amlproject/20191211_5R_MP7613_S15_R2_001.fastq.gz':"gs://amlproject/Chip/IRF2BP2_degraded_rep1/fastqs/mp790-MV411_IRF2BP_VHL_6h-POLII_S2-r1_2.fastq.gz",
 'gs://transfer-amlproject/20191211_6R_MP7613_S16_R1_001.fastq.gz':"gs://amlproject/Chip/IRF2BP2_degraded_rep1/fastqs/mp791-MV411_IRF2BP_VHL_6h-POLII_S5-r1_1.fastq.gz",
 'gs://transfer-amlproject/20191211_6R_MP7613_S16_R2_001.fastq.gz':"gs://amlproject/Chip/IRF2BP2_degraded_rep1/fastqs/mp791-MV411_IRF2BP_VHL_6h-POLII_S5-r1_2.fastq.gz",
 'gs://transfer-amlproject/20191211_7_MP7613_S5_R1_001.fastq.gz':"gs://amlproject/Chip/IRF2BP2_degraded_rep1/fastqs/mp792-MV411_IRF2BP_VHL_6h-MYC-r1_1.fastq.gz",
 'gs://transfer-amlproject/20191211_7_MP7613_S5_R2_001.fastq.gz':"gs://amlproject/Chip/IRF2BP2_degraded_rep1/fastqs/mp792-MV411_IRF2BP_VHL_6h-MYC-r1_2.fastq.gz",
 'gs://transfer-amlproject/20191211_8_MP7613_S6_R1_001.fastq.gz':"gs://amlproject/Chip/IRF2BP2_degraded_rep1/fastqs/mp793-MV411_IRF2BP_VHL_6h-MYB-r1_1.fastq.gz",
 'gs://transfer-amlproject/20191211_8_MP7613_S6_R2_001.fastq.gz':"gs://amlproject/Chip/IRF2BP2_degraded_rep1/fastqs/mp793-MV411_IRF2BP_VHL_6h-MYB-r1_2.fastq.gz",
 'gs://transfer-amlproject/20191211_9_MP7613_S7_R1_001.fastq.gz':"gs://amlproject/Chip/IRF2BP2_degraded_rep1/fastqs/mp794-MV411_IRF2BP_VHL_6h-SPI1-r1_1.fastq.gz",
 'gs://transfer-amlproject/20191211_9_MP7613_S7_R2_001.fastq.gz':"gs://amlproject/Chip/IRF2BP2_degraded_rep1/fastqs/mp794-MV411_IRF2BP_VHL_6h-SPI1-r1_2.fastq.gz"}

In [None]:
for k, val in rename1.items():
    os.system('gsutil mv '+k+' '+val)

In [None]:
! gsutil -m cp -r gs://amlproject/Chip/IRF2BP2_degraded_rep1 ../../data/
inputfastq="gs://amlproject/Chip/fastqs/mp99-MV411-INPUT-r1.fastq.gz"
! gsutil cp $inputfastq ../../data/IRF2BP2_degraded_rep1/fastqs/
a = ! ls ../../data/IRF2BP2_degraded_rep1/fastqs/

In [None]:
from gsheets import Sheets
sheets = Sheets.from_files('~/.client_secret.json', '~/.storage.json')
url="https://docs.google.com/spreadsheets/d/1yFLjYB1McU530JnLgL0QIMAKIkVl3kl0_LCHje2gk8U"
gsheet = sheets.get(url).sheets[2].to_frame()

In [None]:
gsheet

In [None]:
df = {
"fastq1": [],
"fastq2": [],
"antibody": [],
"group": [],
"replicate": [],
"control": []
}
for val in helper.grouped(a[:-1],2):
    row = gsheet[gsheet.id==val[0].split('/')[-1].split('_')[0]]
    df['group'].append((row.id.values[0] + '_'+ row.name_replicate.values[0] +"_"+row.protein.values[0]))
    df['replicate'].append(1)
    df['fastq1'].append(val[0])
    #df['fastq2'].append(val[1])
    df['antibody'].append(row.protein.values[0])
    df['control'].append("INPUT")
df['group'].append('INPUT')
df['replicate'].append(1)
df['fastq1'].append(a[-1])
#df['fastq2'].append(val[1])
df['antibody'].append("INPUT")
df['control'].append("INPUT")
df = pd.DataFrame(df)

In [None]:
df[df.columns[[3,4,0,1,2,5]]].to_csv('../nextflow/design_IRF2BP2_degraded_rep1.csv')

In [None]:
#process chips
! sudo ./nextflow run nf-core/chipseq --single_end --seq_center 'DFCI' --email 'jkobject@gmail.com' --narrow_peak --input ../nextflow/design_IRF2BP2_degraded_rep1.csv --genome GRCh38 --skip_preseq --max_cpus 24 -profile docker -w work

In [None]:
#!gsutil cp results/* ../../data/IRF2BP2_degraded_rep1/ && sudo rm -r results && sudo rm -r work

In [None]:
# get scaling values
mappedreads, umappedreads_norm, mapped = chip.getSpikeInControlScales(refgenome="../../ref/reference_droso.fna",FastQfolder='../../data/IRF2BP2_degraded_rep1/fastqs/',pairedEnd=True, cores=8)
mappedreads, umappedreads_norm, mapped

computing scales from the excel sheet

In [None]:
scales = [[562285,1496707],
[1686168,7198567],
[3642441,2612624],
[3992589,3474812],
[3347901,3829477],
[6181136,989703],
[7523840,4173047],
[922482,1195857]]

In [None]:
scales = [[1.0, 0.3756814126], #IRF2BP2
[1.0, 0.2342366196], #MED1
[0.5546432407, 1.0], #MYB
[0.1601166841, 1.0], #MYC
[0.870315477, 1.0], #POL2S2
[1.0, 0.8742449687], #POL2S5
[0.7172728398, 1.0], #POL2TOT
[1.0, 0.7713982525]] #SPI1

### on scalled data


In [None]:
bams = !ls ../../data/IRF2BP2_degraded_rep1/diffData/*treat_pileup.bdg
bams = ['_'.join(i.split('/')[-1].split('_')[:-2]) for i in bams]
bams

In [None]:
# diffPeak on scalled data
sizes = [220, 191, 211, 204, 285, 222, 228, 194]
for i in range(int(len(bams)/2)):
    name1 = bams[i]
    name2 = bams[i+8]
    print(name1,name2)
    print(chip.diffPeak(name1,name2, directory= "../../data/IRF2BP2_degraded_rep1/diffData/", res_directory='../../data/IRF2BP2_degraded_rep1/diffPeaks/', scaling1=scales[i][1], scaling2=scales[i][0], size=size[i]))

In [None]:
# diffPeak on scalled data
for i in range(int(len(bams[1:])/2)-1):
    name1 = bams[1+i]
    name2 = bams[9+i]
    chip.fullDiffPeak(name1,name2, bams[0], scaling = scales[i],compute_size=True)

In [None]:
scales = [1.0,
1.0,
0.5546432407,
0.1601166841,
0.870315477,
1.0,
0.7172728398,
1.0,
0.3756814126,
0.2342366196,
1.0,
1.0,
1.0,
0.8742449687,
1.0,
0.7713982525]

In [None]:
chip.bigWigFrom(bams[1:], genome='GRCh38',scaling=scales)

In [None]:
! mv ../../data/recalib_bigwig_rep1/* ../../data/IRF2BP2_degraded_rep1/recalib_bigwig/
bw = ! ls ../../data/IRF2BP2_degraded_rep1/recalib_bigwig/*.bw
bw

In [None]:
cond1peak = ! ls ../../data/IRF2BP2_degraded_rep1/diffPeaks/*cond1.bed
cond2peak = ! ls ../../data/IRF2BP2_degraded_rep1/diffPeaks/*cond2.bed
commonpeak = ! ls ../../data/IRF2BP2_degraded_rep1/diffPeaks/*common.bed
commonpeak

In [None]:
names = ["FLAG_IRF2BP2","MED1","MYB","MYC","POLII_S2","POLII_S5","POLII_total","SPI1"]

In [None]:
for i in range(int(len(bw)/2)):
    if i<0:
        continue
    name1 = bw[i]
    name2 = bw[8+i]
    peak = [cond1peak[i], commonpeak[i], cond2peak[i]]
    for val in peak:
        chip.dropWeirdChromosomes(val)
    name = names[i]
    print(name1,name2)
    chip.getPeaksAt(peak, [name1, name2], bigwignames=['DMSO', 'VHL'], peaknames=['DMSO_peaks', 'common', 'VHL_peaks'], window=3000, refpoint='center', folder="", title=name, numthreads=7, torecompute=True, name='../../data/IRF2BP2_degraded_rep1/diffPeaks/'+name+'_mat.pdf', withDeeptools=True)

In [None]:
peaks = ! ls ../../data/IRF2BP2_degraded_rep1/bwa/mergedLibrary/macs/narrowPeak/*.narrowPeak
peaks

In [None]:
names = ["FLAG_IRF2BP2","MED1","MYB","MYC","POLII_S2","POLII_S5","POLII_total","SPI1"]
names.extend([i+'_VHL' for i in names])
names

In [None]:
for i in range(len(bw)):
    chip.getPeaksAt(peaks[i], bw[i], window=3000, folder="", title=names[i], numthreads=7, torecompute=True, onlyProfile=True, refpoint='center', name='../../data/IRF2BP2_degraded_rep1/diffPeaks/'+names[i]+'_mat_profile.pdf', withDeeptools=True)
    chip.getPeaksAt(peaks[i], bw[i], window=3000, folder="", title=names[i], numthreads=7, torecompute=True, refpoint='center',onlyProfile=True,name='../../data/IRF2BP2_degraded_rep1/diffPeaks/'+names[i]+'_mat_profile_clust3.pdf', withDeeptools=True, cluster=3)

### on unscalled data

In [None]:
bams = !ls ../../data/IRF2BP2_degraded_rep1/bwa/mergedLibrary/mp*.bam
bams

In [None]:
! mkdir ../../data/IRF2BP2_degraded_rep1/diffPeaks_unscaled

In [None]:
bams= list(bams)

In [None]:
bams

In [None]:
size

In [None]:
# on unscalled data 
for i in range(int((len(bams)-1)/2)-1):
    if i < 0:
        continue
    name1 = bams[1+i]
    name2 = bams[9+i]
    chip.fullDiffPeak(name1,name2, control1=bams[0], size=size[i], compute_size=False, directory = "../../data/IRF2BP2_degraded_rep1/diffData_unscaled/", res_directory = "../../data/IRF2BP2_degraded_rep1/diffPeaks_unscaled/",pairedend=False)

In [None]:
bw = ! ls ../../data/IRF2BP2_degraded_rep1/bwa/mergedLibrary/bigwig/*.bigWig

In [None]:
commonpeak = ! ls ../../data/IRF2BP2_degraded_rep1/diffPeaks_unscaled/*common.bed
cond1peak = ! ls ../../data/IRF2BP2_degraded_rep1/diffPeaks_unscaled/*cond1.bed
cond2peak = ! ls ../../data/IRF2BP2_degraded_rep1/diffPeaks_unscaled/*cond2.bed
cond2peak

In [None]:
names = ["FLAG_IRF2BP2","MED1","POLII_total","POLII_S2","POLII_S5","MYC","MYB","SPI1"]

In [None]:
for i in range(int(len(bw[1:])/2)-1):
    name1 = bw[1+i]
    name2 = bw[9+i]
    peak = [cond1peak[i], commonpeak[i], cond2peak[i]]
    for val in peak:
        chip.dropWeirdChromosomes(val)
    name = names[i]
    chip.getPeaksAt(peak, [name1, name2], bigwignames=['DMSO', 'VHL'], peaknames=['DMSO_peaks', 'common', 'VHL_peaks'], window=3000, title=name, numthreads=7, refpoint='center', folder="../../data/IRF2BP2_degraded_rep1/bwa/mergedLibrary/bigwig/", torecompute=True, name='../../data/IRF2BP2_degraded_rep1/diffPeaks_unscaled/'+name+'_mat.pdf', withDeeptools=True)

In [None]:
! gsutil -m cp -r ../../data/IRF2BP2_degraded_rep1 gs://amlproject/Chip/
# rm -r ../../data/IRF2BP2_degraded_rep1

## v2

In [None]:
# mkdir ../../data/IRF2BP2_degraded_rep2/ && mkdir ../../data/IRF2BP2_degraded_rep2/fastqs && gsutil -m cp "gs://transfer-amlproject/20191219_MP7659*" ../../data/IRF2BP2_degraded_rep2/fastqs/

### analysis

In [None]:
rename = {
"20191219_MP7659_1_S1_R1_001.fastq.gz":"mp795-MV411_IRF2BP2_DMSO_6h-FLAG_IRF2BP2-r2_R1.fastq.gz",
"20191219_MP7659_1_S1_R2_001.fastq.gz":"mp795-MV411_IRF2BP2_DMSO_6h-FLAG_IRF2BP2-r2_R2.fastq.gz",
"20191219_MP7659_2_S2_R1_001.fastq.gz":"mp796-MV411_IRF2BP2_DMSO_6h-MED1-r2_R1.fastq.gz",
"20191219_MP7659_2_S2_R2_001.fastq.gz":"mp796-MV411_IRF2BP2_DMSO_6h-MED1-r2_R2.fastq.gz",
"20191219_MP7659_3_S3_R1_001.fastq.gz":"mp797-MV411_IRF2BP2_DMSO_6h-POLII_total-r2_R1.fastq.gz",
"20191219_MP7659_3_S3_R2_001.fastq.gz":"mp797-MV411_IRF2BP2_DMSO_6h-POLII_total-r2_R2.fastq.gz",
"20191219_MP7659_4_S4_R1_001.fastq.gz":"mp798-MV411_IRF2BP2_DMSO_6h-POLII_S2-r2_R1.fastq.gz",
"20191219_MP7659_4_S4_R2_001.fastq.gz":"mp798-MV411_IRF2BP2_DMSO_6h-POLII_S2-r2_R2.fastq.gz",
"20191219_MP7659_5_S5_R1_001.fastq.gz":"mp799-MV411_IRF2BP2_DMSO_6h-POLII_S5-r2_R1.fastq.gz",
"20191219_MP7659_5_S5_R2_001.fastq.gz":"mp799-MV411_IRF2BP2_DMSO_6h-POLII_S5-r2_R2.fastq.gz",
"20191219_MP7659_6_S6_R1_001.fastq.gz":"mp800-MV411_IRF2BP2_DMSO_6h-MYC-r2_R1.fastq.gz",
"20191219_MP7659_6_S6_R2_001.fastq.gz":"mp800-MV411_IRF2BP2_DMSO_6h-MYC-r2_R2.fastq.gz",
"20191219_MP7659_7_S7_R1_001.fastq.gz":"mp801-MV411_IRF2BP2_DMSO_6h-MYB-r2_R1.fastq.gz",
"20191219_MP7659_7_S7_R2_001.fastq.gz":"mp801-MV411_IRF2BP2_DMSO_6h-MYB-r2_R2.fastq.gz",
"20191219_MP7659_8_S8_R1_001.fastq.gz":"mp802-MV411_IRF2BP2_DMSO_6h-SPI1-r2_R1.fastq.gz",
"20191219_MP7659_8_S8_R2_001.fastq.gz":"mp802-MV411_IRF2BP2_DMSO_6h-SPI1-r2_R2.fastq.gz",
"20191219_MP7659_9_S9_R1_001.fastq.gz":"mp803-MV411_IRF2BP_VHL_6h-FLAG_IRF2BP2-r2_R1.fastq.gz",
"20191219_MP7659_9_S9_R2_001.fastq.gz":"mp803-MV411_IRF2BP_VHL_6h-FLAG_IRF2BP2-r2_R2.fastq.gz",
"20191219_MP7659_10_S10_R1_001.fastq.gz":"mp804-MV411_IRF2BP_VHL_6h-MED1-r2_R1.fastq.gz",
"20191219_MP7659_10_S10_R2_001.fastq.gz":"mp804-MV411_IRF2BP_VHL_6h-MED1-r2_R2.fastq.gz",
"20191219_MP7659_11_S11_R1_001.fastq.gz":"mp805-MV411_IRF2BP_VHL_6h-POLII_total-r2_R1.fastq.gz",
"20191219_MP7659_11_S11_R2_001.fastq.gz":"mp805-MV411_IRF2BP_VHL_6h-POLII_total-r2_R2.fastq.gz",
"20191219_MP7659_12_S12_R1_001.fastq.gz":"mp806-MV411_IRF2BP_VHL_6h-POLII_S2-r2_R1.fastq.gz",
"20191219_MP7659_12_S12_R2_001.fastq.gz":"mp806-MV411_IRF2BP_VHL_6h-POLII_S2-r2_R2.fastq.gz",
"20191219_MP7659_13_S13_R1_001.fastq.gz":"mp807-MV411_IRF2BP_VHL_6h-POLII_S5-r2_R1.fastq.gz",
"20191219_MP7659_13_S13_R2_001.fastq.gz":"mp807-MV411_IRF2BP_VHL_6h-POLII_S5-r2_R2.fastq.gz",
"20191219_MP7659_14_S14_R1_001.fastq.gz":"mp808-MV411_IRF2BP_VHL_6h-MYC-r2_R1.fastq.gz",
"20191219_MP7659_14_S14_R2_001.fastq.gz":"mp808-MV411_IRF2BP_VHL_6h-MYC-r2_R2.fastq.gz",
"20191219_MP7659_15_S15_R1_001.fastq.gz":"mp809-MV411_IRF2BP_VHL_6h-MYB-r2_R1.fastq.gz",
"20191219_MP7659_15_S15_R2_001.fastq.gz":"mp809-MV411_IRF2BP_VHL_6h-MYB-r2_R2.fastq.gz",
"20191219_MP7659_16_S16_R1_001.fastq.gz":"mp810-MV411_IRF2BP_VHL_6h-SPI1-r2_R1.fastq.gz",
"20191219_MP7659_16_S16_R2_001.fastq.gz":"mp810-MV411_IRF2BP_VHL_6h-SPI1-r2_R2.fastq.gz"
}

In [None]:
for k,v in rename.items():
    ! mv ../../data/IRF2BP2_degraded_rep2/fastqs/$k ../../data/IRF2BP2_degraded_rep2/fastqs/$v


In [None]:
inputfastq="gs://amlproject/Chip/fastqs/mp99-MV411-INPUT-r1.fastq.gz"
! gsutil cp $inputfastq ../../data/IRF2BP2_degraded_rep2/fastqs
a = ! ls ../../data/IRF2BP2_degraded_rep2/fastqs

In [None]:
from gsheets import Sheets
sheets = Sheets.from_files('~/.client_secret.json', '~/.storage.json')
url="https://docs.google.com/spreadsheets/d/1yFLjYB1McU530JnLgL0QIMAKIkVl3kl0_LCHje2gk8U"
gsheet = sheets.get(url).sheets[2].to_frame()

In [None]:
df = {
"fastq1": [],
"fastq2": [],
"antibody": [],
"group": [],
"replicate": [],
"control": []
}
for val in helper.grouped(a[:-1],2):
    row = gsheet[gsheet.id==val[0].split('/')[-1].split('_')[0]]
    df['group'].append((row.id.values[0] + '_'+ row.name_replicate.values[0] +"_"+row.protein.values[0]))
    df['replicate'].append(1)
    df['fastq1'].append(val[0])
    #df['fastq2'].append(val[1])
    df['antibody'].append(row.protein.values[0])
    df['control'].append("INPUT")
df['group'].append('INPUT')
df['replicate'].append(1)
df['fastq1'].append(a[-1])
#df['fastq2'].append(val[1])
df['antibody'].append("INPUT")
df['control'].append("INPUT")
df = pd.DataFrame(df)

In [None]:
df[df.columns[[3,4,0,1,2,5]]].to_csv('../nextflow/IRF2BP2_degraded_rep2_design.csv')

In [None]:
#process chips
! sudo ./nextflow run nf-core/chipseq --single_end --seq_center 'DFCI' --email 'jkobject@gmail.com' --narrow_peak --input ../nextflow/IRF2BP2_degraded_rep2_design.csv --genome GRCh38 --skip_preseq --max_cpus 24 -profile docker -w work

In [None]:
#!gsutil cp results/* ../../data/IRF2BP2_degraded_rep2/ && sudo rm -r work && sudo rm -r results

In [None]:
# get scaling values
mappedreads, umappedreads_norm, mapped = chip.getSpikeInControlScales(refgenome="../../ref/reference_droso.fna",FastQfolder='../../data/IRF2BP2_degraded_rep2/fastqs/',pairedEnd=True, cores=8)
mappedreads, umappedreads_norm, mapped

computing scales from the excel sheet

In [None]:
scales = [[508878,637972],
[1929129,11595],
[669536,429562],
[1272730,810802],
[743859,893304],
[312888,1154119],
[1086031,880901],
[850181,1019640]]

In [None]:
scales = [[1.0, 0.7976494266],
[0.006010484524, 1.0],
[0.6415816327, 1.0],
[0.6370573492, 1.0],
[1.0, 0.8327053276],
[1.0, 0.2711054926],
[0.8111195721, 1.0],
[1.0, 0.8338050685]]

### on scalled data

In [None]:
# on scaled data
bams = ! ls ../../IRF2BP2_degraded_rep2/bwa/mergedLibrary/*.bam
for i in range(int(len(bams[1:])/2)-1):
    name1 = bams[1+val]
    name2 = bams[9+val]
    chip.fullDiffPeak(name1,name2,control1='../../data/IRF2BP2_degraded_rep2/bwa/mergedLibrary/INPUT_R1.mLb.clN.sorted.bam', directory = "../../data/IRF2BP2_degraded_rep2/diffData_unscaled/", res_directory = "../../data/IRF2BP2_degraded_rep2/diffPeaks_unscaled/", scaling = scales[val][::-1])

In [None]:
bams = ! ls ../../data/IRF2BP2_degraded_rep2/bwa/mergedLibrary/*.bam
bams = [i.split('/')[-1].split('.')[0] for i in bams]
bams

In [None]:
# diffPeak on scalled data
sizes = [220, 191, 228, 285, 222, 204, 211, 194]
for i in range(int(len(bams[1:])/2)-1):
    name1 = bams[1+i]
    name2 = bams[9+i]
    chip.diffPeak(name1,name2, directory= "../../data/IRF2BP2_degraded_rep2/diffData/", res_directory='../../data/IRF2BP2_degraded_rep2/diffPeaks/', scaling1=scales[i][1], scaling2=scales[i][0], size=size[i])

In [None]:
scales = [1.0,
0.006010484524,
0.6415816327,
0.6370573492,
1.0,
1.0,
0.8111195721,
1.0,
0.7976494266,
1.0,
1.0,
1.0,
0.8327053276,
0.2711054926,
1.0,
0.8338050685]

In [None]:
chip.bigWigFrom(bams[1:],genome='GRCh38',scaling=scales)

In [None]:
! mv ../../data/recalib_bigwig_rep2/* ../../data/IRF2BP2_degraded_rep2/recalib_bigwig/
bw = ! ls ../../data/IRF2BP2_degraded_rep2/recalib_bigwig/*.bw
bw

In [None]:
cond1peak = ! ls ../../data/IRF2BP2_degraded_rep2/diffPeaks/*cond1.bed
cond2peak = ! ls ../../data/IRF2BP2_degraded_rep2/diffPeaks/*cond2.bed
commonpeak = ! ls ../../data/IRF2BP2_degraded_rep2/diffPeaks/*common.bed
commonpeak

In [None]:
names = ["FLAG_IRF2BP2","MED1","POLII_total","POLII_S2","POLII_S5","MYC","MYB","SPI1"]

In [None]:
for i in range(int(len(bw)/2)):
    name1 = bw[i]
    name2 = bw[8+i]
    peak = [cond1peak[i], commonpeak[i], cond2peak[i]]
    for val in peak:
        chip.dropWeirdChromosomes(val)
    name = names[i]
    chip.getPeaksAt(peak, [name1, name2], bigwignames=['DMSO', 'VHL'], peaknames=['DMSO_peaks', 'common', 'VHL_peaks'], window=3000, folder="", title=name, numthreads=20, name='../../data/IRF2BP2_degraded_rep2/diffPeaks/'+name+'_mat.pdf', refpoint='center', withDeeptools=True, torecompute= True,)

In [None]:
peaks = ! ls ../../data/IRF2BP2_degraded_rep2/bwa/mergedLibrary/macs/narrowPeak/*.narrowPeak
peaks

In [None]:
names = ["FLAG_IRF2BP2","MED1","POLII_total","POLII_S2","POLII_S5","MYC","MYB","SPI1"]
names.extend([i+'_VHL' for i in names])
names

In [None]:
bw

In [None]:
for i in range(len(bw)-1):
    chip.getPeaksAt(peaks[i], bw[1+i], window=3000, folder="", title=names[i], numthreads=7, torecompute=True, onlyProfile=True, name='../../data/IRF2BP2_degraded_rep2/diffPeaks/'+names[i]+'_mat_profile.pdf', refpoint='center', withDeeptools=True)
    chip.getPeaksAt(peaks[i], bw[1+i], window=3000, folder="", title=names[i], numthreads=7, torecompute=True, refpoint='center', onlyProfile=True, name='../../data/IRF2BP2_degraded_rep2/diffPeaks/'+names[i]+'_mat_profile_clust3.pdf', withDeeptools=True, cluster=3)

### unscalled

In [None]:
bams = !ls ../../data/IRF2BP2_degraded_rep2/bwa/mergedLibrary/mp*.bam
bams

In [None]:
!mkdir  ../../data/IRF2BP2_degraded_rep2/diffPeaks_unscaled
bw = ! ../../data/IRF2BP2_degraded_rep2/bwa/mergedLibrary/bigwig/*.bigWig

In [None]:
# on unscalled data 
for i in range(int(len(bams)/2)):
    if i <0:
        continue
    name1 = bams[i]
    name2 = bams[8+i]
    chip.fullDiffPeak(name1,name2, control1='../../data/results3/bwa/mergedLibrary/INPUT_R1.mLb.clN.sorted.bam', directory = "../../data/IRF2BP2_degraded_rep2/diffData_unscaled/", res_directory = "../../data/IRF2BP2_degraded_rep2/diffPeaks_unscaled/", pairedend=False)

In [None]:
commonpeak = ! ls ../../data/IRF2BP2_degraded_rep2/diffPeaks_unscaled/*common.bed
cond1peak = ! ls ../../data/IRF2BP2_degraded_rep2/diffPeaks_unscaled/*cond1.bed
cond2peak = ! ls ../../data/IRF2BP2_degraded_rep2/diffPeaks_unscaled/*cond2.bed
cond2peak

In [None]:
names = ["FLAG_IRF2BP2","MED1","POLII_total","POLII_S2","POLII_S5","MYC","MYB","SPI1"]

In [None]:
for i in range(int(len(bw[1:])/2)):
    name1 = bw[1+i]
    name2 = bw[9+i]
    peak = [cond1peak[i], commonpeak[i], cond2peak[i]]
    for val in peak:
        chip.dropWeirdChromosomes(val)
    name = names[i]
    chip.getPeaksAt(peak, [name1, name2], bigwignames=['DMSO', 'VHL'], peaknames=['DMSO_peaks', 'common', 'VHL_peaks'], window=3000, title=name, numthreads=7, torecompute=True, refpoint='center', folder="", name='../../data/IRF2BP2_degraded_rep2/diffPeaks_unscaled/'+name+'_mat.pdf', withDeeptools=True)

In [None]:
! gsutil -m cp -r "../../data/IRF2BP2_degraded_rep2" gs://amlproject/Chip/

## v3

In [None]:
! gsutil mv gs://transfer-amlproject/*MP7781*  gs://transfer-amlproject/IRF2BP2_v3/

### analysis

In [None]:
! mkdir ../../data/IRF2BP2_degraded_rep3 && mkdir ../../data/IRF2BP2_degraded_rep3/fastqs && gsutil -m cp gs://transfer-amlproject/IRF2BP2_v3/* ../../data/IRF2BP2_degraded_rep3/fastqs/

In [None]:
rename = {
"20200203_1_MP7781_S67_R1_001.fastq.gz":"mp831-MV411_IRF2BP_DMSO_6h-CDK8-r1_R1.fastq.gz",
"20200203_1_MP7781_S67_R2_001.fastq.gz":"mp831-MV411_IRF2BP_DMSO_6h-CDK8-r1_R2.fastq.gz",
"20200203_2_MP7781_S68_R1_001.fastq.gz":"mp832-MV411_IRF2BP_DMSO_6h-BRD4-r1_R1.fastq.gz",
"20200203_2_MP7781_S68_R2_001.fastq.gz":"mp832-MV411_IRF2BP_DMSO_6h-BRD4-r1_R2.fastq.gz",
"20200203_3_MP7781_S69_R1_001.fastq.gz":"mp833-MV411_IRF2BP_DMSO_6h-IRF8-r1_R1.fastq.gz",
"20200203_3_MP7781_S69_R2_001.fastq.gz":"mp833-MV411_IRF2BP_DMSO_6h-IRF8-r1_R2.fastq.gz",
"20200203_4_MP7781_S70_R1_001.fastq.gz":"mp834-MV411_IRF2BP_DMSO_6h-SMC1-r1_R1.fastq.gz",
"20200203_4_MP7781_S70_R2_001.fastq.gz":"mp834-MV411_IRF2BP_DMSO_6h-SMC1-r1_R2.fastq.gz",
"20200203_5_MP7781_S71_R2_001.fastq.gz":"mp835-MV411_IRF2BP_DMSO_6h-MED1-r3_R2.fastq.gz",
"20200203_5_MP7781_S71_R1_001.fastq.gz":"mp835-MV411_IRF2BP_DMSO_6h-MED1-r3_R1.fastq.gz",
"20200203_6_MP7781_S72_R1_001.fastq.gz":"mp836-MV411_IRF2BP_DMSO_6h-ZEB2-r1_R1.fastq.gz",
"20200203_6_MP7781_S72_R2_001.fastq.gz":"mp836-MV411_IRF2BP_DMSO_6h-ZEB2-r1_R2.fastq.gz",
"20200203_7_MP7781_S73_R1_001.fastq.gz":"mp837-MV411_IRF2BP_DMSO_6h-CEBPA-r1_R1.fastq.gz",
"20200203_7_MP7781_S73_R2_001.fastq.gz":"mp837-MV411_IRF2BP_DMSO_6h-CEBPA-r1_R2.fastq.gz",
"20200203_8_MP7781_S74_R1_001.fastq.gz":"mp838-MV411_IRF2BP_VHL_6h-CDK8-r1_R1.fastq.gz",
"20200203_8_MP7781_S74_R2_001.fastq.gz":"mp838-MV411_IRF2BP_VHL_6h-CDK8-r1_R2.fastq.gz",
"20200203_9_MP7781_S75_R1_001.fastq.gz":"mp839-MV411_IRF2BP_VHL_6h-BRD4-r1_R1.fastq.gz",
"20200203_9_MP7781_S75_R2_001.fastq.gz":"mp839-MV411_IRF2BP_VHL_6h-BRD4-r1_R2.fastq.gz",
"20200203_10_MP7781_S76_R2_001.fastq.gz":"mp840-MV411_IRF2BP_VHL_6h-IRF8-r1_R2.fastq.gz",
"20200203_10_MP7781_S76_R1_001.fastq.gz":"mp840-MV411_IRF2BP_VHL_6h-IRF8-r1_R1.fastq.gz",
"20200203_11_MP7781_S77_R1_001.fastq.gz":"mp841-MV411_IRF2BP_VHL_6h-SMC1-r1_R1.fastq.gz",
"20200203_11_MP7781_S77_R2_001.fastq.gz":"mp841-MV411_IRF2BP_VHL_6h-SMC1-r1_R2.fastq.gz",
"20200203_12_MP7781_S78_R1_001.fastq.gz":"mp842-MV411_IRF2BP_VHL_6h-MED1-r3_R1.fastq.gz",
"20200203_12_MP7781_S78_R2_001.fastq.gz":"mp842-MV411_IRF2BP_VHL_6h-MED1-r3_R2.fastq.gz",
"20200203_13_MP7781_S79_R1_001.fastq.gz":"mp843-MV411_IRF2BP_VHL_6h-ZEB2-r1_R1.fastq.gz",
"20200203_13_MP7781_S79_R2_001.fastq.gz":"mp843-MV411_IRF2BP_VHL_6h-ZEB2-r1_R2.fastq.gz",
"20200203_14_MP7781_S80_R2_001.fastq.gz":"mp844-MV411_IRF2BP_VHL_6h-CEBPA-r1_R2.fastq.gz",
"20200203_14_MP7781_S80_R1_001.fastq.gz":"mp844-MV411_IRF2BP_VHL_6h-CEBPA-r1_R1.fastq.gz",
"20200203_Input_MP7781_S81_R1_001.fastq.gz":"mp845-MV411_IRF2BP2_-INPUT-r1_R1.fastq.gz",
"20200203_Input_MP7781_S81_R2_001.fastq.gz":"mp845-MV411_IRF2BP2_-INPUT-r1_R2.fastq.gz"
}

In [None]:
for k,v in rename.items():
    ! mv ../../data/IRF2BP2_degraded_rep3/fastqs/$k ../../data/IRF2BP2_degraded_rep3/fastqs/$v

In [None]:
from gsheets import Sheets
sheets = Sheets.from_files('~/.client_secret.json', '~/.storage.json')
url="https://docs.google.com/spreadsheets/d/1yFLjYB1McU530JnLgL0QIMAKIkVl3kl0_LCHje2gk8U"
gsheet = sheets.get(url).sheets[2].to_frame()

In [None]:
a = ! ls ../../data/IRF2BP2_degraded_rep3/fastqs

In [None]:
gsheet

In [None]:
df = {
"fastq_1": [],
"fastq_2": [],
"antibody": [],
"group": [],
"replicate": [],
"control": []
}
for val in helper.grouped(a[:-2],2):
    row = gsheet[gsheet.id==val[0].split('/')[-1].split('-')[0]]
    df['group'].append(row['name'].values[0])
    df['replicate'].append(1)
    df['fastq_1'].append(val[0])
    df['fastq_2'].append(val[1])
    df['antibody'].append(row['protein'].values[0])
    df['control'].append("INPUT")
df['group'].append('INPUT')
df['replicate'].append(1)
df['fastq_1'].append(a[-2])
df['fastq_2'].append(a[-1])
df['antibody'].append("")
df['control'].append("")
df = pd.DataFrame(df)

In [None]:
df

In [None]:
df[df.columns[[3,4,0,1,2,5]]].to_csv('../nextflow/IRF2BP2_degraded_rep3_design.csv',index=False)

In [None]:
#process chips
! sudo ../nextflow run nf-core/chipseq --paired_end --seq_center 'DFCI' --email \
'jkobject@gmail.com' --narrow_peak --input ../nextflow/../../data/IRF2BP2_degraded_rep3_design.csv --genome GRCh38 --skip_preseq \
--max_cpus 24 -profile docker -w work

In [None]:
ls

In [None]:
!cp -r results/* ../../data/IRF2BP2_degraded_rep3/ && sudo rm -r work && sudo rm -r results

In [None]:
ls ../TrimGalore-0.6.5/trim_galore

In [None]:
# get scaling values
mappedreads, umappedreads_norm = chip.getSpikeInControlScales(refgenome="../../ref/reference_droso.fna",
                                                                      fastQfolder='../../data/IRF2BP2_degraded_rep3/fastqs',
                                                                      pairedEnd=True, cores=8,
                                                                      tofilter=False,
                                                                      totrim=False,
                                                                      tomap=False,
                                                                      pathtotrim_galore="../TrimGalore-0.6.5/trim_galore")
mappedreads, umappedreads_norm

computing scales from the excel sheet

In [None]:
scales = [[536923,632558],
[601370,681405],
[2402198,1676203],
[417892,216192],
[1544590,1350802],
[1174994,1376726],
[289635,240366]]

In [None]:
scales= [[1.0, 0.8488122828],
[1.0, 0.8825441551],
[0.6977788675, 1.0],
[0.5173394083, 1.0],
[0.8745375796, 1.0],
[1.0, 0.8534697536],
[0.8298927961, 1.0]]

In [None]:
bams = ! ls ../../data/IRF2BP2_degraded_rep3/bwa/mergedLibrary/*.bam #../../data/results3/bwa/mergedLibrary/*.bam
bams

In [None]:
bams[7]

In [None]:
bams = [bam1.split('/')[-1].split('.')[0] for bam1 in bams]

### on scalled data

In [None]:
# diffPeak on scaled data
size=[206, 218, 189, 194, 217, 217, 176]
for i in range(int(len(bams)/2)):
    if i<0:
        continue
    name1 = bams[i]
    name2 = bams[7+i]
    print(name1,name2)
    chip.diffPeak(name1, name2, directory= "../../data/IRF2BP2_degraded_rep3/diffData/", res_directory='../../data/IRF2BP2_degraded_rep3/diffPeaks/', scaling1=scales[i][1], scaling2=scales[i][0], size=size[i])

In [None]:
os.popen('for i in $(ls ../../data/IRF2BP2_degraded_rep3/diffPeaks/*.bed); \
            do echo $(wc -l $i); \
            done').read().split('\n')

In [None]:
# diffPeak on scaled data
for i in range(int(len(bams)/2)):
    if i <0:
        continue
    name1 = bams[i]
    name2 = bams[7+i]
    print(name1,name2)
    chip.fullDiffPeak(name1,name2, control1='../../data/IRF2BP2_degraded_rep3/INPUT_R1.mLb.clN.sorted.bam', directory = "../../data/IRF2BP2_degraded_rep3/diffData/", res_directory = "../../data/IRF2BP2_degraded_rep3/diffPeaks/", isTF=True, compute_size=True, pairedend=False)

In [None]:
scales = [1.0,
1.0,
0.6977788675,
0.5173394083,
0.8745375796,
1.0,
0.8298927961,
0.8488122828,
0.8825441551,
1.0,
1.0,
1.0,
0.8534697536,
1.0]

In [None]:
bams

In [None]:
chip.bigWigFrom(bams[1:], 
                genome='GRCh38',scaling=scales,
               numthreads=8)

In [None]:
! mv diffPeaks ../../data/IRF2BP2_degraded_rep3
! mv diffData ../../data/IRF2BP2_degraded_rep3

In [None]:
!mv bigwig ../../data/recalib_bigwig_3 

In [None]:
os.popen('for i in $(ls ../../data/IRF2BP2_degraded_rep3/diffPeaks); \
            do echo $(wc -l "../../data/IRF2BP2_degraded_rep3/diffPeaks/"$i); \
            done').read().split('\n')

In [None]:
bw = ! ls ../../data/recalib_bigwig_3/*.bw
bw

In [None]:
cond1peak = ! ls ../../data/IRF2BP2_degraded_rep3/diffPeaks/*cond1.bed
cond2peak = ! ls ../../data/IRF2BP2_degraded_rep3/diffPeaks/*cond2.bed
commonpeak = ! ls ../../data/IRF2BP2_degraded_rep3/diffPeaks/*common.bed
cond1peak

In [None]:
names = ["CDK8","BRD4","IRF8","SMC1","MED1","ZEB2","CEBPA"]

In [None]:
for i in range(int(len(bw)/2)):
    if i<0:
        continue
    name1 = bw[i]
    name2 = bw[7+i]
    peak = [cond1peak[i], commonpeak[i], cond2peak[i]]
    #for val in peak:
        #chip.dropWeirdChromosomes(val)
    name = names[i]
    chip.getPeaksAt(peak, [name1, name2], bigwignames=['DMSO', 'VHL'], refpoint='center', peaknames=['DMSO_peaks', 'common', 'VHL_peaks'], window=3000, folder="", title=name, numthreads=14, name='../../data/IRF2BP2_degraded_rep3/diffPeaks/'+name+'_mat.pdf', withDeeptools=True, torecompute=True)

In [None]:
names = ["CDK8","BRD4","IRF8","SMC1","MED1","ZEB2","CEBPA"]
names.extend([i+'_VHL' for i in names])
names

In [None]:
peaks = ! ls ../../data/IRF2BP2_degraded_rep3/bwa/mergedLibrary/macs/narrowPeak/*.narrowPeak
peaks

In [None]:
for i in range(len(bw)):
    chip.getPeaksAt(peaks[i], bw[i], window=3000, folder="", title=names[i], numthreads=7, torecompute=True, onlyProfile=True, refpoint='center', name='../../data/IRF2BP2_degraded_rep3/diffPeaks/'+names[i]+'_mat_profile.pdf', withDeeptools=True)
    chip.getPeaksAt(peaks[i], bw[i], window=3000, folder="", title=names[i], numthreads=7, torecompute=True, refpoint='center', onlyProfile=True,name='../../data/IRF2BP2_degraded_rep3/diffPeaks/'+names[i]+'_mat_profile_clust3.pdf', withDeeptools=True, cluster=3)

### on unscalled data

In [None]:
bams = !ls ../../data/IRF2BP2_degraded_rep3/bwa/mergedLibrary/mp*.bam
bams

In [None]:
! mkdir ../../data/IRF2BP2_degraded_rep3/diffPeaks_unscaled

In [None]:
#on unscalled data 
for i in range(int(len(bams)/2)):
    if i < 0:
        continue
    name1 = bams[i]
    name2 = bams[7+i]
    print(name1,name2)
    chip.fullDiffPeak(name1,name2, control1='../../data/results3/bwa/mergedLibrary/INPUT_R1.mLb.clN.sorted.bam', directory = "../../data/IRF2BP2_degraded_rep3/diffData_unscaled/", res_directory = "../../data/IRF2BP2_degraded_rep3/diffPeaks_unscaled/",pairedend=False)

In [None]:
bw = ! ls ../../data/IRF2BP2_degraded_rep3/bwa/mergedLibrary/bigwig/*.bigWig
bw

In [None]:
cond1peak = ! ls ../../data/IRF2BP2_degraded_rep3/diffPeaks_unscaled/*cond1.bed
cond2peak = ! ls ../../data/IRF2BP2_degraded_rep3/diffPeaks_unscaled/*cond2.bed
commonpeak = ! ls ../../data/IRF2BP2_degraded_rep3/diffPeaks_unscaled/*common.bed
cond1peak

In [None]:
names = ["CDK8","BRD4","IRF8","SMC1","MED1","ZEB2","CEBPA"]

In [None]:
for i in range(int((len(bw)-1)/2)):
    if i <0:
        continue
    name1 = bw[1+i]
    name2 = bw[8+i]
    peak = [cond1peak[i], commonpeak[i], cond2peak[i]]
    #for val in peak:
        #chip.dropWeirdChromosomes(val)
    name = names[i]
    print(name1,name2)
    chip.getPeaksAt(peak, [name1, name2], bigwignames=['DMSO', 'VHL'], peaknames=['DMSO_peaks', 'common', 'VHL_peaks'], window=3000, folder="", title=name, numthreads=14, torecompute=True, refpoint="center", name='../../data/IRF2BP2_degraded_rep3/diffPeaks_unscaled/'+name+'_mat.pdf', withDeeptools=True)

In [None]:
! gsutil -m cp -r "../../data/IRF2BP2_degraded_rep3/" gs://amlproject/Chip/

## histones V1&2

In [None]:
bamfolder="../../data/IRF2BP2_degraded_histones/fastqs/"

In [None]:
! mkdir ../../data/IRF2BP2_degraded_histones && mkdir ../../data/IRF2BP2_degraded_histones/fastqs && gsutil cp "gs://transfer-amlproject/*.fastq.gz" $bamfolder

In [None]:
! gsutil -m cp gs://transfer-amlproject/*MP7693* gs://transfer-amlproject/IRF2BP2_hist/ && gsutil -m rm gs://transfer-amlproject/*MP7693*


### analysis

In [None]:
rename = {
    "20200103_1_MP7693_S6_R1_001.fastq.gz":"mp811-MV411_IRF2BP2_DMSO_6h-H3K27ac-r1_R1.fastq.gz",
    "20200103_1_MP7693_S6_R2_001.fastq.gz":"mp811-MV411_IRF2BP2_DMSO_6h-H3K27ac-r1.fastq.gz",
    "20200103_2_MP7693_S7_R1_001.fastq.gz":"mp812-MV411_IRF2BP2_DMSO_6h-H3K27ac-r2_R1.fastq.gz",
    "20200103_2_MP7693_S7_R2_001.fastq.gz":"mp812-MV411_IRF2BP2_DMSO_6h-H3K27ac-r2.fastq.gz",
    "20200103_3_MP7693_S8_R1_001.fastq.gz":"mp813-MV411_IRF2BP2_DMSO_6h-H3K27me3-r1_R1.fastq.gz",
    "20200103_3_MP7693_S8_R2_001.fastq.gz":"mp813-MV411_IRF2BP2_DMSO_6h-H3K27me3-r1.fastq.gz",
    "20200103_4_MP7693_S9_R1_001.fastq.gz":"mp814-MV411_IRF2BP2_DMSO_6h-H3K27me3-r2_R1.fastq.gz",
    "20200103_4_MP7693_S9_R2_001.fastq.gz":"mp814-MV411_IRF2BP2_DMSO_6h-H3K27me3-r2.fastq.gz",
    "20200103_5_MP7693_S10_R1_001.fastq.gz":"mp815-MV411_IRF2BP2_DMSO_6h-H3K4me1-r1_R1.fastq.gz",
    "20200103_5_MP7693_S10_R2_001.fastq.gz":"mp815-MV411_IRF2BP2_DMSO_6h-H3K4me1-r1.fastq.gz",
    "20200103_6_MP7693_S11_R1_001.fastq.gz":"mp816-MV411_IRF2BP2_DMSO_6h-H3K4me1-r2_R1.fastq.gz",
    "20200103_6_MP7693_S11_R2_001.fastq.gz":"mp816-MV411_IRF2BP2_DMSO_6h-H3K4me1-r2.fastq.gz",
    "20200103_7_MP7693_S12_R1_001.fastq.gz":"mp817-MV411_IRF2BP2_DMSO_6h-H3K4me3-r1_R1.fastq.gz",
    "20200103_7_MP7693_S12_R2_001.fastq.gz":"mp817-MV411_IRF2BP2_DMSO_6h-H3K4me3-r1.fastq.gz",
    "20200103_8_MP7693_S13_R1_001.fastq.gz":"mp818-MV411_IRF2BP2_DMSO_6h-H3K4me3-r2_R1.fastq.gz",
    "20200103_8_MP7693_S13_R2_001.fastq.gz":"mp818-MV411_IRF2BP2_DMSO_6h-H3K4me3-r2.fastq.gz",
    "20200103_9_MP7693_S14_R1_001.fastq.gz":"mp819-MV411_IRF2BP2_DMSO_6h-H3K79me2-r1_R1.fastq.gz",
    "20200103_9_MP7693_S14_R2_001.fastq.gz":"mp819-MV411_IRF2BP2_DMSO_6h-H3K79me2-r1.fastq.gz",
    "20200103_10_MP7693_S15_R1_001.fastq.gz":"mp820-MV411_IRF2BP2_DMSO_6h-H3K79me2-r2_R1.fastq.gz",
    "20200103_10_MP7693_S15_R2_001.fastq.gz":"mp820-MV411_IRF2BP2_DMSO_6h-H3K79me2-r2.fastq.gz",
    "20200103_11_MP7693_S16_R1_001.fastq.gz":"mp821-MV411_IRF2BP2_VHL_6h-H3K27ac-r1_R1.fastq.gz",
    "20200103_11_MP7693_S16_R2_001.fastq.gz":"mp821-MV411_IRF2BP2_VHL_6h-H3K27ac-r1.fastq.gz",
    "20200103_12_MP7693_S17_R1_001.fastq.gz":"mp822-MV411_IRF2BP2_VHL_6h-H3K27ac-r2_R1.fastq.gz",
    "20200103_12_MP7693_S17_R2_001.fastq.gz":"mp822-MV411_IRF2BP2_VHL_6h-H3K27ac-r2.fastq.gz",
    "20200103_13_MP7693_S18_R1_001.fastq.gz":"mp823-MV411_IRF2BP2_VHL_6h-H3K27me3-r1_R1.fastq.gz",
    "20200103_13_MP7693_S18_R2_001.fastq.gz":"mp823-MV411_IRF2BP2_VHL_6h-H3K27me3-r1.fastq.gz",
    "20200103_14_MP7693_S19_R1_001.fastq.gz":"mp824-MV411_IRF2BP2_VHL_6h-H3K27me3-r2_R1.fastq.gz",
    "20200103_14_MP7693_S19_R2_001.fastq.gz":"mp824-MV411_IRF2BP2_VHL_6h-H3K27me3-r2.fastq.gz",
    "20200103_15_MP7693_S20_R1_001.fastq.gz":"mp825-MV411_IRF2BP2_VHL_6h-H3K4me1-r1_R1.fastq.gz",
    "20200103_15_MP7693_S20_R2_001.fastq.gz":"mp825-MV411_IRF2BP2_VHL_6h-H3K4me1-r1.fastq.gz",
    "20200103_16_MP7693_S21_R1_001.fastq.gz":"mp826-MV411_IRF2BP2_VHL_6h-H3K4me1-r2_R1.fastq.gz",
    "20200103_16_MP7693_S21_R2_001.fastq.gz":"mp826-MV411_IRF2BP2_VHL_6h-H3K4me1-r2.fastq.gz",
    "20200103_17_MP7693_S22_R1_001.fastq.gz":"mp827-MV411_IRF2BP2_VHL_6h-H3K4me3-r1_R1.fastq.gz",
    "20200103_17_MP7693_S22_R2_001.fastq.gz":"mp827-MV411_IRF2BP2_VHL_6h-H3K4me3-r1.fastq.gz",
    "20200103_18_MP7693_S23_R1_001.fastq.gz":"mp828-MV411_IRF2BP2_VHL_6h-H3K4me3-r2_R1.fastq.gz",
    "20200103_18_MP7693_S23_R2_001.fastq.gz":"mp828-MV411_IRF2BP2_VHL_6h-H3K4me3-r2.fastq.gz",
    "20200103_19_MP7693_S24_R1_001.fastq.gz":"mp829-MV411_IRF2BP2_VHL_6h-H3K79me2-r1_R1.fastq.gz",
    "20200103_19_MP7693_S24_R2_001.fastq.gz":"mp829-MV411_IRF2BP2_VHL_6h-H3K79me2-r1.fastq.gz",
    "20200103_20_MP7693_S25_R1_001.fastq.gz":"mp830-MV411_IRF2BP2_VHL_6h-H3K79me2-r2_R1.fastq.gz",
    "20200103_20_MP7693_S25_R2_001.fastq.gz":"mp830-MV411_IRF2BP2_VHL_6h-H3K79me2-r2.fastq.gz"
}

In [None]:
! gsutil -m cp gs://transfer-amlproject/IRF2BP2_hist/* ../../data/IRF2BP2_degraded_histones/fastqs/

In [None]:
for k,v in rename.items():
    ! mv $bamfolder$k $bamfolder$v

In [None]:
inputfastq="gs://amlproject/Chip/fastqs/mp99-MV411-INPUT-r1.fastq.gz"
! gsutil cp $inputfastq $bamfolder

In [None]:
a = ! ls $bamfolder

In [None]:
from gsheets import Sheets
sheets = Sheets.from_files('~/.client_secret.json', '~/.storage.json')
url="https://docs.google.com/spreadsheets/d/1yFLjYB1McU530JnLgL0QIMAKIkVl3kl0_LCHje2gk8U"
gsheet = sheets.get(url).sheets[2].to_frame()

In [None]:
df = {
"fastq1": [],
"fastq2": [],
"antibody": [],
"group": [],
"replicate": [],
"control": []
}
for val in helper.grouped(a[:-1],2):
    row = gsheet[gsheet.id==val[0].split('/')[-1].split('_')[0]]
    df['group'].append((row.id.values[0] + '_'+ row.name_replicate.values[0] +"_"+row.protein.values[0]))
    df['replicate'].append(1)
    df['fastq1'].append(val[0])
    #df['fastq2'].append(val[1])
    df['antibody'].append(row.protein.values[0])
    df['control'].append("INPUT")
df['group'].append('INPUT')
df['replicate'].append(1)
df['fastq1'].append(a[-1])
#df['fastq2'].append(val[1])
df['antibody'].append("INPUT")
df['control'].append("INPUT")
df = pd.DataFrame(df)

In [None]:
df[df.columns[[3,4,0,1,2,5]]].to_csv('../nextflow/IRF2BP2_degraded_histones_design.csv')

In [None]:
#process chips
 ! sudo ./nextflow run nf-core/chipseq --single_end --seq_center 'DFCI' --email 'jkobject@gmail.com' --narrow_peak --input ../nextflow/IRF2BP2_degraded_histones_design.csv --genome GRCh38 --skip_preseq --max_cpus 24 -profile docker -w work

In [None]:
! mv results/* ../../data/IRF2BP2_degraded_histones/

In [None]:
bams = ! ls ../../data/IRF2BP2_degraded_hist/bwa/mergedLibrary/*.bam
bams = [i.split('/')[-1].split('.')[0] for i in bams]
bams

In [None]:
scales = [[191079,278272],
[274625,494562],
[1094016,2067804],
[1328914,969565],
[237779,132422],
[162971,174092],
[115788,187078],
[134269,495924],
[171890,225315],
[188370,199911]]

In [None]:
scales = [[1.0,0.6866626897],
[1.0,0.5552893267],
[1.0,0.5290714207],
[0.7295919826,1.0],
[0.5569120906,1.0],
[1.0,0.9361199825],
[1.0,0.6189290029],
[1.0,0.2707451142],
[1.0,0.762887513],
[1.0,0.9422693098]]

### on scalled data

In [None]:
# diffPeak on scaled data
size=[206, 213, 47, 272, 229, 235, 190, 196, 287, 288]
for i in range(int(len(bams)/2)):
    name1 = bams[1+i]
    name2 = bams[11+i]
    print(name1,name2)
    chip.diffPeak(name1, name2, directory= "../../data/IRF2BP2_degraded_hist/diffData/", res_directory='../../data/IRF2BP2_degraded_hist/diffPeaks/', scaling1=scales[i][1], scaling2=scales[i][0], size=size[i])

In [None]:
# diffPeak on scaled data
for i in range(int(len(bams)/2)):
    name1 = bams[1+i]
    name2 = bams[11+i]
    print(name1,name2)
    chip.fullDiffPeak(name1,name2, bams[0], scaling = scales[i], directory='../../data/IRF2BP2_degraded_hist/diffData/',
res_directory="../../data/IRF2BP2_degraded_hist/diffPeaks/", isTF=False, compute_size=True, pairedend=False)

In [None]:
scales = [1.0,
1.0,
1.0,
0.7295919826,
0.5569120906,
1.0,
1.0,
1.0,
1.0,
1.0,
0.6866626897,
0.5552893267,
0.5290714207,
1.0,
1.0,
0.9361199825,
0.6189290029,
0.2707451142,
0.762887513,
0.9422693098,]

In [None]:
chip.bigWigFrom(bams[1:],genome='GRCh38',scaling=scales)

In [None]:
! mv ../../recalib_bigwig_hist/* ../../data/IRF2BP2_degraded_histones/recalib_bigwig/
bw = ! ls ../../data/IRF2BP2_degraded_histones/recalib_bigwig/*.bw
bw

In [None]:
! rm ../../data/IRF2BP2_degraded_hist/diffData/mp*_R1*

In [None]:
cond1peak = ! ls ../../data/IRF2BP2_degraded_hist/diffPeaks/*cond1.bed
cond2peak = ! ls ../../data/IRF2BP2_degraded_hist/diffPeaks/*cond2.bed
commonpeak = ! ls ../../data/IRF2BP2_degraded_hist/diffPeaks/*common.bed
cond1peak

In [None]:
names = ["H3K27ac", "H3K27ac_v2","H3K27me3","H3K27me3_v2","H3K4me1","H3K4me1_v2", "H3K4me3", "H3K4me3_v2", "H3K79me2", "H3K79me2_v2"]

In [None]:
for i in range(int(len(bw)/2)):
    if i < 0 : 
        continue
    print(i)
    name1 = bw[i]
    name2 = bw[10+i]
    peak = [cond1peak[i], commonpeak[i], cond2peak[i]]
    #for val in peak:
        #chip.dropWeirdChromosomes(val)
    name = names[i]
    print(name1,name2)
    chip.getPeaksAt(peak, [name1, name2], torecompute= True, bigwignames=['DMSO', 'VHL'], peaknames=['DMSO_peaks', 'common', 'VHL_peaks'], window=3000, folder="", title=name, numthreads=7, refpoint='center', name='../../data/IRF2BP2_degraded_hist/diffPeaks/'+name+'_mat.pdf', withDeeptools=True)

In [None]:
names = ["H3K27ac", "H3K27ac_v2","H3K27me3","H3K27me3_v2","H3K4me1","H3K4me1_v2", "H3K4me3", "H3K4me3_v2", "H3K79me2", "H3K79me2_v2"]
names.extend([i+'_VHL' for i in names])
names

In [None]:
peaks = ! ls ../../data/IRF2BP2_degraded_histones/bwa/mergedLibrary/macs/broadPeak/*.broadPeak 
peaks

In [None]:
for i in range(len(bw)):
    chip.getPeaksAt(peaks[i], bw[i], window=3000, folder="", title=names[i], numthreads=7, torecompute=True, onlyProfile=True, refpoint='center', name='../../data/IRF2BP2_degraded_hist/diffPeaks/'+names[i]+'_mat_profile.pdf', withDeeptools=True)
    chip.getPeaksAt(peaks[i], bw[i], window=3000, folder="", title=names[i], numthreads=7, torecompute=True, refpoint='center', onlyProfile=True,name='../../data/IRF2BP2_degraded_hist/diffPeaks/'+names[i]+'_mat_profile_clust3.pdf', withDeeptools=True, cluster=3)

### on unscalled data

In [None]:
bams = !ls ../../data/IRF2BP2_degraded_hist/bwa/mergedLibrary/mp*.bam
bams

In [None]:
bw = ! ls ../../data/IRF2BP2_degraded_histones/bwa/mergedLibrary/bigwig/*.bigWig
bw

In [None]:
# on unscalled data 
for i in range(int((len(bams)-1)/2)):
    name1 = bams[1+i]
    name2 = bams[11+i]
    chip.fullDiffPeak(name1,name2, control1='../../data/IRF2BP2_degraded_hist/INPUT_R1.mLb.clN.sorted.bam', directory = "../../data/IRF2BP2_degraded_hist/diffData_unscaled/", res_directory = "../../data/IRF2BP2_degraded_hist/diffPeaks_unscaled/",isTF=False, compute_size=True, pairedend=False)

In [None]:
cond1peak = ! ls ../../data/IRF2BP2_degraded_hist/diffPeaks_unscaled/*cond1.bed
cond2peak = ! ls ../../data/IRF2BP2_degraded_hist/diffPeaks_unscaled/*cond2.bed
commonpeak = ! ls ../../data/IRF2BP2_degraded_hist/diffPeaks_unscaled/*common.bed
cond2peak

In [None]:
names = ["H3K27ac", "H3K27ac_v2","H3K27me3","H3K27me3_v2","H3K4me1","H3K4me1_v2", "H3K4me3", "H3K4me3_v2", "H3K79me2", "H3K79me2_v2"]

In [None]:
for i in range(int(len(bw)/2)):
    if i < 0:
        continue
    name1 = bw[1+i]
    name2 = bw[11+i]
    peak = [cond1peak[i], commonpeak[i], cond2peak[i]]
    #for val in peak:
        #chip.dropWeirdChromosomes(val)
    name = names[i]
    print(name1,name2)
    chip.getPeaksAt(peak, [name1, name2], bigwignames=['DMSO', 'VHL'], peaknames=['DMSO_peaks', 'common', 'VHL_peaks'], window=3000, folder="", title=name, numthreads=10, refpoint='center', name='../../data/IRF2BP2_degraded_hist/diffPeaks_unscaled/'+name+'_mat.pdf', withDeeptools=True, torecompute=True)

In [None]:
! gsutil -m cp -r "../../data/IRF2BP2_degraded_hist/" gs://amlproject/Chip/

In [None]:
## on local
! scp runningchip2.us-east1-b.aml-chipseq-rnaseq-259419:"~/data/IRF2BP2_degraded_rep3/diffPeaks_unscaled/*.pdf" unscaled/v3/
! scp runningchip2.us-east1-b.aml-chipseq-rnaseq-259419:"~/data/IRF2BP2_degraded_rep3/diffPeaks/*.pdf" scaled/v3/
! scp runningchip2.us-east1-b.aml-chipseq-rnaseq-259419:"~/data/IRF2BP2_degraded_rep1/diffPeaks_unscaled/*.pdf" unscaled/v3/
! scp runningchip2.us-east1-b.aml-chipseq-rnaseq-259419:"~/data/IRF2BP2_degraded_rep1/diffPeaks/*.pdf" scaled/v1/
! scp runningchip2.us-east1-b.aml-chipseq-rnaseq-259419:"~/data/IRF2BP2_degraded_hist/diffPeaks_unscaled/*.pdf" unscaled/v1/
! scp runningchip2.us-east1-b.aml-chipseq-rnaseq-259419:"~/data/IRF2BP2_degraded_hist/diffPeaks/*.pdf" unscaled/v1/
! scp runningchip2.us-east1-b.aml-chipseq-rnaseq-259419:"~/data/IRF2BP2_degraded_rep2/diffPeaks_unscaled/*.pdf" unscaled/v2/
! scp runningchip2.us-east1-b.aml-chipseq-rnaseq-259419:"~/data/IRF2BP2_degraded_rep2/diffPeaks/*.pdf" scaled/v2/

## v4

In [None]:
! gsutil -m mv gs://transfer-amlproject/*MP7868*  gs://transfer-amlproject/IRF2BP2_v4/

### analysis

In [None]:
! mkdir ../../data/IRF2BP2_degraded_rep4 && mkdir ../../data/IRF2BP2_degraded_rep4/fastqs && gsutil -m cp gs://transfer-amlproject/IRF2BP2_v4/* ../../data/IRF2BP2_degraded_rep4/fastqs

In [None]:
a = ! ls ../../data/IRF2BP2_degraded_rep4/fastqs
a

In [None]:
rename = {
"20200302_1_MP7868_S51_R1_001.fastq.gz":"mp846-MV411_IRF2BP_DMSO_6h-MED1-r4_R1.fastq.gz", 
"20200302_1_MP7868_S51_R2_001.fastq.gz":"mp846-MV411_IRF2BP_DMSO_6h-MED1-r4_R2.fastq.gz", 
"20200302_2_MP7868_S52_R1_001.fastq.gz":"mp847-MV411_IRF2BP_DMSO_6h-MED1-r5_R1.fastq.gz", 
"20200302_2_MP7868_S52_R2_001.fastq.gz":"mp847-MV411_IRF2BP_DMSO_6h-MED1-r5_R2.fastq.gz", 
"20200302_3_MP7868_S53_R1_001.fastq.gz":"mp848-MV411_IRF2BP_DMSO_6h-FLAG_IRF2BP2-r3_R1.fastq.gz", 
"20200302_3_MP7868_S53_R2_001.fastq.gz":"mp848-MV411_IRF2BP_DMSO_6h-FLAG_IRF2BP2-r3_R2.fastq.gz",
"20200302_4_MP7868_S54_R1_001.fastq.gz":"mp849-MV411_IRF2BP_DMSO_6h-POLII_total-r3_R1.fastq.gz",
"20200302_4_MP7868_S54_R2_001.fastq.gz":"mp849-MV411_IRF2BP_DMSO_6h-POLII_total-r3_R2.fastq.gz",
"20200302_5_MP7868_S55_R1_001.fastq.gz":"mp850-MV411_IRF2BP_DMSO_6h-POLII_S2-r3_R1.fastq.gz",
"20200302_5_MP7868_S55_R2_001.fastq.gz":"mp850-MV411_IRF2BP_DMSO_6h-POLII_S2-r3_R2.fastq.gz",
"20200302_6_MP7868_S56_R1_001.fastq.gz":"mp851-MV411_IRF2BP_DMSO_6h-POLII_S5-r3_R1.fastq.gz",
"20200302_6_MP7868_S56_R2_001.fastq.gz":"mp851-MV411_IRF2BP_DMSO_6h-POLII_S5-r3_R2.fastq.gz",
"20200302_7_MP7868_S57_R1_001.fastq.gz":"mp852-MV411_IRF2BP_VHL_6h-MED1-r4_R1.fastq.gz",
"20200302_7_MP7868_S57_R2_001.fastq.gz":"mp852-MV411_IRF2BP_VHL_6h-MED1-r4_R2.fastq.gz",
"20200302_8_MP7868_S58_R1_001.fastq.gz":"mp853-MV411_IRF2BP_VHL_6h-MED1-r5_R1.fastq.gz",
"20200302_8_MP7868_S58_R2_001.fastq.gz":"mp853-MV411_IRF2BP_VHL_6h-MED1-r5_R2.fastq.gz",
"20200302_9_MP7868_S59_R1_001.fastq.gz":"mp854-MV411_IRF2BP_VHL_6h-FLAG_IRF2BP2-r3_R1.fastq.gz",
"20200302_9_MP7868_S59_R2_001.fastq.gz":"mp854-MV411_IRF2BP_VHL_6h-FLAG_IRF2BP2-r3_R2.fastq.gz", 
"20200302_10_MP7868_S60_R1_001.fastq.gz":"mp855-MV411_IRF2BP_VHL_6h-POLII_total-r3_R1.fastq.gz",
"20200302_10_MP7868_S60_R2_001.fastq.gz":"mp855-MV411_IRF2BP_VHL_6h-POLII_total-r3_R2.fastq.gz",
"20200302_11_MP7868_S61_R1_001.fastq.gz":"mp856-MV411_IRF2BP_VHL_6h-POLII_S2-r3_R1.fastq.gz",
"20200302_11_MP7868_S61_R2_001.fastq.gz":"mp856-MV411_IRF2BP_VHL_6h-POLII_S2-r3_R2.fastq.gz",
"20200302_12_MP7868_S62_R1_001.fastq.gz":"mp857-MV411_IRF2BP_VHL_6h-POLII_S5-r3_R1.fastq.gz",
"20200302_12_MP7868_S62_R2_001.fastq.gz":"mp857-MV411_IRF2BP_VHL_6h-POLII_S5-r3_R2.fastq.gz"
}

In [None]:
for k,v in rename.items():
    ! mv ../../data/IRF2BP2_degraded_rep4/fastqs/$k ../../data/IRF2BP2_degraded_rep4/fastqs/$v

In [None]:
from gsheets import Sheets
sheets = Sheets.from_files('~/.client_secret.json', '~/.storage.json')
url="https://docs.google.com/spreadsheets/d/1yFLjYB1McU530JnLgL0QIMAKIkVl3kl0_LCHje2gk8U"
gsheet = sheets.get(url).sheets[2].to_frame()

In [None]:
gsheet

In [None]:
df = {
"fastq_1": [],
"fastq_2": [],
"antibody": [],
"group": [],
"replicate": [],
"control": []
}
for val in h.grouped(a,2):
    row = gsheet[gsheet.id==val[0].split('/')[-1].split('-')[0]]
    df['group'].append(row['name'].values[0])
    df['replicate'].append(1)
    df['fastq_1'].append(val[0])
    df['fastq_2'].append(val[1])
    df['antibody'].append(row['protein'].values[0])
    df['control'].append("INPUT")
df['group'].append('INPUT')
df['replicate'].append(1)
df['fastq_1'].append('IRF2BP2_degraded_rep3/mp845-MV411_IRF2BP2_-INPUT-r1_R1.fastq.gz')
df['fastq_2'].append('IRF2BP2_degraded_rep3/mp845-MV411_IRF2BP2_-INPUT-r1_R2.fastq.gz')
df['antibody'].append("")
df['control'].append("")
df = pd.DataFrame(df)

In [None]:
df

In [None]:
df[df.columns[[3,4,0,1,2,5]]].to_csv('../nextflow/IRF2BP2_degraded_rep4_design.csv',index=False)

In [None]:
#process chips
! sudo ../nextflow run nf-core/chipseq --paired_end --seq_center 'DFCI' --email \
'jkobject@gmail.com' --narrow_peak --input ../nextflow/IRF2BP2_degraded_rep4_design.csv --genome GRCh38 --skip_preseq \
--max_cpus 24 -profile docker -w work

In [None]:
!cp -r results/* ../../data/IRF2BP2_degraded_rep4/ && sudo rm -r work && sudo rm -r results

In [None]:
ls ../../TrimGalore-0.6.5/trim_galore

In [None]:
# get scaling values
mappedreads, umappedreads_norm = chip.getSpikeInControlScales(refgenome="../../data/ref/reference_droso.fna",
fastQfolder='../../data/IRF2BP2_degraded_rep4/fastqs/',
pairedEnd=True, cores=12,
tofilter=True,
totrim=False,
tomap=True,
results="../../data/IRF2BP2_degraded_rep4/",
pathtotrim_galore="../../TrimGalore-0.6.5/trim_galore")
mappedreads, umappedreads_norm

computing scales from the excel sheet

In [None]:
bams = ! ls ../../data/IRF2BP2_degraded_rep4/bwa/mergedLibrary/*.bam
bams

In [None]:
bams[6]

### on scalled data

In [None]:
bams = [bam1.split('/')[-1].split('.')[0] for bam1 in bams]

In [None]:
# diffPeak on scaled data
size=[ 208, 214, 207, 234, 296, 231]
for i in range(int(len(bams)/2)-1):
    name1 = bams[1+i]
    name2 = bams[7+i]
    print(name1,name2)
    chip.diffPeak(name1, name2, directory= "../../data/IRF2BP2_degraded_rep4/diffData/", res_directory='../../data/IRF2BP2_degraded_rep4/diffPeaks/', scaling1=scales[i][1], scaling2=scales[i][0], size=size[i])

In [None]:
# diffPeak on scaled data
for i in range(int(len(bams[1:])/2)):
    name1 = bams[1+i]
    name2 = bams[7+i]
    chip.fullDiffPeak(name1,name2, control1='../../data/IRF2BP2_degraded_rep3/bwa/mergedLibrary/mp845-MV411_IRF2BP2-INPUT-r1.mLb.clN.sorted.bam', directory = "../../data/IRF2BP2_degraded_rep4/diffData/", res_directory = "../../data/IRF2BP2_degraded_rep4/diffPeaks/",pairedend=False)

In [None]:
scales = [1.0,
1.0,
1.0,
1.0,
1.0,
0.626304048,
0.2628507876,
0.9021192519,
0.1869653476,
0.5500321887,
0.6046056203,
1.0]

In [None]:
bams

In [None]:
chip.bigWigFrom(bams[1:], 
                genome='GRCh38',scaling=scales,
               numthreads=8)

In [None]:
!mv bigwig ../../data/IRF2BP2_degraded_rep4/recalib_bigwig/

In [None]:
os.popen('for i in $(ls ../../data/IRF2BP2_degraded_rep4/diffPeaks/*.bed); \
            do echo $(wc -l $i); \
            done').read().split('\n')

In [None]:
bw = ! ls ../../data/IRF2BP2_degraded_rep4/recalib_bigwig/*.bw
bw

In [None]:
cond1peak = ! ls ../../data/IRF2BP2_degraded_rep4/diffPeaks/*cond1.bed
cond2peak = ! ls ../../data/IRF2BP2_degraded_rep4/diffPeaks/*cond2.bed
commonpeak = ! ls ../../data/IRF2BP2_degraded_rep4/diffPeaks/*common.bed
cond1peak

In [None]:
names = ["MED1","MED1_v2","FLAG_IRF2BP2","POLII_total","POLII_S2","POLII_S5"]

In [None]:
for i in range(int(len(bw)/2)):
    if i <0:
        continue
    name1 = bw[i]
    name2 = bw[i+6]
    peak = [cond1peak[i], commonpeak[i], cond2peak[i]]
   # for val in peak:
      #  chip.dropWeirdChromosomes(val)
    name = names[i]
    print(name1,name2)
    chip.getPeaksAt(peak, [name1, name2], bigwignames=['DMSO', 'VHL'], peaknames=['DMSO_peaks', 'common', 'VHL_peaks'], window=3000, folder="", title=name, numthreads=7, refpoint="center", name='../../data/IRF2BP2_degraded_rep4/diffPeaks/'+name+'_mat.pdf', withDeeptools=True, torecompute=True)

In [None]:
peaks = ! ls ../../data/results4/bwa/mergedLibrary/macs/narrowPeak/*.narrowPeak
peaks

In [None]:
names = ["MED1","MED1_v2","FLAG_IRF2BP2","POLII_total","POLII_S2","POLII_S5"]
names.extend([i+'_VHL' for i in names])
names

In [None]:
for i in range(len(bw)):
    chip.getPeaksAt(peaks[i], bw[i], window=3000, folder="", title=names[i], numthreads=7, torecompute=True, onlyProfile=True, name='../../data/IRF2BP2_degraded_rep4/diffPeaks/'+names[i]+'_mat_profile.pdf', withDeeptools=True, refpoint="center")
    chip.getPeaksAt(peaks[i], bw[i], window=3000, folder="", title=names[i], numthreads=7, torecompute=True, onlyProfile=True,name='../../data/IRF2BP2_degraded_rep4/diffPeaks/'+names[i]+'_mat_profile_clust3.pdf', withDeeptools=True, cluster=3, refpoint="center")

### on unscalled data

In [None]:
bams = !ls ../../data/results4/bwa/mergedLibrary/mp*.bam
bams

In [None]:
! mkdir ../../data/IRF2BP2_degraded_rep4/diffPeaks_unscaled

In [None]:
#on unscalled data 
for i in range(int(len(bams)/2)):
    if i < 5:
        continue
    name1 = bams[i]
    name2 = bams[6+i]
    print(name1,name2)
    chip.fullDiffPeak(name1,name2, control1='../../data/results4/bwa/mergedLibrary/INPUT_R1.mLb.clN.sorted.bam', directory = "../../data/IRF2BP2_degraded_rep4/diffData_unscaled/", res_directory = "../../data/IRF2BP2_degraded_rep4/diffPeaks_unscaled/",pairedend=False)

In [None]:
bw = ! ls ../../data/results4/bwa/mergedLibrary/bigwig/*.bigWig
bw

In [None]:
cond1peak = ! ls ../../data/IRF2BP2_degraded_rep4/diffPeaks_unscaled/*cond1.bed
cond2peak = ! ls ../../data/IRF2BP2_degraded_rep4/diffPeaks_unscaled/*cond2.bed
commonpeak = ! ls ../../data/IRF2BP2_degraded_rep4/diffPeaks_unscaled/*common.bed
commonpeak

In [None]:
names = ["MED1","MED1_v2","FLAG_IRF2BP2","POLII_total","POLII_S2","POLII_S5"]

In [None]:
for i in range(int((len(bw)-1)/2)):
    name1 = bw[1+i]
    name2 = bw[7+i]
    peak = [cond1peak[i], commonpeak[i], cond2peak[i]]
    #for val in peak:
     #   chip.dropWeirdChromosomes(val)
    name = names[i]
    print(name1,name2)
    chip.getPeaksAt(peak, [name1, name2], bigwignames=['DMSO', 'VHL'], peaknames=['DMSO_peaks', 'common', 'VHL_peaks'], window=3000, folder="", title=name, numthreads=7, torecompute=True, refpoint='center', name='../../data/IRF2BP2_degraded_rep4/diffPeaks_unscaled/'+name+'_mat.pdf', withDeeptools=True)

In [None]:
! gsutil -m cp -r "../../data/IRF2BP2_degraded_rep4" gs://amlproject/Chip/

## Copying data

In [30]:
mkdir ../results/$project/diffPeaks_unscaled/

In [31]:
! cp ../../data/*/diffPeaks_unscaled/*.bed ../results/$project/diffPeaks_unscaled/

In [35]:
mkdir ../results/$project/diffPeaks_scaled/

In [36]:
! cp ../../data/*/diffPeaks/*.bed ../results/$project/diffPeaks_scaled/

## v5

In [4]:
project="additional_degraded_v1"

In [37]:
! gsutil ls gs://transfer-amlproject/200723_MP8095_fastq/

gs://transfer-amlproject/200723_MP8095_fastq/20200723_10_MP8095_S220_R1_001.fastq.gz
gs://transfer-amlproject/200723_MP8095_fastq/20200723_10_MP8095_S220_R2_001.fastq.gz
gs://transfer-amlproject/200723_MP8095_fastq/20200723_11_MP8095_S221_R1_001.fastq.gz
gs://transfer-amlproject/200723_MP8095_fastq/20200723_11_MP8095_S221_R2_001.fastq.gz
gs://transfer-amlproject/200723_MP8095_fastq/20200723_12_MP8095_S222_R1_001.fastq.gz
gs://transfer-amlproject/200723_MP8095_fastq/20200723_12_MP8095_S222_R2_001.fastq.gz
gs://transfer-amlproject/200723_MP8095_fastq/20200723_13_MP8095_S223_R1_001.fastq.gz
gs://transfer-amlproject/200723_MP8095_fastq/20200723_13_MP8095_S223_R2_001.fastq.gz
gs://transfer-amlproject/200723_MP8095_fastq/20200723_14_MP8095_S224_R1_001.fastq.gz
gs://transfer-amlproject/200723_MP8095_fastq/20200723_14_MP8095_S224_R2_001.fastq.gz
gs://transfer-amlproject/200723_MP8095_fastq/20200723_15_MP8095_S225_R1_001.fastq.gz
gs://transfer-amlproject/200723_MP8095_fastq/20200723_

In [42]:
mkdir ../data/$project/qc/

In [43]:
!gsutil -m cp gs://transfer-amlproject/200723_MP8095_fastq/multiqc_report.html ../data/$project/qc/
!gsutil -m cp -r gs://transfer-amlproject/200723_MP8095_fastq/Reports/ ../data/$project/qc/
!gsutil -m cp -r gs://transfer-amlproject/200723_MP8095_fastq/multiqc_data/ ../data/$project/qc/

Copying gs://transfer-amlproject/200723_MP8095_fastq/multiqc_report.html...
/ [1/1 files][  1.5 MiB/  1.5 MiB] 100% Done                                    
Operation completed over 1 objects/1.5 MiB.                                      
Copying gs://transfer-amlproject/200723_MP8095_fastq/Reports/20200723_10_MP8095_S220_R1_001_fastqc.html...
Copying gs://transfer-amlproject/200723_MP8095_fastq/Reports/20200723_10_MP8095_S220_R2_001_fastqc.html...
Copying gs://transfer-amlproject/200723_MP8095_fastq/Reports/20200723_10_MP8095_S220_R1_001_fastqc.zip...
Copying gs://transfer-amlproject/200723_MP8095_fastq/Reports/20200723_10_MP8095_S220_R2_001_fastqc.zip...
Copying gs://transfer-amlproject/200723_MP8095_fastq/Reports/20200723_11_MP8095_S221_R1_001_fastqc.html...
Copying gs://transfer-amlproject/200723_MP8095_fastq/Reports/20200723_11_MP8095_S221_R1_001_fastqc.zip...
Copying gs://transfer-amlproject/200723_MP8095_fastq/Reports/20200723_11_MP8095_S221_R2_001_fastqc.zip...
Copying gs://tra

/ [6/6 files][  2.2 MiB/  2.2 MiB] 100% Done                                    
Operation completed over 6 objects/2.2 MiB.                                      


In [46]:
! gsutil -m cp gs://transfer-amlproject/200723_MP8095_fastq/*  gs://transfer-amlproject/$project/

Omitting prefix "gs://transfer-amlproject/200723_MP8095_fastq/Reports/". (Did you mean to do cp -r?)
Omitting prefix "gs://transfer-amlproject/200723_MP8095_fastq/multiqc_data/". (Did you mean to do cp -r?)
Copying gs://transfer-amlproject/200723_MP8095_fastq/20200723_10_MP8095_S220_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/200723_MP8095_fastq/20200723_10_MP8095_S220_R2_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/200723_MP8095_fastq/20200723_11_MP8095_S221_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/200723_MP8095_fastq/20200723_11_MP8095_S221_R2_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/200723_MP8095_fastq/20200723_12_MP8095_S222_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/200723_MP8095_fastq/20200723_12_MP8095_S222_R2_001.fastq.gz [Content-Type=applic

### analysis

In [48]:
! mkdir ../../data/$project && mkdir ../../data/$project/fastqs && gsutil -m cp gs://transfer-amlproject/$project/* ../../data/$project/fastqs

Copying gs://transfer-amlproject/additional_degraded_v1/20200723_10_MP8095_S220_R1_001.fastq.gz...
Copying gs://transfer-amlproject/additional_degraded_v1/20200723_10_MP8095_S220_R2_001.fastq.gz...
Copying gs://transfer-amlproject/additional_degraded_v1/20200723_11_MP8095_S221_R1_001.fastq.gz...
Copying gs://transfer-amlproject/additional_degraded_v1/20200723_11_MP8095_S221_R2_001.fastq.gz...
Copying gs://transfer-amlproject/additional_degraded_v1/20200723_12_MP8095_S222_R2_001.fastq.gz...
Copying gs://transfer-amlproject/additional_degraded_v1/20200723_13_MP8095_S223_R1_001.fastq.gz...
Copying gs://transfer-amlproject/additional_degraded_v1/20200723_12_MP8095_S222_R1_001.fastq.gz...
Copying gs://transfer-amlproject/additional_degraded_v1/20200723_13_MP8095_S223_R2_001.fastq.gz...
Copying gs://transfer-amlproject/additional_degraded_v1/20200723_14_MP8095_S224_R1_001.fastq.gz...
Copying gs://transfer-amlproject/additional_degraded_v1/20200723_14_MP8095_S224_R2_001.fastq.gz...
Copying gs

In [50]:
! rm ../../data/$project/fastqs/multiqc_report.html

In [3]:
a = ! ls ../../data/$project/fastqs
a

['mp858-MV411_RNP_AAVS1-H3K27AC-r1_R1_001.fastq.gz',
 'mp858-MV411_RNP_AAVS1-H3K27AC-r1_R2_001.fastq.gz',
 'mp859-MV411_RNP_AAVS1-H3K27AC-r2_R1_001.fastq.gz',
 'mp859-MV411_RNP_AAVS1-H3K27AC-r2_R2_001.fastq.gz',
 'mp860-MV411_RNP_RUNX1-H3K27AC-r1_R1_001.fastq.gz',
 'mp860-MV411_RNP_RUNX1-H3K27AC-r1_R2_001.fastq.gz',
 'mp861-MV411_RNP_RUNX1-H3K27AC-r2_R1_001.fastq.gz',
 'mp861-MV411_RNP_RUNX1-H3K27AC-r2_R2_001.fastq.gz',
 'mp862-MV411_RNP_RUNX2-H3K27AC-r1_R1_001.fastq.gz',
 'mp862-MV411_RNP_RUNX2-H3K27AC-r1_R2_001.fastq.gz',
 'mp863-MV411_RNP_RUNX2-H3K27AC-r2_R1_001.fastq.gz',
 'mp863-MV411_RNP_RUNX2-H3K27AC-r2_R2_001.fastq.gz',
 'mp864-MV411_RNP_RUNX1_RUNX2-H3K27AC-r1_R1_001.fastq.gz',
 'mp864-MV411_RNP_RUNX1_RUNX2-H3K27AC-r1_R2_001.fastq.gz',
 'mp865-MV411_RNP_RUNX1_RUNX2-H3K27AC-r2_R1_001.fastq.gz',
 'mp865-MV411_RNP_RUNX1_RUNX2-H3K27AC-r2_R2_001.fastq.gz',
 'mp866-MV411_RNP_MEF2D-H3K27AC-r1_R1_001.fastq.gz',
 'mp866-MV411_RNP_MEF2D-H3K27AC-r1_R2_001.fastq.gz',
 'mp867-MV411_RNP_MEF2

In [67]:
rename = {
"20200723_1_MP8095_S211": "mp858-MV411_RNP_AAVS1-H3K27AC-r1",
"20200723_2_MP8095_S212": "mp859-MV411_RNP_AAVS1-H3K27AC-r2",
"20200723_3_MP8095_S213": "mp860-MV411_RNP_RUNX1-H3K27AC-r1",
"20200723_4_MP8095_S214": "mp861-MV411_RNP_RUNX1-H3K27AC-r2",
"20200723_5_MP8095_S215": "mp862-MV411_RNP_RUNX2-H3K27AC-r1",
"20200723_6_MP8095_S216": "mp863-MV411_RNP_RUNX2-H3K27AC-r2",
"20200723_7_MP8095_S217": "mp864-MV411_RNP_RUNX1_RUNX2-H3K27AC-r1",
"20200723_8_MP8095_S218": "mp865-MV411_RNP_RUNX1_RUNX2-H3K27AC-r2",
"20200723_9_MP8095_S219": "mp866-MV411_RNP_MEF2D-H3K27AC-r1",
"20200723_10_MP8095_S220": "mp867-MV411_RNP_MEF2D-H3K27AC-r2",
"20200723_11_MP8095_S221": "mp868-MV411_RNP_IRF8-H3K27AC-r1",
"20200723_12_MP8095_S222": "mp869-MV411_RNP_IRF8-H3K27AC-r2",
"20200723_13_MP8095_S223": "mp870-MV411_RNP_MYB-H3K27AC-r1",
"20200723_14_MP8095_S224": "mp871-MV411_RNP_MYB-H3K27AC-r2",
"20200723_15_MP8095_S225": "mp872-MV411_RNP_SPI1-H3K27AC-r1",
"20200723_16_MP8095_S226": "mp873-MV411_RNP_SPI1-H3K27AC-r2",
"20200723_1S_MP8095_S209": "mp874-MV411_MEF2D_NT_SC_63-FLAG_MEF2D-r2",
"20200723_2S_MP8095_S210": "mp875-MV411_MEF2C_NT-FLAG_MEF2C-r1"}

In [68]:
for val in a:
    rep = val
    for k,v in rename.items():
        rep = rep.replace(k,v)
    !mv ../../data/$project/fastqs/$val ../../data/$project/fastqs/$rep

In [69]:
from gsheets import Sheets
sheets = Sheets.from_files('~/.client_secret.json', '~/.storage.json')
url="https://docs.google.com/spreadsheets/d/1yFLjYB1McU530JnLgL0QIMAKIkVl3kl0_LCHje2gk8U"
gsheet = sheets.get(url).sheets[2].to_frame()

In [70]:
gsheet

Unnamed: 0,id,cell line,replicate,protein,quality,paired_end,matching input name,processed,name,previous name,...,ratio to droso,unique mapped reads(droso),scaling factor,Total QC,folderNarrow,folderCompensated,folderQC,folderBroad,folder Bigwig,folder diffPeaks
0,mp100,U937,1,INPUT,,n,,Y,mp100-U937-INPUT-r1,,...,,,,https://storage.cloud.google.com/amlproject/Ch...,https://console.cloud.google.com/storage/brows...,,https://console.cloud.google.com/storage/brows...,https://console.cloud.google.com/storage/brows...,https://console.cloud.google.com/storage/brows...,
1,mp101,NOMO1,1,INPUT,,n,,Y,mp101-NOMO1-INPUT-r1,,...,,,,,,,,,,
2,mp102,UT7,1,INPUT,,n,,Y,mp102-UT7-INPUT-r1,,...,,,,,,,,,,
3,mp106,MV411,1,MYB,x,n,INPUT_MV411,Y,mp106-MV411-MYB-r1,,...,,,,,,,,,,
4,mp109,M6,1,CEBPA,x,n,INPUT_M6,Y,mp109-M6-CEBPA-r1,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
299,mp871,MV411_RNP_MYB,2,H3K27AC,,y,INPUT_MV411,,mp871-MV411_RNP_MYB-H3K27AC-r2,20200723_14_MP8095_S224,...,,,,,,,,,,
300,mp872,MV411_RNP_SPI1,1,H3K27AC,,y,INPUT_MV411,,mp872-MV411_RNP_SPI1-H3K27AC-r1,20200723_15_MP8095_S225,...,,,,,,,,,,
301,mp873,MV411_RNP_SPI1,2,H3K27AC,,y,INPUT_MV411,,mp873-MV411_RNP_SPI1-H3K27AC-r2,20200723_16_MP8095_S226,...,,,,,,,,,,
302,mp874,MV411_MEF2D_NT_SC_63,2,FLAG_MEF2D,,y,INPUT_MV411,,mp874-MV411_MEF2D_NT_SC_63-FLAG_MEF2D-r2,20200723_1S_MP8095_S209,...,,,,,,,,,,


In [71]:
a = ! ls ../../data/$project/fastqs
a

['mp858-MV411_RNP_AAVS1-H3K27AC-r1_R1_001.fastq.gz',
 'mp858-MV411_RNP_AAVS1-H3K27AC-r1_R2_001.fastq.gz',
 'mp859-MV411_RNP_AAVS1-H3K27AC-r2_R1_001.fastq.gz',
 'mp859-MV411_RNP_AAVS1-H3K27AC-r2_R2_001.fastq.gz',
 'mp860-MV411_RNP_RUNX1-H3K27AC-r1_R1_001.fastq.gz',
 'mp860-MV411_RNP_RUNX1-H3K27AC-r1_R2_001.fastq.gz',
 'mp861-MV411_RNP_RUNX1-H3K27AC-r2_R1_001.fastq.gz',
 'mp861-MV411_RNP_RUNX1-H3K27AC-r2_R2_001.fastq.gz',
 'mp862-MV411_RNP_RUNX2-H3K27AC-r1_R1_001.fastq.gz',
 'mp862-MV411_RNP_RUNX2-H3K27AC-r1_R2_001.fastq.gz',
 'mp863-MV411_RNP_RUNX2-H3K27AC-r2_R1_001.fastq.gz',
 'mp863-MV411_RNP_RUNX2-H3K27AC-r2_R2_001.fastq.gz',
 'mp864-MV411_RNP_RUNX1_RUNX2-H3K27AC-r1_R1_001.fastq.gz',
 'mp864-MV411_RNP_RUNX1_RUNX2-H3K27AC-r1_R2_001.fastq.gz',
 'mp865-MV411_RNP_RUNX1_RUNX2-H3K27AC-r2_R1_001.fastq.gz',
 'mp865-MV411_RNP_RUNX1_RUNX2-H3K27AC-r2_R2_001.fastq.gz',
 'mp866-MV411_RNP_MEF2D-H3K27AC-r1_R1_001.fastq.gz',
 'mp866-MV411_RNP_MEF2D-H3K27AC-r1_R2_001.fastq.gz',
 'mp867-MV411_RNP_MEF2

In [73]:
for val in a[-4:]:
    !gsutil cp ../../data/$project/fastqs/$val gs://amlproject/Chip/fastqs/
    !rm ../../data/$project/fastqs/$val

Copying file://../../data/additional_degraded_v1/fastqs/mp874-MV411_MEF2D_NT_SC_63-FLAG_MEF2D-r2_R1_001.fastq.gz [Content-Type=application/octet-stream]...
==> NOTE: You are uploading one or more large file(s), which would run          
significantly faster if you enable parallel composite uploads. This
feature can be enabled by editing the
"parallel_composite_upload_threshold" value in your .boto
configuration file. However, note that if you do this large files will
be uploaded as `composite objects
<https://cloud.google.com/storage/docs/composite-objects>`_,which
means that any user who downloads such objects will need to have a
compiled crcmod installed (see "gsutil help crcmod"). This is because
without a compiled crcmod, computing checksums on composite objects is
so slow that gsutil disables downloads of composite objects.

/ [1 files][  1.7 GiB/  1.7 GiB]   98.9 MiB/s                                   
Operation completed over 1 objects/1.7 GiB.                                  

In [76]:
gcp.patternRN({'mp845-MV411_IRF2BP2_-INPUT-r1':'mp845-MV411-INPUT-r2'},'gs://amlproject/Chip/',wildcards=['**','.*'], test=False)

found 2 files to rename


In [87]:
df = {
"fastq_1": [],
"fastq_2": [],
"antibody": [],
"group": [],
"replicate": [],
"control": []
}
for val in h.grouped(a[:-4],2):
    row = gsheet[gsheet.id==val[0].split('/')[-1].split('-')[0]]
    df['group'].append("MV4"+row['name'].values[0].split('-r')[0].split('-MV4')[1])
    df['replicate'].append(row['name'].values[0].split('-r')[1])
    df['fastq_1'].append(project+"/fastqs/"+val[0])
    df['fastq_2'].append(project+"/fastqs/"+val[1])
    df['antibody'].append(row['protein'].values[0])
    df['control'].append("INPUT")
df['group'].append('INPUT')
df['replicate'].append(1)
df['fastq_1'].append('ref/mp845-MV411-INPUT-r2_R1.fastq.gz')
df['fastq_2'].append('ref/mp845-MV411-INPUT-r2_R2.fastq.gz')
df['antibody'].append("")
df['control'].append("")
df = pd.DataFrame(df)

In [88]:
df

Unnamed: 0,fastq_1,fastq_2,antibody,group,replicate,control
0,additional_degraded_v1/fastqs/mp858-MV411_RNP_...,additional_degraded_v1/fastqs/mp858-MV411_RNP_...,H3K27AC,MV411_RNP_AAVS1-H3K27AC,1,INPUT
1,additional_degraded_v1/fastqs/mp859-MV411_RNP_...,additional_degraded_v1/fastqs/mp859-MV411_RNP_...,H3K27AC,MV411_RNP_AAVS1-H3K27AC,2,INPUT
2,additional_degraded_v1/fastqs/mp860-MV411_RNP_...,additional_degraded_v1/fastqs/mp860-MV411_RNP_...,H3K27AC,MV411_RNP_RUNX1-H3K27AC,1,INPUT
3,additional_degraded_v1/fastqs/mp861-MV411_RNP_...,additional_degraded_v1/fastqs/mp861-MV411_RNP_...,H3K27AC,MV411_RNP_RUNX1-H3K27AC,2,INPUT
4,additional_degraded_v1/fastqs/mp862-MV411_RNP_...,additional_degraded_v1/fastqs/mp862-MV411_RNP_...,H3K27AC,MV411_RNP_RUNX2-H3K27AC,1,INPUT
5,additional_degraded_v1/fastqs/mp863-MV411_RNP_...,additional_degraded_v1/fastqs/mp863-MV411_RNP_...,H3K27AC,MV411_RNP_RUNX2-H3K27AC,2,INPUT
6,additional_degraded_v1/fastqs/mp864-MV411_RNP_...,additional_degraded_v1/fastqs/mp864-MV411_RNP_...,H3K27AC,MV411_RNP_RUNX1_RUNX2-H3K27AC,1,INPUT
7,additional_degraded_v1/fastqs/mp865-MV411_RNP_...,additional_degraded_v1/fastqs/mp865-MV411_RNP_...,H3K27AC,MV411_RNP_RUNX1_RUNX2-H3K27AC,2,INPUT
8,additional_degraded_v1/fastqs/mp866-MV411_RNP_...,additional_degraded_v1/fastqs/mp866-MV411_RNP_...,H3K27AC,MV411_RNP_MEF2D-H3K27AC,1,INPUT
9,additional_degraded_v1/fastqs/mp867-MV411_RNP_...,additional_degraded_v1/fastqs/mp867-MV411_RNP_...,H3K27AC,MV411_RNP_MEF2D-H3K27AC,2,INPUT


In [89]:
df[df.columns[[3,4,0,1,2,5]]].to_csv('../nextflow/additional_degraded_v1_design.csv',index=False)

In [6]:
! cd ../../data/ && sudo ../nextflow log ## to get access to the previous runs

TIMESTAMP          	DURATION      	RUN NAME              	STATUS	REVISION ID	SESSION ID                          	COMMAND                                                                                                                                                                                                                                                          
2020-02-10 22:39:18	8m 25s        	stupefied_crick       	ERR   	21be314954 	76ea5df0-153c-4e71-a59d-52c6112fda84	nextflow run nf-core/chipseq --paired_end --seq_center DFCI --email jkobject@gmail.com --narrow_peak --input design.csv --genome GRCh38 --skip_preseq --max_cpus 24 -profile docker -w work                                                                      
2020-02-10 22:40:37	1m 6s         	ridiculous_hilbert    	ERR   	21be314954 	75004903-035d-4504-ab80-cab74b5acac4	nextflow run nf-core/chipseq --paired_end --seq_center DFCI --email jkobject@gmail.com --narrow_peak --input design.csv --genome GRCh38 --sk

In [7]:
#process chips
! cd ../../data/ && sudo ../nextflow run nf-core/chipseq --paired_end --seq_center 'DFCI' --email 'jkobject@gmail.com' --input ../AMLproject/nextflow/additional_degraded_v1_design.csv --genome GRCh38 --skip_preseq --max_cpus 16 -profile docker -w work -resume exotic_bartik

N E X T F L O W  ~  version 19.10.0
Launching `nf-core/chipseq` [wise_legentil] - revision: 21be314954 [master]
NOTE: Your local project version looks outdated - a different revision is available in the remote repository [0f487ed76d]
-[2m--------------------------------------------------[0m-
                                        [0;32m,--.[0;30m/[0;32m,-.[0m
[0;34m        ___     __   __   __   ___     [0;32m/,-._.--~'[0m
[0;34m  |\ | |__  __ /  ` /  \ |__) |__         [0;33m}  {[0m
[0;34m  | \| |       \__, \__/ |  \ |___     [0;32m\`-._,-`-,[0m
                                        [0;32m`._,._,'[0m
[0;35m  nf-core/chipseq v1.1.0[0m
-[2m--------------------------------------------------[0m-
Run Name            : wise_legentil
Data Type           : Paired-End
Design File         : ../AMLproject/nextflow/additional_degraded_v1_design.csv
Genome              : GRCh38
Fasta File          : s3://ngi-igenomes/igenomes//Homo_sapiens/NCBI/GRCh38/Sequence/WholeGenomeF

[28A
executor >  local (1)[K
[eb/23cd77] process > CheckDesign (additional_deg... [100%] 1 of 1, cached: 1 ✔[K
[c9/ec8de4] process > MakeTSSBED (genes.bed)         [100%] 1 of 1, cached: 1 ✔[K
[0d/f433a4] process > MakeGenomeFilter (genome.fa)   [100%] 1 of 1, cached: 1 ✔[K
[fe/2e0c27] process > FastQC (MV411_RNP_SPI1-H3K2... [100%] 17 of 17, cached:...[K
[68/a565e7] process > TrimGalore (MV411_RNP_MYB-H... [100%] 17 of 17, cached:...[K
[a2/0c2388] process > BWAMem (MV411_RNP_RUNX1-H3K... [100%] 17 of 17, cached:...[K
[13/4f49fa] process > SortBAM (INPUT_R1_T1)          [100%] 17 of 17, cached:...[K
[-        ] process > MergeBAM                       -[K
[-        ] process > MergeBAMFilter                 -[K
[-        ] process > MergeBAMRemoveOrphan           -[K
[-        ] process > Preseq                         -[K
[-        ] process > CollectMultipleMetrics         -[K
[-        ] process > BigWig                         -[K
[-        ] process > PlotProfile   

[28A
executor >  local (6)[K
[eb/23cd77] process > CheckDesign (additional_deg... [100%] 1 of 1, cached: 1 ✔[K
[c9/ec8de4] process > MakeTSSBED (genes.bed)         [100%] 1 of 1, cached: 1 ✔[K
[0d/f433a4] process > MakeGenomeFilter (genome.fa)   [100%] 1 of 1, cached: 1 ✔[K
[fe/2e0c27] process > FastQC (MV411_RNP_SPI1-H3K2... [100%] 17 of 17, cached:...[K
[68/a565e7] process > TrimGalore (MV411_RNP_MYB-H... [100%] 17 of 17, cached:...[K
[a2/0c2388] process > BWAMem (MV411_RNP_RUNX1-H3K... [100%] 17 of 17, cached:...[K
[13/4f49fa] process > SortBAM (INPUT_R1_T1)          [100%] 17 of 17, cached:...[K
[fc/cf6140] process > MergeBAM (MV411_RNP_SPI1-H3... [ 88%] 14 of 16, cached: 11[K
[-        ] process > MergeBAMFilter                 -[K
[-        ] process > MergeBAMRemoveOrphan           -[K
[-        ] process > Preseq                         -[K
[-        ] process > CollectMultipleMetrics         -[K
[-        ] process > BigWig                         -[K
[-        

[28A
executor >  local (11)[K
[eb/23cd77] process > CheckDesign (additional_deg... [100%] 1 of 1, cached: 1 ✔[K
[c9/ec8de4] process > MakeTSSBED (genes.bed)         [100%] 1 of 1, cached: 1 ✔[K
[0d/f433a4] process > MakeGenomeFilter (genome.fa)   [100%] 1 of 1, cached: 1 ✔[K
[fe/2e0c27] process > FastQC (MV411_RNP_SPI1-H3K2... [100%] 17 of 17, cached:...[K
[68/a565e7] process > TrimGalore (MV411_RNP_MYB-H... [100%] 17 of 17, cached:...[K
[a2/0c2388] process > BWAMem (MV411_RNP_RUNX1-H3K... [100%] 17 of 17, cached:...[K
[13/4f49fa] process > SortBAM (INPUT_R1_T1)          [100%] 17 of 17, cached:...[K
[5a/6aaef3] process > MergeBAM (MV411_RNP_RUNX2-H... [100%] 17 of 17, cached:...[K
[84/574208] process > MergeBAMFilter (MV411_RNP_S... [ 50%] 2 of 4[K
[-        ] process > MergeBAMRemoveOrphan           -[K
[-        ] process > Preseq                         -[K
[-        ] process > CollectMultipleMetrics         -[K
[-        ] process > BigWig                         -

[28A
executor >  local (15)[K
[eb/23cd77] process > CheckDesign (additional_deg... [100%] 1 of 1, cached: 1 ✔[K
[c9/ec8de4] process > MakeTSSBED (genes.bed)         [100%] 1 of 1, cached: 1 ✔[K
[0d/f433a4] process > MakeGenomeFilter (genome.fa)   [100%] 1 of 1, cached: 1 ✔[K
[fe/2e0c27] process > FastQC (MV411_RNP_SPI1-H3K2... [100%] 17 of 17, cached:...[K
[68/a565e7] process > TrimGalore (MV411_RNP_MYB-H... [100%] 17 of 17, cached:...[K
[a2/0c2388] process > BWAMem (MV411_RNP_RUNX1-H3K... [100%] 17 of 17, cached:...[K
[13/4f49fa] process > SortBAM (INPUT_R1_T1)          [100%] 17 of 17, cached:...[K
[5a/6aaef3] process > MergeBAM (MV411_RNP_RUNX2-H... [100%] 17 of 17, cached:...[K
[e1/d4ac46] process > MergeBAMFilter (MV411_RNP_A... [ 63%] 5 of 8[K
[-        ] process > MergeBAMRemoveOrphan           -[K
[-        ] process > Preseq                         -[K
[-        ] process > CollectMultipleMetrics         -[K
[-        ] process > BigWig                         -

[28A
executor >  local (18)[K
[eb/23cd77] process > CheckDesign (additional_deg... [100%] 1 of 1, cached: 1 ✔[K
[c9/ec8de4] process > MakeTSSBED (genes.bed)         [100%] 1 of 1, cached: 1 ✔[K
[0d/f433a4] process > MakeGenomeFilter (genome.fa)   [100%] 1 of 1, cached: 1 ✔[K
[fe/2e0c27] process > FastQC (MV411_RNP_SPI1-H3K2... [100%] 17 of 17, cached:...[K
[68/a565e7] process > TrimGalore (MV411_RNP_MYB-H... [100%] 17 of 17, cached:...[K
[a2/0c2388] process > BWAMem (MV411_RNP_RUNX1-H3K... [100%] 17 of 17, cached:...[K
[13/4f49fa] process > SortBAM (INPUT_R1_T1)          [100%] 17 of 17, cached:...[K
[5a/6aaef3] process > MergeBAM (MV411_RNP_RUNX2-H... [100%] 17 of 17, cached:...[K
[77/a464ff] process > MergeBAMFilter (MV411_RNP_I... [ 91%] 10 of 11[K
[-        ] process > MergeBAMRemoveOrphan           -[K
[-        ] process > Preseq                         -[K
[-        ] process > CollectMultipleMetrics         -[K
[-        ] process > BigWig                         

[28A
executor >  local (23)[K
[eb/23cd77] process > CheckDesign (additional_deg... [100%] 1 of 1, cached: 1 ✔[K
[c9/ec8de4] process > MakeTSSBED (genes.bed)         [100%] 1 of 1, cached: 1 ✔[K
[0d/f433a4] process > MakeGenomeFilter (genome.fa)   [100%] 1 of 1, cached: 1 ✔[K
[fe/2e0c27] process > FastQC (MV411_RNP_SPI1-H3K2... [100%] 17 of 17, cached:...[K
[68/a565e7] process > TrimGalore (MV411_RNP_MYB-H... [100%] 17 of 17, cached:...[K
[a2/0c2388] process > BWAMem (MV411_RNP_RUNX1-H3K... [100%] 17 of 17, cached:...[K
[13/4f49fa] process > SortBAM (INPUT_R1_T1)          [100%] 17 of 17, cached:...[K
[5a/6aaef3] process > MergeBAM (MV411_RNP_RUNX2-H... [100%] 17 of 17, cached:...[K
[e0/a67acd] process > MergeBAMFilter (MV411_RNP_M... [ 88%] 14 of 16[K
[-        ] process > MergeBAMRemoveOrphan           -[K
[-        ] process > Preseq                         -[K
[-        ] process > CollectMultipleMetrics         -[K
[-        ] process > BigWig                         

[28A
executor >  local (28)[K
[eb/23cd77] process > CheckDesign (additional_deg... [100%] 1 of 1, cached: 1 ✔[K
[c9/ec8de4] process > MakeTSSBED (genes.bed)         [100%] 1 of 1, cached: 1 ✔[K
[0d/f433a4] process > MakeGenomeFilter (genome.fa)   [100%] 1 of 1, cached: 1 ✔[K
[fe/2e0c27] process > FastQC (MV411_RNP_SPI1-H3K2... [100%] 17 of 17, cached:...[K
[68/a565e7] process > TrimGalore (MV411_RNP_MYB-H... [100%] 17 of 17, cached:...[K
[a2/0c2388] process > BWAMem (MV411_RNP_RUNX1-H3K... [100%] 17 of 17, cached:...[K
[13/4f49fa] process > SortBAM (INPUT_R1_T1)          [100%] 17 of 17, cached:...[K
[5a/6aaef3] process > MergeBAM (MV411_RNP_RUNX2-H... [100%] 17 of 17, cached:...[K
[48/4b5e62] process > MergeBAMFilter (MV411_RNP_R... [100%] 17 of 17 ✔[K
[57/199f77] process > MergeBAMRemoveOrphan (MV411... [ 50%] 2 of 4[K
[-        ] process > Preseq                         -[K
[-        ] process > CollectMultipleMetrics         -[K
[-        ] process > BigWig           

[28A
executor >  local (33)[K
[eb/23cd77] process > CheckDesign (additional_deg... [100%] 1 of 1, cached: 1 ✔[K
[c9/ec8de4] process > MakeTSSBED (genes.bed)         [100%] 1 of 1, cached: 1 ✔[K
[0d/f433a4] process > MakeGenomeFilter (genome.fa)   [100%] 1 of 1, cached: 1 ✔[K
[fe/2e0c27] process > FastQC (MV411_RNP_SPI1-H3K2... [100%] 17 of 17, cached:...[K
[68/a565e7] process > TrimGalore (MV411_RNP_MYB-H... [100%] 17 of 17, cached:...[K
[a2/0c2388] process > BWAMem (MV411_RNP_RUNX1-H3K... [100%] 17 of 17, cached:...[K
[13/4f49fa] process > SortBAM (INPUT_R1_T1)          [100%] 17 of 17, cached:...[K
[5a/6aaef3] process > MergeBAM (MV411_RNP_RUNX2-H... [100%] 17 of 17, cached:...[K
[48/4b5e62] process > MergeBAMFilter (MV411_RNP_R... [100%] 17 of 17 ✔[K
[63/892aee] process > MergeBAMRemoveOrphan (MV411... [ 78%] 7 of 9[K
[-        ] process > Preseq                         -[K
[-        ] process > CollectMultipleMetrics         -[K
[-        ] process > BigWig           

[28A
executor >  local (38)[K
[eb/23cd77] process > CheckDesign (additional_deg... [100%] 1 of 1, cached: 1 ✔[K
[c9/ec8de4] process > MakeTSSBED (genes.bed)         [100%] 1 of 1, cached: 1 ✔[K
[0d/f433a4] process > MakeGenomeFilter (genome.fa)   [100%] 1 of 1, cached: 1 ✔[K
[fe/2e0c27] process > FastQC (MV411_RNP_SPI1-H3K2... [100%] 17 of 17, cached:...[K
[68/a565e7] process > TrimGalore (MV411_RNP_MYB-H... [100%] 17 of 17, cached:...[K
[a2/0c2388] process > BWAMem (MV411_RNP_RUNX1-H3K... [100%] 17 of 17, cached:...[K
[13/4f49fa] process > SortBAM (INPUT_R1_T1)          [100%] 17 of 17, cached:...[K
[5a/6aaef3] process > MergeBAM (MV411_RNP_RUNX2-H... [100%] 17 of 17, cached:...[K
[48/4b5e62] process > MergeBAMFilter (MV411_RNP_R... [100%] 17 of 17 ✔[K
[ba/00ce48] process > MergeBAMRemoveOrphan (MV411... [ 79%] 11 of 14[K
[-        ] process > Preseq                         -[K
[-        ] process > CollectMultipleMetrics         -[K
[-        ] process > BigWig         

[28A
executor >  local (41)[K
[eb/23cd77] process > CheckDesign (additional_deg... [100%] 1 of 1, cached: 1 ✔[K
[c9/ec8de4] process > MakeTSSBED (genes.bed)         [100%] 1 of 1, cached: 1 ✔[K
[0d/f433a4] process > MakeGenomeFilter (genome.fa)   [100%] 1 of 1, cached: 1 ✔[K
[fe/2e0c27] process > FastQC (MV411_RNP_SPI1-H3K2... [100%] 17 of 17, cached:...[K
[68/a565e7] process > TrimGalore (MV411_RNP_MYB-H... [100%] 17 of 17, cached:...[K
[a2/0c2388] process > BWAMem (MV411_RNP_RUNX1-H3K... [100%] 17 of 17, cached:...[K
[13/4f49fa] process > SortBAM (INPUT_R1_T1)          [100%] 17 of 17, cached:...[K
[5a/6aaef3] process > MergeBAM (MV411_RNP_RUNX2-H... [100%] 17 of 17, cached:...[K
[48/4b5e62] process > MergeBAMFilter (MV411_RNP_R... [100%] 17 of 17 ✔[K
[c0/52c2db] process > MergeBAMRemoveOrphan (MV411... [ 94%] 15 of 16[K
[-        ] process > Preseq                         -[K
[-        ] process > CollectMultipleMetrics         -[K
[-        ] process > BigWig         

[28A
executor >  local (46)[K
[eb/23cd77] process > CheckDesign (additional_deg... [100%] 1 of 1, cached: 1 ✔[K
[c9/ec8de4] process > MakeTSSBED (genes.bed)         [100%] 1 of 1, cached: 1 ✔[K
[0d/f433a4] process > MakeGenomeFilter (genome.fa)   [100%] 1 of 1, cached: 1 ✔[K
[fe/2e0c27] process > FastQC (MV411_RNP_SPI1-H3K2... [100%] 17 of 17, cached:...[K
[68/a565e7] process > TrimGalore (MV411_RNP_MYB-H... [100%] 17 of 17, cached:...[K
[a2/0c2388] process > BWAMem (MV411_RNP_RUNX1-H3K... [100%] 17 of 17, cached:...[K
[13/4f49fa] process > SortBAM (INPUT_R1_T1)          [100%] 17 of 17, cached:...[K
[5a/6aaef3] process > MergeBAM (MV411_RNP_RUNX2-H... [100%] 17 of 17, cached:...[K
[48/4b5e62] process > MergeBAMFilter (MV411_RNP_R... [100%] 17 of 17 ✔[K
[dc/6cb451] process > MergeBAMRemoveOrphan (MV411... [ 94%] 16 of 17[K
[-        ] process > Preseq                         -[K
[18/e3205c] process > CollectMultipleMetrics (MV4... [100%] 1 of 1[K
[fa/7ca520] process > Big

[c1/b1a581] process > PhantomPeakQualTools (MV411... [ 67%] 2 of 3[K
[-        ] process > PlotFingerprint                -[K
[-        ] process > MACSCallPeak                   -[K
[-        ] process > AnnotatePeaks                  -[K
[-        ] process > PeakQC                         -[K
[-        ] process > ConsensusPeakSet               -[K
[-        ] process > ConsensusPeakSetAnnotate       -[K
[-        ] process > ConsensusPeakSetDESeq          -[K
[-        ] process > IGV                            -[K
[7e/9c912a] process > get_software_versions          [100%] 1 of 1 ✔[K
[-        ] process > MultiQC                        -[K
[8a/7334d7] process > output_documentation           [100%] 1 of 1, cached: 1 ✔[K
[28A
executor >  local (51)[K
[eb/23cd77] process > CheckDesign (additional_deg... [100%] 1 of 1, cached: 1 ✔[K
[c9/ec8de4] process > MakeTSSBED (genes.bed)         [100%] 1 of 1, cached: 1 ✔[K
[0d/f433a4] process > MakeGenomeFilter (genome.fa)   [1

[28A
executor >  local (56)[K
[eb/23cd77] process > CheckDesign (additional_deg... [100%] 1 of 1, cached: 1 ✔[K
[c9/ec8de4] process > MakeTSSBED (genes.bed)         [100%] 1 of 1, cached: 1 ✔[K
[0d/f433a4] process > MakeGenomeFilter (genome.fa)   [100%] 1 of 1, cached: 1 ✔[K
[fe/2e0c27] process > FastQC (MV411_RNP_SPI1-H3K2... [100%] 17 of 17, cached:...[K
[68/a565e7] process > TrimGalore (MV411_RNP_MYB-H... [100%] 17 of 17, cached:...[K
[a2/0c2388] process > BWAMem (MV411_RNP_RUNX1-H3K... [100%] 17 of 17, cached:...[K
[13/4f49fa] process > SortBAM (INPUT_R1_T1)          [100%] 17 of 17, cached:...[K
[5a/6aaef3] process > MergeBAM (MV411_RNP_RUNX2-H... [100%] 17 of 17, cached:...[K
[48/4b5e62] process > MergeBAMFilter (MV411_RNP_R... [100%] 17 of 17 ✔[K
[dc/6cb451] process > MergeBAMRemoveOrphan (MV411... [100%] 17 of 17 ✔[K
[-        ] process > Preseq                         -[K
[36/03ab9d] process > CollectMultipleMetrics (MV4... [ 80%] 4 of 5[K
[26/8106f4] process > B

[28A
executor >  local (60)[K
[eb/23cd77] process > CheckDesign (additional_deg... [100%] 1 of 1, cached: 1 ✔[K
[c9/ec8de4] process > MakeTSSBED (genes.bed)         [100%] 1 of 1, cached: 1 ✔[K
[0d/f433a4] process > MakeGenomeFilter (genome.fa)   [100%] 1 of 1, cached: 1 ✔[K
[fe/2e0c27] process > FastQC (MV411_RNP_SPI1-H3K2... [100%] 17 of 17, cached:...[K
[68/a565e7] process > TrimGalore (MV411_RNP_MYB-H... [100%] 17 of 17, cached:...[K
[a2/0c2388] process > BWAMem (MV411_RNP_RUNX1-H3K... [100%] 17 of 17, cached:...[K
[13/4f49fa] process > SortBAM (INPUT_R1_T1)          [100%] 17 of 17, cached:...[K
[5a/6aaef3] process > MergeBAM (MV411_RNP_RUNX2-H... [100%] 17 of 17, cached:...[K
[48/4b5e62] process > MergeBAMFilter (MV411_RNP_R... [100%] 17 of 17 ✔[K
[dc/6cb451] process > MergeBAMRemoveOrphan (MV411... [100%] 17 of 17 ✔[K
[-        ] process > Preseq                         -[K
[36/03ab9d] process > CollectMultipleMetrics (MV4... [100%] 5 of 5[K
[2d/982381] process > B

[28A
executor >  local (65)[K
[eb/23cd77] process > CheckDesign (additional_deg... [100%] 1 of 1, cached: 1 ✔[K
[c9/ec8de4] process > MakeTSSBED (genes.bed)         [100%] 1 of 1, cached: 1 ✔[K
[0d/f433a4] process > MakeGenomeFilter (genome.fa)   [100%] 1 of 1, cached: 1 ✔[K
[fe/2e0c27] process > FastQC (MV411_RNP_SPI1-H3K2... [100%] 17 of 17, cached:...[K
[68/a565e7] process > TrimGalore (MV411_RNP_MYB-H... [100%] 17 of 17, cached:...[K
[a2/0c2388] process > BWAMem (MV411_RNP_RUNX1-H3K... [100%] 17 of 17, cached:...[K
[13/4f49fa] process > SortBAM (INPUT_R1_T1)          [100%] 17 of 17, cached:...[K
[5a/6aaef3] process > MergeBAM (MV411_RNP_RUNX2-H... [100%] 17 of 17, cached:...[K
[48/4b5e62] process > MergeBAMFilter (MV411_RNP_R... [100%] 17 of 17 ✔[K
[dc/6cb451] process > MergeBAMRemoveOrphan (MV411... [100%] 17 of 17 ✔[K
[-        ] process > Preseq                         -[K
[6f/0c32f5] process > CollectMultipleMetrics (INP... [100%] 6 of 6[K
[2d/982381] process > B

[28A
executor >  local (70)[K
[eb/23cd77] process > CheckDesign (additional_deg... [100%] 1 of 1, cached: 1 ✔[K
[c9/ec8de4] process > MakeTSSBED (genes.bed)         [100%] 1 of 1, cached: 1 ✔[K
[0d/f433a4] process > MakeGenomeFilter (genome.fa)   [100%] 1 of 1, cached: 1 ✔[K
[fe/2e0c27] process > FastQC (MV411_RNP_SPI1-H3K2... [100%] 17 of 17, cached:...[K
[68/a565e7] process > TrimGalore (MV411_RNP_MYB-H... [100%] 17 of 17, cached:...[K
[a2/0c2388] process > BWAMem (MV411_RNP_RUNX1-H3K... [100%] 17 of 17, cached:...[K
[13/4f49fa] process > SortBAM (INPUT_R1_T1)          [100%] 17 of 17, cached:...[K
[5a/6aaef3] process > MergeBAM (MV411_RNP_RUNX2-H... [100%] 17 of 17, cached:...[K
[48/4b5e62] process > MergeBAMFilter (MV411_RNP_R... [100%] 17 of 17 ✔[K
[dc/6cb451] process > MergeBAMRemoveOrphan (MV411... [100%] 17 of 17 ✔[K
[-        ] process > Preseq                         -[K
[77/85e402] process > CollectMultipleMetrics (MV4... [100%] 7 of 7[K
[83/f433d6] process > B

[28A
executor >  local (74)[K
[eb/23cd77] process > CheckDesign (additional_deg... [100%] 1 of 1, cached: 1 ✔[K
[c9/ec8de4] process > MakeTSSBED (genes.bed)         [100%] 1 of 1, cached: 1 ✔[K
[0d/f433a4] process > MakeGenomeFilter (genome.fa)   [100%] 1 of 1, cached: 1 ✔[K
[fe/2e0c27] process > FastQC (MV411_RNP_SPI1-H3K2... [100%] 17 of 17, cached:...[K
[68/a565e7] process > TrimGalore (MV411_RNP_MYB-H... [100%] 17 of 17, cached:...[K
[a2/0c2388] process > BWAMem (MV411_RNP_RUNX1-H3K... [100%] 17 of 17, cached:...[K
[13/4f49fa] process > SortBAM (INPUT_R1_T1)          [100%] 17 of 17, cached:...[K
[5a/6aaef3] process > MergeBAM (MV411_RNP_RUNX2-H... [100%] 17 of 17, cached:...[K
[48/4b5e62] process > MergeBAMFilter (MV411_RNP_R... [100%] 17 of 17 ✔[K
[dc/6cb451] process > MergeBAMRemoveOrphan (MV411... [100%] 17 of 17 ✔[K
[-        ] process > Preseq                         -[K
[f7/870fa3] process > CollectMultipleMetrics (MV4... [100%] 8 of 8[K
[be/8c5392] process > B

[28A
executor >  local (79)[K
[eb/23cd77] process > CheckDesign (additional_deg... [100%] 1 of 1, cached: 1 ✔[K
[c9/ec8de4] process > MakeTSSBED (genes.bed)         [100%] 1 of 1, cached: 1 ✔[K
[0d/f433a4] process > MakeGenomeFilter (genome.fa)   [100%] 1 of 1, cached: 1 ✔[K
[fe/2e0c27] process > FastQC (MV411_RNP_SPI1-H3K2... [100%] 17 of 17, cached:...[K
[68/a565e7] process > TrimGalore (MV411_RNP_MYB-H... [100%] 17 of 17, cached:...[K
[a2/0c2388] process > BWAMem (MV411_RNP_RUNX1-H3K... [100%] 17 of 17, cached:...[K
[13/4f49fa] process > SortBAM (INPUT_R1_T1)          [100%] 17 of 17, cached:...[K
[5a/6aaef3] process > MergeBAM (MV411_RNP_RUNX2-H... [100%] 17 of 17, cached:...[K
[48/4b5e62] process > MergeBAMFilter (MV411_RNP_R... [100%] 17 of 17 ✔[K
[dc/6cb451] process > MergeBAMRemoveOrphan (MV411... [100%] 17 of 17 ✔[K
[-        ] process > Preseq                         -[K
[eb/85c928] process > CollectMultipleMetrics (MV4... [100%] 9 of 9[K
[4e/c46492] process > B

[28A
executor >  local (84)[K
[eb/23cd77] process > CheckDesign (additional_deg... [100%] 1 of 1, cached: 1 ✔[K
[c9/ec8de4] process > MakeTSSBED (genes.bed)         [100%] 1 of 1, cached: 1 ✔[K
[0d/f433a4] process > MakeGenomeFilter (genome.fa)   [100%] 1 of 1, cached: 1 ✔[K
[fe/2e0c27] process > FastQC (MV411_RNP_SPI1-H3K2... [100%] 17 of 17, cached:...[K
[68/a565e7] process > TrimGalore (MV411_RNP_MYB-H... [100%] 17 of 17, cached:...[K
[a2/0c2388] process > BWAMem (MV411_RNP_RUNX1-H3K... [100%] 17 of 17, cached:...[K
[13/4f49fa] process > SortBAM (INPUT_R1_T1)          [100%] 17 of 17, cached:...[K
[5a/6aaef3] process > MergeBAM (MV411_RNP_RUNX2-H... [100%] 17 of 17, cached:...[K
[48/4b5e62] process > MergeBAMFilter (MV411_RNP_R... [100%] 17 of 17 ✔[K
[dc/6cb451] process > MergeBAMRemoveOrphan (MV411... [100%] 17 of 17 ✔[K
[-        ] process > Preseq                         -[K
[af/724a3d] process > CollectMultipleMetrics (MV4... [ 91%] 10 of 11[K
[60/3850df] process >

[28A
executor >  local (89)[K
[eb/23cd77] process > CheckDesign (additional_deg... [100%] 1 of 1, cached: 1 ✔[K
[c9/ec8de4] process > MakeTSSBED (genes.bed)         [100%] 1 of 1, cached: 1 ✔[K
[0d/f433a4] process > MakeGenomeFilter (genome.fa)   [100%] 1 of 1, cached: 1 ✔[K
[fe/2e0c27] process > FastQC (MV411_RNP_SPI1-H3K2... [100%] 17 of 17, cached:...[K
[68/a565e7] process > TrimGalore (MV411_RNP_MYB-H... [100%] 17 of 17, cached:...[K
[a2/0c2388] process > BWAMem (MV411_RNP_RUNX1-H3K... [100%] 17 of 17, cached:...[K
[13/4f49fa] process > SortBAM (INPUT_R1_T1)          [100%] 17 of 17, cached:...[K
[5a/6aaef3] process > MergeBAM (MV411_RNP_RUNX2-H... [100%] 17 of 17, cached:...[K
[48/4b5e62] process > MergeBAMFilter (MV411_RNP_R... [100%] 17 of 17 ✔[K
[dc/6cb451] process > MergeBAMRemoveOrphan (MV411... [100%] 17 of 17 ✔[K
[-        ] process > Preseq                         -[K
[be/e4f2a1] process > CollectMultipleMetrics (MV4... [100%] 12 of 12[K
[eb/3a1856] process >

[28A
executor >  local (94)[K
[eb/23cd77] process > CheckDesign (additional_deg... [100%] 1 of 1, cached: 1 ✔[K
[c9/ec8de4] process > MakeTSSBED (genes.bed)         [100%] 1 of 1, cached: 1 ✔[K
[0d/f433a4] process > MakeGenomeFilter (genome.fa)   [100%] 1 of 1, cached: 1 ✔[K
[fe/2e0c27] process > FastQC (MV411_RNP_SPI1-H3K2... [100%] 17 of 17, cached:...[K
[68/a565e7] process > TrimGalore (MV411_RNP_MYB-H... [100%] 17 of 17, cached:...[K
[a2/0c2388] process > BWAMem (MV411_RNP_RUNX1-H3K... [100%] 17 of 17, cached:...[K
[13/4f49fa] process > SortBAM (INPUT_R1_T1)          [100%] 17 of 17, cached:...[K
[5a/6aaef3] process > MergeBAM (MV411_RNP_RUNX2-H... [100%] 17 of 17, cached:...[K
[48/4b5e62] process > MergeBAMFilter (MV411_RNP_R... [100%] 17 of 17 ✔[K
[dc/6cb451] process > MergeBAMRemoveOrphan (MV411... [100%] 17 of 17 ✔[K
[-        ] process > Preseq                         -[K
[58/9c859d] process > CollectMultipleMetrics (MV4... [100%] 13 of 13[K
[eb/3a1856] process >

[28A
executor >  local (99)[K
[eb/23cd77] process > CheckDesign (additional_deg... [100%] 1 of 1, cached: 1 ✔[K
[c9/ec8de4] process > MakeTSSBED (genes.bed)         [100%] 1 of 1, cached: 1 ✔[K
[0d/f433a4] process > MakeGenomeFilter (genome.fa)   [100%] 1 of 1, cached: 1 ✔[K
[fe/2e0c27] process > FastQC (MV411_RNP_SPI1-H3K2... [100%] 17 of 17, cached:...[K
[68/a565e7] process > TrimGalore (MV411_RNP_MYB-H... [100%] 17 of 17, cached:...[K
[a2/0c2388] process > BWAMem (MV411_RNP_RUNX1-H3K... [100%] 17 of 17, cached:...[K
[13/4f49fa] process > SortBAM (INPUT_R1_T1)          [100%] 17 of 17, cached:...[K
[5a/6aaef3] process > MergeBAM (MV411_RNP_RUNX2-H... [100%] 17 of 17, cached:...[K
[48/4b5e62] process > MergeBAMFilter (MV411_RNP_R... [100%] 17 of 17 ✔[K
[dc/6cb451] process > MergeBAMRemoveOrphan (MV411... [100%] 17 of 17 ✔[K
[-        ] process > Preseq                         -[K
[39/7b314d] process > CollectMultipleMetrics (MV4... [100%] 14 of 14[K
[e1/c25e9c] process >

[28A
executor >  local (104)[K
[eb/23cd77] process > CheckDesign (additional_deg... [100%] 1 of 1, cached: 1 ✔[K
[c9/ec8de4] process > MakeTSSBED (genes.bed)         [100%] 1 of 1, cached: 1 ✔[K
[0d/f433a4] process > MakeGenomeFilter (genome.fa)   [100%] 1 of 1, cached: 1 ✔[K
[fe/2e0c27] process > FastQC (MV411_RNP_SPI1-H3K2... [100%] 17 of 17, cached:...[K
[68/a565e7] process > TrimGalore (MV411_RNP_MYB-H... [100%] 17 of 17, cached:...[K
[a2/0c2388] process > BWAMem (MV411_RNP_RUNX1-H3K... [100%] 17 of 17, cached:...[K
[13/4f49fa] process > SortBAM (INPUT_R1_T1)          [100%] 17 of 17, cached:...[K
[5a/6aaef3] process > MergeBAM (MV411_RNP_RUNX2-H... [100%] 17 of 17, cached:...[K
[48/4b5e62] process > MergeBAMFilter (MV411_RNP_R... [100%] 17 of 17 ✔[K
[dc/6cb451] process > MergeBAMRemoveOrphan (MV411... [100%] 17 of 17 ✔[K
[-        ] process > Preseq                         -[K
[85/5ed89a] process > CollectMultipleMetrics (MV4... [ 94%] 15 of 16[K
[9c/f2630b] process 

[28A
executor >  local (109)[K
[eb/23cd77] process > CheckDesign (additional_deg... [100%] 1 of 1, cached: 1 ✔[K
[c9/ec8de4] process > MakeTSSBED (genes.bed)         [100%] 1 of 1, cached: 1 ✔[K
[0d/f433a4] process > MakeGenomeFilter (genome.fa)   [100%] 1 of 1, cached: 1 ✔[K
[fe/2e0c27] process > FastQC (MV411_RNP_SPI1-H3K2... [100%] 17 of 17, cached:...[K
[68/a565e7] process > TrimGalore (MV411_RNP_MYB-H... [100%] 17 of 17, cached:...[K
[a2/0c2388] process > BWAMem (MV411_RNP_RUNX1-H3K... [100%] 17 of 17, cached:...[K
[13/4f49fa] process > SortBAM (INPUT_R1_T1)          [100%] 17 of 17, cached:...[K
[5a/6aaef3] process > MergeBAM (MV411_RNP_RUNX2-H... [100%] 17 of 17, cached:...[K
[48/4b5e62] process > MergeBAMFilter (MV411_RNP_R... [100%] 17 of 17 ✔[K
[dc/6cb451] process > MergeBAMRemoveOrphan (MV411... [100%] 17 of 17 ✔[K
[-        ] process > Preseq                         -[K
[7b/722dae] process > CollectMultipleMetrics (MV4... [100%] 17 of 17 ✔[K
[a1/2654ae] proces

[28A
executor >  local (114)[K
[eb/23cd77] process > CheckDesign (additional_deg... [100%] 1 of 1, cached: 1 ✔[K
[c9/ec8de4] process > MakeTSSBED (genes.bed)         [100%] 1 of 1, cached: 1 ✔[K
[0d/f433a4] process > MakeGenomeFilter (genome.fa)   [100%] 1 of 1, cached: 1 ✔[K
[fe/2e0c27] process > FastQC (MV411_RNP_SPI1-H3K2... [100%] 17 of 17, cached:...[K
[68/a565e7] process > TrimGalore (MV411_RNP_MYB-H... [100%] 17 of 17, cached:...[K
[a2/0c2388] process > BWAMem (MV411_RNP_RUNX1-H3K... [100%] 17 of 17, cached:...[K
[13/4f49fa] process > SortBAM (INPUT_R1_T1)          [100%] 17 of 17, cached:...[K
[5a/6aaef3] process > MergeBAM (MV411_RNP_RUNX2-H... [100%] 17 of 17, cached:...[K
[48/4b5e62] process > MergeBAMFilter (MV411_RNP_R... [100%] 17 of 17 ✔[K
[dc/6cb451] process > MergeBAMRemoveOrphan (MV411... [100%] 17 of 17 ✔[K
[-        ] process > Preseq                         -[K
[7b/722dae] process > CollectMultipleMetrics (MV4... [100%] 17 of 17 ✔[K
[a1/2654ae] proces

[28A
executor >  local (120)[K
[eb/23cd77] process > CheckDesign (additional_deg... [100%] 1 of 1, cached: 1 ✔[K
[c9/ec8de4] process > MakeTSSBED (genes.bed)         [100%] 1 of 1, cached: 1 ✔[K
[0d/f433a4] process > MakeGenomeFilter (genome.fa)   [100%] 1 of 1, cached: 1 ✔[K
[fe/2e0c27] process > FastQC (MV411_RNP_SPI1-H3K2... [100%] 17 of 17, cached:...[K
[68/a565e7] process > TrimGalore (MV411_RNP_MYB-H... [100%] 17 of 17, cached:...[K
[a2/0c2388] process > BWAMem (MV411_RNP_RUNX1-H3K... [100%] 17 of 17, cached:...[K
[13/4f49fa] process > SortBAM (INPUT_R1_T1)          [100%] 17 of 17, cached:...[K
[5a/6aaef3] process > MergeBAM (MV411_RNP_RUNX2-H... [100%] 17 of 17, cached:...[K
[48/4b5e62] process > MergeBAMFilter (MV411_RNP_R... [100%] 17 of 17 ✔[K
[dc/6cb451] process > MergeBAMRemoveOrphan (MV411... [100%] 17 of 17 ✔[K
[-        ] process > Preseq                         -[K
[7b/722dae] process > CollectMultipleMetrics (MV4... [100%] 17 of 17 ✔[K
[a1/2654ae] proces

[28A
executor >  local (125)[K
[eb/23cd77] process > CheckDesign (additional_deg... [100%] 1 of 1, cached: 1 ✔[K
[c9/ec8de4] process > MakeTSSBED (genes.bed)         [100%] 1 of 1, cached: 1 ✔[K
[0d/f433a4] process > MakeGenomeFilter (genome.fa)   [100%] 1 of 1, cached: 1 ✔[K
[fe/2e0c27] process > FastQC (MV411_RNP_SPI1-H3K2... [100%] 17 of 17, cached:...[K
[68/a565e7] process > TrimGalore (MV411_RNP_MYB-H... [100%] 17 of 17, cached:...[K
[a2/0c2388] process > BWAMem (MV411_RNP_RUNX1-H3K... [100%] 17 of 17, cached:...[K
[13/4f49fa] process > SortBAM (INPUT_R1_T1)          [100%] 17 of 17, cached:...[K
[5a/6aaef3] process > MergeBAM (MV411_RNP_RUNX2-H... [100%] 17 of 17, cached:...[K
[48/4b5e62] process > MergeBAMFilter (MV411_RNP_R... [100%] 17 of 17 ✔[K
[dc/6cb451] process > MergeBAMRemoveOrphan (MV411... [100%] 17 of 17 ✔[K
[-        ] process > Preseq                         -[K
[7b/722dae] process > CollectMultipleMetrics (MV4... [100%] 17 of 17 ✔[K
[a1/2654ae] proces

[28A
executor >  local (129)[K
[eb/23cd77] process > CheckDesign (additional_deg... [100%] 1 of 1, cached: 1 ✔[K
[c9/ec8de4] process > MakeTSSBED (genes.bed)         [100%] 1 of 1, cached: 1 ✔[K
[0d/f433a4] process > MakeGenomeFilter (genome.fa)   [100%] 1 of 1, cached: 1 ✔[K
[fe/2e0c27] process > FastQC (MV411_RNP_SPI1-H3K2... [100%] 17 of 17, cached:...[K
[68/a565e7] process > TrimGalore (MV411_RNP_MYB-H... [100%] 17 of 17, cached:...[K
[a2/0c2388] process > BWAMem (MV411_RNP_RUNX1-H3K... [100%] 17 of 17, cached:...[K
[13/4f49fa] process > SortBAM (INPUT_R1_T1)          [100%] 17 of 17, cached:...[K
[5a/6aaef3] process > MergeBAM (MV411_RNP_RUNX2-H... [100%] 17 of 17, cached:...[K
[48/4b5e62] process > MergeBAMFilter (MV411_RNP_R... [100%] 17 of 17 ✔[K
[dc/6cb451] process > MergeBAMRemoveOrphan (MV411... [100%] 17 of 17 ✔[K
[-        ] process > Preseq                         -[K
[7b/722dae] process > CollectMultipleMetrics (MV4... [100%] 17 of 17 ✔[K
[a1/2654ae] proces

[28A
executor >  local (133)[K
[eb/23cd77] process > CheckDesign (additional_deg... [100%] 1 of 1, cached: 1 ✔[K
[c9/ec8de4] process > MakeTSSBED (genes.bed)         [100%] 1 of 1, cached: 1 ✔[K
[0d/f433a4] process > MakeGenomeFilter (genome.fa)   [100%] 1 of 1, cached: 1 ✔[K
[fe/2e0c27] process > FastQC (MV411_RNP_SPI1-H3K2... [100%] 17 of 17, cached:...[K
[68/a565e7] process > TrimGalore (MV411_RNP_MYB-H... [100%] 17 of 17, cached:...[K
[a2/0c2388] process > BWAMem (MV411_RNP_RUNX1-H3K... [100%] 17 of 17, cached:...[K
[13/4f49fa] process > SortBAM (INPUT_R1_T1)          [100%] 17 of 17, cached:...[K
[5a/6aaef3] process > MergeBAM (MV411_RNP_RUNX2-H... [100%] 17 of 17, cached:...[K
[48/4b5e62] process > MergeBAMFilter (MV411_RNP_R... [100%] 17 of 17 ✔[K
[dc/6cb451] process > MergeBAMRemoveOrphan (MV411... [100%] 17 of 17 ✔[K
[-        ] process > Preseq                         -[K
[7b/722dae] process > CollectMultipleMetrics (MV4... [100%] 17 of 17 ✔[K
[a1/2654ae] proces

[28A
executor >  local (138)[K
[eb/23cd77] process > CheckDesign (additional_deg... [100%] 1 of 1, cached: 1 ✔[K
[c9/ec8de4] process > MakeTSSBED (genes.bed)         [100%] 1 of 1, cached: 1 ✔[K
[0d/f433a4] process > MakeGenomeFilter (genome.fa)   [100%] 1 of 1, cached: 1 ✔[K
[fe/2e0c27] process > FastQC (MV411_RNP_SPI1-H3K2... [100%] 17 of 17, cached:...[K
[68/a565e7] process > TrimGalore (MV411_RNP_MYB-H... [100%] 17 of 17, cached:...[K
[a2/0c2388] process > BWAMem (MV411_RNP_RUNX1-H3K... [100%] 17 of 17, cached:...[K
[13/4f49fa] process > SortBAM (INPUT_R1_T1)          [100%] 17 of 17, cached:...[K
[5a/6aaef3] process > MergeBAM (MV411_RNP_RUNX2-H... [100%] 17 of 17, cached:...[K
[48/4b5e62] process > MergeBAMFilter (MV411_RNP_R... [100%] 17 of 17 ✔[K
[dc/6cb451] process > MergeBAMRemoveOrphan (MV411... [100%] 17 of 17 ✔[K
[-        ] process > Preseq                         -[K
[7b/722dae] process > CollectMultipleMetrics (MV4... [100%] 17 of 17 ✔[K
[a1/2654ae] proces

[28A
executor >  local (143)[K
[eb/23cd77] process > CheckDesign (additional_deg... [100%] 1 of 1, cached: 1 ✔[K
[c9/ec8de4] process > MakeTSSBED (genes.bed)         [100%] 1 of 1, cached: 1 ✔[K
[0d/f433a4] process > MakeGenomeFilter (genome.fa)   [100%] 1 of 1, cached: 1 ✔[K
[fe/2e0c27] process > FastQC (MV411_RNP_SPI1-H3K2... [100%] 17 of 17, cached:...[K
[68/a565e7] process > TrimGalore (MV411_RNP_MYB-H... [100%] 17 of 17, cached:...[K
[a2/0c2388] process > BWAMem (MV411_RNP_RUNX1-H3K... [100%] 17 of 17, cached:...[K
[13/4f49fa] process > SortBAM (INPUT_R1_T1)          [100%] 17 of 17, cached:...[K
[5a/6aaef3] process > MergeBAM (MV411_RNP_RUNX2-H... [100%] 17 of 17, cached:...[K
[48/4b5e62] process > MergeBAMFilter (MV411_RNP_R... [100%] 17 of 17 ✔[K
[dc/6cb451] process > MergeBAMRemoveOrphan (MV411... [100%] 17 of 17 ✔[K
[-        ] process > Preseq                         -[K
[7b/722dae] process > CollectMultipleMetrics (MV4... [100%] 17 of 17 ✔[K
[a1/2654ae] proces

[28A
executor >  local (148)[K
[eb/23cd77] process > CheckDesign (additional_deg... [100%] 1 of 1, cached: 1 ✔[K
[c9/ec8de4] process > MakeTSSBED (genes.bed)         [100%] 1 of 1, cached: 1 ✔[K
[0d/f433a4] process > MakeGenomeFilter (genome.fa)   [100%] 1 of 1, cached: 1 ✔[K
[fe/2e0c27] process > FastQC (MV411_RNP_SPI1-H3K2... [100%] 17 of 17, cached:...[K
[68/a565e7] process > TrimGalore (MV411_RNP_MYB-H... [100%] 17 of 17, cached:...[K
[a2/0c2388] process > BWAMem (MV411_RNP_RUNX1-H3K... [100%] 17 of 17, cached:...[K
[13/4f49fa] process > SortBAM (INPUT_R1_T1)          [100%] 17 of 17, cached:...[K
[5a/6aaef3] process > MergeBAM (MV411_RNP_RUNX2-H... [100%] 17 of 17, cached:...[K
[48/4b5e62] process > MergeBAMFilter (MV411_RNP_R... [100%] 17 of 17 ✔[K
[dc/6cb451] process > MergeBAMRemoveOrphan (MV411... [100%] 17 of 17 ✔[K
[-        ] process > Preseq                         -[K
[7b/722dae] process > CollectMultipleMetrics (MV4... [100%] 17 of 17 ✔[K
[a1/2654ae] proces

[28A
executor >  local (153)[K
[eb/23cd77] process > CheckDesign (additional_deg... [100%] 1 of 1, cached: 1 ✔[K
[c9/ec8de4] process > MakeTSSBED (genes.bed)         [100%] 1 of 1, cached: 1 ✔[K
[0d/f433a4] process > MakeGenomeFilter (genome.fa)   [100%] 1 of 1, cached: 1 ✔[K
[fe/2e0c27] process > FastQC (MV411_RNP_SPI1-H3K2... [100%] 17 of 17, cached:...[K
[68/a565e7] process > TrimGalore (MV411_RNP_MYB-H... [100%] 17 of 17, cached:...[K
[a2/0c2388] process > BWAMem (MV411_RNP_RUNX1-H3K... [100%] 17 of 17, cached:...[K
[13/4f49fa] process > SortBAM (INPUT_R1_T1)          [100%] 17 of 17, cached:...[K
[5a/6aaef3] process > MergeBAM (MV411_RNP_RUNX2-H... [100%] 17 of 17, cached:...[K
[48/4b5e62] process > MergeBAMFilter (MV411_RNP_R... [100%] 17 of 17 ✔[K
[dc/6cb451] process > MergeBAMRemoveOrphan (MV411... [100%] 17 of 17 ✔[K
[-        ] process > Preseq                         -[K
[7b/722dae] process > CollectMultipleMetrics (MV4... [100%] 17 of 17 ✔[K
[a1/2654ae] proces

[28A
executor >  local (158)[K
[eb/23cd77] process > CheckDesign (additional_deg... [100%] 1 of 1, cached: 1 ✔[K
[c9/ec8de4] process > MakeTSSBED (genes.bed)         [100%] 1 of 1, cached: 1 ✔[K
[0d/f433a4] process > MakeGenomeFilter (genome.fa)   [100%] 1 of 1, cached: 1 ✔[K
[fe/2e0c27] process > FastQC (MV411_RNP_SPI1-H3K2... [100%] 17 of 17, cached:...[K
[68/a565e7] process > TrimGalore (MV411_RNP_MYB-H... [100%] 17 of 17, cached:...[K
[a2/0c2388] process > BWAMem (MV411_RNP_RUNX1-H3K... [100%] 17 of 17, cached:...[K
[13/4f49fa] process > SortBAM (INPUT_R1_T1)          [100%] 17 of 17, cached:...[K
[5a/6aaef3] process > MergeBAM (MV411_RNP_RUNX2-H... [100%] 17 of 17, cached:...[K
[48/4b5e62] process > MergeBAMFilter (MV411_RNP_R... [100%] 17 of 17 ✔[K
[dc/6cb451] process > MergeBAMRemoveOrphan (MV411... [100%] 17 of 17 ✔[K
[-        ] process > Preseq                         -[K
[7b/722dae] process > CollectMultipleMetrics (MV4... [100%] 17 of 17 ✔[K
[a1/2654ae] proces

[28A
executor >  local (162)[K
[eb/23cd77] process > CheckDesign (additional_deg... [100%] 1 of 1, cached: 1 ✔[K
[c9/ec8de4] process > MakeTSSBED (genes.bed)         [100%] 1 of 1, cached: 1 ✔[K
[0d/f433a4] process > MakeGenomeFilter (genome.fa)   [100%] 1 of 1, cached: 1 ✔[K
[fe/2e0c27] process > FastQC (MV411_RNP_SPI1-H3K2... [100%] 17 of 17, cached:...[K
[68/a565e7] process > TrimGalore (MV411_RNP_MYB-H... [100%] 17 of 17, cached:...[K
[a2/0c2388] process > BWAMem (MV411_RNP_RUNX1-H3K... [100%] 17 of 17, cached:...[K
[13/4f49fa] process > SortBAM (INPUT_R1_T1)          [100%] 17 of 17, cached:...[K
[5a/6aaef3] process > MergeBAM (MV411_RNP_RUNX2-H... [100%] 17 of 17, cached:...[K
[48/4b5e62] process > MergeBAMFilter (MV411_RNP_R... [100%] 17 of 17 ✔[K
[dc/6cb451] process > MergeBAMRemoveOrphan (MV411... [100%] 17 of 17 ✔[K
[-        ] process > Preseq                         -[K
[7b/722dae] process > CollectMultipleMetrics (MV4... [100%] 17 of 17 ✔[K
[a1/2654ae] proces

[31A
executor >  local (163)[K
[eb/23cd77] process > CheckDesign (additional_deg... [100%] 1 of 1, cached: 1 ✔[K
[c9/ec8de4] process > MakeTSSBED (genes.bed)         [100%] 1 of 1, cached: 1 ✔[K
[0d/f433a4] process > MakeGenomeFilter (genome.fa)   [100%] 1 of 1, cached: 1 ✔[K
[fe/2e0c27] process > FastQC (MV411_RNP_SPI1-H3K2... [100%] 17 of 17, cached:...[K
[68/a565e7] process > TrimGalore (MV411_RNP_MYB-H... [100%] 17 of 17, cached:...[K
[a2/0c2388] process > BWAMem (MV411_RNP_RUNX1-H3K... [100%] 17 of 17, cached:...[K
[13/4f49fa] process > SortBAM (INPUT_R1_T1)          [100%] 17 of 17, cached:...[K
[5a/6aaef3] process > MergeBAM (MV411_RNP_RUNX2-H... [100%] 17 of 17, cached:...[K
[48/4b5e62] process > MergeBAMFilter (MV411_RNP_R... [100%] 17 of 17 ✔[K
[dc/6cb451] process > MergeBAMRemoveOrphan (MV411... [100%] 17 of 17 ✔[K
[-        ] process > Preseq                         -[K
[7b/722dae] process > CollectMultipleMetrics (MV4... [100%] 17 of 17 ✔[K
[a1/2654ae] proces

In [10]:
!cp -r ../../data/results/* ../../data/$project/ && sudo rm -r ../data/results && sudo rm -r ..data/work

^C


In [14]:
# get scaling values
norm, mapped = h.getSpikeInControlScales(refgenome="../../data/ref/reference_droso.fna",
fastQfolder='../../data/'+project+'/fastqs/',
pairedEnd=True, cores=12,
tofilter=True,
totrim=True,
tomap=True,
results="../../data/"+project+"/",
pathtotrim_galore="../../TrimGalore-0.6.5/trim_galore")
norm, mapped

using all files from folder
need to be name_*1, name_*2
[('mp858-MV411_RNP_AAVS1-H3K27AC-r1_R1_001.fastq.gz', 'mp858-MV411_RNP_AAVS1-H3K27AC-r1_R2_001.fastq.gz'), ('mp859-MV411_RNP_AAVS1-H3K27AC-r2_R1_001.fastq.gz', 'mp859-MV411_RNP_AAVS1-H3K27AC-r2_R2_001.fastq.gz'), ('mp860-MV411_RNP_RUNX1-H3K27AC-r1_R1_001.fastq.gz', 'mp860-MV411_RNP_RUNX1-H3K27AC-r1_R2_001.fastq.gz'), ('mp861-MV411_RNP_RUNX1-H3K27AC-r2_R1_001.fastq.gz', 'mp861-MV411_RNP_RUNX1-H3K27AC-r2_R2_001.fastq.gz'), ('mp862-MV411_RNP_RUNX2-H3K27AC-r1_R1_001.fastq.gz', 'mp862-MV411_RNP_RUNX2-H3K27AC-r1_R2_001.fastq.gz'), ('mp863-MV411_RNP_RUNX2-H3K27AC-r2_R1_001.fastq.gz', 'mp863-MV411_RNP_RUNX2-H3K27AC-r2_R2_001.fastq.gz'), ('mp864-MV411_RNP_RUNX1_RUNX2-H3K27AC-r1_R1_001.fastq.gz', 'mp864-MV411_RNP_RUNX1_RUNX2-H3K27AC-r1_R2_001.fastq.gz'), ('mp865-MV411_RNP_RUNX1_RUNX2-H3K27AC-r2_R1_001.fastq.gz', 'mp865-MV411_RNP_RUNX1_RUNX2-H3K27AC-r2_R2_001.fastq.gz'), ('mp866-MV411_RNP_MEF2D-H3K27AC-r1_R1_001.fastq.gz', 'mp866-MV411_RNP_M

({'mp858-MV411_RNP_AAVS1-H3K27AC-r1_R1_001_val_1': 0.6644280581189461,
  'mp859-MV411_RNP_AAVS1-H3K27AC-r2_R1_001_val_1': 0.5408483387497395,
  'mp860-MV411_RNP_RUNX1-H3K27AC-r1_R1_001_val_1': 0.9306982320552297,
  'mp861-MV411_RNP_RUNX1-H3K27AC-r2_R1_001_val_1': 0.4595249033344818,
  'mp862-MV411_RNP_RUNX2-H3K27AC-r1_R1_001_val_1': 1.0,
  'mp863-MV411_RNP_RUNX2-H3K27AC-r2_R1_001_val_1': 0.45660285309024545,
  'mp864-MV411_RNP_RUNX1_RUNX2-H3K27AC-r1_R1_001_val_1': 0.500947891528239,
  'mp865-MV411_RNP_RUNX1_RUNX2-H3K27AC-r2_R1_001_val_1': 0.3734932073757382,
  'mp866-MV411_RNP_MEF2D-H3K27AC-r1_R1_001_val_1': 0.37678768740112,
  'mp867-MV411_RNP_MEF2D-H3K27AC-r2_R1_001_val_1': 0.397505335892133,
  'mp868-MV411_RNP_IRF8-H3K27AC-r1_R1_001_val_1': 0.20011420092694476,
  'mp869-MV411_RNP_IRF8-H3K27AC-r2_R1_001_val_1': 0.2631428229278934,
  'mp870-MV411_RNP_MYB-H3K27AC-r1_R1_001_val_1': 0.5015211299972058,
  'mp871-MV411_RNP_MYB-H3K27AC-r2_R1_001_val_1': 0.32322855597358185,
  'mp872-MV411_R

computing scales from the excel sheet

In [119]:
scales = [0.3011826465, # 'MV411_RNP_IRF8-H3K27AC_R1',
0.4865371752, # 'MV411_RNP_IRF8-H3K27AC_R2',
0.5670857556, # 'MV411_RNP_MEF2D-H3K27AC_R1',
0.7349663619, # 'MV411_RNP_MEF2D-H3K27AC_R2',
0.7548163023, # 'MV411_RNP_MYB-H3K27AC_R1',
0.5976325206, # 'MV411_RNP_MYB-H3K27AC_R2',
1.400750948, # 'MV411_RNP_RUNX1-H3K27AC_R1',
0.849637265, # 'MV411_RNP_RUNX1-H3K27AC_R2',
0.7539535476, # 'MV411_RNP_RUNX1_RUNX2-H3K27AC_R1',
0.6905692051, # 'MV411_RNP_RUNX1_RUNX2-H3K27AC_R2',
1.50505384, # 'MV411_RNP_RUNX2-H3K27AC_R1',
0.8442345485, # 'MV411_RNP_RUNX2-H3K27AC_R2',
0.5019100631, # 'MV411_RNP_SPI1-H3K27AC_R1',
0.8688220473, # 'MV411_RNP_SPI1-H3K27AC_R2'
         ]

In [78]:
bams = ! ls ../../data/$project/bwa/mergedLibrary/*.bam
bams

['../../data/additional_degraded_v1/bwa/mergedLibrary/INPUT_R1.mLb.clN.sorted.bam',
 '../../data/additional_degraded_v1/bwa/mergedLibrary/MV411_RNP_AAVS1-H3K27AC_R1.mLb.clN.sorted.bam',
 '../../data/additional_degraded_v1/bwa/mergedLibrary/MV411_RNP_AAVS1-H3K27AC_R2.mLb.clN.sorted.bam',
 '../../data/additional_degraded_v1/bwa/mergedLibrary/MV411_RNP_IRF8-H3K27AC_R1.mLb.clN.sorted.bam',
 '../../data/additional_degraded_v1/bwa/mergedLibrary/MV411_RNP_IRF8-H3K27AC_R2.mLb.clN.sorted.bam',
 '../../data/additional_degraded_v1/bwa/mergedLibrary/MV411_RNP_MEF2D-H3K27AC_R1.mLb.clN.sorted.bam',
 '../../data/additional_degraded_v1/bwa/mergedLibrary/MV411_RNP_MEF2D-H3K27AC_R2.mLb.clN.sorted.bam',
 '../../data/additional_degraded_v1/bwa/mergedLibrary/MV411_RNP_MYB-H3K27AC_R1.mLb.clN.sorted.bam',
 '../../data/additional_degraded_v1/bwa/mergedLibrary/MV411_RNP_MYB-H3K27AC_R2.mLb.clN.sorted.bam',
 '../../data/additional_degraded_v1/bwa/mergedLibrary/MV411_RNP_RUNX1-H3K27AC_R1.mLb.clN.sorted.bam',
 '..

In [26]:
bams[6]

'../../data/additional_degraded_v1/bwa/mergedLibrary/MV411_RNP_MEF2D-H3K27AC_R2.mLb.clN.sorted.bam'

### on scalled data

In [21]:
! mkdir ../../data/$project/diffPeaks/ && ! mkdir ../../data/$project/diffData/

In [32]:
! mkdir ../../data/$project/droso_aligned

In [33]:
! mv ../../data/$project/mp* ../../data/$project/droso_aligned

In [24]:
wigs = ! ls ../../data/$project/bwa/mergedLibrary/bigwig/*.bigWig

In [5]:
ls ../../data/$project/bwa/mergedLibrary/bigwig/

INPUT_R1.mLb.clN.bigWig
MV411_RNP_AAVS1-H3K27AC_R1.mLb.clN.bdg
MV411_RNP_AAVS1-H3K27AC_R1.mLb.clN.bigWig
MV411_RNP_AAVS1-H3K27AC_R2.mLb.clN.bdg
MV411_RNP_AAVS1-H3K27AC_R2.mLb.clN.bigWig
MV411_RNP_IRF8-H3K27AC_R1.mLb.clN.bdg
MV411_RNP_IRF8-H3K27AC_R1.mLb.clN.bigWig
MV411_RNP_IRF8-H3K27AC_R2.mLb.clN.bdg
MV411_RNP_IRF8-H3K27AC_R2.mLb.clN.bigWig
MV411_RNP_MEF2D-H3K27AC_R1.mLb.clN.bdg
MV411_RNP_MEF2D-H3K27AC_R1.mLb.clN.bigWig
MV411_RNP_MEF2D-H3K27AC_R2.mLb.clN.bdg
MV411_RNP_MEF2D-H3K27AC_R2.mLb.clN.bigWig
MV411_RNP_MYB-H3K27AC_R1.mLb.clN.bdg
MV411_RNP_MYB-H3K27AC_R1.mLb.clN.bigWig
MV411_RNP_MYB-H3K27AC_R2.mLb.clN.bdg
MV411_RNP_MYB-H3K27AC_R2.mLb.clN.bigWig
MV411_RNP_RUNX1-H3K27AC_R1.mLb.clN.bdg
MV411_RNP_RUNX1-H3K27AC_R1.mLb.clN.bigWig
MV411_RNP_RUNX1-H3K27AC_R2.mLb.clN.bdg
MV411_RNP_RUNX1-H3K27AC_R2.mLb.clN.bigWig
MV411_RNP_RUNX1_RUNX2-H3K27AC_R1.mLb.clN.bdg
MV411_RNP_RUNX1_RUNX2-H3K27AC_R1.mLb.clN.bigWig
MV411_RNP_RUNX1_RUNX2-H3K27AC_R2.mLb.clN.bdg
MV411_RNP_RUNX1_

In [80]:
! gsutil -m cp -r ../../data/$project/bwa/mergedLibrary/bigwig/ gs://amlproject/Chip/$project/bwa/mergedLibrary/
! gsutil -m cp -r ../../data/$project/droso_aligned gs://amlproject/Chip/$project/
! gsutil -m cp -r ../../data/$project/recalib_bigwig gs://amlproject/Chip/$project/

Copying file://../../data/additional_degraded_v1/bwa/mergedLibrary/bigwig/MV411_RNP_IRF8-H3K27AC_R2.mLb.clN.bigWig [Content-Type=application/octet-stream]...
Copying file://../../data/additional_degraded_v1/bwa/mergedLibrary/bigwig/MV411_RNP_MYB-H3K27AC_R1.mLb.clN.bigWig [Content-Type=application/octet-stream]...
==> NOTE: You are uploading one or more large file(s), which would run
significantly faster if you enable parallel composite uploads. This
feature can be enabled by editing the
"parallel_composite_upload_threshold" value in your .boto
configuration file. However, note that if you do this large files will
be uploaded as `composite objects
<https://cloud.google.com/storage/docs/composite-objects>`_,which
means that any user who downloads such objects will need to have a
compiled crcmod installed (see "gsutil help crcmod"). This is because
without a compiled crcmod, computing checksums on composite objects is
so slow that gsutil disables downloads of composite objects.

Copying f

Copying file://../../data/additional_degraded_v1/bwa/mergedLibrary/bigwig/scale/MV411_RNP_MYB-H3K27AC_R1.mLb.clN.scale_factor.txt [Content-Type=text/plain]...
/ [50/50 files][ 22.6 GiB/ 22.6 GiB] 100% Done  83.7 MiB/s ETA 00:00:00         
Operation completed over 50 objects/22.6 GiB.                                    
Copying file://../../data/additional_degraded_v1/droso_aligned/mp862-MV411_RNP_RUNX2-H3K27AC-r1_R1_001_val_1.sorted.bam [Content-Type=application/octet-stream]...
Copying file://../../data/additional_degraded_v1/droso_aligned/mp860-MV411_RNP_RUNX1-H3K27AC-r1_R1_001.sorted.bam.bai [Content-Type=application/octet-stream]...
Copying file://../../data/additional_degraded_v1/droso_aligned/mp870-MV411_RNP_MYB-H3K27AC-r1_R1_001.sorted.bam.bai [Content-Type=application/octet-stream]...
==> NOTE: You are uploading one or more large file(s), which would run          
significantly faster if you enable parallel composite uploads. This
feature can be enabled by editing the
"paralle

Copying file://../../data/additional_degraded_v1/droso_aligned/mp873-MV411_RNP_SPI1-H3K27AC-r2_R1_001_val_1.sorted.bam.idxstat [Content-Type=application/octet-stream]...
Copying file://../../data/additional_degraded_v1/droso_aligned/mp865-MV411_RNP_RUNX1_RUNX2-H3K27AC-r2_R1_001_val_1.sorted.bam [Content-Type=application/octet-stream]...
Copying file://../../data/additional_degraded_v1/droso_aligned/mp858-MV411_RNP_AAVS1-H3K27AC-r1_R1_001_val_1.sorted.bam.bai [Content-Type=application/octet-stream]...
Copying file://../../data/additional_degraded_v1/droso_aligned/mp870-MV411_RNP_MYB-H3K27AC-r1_R1_001_val_1.sorted.bam.bai [Content-Type=application/octet-stream]...
Copying file://../../data/additional_degraded_v1/droso_aligned/mp871-MV411_RNP_MYB-H3K27AC-r2_R2_001_val_2.fq.gz [Content-Type=application/octet-stream]...
Copying file://../../data/additional_degraded_v1/droso_aligned/mp858-MV411_RNP_AAVS1-H3K27AC-r1_R1_001.sorted.bam [Content-Type=application/octet-stream]...
Copying file://.

Copying file://../../data/additional_degraded_v1/droso_aligned/mp872-MV411_RNP_SPI1-H3K27AC-r1_R1_001_val_1.sorted.bam [Content-Type=application/octet-stream]...
Copying file://../../data/additional_degraded_v1/droso_aligned/mp864-MV411_RNP_RUNX1_RUNX2-H3K27AC-r1_R1_001.sorted.bam [Content-Type=application/octet-stream]...
Copying file://../../data/additional_degraded_v1/droso_aligned/mp868-MV411_RNP_IRF8-H3K27AC-r1_R1_001.sorted.bam.bai [Content-Type=application/octet-stream]...
Copying file://../../data/additional_degraded_v1/droso_aligned/mp868-MV411_RNP_IRF8-H3K27AC-r1_R1_001_val_1.fq.gz [Content-Type=application/octet-stream]...
Copying file://../../data/additional_degraded_v1/droso_aligned/mp873-MV411_RNP_SPI1-H3K27AC-r2_R1_001.sorted.bam.idxstat [Content-Type=application/octet-stream]...
Copying file://../../data/additional_degraded_v1/droso_aligned/mp868-MV411_RNP_IRF8-H3K27AC-r1_R1_001.fastq.gz_trimming_report.txt [Content-Type=text/plain]...
Copying file://../../data/addition

Copying file://../../data/additional_degraded_v1/droso_aligned/mp861-MV411_RNP_RUNX1-H3K27AC-r2_R2_001_val_2.fq.gz [Content-Type=application/octet-stream]...
Copying file://../../data/additional_degraded_v1/droso_aligned/mp872-MV411_RNP_SPI1-H3K27AC-r1_R1_001.sorted.bam [Content-Type=application/octet-stream]...
Copying file://../../data/additional_degraded_v1/droso_aligned/mp862-MV411_RNP_RUNX2-H3K27AC-r1_R1_001_val_1.sorted.bam.bai [Content-Type=application/octet-stream]...
Copying file://../../data/additional_degraded_v1/droso_aligned/mp870-MV411_RNP_MYB-H3K27AC-r1_R1_001.sorted.bam.idxstat [Content-Type=application/octet-stream]...
Copying file://../../data/additional_degraded_v1/droso_aligned/mp868-MV411_RNP_IRF8-H3K27AC-r1_R2_001.fastq.gz_trimming_report.txt [Content-Type=text/plain]...
Copying file://../../data/additional_degraded_v1/droso_aligned/mp864-MV411_RNP_RUNX1_RUNX2-H3K27AC-r1_R1_001.fastq.gz_trimming_report.txt [Content-Type=text/plain]...
Copying file://../../data/add

In [65]:
for val in wigs:
    bedg = val[:-6]+'bdg'
    ! bigWigToBedGraph $val $bedg

In [66]:
bdg=! ls ../../data/additional_degraded_v1/bwa/mergedLibrary/bigwig/*.bdg
bdg

['../../data/additional_degraded_v1/bwa/mergedLibrary/bigwig/INPUT_R1.mLb.clN.bdg',
 '../../data/additional_degraded_v1/bwa/mergedLibrary/bigwig/MV411_RNP_AAVS1-H3K27AC_R1.mLb.clN.bdg',
 '../../data/additional_degraded_v1/bwa/mergedLibrary/bigwig/MV411_RNP_AAVS1-H3K27AC_R2.mLb.clN.bdg',
 '../../data/additional_degraded_v1/bwa/mergedLibrary/bigwig/MV411_RNP_IRF8-H3K27AC_R1.mLb.clN.bdg',
 '../../data/additional_degraded_v1/bwa/mergedLibrary/bigwig/MV411_RNP_IRF8-H3K27AC_R2.mLb.clN.bdg',
 '../../data/additional_degraded_v1/bwa/mergedLibrary/bigwig/MV411_RNP_MEF2D-H3K27AC_R1.mLb.clN.bdg',
 '../../data/additional_degraded_v1/bwa/mergedLibrary/bigwig/MV411_RNP_MEF2D-H3K27AC_R2.mLb.clN.bdg',
 '../../data/additional_degraded_v1/bwa/mergedLibrary/bigwig/MV411_RNP_MYB-H3K27AC_R1.mLb.clN.bdg',
 '../../data/additional_degraded_v1/bwa/mergedLibrary/bigwig/MV411_RNP_MYB-H3K27AC_R2.mLb.clN.bdg',
 '../../data/additional_degraded_v1/bwa/mergedLibrary/bigwig/MV411_RNP_RUNX1-H3K27AC_R1.mLb.clN.bdg',
 '..

In [73]:
# diffPeak on scaled data 
size= 240
for i in range(int(len(bdg)-3)):
    name1 = bdg[1+i%2]
    name2 = bdg[3+i]
    print(name1,name2)
    print(chip.diffPeak(name1, name2, control1=bdg[0], control2=bdg[0], res_directory="../../data/"+project+"/diffPeaks/", scaling1=1, scaling2=scales[i], size=size))

../../data/additional_degraded_v1/bwa/mergedLibrary/bigwig/MV411_RNP_AAVS1-H3K27AC_R1.mLb.clN.bdg ../../data/additional_degraded_v1/bwa/mergedLibrary/bigwig/MV411_RNP_IRF8-H3K27AC_R1.mLb.clN.bdg
doing differential peak binding
CompletedProcess(args='macs2 bdgdiff --t1 ../../data/additional_degraded_v1/bwa/mergedLibrary/bigwig/MV411_RNP_AAVS1-H3K27AC_R1.mLb.clN.bdg --c1 ../../data/additional_degraded_v1/bwa/mergedLibrary/bigwig/INPUT_R1.mLb.clN.bdg --t2 ../../data/additional_degraded_v1/bwa/mergedLibrary/bigwig/MV411_RNP_IRF8-H3K27AC_R1.mLb.clN.bdg --c2 ../../data/additional_degraded_v1/bwa/mergedLibrary/bigwig/INPUT_R1.mLb.clN.bdg --d1 1 --d2 0.3011826465 -g 60 -l 240 --o-prefix MV411_RNP_AAVS1-H3K27AC_R1_vs_MV411_RNP_IRF8-H3K27AC_R1 --outdir ../../data/additional_degraded_v1/diffPeaks/', returncode=0, stdout=b'', stderr=b'INFO  @ Wed, 09 Sep 2020 20:39:48: Read and build treatment 1 bedGraph... \nINFO  @ Wed, 09 Sep 2020 20:40:17: Read and build control 1 bedGraph... \nINFO  @ Wed, 09

CompletedProcess(args='macs2 bdgdiff --t1 ../../data/additional_degraded_v1/bwa/mergedLibrary/bigwig/MV411_RNP_AAVS1-H3K27AC_R2.mLb.clN.bdg --c1 ../../data/additional_degraded_v1/bwa/mergedLibrary/bigwig/INPUT_R1.mLb.clN.bdg --t2 ../../data/additional_degraded_v1/bwa/mergedLibrary/bigwig/MV411_RNP_RUNX1-H3K27AC_R2.mLb.clN.bdg --c2 ../../data/additional_degraded_v1/bwa/mergedLibrary/bigwig/INPUT_R1.mLb.clN.bdg --d1 1 --d2 0.849637265 -g 60 -l 240 --o-prefix MV411_RNP_AAVS1-H3K27AC_R2_vs_MV411_RNP_RUNX1-H3K27AC_R2 --outdir ../../data/additional_degraded_v1/diffPeaks/', returncode=0, stdout=b'', stderr=b'INFO  @ Wed, 09 Sep 2020 21:50:20: Read and build treatment 1 bedGraph... \nINFO  @ Wed, 09 Sep 2020 21:51:00: Read and build control 1 bedGraph... \nINFO  @ Wed, 09 Sep 2020 21:53:11: Read and build treatment 2 bedGraph... \nINFO  @ Wed, 09 Sep 2020 21:54:06: Read and build control 2 bedGraph... \nINFO  @ Wed, 09 Sep 2020 22:01:04: Write peaks... \nINFO  @ Wed, 09 Sep 2020 22:01:04: Done

In [148]:
bams

['../../data/additional_degraded_v1/bwa/mergedLibrary/INPUT_R1.mLb.clN.sorted.bam',
 '../../data/additional_degraded_v1/bwa/mergedLibrary/MV411_RNP_AAVS1-H3K27AC_R1.mLb.clN.sorted.bam',
 '../../data/additional_degraded_v1/bwa/mergedLibrary/MV411_RNP_AAVS1-H3K27AC_R2.mLb.clN.sorted.bam',
 '../../data/additional_degraded_v1/bwa/mergedLibrary/MV411_RNP_IRF8-H3K27AC_R1.mLb.clN.sorted.bam',
 '../../data/additional_degraded_v1/bwa/mergedLibrary/MV411_RNP_IRF8-H3K27AC_R2.mLb.clN.sorted.bam',
 '../../data/additional_degraded_v1/bwa/mergedLibrary/MV411_RNP_MEF2D-H3K27AC_R1.mLb.clN.sorted.bam',
 '../../data/additional_degraded_v1/bwa/mergedLibrary/MV411_RNP_MEF2D-H3K27AC_R2.mLb.clN.sorted.bam',
 '../../data/additional_degraded_v1/bwa/mergedLibrary/MV411_RNP_MYB-H3K27AC_R1.mLb.clN.sorted.bam',
 '../../data/additional_degraded_v1/bwa/mergedLibrary/MV411_RNP_MYB-H3K27AC_R2.mLb.clN.sorted.bam',
 '../../data/additional_degraded_v1/bwa/mergedLibrary/MV411_RNP_RUNX1-H3K27AC_R1.mLb.clN.sorted.bam',
 '..

In [154]:
bams[10]

'../../data/additional_degraded_v1/bwa/mergedLibrary/MV411_RNP_RUNX1-H3K27AC_R2.mLb.clN.sorted.bam'

In [155]:
# diffPeak on scaled data (full reprocessing)
size= 240
for i in range(len(bams)-3):
    if i < 8:
        continue
    bam1 = bams[1+i%2]
    bam2 = bams[3+i]
    print(bam1,bam2)
    print(chip.fullDiffPeak(bam1,bam2, control1=bams[0], compute_size=False, size=size, scaling=[scales[i],1], directory = "../../data/"+project+"/diffData/", res_directory = "../../data/"+project+"/diffPeaks/",pairedend=True))

../../data/additional_degraded_v1/bwa/mergedLibrary/MV411_RNP_AAVS1-H3K27AC_R1.mLb.clN.sorted.bam ../../data/additional_degraded_v1/bwa/mergedLibrary/MV411_RNP_RUNX1_RUNX2-H3K27AC_R1.mLb.clN.sorted.bam
doing diff from ../../data/additional_degraded_v1/bwa/mergedLibrary/MV411_RNP_AAVS1-H3K27AC_R1.mLb.clN.sorted.bam and ../../data/additional_degraded_v1/bwa/mergedLibrary/MV411_RNP_RUNX1_RUNX2-H3K27AC_R1.mLb.clN.sorted.bam
using default|given size
computing the scaling values
b'INFO  @ Mon, 14 Sep 2020 15:06:40: \n# Command line: callpeak -B -t ../../data/additional_degraded_v1/bwa/mergedLibrary/MV411_RNP_AAVS1-H3K27AC_R1.mLb.clN.sorted.bam -c ../../data/additional_degraded_v1/bwa/mergedLibrary/INPUT_R1.mLb.clN.sorted.bam --nomodel --extsize 240 -n MV411_RNP_AAVS1-H3K27AC_R1 --outdir ../../data/additional_degraded_v1/diffData/ -f BAMPE\n# ARGUMENTS LIST:\n# name = MV411_RNP_AAVS1-H3K27AC_R1\n# format = BAMPE\n# ChIP-seq file = [\'../../data/additional_degraded_v1/bwa/mergedLibrary/MV411_R

b'INFO  @ Mon, 14 Sep 2020 15:31:11: \n# Command line: callpeak -B -t ../../data/additional_degraded_v1/bwa/mergedLibrary/MV411_RNP_RUNX1_RUNX2-H3K27AC_R1.mLb.clN.sorted.bam -c ../../data/additional_degraded_v1/bwa/mergedLibrary/INPUT_R1.mLb.clN.sorted.bam --nomodel --extsize 240 -n MV411_RNP_RUNX1_RUNX2-H3K27AC_R1 --outdir ../../data/additional_degraded_v1/diffData/ -f BAMPE\n# ARGUMENTS LIST:\n# name = MV411_RNP_RUNX1_RUNX2-H3K27AC_R1\n# format = BAMPE\n# ChIP-seq file = [\'../../data/additional_degraded_v1/bwa/mergedLibrary/MV411_RNP_RUNX1_RUNX2-H3K27AC_R1.mLb.clN.sorted.bam\']\n# control file = [\'../../data/additional_degraded_v1/bwa/mergedLibrary/INPUT_R1.mLb.clN.sorted.bam\']\n# effective genome size = 2.70e+09\n# band width = 300\n# model fold = [5, 50]\n# qvalue cutoff = 5.00e-02\n# The maximum gap between significant sites is assigned as the read length/tag size.\n# The minimum length of peaks is assigned as the predicted fragment length "d".\n# Larger dataset will be scaled 

b'INFO  @ Mon, 14 Sep 2020 16:11:37: \n# Command line: callpeak -B -t ../../data/additional_degraded_v1/bwa/mergedLibrary/MV411_RNP_AAVS1-H3K27AC_R2.mLb.clN.sorted.bam -c ../../data/additional_degraded_v1/bwa/mergedLibrary/INPUT_R1.mLb.clN.sorted.bam --nomodel --extsize 240 -n MV411_RNP_AAVS1-H3K27AC_R2 --outdir ../../data/additional_degraded_v1/diffData/ -f BAMPE\n# ARGUMENTS LIST:\n# name = MV411_RNP_AAVS1-H3K27AC_R2\n# format = BAMPE\n# ChIP-seq file = [\'../../data/additional_degraded_v1/bwa/mergedLibrary/MV411_RNP_AAVS1-H3K27AC_R2.mLb.clN.sorted.bam\']\n# control file = [\'../../data/additional_degraded_v1/bwa/mergedLibrary/INPUT_R1.mLb.clN.sorted.bam\']\n# effective genome size = 2.70e+09\n# band width = 300\n# model fold = [5, 50]\n# qvalue cutoff = 5.00e-02\n# The maximum gap between significant sites is assigned as the read length/tag size.\n# The minimum length of peaks is assigned as the predicted fragment length "d".\n# Larger dataset will be scaled towards smaller dataset.

b'INFO  @ Mon, 14 Sep 2020 16:37:06: \n# Command line: callpeak -B -t ../../data/additional_degraded_v1/bwa/mergedLibrary/MV411_RNP_RUNX1_RUNX2-H3K27AC_R2.mLb.clN.sorted.bam -c ../../data/additional_degraded_v1/bwa/mergedLibrary/INPUT_R1.mLb.clN.sorted.bam --nomodel --extsize 240 -n MV411_RNP_RUNX1_RUNX2-H3K27AC_R2 --outdir ../../data/additional_degraded_v1/diffData/ -f BAMPE\n# ARGUMENTS LIST:\n# name = MV411_RNP_RUNX1_RUNX2-H3K27AC_R2\n# format = BAMPE\n# ChIP-seq file = [\'../../data/additional_degraded_v1/bwa/mergedLibrary/MV411_RNP_RUNX1_RUNX2-H3K27AC_R2.mLb.clN.sorted.bam\']\n# control file = [\'../../data/additional_degraded_v1/bwa/mergedLibrary/INPUT_R1.mLb.clN.sorted.bam\']\n# effective genome size = 2.70e+09\n# band width = 300\n# model fold = [5, 50]\n# qvalue cutoff = 5.00e-02\n# The maximum gap between significant sites is assigned as the read length/tag size.\n# The minimum length of peaks is assigned as the predicted fragment length "d".\n# Larger dataset will be scaled 

b'INFO  @ Mon, 14 Sep 2020 17:18:45: \n# Command line: callpeak -B -t ../../data/additional_degraded_v1/bwa/mergedLibrary/MV411_RNP_AAVS1-H3K27AC_R1.mLb.clN.sorted.bam -c ../../data/additional_degraded_v1/bwa/mergedLibrary/INPUT_R1.mLb.clN.sorted.bam --nomodel --extsize 240 -n MV411_RNP_AAVS1-H3K27AC_R1 --outdir ../../data/additional_degraded_v1/diffData/ -f BAMPE\n# ARGUMENTS LIST:\n# name = MV411_RNP_AAVS1-H3K27AC_R1\n# format = BAMPE\n# ChIP-seq file = [\'../../data/additional_degraded_v1/bwa/mergedLibrary/MV411_RNP_AAVS1-H3K27AC_R1.mLb.clN.sorted.bam\']\n# control file = [\'../../data/additional_degraded_v1/bwa/mergedLibrary/INPUT_R1.mLb.clN.sorted.bam\']\n# effective genome size = 2.70e+09\n# band width = 300\n# model fold = [5, 50]\n# qvalue cutoff = 5.00e-02\n# The maximum gap between significant sites is assigned as the read length/tag size.\n# The minimum length of peaks is assigned as the predicted fragment length "d".\n# Larger dataset will be scaled towards smaller dataset.

CompletedProcess(args='macs2 bdgdiff --t1 ../../data/additional_degraded_v1/diffData/MV411_RNP_AAVS1-H3K27AC_R1_treat_pileup.bdg --c1 ../../data/additional_degraded_v1/diffData/MV411_RNP_AAVS1-H3K27AC_R1_control_lambda.bdg --t2 ../../data/additional_degraded_v1/diffData/MV411_RNP_RUNX2-H3K27AC_R1_treat_pileup.bdg --c2 ../../data/additional_degraded_v1/diffData/MV411_RNP_RUNX2-H3K27AC_R1_control_lambda.bdg --d1 31040563 --d2 21632631 -g 60 -l 240 --o-prefix MV411_RNP_AAVS1-H3K27AC_R1_treat_pileup_vs_MV411_RNP_RUNX2-H3K27AC_R1_treat_pileup --outdir ../../data/additional_degraded_v1/diffPeaks/', returncode=0, stdout=b'', stderr=b'INFO  @ Mon, 14 Sep 2020 18:07:36: Read and build treatment 1 bedGraph... \nINFO  @ Mon, 14 Sep 2020 18:08:07: Read and build control 1 bedGraph... \nINFO  @ Mon, 14 Sep 2020 18:11:23: Read and build treatment 2 bedGraph... \nINFO  @ Mon, 14 Sep 2020 18:11:54: Read and build control 2 bedGraph... \nINFO  @ Mon, 14 Sep 2020 18:22:21: Write peaks... \nINFO  @ Mon, 

b'INFO  @ Mon, 14 Sep 2020 18:47:37: \n# Command line: callpeak -B -t ../../data/additional_degraded_v1/bwa/mergedLibrary/MV411_RNP_RUNX2-H3K27AC_R2.mLb.clN.sorted.bam -c ../../data/additional_degraded_v1/bwa/mergedLibrary/INPUT_R1.mLb.clN.sorted.bam --nomodel --extsize 240 -n MV411_RNP_RUNX2-H3K27AC_R2 --outdir ../../data/additional_degraded_v1/diffData/ -f BAMPE\n# ARGUMENTS LIST:\n# name = MV411_RNP_RUNX2-H3K27AC_R2\n# format = BAMPE\n# ChIP-seq file = [\'../../data/additional_degraded_v1/bwa/mergedLibrary/MV411_RNP_RUNX2-H3K27AC_R2.mLb.clN.sorted.bam\']\n# control file = [\'../../data/additional_degraded_v1/bwa/mergedLibrary/INPUT_R1.mLb.clN.sorted.bam\']\n# effective genome size = 2.70e+09\n# band width = 300\n# model fold = [5, 50]\n# qvalue cutoff = 5.00e-02\n# The maximum gap between significant sites is assigned as the read length/tag size.\n# The minimum length of peaks is assigned as the predicted fragment length "d".\n# Larger dataset will be scaled towards smaller dataset.

CompletedProcess(args='macs2 bdgdiff --t1 ../../data/additional_degraded_v1/diffData/MV411_RNP_AAVS1-H3K27AC_R2_treat_pileup.bdg --c1 ../../data/additional_degraded_v1/diffData/MV411_RNP_AAVS1-H3K27AC_R2_control_lambda.bdg --t2 ../../data/additional_degraded_v1/diffData/MV411_RNP_RUNX2-H3K27AC_R2_treat_pileup.bdg --c2 ../../data/additional_degraded_v1/diffData/MV411_RNP_RUNX2-H3K27AC_R2_control_lambda.bdg --d1 23219629 --d2 38378814 -g 60 -l 240 --o-prefix MV411_RNP_AAVS1-H3K27AC_R2_treat_pileup_vs_MV411_RNP_RUNX2-H3K27AC_R2_treat_pileup --outdir ../../data/additional_degraded_v1/diffPeaks/', returncode=0, stdout=b'', stderr=b'INFO  @ Mon, 14 Sep 2020 19:15:32: Read and build treatment 1 bedGraph... \nINFO  @ Mon, 14 Sep 2020 19:16:11: Read and build control 1 bedGraph... \nINFO  @ Mon, 14 Sep 2020 19:19:27: Read and build treatment 2 bedGraph... \nINFO  @ Mon, 14 Sep 2020 19:20:15: Read and build control 2 bedGraph... \nINFO  @ Mon, 14 Sep 2020 19:33:33: Write peaks... \nINFO  @ Mon, 

b'INFO  @ Mon, 14 Sep 2020 19:57:52: \n# Command line: callpeak -B -t ../../data/additional_degraded_v1/bwa/mergedLibrary/MV411_RNP_SPI1-H3K27AC_R1.mLb.clN.sorted.bam -c ../../data/additional_degraded_v1/bwa/mergedLibrary/INPUT_R1.mLb.clN.sorted.bam --nomodel --extsize 240 -n MV411_RNP_SPI1-H3K27AC_R1 --outdir ../../data/additional_degraded_v1/diffData/ -f BAMPE\n# ARGUMENTS LIST:\n# name = MV411_RNP_SPI1-H3K27AC_R1\n# format = BAMPE\n# ChIP-seq file = [\'../../data/additional_degraded_v1/bwa/mergedLibrary/MV411_RNP_SPI1-H3K27AC_R1.mLb.clN.sorted.bam\']\n# control file = [\'../../data/additional_degraded_v1/bwa/mergedLibrary/INPUT_R1.mLb.clN.sorted.bam\']\n# effective genome size = 2.70e+09\n# band width = 300\n# model fold = [5, 50]\n# qvalue cutoff = 5.00e-02\n# The maximum gap between significant sites is assigned as the read length/tag size.\n# The minimum length of peaks is assigned as the predicted fragment length "d".\n# Larger dataset will be scaled towards smaller dataset.\n# 

CompletedProcess(args='macs2 bdgdiff --t1 ../../data/additional_degraded_v1/diffData/MV411_RNP_AAVS1-H3K27AC_R1_treat_pileup.bdg --c1 ../../data/additional_degraded_v1/diffData/MV411_RNP_AAVS1-H3K27AC_R1_control_lambda.bdg --t2 ../../data/additional_degraded_v1/diffData/MV411_RNP_SPI1-H3K27AC_R1_treat_pileup.bdg --c2 ../../data/additional_degraded_v1/diffData/MV411_RNP_SPI1-H3K27AC_R1_control_lambda.bdg --d1 10351504 --d2 27433416 -g 60 -l 240 --o-prefix MV411_RNP_AAVS1-H3K27AC_R1_treat_pileup_vs_MV411_RNP_SPI1-H3K27AC_R1_treat_pileup --outdir ../../data/additional_degraded_v1/diffPeaks/', returncode=0, stdout=b'', stderr=b'INFO  @ Mon, 14 Sep 2020 20:23:06: Read and build treatment 1 bedGraph... \nINFO  @ Mon, 14 Sep 2020 20:23:40: Read and build control 1 bedGraph... \nINFO  @ Mon, 14 Sep 2020 20:27:11: Read and build treatment 2 bedGraph... \nINFO  @ Mon, 14 Sep 2020 20:27:52: Read and build control 2 bedGraph... \nINFO  @ Mon, 14 Sep 2020 20:40:46: Write peaks... \nINFO  @ Mon, 14 

b'INFO  @ Mon, 14 Sep 2020 21:06:01: \n# Command line: callpeak -B -t ../../data/additional_degraded_v1/bwa/mergedLibrary/MV411_RNP_SPI1-H3K27AC_R2.mLb.clN.sorted.bam -c ../../data/additional_degraded_v1/bwa/mergedLibrary/INPUT_R1.mLb.clN.sorted.bam --nomodel --extsize 240 -n MV411_RNP_SPI1-H3K27AC_R2 --outdir ../../data/additional_degraded_v1/diffData/ -f BAMPE\n# ARGUMENTS LIST:\n# name = MV411_RNP_SPI1-H3K27AC_R2\n# format = BAMPE\n# ChIP-seq file = [\'../../data/additional_degraded_v1/bwa/mergedLibrary/MV411_RNP_SPI1-H3K27AC_R2.mLb.clN.sorted.bam\']\n# control file = [\'../../data/additional_degraded_v1/bwa/mergedLibrary/INPUT_R1.mLb.clN.sorted.bam\']\n# effective genome size = 2.70e+09\n# band width = 300\n# model fold = [5, 50]\n# qvalue cutoff = 5.00e-02\n# The maximum gap between significant sites is assigned as the read length/tag size.\n# The minimum length of peaks is assigned as the predicted fragment length "d".\n# Larger dataset will be scaled towards smaller dataset.\n# 

CompletedProcess(args='macs2 bdgdiff --t1 ../../data/additional_degraded_v1/diffData/MV411_RNP_AAVS1-H3K27AC_R2_treat_pileup.bdg --c1 ../../data/additional_degraded_v1/diffData/MV411_RNP_AAVS1-H3K27AC_R2_control_lambda.bdg --t2 ../../data/additional_degraded_v1/diffData/MV411_RNP_SPI1-H3K27AC_R2_treat_pileup.bdg --c2 ../../data/additional_degraded_v1/diffData/MV411_RNP_SPI1-H3K27AC_R2_control_lambda.bdg --d1 23895878 --d2 27314390 -g 60 -l 240 --o-prefix MV411_RNP_AAVS1-H3K27AC_R2_treat_pileup_vs_MV411_RNP_SPI1-H3K27AC_R2_treat_pileup --outdir ../../data/additional_degraded_v1/diffPeaks/', returncode=0, stdout=b'', stderr=b'INFO  @ Mon, 14 Sep 2020 21:31:16: Read and build treatment 1 bedGraph... \nINFO  @ Mon, 14 Sep 2020 21:31:56: Read and build control 1 bedGraph... \nINFO  @ Mon, 14 Sep 2020 21:35:20: Read and build treatment 2 bedGraph... \nINFO  @ Mon, 14 Sep 2020 21:36:00: Read and build control 2 bedGraph... \nINFO  @ Mon, 14 Sep 2020 21:49:10: Write peaks... \nINFO  @ Mon, 14 

In [34]:
initscales = ! cat ../../data/$project/bwa/mergedLibrary/bigwig/scale/*.txt

In [75]:
scales = [val*float(initscales[3+i]) for i, val in enumerate(scales)]

In [76]:
scales

[0.0055422425878344,
 0.0095582660753916,
 0.0099217323799776,
 0.0123312656199582,
 0.01928382044626971,
 0.00920760471838008,
 0.028572797987493598,
 0.00965697715399,
 0.01991590914591828,
 0.01796301710614069,
 0.034577257425544,
 0.0109291227710616,
 0.00907162286248202,
 0.015778937847629487]

In [79]:
chip.bigWigFrom(bams[3:], 
                genome='GRCh38',scaling=scales,
               numthreads=8)

In [88]:
ls ../../data/$project/recalib_bigwig/

MV411_RNP_IRF8-H3K27AC_R1.bw   MV411_RNP_RUNX1-H3K27AC_R2.bw
MV411_RNP_IRF8-H3K27AC_R2.bw   MV411_RNP_RUNX1_RUNX2-H3K27AC_R1.bw
MV411_RNP_MEF2D-H3K27AC_R1.bw  MV411_RNP_RUNX1_RUNX2-H3K27AC_R2.bw
MV411_RNP_MEF2D-H3K27AC_R2.bw  MV411_RNP_RUNX2-H3K27AC_R1.bw
MV411_RNP_MYB-H3K27AC_R1.bw    MV411_RNP_RUNX2-H3K27AC_R2.bw
MV411_RNP_MYB-H3K27AC_R2.bw    MV411_RNP_SPI1-H3K27AC_R1.bw
MV411_RNP_RUNX1-H3K27AC_R1.bw  MV411_RNP_SPI1-H3K27AC_R2.bw


In [84]:
!mv bigwig/* ../../data/$project/recalib_bigwig/

In [91]:
!cp ../../data/$project/bwa/mergedLibrary/bigwig/MV411_RNP_AAVS1-*.bigWig ../../data/$project/recalib_bigwig/

In [156]:
os.popen('for i in $(ls ../../data/'+project+'/diffPeaks/*.bed); \
            do echo $(wc -l $i); \
            done').read().split('\n')

[autoreload of JKBio.epigenetics.ChIP_helper failed: Traceback (most recent call last):
  File "/home/jeremie/.local/lib/python3.8/site-packages/IPython/extensions/autoreload.py", line 245, in check
    superreload(m, reload, self.old_objects)
  File "/home/jeremie/.local/lib/python3.8/site-packages/IPython/extensions/autoreload.py", line 394, in superreload
    module = reload(module)
  File "/usr/lib/python3.8/imp.py", line 314, in reload
    return importlib.reload(module)
  File "/usr/lib/python3.8/importlib/__init__.py", line 169, in reload
    _bootstrap._exec(spec, module)
  File "<frozen importlib._bootstrap>", line 604, in _exec
  File "<frozen importlib._bootstrap_external>", line 779, in exec_module
  File "<frozen importlib._bootstrap_external>", line 916, in get_code
  File "<frozen importlib._bootstrap_external>", line 846, in source_to_code
  File "<frozen importlib._bootstrap>", line 219, in _call_with_frames_removed
  File "../../JKBio/epigenetics/ChIP_helper.py", line

['10158 ../../data/additional_degraded_v1/diffPeaks/MV411_RNP_AAVS1-H3K27AC_R1_treat_pileup_vs_MV411_RNP_IRF8-H3K27AC_R1_treat_pileup_c3.0_common.bed',
 '44772 ../../data/additional_degraded_v1/diffPeaks/MV411_RNP_AAVS1-H3K27AC_R1_treat_pileup_vs_MV411_RNP_IRF8-H3K27AC_R1_treat_pileup_c3.0_cond1.bed',
 '138 ../../data/additional_degraded_v1/diffPeaks/MV411_RNP_AAVS1-H3K27AC_R1_treat_pileup_vs_MV411_RNP_IRF8-H3K27AC_R1_treat_pileup_c3.0_cond2.bed',
 '43929 ../../data/additional_degraded_v1/diffPeaks/MV411_RNP_AAVS1-H3K27AC_R1_treat_pileup_vs_MV411_RNP_MEF2D-H3K27AC_R1_treat_pileup_c3.0_common.bed',
 '31829 ../../data/additional_degraded_v1/diffPeaks/MV411_RNP_AAVS1-H3K27AC_R1_treat_pileup_vs_MV411_RNP_MEF2D-H3K27AC_R1_treat_pileup_c3.0_cond1.bed',
 '13 ../../data/additional_degraded_v1/diffPeaks/MV411_RNP_AAVS1-H3K27AC_R1_treat_pileup_vs_MV411_RNP_MEF2D-H3K27AC_R1_treat_pileup_c3.0_cond2.bed',
 '42999 ../../data/additional_degraded_v1/diffPeaks/MV411_RNP_AAVS1-H3K27AC_R1_treat_pileup_vs

In [240]:
bw = ! ls ../../data/additional_degraded_v1/recalib_bigwig/*
bw

['../../data/additional_degraded_v1/recalib_bigwig/MV411_RNP_AAVS1-H3K27AC_R1.mLb.clN.bigWig',
 '../../data/additional_degraded_v1/recalib_bigwig/MV411_RNP_AAVS1-H3K27AC_R2.mLb.clN.bigWig',
 '../../data/additional_degraded_v1/recalib_bigwig/MV411_RNP_IRF8-H3K27AC_R1.bw',
 '../../data/additional_degraded_v1/recalib_bigwig/MV411_RNP_IRF8-H3K27AC_R2.bw',
 '../../data/additional_degraded_v1/recalib_bigwig/MV411_RNP_MEF2D-H3K27AC_R1.bw',
 '../../data/additional_degraded_v1/recalib_bigwig/MV411_RNP_MEF2D-H3K27AC_R2.bw',
 '../../data/additional_degraded_v1/recalib_bigwig/MV411_RNP_MYB-H3K27AC_R1.bw',
 '../../data/additional_degraded_v1/recalib_bigwig/MV411_RNP_MYB-H3K27AC_R2.bw',
 '../../data/additional_degraded_v1/recalib_bigwig/MV411_RNP_RUNX1-H3K27AC_R1.bw',
 '../../data/additional_degraded_v1/recalib_bigwig/MV411_RNP_RUNX1-H3K27AC_R2.bw',
 '../../data/additional_degraded_v1/recalib_bigwig/MV411_RNP_RUNX1_RUNX2-H3K27AC_R1.bw',
 '../../data/additional_degraded_v1/recalib_bigwig/MV411_RNP_RU

In [167]:
!mkdir ../results/$project/
!mkdir ../results/$project/plots
!mkdir ../results/$project/plots/heatmaps/

mkdir: cannot create directory ‘../results/additional_degraded_v1/’: File exists
mkdir: cannot create directory ‘../results/additional_degraded_v1/plots’: File exists


In [None]:
# GENOME WIDE comparison

In [242]:
peaks = ! ls ../../data/$project/bwa/mergedLibrary/macs/broadPeak/*.broadPeak

In [241]:
names = ["AAVS1", "AAVS1_v2", "IRF8","IRF8_v2","MEF2D","MEF2D_v2","MYB","MYB_v2","RUNX1","RUNX1_v2","RUNX1_RUNX2","RUNX1_RUNX2_v2","RUNX2","RUNX2_v2", "SPI1","SPI1_v2"]

In [245]:
for i, val in enumerate(bw):
    if i <2:
        continue
    name = names[i]
    print(name)
    chip.getPeaksAt(peaks[0], bigwigs = [val,bw[0]], bigwignames= [name,"AAVS1"],peaknames=['Macs2_Peaks'], window=3000, folder="", title=name+"_vs_AAVS1", numthreads=8, refpoint="center", name='../../data/'+project+'/'+name+'_mat.pdf', withDeeptools=True, torecompute=True, vmax=4, legendLoc="lower-left")

IRF8
CompletedProcess(args='computeMatrix reference-point -S ../../data/additional_degraded_v1/recalib_bigwig/MV411_RNP_IRF8-H3K27AC_R1.bw ../../data/additional_degraded_v1/recalib_bigwig/MV411_RNP_AAVS1-H3K27AC_R1.mLb.clN.bigWig  --referencePoint center --regionsFileName ../../data/additional_degraded_v1/bwa/mergedLibrary/macs/broadPeak/MV411_RNP_AAVS1-H3K27AC_R1_peaks.broadPeak --missingDataAsZero --outFileName ../../data/additional_degraded_v1/IRF8_mat.gz --upstream 3000 --downstream 3000 --numberOfProcessors 8 && plotHeatmap --matrixFile ../../data/additional_degraded_v1/IRF8_mat.gz --outFileName ../../data/additional_degraded_v1/IRF8_mat.pdf --refPointLabel center -max 4 --legendLocation lower-left --regionsLabel Macs2_Peaks --samplesLabel IRF8 AAVS1 --plotTitle IRF8_vs_AAVS1', returncode=0, stdout=b'', stderr=b'\nThe following chromosome names did not match between the bigwig files\nchromosome\tlength\nchrUn_KI270389v1\t      1298\nchrUn_KI270335v1\t      1048\nchrUn_KI270394v1\t

CompletedProcess(args='computeMatrix reference-point -S ../../data/additional_degraded_v1/recalib_bigwig/MV411_RNP_MYB-H3K27AC_R2.bw ../../data/additional_degraded_v1/recalib_bigwig/MV411_RNP_AAVS1-H3K27AC_R1.mLb.clN.bigWig  --referencePoint center --regionsFileName ../../data/additional_degraded_v1/bwa/mergedLibrary/macs/broadPeak/MV411_RNP_AAVS1-H3K27AC_R1_peaks.broadPeak --missingDataAsZero --outFileName ../../data/additional_degraded_v1/MYB_v2_mat.gz --upstream 3000 --downstream 3000 --numberOfProcessors 8 && plotHeatmap --matrixFile ../../data/additional_degraded_v1/MYB_v2_mat.gz --outFileName ../../data/additional_degraded_v1/MYB_v2_mat.pdf --refPointLabel center -max 4 --legendLocation lower-left --regionsLabel Macs2_Peaks --samplesLabel MYB_v2 AAVS1 --plotTitle MYB_v2_vs_AAVS1', returncode=0, stdout=b'', stderr=b'\nThe following chromosome names did not match between the bigwig files\nchromosome\tlength\nchrUn_KI270381v1\t      1930\nchrUn_KI270412v1\t      1179\nchrUn_KI270372

CompletedProcess(args='computeMatrix reference-point -S ../../data/additional_degraded_v1/recalib_bigwig/MV411_RNP_RUNX2-H3K27AC_R1.bw ../../data/additional_degraded_v1/recalib_bigwig/MV411_RNP_AAVS1-H3K27AC_R1.mLb.clN.bigWig  --referencePoint center --regionsFileName ../../data/additional_degraded_v1/bwa/mergedLibrary/macs/broadPeak/MV411_RNP_AAVS1-H3K27AC_R1_peaks.broadPeak --missingDataAsZero --outFileName ../../data/additional_degraded_v1/RUNX2_mat.gz --upstream 3000 --downstream 3000 --numberOfProcessors 8 && plotHeatmap --matrixFile ../../data/additional_degraded_v1/RUNX2_mat.gz --outFileName ../../data/additional_degraded_v1/RUNX2_mat.pdf --refPointLabel center -max 4 --legendLocation lower-left --regionsLabel Macs2_Peaks --samplesLabel RUNX2 AAVS1 --plotTitle RUNX2_vs_AAVS1', returncode=0, stdout=b'', stderr=b'\nThe following chromosome names did not match between the bigwig files\nchromosome\tlength\nchrUn_KI270395v1\t      1143\nchrUn_KI270394v1\t       970\n         chrEBV\t

In [247]:
! cp ../../data/additional_degraded_v1/*.pdf ../results/$project/plots/scaled/heatmaps/

In [190]:
cond1peak = ! ls ../../data/$project/diffPeaks/*cond1.bed
cond2peak = ! ls ../../data/$project/diffPeaks/*cond2.bed
commonpeak = ! ls ../../data/$project/diffPeaks/*common.bed
cond1peak

['../../data/additional_degraded_v1/diffPeaks/MV411_RNP_AAVS1-H3K27AC_R1_treat_pileup_vs_MV411_RNP_IRF8-H3K27AC_R1_treat_pileup_c3.0_cond1.bed',
 '../../data/additional_degraded_v1/diffPeaks/MV411_RNP_AAVS1-H3K27AC_R1_treat_pileup_vs_MV411_RNP_MEF2D-H3K27AC_R1_treat_pileup_c3.0_cond1.bed',
 '../../data/additional_degraded_v1/diffPeaks/MV411_RNP_AAVS1-H3K27AC_R1_treat_pileup_vs_MV411_RNP_MYB-H3K27AC_R1_treat_pileup_c3.0_cond1.bed',
 '../../data/additional_degraded_v1/diffPeaks/MV411_RNP_AAVS1-H3K27AC_R1_treat_pileup_vs_MV411_RNP_RUNX1-H3K27AC_R1_treat_pileup_c3.0_cond1.bed',
 '../../data/additional_degraded_v1/diffPeaks/MV411_RNP_AAVS1-H3K27AC_R1_treat_pileup_vs_MV411_RNP_RUNX1_RUNX2-H3K27AC_R1_treat_pileup_c3.0_cond1.bed',
 '../../data/additional_degraded_v1/diffPeaks/MV411_RNP_AAVS1-H3K27AC_R1_treat_pileup_vs_MV411_RNP_RUNX2-H3K27AC_R1_treat_pileup_c3.0_cond1.bed',
 '../../data/additional_degraded_v1/diffPeaks/MV411_RNP_AAVS1-H3K27AC_R1_treat_pileup_vs_MV411_RNP_SPI1-H3K27AC_R1_treat_

In [234]:
names = ["IRF8", "MEF2D", "MYB","RUNX1","RUNX1_RUNX2","RUNX2", "SPI1", "IRF8_v2", "MEF2D_v2", "MYB_v2", "RUNX1_v2", "RUNX1_RUNX2_v2", "RUNX2_v2", "SPI1_v2"]

In [222]:
bw

['../../data/additional_degraded_v1/recalib_bigwig/MV411_RNP_AAVS1-H3K27AC_R1.mLb.clN.bigWig',
 '../../data/additional_degraded_v1/recalib_bigwig/MV411_RNP_AAVS1-H3K27AC_R2.mLb.clN.bigWig',
 '../../data/additional_degraded_v1/recalib_bigwig/MV411_RNP_IRF8-H3K27AC_R1.bw',
 '../../data/additional_degraded_v1/recalib_bigwig/MV411_RNP_IRF8-H3K27AC_R2.bw',
 '../../data/additional_degraded_v1/recalib_bigwig/MV411_RNP_MEF2D-H3K27AC_R1.bw',
 '../../data/additional_degraded_v1/recalib_bigwig/MV411_RNP_MEF2D-H3K27AC_R2.bw',
 '../../data/additional_degraded_v1/recalib_bigwig/MV411_RNP_MYB-H3K27AC_R1.bw',
 '../../data/additional_degraded_v1/recalib_bigwig/MV411_RNP_MYB-H3K27AC_R2.bw',
 '../../data/additional_degraded_v1/recalib_bigwig/MV411_RNP_RUNX1-H3K27AC_R1.bw',
 '../../data/additional_degraded_v1/recalib_bigwig/MV411_RNP_RUNX1-H3K27AC_R2.bw',
 '../../data/additional_degraded_v1/recalib_bigwig/MV411_RNP_RUNX1_RUNX2-H3K27AC_R1.bw',
 '../../data/additional_degraded_v1/recalib_bigwig/MV411_RNP_RU

In [216]:
bw[(6*2)+3]

'../../data/additional_degraded_v1/recalib_bigwig/MV411_RNP_SPI1-H3K27AC_R2.bw'

In [218]:
int(len(bw[2:])/2)

7

In [219]:
bw[round(7/6)]

'../../data/additional_degraded_v1/recalib_bigwig/MV411_RNP_AAVS1-H3K27AC_R2.mLb.clN.bigWig'

In [238]:
for i in range(int(len(bw[2:]))):
    if i <8:
        continue
    cl = len(bw[2:])
    name1 = bw[round(i/(cl-1))]
    name2 = bw[((i%int(cl/2))*2)+2+round(i/(cl-1))]
    peak = [cond1peak[i], commonpeak[i], cond2peak[i]]
   # for val in peak:
      #  chip.dropWeirdChromosomes(val)
    name = names[i]
    print(name1,name2)
    chip.getPeaksAt(peak, [name1, name2], bigwignames=['DMSO', 'VHL'], peaknames=['DMSO_peaks', 'common', 'VHL_peaks'], window=3000, folder="", title=name, numthreads=7, refpoint="center", name='../../data/'+project+'/diffPeaks/'+name+'_mat.pdf', withDeeptools=True, torecompute=False)

../../data/additional_degraded_v1/recalib_bigwig/MV411_RNP_AAVS1-H3K27AC_R2.mLb.clN.bigWig ../../data/additional_degraded_v1/recalib_bigwig/MV411_RNP_MEF2D-H3K27AC_R2.bw
../../data/additional_degraded_v1/recalib_bigwig/MV411_RNP_AAVS1-H3K27AC_R2.mLb.clN.bigWig ../../data/additional_degraded_v1/recalib_bigwig/MV411_RNP_MYB-H3K27AC_R2.bw
CompletedProcess(args='plotHeatmap --matrixFile ../../data/additional_degraded_v1/diffPeaks/MYB_v2_mat.gz --outFileName ../../data/additional_degraded_v1/diffPeaks/MYB_v2_mat.pdf --refPointLabel center --regionsLabel DMSO_peaks common VHL_peaks --samplesLabel DMSO VHL --plotTitle MYB_v2', returncode=0, stdout=b'', stderr=b'')
../../data/additional_degraded_v1/recalib_bigwig/MV411_RNP_AAVS1-H3K27AC_R2.mLb.clN.bigWig ../../data/additional_degraded_v1/recalib_bigwig/MV411_RNP_RUNX1-H3K27AC_R2.bw
CompletedProcess(args='plotHeatmap --matrixFile ../../data/additional_degraded_v1/diffPeaks/RUNX1_v2_mat.gz --outFileName ../../data/additional_degraded_v1/diffPeak

In [139]:
! mkdir ../results/additional_degraded_v1/plots/scaled/diffPeaks/
! cp ../../data/additional_degraded_v1/diffPeaks/*.pdf ../results/additional_degraded_v1/plots/scaled/diffPeaks/

MV411_RNP_AAVS1-H3K27AC_R1_treat_pileup_vs_MV411_RNP_IRF8-H3K27AC_R1_treat_pileup_c3.0_common.bed
MV411_RNP_AAVS1-H3K27AC_R1_treat_pileup_vs_MV411_RNP_IRF8-H3K27AC_R1_treat_pileup_c3.0_cond1.bed
MV411_RNP_AAVS1-H3K27AC_R1_treat_pileup_vs_MV411_RNP_IRF8-H3K27AC_R1_treat_pileup_c3.0_cond2.bed
MV411_RNP_AAVS1-H3K27AC_R1_treat_pileup_vs_MV411_RNP_MEF2D-H3K27AC_R1_treat_pileup_c3.0_common.bed
MV411_RNP_AAVS1-H3K27AC_R1_treat_pileup_vs_MV411_RNP_MEF2D-H3K27AC_R1_treat_pileup_c3.0_cond1.bed
MV411_RNP_AAVS1-H3K27AC_R1_treat_pileup_vs_MV411_RNP_MEF2D-H3K27AC_R1_treat_pileup_c3.0_cond2.bed
MV411_RNP_AAVS1-H3K27AC_R1_treat_pileup_vs_MV411_RNP_MYB-H3K27AC_R1_treat_pileup_c3.0_common.bed
MV411_RNP_AAVS1-H3K27AC_R1_treat_pileup_vs_MV411_RNP_MYB-H3K27AC_R1_treat_pileup_c3.0_cond1.bed
MV411_RNP_AAVS1-H3K27AC_R1_treat_pileup_vs_MV411_RNP_MYB-H3K27AC_R1_treat_pileup_c3.0_cond2.bed
MV411_RNP_AAVS1-H3K27AC_R1_treat_pileup_vs_MV411_RNP_RUNX1-H3K27AC_R1_treat_pileup_c3.0_common.bed
MV411_RNP_AAVS1

### on unscalled data

In [None]:
bams = !ls ../../data/$project/bwa/mergedLibrary/mp*.bam
bams

In [None]:
! mkdir ../../data/$project/diffPeaks_unscaled

In [None]:
#on unscalled data 
for i in range(int(len(bams)/2)):
    if i < 5:
        continue
    name1 = bams[i]
    name2 = bams[6+i]
    print(name1,name2)
    chip.fullDiffPeak(name1,name2, control1='../../data/'+project+'/bwa/mergedLibrary/INPUT_R1.mLb.clN.sorted.bam', directory = "../../data/"+project+"/diffData_unscaled/", res_directory = "../../data/"+project+"/diffPeaks_unscaled/",pairedend=False)

In [None]:
bw = ! ls ../../data/$project/bwa/mergedLibrary/bigwig/*.bigWig
bw

In [None]:
cond1peak = ! ls ../../data/$project/diffPeaks_unscaled/*cond1.bed
cond2peak = ! ls ../../data/$project/diffPeaks_unscaled/*cond2.bed
commonpeak = ! ls ../../data/$project/diffPeaks_unscaled/*common.bed
commonpeak

In [None]:
names = ["MED1","MED1_v2","FLAG_IRF2BP2","POLII_total","POLII_S2","POLII_S5"]

In [None]:
for i in range(int((len(bw)-1)/2)):
    name1 = bw[1+i]
    name2 = bw[7+i]
    peak = [cond1peak[i], commonpeak[i], cond2peak[i]]
    #for val in peak:
     #   chip.dropWeirdChromosomes(val)
    name = names[i]
    print(name1,name2)
    chip.getPeaksAt(peak, [name1, name2], bigwignames=['DMSO', 'VHL'], peaknames=['DMSO_peaks', 'common', 'VHL_peaks'], window=3000, folder="", title=name, numthreads=7, torecompute=True, refpoint='center', name='../../data/'+project+'/diffPeaks_unscaled/'+name+'_mat.pdf', withDeeptools=True)

In [None]:
! gsutil -m cp -r "../../data/additional_degraded_v1" gs://amlproject/Chip/

In [None]:
! cp ../../$project/*_mat.pdf ../results/$project/unscaled/
! cp ../../$project/*_mat.pdf ../results/$project/scaled/

! cp ../../$project/*_mat.pdf ../results/$project/unscaled/
! cp ../../$project/*_mat.pdf ../results/$project/unscaled/

! cp -r ../data/$project/bwa/mergedLibrary/deepTools/plot/* ../results/$project/plots

## v6

In [2]:
project="chipseq_MEF2D_degraded"

In [250]:
! gsutil ls gs://transfer-amlproject/201001_MP8262_fastq/

gs://transfer-amlproject/201001_MP8262_fastq/20201001_FLAG_DMSO_MP8262_S93_R1_001.fastq.gz
gs://transfer-amlproject/201001_MP8262_fastq/20201001_FLAG_DMSO_MP8262_S93_R2_001.fastq.gz
gs://transfer-amlproject/201001_MP8262_fastq/20201001_FLAG_VHL_MP8262_S99_R1_001.fastq.gz
gs://transfer-amlproject/201001_MP8262_fastq/20201001_FLAG_VHL_MP8262_S99_R2_001.fastq.gz
gs://transfer-amlproject/201001_MP8262_fastq/20201001_IRF8_DMSO_MP8262_S88_R1_001.fastq.gz
gs://transfer-amlproject/201001_MP8262_fastq/20201001_IRF8_DMSO_MP8262_S88_R2_001.fastq.gz
gs://transfer-amlproject/201001_MP8262_fastq/20201001_IRF8_VHL_MP8262_S94_R1_001.fastq.gz
gs://transfer-amlproject/201001_MP8262_fastq/20201001_IRF8_VHL_MP8262_S94_R2_001.fastq.gz
gs://transfer-amlproject/201001_MP8262_fastq/20201001_MED1_DMSO_MP8262_S91_R1_001.fastq.gz
gs://transfer-amlproject/201001_MP8262_fastq/20201001_MED1_DMSO_MP8262_S91_R2_001.fastq.gz
gs://transfer-amlproject/201001_MP8262_fastq/20201001_MED1_VHL_MP8262_S97_R1_001.fas

In [252]:
mkdir ../data/$project/ && mkdir ../data/$project/qc/

In [253]:
!gsutil -m cp gs://transfer-amlproject/201001_MP8262_fastq/multiqc_report.html ../data/$project/qc/
!gsutil -m cp -r gs://transfer-amlproject/201001_MP8262_fastq/Reports/ ../data/$project/qc/
!gsutil -m cp -r gs://transfer-amlproject/201001_MP8262_fastq/multiqc_data/ ../data/$project/qc/

Copying gs://transfer-amlproject/201001_MP8262_fastq/multiqc_report.html...
/ [1/1 files][  1.4 MiB/  1.4 MiB] 100% Done                                    
Operation completed over 1 objects/1.4 MiB.                                      
Copying gs://transfer-amlproject/201001_MP8262_fastq/Reports/20201001_FLAG_DMSO_MP8262_S93_R1_001_fastqc.html...
Copying gs://transfer-amlproject/201001_MP8262_fastq/Reports/20201001_FLAG_DMSO_MP8262_S93_R1_001_fastqc.zip...
Copying gs://transfer-amlproject/201001_MP8262_fastq/Reports/20201001_FLAG_DMSO_MP8262_S93_R2_001_fastqc.zip...
Copying gs://transfer-amlproject/201001_MP8262_fastq/Reports/20201001_FLAG_DMSO_MP8262_S93_R2_001_fastqc.html...
Copying gs://transfer-amlproject/201001_MP8262_fastq/Reports/20201001_FLAG_VHL_MP8262_S99_R1_001_fastqc.html...
Copying gs://transfer-amlproject/201001_MP8262_fastq/Reports/20201001_FLAG_VHL_MP8262_S99_R2_001_fastqc.zip...
Copying gs://transfer-amlproject/201001_MP8262_fastq/Reports/20201001_FLAG_VHL_MP8262_S9

In [254]:
! gsutil -m cp gs://transfer-amlproject/201001_MP8262_fastq/*  gs://transfer-amlproject/$project/

Omitting prefix "gs://transfer-amlproject/201001_MP8262_fastq/Reports/". (Did you mean to do cp -r?)
Omitting prefix "gs://transfer-amlproject/201001_MP8262_fastq/multiqc_data/". (Did you mean to do cp -r?)
Copying gs://transfer-amlproject/201001_MP8262_fastq/20201001_FLAG_DMSO_MP8262_S93_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/201001_MP8262_fastq/20201001_FLAG_DMSO_MP8262_S93_R2_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/201001_MP8262_fastq/20201001_FLAG_VHL_MP8262_S99_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/201001_MP8262_fastq/20201001_FLAG_VHL_MP8262_S99_R2_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/201001_MP8262_fastq/20201001_IRF8_DMSO_MP8262_S88_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/201001_MP8262_fastq/20201001_IRF8_VHL_MP8262_S94_R1_

### analysis

In [255]:
! mkdir ../../data/$project && mkdir ../../data/$project/fastqs && gsutil -m cp gs://transfer-amlproject/$project/* ../../data/$project/fastqs

Copying gs://transfer-amlproject/chipseq_MEF2D_degraded/20201001_FLAG_DMSO_MP8262_S93_R1_001.fastq.gz...
Copying gs://transfer-amlproject/chipseq_MEF2D_degraded/20201001_FLAG_DMSO_MP8262_S93_R2_001.fastq.gz...
Copying gs://transfer-amlproject/chipseq_MEF2D_degraded/20201001_FLAG_VHL_MP8262_S99_R1_001.fastq.gz...
Copying gs://transfer-amlproject/chipseq_MEF2D_degraded/20201001_FLAG_VHL_MP8262_S99_R2_001.fastq.gz...
Copying gs://transfer-amlproject/chipseq_MEF2D_degraded/20201001_IRF8_DMSO_MP8262_S88_R1_001.fastq.gz...
Copying gs://transfer-amlproject/chipseq_MEF2D_degraded/20201001_IRF8_DMSO_MP8262_S88_R2_001.fastq.gz...
Copying gs://transfer-amlproject/chipseq_MEF2D_degraded/20201001_IRF8_VHL_MP8262_S94_R1_001.fastq.gz...
Copying gs://transfer-amlproject/chipseq_MEF2D_degraded/20201001_IRF8_VHL_MP8262_S94_R2_001.fastq.gz...
Copying gs://transfer-amlproject/chipseq_MEF2D_degraded/20201001_MED1_DMSO_MP8262_S91_R1_001.fastq.gz...
Copying gs://transfer-amlproject/chipseq_MEF2D_degraded/202

In [256]:
! rm ../../data/$project/fastqs/multiqc_report.html

In [257]:
a = ! ls ../../data/$project/fastqs
a

['20201001_FLAG_DMSO_MP8262_S93_R1_001.fastq.gz',
 '20201001_FLAG_DMSO_MP8262_S93_R2_001.fastq.gz',
 '20201001_FLAG_VHL_MP8262_S99_R1_001.fastq.gz',
 '20201001_FLAG_VHL_MP8262_S99_R2_001.fastq.gz',
 '20201001_IRF8_DMSO_MP8262_S88_R1_001.fastq.gz',
 '20201001_IRF8_DMSO_MP8262_S88_R2_001.fastq.gz',
 '20201001_IRF8_VHL_MP8262_S94_R1_001.fastq.gz',
 '20201001_IRF8_VHL_MP8262_S94_R2_001.fastq.gz',
 '20201001_MED1_DMSO_MP8262_S91_R1_001.fastq.gz',
 '20201001_MED1_DMSO_MP8262_S91_R2_001.fastq.gz',
 '20201001_MED1_VHL_MP8262_S97_R1_001.fastq.gz',
 '20201001_MED1_VHL_MP8262_S97_R2_001.fastq.gz',
 '20201001_MEF2C_DMSO_MP8262_S90_R1_001.fastq.gz',
 '20201001_MEF2C_DMSO_MP8262_S90_R2_001.fastq.gz',
 '20201001_MEF2C_VHL_MP8262_S96_R1_001.fastq.gz',
 '20201001_MEF2C_VHL_MP8262_S96_R2_001.fastq.gz',
 '20201001_MYC_DMSO_MP8262_S89_R1_001.fastq.gz',
 '20201001_MYC_DMSO_MP8262_S89_R2_001.fastq.gz',
 '20201001_MYC_VHL_MP8262_S95_R1_001.fastq.gz',
 '20201001_MYC_VHL_MP8262_S95_R2_001.fastq.gz',
 '20201001

In [258]:
rename = {
"20201001_FLAG_DMSO_MP8262_S93": "mp881-MV411_MEF2D_NT_SC_63_DMSO-FLAG_MEF2D-r3",
"20201001_FLAG_VHL_MP8262_S99": "mp882-MV411_MEF2D_NT_SC_63_VHL-FLAG_MEF2D-r1",
"20201001_IRF8_DMSO_MP8262_S88": "mp883-MV411_MEF2D_NT_SC_63_DMSO-IFR8-r1",
"20201001_IRF8_VHL_MP8262_S94": "mp884-MV411_MEF2D_NT_SC_63_VHL-IFR8-r1",
"20201001_MED1_DMSO_MP8262_S91": "mp885-MV411_MEF2D_NT_SC_63_DMSO-MED1-r1",
"20201001_MED1_VHL_MP8262_S97": "mp886-MV411_MEF2D_NT_SC_63_VHL-MED1-r1",
"20201001_MEF2C_DMSO_MP8262_S90": "mp887-MV411_MEF2D_NT_SC_63_DMSO-MEF2C-r1",
"20201001_MEF2C_VHL_MP8262_S96": "mp888-MV411_MEF2D_NT_SC_63_VHL-MEF2C-r1",
"20201001_MYC_DMSO_MP8262_S89": "mp889-MV411_MEF2D_NT_SC_63_DMSO-MYC-r1",
"20201001_MYC_VHL_MP8262_S95": "mp890-MV411_MEF2D_NT_SC_63_VHL-MYC-r1",
"20201001_POL_II_Total_DMSO_MP8262_S92": "mp891-MV411_MEF2D_NT_SC_63_DMSO-POLII-r1",
"20201001_POL_II_Total_VHL_MP8262_S98": "mp892-MV411_MEF2D_NT_SC_63_VHL-POLII-r1",}

In [259]:
for val in a:
    rep = val
    for k,v in rename.items():
        rep = rep.replace(k,v)
    !mv ../../data/$project/fastqs/$val ../../data/$project/fastqs/$rep

In [260]:
from gsheets import Sheets
sheets = Sheets.from_files('~/.client_secret.json', '~/.storage.json')
url="https://docs.google.com/spreadsheets/d/1yFLjYB1McU530JnLgL0QIMAKIkVl3kl0_LCHje2gk8U"
gsheet = sheets.get(url).sheets[2].to_frame()

In [70]:
gsheet

Unnamed: 0,id,cell line,replicate,protein,quality,paired_end,matching input name,processed,name,previous name,...,ratio to droso,unique mapped reads(droso),scaling factor,Total QC,folderNarrow,folderCompensated,folderQC,folderBroad,folder Bigwig,folder diffPeaks
0,mp100,U937,1,INPUT,,n,,Y,mp100-U937-INPUT-r1,,...,,,,https://storage.cloud.google.com/amlproject/Ch...,https://console.cloud.google.com/storage/brows...,,https://console.cloud.google.com/storage/brows...,https://console.cloud.google.com/storage/brows...,https://console.cloud.google.com/storage/brows...,
1,mp101,NOMO1,1,INPUT,,n,,Y,mp101-NOMO1-INPUT-r1,,...,,,,,,,,,,
2,mp102,UT7,1,INPUT,,n,,Y,mp102-UT7-INPUT-r1,,...,,,,,,,,,,
3,mp106,MV411,1,MYB,x,n,INPUT_MV411,Y,mp106-MV411-MYB-r1,,...,,,,,,,,,,
4,mp109,M6,1,CEBPA,x,n,INPUT_M6,Y,mp109-M6-CEBPA-r1,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
299,mp871,MV411_RNP_MYB,2,H3K27AC,,y,INPUT_MV411,,mp871-MV411_RNP_MYB-H3K27AC-r2,20200723_14_MP8095_S224,...,,,,,,,,,,
300,mp872,MV411_RNP_SPI1,1,H3K27AC,,y,INPUT_MV411,,mp872-MV411_RNP_SPI1-H3K27AC-r1,20200723_15_MP8095_S225,...,,,,,,,,,,
301,mp873,MV411_RNP_SPI1,2,H3K27AC,,y,INPUT_MV411,,mp873-MV411_RNP_SPI1-H3K27AC-r2,20200723_16_MP8095_S226,...,,,,,,,,,,
302,mp874,MV411_MEF2D_NT_SC_63,2,FLAG_MEF2D,,y,INPUT_MV411,,mp874-MV411_MEF2D_NT_SC_63-FLAG_MEF2D-r2,20200723_1S_MP8095_S209,...,,,,,,,,,,


In [261]:
a = ! ls ../../data/$project/fastqs
a

['mp881-MV411_MEF2D_NT_SC_63_DMSO-FLAG_MEF2D-r3_R1_001.fastq.gz',
 'mp881-MV411_MEF2D_NT_SC_63_DMSO-FLAG_MEF2D-r3_R2_001.fastq.gz',
 'mp882-MV411_MEF2D_NT_SC_63_VHL-FLAG_MEF2D-r1_R1_001.fastq.gz',
 'mp882-MV411_MEF2D_NT_SC_63_VHL-FLAG_MEF2D-r1_R2_001.fastq.gz',
 'mp883-MV411_MEF2D_NT_SC_63_DMSO-IFR8-r1_R1_001.fastq.gz',
 'mp883-MV411_MEF2D_NT_SC_63_DMSO-IFR8-r1_R2_001.fastq.gz',
 'mp884-MV411_MEF2D_NT_SC_63_VHL-IFR8-r1_R1_001.fastq.gz',
 'mp884-MV411_MEF2D_NT_SC_63_VHL-IFR8-r1_R2_001.fastq.gz',
 'mp885-MV411_MEF2D_NT_SC_63_DMSO-MED1-r1_R1_001.fastq.gz',
 'mp885-MV411_MEF2D_NT_SC_63_DMSO-MED1-r1_R2_001.fastq.gz',
 'mp886-MV411_MEF2D_NT_SC_63_VHL-MED1-r1_R1_001.fastq.gz',
 'mp886-MV411_MEF2D_NT_SC_63_VHL-MED1-r1_R2_001.fastq.gz',
 'mp887-MV411_MEF2D_NT_SC_63_DMSO-MEF2C-r1_R1_001.fastq.gz',
 'mp887-MV411_MEF2D_NT_SC_63_DMSO-MEF2C-r1_R2_001.fastq.gz',
 'mp888-MV411_MEF2D_NT_SC_63_VHL-MEF2C-r1_R1_001.fastq.gz',
 'mp888-MV411_MEF2D_NT_SC_63_VHL-MEF2C-r1_R2_001.fastq.gz',
 'mp889-MV411_MEF2D_

In [262]:
df = {
"fastq_1": [],
"fastq_2": [],
"antibody": [],
"group": [],
"replicate": [],
"control": []
}
for val in h.grouped(a,2):
    row = gsheet[gsheet.id==val[0].split('/')[-1].split('-')[0]]
    df['group'].append("MV4"+row['name'].values[0].split('-r')[0].split('-MV4')[1])
    df['replicate'].append(row['name'].values[0].split('-r')[1])
    df['fastq_1'].append(project+"/fastqs/"+val[0])
    df['fastq_2'].append(project+"/fastqs/"+val[1])
    df['antibody'].append(row['protein'].values[0])
    df['control'].append("INPUT")
df['group'].append('INPUT')
df['replicate'].append(1)
df['fastq_1'].append('ref/mp845-MV411-INPUT-r2_R1.fastq.gz')
df['fastq_2'].append('ref/mp845-MV411-INPUT-r2_R2.fastq.gz')
df['antibody'].append("")
df['control'].append("")
df = pd.DataFrame(df)

In [265]:
df.loc[0].tolist()

['chipseq_MEF2D_degraded/fastqs/mp881-MV411_MEF2D_NT_SC_63_DMSO-FLAG_MEF2D-r3_R1_001.fastq.gz',
 'chipseq_MEF2D_degraded/fastqs/mp881-MV411_MEF2D_NT_SC_63_DMSO-FLAG_MEF2D-r3_R2_001.fastq.gz',
 'FLAG_MEF2D',
 'MV411_MEF2D_NT_SC_63_DMSO-FLAG_MEF2D',
 '3',
 'INPUT']

In [8]:
cat ../nextflow/chipseq_MEF2D_degraded_design.csv

group,replicate,fastq_1,fastq_2,antibody,control
MV411_MEF2D_NT_SC_63_DMSO-FLAG_MEF2D,1,chipseq_MEF2D_degraded/fastqs/mp881-MV411_MEF2D_NT_SC_63_DMSO-FLAG_MEF2D-r3_R1_001.fastq.gz,chipseq_MEF2D_degraded/fastqs/mp881-MV411_MEF2D_NT_SC_63_DMSO-FLAG_MEF2D-r3_R2_001.fastq.gz,FLAG_MEF2D,INPUT
MV411_MEF2D_NT_SC_63_VHL-FLAG_MEF2D,1,chipseq_MEF2D_degraded/fastqs/mp882-MV411_MEF2D_NT_SC_63_VHL-FLAG_MEF2D-r1_R1_001.fastq.gz,chipseq_MEF2D_degraded/fastqs/mp882-MV411_MEF2D_NT_SC_63_VHL-FLAG_MEF2D-r1_R2_001.fastq.gz,FLAG_MEF2D,INPUT
MV411_MEF2D_NT_SC_63_DMSO-IFR8,1,chipseq_MEF2D_degraded/fastqs/mp883-MV411_MEF2D_NT_SC_63_DMSO-IFR8-r1_R1_001.fastq.gz,chipseq_MEF2D_degraded/fastqs/mp883-MV411_MEF2D_NT_SC_63_DMSO-IFR8-r1_R2_001.fastq.gz,IFR8,INPUT
MV411_MEF2D_NT_SC_63_VHL-IFR8,1,chipseq_MEF2D_degraded/fastqs/mp884-MV411_MEF2D_NT_SC_63_VHL-IFR8-r1_R1_001.fastq.gz,chipseq_MEF2D_degraded/fastqs/mp884-MV411_MEF2D_NT_SC_63_VHL-IFR8-r1_R2_001.fastq.gz,IFR8,INPUT
MV411_MEF2D_NT_SC_63_DMSO-MED1,1,chipseq

In [267]:
df[df.columns[[3,4,0,1,2,5]]].to_csv('../nextflow/'+project+'_design.csv',index=False)

In [6]:
! cd ../../data/ && sudo ../nextflow log ## to get access to the previous runs

TIMESTAMP          	DURATION      	RUN NAME              	STATUS	REVISION ID	SESSION ID                          	COMMAND                                                                                                                                                                                                                                                          
2020-02-10 22:39:18	8m 25s        	stupefied_crick       	ERR   	21be314954 	76ea5df0-153c-4e71-a59d-52c6112fda84	nextflow run nf-core/chipseq --paired_end --seq_center DFCI --email jkobject@gmail.com --narrow_peak --input design.csv --genome GRCh38 --skip_preseq --max_cpus 24 -profile docker -w work                                                                      
2020-02-10 22:40:37	1m 6s         	ridiculous_hilbert    	ERR   	21be314954 	75004903-035d-4504-ab80-cab74b5acac4	nextflow run nf-core/chipseq --paired_end --seq_center DFCI --email jkobject@gmail.com --narrow_peak --input design.csv --genome GRCh38 --sk

In [7]:
#process chips
! cd ../../data/ && sudo ../nextflow run nf-core/chipseq --paired_end --seq_center 'DFCI' --email 'jkobject@gmail.com' --input ../AMLproject/nextflow/$project_design.csv --genome GRCh38 --skip_preseq --max_cpus 16 -profile docker -w work -resume exotic_bartik

N E X T F L O W  ~  version 19.10.0
Launching `nf-core/chipseq` [wise_legentil] - revision: 21be314954 [master]
NOTE: Your local project version looks outdated - a different revision is available in the remote repository [0f487ed76d]
-[2m--------------------------------------------------[0m-
                                        [0;32m,--.[0;30m/[0;32m,-.[0m
[0;34m        ___     __   __   __   ___     [0;32m/,-._.--~'[0m
[0;34m  |\ | |__  __ /  ` /  \ |__) |__         [0;33m}  {[0m
[0;34m  | \| |       \__, \__/ |  \ |___     [0;32m\`-._,-`-,[0m
                                        [0;32m`._,._,'[0m
[0;35m  nf-core/chipseq v1.1.0[0m
-[2m--------------------------------------------------[0m-
Run Name            : wise_legentil
Data Type           : Paired-End
Design File         : ../AMLproject/nextflow/additional_degraded_v1_design.csv
Genome              : GRCh38
Fasta File          : s3://ngi-igenomes/igenomes//Homo_sapiens/NCBI/GRCh38/Sequence/WholeGenomeF

[28A
executor >  local (1)[K
[eb/23cd77] process > CheckDesign (additional_deg... [100%] 1 of 1, cached: 1 ✔[K
[c9/ec8de4] process > MakeTSSBED (genes.bed)         [100%] 1 of 1, cached: 1 ✔[K
[0d/f433a4] process > MakeGenomeFilter (genome.fa)   [100%] 1 of 1, cached: 1 ✔[K
[fe/2e0c27] process > FastQC (MV411_RNP_SPI1-H3K2... [100%] 17 of 17, cached:...[K
[68/a565e7] process > TrimGalore (MV411_RNP_MYB-H... [100%] 17 of 17, cached:...[K
[a2/0c2388] process > BWAMem (MV411_RNP_RUNX1-H3K... [100%] 17 of 17, cached:...[K
[13/4f49fa] process > SortBAM (INPUT_R1_T1)          [100%] 17 of 17, cached:...[K
[-        ] process > MergeBAM                       -[K
[-        ] process > MergeBAMFilter                 -[K
[-        ] process > MergeBAMRemoveOrphan           -[K
[-        ] process > Preseq                         -[K
[-        ] process > CollectMultipleMetrics         -[K
[-        ] process > BigWig                         -[K
[-        ] process > PlotProfile   

[28A
executor >  local (6)[K
[eb/23cd77] process > CheckDesign (additional_deg... [100%] 1 of 1, cached: 1 ✔[K
[c9/ec8de4] process > MakeTSSBED (genes.bed)         [100%] 1 of 1, cached: 1 ✔[K
[0d/f433a4] process > MakeGenomeFilter (genome.fa)   [100%] 1 of 1, cached: 1 ✔[K
[fe/2e0c27] process > FastQC (MV411_RNP_SPI1-H3K2... [100%] 17 of 17, cached:...[K
[68/a565e7] process > TrimGalore (MV411_RNP_MYB-H... [100%] 17 of 17, cached:...[K
[a2/0c2388] process > BWAMem (MV411_RNP_RUNX1-H3K... [100%] 17 of 17, cached:...[K
[13/4f49fa] process > SortBAM (INPUT_R1_T1)          [100%] 17 of 17, cached:...[K
[fc/cf6140] process > MergeBAM (MV411_RNP_SPI1-H3... [ 88%] 14 of 16, cached: 11[K
[-        ] process > MergeBAMFilter                 -[K
[-        ] process > MergeBAMRemoveOrphan           -[K
[-        ] process > Preseq                         -[K
[-        ] process > CollectMultipleMetrics         -[K
[-        ] process > BigWig                         -[K
[-        

[28A
executor >  local (11)[K
[eb/23cd77] process > CheckDesign (additional_deg... [100%] 1 of 1, cached: 1 ✔[K
[c9/ec8de4] process > MakeTSSBED (genes.bed)         [100%] 1 of 1, cached: 1 ✔[K
[0d/f433a4] process > MakeGenomeFilter (genome.fa)   [100%] 1 of 1, cached: 1 ✔[K
[fe/2e0c27] process > FastQC (MV411_RNP_SPI1-H3K2... [100%] 17 of 17, cached:...[K
[68/a565e7] process > TrimGalore (MV411_RNP_MYB-H... [100%] 17 of 17, cached:...[K
[a2/0c2388] process > BWAMem (MV411_RNP_RUNX1-H3K... [100%] 17 of 17, cached:...[K
[13/4f49fa] process > SortBAM (INPUT_R1_T1)          [100%] 17 of 17, cached:...[K
[5a/6aaef3] process > MergeBAM (MV411_RNP_RUNX2-H... [100%] 17 of 17, cached:...[K
[84/574208] process > MergeBAMFilter (MV411_RNP_S... [ 50%] 2 of 4[K
[-        ] process > MergeBAMRemoveOrphan           -[K
[-        ] process > Preseq                         -[K
[-        ] process > CollectMultipleMetrics         -[K
[-        ] process > BigWig                         -

[28A
executor >  local (15)[K
[eb/23cd77] process > CheckDesign (additional_deg... [100%] 1 of 1, cached: 1 ✔[K
[c9/ec8de4] process > MakeTSSBED (genes.bed)         [100%] 1 of 1, cached: 1 ✔[K
[0d/f433a4] process > MakeGenomeFilter (genome.fa)   [100%] 1 of 1, cached: 1 ✔[K
[fe/2e0c27] process > FastQC (MV411_RNP_SPI1-H3K2... [100%] 17 of 17, cached:...[K
[68/a565e7] process > TrimGalore (MV411_RNP_MYB-H... [100%] 17 of 17, cached:...[K
[a2/0c2388] process > BWAMem (MV411_RNP_RUNX1-H3K... [100%] 17 of 17, cached:...[K
[13/4f49fa] process > SortBAM (INPUT_R1_T1)          [100%] 17 of 17, cached:...[K
[5a/6aaef3] process > MergeBAM (MV411_RNP_RUNX2-H... [100%] 17 of 17, cached:...[K
[e1/d4ac46] process > MergeBAMFilter (MV411_RNP_A... [ 63%] 5 of 8[K
[-        ] process > MergeBAMRemoveOrphan           -[K
[-        ] process > Preseq                         -[K
[-        ] process > CollectMultipleMetrics         -[K
[-        ] process > BigWig                         -

[28A
executor >  local (18)[K
[eb/23cd77] process > CheckDesign (additional_deg... [100%] 1 of 1, cached: 1 ✔[K
[c9/ec8de4] process > MakeTSSBED (genes.bed)         [100%] 1 of 1, cached: 1 ✔[K
[0d/f433a4] process > MakeGenomeFilter (genome.fa)   [100%] 1 of 1, cached: 1 ✔[K
[fe/2e0c27] process > FastQC (MV411_RNP_SPI1-H3K2... [100%] 17 of 17, cached:...[K
[68/a565e7] process > TrimGalore (MV411_RNP_MYB-H... [100%] 17 of 17, cached:...[K
[a2/0c2388] process > BWAMem (MV411_RNP_RUNX1-H3K... [100%] 17 of 17, cached:...[K
[13/4f49fa] process > SortBAM (INPUT_R1_T1)          [100%] 17 of 17, cached:...[K
[5a/6aaef3] process > MergeBAM (MV411_RNP_RUNX2-H... [100%] 17 of 17, cached:...[K
[77/a464ff] process > MergeBAMFilter (MV411_RNP_I... [ 91%] 10 of 11[K
[-        ] process > MergeBAMRemoveOrphan           -[K
[-        ] process > Preseq                         -[K
[-        ] process > CollectMultipleMetrics         -[K
[-        ] process > BigWig                         

[28A
executor >  local (23)[K
[eb/23cd77] process > CheckDesign (additional_deg... [100%] 1 of 1, cached: 1 ✔[K
[c9/ec8de4] process > MakeTSSBED (genes.bed)         [100%] 1 of 1, cached: 1 ✔[K
[0d/f433a4] process > MakeGenomeFilter (genome.fa)   [100%] 1 of 1, cached: 1 ✔[K
[fe/2e0c27] process > FastQC (MV411_RNP_SPI1-H3K2... [100%] 17 of 17, cached:...[K
[68/a565e7] process > TrimGalore (MV411_RNP_MYB-H... [100%] 17 of 17, cached:...[K
[a2/0c2388] process > BWAMem (MV411_RNP_RUNX1-H3K... [100%] 17 of 17, cached:...[K
[13/4f49fa] process > SortBAM (INPUT_R1_T1)          [100%] 17 of 17, cached:...[K
[5a/6aaef3] process > MergeBAM (MV411_RNP_RUNX2-H... [100%] 17 of 17, cached:...[K
[e0/a67acd] process > MergeBAMFilter (MV411_RNP_M... [ 88%] 14 of 16[K
[-        ] process > MergeBAMRemoveOrphan           -[K
[-        ] process > Preseq                         -[K
[-        ] process > CollectMultipleMetrics         -[K
[-        ] process > BigWig                         

[28A
executor >  local (28)[K
[eb/23cd77] process > CheckDesign (additional_deg... [100%] 1 of 1, cached: 1 ✔[K
[c9/ec8de4] process > MakeTSSBED (genes.bed)         [100%] 1 of 1, cached: 1 ✔[K
[0d/f433a4] process > MakeGenomeFilter (genome.fa)   [100%] 1 of 1, cached: 1 ✔[K
[fe/2e0c27] process > FastQC (MV411_RNP_SPI1-H3K2... [100%] 17 of 17, cached:...[K
[68/a565e7] process > TrimGalore (MV411_RNP_MYB-H... [100%] 17 of 17, cached:...[K
[a2/0c2388] process > BWAMem (MV411_RNP_RUNX1-H3K... [100%] 17 of 17, cached:...[K
[13/4f49fa] process > SortBAM (INPUT_R1_T1)          [100%] 17 of 17, cached:...[K
[5a/6aaef3] process > MergeBAM (MV411_RNP_RUNX2-H... [100%] 17 of 17, cached:...[K
[48/4b5e62] process > MergeBAMFilter (MV411_RNP_R... [100%] 17 of 17 ✔[K
[57/199f77] process > MergeBAMRemoveOrphan (MV411... [ 50%] 2 of 4[K
[-        ] process > Preseq                         -[K
[-        ] process > CollectMultipleMetrics         -[K
[-        ] process > BigWig           

[28A
executor >  local (33)[K
[eb/23cd77] process > CheckDesign (additional_deg... [100%] 1 of 1, cached: 1 ✔[K
[c9/ec8de4] process > MakeTSSBED (genes.bed)         [100%] 1 of 1, cached: 1 ✔[K
[0d/f433a4] process > MakeGenomeFilter (genome.fa)   [100%] 1 of 1, cached: 1 ✔[K
[fe/2e0c27] process > FastQC (MV411_RNP_SPI1-H3K2... [100%] 17 of 17, cached:...[K
[68/a565e7] process > TrimGalore (MV411_RNP_MYB-H... [100%] 17 of 17, cached:...[K
[a2/0c2388] process > BWAMem (MV411_RNP_RUNX1-H3K... [100%] 17 of 17, cached:...[K
[13/4f49fa] process > SortBAM (INPUT_R1_T1)          [100%] 17 of 17, cached:...[K
[5a/6aaef3] process > MergeBAM (MV411_RNP_RUNX2-H... [100%] 17 of 17, cached:...[K
[48/4b5e62] process > MergeBAMFilter (MV411_RNP_R... [100%] 17 of 17 ✔[K
[63/892aee] process > MergeBAMRemoveOrphan (MV411... [ 78%] 7 of 9[K
[-        ] process > Preseq                         -[K
[-        ] process > CollectMultipleMetrics         -[K
[-        ] process > BigWig           

[28A
executor >  local (38)[K
[eb/23cd77] process > CheckDesign (additional_deg... [100%] 1 of 1, cached: 1 ✔[K
[c9/ec8de4] process > MakeTSSBED (genes.bed)         [100%] 1 of 1, cached: 1 ✔[K
[0d/f433a4] process > MakeGenomeFilter (genome.fa)   [100%] 1 of 1, cached: 1 ✔[K
[fe/2e0c27] process > FastQC (MV411_RNP_SPI1-H3K2... [100%] 17 of 17, cached:...[K
[68/a565e7] process > TrimGalore (MV411_RNP_MYB-H... [100%] 17 of 17, cached:...[K
[a2/0c2388] process > BWAMem (MV411_RNP_RUNX1-H3K... [100%] 17 of 17, cached:...[K
[13/4f49fa] process > SortBAM (INPUT_R1_T1)          [100%] 17 of 17, cached:...[K
[5a/6aaef3] process > MergeBAM (MV411_RNP_RUNX2-H... [100%] 17 of 17, cached:...[K
[48/4b5e62] process > MergeBAMFilter (MV411_RNP_R... [100%] 17 of 17 ✔[K
[ba/00ce48] process > MergeBAMRemoveOrphan (MV411... [ 79%] 11 of 14[K
[-        ] process > Preseq                         -[K
[-        ] process > CollectMultipleMetrics         -[K
[-        ] process > BigWig         

[28A
executor >  local (41)[K
[eb/23cd77] process > CheckDesign (additional_deg... [100%] 1 of 1, cached: 1 ✔[K
[c9/ec8de4] process > MakeTSSBED (genes.bed)         [100%] 1 of 1, cached: 1 ✔[K
[0d/f433a4] process > MakeGenomeFilter (genome.fa)   [100%] 1 of 1, cached: 1 ✔[K
[fe/2e0c27] process > FastQC (MV411_RNP_SPI1-H3K2... [100%] 17 of 17, cached:...[K
[68/a565e7] process > TrimGalore (MV411_RNP_MYB-H... [100%] 17 of 17, cached:...[K
[a2/0c2388] process > BWAMem (MV411_RNP_RUNX1-H3K... [100%] 17 of 17, cached:...[K
[13/4f49fa] process > SortBAM (INPUT_R1_T1)          [100%] 17 of 17, cached:...[K
[5a/6aaef3] process > MergeBAM (MV411_RNP_RUNX2-H... [100%] 17 of 17, cached:...[K
[48/4b5e62] process > MergeBAMFilter (MV411_RNP_R... [100%] 17 of 17 ✔[K
[c0/52c2db] process > MergeBAMRemoveOrphan (MV411... [ 94%] 15 of 16[K
[-        ] process > Preseq                         -[K
[-        ] process > CollectMultipleMetrics         -[K
[-        ] process > BigWig         

[28A
executor >  local (46)[K
[eb/23cd77] process > CheckDesign (additional_deg... [100%] 1 of 1, cached: 1 ✔[K
[c9/ec8de4] process > MakeTSSBED (genes.bed)         [100%] 1 of 1, cached: 1 ✔[K
[0d/f433a4] process > MakeGenomeFilter (genome.fa)   [100%] 1 of 1, cached: 1 ✔[K
[fe/2e0c27] process > FastQC (MV411_RNP_SPI1-H3K2... [100%] 17 of 17, cached:...[K
[68/a565e7] process > TrimGalore (MV411_RNP_MYB-H... [100%] 17 of 17, cached:...[K
[a2/0c2388] process > BWAMem (MV411_RNP_RUNX1-H3K... [100%] 17 of 17, cached:...[K
[13/4f49fa] process > SortBAM (INPUT_R1_T1)          [100%] 17 of 17, cached:...[K
[5a/6aaef3] process > MergeBAM (MV411_RNP_RUNX2-H... [100%] 17 of 17, cached:...[K
[48/4b5e62] process > MergeBAMFilter (MV411_RNP_R... [100%] 17 of 17 ✔[K
[dc/6cb451] process > MergeBAMRemoveOrphan (MV411... [ 94%] 16 of 17[K
[-        ] process > Preseq                         -[K
[18/e3205c] process > CollectMultipleMetrics (MV4... [100%] 1 of 1[K
[fa/7ca520] process > Big

[c1/b1a581] process > PhantomPeakQualTools (MV411... [ 67%] 2 of 3[K
[-        ] process > PlotFingerprint                -[K
[-        ] process > MACSCallPeak                   -[K
[-        ] process > AnnotatePeaks                  -[K
[-        ] process > PeakQC                         -[K
[-        ] process > ConsensusPeakSet               -[K
[-        ] process > ConsensusPeakSetAnnotate       -[K
[-        ] process > ConsensusPeakSetDESeq          -[K
[-        ] process > IGV                            -[K
[7e/9c912a] process > get_software_versions          [100%] 1 of 1 ✔[K
[-        ] process > MultiQC                        -[K
[8a/7334d7] process > output_documentation           [100%] 1 of 1, cached: 1 ✔[K
[28A
executor >  local (51)[K
[eb/23cd77] process > CheckDesign (additional_deg... [100%] 1 of 1, cached: 1 ✔[K
[c9/ec8de4] process > MakeTSSBED (genes.bed)         [100%] 1 of 1, cached: 1 ✔[K
[0d/f433a4] process > MakeGenomeFilter (genome.fa)   [1

[28A
executor >  local (56)[K
[eb/23cd77] process > CheckDesign (additional_deg... [100%] 1 of 1, cached: 1 ✔[K
[c9/ec8de4] process > MakeTSSBED (genes.bed)         [100%] 1 of 1, cached: 1 ✔[K
[0d/f433a4] process > MakeGenomeFilter (genome.fa)   [100%] 1 of 1, cached: 1 ✔[K
[fe/2e0c27] process > FastQC (MV411_RNP_SPI1-H3K2... [100%] 17 of 17, cached:...[K
[68/a565e7] process > TrimGalore (MV411_RNP_MYB-H... [100%] 17 of 17, cached:...[K
[a2/0c2388] process > BWAMem (MV411_RNP_RUNX1-H3K... [100%] 17 of 17, cached:...[K
[13/4f49fa] process > SortBAM (INPUT_R1_T1)          [100%] 17 of 17, cached:...[K
[5a/6aaef3] process > MergeBAM (MV411_RNP_RUNX2-H... [100%] 17 of 17, cached:...[K
[48/4b5e62] process > MergeBAMFilter (MV411_RNP_R... [100%] 17 of 17 ✔[K
[dc/6cb451] process > MergeBAMRemoveOrphan (MV411... [100%] 17 of 17 ✔[K
[-        ] process > Preseq                         -[K
[36/03ab9d] process > CollectMultipleMetrics (MV4... [ 80%] 4 of 5[K
[26/8106f4] process > B

[28A
executor >  local (60)[K
[eb/23cd77] process > CheckDesign (additional_deg... [100%] 1 of 1, cached: 1 ✔[K
[c9/ec8de4] process > MakeTSSBED (genes.bed)         [100%] 1 of 1, cached: 1 ✔[K
[0d/f433a4] process > MakeGenomeFilter (genome.fa)   [100%] 1 of 1, cached: 1 ✔[K
[fe/2e0c27] process > FastQC (MV411_RNP_SPI1-H3K2... [100%] 17 of 17, cached:...[K
[68/a565e7] process > TrimGalore (MV411_RNP_MYB-H... [100%] 17 of 17, cached:...[K
[a2/0c2388] process > BWAMem (MV411_RNP_RUNX1-H3K... [100%] 17 of 17, cached:...[K
[13/4f49fa] process > SortBAM (INPUT_R1_T1)          [100%] 17 of 17, cached:...[K
[5a/6aaef3] process > MergeBAM (MV411_RNP_RUNX2-H... [100%] 17 of 17, cached:...[K
[48/4b5e62] process > MergeBAMFilter (MV411_RNP_R... [100%] 17 of 17 ✔[K
[dc/6cb451] process > MergeBAMRemoveOrphan (MV411... [100%] 17 of 17 ✔[K
[-        ] process > Preseq                         -[K
[36/03ab9d] process > CollectMultipleMetrics (MV4... [100%] 5 of 5[K
[2d/982381] process > B

[28A
executor >  local (65)[K
[eb/23cd77] process > CheckDesign (additional_deg... [100%] 1 of 1, cached: 1 ✔[K
[c9/ec8de4] process > MakeTSSBED (genes.bed)         [100%] 1 of 1, cached: 1 ✔[K
[0d/f433a4] process > MakeGenomeFilter (genome.fa)   [100%] 1 of 1, cached: 1 ✔[K
[fe/2e0c27] process > FastQC (MV411_RNP_SPI1-H3K2... [100%] 17 of 17, cached:...[K
[68/a565e7] process > TrimGalore (MV411_RNP_MYB-H... [100%] 17 of 17, cached:...[K
[a2/0c2388] process > BWAMem (MV411_RNP_RUNX1-H3K... [100%] 17 of 17, cached:...[K
[13/4f49fa] process > SortBAM (INPUT_R1_T1)          [100%] 17 of 17, cached:...[K
[5a/6aaef3] process > MergeBAM (MV411_RNP_RUNX2-H... [100%] 17 of 17, cached:...[K
[48/4b5e62] process > MergeBAMFilter (MV411_RNP_R... [100%] 17 of 17 ✔[K
[dc/6cb451] process > MergeBAMRemoveOrphan (MV411... [100%] 17 of 17 ✔[K
[-        ] process > Preseq                         -[K
[6f/0c32f5] process > CollectMultipleMetrics (INP... [100%] 6 of 6[K
[2d/982381] process > B

[28A
executor >  local (70)[K
[eb/23cd77] process > CheckDesign (additional_deg... [100%] 1 of 1, cached: 1 ✔[K
[c9/ec8de4] process > MakeTSSBED (genes.bed)         [100%] 1 of 1, cached: 1 ✔[K
[0d/f433a4] process > MakeGenomeFilter (genome.fa)   [100%] 1 of 1, cached: 1 ✔[K
[fe/2e0c27] process > FastQC (MV411_RNP_SPI1-H3K2... [100%] 17 of 17, cached:...[K
[68/a565e7] process > TrimGalore (MV411_RNP_MYB-H... [100%] 17 of 17, cached:...[K
[a2/0c2388] process > BWAMem (MV411_RNP_RUNX1-H3K... [100%] 17 of 17, cached:...[K
[13/4f49fa] process > SortBAM (INPUT_R1_T1)          [100%] 17 of 17, cached:...[K
[5a/6aaef3] process > MergeBAM (MV411_RNP_RUNX2-H... [100%] 17 of 17, cached:...[K
[48/4b5e62] process > MergeBAMFilter (MV411_RNP_R... [100%] 17 of 17 ✔[K
[dc/6cb451] process > MergeBAMRemoveOrphan (MV411... [100%] 17 of 17 ✔[K
[-        ] process > Preseq                         -[K
[77/85e402] process > CollectMultipleMetrics (MV4... [100%] 7 of 7[K
[83/f433d6] process > B

[28A
executor >  local (74)[K
[eb/23cd77] process > CheckDesign (additional_deg... [100%] 1 of 1, cached: 1 ✔[K
[c9/ec8de4] process > MakeTSSBED (genes.bed)         [100%] 1 of 1, cached: 1 ✔[K
[0d/f433a4] process > MakeGenomeFilter (genome.fa)   [100%] 1 of 1, cached: 1 ✔[K
[fe/2e0c27] process > FastQC (MV411_RNP_SPI1-H3K2... [100%] 17 of 17, cached:...[K
[68/a565e7] process > TrimGalore (MV411_RNP_MYB-H... [100%] 17 of 17, cached:...[K
[a2/0c2388] process > BWAMem (MV411_RNP_RUNX1-H3K... [100%] 17 of 17, cached:...[K
[13/4f49fa] process > SortBAM (INPUT_R1_T1)          [100%] 17 of 17, cached:...[K
[5a/6aaef3] process > MergeBAM (MV411_RNP_RUNX2-H... [100%] 17 of 17, cached:...[K
[48/4b5e62] process > MergeBAMFilter (MV411_RNP_R... [100%] 17 of 17 ✔[K
[dc/6cb451] process > MergeBAMRemoveOrphan (MV411... [100%] 17 of 17 ✔[K
[-        ] process > Preseq                         -[K
[f7/870fa3] process > CollectMultipleMetrics (MV4... [100%] 8 of 8[K
[be/8c5392] process > B

[28A
executor >  local (79)[K
[eb/23cd77] process > CheckDesign (additional_deg... [100%] 1 of 1, cached: 1 ✔[K
[c9/ec8de4] process > MakeTSSBED (genes.bed)         [100%] 1 of 1, cached: 1 ✔[K
[0d/f433a4] process > MakeGenomeFilter (genome.fa)   [100%] 1 of 1, cached: 1 ✔[K
[fe/2e0c27] process > FastQC (MV411_RNP_SPI1-H3K2... [100%] 17 of 17, cached:...[K
[68/a565e7] process > TrimGalore (MV411_RNP_MYB-H... [100%] 17 of 17, cached:...[K
[a2/0c2388] process > BWAMem (MV411_RNP_RUNX1-H3K... [100%] 17 of 17, cached:...[K
[13/4f49fa] process > SortBAM (INPUT_R1_T1)          [100%] 17 of 17, cached:...[K
[5a/6aaef3] process > MergeBAM (MV411_RNP_RUNX2-H... [100%] 17 of 17, cached:...[K
[48/4b5e62] process > MergeBAMFilter (MV411_RNP_R... [100%] 17 of 17 ✔[K
[dc/6cb451] process > MergeBAMRemoveOrphan (MV411... [100%] 17 of 17 ✔[K
[-        ] process > Preseq                         -[K
[eb/85c928] process > CollectMultipleMetrics (MV4... [100%] 9 of 9[K
[4e/c46492] process > B

[28A
executor >  local (84)[K
[eb/23cd77] process > CheckDesign (additional_deg... [100%] 1 of 1, cached: 1 ✔[K
[c9/ec8de4] process > MakeTSSBED (genes.bed)         [100%] 1 of 1, cached: 1 ✔[K
[0d/f433a4] process > MakeGenomeFilter (genome.fa)   [100%] 1 of 1, cached: 1 ✔[K
[fe/2e0c27] process > FastQC (MV411_RNP_SPI1-H3K2... [100%] 17 of 17, cached:...[K
[68/a565e7] process > TrimGalore (MV411_RNP_MYB-H... [100%] 17 of 17, cached:...[K
[a2/0c2388] process > BWAMem (MV411_RNP_RUNX1-H3K... [100%] 17 of 17, cached:...[K
[13/4f49fa] process > SortBAM (INPUT_R1_T1)          [100%] 17 of 17, cached:...[K
[5a/6aaef3] process > MergeBAM (MV411_RNP_RUNX2-H... [100%] 17 of 17, cached:...[K
[48/4b5e62] process > MergeBAMFilter (MV411_RNP_R... [100%] 17 of 17 ✔[K
[dc/6cb451] process > MergeBAMRemoveOrphan (MV411... [100%] 17 of 17 ✔[K
[-        ] process > Preseq                         -[K
[af/724a3d] process > CollectMultipleMetrics (MV4... [ 91%] 10 of 11[K
[60/3850df] process >

[28A
executor >  local (89)[K
[eb/23cd77] process > CheckDesign (additional_deg... [100%] 1 of 1, cached: 1 ✔[K
[c9/ec8de4] process > MakeTSSBED (genes.bed)         [100%] 1 of 1, cached: 1 ✔[K
[0d/f433a4] process > MakeGenomeFilter (genome.fa)   [100%] 1 of 1, cached: 1 ✔[K
[fe/2e0c27] process > FastQC (MV411_RNP_SPI1-H3K2... [100%] 17 of 17, cached:...[K
[68/a565e7] process > TrimGalore (MV411_RNP_MYB-H... [100%] 17 of 17, cached:...[K
[a2/0c2388] process > BWAMem (MV411_RNP_RUNX1-H3K... [100%] 17 of 17, cached:...[K
[13/4f49fa] process > SortBAM (INPUT_R1_T1)          [100%] 17 of 17, cached:...[K
[5a/6aaef3] process > MergeBAM (MV411_RNP_RUNX2-H... [100%] 17 of 17, cached:...[K
[48/4b5e62] process > MergeBAMFilter (MV411_RNP_R... [100%] 17 of 17 ✔[K
[dc/6cb451] process > MergeBAMRemoveOrphan (MV411... [100%] 17 of 17 ✔[K
[-        ] process > Preseq                         -[K
[be/e4f2a1] process > CollectMultipleMetrics (MV4... [100%] 12 of 12[K
[eb/3a1856] process >

[28A
executor >  local (94)[K
[eb/23cd77] process > CheckDesign (additional_deg... [100%] 1 of 1, cached: 1 ✔[K
[c9/ec8de4] process > MakeTSSBED (genes.bed)         [100%] 1 of 1, cached: 1 ✔[K
[0d/f433a4] process > MakeGenomeFilter (genome.fa)   [100%] 1 of 1, cached: 1 ✔[K
[fe/2e0c27] process > FastQC (MV411_RNP_SPI1-H3K2... [100%] 17 of 17, cached:...[K
[68/a565e7] process > TrimGalore (MV411_RNP_MYB-H... [100%] 17 of 17, cached:...[K
[a2/0c2388] process > BWAMem (MV411_RNP_RUNX1-H3K... [100%] 17 of 17, cached:...[K
[13/4f49fa] process > SortBAM (INPUT_R1_T1)          [100%] 17 of 17, cached:...[K
[5a/6aaef3] process > MergeBAM (MV411_RNP_RUNX2-H... [100%] 17 of 17, cached:...[K
[48/4b5e62] process > MergeBAMFilter (MV411_RNP_R... [100%] 17 of 17 ✔[K
[dc/6cb451] process > MergeBAMRemoveOrphan (MV411... [100%] 17 of 17 ✔[K
[-        ] process > Preseq                         -[K
[58/9c859d] process > CollectMultipleMetrics (MV4... [100%] 13 of 13[K
[eb/3a1856] process >

[28A
executor >  local (99)[K
[eb/23cd77] process > CheckDesign (additional_deg... [100%] 1 of 1, cached: 1 ✔[K
[c9/ec8de4] process > MakeTSSBED (genes.bed)         [100%] 1 of 1, cached: 1 ✔[K
[0d/f433a4] process > MakeGenomeFilter (genome.fa)   [100%] 1 of 1, cached: 1 ✔[K
[fe/2e0c27] process > FastQC (MV411_RNP_SPI1-H3K2... [100%] 17 of 17, cached:...[K
[68/a565e7] process > TrimGalore (MV411_RNP_MYB-H... [100%] 17 of 17, cached:...[K
[a2/0c2388] process > BWAMem (MV411_RNP_RUNX1-H3K... [100%] 17 of 17, cached:...[K
[13/4f49fa] process > SortBAM (INPUT_R1_T1)          [100%] 17 of 17, cached:...[K
[5a/6aaef3] process > MergeBAM (MV411_RNP_RUNX2-H... [100%] 17 of 17, cached:...[K
[48/4b5e62] process > MergeBAMFilter (MV411_RNP_R... [100%] 17 of 17 ✔[K
[dc/6cb451] process > MergeBAMRemoveOrphan (MV411... [100%] 17 of 17 ✔[K
[-        ] process > Preseq                         -[K
[39/7b314d] process > CollectMultipleMetrics (MV4... [100%] 14 of 14[K
[e1/c25e9c] process >

[28A
executor >  local (104)[K
[eb/23cd77] process > CheckDesign (additional_deg... [100%] 1 of 1, cached: 1 ✔[K
[c9/ec8de4] process > MakeTSSBED (genes.bed)         [100%] 1 of 1, cached: 1 ✔[K
[0d/f433a4] process > MakeGenomeFilter (genome.fa)   [100%] 1 of 1, cached: 1 ✔[K
[fe/2e0c27] process > FastQC (MV411_RNP_SPI1-H3K2... [100%] 17 of 17, cached:...[K
[68/a565e7] process > TrimGalore (MV411_RNP_MYB-H... [100%] 17 of 17, cached:...[K
[a2/0c2388] process > BWAMem (MV411_RNP_RUNX1-H3K... [100%] 17 of 17, cached:...[K
[13/4f49fa] process > SortBAM (INPUT_R1_T1)          [100%] 17 of 17, cached:...[K
[5a/6aaef3] process > MergeBAM (MV411_RNP_RUNX2-H... [100%] 17 of 17, cached:...[K
[48/4b5e62] process > MergeBAMFilter (MV411_RNP_R... [100%] 17 of 17 ✔[K
[dc/6cb451] process > MergeBAMRemoveOrphan (MV411... [100%] 17 of 17 ✔[K
[-        ] process > Preseq                         -[K
[85/5ed89a] process > CollectMultipleMetrics (MV4... [ 94%] 15 of 16[K
[9c/f2630b] process 

[28A
executor >  local (109)[K
[eb/23cd77] process > CheckDesign (additional_deg... [100%] 1 of 1, cached: 1 ✔[K
[c9/ec8de4] process > MakeTSSBED (genes.bed)         [100%] 1 of 1, cached: 1 ✔[K
[0d/f433a4] process > MakeGenomeFilter (genome.fa)   [100%] 1 of 1, cached: 1 ✔[K
[fe/2e0c27] process > FastQC (MV411_RNP_SPI1-H3K2... [100%] 17 of 17, cached:...[K
[68/a565e7] process > TrimGalore (MV411_RNP_MYB-H... [100%] 17 of 17, cached:...[K
[a2/0c2388] process > BWAMem (MV411_RNP_RUNX1-H3K... [100%] 17 of 17, cached:...[K
[13/4f49fa] process > SortBAM (INPUT_R1_T1)          [100%] 17 of 17, cached:...[K
[5a/6aaef3] process > MergeBAM (MV411_RNP_RUNX2-H... [100%] 17 of 17, cached:...[K
[48/4b5e62] process > MergeBAMFilter (MV411_RNP_R... [100%] 17 of 17 ✔[K
[dc/6cb451] process > MergeBAMRemoveOrphan (MV411... [100%] 17 of 17 ✔[K
[-        ] process > Preseq                         -[K
[7b/722dae] process > CollectMultipleMetrics (MV4... [100%] 17 of 17 ✔[K
[a1/2654ae] proces

[28A
executor >  local (114)[K
[eb/23cd77] process > CheckDesign (additional_deg... [100%] 1 of 1, cached: 1 ✔[K
[c9/ec8de4] process > MakeTSSBED (genes.bed)         [100%] 1 of 1, cached: 1 ✔[K
[0d/f433a4] process > MakeGenomeFilter (genome.fa)   [100%] 1 of 1, cached: 1 ✔[K
[fe/2e0c27] process > FastQC (MV411_RNP_SPI1-H3K2... [100%] 17 of 17, cached:...[K
[68/a565e7] process > TrimGalore (MV411_RNP_MYB-H... [100%] 17 of 17, cached:...[K
[a2/0c2388] process > BWAMem (MV411_RNP_RUNX1-H3K... [100%] 17 of 17, cached:...[K
[13/4f49fa] process > SortBAM (INPUT_R1_T1)          [100%] 17 of 17, cached:...[K
[5a/6aaef3] process > MergeBAM (MV411_RNP_RUNX2-H... [100%] 17 of 17, cached:...[K
[48/4b5e62] process > MergeBAMFilter (MV411_RNP_R... [100%] 17 of 17 ✔[K
[dc/6cb451] process > MergeBAMRemoveOrphan (MV411... [100%] 17 of 17 ✔[K
[-        ] process > Preseq                         -[K
[7b/722dae] process > CollectMultipleMetrics (MV4... [100%] 17 of 17 ✔[K
[a1/2654ae] proces

[28A
executor >  local (120)[K
[eb/23cd77] process > CheckDesign (additional_deg... [100%] 1 of 1, cached: 1 ✔[K
[c9/ec8de4] process > MakeTSSBED (genes.bed)         [100%] 1 of 1, cached: 1 ✔[K
[0d/f433a4] process > MakeGenomeFilter (genome.fa)   [100%] 1 of 1, cached: 1 ✔[K
[fe/2e0c27] process > FastQC (MV411_RNP_SPI1-H3K2... [100%] 17 of 17, cached:...[K
[68/a565e7] process > TrimGalore (MV411_RNP_MYB-H... [100%] 17 of 17, cached:...[K
[a2/0c2388] process > BWAMem (MV411_RNP_RUNX1-H3K... [100%] 17 of 17, cached:...[K
[13/4f49fa] process > SortBAM (INPUT_R1_T1)          [100%] 17 of 17, cached:...[K
[5a/6aaef3] process > MergeBAM (MV411_RNP_RUNX2-H... [100%] 17 of 17, cached:...[K
[48/4b5e62] process > MergeBAMFilter (MV411_RNP_R... [100%] 17 of 17 ✔[K
[dc/6cb451] process > MergeBAMRemoveOrphan (MV411... [100%] 17 of 17 ✔[K
[-        ] process > Preseq                         -[K
[7b/722dae] process > CollectMultipleMetrics (MV4... [100%] 17 of 17 ✔[K
[a1/2654ae] proces

[28A
executor >  local (125)[K
[eb/23cd77] process > CheckDesign (additional_deg... [100%] 1 of 1, cached: 1 ✔[K
[c9/ec8de4] process > MakeTSSBED (genes.bed)         [100%] 1 of 1, cached: 1 ✔[K
[0d/f433a4] process > MakeGenomeFilter (genome.fa)   [100%] 1 of 1, cached: 1 ✔[K
[fe/2e0c27] process > FastQC (MV411_RNP_SPI1-H3K2... [100%] 17 of 17, cached:...[K
[68/a565e7] process > TrimGalore (MV411_RNP_MYB-H... [100%] 17 of 17, cached:...[K
[a2/0c2388] process > BWAMem (MV411_RNP_RUNX1-H3K... [100%] 17 of 17, cached:...[K
[13/4f49fa] process > SortBAM (INPUT_R1_T1)          [100%] 17 of 17, cached:...[K
[5a/6aaef3] process > MergeBAM (MV411_RNP_RUNX2-H... [100%] 17 of 17, cached:...[K
[48/4b5e62] process > MergeBAMFilter (MV411_RNP_R... [100%] 17 of 17 ✔[K
[dc/6cb451] process > MergeBAMRemoveOrphan (MV411... [100%] 17 of 17 ✔[K
[-        ] process > Preseq                         -[K
[7b/722dae] process > CollectMultipleMetrics (MV4... [100%] 17 of 17 ✔[K
[a1/2654ae] proces

[28A
executor >  local (129)[K
[eb/23cd77] process > CheckDesign (additional_deg... [100%] 1 of 1, cached: 1 ✔[K
[c9/ec8de4] process > MakeTSSBED (genes.bed)         [100%] 1 of 1, cached: 1 ✔[K
[0d/f433a4] process > MakeGenomeFilter (genome.fa)   [100%] 1 of 1, cached: 1 ✔[K
[fe/2e0c27] process > FastQC (MV411_RNP_SPI1-H3K2... [100%] 17 of 17, cached:...[K
[68/a565e7] process > TrimGalore (MV411_RNP_MYB-H... [100%] 17 of 17, cached:...[K
[a2/0c2388] process > BWAMem (MV411_RNP_RUNX1-H3K... [100%] 17 of 17, cached:...[K
[13/4f49fa] process > SortBAM (INPUT_R1_T1)          [100%] 17 of 17, cached:...[K
[5a/6aaef3] process > MergeBAM (MV411_RNP_RUNX2-H... [100%] 17 of 17, cached:...[K
[48/4b5e62] process > MergeBAMFilter (MV411_RNP_R... [100%] 17 of 17 ✔[K
[dc/6cb451] process > MergeBAMRemoveOrphan (MV411... [100%] 17 of 17 ✔[K
[-        ] process > Preseq                         -[K
[7b/722dae] process > CollectMultipleMetrics (MV4... [100%] 17 of 17 ✔[K
[a1/2654ae] proces

[28A
executor >  local (133)[K
[eb/23cd77] process > CheckDesign (additional_deg... [100%] 1 of 1, cached: 1 ✔[K
[c9/ec8de4] process > MakeTSSBED (genes.bed)         [100%] 1 of 1, cached: 1 ✔[K
[0d/f433a4] process > MakeGenomeFilter (genome.fa)   [100%] 1 of 1, cached: 1 ✔[K
[fe/2e0c27] process > FastQC (MV411_RNP_SPI1-H3K2... [100%] 17 of 17, cached:...[K
[68/a565e7] process > TrimGalore (MV411_RNP_MYB-H... [100%] 17 of 17, cached:...[K
[a2/0c2388] process > BWAMem (MV411_RNP_RUNX1-H3K... [100%] 17 of 17, cached:...[K
[13/4f49fa] process > SortBAM (INPUT_R1_T1)          [100%] 17 of 17, cached:...[K
[5a/6aaef3] process > MergeBAM (MV411_RNP_RUNX2-H... [100%] 17 of 17, cached:...[K
[48/4b5e62] process > MergeBAMFilter (MV411_RNP_R... [100%] 17 of 17 ✔[K
[dc/6cb451] process > MergeBAMRemoveOrphan (MV411... [100%] 17 of 17 ✔[K
[-        ] process > Preseq                         -[K
[7b/722dae] process > CollectMultipleMetrics (MV4... [100%] 17 of 17 ✔[K
[a1/2654ae] proces

[28A
executor >  local (138)[K
[eb/23cd77] process > CheckDesign (additional_deg... [100%] 1 of 1, cached: 1 ✔[K
[c9/ec8de4] process > MakeTSSBED (genes.bed)         [100%] 1 of 1, cached: 1 ✔[K
[0d/f433a4] process > MakeGenomeFilter (genome.fa)   [100%] 1 of 1, cached: 1 ✔[K
[fe/2e0c27] process > FastQC (MV411_RNP_SPI1-H3K2... [100%] 17 of 17, cached:...[K
[68/a565e7] process > TrimGalore (MV411_RNP_MYB-H... [100%] 17 of 17, cached:...[K
[a2/0c2388] process > BWAMem (MV411_RNP_RUNX1-H3K... [100%] 17 of 17, cached:...[K
[13/4f49fa] process > SortBAM (INPUT_R1_T1)          [100%] 17 of 17, cached:...[K
[5a/6aaef3] process > MergeBAM (MV411_RNP_RUNX2-H... [100%] 17 of 17, cached:...[K
[48/4b5e62] process > MergeBAMFilter (MV411_RNP_R... [100%] 17 of 17 ✔[K
[dc/6cb451] process > MergeBAMRemoveOrphan (MV411... [100%] 17 of 17 ✔[K
[-        ] process > Preseq                         -[K
[7b/722dae] process > CollectMultipleMetrics (MV4... [100%] 17 of 17 ✔[K
[a1/2654ae] proces

[28A
executor >  local (143)[K
[eb/23cd77] process > CheckDesign (additional_deg... [100%] 1 of 1, cached: 1 ✔[K
[c9/ec8de4] process > MakeTSSBED (genes.bed)         [100%] 1 of 1, cached: 1 ✔[K
[0d/f433a4] process > MakeGenomeFilter (genome.fa)   [100%] 1 of 1, cached: 1 ✔[K
[fe/2e0c27] process > FastQC (MV411_RNP_SPI1-H3K2... [100%] 17 of 17, cached:...[K
[68/a565e7] process > TrimGalore (MV411_RNP_MYB-H... [100%] 17 of 17, cached:...[K
[a2/0c2388] process > BWAMem (MV411_RNP_RUNX1-H3K... [100%] 17 of 17, cached:...[K
[13/4f49fa] process > SortBAM (INPUT_R1_T1)          [100%] 17 of 17, cached:...[K
[5a/6aaef3] process > MergeBAM (MV411_RNP_RUNX2-H... [100%] 17 of 17, cached:...[K
[48/4b5e62] process > MergeBAMFilter (MV411_RNP_R... [100%] 17 of 17 ✔[K
[dc/6cb451] process > MergeBAMRemoveOrphan (MV411... [100%] 17 of 17 ✔[K
[-        ] process > Preseq                         -[K
[7b/722dae] process > CollectMultipleMetrics (MV4... [100%] 17 of 17 ✔[K
[a1/2654ae] proces

[28A
executor >  local (148)[K
[eb/23cd77] process > CheckDesign (additional_deg... [100%] 1 of 1, cached: 1 ✔[K
[c9/ec8de4] process > MakeTSSBED (genes.bed)         [100%] 1 of 1, cached: 1 ✔[K
[0d/f433a4] process > MakeGenomeFilter (genome.fa)   [100%] 1 of 1, cached: 1 ✔[K
[fe/2e0c27] process > FastQC (MV411_RNP_SPI1-H3K2... [100%] 17 of 17, cached:...[K
[68/a565e7] process > TrimGalore (MV411_RNP_MYB-H... [100%] 17 of 17, cached:...[K
[a2/0c2388] process > BWAMem (MV411_RNP_RUNX1-H3K... [100%] 17 of 17, cached:...[K
[13/4f49fa] process > SortBAM (INPUT_R1_T1)          [100%] 17 of 17, cached:...[K
[5a/6aaef3] process > MergeBAM (MV411_RNP_RUNX2-H... [100%] 17 of 17, cached:...[K
[48/4b5e62] process > MergeBAMFilter (MV411_RNP_R... [100%] 17 of 17 ✔[K
[dc/6cb451] process > MergeBAMRemoveOrphan (MV411... [100%] 17 of 17 ✔[K
[-        ] process > Preseq                         -[K
[7b/722dae] process > CollectMultipleMetrics (MV4... [100%] 17 of 17 ✔[K
[a1/2654ae] proces

[28A
executor >  local (153)[K
[eb/23cd77] process > CheckDesign (additional_deg... [100%] 1 of 1, cached: 1 ✔[K
[c9/ec8de4] process > MakeTSSBED (genes.bed)         [100%] 1 of 1, cached: 1 ✔[K
[0d/f433a4] process > MakeGenomeFilter (genome.fa)   [100%] 1 of 1, cached: 1 ✔[K
[fe/2e0c27] process > FastQC (MV411_RNP_SPI1-H3K2... [100%] 17 of 17, cached:...[K
[68/a565e7] process > TrimGalore (MV411_RNP_MYB-H... [100%] 17 of 17, cached:...[K
[a2/0c2388] process > BWAMem (MV411_RNP_RUNX1-H3K... [100%] 17 of 17, cached:...[K
[13/4f49fa] process > SortBAM (INPUT_R1_T1)          [100%] 17 of 17, cached:...[K
[5a/6aaef3] process > MergeBAM (MV411_RNP_RUNX2-H... [100%] 17 of 17, cached:...[K
[48/4b5e62] process > MergeBAMFilter (MV411_RNP_R... [100%] 17 of 17 ✔[K
[dc/6cb451] process > MergeBAMRemoveOrphan (MV411... [100%] 17 of 17 ✔[K
[-        ] process > Preseq                         -[K
[7b/722dae] process > CollectMultipleMetrics (MV4... [100%] 17 of 17 ✔[K
[a1/2654ae] proces

[28A
executor >  local (158)[K
[eb/23cd77] process > CheckDesign (additional_deg... [100%] 1 of 1, cached: 1 ✔[K
[c9/ec8de4] process > MakeTSSBED (genes.bed)         [100%] 1 of 1, cached: 1 ✔[K
[0d/f433a4] process > MakeGenomeFilter (genome.fa)   [100%] 1 of 1, cached: 1 ✔[K
[fe/2e0c27] process > FastQC (MV411_RNP_SPI1-H3K2... [100%] 17 of 17, cached:...[K
[68/a565e7] process > TrimGalore (MV411_RNP_MYB-H... [100%] 17 of 17, cached:...[K
[a2/0c2388] process > BWAMem (MV411_RNP_RUNX1-H3K... [100%] 17 of 17, cached:...[K
[13/4f49fa] process > SortBAM (INPUT_R1_T1)          [100%] 17 of 17, cached:...[K
[5a/6aaef3] process > MergeBAM (MV411_RNP_RUNX2-H... [100%] 17 of 17, cached:...[K
[48/4b5e62] process > MergeBAMFilter (MV411_RNP_R... [100%] 17 of 17 ✔[K
[dc/6cb451] process > MergeBAMRemoveOrphan (MV411... [100%] 17 of 17 ✔[K
[-        ] process > Preseq                         -[K
[7b/722dae] process > CollectMultipleMetrics (MV4... [100%] 17 of 17 ✔[K
[a1/2654ae] proces

[28A
executor >  local (162)[K
[eb/23cd77] process > CheckDesign (additional_deg... [100%] 1 of 1, cached: 1 ✔[K
[c9/ec8de4] process > MakeTSSBED (genes.bed)         [100%] 1 of 1, cached: 1 ✔[K
[0d/f433a4] process > MakeGenomeFilter (genome.fa)   [100%] 1 of 1, cached: 1 ✔[K
[fe/2e0c27] process > FastQC (MV411_RNP_SPI1-H3K2... [100%] 17 of 17, cached:...[K
[68/a565e7] process > TrimGalore (MV411_RNP_MYB-H... [100%] 17 of 17, cached:...[K
[a2/0c2388] process > BWAMem (MV411_RNP_RUNX1-H3K... [100%] 17 of 17, cached:...[K
[13/4f49fa] process > SortBAM (INPUT_R1_T1)          [100%] 17 of 17, cached:...[K
[5a/6aaef3] process > MergeBAM (MV411_RNP_RUNX2-H... [100%] 17 of 17, cached:...[K
[48/4b5e62] process > MergeBAMFilter (MV411_RNP_R... [100%] 17 of 17 ✔[K
[dc/6cb451] process > MergeBAMRemoveOrphan (MV411... [100%] 17 of 17 ✔[K
[-        ] process > Preseq                         -[K
[7b/722dae] process > CollectMultipleMetrics (MV4... [100%] 17 of 17 ✔[K
[a1/2654ae] proces

[31A
executor >  local (163)[K
[eb/23cd77] process > CheckDesign (additional_deg... [100%] 1 of 1, cached: 1 ✔[K
[c9/ec8de4] process > MakeTSSBED (genes.bed)         [100%] 1 of 1, cached: 1 ✔[K
[0d/f433a4] process > MakeGenomeFilter (genome.fa)   [100%] 1 of 1, cached: 1 ✔[K
[fe/2e0c27] process > FastQC (MV411_RNP_SPI1-H3K2... [100%] 17 of 17, cached:...[K
[68/a565e7] process > TrimGalore (MV411_RNP_MYB-H... [100%] 17 of 17, cached:...[K
[a2/0c2388] process > BWAMem (MV411_RNP_RUNX1-H3K... [100%] 17 of 17, cached:...[K
[13/4f49fa] process > SortBAM (INPUT_R1_T1)          [100%] 17 of 17, cached:...[K
[5a/6aaef3] process > MergeBAM (MV411_RNP_RUNX2-H... [100%] 17 of 17, cached:...[K
[48/4b5e62] process > MergeBAMFilter (MV411_RNP_R... [100%] 17 of 17 ✔[K
[dc/6cb451] process > MergeBAMRemoveOrphan (MV411... [100%] 17 of 17 ✔[K
[-        ] process > Preseq                         -[K
[7b/722dae] process > CollectMultipleMetrics (MV4... [100%] 17 of 17 ✔[K
[a1/2654ae] proces

In [10]:
! sudo mv ../../data/results/* ../../data/$project/ && sudo rm -r ..data/work

^C


In [11]:
ls ../../TrimGalore-0.6.5/trim_galore

[0m[01;32m../../TrimGalore-0.6.5/trim_galore[0m*


In [None]:
# get scaling values
norm, mapped = h.getSpikeInControlScales(refgenome="../../data/ref/reference_droso.fna",
fastQfolder='../../data/'+project+'/fastqs/',
pairedEnd=True, cores=12,
tofilter=True,
totrim=True,
tomap=True,
results="../../data/"+project+"/",
pathtotrim_galore="../../TrimGalore-0.6.5/trim_galore")
norm, mapped

using all files from folder
need to be name_*1, name_*2
[('mp881-MV411_MEF2D_NT_SC_63_DMSO-FLAG_MEF2D-r3_R1_001.fastq.gz', 'mp881-MV411_MEF2D_NT_SC_63_DMSO-FLAG_MEF2D-r3_R2_001.fastq.gz'), ('mp882-MV411_MEF2D_NT_SC_63_VHL-FLAG_MEF2D-r1_R1_001.fastq.gz', 'mp882-MV411_MEF2D_NT_SC_63_VHL-FLAG_MEF2D-r1_R2_001.fastq.gz'), ('mp883-MV411_MEF2D_NT_SC_63_DMSO-IFR8-r1_R1_001.fastq.gz', 'mp883-MV411_MEF2D_NT_SC_63_DMSO-IFR8-r1_R2_001.fastq.gz'), ('mp884-MV411_MEF2D_NT_SC_63_VHL-IFR8-r1_R1_001.fastq.gz', 'mp884-MV411_MEF2D_NT_SC_63_VHL-IFR8-r1_R2_001.fastq.gz'), ('mp885-MV411_MEF2D_NT_SC_63_DMSO-MED1-r1_R1_001.fastq.gz', 'mp885-MV411_MEF2D_NT_SC_63_DMSO-MED1-r1_R2_001.fastq.gz'), ('mp886-MV411_MEF2D_NT_SC_63_VHL-MED1-r1_R1_001.fastq.gz', 'mp886-MV411_MEF2D_NT_SC_63_VHL-MED1-r1_R2_001.fastq.gz'), ('mp887-MV411_MEF2D_NT_SC_63_DMSO-MEF2C-r1_R1_001.fastq.gz', 'mp887-MV411_MEF2D_NT_SC_63_DMSO-MEF2C-r1_R2_001.fastq.gz'), ('mp888-MV411_MEF2D_NT_SC_63_VHL-MEF2C-r1_R1_001.fastq.gz', 'mp888-MV411_MEF2D_NT_S

In [14]:
mapped

{'mp881-MV411_MEF2D_NT_SC_63_DMSO-FLAG_MEF2D-r3_R1_001_val_1': 3913318,
 'mp882-MV411_MEF2D_NT_SC_63_VHL-FLAG_MEF2D-r1_R1_001_val_1': 3675100,
 'mp883-MV411_MEF2D_NT_SC_63_DMSO-IFR8-r1_R1_001_val_1': 3909333,
 'mp884-MV411_MEF2D_NT_SC_63_VHL-IFR8-r1_R1_001_val_1': 5479191,
 'mp885-MV411_MEF2D_NT_SC_63_DMSO-MED1-r1_R1_001_val_1': 5217619,
 'mp886-MV411_MEF2D_NT_SC_63_VHL-MED1-r1_R1_001_val_1': 5460060,
 'mp887-MV411_MEF2D_NT_SC_63_DMSO-MEF2C-r1_R1_001_val_1': 4117350,
 'mp888-MV411_MEF2D_NT_SC_63_VHL-MEF2C-r1_R1_001_val_1': 3388517,
 'mp889-MV411_MEF2D_NT_SC_63_DMSO-MYC-r1_R1_001_val_1': 6008413,
 'mp890-MV411_MEF2D_NT_SC_63_VHL-MYC-r1_R1_001_val_1': 5878757,
 'mp891-MV411_MEF2D_NT_SC_63_DMSO-POLII-r1_R1_001_val_1': 3529483,
 'mp892-MV411_MEF2D_NT_SC_63_VHL-POLII-r1_R1_001_val_1': 9964979}

computing scales from the excel sheet

In [142]:
scales = [1.0,
1.0,
0.9644136372,
0.9068238696,
1.0,
1.0,
0.3954022974,
0.8368988329,
1.0,
1.0,
0.5551449949,
0.844680746]

In [36]:
bams = ! ls ../../data/$project/bwa/mergedLibrary/*.bam
bams

['../../data/chipseq_MEF2D_degraded/bwa/mergedLibrary/INPUT_R1.mLb.clN.sorted.bam',
 '../../data/chipseq_MEF2D_degraded/bwa/mergedLibrary/MV411_MEF2D_NT_SC_63_DMSO-FLAG_MEF2D_R1.mLb.clN.sorted.bam',
 '../../data/chipseq_MEF2D_degraded/bwa/mergedLibrary/MV411_MEF2D_NT_SC_63_DMSO-IFR8_R1.mLb.clN.sorted.bam',
 '../../data/chipseq_MEF2D_degraded/bwa/mergedLibrary/MV411_MEF2D_NT_SC_63_DMSO-MED1_R1.mLb.clN.sorted.bam',
 '../../data/chipseq_MEF2D_degraded/bwa/mergedLibrary/MV411_MEF2D_NT_SC_63_DMSO-MEF2C_R1.mLb.clN.sorted.bam',
 '../../data/chipseq_MEF2D_degraded/bwa/mergedLibrary/MV411_MEF2D_NT_SC_63_DMSO-MYC_R1.mLb.clN.sorted.bam',
 '../../data/chipseq_MEF2D_degraded/bwa/mergedLibrary/MV411_MEF2D_NT_SC_63_DMSO-POLII_R1.mLb.clN.sorted.bam',
 '../../data/chipseq_MEF2D_degraded/bwa/mergedLibrary/MV411_MEF2D_NT_SC_63_VHL-FLAG_MEF2D_R1.mLb.clN.sorted.bam',
 '../../data/chipseq_MEF2D_degraded/bwa/mergedLibrary/MV411_MEF2D_NT_SC_63_VHL-IFR8_R1.mLb.clN.sorted.bam',
 '../../data/chipseq_MEF2D_degrad

In [37]:
bams[6]

'../../data/chipseq_MEF2D_degraded/bwa/mergedLibrary/MV411_MEF2D_NT_SC_63_DMSO-POLII_R1.mLb.clN.sorted.bam'

### on scalled data

In [19]:
! mkdir ../../data/$project/diffPeaks/ && ! mkdir ../../data/$project/diffData/

In [20]:
! mkdir ../../data/$project/droso_aligned

In [21]:
! mv ../../data/$project/mp* ../../data/$project/droso_aligned

In [27]:
wigs = ! ls ../../data/$project/bwa/mergedLibrary/bigwig/*.bigWig

In [131]:
initscales = ! cat ../../data/$project/bwa/mergedLibrary/bigwig/scale/*.txt

In [135]:
! ls ../../data/$project/bwa/mergedLibrary/bigwig/scale/*.txt

../../data/chipseq_MEF2D_degraded/bwa/mergedLibrary/bigwig/scale/INPUT_R1.mLb.clN.scale_factor.txt
../../data/chipseq_MEF2D_degraded/bwa/mergedLibrary/bigwig/scale/MV411_MEF2D_NT_SC_63_DMSO-FLAG_MEF2D_R1.mLb.clN.scale_factor.txt
../../data/chipseq_MEF2D_degraded/bwa/mergedLibrary/bigwig/scale/MV411_MEF2D_NT_SC_63_DMSO-IFR8_R1.mLb.clN.scale_factor.txt
../../data/chipseq_MEF2D_degraded/bwa/mergedLibrary/bigwig/scale/MV411_MEF2D_NT_SC_63_DMSO-MED1_R1.mLb.clN.scale_factor.txt
../../data/chipseq_MEF2D_degraded/bwa/mergedLibrary/bigwig/scale/MV411_MEF2D_NT_SC_63_DMSO-MEF2C_R1.mLb.clN.scale_factor.txt
../../data/chipseq_MEF2D_degraded/bwa/mergedLibrary/bigwig/scale/MV411_MEF2D_NT_SC_63_DMSO-MYC_R1.mLb.clN.scale_factor.txt
../../data/chipseq_MEF2D_degraded/bwa/mergedLibrary/bigwig/scale/MV411_MEF2D_NT_SC_63_DMSO-POLII_R1.mLb.clN.scale_factor.txt
../../data/chipseq_MEF2D_degraded/bwa/mergedLibrary/bigwig/scale/MV411_MEF2D_NT_SC_63_VHL-FLAG_MEF2D_R1.mLb.clN.scale_factor.txt
../../data/ch

In [143]:
rescales = [val*float(initscales[1+i]) for i, val in enumerate(scales)]

In [144]:
rescales

[0.00547739,
 0.011174,
 0.008672451055975512,
 0.004635565734292153,
 0.00588873,
 0.00439044,
 0.00738607537520226,
 0.00599152612449768,
 0.00599184,
 0.0111614,
 0.004559755584460487,
 0.00375175089504852]

In [145]:
chip.bigWigFrom(bams[1:], 
                genome='GRCh38',scaling=rescales,
               numthreads=12)

In [146]:
!mkdir ../../data/$project/recalib_bigwig/ && mv bigwig/* ../../data/$project/recalib_bigwig/

mkdir: cannot create directory ‘../../data/chipseq_MEF2D_degraded/recalib_bigwig/’: File exists


In [155]:
mv bigwig/* ../../data/$project/recalib_bigwig/

In [156]:
#! gsutil -m cp -r ../../data/$project/droso_aligned gs://amlproject/Chip/$project/
! gsutil -m cp -r ../../data/$project/recalib_bigwig gs://amlproject/Chip/$project/

Copying file://../../data/chipseq_MEF2D_degraded/recalib_bigwig/MV411_MEF2D_NT_SC_63_DMSO-FLAG_MEF2D_R1.bw [Content-Type=application/octet-stream]...
==> NOTE: You are uploading one or more large file(s), which would run
significantly faster if you enable parallel composite uploads. This
feature can be enabled by editing the
"parallel_composite_upload_threshold" value in your .boto
configuration file. However, note that if you do this large files will
be uploaded as `composite objects
<https://cloud.google.com/storage/docs/composite-objects>`_,which
means that any user who downloads such objects will need to have a
compiled crcmod installed (see "gsutil help crcmod"). This is because
without a compiled crcmod, computing checksums on composite objects is
so slow that gsutil disables downloads of composite objects.

Copying file://../../data/chipseq_MEF2D_degraded/recalib_bigwig/MV411_MEF2D_NT_SC_63_DMSO-IFR8_R1.bw [Content-Type=application/octet-stream]...
Copying file://../../data/chip

In [50]:
rm -r ../../data/$project/droso_aligned/

In [201]:
# diffPeak on scaled data 
for i in range(int((len(bams)-1)/2)):
    if i==4:
        continue
    name1 = bams[i+1]
    name2 = bams[i+7]
    print('\n')
    print(name1,name2)
    print(chip.fullDiffPeak(name1, name2, control1=bams[0], res_directory="../../data/"+project+"/diffPeaks/", scaling=[scales[i],scales[i+6]], directory="../../data/"+project+'/diffData/'))



../../data/chipseq_MEF2D_degraded/bwa/mergedLibrary/MV411_MEF2D_NT_SC_63_DMSO-FLAG_MEF2D_R1.mLb.clN.sorted.bam ../../data/chipseq_MEF2D_degraded/bwa/mergedLibrary/MV411_MEF2D_NT_SC_63_VHL-FLAG_MEF2D_R1.mLb.clN.sorted.bam
doing diff from ../../data/chipseq_MEF2D_degraded/bwa/mergedLibrary/MV411_MEF2D_NT_SC_63_DMSO-FLAG_MEF2D_R1.mLb.clN.sorted.bam and ../../data/chipseq_MEF2D_degraded/bwa/mergedLibrary/MV411_MEF2D_NT_SC_63_VHL-FLAG_MEF2D_R1.mLb.clN.sorted.bam
computing the fragment avg size
230
computing the scaling values
b'INFO  @ Mon, 02 Nov 2020 20:57:45: \n# Command line: callpeak -B -t ../../data/chipseq_MEF2D_degraded/bwa/mergedLibrary/MV411_MEF2D_NT_SC_63_DMSO-FLAG_MEF2D_R1.mLb.clN.sorted.bam -c ../../data/chipseq_MEF2D_degraded/bwa/mergedLibrary/INPUT_R1.mLb.clN.sorted.bam --nomodel --extsize 230 -n MV411_MEF2D_NT_SC_63_DMSO-FLAG_MEF2D_R1 --outdir ../../data/chipseq_MEF2D_degraded/diffData/ -f BAMPE\n# ARGUMENTS LIST:\n# name = MV411_MEF2D_NT_SC_63_DMSO-FLAG_MEF2D_R1\n# format

b'INFO  @ Mon, 02 Nov 2020 21:36:44: \n# Command line: callpeak -B -t ../../data/chipseq_MEF2D_degraded/bwa/mergedLibrary/MV411_MEF2D_NT_SC_63_VHL-FLAG_MEF2D_R1.mLb.clN.sorted.bam -c ../../data/chipseq_MEF2D_degraded/bwa/mergedLibrary/INPUT_R1.mLb.clN.sorted.bam --nomodel --extsize 230 -n MV411_MEF2D_NT_SC_63_VHL-FLAG_MEF2D_R1 --outdir ../../data/chipseq_MEF2D_degraded/diffData/ -f BAMPE\n# ARGUMENTS LIST:\n# name = MV411_MEF2D_NT_SC_63_VHL-FLAG_MEF2D_R1\n# format = BAMPE\n# ChIP-seq file = [\'../../data/chipseq_MEF2D_degraded/bwa/mergedLibrary/MV411_MEF2D_NT_SC_63_VHL-FLAG_MEF2D_R1.mLb.clN.sorted.bam\']\n# control file = [\'../../data/chipseq_MEF2D_degraded/bwa/mergedLibrary/INPUT_R1.mLb.clN.sorted.bam\']\n# effective genome size = 2.70e+09\n# band width = 300\n# model fold = [5, 50]\n# qvalue cutoff = 5.00e-02\n# The maximum gap between significant sites is assigned as the read length/tag size.\n# The minimum length of peaks is assigned as the predicted fragment length "d".\n# Larger

CompletedProcess(args='macs2 bdgdiff --t1 ../../data/chipseq_MEF2D_degraded/diffData/MV411_MEF2D_NT_SC_63_DMSO-FLAG_MEF2D_R1_treat_pileup.bdg --c1 ../../data/chipseq_MEF2D_degraded/diffData/MV411_MEF2D_NT_SC_63_DMSO-FLAG_MEF2D_R1_control_lambda.bdg --t2 ../../data/chipseq_MEF2D_degraded/diffData/MV411_MEF2D_NT_SC_63_VHL-FLAG_MEF2D_R1_treat_pileup.bdg --c2 ../../data/chipseq_MEF2D_degraded/diffData/MV411_MEF2D_NT_SC_63_VHL-FLAG_MEF2D_R1_control_lambda.bdg --d1 66632618 --d2 67047491 -g 60 -l 230 --o-prefix MV411_MEF2D_NT_SC_63_DMSO-FLAG_MEF2D_R1_treat_pileup_vs_MV411_MEF2D_NT_SC_63_VHL-FLAG_MEF2D_R1_treat_pileup --outdir ../../data/chipseq_MEF2D_degraded/diffPeaks/', returncode=0, stdout=b'', stderr=b'INFO  @ Mon, 02 Nov 2020 22:03:53: Read and build treatment 1 bedGraph... \nINFO  @ Mon, 02 Nov 2020 22:07:28: Read and build control 1 bedGraph... \nINFO  @ Mon, 02 Nov 2020 22:12:02: Read and build treatment 2 bedGraph... \nINFO  @ Mon, 02 Nov 2020 22:12:53: Read and build control 2 bedG

b'INFO  @ Mon, 02 Nov 2020 23:02:52: \n# Command line: callpeak -B -t ../../data/chipseq_MEF2D_degraded/bwa/mergedLibrary/MV411_MEF2D_NT_SC_63_VHL-IFR8_R1.mLb.clN.sorted.bam -c ../../data/chipseq_MEF2D_degraded/bwa/mergedLibrary/INPUT_R1.mLb.clN.sorted.bam --nomodel --extsize 208 -n MV411_MEF2D_NT_SC_63_VHL-IFR8_R1 --outdir ../../data/chipseq_MEF2D_degraded/diffData/ -f BAMPE\n# ARGUMENTS LIST:\n# name = MV411_MEF2D_NT_SC_63_VHL-IFR8_R1\n# format = BAMPE\n# ChIP-seq file = [\'../../data/chipseq_MEF2D_degraded/bwa/mergedLibrary/MV411_MEF2D_NT_SC_63_VHL-IFR8_R1.mLb.clN.sorted.bam\']\n# control file = [\'../../data/chipseq_MEF2D_degraded/bwa/mergedLibrary/INPUT_R1.mLb.clN.sorted.bam\']\n# effective genome size = 2.70e+09\n# band width = 300\n# model fold = [5, 50]\n# qvalue cutoff = 5.00e-02\n# The maximum gap between significant sites is assigned as the read length/tag size.\n# The minimum length of peaks is assigned as the predicted fragment length "d".\n# Larger dataset will be scaled 

CompletedProcess(args='macs2 bdgdiff --t1 ../../data/chipseq_MEF2D_degraded/diffData/MV411_MEF2D_NT_SC_63_DMSO-IFR8_R1_treat_pileup.bdg --c1 ../../data/chipseq_MEF2D_degraded/diffData/MV411_MEF2D_NT_SC_63_DMSO-IFR8_R1_control_lambda.bdg --t2 ../../data/chipseq_MEF2D_degraded/diffData/MV411_MEF2D_NT_SC_63_VHL-IFR8_R1_treat_pileup.bdg --c2 ../../data/chipseq_MEF2D_degraded/diffData/MV411_MEF2D_NT_SC_63_VHL-IFR8_R1_control_lambda.bdg --d1 44411793 --d2 79618485 -g 60 -l 208 --o-prefix MV411_MEF2D_NT_SC_63_DMSO-IFR8_R1_treat_pileup_vs_MV411_MEF2D_NT_SC_63_VHL-IFR8_R1_treat_pileup --outdir ../../data/chipseq_MEF2D_degraded/diffPeaks/', returncode=0, stdout=b'', stderr=b'INFO  @ Mon, 02 Nov 2020 23:40:59: Read and build treatment 1 bedGraph... \nINFO  @ Mon, 02 Nov 2020 23:42:35: Read and build control 1 bedGraph... \nINFO  @ Mon, 02 Nov 2020 23:46:44: Read and build treatment 2 bedGraph... \nINFO  @ Mon, 02 Nov 2020 23:48:42: Read and build control 2 bedGraph... \nINFO  @ Tue, 03 Nov 2020 0

b'INFO  @ Tue, 03 Nov 2020 00:42:46: \n# Command line: callpeak -B -t ../../data/chipseq_MEF2D_degraded/bwa/mergedLibrary/MV411_MEF2D_NT_SC_63_VHL-MED1_R1.mLb.clN.sorted.bam -c ../../data/chipseq_MEF2D_degraded/bwa/mergedLibrary/INPUT_R1.mLb.clN.sorted.bam --nomodel --extsize 220 -n MV411_MEF2D_NT_SC_63_VHL-MED1_R1 --outdir ../../data/chipseq_MEF2D_degraded/diffData/ -f BAMPE\n# ARGUMENTS LIST:\n# name = MV411_MEF2D_NT_SC_63_VHL-MED1_R1\n# format = BAMPE\n# ChIP-seq file = [\'../../data/chipseq_MEF2D_degraded/bwa/mergedLibrary/MV411_MEF2D_NT_SC_63_VHL-MED1_R1.mLb.clN.sorted.bam\']\n# control file = [\'../../data/chipseq_MEF2D_degraded/bwa/mergedLibrary/INPUT_R1.mLb.clN.sorted.bam\']\n# effective genome size = 2.70e+09\n# band width = 300\n# model fold = [5, 50]\n# qvalue cutoff = 5.00e-02\n# The maximum gap between significant sites is assigned as the read length/tag size.\n# The minimum length of peaks is assigned as the predicted fragment length "d".\n# Larger dataset will be scaled 

CompletedProcess(args='macs2 bdgdiff --t1 ../../data/chipseq_MEF2D_degraded/diffData/MV411_MEF2D_NT_SC_63_DMSO-MED1_R1_treat_pileup.bdg --c1 ../../data/chipseq_MEF2D_degraded/diffData/MV411_MEF2D_NT_SC_63_DMSO-MED1_R1_control_lambda.bdg --t2 ../../data/chipseq_MEF2D_degraded/diffData/MV411_MEF2D_NT_SC_63_VHL-MED1_R1_treat_pileup.bdg --c2 ../../data/chipseq_MEF2D_degraded/diffData/MV411_MEF2D_NT_SC_63_VHL-MED1_R1_control_lambda.bdg --d1 57247538 --d2 66632618 -g 60 -l 220 --o-prefix MV411_MEF2D_NT_SC_63_DMSO-MED1_R1_treat_pileup_vs_MV411_MEF2D_NT_SC_63_VHL-MED1_R1_treat_pileup --outdir ../../data/chipseq_MEF2D_degraded/diffPeaks/', returncode=0, stdout=b'', stderr=b'INFO  @ Tue, 03 Nov 2020 01:18:55: Read and build treatment 1 bedGraph... \nINFO  @ Tue, 03 Nov 2020 01:20:39: Read and build control 1 bedGraph... \nINFO  @ Tue, 03 Nov 2020 01:24:18: Read and build treatment 2 bedGraph... \nINFO  @ Tue, 03 Nov 2020 01:26:47: Read and build control 2 bedGraph... \nINFO  @ Tue, 03 Nov 2020 0

b'INFO  @ Tue, 03 Nov 2020 02:28:00: \n# Command line: callpeak -B -t ../../data/chipseq_MEF2D_degraded/bwa/mergedLibrary/MV411_MEF2D_NT_SC_63_VHL-MEF2C_R1.mLb.clN.sorted.bam -c ../../data/chipseq_MEF2D_degraded/bwa/mergedLibrary/INPUT_R1.mLb.clN.sorted.bam --nomodel --extsize 229 -n MV411_MEF2D_NT_SC_63_VHL-MEF2C_R1 --outdir ../../data/chipseq_MEF2D_degraded/diffData/ -f BAMPE\n# ARGUMENTS LIST:\n# name = MV411_MEF2D_NT_SC_63_VHL-MEF2C_R1\n# format = BAMPE\n# ChIP-seq file = [\'../../data/chipseq_MEF2D_degraded/bwa/mergedLibrary/MV411_MEF2D_NT_SC_63_VHL-MEF2C_R1.mLb.clN.sorted.bam\']\n# control file = [\'../../data/chipseq_MEF2D_degraded/bwa/mergedLibrary/INPUT_R1.mLb.clN.sorted.bam\']\n# effective genome size = 2.70e+09\n# band width = 300\n# model fold = [5, 50]\n# qvalue cutoff = 5.00e-02\n# The maximum gap between significant sites is assigned as the read length/tag size.\n# The minimum length of peaks is assigned as the predicted fragment length "d".\n# Larger dataset will be sca

CompletedProcess(args='macs2 bdgdiff --t1 ../../data/chipseq_MEF2D_degraded/diffData/MV411_MEF2D_NT_SC_63_DMSO-MEF2C_R1_treat_pileup.bdg --c1 ../../data/chipseq_MEF2D_degraded/diffData/MV411_MEF2D_NT_SC_63_DMSO-MEF2C_R1_control_lambda.bdg --t2 ../../data/chipseq_MEF2D_degraded/diffData/MV411_MEF2D_NT_SC_63_VHL-MEF2C_R1_treat_pileup.bdg --c2 ../../data/chipseq_MEF2D_degraded/diffData/MV411_MEF2D_NT_SC_63_VHL-MEF2C_R1_control_lambda.bdg --d1 73479117 --d2 44499637 -g 60 -l 229 --o-prefix MV411_MEF2D_NT_SC_63_DMSO-MEF2C_R1_treat_pileup_vs_MV411_MEF2D_NT_SC_63_VHL-MEF2C_R1_treat_pileup --outdir ../../data/chipseq_MEF2D_degraded/diffPeaks/', returncode=0, stdout=b'', stderr=b'INFO  @ Tue, 03 Nov 2020 02:57:19: Read and build treatment 1 bedGraph... \nINFO  @ Tue, 03 Nov 2020 03:00:16: Read and build control 1 bedGraph... \nINFO  @ Tue, 03 Nov 2020 03:03:40: Read and build treatment 2 bedGraph... \nINFO  @ Tue, 03 Nov 2020 03:04:59: Read and build control 2 bedGraph... \nINFO  @ Tue, 03 Nov 

b'INFO  @ Tue, 03 Nov 2020 04:10:24: \n# Command line: callpeak -B -t ../../data/chipseq_MEF2D_degraded/bwa/mergedLibrary/MV411_MEF2D_NT_SC_63_VHL-POLII_R1.mLb.clN.sorted.bam -c ../../data/chipseq_MEF2D_degraded/bwa/mergedLibrary/INPUT_R1.mLb.clN.sorted.bam --nomodel --extsize 250 -n MV411_MEF2D_NT_SC_63_VHL-POLII_R1 --outdir ../../data/chipseq_MEF2D_degraded/diffData/ -f BAMPE\n# ARGUMENTS LIST:\n# name = MV411_MEF2D_NT_SC_63_VHL-POLII_R1\n# format = BAMPE\n# ChIP-seq file = [\'../../data/chipseq_MEF2D_degraded/bwa/mergedLibrary/MV411_MEF2D_NT_SC_63_VHL-POLII_R1.mLb.clN.sorted.bam\']\n# control file = [\'../../data/chipseq_MEF2D_degraded/bwa/mergedLibrary/INPUT_R1.mLb.clN.sorted.bam\']\n# effective genome size = 2.70e+09\n# band width = 300\n# model fold = [5, 50]\n# qvalue cutoff = 5.00e-02\n# The maximum gap between significant sites is assigned as the read length/tag size.\n# The minimum length of peaks is assigned as the predicted fragment length "d".\n# Larger dataset will be sca

CompletedProcess(args='macs2 bdgdiff --t1 ../../data/chipseq_MEF2D_degraded/diffData/MV411_MEF2D_NT_SC_63_DMSO-POLII_R1_treat_pileup.bdg --c1 ../../data/chipseq_MEF2D_degraded/diffData/MV411_MEF2D_NT_SC_63_DMSO-POLII_R1_control_lambda.bdg --t2 ../../data/chipseq_MEF2D_degraded/diffData/MV411_MEF2D_NT_SC_63_VHL-POLII_R1_treat_pileup.bdg --c2 ../../data/chipseq_MEF2D_degraded/diffData/MV411_MEF2D_NT_SC_63_VHL-POLII_R1_control_lambda.bdg --d1 66632618 --d2 78884973 -g 60 -l 250 --o-prefix MV411_MEF2D_NT_SC_63_DMSO-POLII_R1_treat_pileup_vs_MV411_MEF2D_NT_SC_63_VHL-POLII_R1_treat_pileup --outdir ../../data/chipseq_MEF2D_degraded/diffPeaks/', returncode=0, stdout=b'', stderr=b'INFO  @ Tue, 03 Nov 2020 04:52:32: Read and build treatment 1 bedGraph... \nINFO  @ Tue, 03 Nov 2020 04:55:34: Read and build control 1 bedGraph... \nINFO  @ Tue, 03 Nov 2020 04:59:16: Read and build treatment 2 bedGraph... \nINFO  @ Tue, 03 Nov 2020 05:02:05: Read and build control 2 bedGraph... \nINFO  @ Tue, 03 Nov 

In [202]:
os.popen('for i in $(ls ../../data/'+project+'/diffPeaks/*.bed); \
            do echo $(wc -l $i); \
            done').read().split('\n')

['728 ../../data/chipseq_MEF2D_degraded/diffPeaks/MV411_MEF2D_NT_SC_63_DMSO-FLAG_MEF2D_R1_treat_pileup_vs_MV411_MEF2D_NT_SC_63_VHL-FLAG_MEF2D_R1_treat_pileup_c3.0_common.bed',
 '10695 ../../data/chipseq_MEF2D_degraded/diffPeaks/MV411_MEF2D_NT_SC_63_DMSO-FLAG_MEF2D_R1_treat_pileup_vs_MV411_MEF2D_NT_SC_63_VHL-FLAG_MEF2D_R1_treat_pileup_c3.0_cond1.bed',
 '245 ../../data/chipseq_MEF2D_degraded/diffPeaks/MV411_MEF2D_NT_SC_63_DMSO-FLAG_MEF2D_R1_treat_pileup_vs_MV411_MEF2D_NT_SC_63_VHL-FLAG_MEF2D_R1_treat_pileup_c3.0_cond2.bed',
 '17543 ../../data/chipseq_MEF2D_degraded/diffPeaks/MV411_MEF2D_NT_SC_63_DMSO-IFR8_R1_treat_pileup_vs_MV411_MEF2D_NT_SC_63_VHL-IFR8_R1_treat_pileup_c3.0_common.bed',
 '253 ../../data/chipseq_MEF2D_degraded/diffPeaks/MV411_MEF2D_NT_SC_63_DMSO-IFR8_R1_treat_pileup_vs_MV411_MEF2D_NT_SC_63_VHL-IFR8_R1_treat_pileup_c3.0_cond1.bed',
 '16862 ../../data/chipseq_MEF2D_degraded/diffPeaks/MV411_MEF2D_NT_SC_63_DMSO-IFR8_R1_treat_pileup_vs_MV411_MEF2D_NT_SC_63_VHL-IFR8_R1_treat_pi

In [203]:
bw = ! ls ../../data/$project/recalib_bigwig/*
bw

['../../data/chipseq_MEF2D_degraded/recalib_bigwig/MV411_MEF2D_NT_SC_63_DMSO-FLAG_MEF2D_R1.bw',
 '../../data/chipseq_MEF2D_degraded/recalib_bigwig/MV411_MEF2D_NT_SC_63_DMSO-IFR8_R1.bw',
 '../../data/chipseq_MEF2D_degraded/recalib_bigwig/MV411_MEF2D_NT_SC_63_DMSO-MED1_R1.bw',
 '../../data/chipseq_MEF2D_degraded/recalib_bigwig/MV411_MEF2D_NT_SC_63_DMSO-MEF2C_R1.bw',
 '../../data/chipseq_MEF2D_degraded/recalib_bigwig/MV411_MEF2D_NT_SC_63_DMSO-MYC_R1.bw',
 '../../data/chipseq_MEF2D_degraded/recalib_bigwig/MV411_MEF2D_NT_SC_63_DMSO-POLII_R1.bw',
 '../../data/chipseq_MEF2D_degraded/recalib_bigwig/MV411_MEF2D_NT_SC_63_VHL-FLAG_MEF2D_R1.bw',
 '../../data/chipseq_MEF2D_degraded/recalib_bigwig/MV411_MEF2D_NT_SC_63_VHL-IFR8_R1.bw',
 '../../data/chipseq_MEF2D_degraded/recalib_bigwig/MV411_MEF2D_NT_SC_63_VHL-MED1_R1.bw',
 '../../data/chipseq_MEF2D_degraded/recalib_bigwig/MV411_MEF2D_NT_SC_63_VHL-MEF2C_R1.bw',
 '../../data/chipseq_MEF2D_degraded/recalib_bigwig/MV411_MEF2D_NT_SC_63_VHL-MYC_R1.bw',
 '

In [151]:
!mkdir ../results/$project/
!mkdir ../results/$project/plots
!mkdir ../results/$project/plots/heatmaps/

In [None]:
# GENOME WIDE comparison

In [157]:
peaks = ! ls ../../data/$project/bwa/mergedLibrary/macs/broadPeak/*.broadPeak

In [171]:
names = ["DMSO_FLAG_MEF2D",
"DMSO_IFR8",
"DMSO_MED1",
"DMSO_MEF2C",
"DMSO_MYC",
"DMSO_POLII",
"VHL_FLAG_MEF2D",
"VHL_IFR8",
"VHL_MED1",
"VHL_MEF2C",
"VHL_MYC",
"VHL_POLII"]

In [162]:
! mkdir ../../data/$project/peakplot/

In [207]:
for i, val in enumerate(bw):
    if i <0:
        continue
    name = names[i]
    print(name)
    chip.getPeaksAt(peaks[i], bigwigs = val, bigwignames= name, peaknames=['Macs2_Peaks'], window=3000, folder="", title=name, numthreads=8, refpoint="center", name='../../data/'+project+'/peakplot/'+name+'_mat.pdf', withDeeptools=True, torecompute=True, vmax=2.5, legendLoc="lower-left")

FLAG_MEF2D


KeyboardInterrupt: 

In [179]:
rm ../../data/chipseq_MEF2D_degraded/diffPeaks/MV411_MEF2D_NT_SC_63_DMSO-FLAG_MEF2D_R1_treat_pileup_vs_MV411_MEF2D_NT_SC_63_VHL-POLII_R1_tre*

In [204]:
cond1peak = ! ls ../../data/$project/diffPeaks/*cond1.bed
cond2peak = ! ls ../../data/$project/diffPeaks/*cond2.bed
commonpeak = ! ls ../../data/$project/diffPeaks/*common.bed
cond1peak

['../../data/chipseq_MEF2D_degraded/diffPeaks/MV411_MEF2D_NT_SC_63_DMSO-FLAG_MEF2D_R1_treat_pileup_vs_MV411_MEF2D_NT_SC_63_VHL-FLAG_MEF2D_R1_treat_pileup_c3.0_cond1.bed',
 '../../data/chipseq_MEF2D_degraded/diffPeaks/MV411_MEF2D_NT_SC_63_DMSO-IFR8_R1_treat_pileup_vs_MV411_MEF2D_NT_SC_63_VHL-IFR8_R1_treat_pileup_c3.0_cond1.bed',
 '../../data/chipseq_MEF2D_degraded/diffPeaks/MV411_MEF2D_NT_SC_63_DMSO-MED1_R1_treat_pileup_vs_MV411_MEF2D_NT_SC_63_VHL-MED1_R1_treat_pileup_c3.0_cond1.bed',
 '../../data/chipseq_MEF2D_degraded/diffPeaks/MV411_MEF2D_NT_SC_63_DMSO-MEF2C_R1_treat_pileup_vs_MV411_MEF2D_NT_SC_63_VHL-MEF2C_R1_treat_pileup_c3.0_cond1.bed',
 '../../data/chipseq_MEF2D_degraded/diffPeaks/MV411_MEF2D_NT_SC_63_DMSO-MYC_R1_treat_pileup_vs_MV411_MEF2D_NT_SC_63_VHL-MYC_R1_treat_pileup_c3.0_cond1.bed',
 '../../data/chipseq_MEF2D_degraded/diffPeaks/MV411_MEF2D_NT_SC_63_DMSO-POLII_R1_treat_pileup_vs_MV411_MEF2D_NT_SC_63_VHL-POLII_R1_treat_pileup_c3.0_cond1.bed']

In [205]:
names = ["FLAG_MEF2D",
"IFR8",
"MED1",
"MEF2C",
"MYC",
"POLII"]

In [209]:
for i in range(int(len(bw)/2)):
    if i <0:
        continue
    cl = len(bw)
    name1 = bw[i]
    name2 = bw[i+6]
    peak = [cond1peak[i], commonpeak[i], cond2peak[i]]
   # for val in peak:
      #  chip.dropWeirdChromosomes(val)
    name = names[i]
    print(name1,name2)
    chip.getPeaksAt(peak, [name1, name2], bigwignames=['DMSO', 'VHL'], peaknames=['DMSO_peaks', 'common', 'VHL_peaks'], window=3000, folder="", title=name, numthreads=7, refpoint="center", name='../../data/'+project+'/diffPeaks/'+name+'_mat.pdf', withDeeptools=True, torecompute=True)

../../data/chipseq_MEF2D_degraded/recalib_bigwig/MV411_MEF2D_NT_SC_63_DMSO-FLAG_MEF2D_R1.bw ../../data/chipseq_MEF2D_degraded/recalib_bigwig/MV411_MEF2D_NT_SC_63_VHL-FLAG_MEF2D_R1.bw
CompletedProcess(args='computeMatrix reference-point -S ../../data/chipseq_MEF2D_degraded/recalib_bigwig/MV411_MEF2D_NT_SC_63_DMSO-FLAG_MEF2D_R1.bw ../../data/chipseq_MEF2D_degraded/recalib_bigwig/MV411_MEF2D_NT_SC_63_VHL-FLAG_MEF2D_R1.bw  --referencePoint center --regionsFileName ../../data/chipseq_MEF2D_degraded/diffPeaks/MV411_MEF2D_NT_SC_63_DMSO-FLAG_MEF2D_R1_treat_pileup_vs_MV411_MEF2D_NT_SC_63_VHL-FLAG_MEF2D_R1_treat_pileup_c3.0_cond1.bed ../../data/chipseq_MEF2D_degraded/diffPeaks/MV411_MEF2D_NT_SC_63_DMSO-FLAG_MEF2D_R1_treat_pileup_vs_MV411_MEF2D_NT_SC_63_VHL-FLAG_MEF2D_R1_treat_pileup_c3.0_common.bed ../../data/chipseq_MEF2D_degraded/diffPeaks/MV411_MEF2D_NT_SC_63_DMSO-FLAG_MEF2D_R1_treat_pileup_vs_MV411_MEF2D_NT_SC_63_VHL-FLAG_MEF2D_R1_treat_pileup_c3.0_cond2.bed --missingDataAsZero --outFileName

In [210]:
#h.createFoldersFor('../results/'+project+'/plots/scaled/diffPeaks/')
! cp ../../data/$project/diffPeaks/*.pdf ../results/$project/plots/scaled/diffPeaks/

In [211]:
!cp ../../data/$project/peakplot/*_mat.pdf ../results/$project/plots/scaled/

### looking at TSS

In [301]:
server = BiomartServer( "http://www.ensembl.org/biomart" )
ensembl = server.datasets['hsapiens_gene_ensembl']
ensembltss = pd.read_csv(io.StringIO(ensmbl.search({
  'attributes': ['ensembl_gene_id','gene_biotype', "transcription_start_site", "3_utr_start","start_position","external_gene_name"]
}, header=1).content.decode()), sep='\t')

In [298]:
ensmbl.show_attributes_by_page()

{'feature_page': 'Features': (attributes: {'ensembl_gene_id': 'Gene stable ID' (default: True), 'ensembl_gene_id_version': 'Gene stable ID version' (default: True), 'ensembl_transcript_id': 'Transcript stable ID' (default: True), 'ensembl_transcript_id_version': 'Transcript stable ID version' (default: True), 'ensembl_peptide_id': 'Protein stable ID' (default: False), 'ensembl_peptide_id_version': 'Protein stable ID version' (default: False), 'ensembl_exon_id': 'Exon stable ID' (default: False), 'description': 'Gene description' (default: False), 'chromosome_name': 'Chromosome/scaffold name' (default: False), 'start_position': 'Gene start (bp)' (default: False), 'end_position': 'Gene end (bp)' (default: False), 'strand': 'Strand' (default: False), 'band': 'Karyotype band' (default: False), 'transcript_start': 'Transcript start (bp)' (default: False), 'transcript_end': 'Transcript end (bp)' (default: False), 'transcription_start_site': 'Transcription start site (TSS)' (default: False), 

In [294]:
a = h.fileToList('../results/slamseqMax/MEF2Dtargets.txt')
a.pop(0)

'# Filtered by: padj <=0.05 in SLAM-seq at 2 or 24 hours, basemean>10, log2FC SLAM at 2 hours <-0.2'

In [302]:
ensembltss

Unnamed: 0,Gene stable ID,Gene type,Transcription start site (TSS),3' UTR start,Gene start (bp),Gene name
0,ENSG00000210049,Mt_tRNA,577,,577,MT-TF
1,ENSG00000211459,Mt_rRNA,648,,648,MT-RNR1
2,ENSG00000210077,Mt_tRNA,1602,,1602,MT-TV
3,ENSG00000210082,Mt_rRNA,1671,,1671,MT-RNR2
4,ENSG00000209082,Mt_tRNA,3230,,3230,MT-TL1
...,...,...,...,...,...,...
383612,ENSG00000285114,protein_coding,129795674,,129757225,GSDMC
383613,ENSG00000285114,protein_coding,129795674,129757225.0,129757225,GSDMC
383614,ENSG00000285114,protein_coding,129786637,,129757225,GSDMC
383615,ENSG00000285114,protein_coding,129786637,129757226.0,129757225,GSDMC


### on unscalled data

In [None]:
bams = !ls ../../data/$project/bwa/mergedLibrary/mp*.bam
bams

In [193]:
! mkdir ../../data/$project/diffPeaks_unscaled

In [191]:
bams

['../../data/chipseq_MEF2D_degraded/bwa/mergedLibrary/INPUT_R1.mLb.clN.sorted.bam',
 '../../data/chipseq_MEF2D_degraded/bwa/mergedLibrary/MV411_MEF2D_NT_SC_63_DMSO-FLAG_MEF2D_R1.mLb.clN.sorted.bam',
 '../../data/chipseq_MEF2D_degraded/bwa/mergedLibrary/MV411_MEF2D_NT_SC_63_DMSO-IFR8_R1.mLb.clN.sorted.bam',
 '../../data/chipseq_MEF2D_degraded/bwa/mergedLibrary/MV411_MEF2D_NT_SC_63_DMSO-MED1_R1.mLb.clN.sorted.bam',
 '../../data/chipseq_MEF2D_degraded/bwa/mergedLibrary/MV411_MEF2D_NT_SC_63_DMSO-MEF2C_R1.mLb.clN.sorted.bam',
 '../../data/chipseq_MEF2D_degraded/bwa/mergedLibrary/MV411_MEF2D_NT_SC_63_DMSO-MYC_R1.mLb.clN.sorted.bam',
 '../../data/chipseq_MEF2D_degraded/bwa/mergedLibrary/MV411_MEF2D_NT_SC_63_DMSO-POLII_R1.mLb.clN.sorted.bam',
 '../../data/chipseq_MEF2D_degraded/bwa/mergedLibrary/MV411_MEF2D_NT_SC_63_VHL-FLAG_MEF2D_R1.mLb.clN.sorted.bam',
 '../../data/chipseq_MEF2D_degraded/bwa/mergedLibrary/MV411_MEF2D_NT_SC_63_VHL-IFR8_R1.mLb.clN.sorted.bam',
 '../../data/chipseq_MEF2D_degrad

In [213]:
#on unscalled data 
for i in range(int(len(bams[1:])/2)):
    if i < 0:
        continue
    name1 = bams[1+i]
    name2 = bams[7+i]
    print(name1,name2)
    chip.fullDiffPeak(name1,name2, control1='../../data/'+project+'/bwa/mergedLibrary/INPUT_R1.mLb.clN.sorted.bam', directory = "../../data/"+project+"/diffData_unscaled/", res_directory = "../../data/"+project+"/diffPeaks_unscaled/",pairedend=False)

../../data/chipseq_MEF2D_degraded/bwa/mergedLibrary/MV411_MEF2D_NT_SC_63_DMSO-FLAG_MEF2D_R1.mLb.clN.sorted.bam ../../data/chipseq_MEF2D_degraded/bwa/mergedLibrary/MV411_MEF2D_NT_SC_63_VHL-FLAG_MEF2D_R1.mLb.clN.sorted.bam
doing diff from ../../data/chipseq_MEF2D_degraded/bwa/mergedLibrary/MV411_MEF2D_NT_SC_63_DMSO-FLAG_MEF2D_R1.mLb.clN.sorted.bam and ../../data/chipseq_MEF2D_degraded/bwa/mergedLibrary/MV411_MEF2D_NT_SC_63_VHL-FLAG_MEF2D_R1.mLb.clN.sorted.bam
computing the fragment avg size
230
computing the scaling values
b'INFO  @ Tue, 03 Nov 2020 14:54:27: \n# Command line: callpeak -B -t ../../data/chipseq_MEF2D_degraded/bwa/mergedLibrary/MV411_MEF2D_NT_SC_63_DMSO-FLAG_MEF2D_R1.mLb.clN.sorted.bam -c ../../data/chipseq_MEF2D_degraded/bwa/mergedLibrary/INPUT_R1.mLb.clN.sorted.bam --nomodel --extsize 230 -n MV411_MEF2D_NT_SC_63_DMSO-FLAG_MEF2D_R1 --outdir ../../data/chipseq_MEF2D_degraded/diffData_unscaled/ -f BAM\n# ARGUMENTS LIST:\n# name = MV411_MEF2D_NT_SC_63_DMSO-FLAG_MEF2D_R1\n# f

b'INFO  @ Tue, 03 Nov 2020 15:18:37: \n# Command line: callpeak -B -t ../../data/chipseq_MEF2D_degraded/bwa/mergedLibrary/MV411_MEF2D_NT_SC_63_VHL-FLAG_MEF2D_R1.mLb.clN.sorted.bam -c ../../data/chipseq_MEF2D_degraded/bwa/mergedLibrary/INPUT_R1.mLb.clN.sorted.bam --nomodel --extsize 230 -n MV411_MEF2D_NT_SC_63_VHL-FLAG_MEF2D_R1 --outdir ../../data/chipseq_MEF2D_degraded/diffData_unscaled/ -f BAM\n# ARGUMENTS LIST:\n# name = MV411_MEF2D_NT_SC_63_VHL-FLAG_MEF2D_R1\n# format = BAM\n# ChIP-seq file = [\'../../data/chipseq_MEF2D_degraded/bwa/mergedLibrary/MV411_MEF2D_NT_SC_63_VHL-FLAG_MEF2D_R1.mLb.clN.sorted.bam\']\n# control file = [\'../../data/chipseq_MEF2D_degraded/bwa/mergedLibrary/INPUT_R1.mLb.clN.sorted.bam\']\n# effective genome size = 2.70e+09\n# band width = 300\n# model fold = [5, 50]\n# qvalue cutoff = 5.00e-02\n# The maximum gap between significant sites is assigned as the read length/tag size.\n# The minimum length of peaks is assigned as the predicted fragment length "d".\n# L

../../data/chipseq_MEF2D_degraded/bwa/mergedLibrary/MV411_MEF2D_NT_SC_63_DMSO-IFR8_R1.mLb.clN.sorted.bam ../../data/chipseq_MEF2D_degraded/bwa/mergedLibrary/MV411_MEF2D_NT_SC_63_VHL-IFR8_R1.mLb.clN.sorted.bam
doing diff from ../../data/chipseq_MEF2D_degraded/bwa/mergedLibrary/MV411_MEF2D_NT_SC_63_DMSO-IFR8_R1.mLb.clN.sorted.bam and ../../data/chipseq_MEF2D_degraded/bwa/mergedLibrary/MV411_MEF2D_NT_SC_63_VHL-IFR8_R1.mLb.clN.sorted.bam
computing the fragment avg size
208
computing the scaling values
b'INFO  @ Tue, 03 Nov 2020 15:54:07: \n# Command line: callpeak -B -t ../../data/chipseq_MEF2D_degraded/bwa/mergedLibrary/MV411_MEF2D_NT_SC_63_DMSO-IFR8_R1.mLb.clN.sorted.bam -c ../../data/chipseq_MEF2D_degraded/bwa/mergedLibrary/INPUT_R1.mLb.clN.sorted.bam --nomodel --extsize 208 -n MV411_MEF2D_NT_SC_63_DMSO-IFR8_R1 --outdir ../../data/chipseq_MEF2D_degraded/diffData_unscaled/ -f BAM\n# ARGUMENTS LIST:\n# name = MV411_MEF2D_NT_SC_63_DMSO-IFR8_R1\n# format = BAM\n# ChIP-seq file = [\'../../da

b'INFO  @ Tue, 03 Nov 2020 16:12:00: \n# Command line: callpeak -B -t ../../data/chipseq_MEF2D_degraded/bwa/mergedLibrary/MV411_MEF2D_NT_SC_63_VHL-IFR8_R1.mLb.clN.sorted.bam -c ../../data/chipseq_MEF2D_degraded/bwa/mergedLibrary/INPUT_R1.mLb.clN.sorted.bam --nomodel --extsize 208 -n MV411_MEF2D_NT_SC_63_VHL-IFR8_R1 --outdir ../../data/chipseq_MEF2D_degraded/diffData_unscaled/ -f BAM\n# ARGUMENTS LIST:\n# name = MV411_MEF2D_NT_SC_63_VHL-IFR8_R1\n# format = BAM\n# ChIP-seq file = [\'../../data/chipseq_MEF2D_degraded/bwa/mergedLibrary/MV411_MEF2D_NT_SC_63_VHL-IFR8_R1.mLb.clN.sorted.bam\']\n# control file = [\'../../data/chipseq_MEF2D_degraded/bwa/mergedLibrary/INPUT_R1.mLb.clN.sorted.bam\']\n# effective genome size = 2.70e+09\n# band width = 300\n# model fold = [5, 50]\n# qvalue cutoff = 5.00e-02\n# The maximum gap between significant sites is assigned as the read length/tag size.\n# The minimum length of peaks is assigned as the predicted fragment length "d".\n# Larger dataset will be sc

../../data/chipseq_MEF2D_degraded/bwa/mergedLibrary/MV411_MEF2D_NT_SC_63_DMSO-MED1_R1.mLb.clN.sorted.bam ../../data/chipseq_MEF2D_degraded/bwa/mergedLibrary/MV411_MEF2D_NT_SC_63_VHL-MED1_R1.mLb.clN.sorted.bam
doing diff from ../../data/chipseq_MEF2D_degraded/bwa/mergedLibrary/MV411_MEF2D_NT_SC_63_DMSO-MED1_R1.mLb.clN.sorted.bam and ../../data/chipseq_MEF2D_degraded/bwa/mergedLibrary/MV411_MEF2D_NT_SC_63_VHL-MED1_R1.mLb.clN.sorted.bam
computing the fragment avg size
220
computing the scaling values
b'INFO  @ Tue, 03 Nov 2020 16:51:34: \n# Command line: callpeak -B -t ../../data/chipseq_MEF2D_degraded/bwa/mergedLibrary/MV411_MEF2D_NT_SC_63_DMSO-MED1_R1.mLb.clN.sorted.bam -c ../../data/chipseq_MEF2D_degraded/bwa/mergedLibrary/INPUT_R1.mLb.clN.sorted.bam --nomodel --extsize 220 -n MV411_MEF2D_NT_SC_63_DMSO-MED1_R1 --outdir ../../data/chipseq_MEF2D_degraded/diffData_unscaled/ -f BAM\n# ARGUMENTS LIST:\n# name = MV411_MEF2D_NT_SC_63_DMSO-MED1_R1\n# format = BAM\n# ChIP-seq file = [\'../../da

b'INFO  @ Tue, 03 Nov 2020 17:09:57: \n# Command line: callpeak -B -t ../../data/chipseq_MEF2D_degraded/bwa/mergedLibrary/MV411_MEF2D_NT_SC_63_VHL-MED1_R1.mLb.clN.sorted.bam -c ../../data/chipseq_MEF2D_degraded/bwa/mergedLibrary/INPUT_R1.mLb.clN.sorted.bam --nomodel --extsize 220 -n MV411_MEF2D_NT_SC_63_VHL-MED1_R1 --outdir ../../data/chipseq_MEF2D_degraded/diffData_unscaled/ -f BAM\n# ARGUMENTS LIST:\n# name = MV411_MEF2D_NT_SC_63_VHL-MED1_R1\n# format = BAM\n# ChIP-seq file = [\'../../data/chipseq_MEF2D_degraded/bwa/mergedLibrary/MV411_MEF2D_NT_SC_63_VHL-MED1_R1.mLb.clN.sorted.bam\']\n# control file = [\'../../data/chipseq_MEF2D_degraded/bwa/mergedLibrary/INPUT_R1.mLb.clN.sorted.bam\']\n# effective genome size = 2.70e+09\n# band width = 300\n# model fold = [5, 50]\n# qvalue cutoff = 5.00e-02\n# The maximum gap between significant sites is assigned as the read length/tag size.\n# The minimum length of peaks is assigned as the predicted fragment length "d".\n# Larger dataset will be sc

../../data/chipseq_MEF2D_degraded/bwa/mergedLibrary/MV411_MEF2D_NT_SC_63_DMSO-MEF2C_R1.mLb.clN.sorted.bam ../../data/chipseq_MEF2D_degraded/bwa/mergedLibrary/MV411_MEF2D_NT_SC_63_VHL-MEF2C_R1.mLb.clN.sorted.bam
doing diff from ../../data/chipseq_MEF2D_degraded/bwa/mergedLibrary/MV411_MEF2D_NT_SC_63_DMSO-MEF2C_R1.mLb.clN.sorted.bam and ../../data/chipseq_MEF2D_degraded/bwa/mergedLibrary/MV411_MEF2D_NT_SC_63_VHL-MEF2C_R1.mLb.clN.sorted.bam
computing the fragment avg size
229
computing the scaling values
b'INFO  @ Tue, 03 Nov 2020 17:55:31: \n# Command line: callpeak -B -t ../../data/chipseq_MEF2D_degraded/bwa/mergedLibrary/MV411_MEF2D_NT_SC_63_DMSO-MEF2C_R1.mLb.clN.sorted.bam -c ../../data/chipseq_MEF2D_degraded/bwa/mergedLibrary/INPUT_R1.mLb.clN.sorted.bam --nomodel --extsize 229 -n MV411_MEF2D_NT_SC_63_DMSO-MEF2C_R1 --outdir ../../data/chipseq_MEF2D_degraded/diffData_unscaled/ -f BAM\n# ARGUMENTS LIST:\n# name = MV411_MEF2D_NT_SC_63_DMSO-MEF2C_R1\n# format = BAM\n# ChIP-seq file = [\'.

b'INFO  @ Tue, 03 Nov 2020 18:19:07: \n# Command line: callpeak -B -t ../../data/chipseq_MEF2D_degraded/bwa/mergedLibrary/MV411_MEF2D_NT_SC_63_VHL-MEF2C_R1.mLb.clN.sorted.bam -c ../../data/chipseq_MEF2D_degraded/bwa/mergedLibrary/INPUT_R1.mLb.clN.sorted.bam --nomodel --extsize 229 -n MV411_MEF2D_NT_SC_63_VHL-MEF2C_R1 --outdir ../../data/chipseq_MEF2D_degraded/diffData_unscaled/ -f BAM\n# ARGUMENTS LIST:\n# name = MV411_MEF2D_NT_SC_63_VHL-MEF2C_R1\n# format = BAM\n# ChIP-seq file = [\'../../data/chipseq_MEF2D_degraded/bwa/mergedLibrary/MV411_MEF2D_NT_SC_63_VHL-MEF2C_R1.mLb.clN.sorted.bam\']\n# control file = [\'../../data/chipseq_MEF2D_degraded/bwa/mergedLibrary/INPUT_R1.mLb.clN.sorted.bam\']\n# effective genome size = 2.70e+09\n# band width = 300\n# model fold = [5, 50]\n# qvalue cutoff = 5.00e-02\n# The maximum gap between significant sites is assigned as the read length/tag size.\n# The minimum length of peaks is assigned as the predicted fragment length "d".\n# Larger dataset will b

../../data/chipseq_MEF2D_degraded/bwa/mergedLibrary/MV411_MEF2D_NT_SC_63_DMSO-MYC_R1.mLb.clN.sorted.bam ../../data/chipseq_MEF2D_degraded/bwa/mergedLibrary/MV411_MEF2D_NT_SC_63_VHL-MYC_R1.mLb.clN.sorted.bam
doing diff from ../../data/chipseq_MEF2D_degraded/bwa/mergedLibrary/MV411_MEF2D_NT_SC_63_DMSO-MYC_R1.mLb.clN.sorted.bam and ../../data/chipseq_MEF2D_degraded/bwa/mergedLibrary/MV411_MEF2D_NT_SC_63_VHL-MYC_R1.mLb.clN.sorted.bam
computing the fragment avg size
216
computing the scaling values
b'INFO  @ Tue, 03 Nov 2020 19:00:21: \n# Command line: callpeak -B -t ../../data/chipseq_MEF2D_degraded/bwa/mergedLibrary/MV411_MEF2D_NT_SC_63_DMSO-MYC_R1.mLb.clN.sorted.bam -c ../../data/chipseq_MEF2D_degraded/bwa/mergedLibrary/INPUT_R1.mLb.clN.sorted.bam --nomodel --extsize 216 -n MV411_MEF2D_NT_SC_63_DMSO-MYC_R1 --outdir ../../data/chipseq_MEF2D_degraded/diffData_unscaled/ -f BAM\n# ARGUMENTS LIST:\n# name = MV411_MEF2D_NT_SC_63_DMSO-MYC_R1\n# format = BAM\n# ChIP-seq file = [\'../../data/chip

b'INFO  @ Tue, 03 Nov 2020 19:22:55: \n# Command line: callpeak -B -t ../../data/chipseq_MEF2D_degraded/bwa/mergedLibrary/MV411_MEF2D_NT_SC_63_VHL-MYC_R1.mLb.clN.sorted.bam -c ../../data/chipseq_MEF2D_degraded/bwa/mergedLibrary/INPUT_R1.mLb.clN.sorted.bam --nomodel --extsize 216 -n MV411_MEF2D_NT_SC_63_VHL-MYC_R1 --outdir ../../data/chipseq_MEF2D_degraded/diffData_unscaled/ -f BAM\n# ARGUMENTS LIST:\n# name = MV411_MEF2D_NT_SC_63_VHL-MYC_R1\n# format = BAM\n# ChIP-seq file = [\'../../data/chipseq_MEF2D_degraded/bwa/mergedLibrary/MV411_MEF2D_NT_SC_63_VHL-MYC_R1.mLb.clN.sorted.bam\']\n# control file = [\'../../data/chipseq_MEF2D_degraded/bwa/mergedLibrary/INPUT_R1.mLb.clN.sorted.bam\']\n# effective genome size = 2.70e+09\n# band width = 300\n# model fold = [5, 50]\n# qvalue cutoff = 5.00e-02\n# The maximum gap between significant sites is assigned as the read length/tag size.\n# The minimum length of peaks is assigned as the predicted fragment length "d".\n# Larger dataset will be scaled

../../data/chipseq_MEF2D_degraded/bwa/mergedLibrary/MV411_MEF2D_NT_SC_63_DMSO-POLII_R1.mLb.clN.sorted.bam ../../data/chipseq_MEF2D_degraded/bwa/mergedLibrary/MV411_MEF2D_NT_SC_63_VHL-POLII_R1.mLb.clN.sorted.bam
doing diff from ../../data/chipseq_MEF2D_degraded/bwa/mergedLibrary/MV411_MEF2D_NT_SC_63_DMSO-POLII_R1.mLb.clN.sorted.bam and ../../data/chipseq_MEF2D_degraded/bwa/mergedLibrary/MV411_MEF2D_NT_SC_63_VHL-POLII_R1.mLb.clN.sorted.bam
computing the fragment avg size
250
computing the scaling values
b'INFO  @ Tue, 03 Nov 2020 20:09:13: \n# Command line: callpeak -B -t ../../data/chipseq_MEF2D_degraded/bwa/mergedLibrary/MV411_MEF2D_NT_SC_63_DMSO-POLII_R1.mLb.clN.sorted.bam -c ../../data/chipseq_MEF2D_degraded/bwa/mergedLibrary/INPUT_R1.mLb.clN.sorted.bam --nomodel --extsize 250 -n MV411_MEF2D_NT_SC_63_DMSO-POLII_R1 --outdir ../../data/chipseq_MEF2D_degraded/diffData_unscaled/ -f BAM\n# ARGUMENTS LIST:\n# name = MV411_MEF2D_NT_SC_63_DMSO-POLII_R1\n# format = BAM\n# ChIP-seq file = [\'.

b'INFO  @ Tue, 03 Nov 2020 20:34:13: \n# Command line: callpeak -B -t ../../data/chipseq_MEF2D_degraded/bwa/mergedLibrary/MV411_MEF2D_NT_SC_63_VHL-POLII_R1.mLb.clN.sorted.bam -c ../../data/chipseq_MEF2D_degraded/bwa/mergedLibrary/INPUT_R1.mLb.clN.sorted.bam --nomodel --extsize 250 -n MV411_MEF2D_NT_SC_63_VHL-POLII_R1 --outdir ../../data/chipseq_MEF2D_degraded/diffData_unscaled/ -f BAM\n# ARGUMENTS LIST:\n# name = MV411_MEF2D_NT_SC_63_VHL-POLII_R1\n# format = BAM\n# ChIP-seq file = [\'../../data/chipseq_MEF2D_degraded/bwa/mergedLibrary/MV411_MEF2D_NT_SC_63_VHL-POLII_R1.mLb.clN.sorted.bam\']\n# control file = [\'../../data/chipseq_MEF2D_degraded/bwa/mergedLibrary/INPUT_R1.mLb.clN.sorted.bam\']\n# effective genome size = 2.70e+09\n# band width = 300\n# model fold = [5, 50]\n# qvalue cutoff = 5.00e-02\n# The maximum gap between significant sites is assigned as the read length/tag size.\n# The minimum length of peaks is assigned as the predicted fragment length "d".\n# Larger dataset will b

In [214]:
bw = ! ls ../../data/$project/bwa/mergedLibrary/bigwig/*.bigWig
bw

['../../data/chipseq_MEF2D_degraded/bwa/mergedLibrary/bigwig/INPUT_R1.mLb.clN.bigWig',
 '../../data/chipseq_MEF2D_degraded/bwa/mergedLibrary/bigwig/MV411_MEF2D_NT_SC_63_DMSO-FLAG_MEF2D_R1.mLb.clN.bigWig',
 '../../data/chipseq_MEF2D_degraded/bwa/mergedLibrary/bigwig/MV411_MEF2D_NT_SC_63_DMSO-IFR8_R1.mLb.clN.bigWig',
 '../../data/chipseq_MEF2D_degraded/bwa/mergedLibrary/bigwig/MV411_MEF2D_NT_SC_63_DMSO-MED1_R1.mLb.clN.bigWig',
 '../../data/chipseq_MEF2D_degraded/bwa/mergedLibrary/bigwig/MV411_MEF2D_NT_SC_63_DMSO-MEF2C_R1.mLb.clN.bigWig',
 '../../data/chipseq_MEF2D_degraded/bwa/mergedLibrary/bigwig/MV411_MEF2D_NT_SC_63_DMSO-MYC_R1.mLb.clN.bigWig',
 '../../data/chipseq_MEF2D_degraded/bwa/mergedLibrary/bigwig/MV411_MEF2D_NT_SC_63_DMSO-POLII_R1.mLb.clN.bigWig',
 '../../data/chipseq_MEF2D_degraded/bwa/mergedLibrary/bigwig/MV411_MEF2D_NT_SC_63_VHL-FLAG_MEF2D_R1.mLb.clN.bigWig',
 '../../data/chipseq_MEF2D_degraded/bwa/mergedLibrary/bigwig/MV411_MEF2D_NT_SC_63_VHL-IFR8_R1.mLb.clN.bigWig',
 '../.

In [215]:
cond1peak = ! ls ../../data/$project/diffPeaks_unscaled/*cond1.bed
cond2peak = ! ls ../../data/$project/diffPeaks_unscaled/*cond2.bed
commonpeak = ! ls ../../data/$project/diffPeaks_unscaled/*common.bed
commonpeak

['../../data/chipseq_MEF2D_degraded/diffPeaks_unscaled/MV411_MEF2D_NT_SC_63_DMSO-FLAG_MEF2D_R1_treat_pileup_vs_MV411_MEF2D_NT_SC_63_VHL-FLAG_MEF2D_R1_treat_pileup_c3.0_common.bed',
 '../../data/chipseq_MEF2D_degraded/diffPeaks_unscaled/MV411_MEF2D_NT_SC_63_DMSO-IFR8_R1_treat_pileup_vs_MV411_MEF2D_NT_SC_63_VHL-IFR8_R1_treat_pileup_c3.0_common.bed',
 '../../data/chipseq_MEF2D_degraded/diffPeaks_unscaled/MV411_MEF2D_NT_SC_63_DMSO-MED1_R1_treat_pileup_vs_MV411_MEF2D_NT_SC_63_VHL-MED1_R1_treat_pileup_c3.0_common.bed',
 '../../data/chipseq_MEF2D_degraded/diffPeaks_unscaled/MV411_MEF2D_NT_SC_63_DMSO-MEF2C_R1_treat_pileup_vs_MV411_MEF2D_NT_SC_63_VHL-MEF2C_R1_treat_pileup_c3.0_common.bed',
 '../../data/chipseq_MEF2D_degraded/diffPeaks_unscaled/MV411_MEF2D_NT_SC_63_DMSO-MYC_R1_treat_pileup_vs_MV411_MEF2D_NT_SC_63_VHL-MYC_R1_treat_pileup_c3.0_common.bed',
 '../../data/chipseq_MEF2D_degraded/diffPeaks_unscaled/MV411_MEF2D_NT_SC_63_DMSO-POLII_R1_treat_pileup_vs_MV411_MEF2D_NT_SC_63_VHL-POLII_R1_tre

In [216]:
names = ["FLAG_MEF2D",
"IFR8",
"MED1",
"MEF2C",
"MYC",
"POLII"]

In [217]:
for i in range(int(len(bw)/2)):
    name1 = bw[i]
    name2 = bw[6+i]
    peak = [cond1peak[i], commonpeak[i], cond2peak[i]]
    #for val in peak:
     #   chip.dropWeirdChromosomes(val)
    name = names[i]
    print(name1,name2)
    chip.getPeaksAt(peak, [name1, name2], bigwignames=['DMSO', 'VHL'], peaknames=['DMSO_peaks', 'common', 'VHL_peaks'], window=3000, folder="", title=name, numthreads=7, torecompute=True, refpoint='center', name='../../data/'+project+'/diffPeaks_unscaled/'+name+'_mat.pdf', withDeeptools=True)

../../data/chipseq_MEF2D_degraded/bwa/mergedLibrary/bigwig/INPUT_R1.mLb.clN.bigWig ../../data/chipseq_MEF2D_degraded/bwa/mergedLibrary/bigwig/MV411_MEF2D_NT_SC_63_DMSO-POLII_R1.mLb.clN.bigWig
CompletedProcess(args='computeMatrix reference-point -S ../../data/chipseq_MEF2D_degraded/bwa/mergedLibrary/bigwig/INPUT_R1.mLb.clN.bigWig ../../data/chipseq_MEF2D_degraded/bwa/mergedLibrary/bigwig/MV411_MEF2D_NT_SC_63_DMSO-POLII_R1.mLb.clN.bigWig  --referencePoint center --regionsFileName ../../data/chipseq_MEF2D_degraded/diffPeaks_unscaled/MV411_MEF2D_NT_SC_63_DMSO-FLAG_MEF2D_R1_treat_pileup_vs_MV411_MEF2D_NT_SC_63_VHL-FLAG_MEF2D_R1_treat_pileup_c3.0_common.bed ../../data/chipseq_MEF2D_degraded/diffPeaks_unscaled/MV411_MEF2D_NT_SC_63_DMSO-FLAG_MEF2D_R1_treat_pileup_vs_MV411_MEF2D_NT_SC_63_VHL-FLAG_MEF2D_R1_treat_pileup_c3.0_cond2.bed --missingDataAsZero --outFileName ../../data/chipseq_MEF2D_degraded/diffPeaks_unscaled/FLAG_MEF2D_mat.gz --upstream 3000 --downstream 3000 --numberOfProcessors 7 &&

CompletedProcess(args='computeMatrix reference-point -S ../../data/chipseq_MEF2D_degraded/bwa/mergedLibrary/bigwig/MV411_MEF2D_NT_SC_63_DMSO-MYC_R1.mLb.clN.bigWig ../../data/chipseq_MEF2D_degraded/bwa/mergedLibrary/bigwig/MV411_MEF2D_NT_SC_63_VHL-MYC_R1.mLb.clN.bigWig  --referencePoint center --regionsFileName ../../data/chipseq_MEF2D_degraded/diffPeaks_unscaled/MV411_MEF2D_NT_SC_63_DMSO-POLII_R1_treat_pileup_vs_MV411_MEF2D_NT_SC_63_VHL-POLII_R1_treat_pileup_c3.0_cond1.bed ../../data/chipseq_MEF2D_degraded/diffPeaks_unscaled/MV411_MEF2D_NT_SC_63_DMSO-POLII_R1_treat_pileup_vs_MV411_MEF2D_NT_SC_63_VHL-POLII_R1_treat_pileup_c3.0_common.bed ../../data/chipseq_MEF2D_degraded/diffPeaks_unscaled/MV411_MEF2D_NT_SC_63_DMSO-POLII_R1_treat_pileup_vs_MV411_MEF2D_NT_SC_63_VHL-POLII_R1_treat_pileup_c3.0_cond2.bed --missingDataAsZero --outFileName ../../data/chipseq_MEF2D_degraded/diffPeaks_unscaled/POLII_mat.gz --upstream 3000 --downstream 3000 --numberOfProcessors 7 && plotHeatmap --matrixFile ../.

In [228]:
h.createFoldersFor('../results/'+project+'/plots/unscaled/diffPeaks/')
! cp ../../data/$project/diffPeaks_unscaled/*.pdf ../results/$project/plots/unscaled/diffPeaks/

! gsutil -m cp gs://amlproject/Chip/$project/bwa/mergedLibrary/deepTools/**.pdf ../results/$project/plots/

Copying gs://amlproject/Chip/chipseq_MEF2D_degraded/bwa/mergedLibrary/deepTools/plotFingerprint/MV411_MEF2D_NT_SC_63_DMSO-FLAG_MEF2D_R1.plotFingerprint.pdf...
Copying gs://amlproject/Chip/chipseq_MEF2D_degraded/bwa/mergedLibrary/deepTools/plotFingerprint/MV411_MEF2D_NT_SC_63_DMSO-IFR8_R1.plotFingerprint.pdf...
Copying gs://amlproject/Chip/chipseq_MEF2D_degraded/bwa/mergedLibrary/deepTools/plotFingerprint/MV411_MEF2D_NT_SC_63_DMSO-MED1_R1.plotFingerprint.pdf...
Copying gs://amlproject/Chip/chipseq_MEF2D_degraded/bwa/mergedLibrary/deepTools/plotFingerprint/MV411_MEF2D_NT_SC_63_DMSO-MEF2C_R1.plotFingerprint.pdf...
Copying gs://amlproject/Chip/chipseq_MEF2D_degraded/bwa/mergedLibrary/deepTools/plotFingerprint/MV411_MEF2D_NT_SC_63_DMSO-MYC_R1.plotFingerprint.pdf...
Copying gs://amlproject/Chip/chipseq_MEF2D_degraded/bwa/mergedLibrary/deepTools/plotFingerprint/MV411_MEF2D_NT_SC_63_VHL-FLAG_MEF2D.plotFingerprint.pdf...
Copying gs://amlproject/Chip/chipseq_MEF2D_degraded/bwa/mergedLibrary/deep

In [236]:
! gsutil -m cp -r ../../data/$project/diffPeaks gs://amlproject/Chip/$project/
! gsutil -m cp -r ../../data/$project/diffData gs://amlproject/Chip/$project/
! gsutil -m cp -r ../../data/$project/diffPeaks_unscaled gs://amlproject/Chip/$project/
! gsutil -m cp -r ../../data/$project/diffData_unscaled gs://amlproject/Chip/$project/
! gsutil -m cp -r ../../data/$project/peakplot gs://amlproject/Chip/$project/

Copying file://../../data/chipseq_MEF2D_degraded/diffPeaks/MV411_MEF2D_NT_SC_63_DMSO-MED1_R1_treat_pileup_vs_MV411_MEF2D_NT_SC_63_VHL-MED1_R1_treat_pileup_c3.0_common.bed [Content-Type=application/octet-stream]...
Copying file://../../data/chipseq_MEF2D_degraded/diffPeaks/MV411_MEF2D_NT_SC_63_DMSO-MEF2C_R1_treat_pileup_vs_MV411_MEF2D_NT_SC_63_VHL-MEF2C_R1_treat_pileup_c3.0_cond2.bed [Content-Type=application/octet-stream]...
Copying file://../../data/chipseq_MEF2D_degraded/diffPeaks/FLAG_MEF2D_mat.gz [Content-Type=application/octet-stream]...
Copying file://../../data/chipseq_MEF2D_degraded/diffPeaks/MV411_MEF2D_NT_SC_63_DMSO-MED1_R1_treat_pileup_vs_MV411_MEF2D_NT_SC_63_VHL-MED1_R1_treat_pileup_c3.0_cond1.bed [Content-Type=application/octet-stream]...
Copying file://../../data/chipseq_MEF2D_degraded/diffPeaks/POLII_mat.gz [Content-Type=application/octet-stream]...
Copying file://../../data/chipseq_MEF2D_degraded/diffPeaks/MV411_MEF2D_NT_SC_63_DMSO-IFR8_R1_treat_pileup_vs_MV411_MEF2D_NT

Copying file://../../data/chipseq_MEF2D_degraded/diffData/MV411_MEF2D_NT_SC_63_DMSO-MEF2C_R1_summits.bed [Content-Type=application/octet-stream]...
Copying file://../../data/chipseq_MEF2D_degraded/diffData/MV411_MEF2D_NT_SC_63_VHL-FLAG_MEF2D_R1_peaks.narrowPeak [Content-Type=application/octet-stream]...
Copying file://../../data/chipseq_MEF2D_degraded/diffData/MV411_MEF2D_NT_SC_63_DMSO-MYC_R1_summits.bed [Content-Type=application/octet-stream]...
Copying file://../../data/chipseq_MEF2D_degraded/diffData/MV411_MEF2D_NT_SC_63_DMSO-POLII_R1_treat_pileup.bdg [Content-Type=application/octet-stream]...
Copying file://../../data/chipseq_MEF2D_degraded/diffData/MV411_MEF2D_NT_SC_63_DMSO-MYC_R1_treat_pileup.bdg [Content-Type=application/octet-stream]...
Copying file://../../data/chipseq_MEF2D_degraded/diffData/MV411_MEF2D_NT_SC_63_VHL-MED1_R1_control_lambda.bdg [Content-Type=application/octet-stream]...
Copying file://../../data/chipseq_MEF2D_degraded/diffData/MV411_MEF2D_NT_SC_63_VHL-MED1_R1_s

Copying file://../../data/chipseq_MEF2D_degraded/diffPeaks_unscaled/MYC_mat.gz [Content-Type=application/octet-stream]...
Copying file://../../data/chipseq_MEF2D_degraded/diffPeaks_unscaled/MV411_MEF2D_NT_SC_63_DMSO-FLAG_MEF2D_R1_treat_pileup_vs_MV411_MEF2D_NT_SC_63_VHL-FLAG_MEF2D_R1_treat_pileup_c3.0_cond2.bed [Content-Type=application/octet-stream]...
Copying file://../../data/chipseq_MEF2D_degraded/diffPeaks_unscaled/MV411_MEF2D_NT_SC_63_DMSO-MED1_R1_treat_pileup_vs_MV411_MEF2D_NT_SC_63_VHL-MED1_R1_treat_pileup_c3.0_cond2.bed [Content-Type=application/octet-stream]...
Copying file://../../data/chipseq_MEF2D_degraded/diffPeaks_unscaled/MV411_MEF2D_NT_SC_63_DMSO-FLAG_MEF2D_R1_treat_pileup_vs_MV411_MEF2D_NT_SC_63_VHL-FLAG_MEF2D_R1_treat_pileup_c3.0_common.bed [Content-Type=application/octet-stream]...
Copying file://../../data/chipseq_MEF2D_degraded/diffPeaks_unscaled/MV411_MEF2D_NT_SC_63_DMSO-IFR8_R1_treat_pileup_vs_MV411_MEF2D_NT_SC_63_VHL-IFR8_R1_treat_pileup_c3.0_common.bed [Conten

Copying file://../../data/chipseq_MEF2D_degraded/diffData_unscaled/MV411_MEF2D_NT_SC_63_DMSO-MEF2C_R1_treat_pileup.bdg [Content-Type=application/octet-stream]...
Copying file://../../data/chipseq_MEF2D_degraded/diffData_unscaled/MV411_MEF2D_NT_SC_63_DMSO-MEF2C_R1_peaks.narrowPeak [Content-Type=application/octet-stream]...
Copying file://../../data/chipseq_MEF2D_degraded/diffData_unscaled/MV411_MEF2D_NT_SC_63_VHL-IFR8_R1_summits.bed [Content-Type=application/octet-stream]...
Copying file://../../data/chipseq_MEF2D_degraded/diffData_unscaled/MV411_MEF2D_NT_SC_63_VHL-MYC_R1_summits.bed [Content-Type=application/octet-stream]...
Copying file://../../data/chipseq_MEF2D_degraded/diffData_unscaled/MV411_MEF2D_NT_SC_63_DMSO-MEF2C_R1_control_lambda.bdg [Content-Type=application/octet-stream]...
Copying file://../../data/chipseq_MEF2D_degraded/diffData_unscaled/MV411_MEF2D_NT_SC_63_VHL-MED1_R1_peaks.xls [Content-Type=application/vnd.ms-excel]...
Copying file://../../data/chipseq_MEF2D_degraded/d

## v7

In [None]:
project="chipseq_MEF2CMEF2D_degraded"

In [None]:
! gsutil ls gs://transfer-amlproject/200924_MP8230_fastq/

In [271]:
mkdir ../data/$project/ && mkdir ../data/$project/qc/

In [272]:
!gsutil -m cp gs://transfer-amlproject/200924_MP8230_fastq/multiqc_report.html ../data/$project/qc/
!gsutil -m cp -r gs://transfer-amlproject/200924_MP8230_fastq/Reports/ ../data/$project/qc/
!gsutil -m cp -r gs://transfer-amlproject/200924_MP8230_fastq/multiqc_data/ ../data/$project/qc/

Copying gs://transfer-amlproject/200924_MP8230_fastq/multiqc_report.html...
/ [1/1 files][  1.3 MiB/  1.3 MiB] 100% Done                                    
Operation completed over 1 objects/1.3 MiB.                                      
Copying gs://transfer-amlproject/200924_MP8230_fastq/Reports/20200924_MP1_MP8230_S113_R1_001_fastqc.html...
Copying gs://transfer-amlproject/200924_MP8230_fastq/Reports/20200924_MP1_MP8230_S113_R1_001_fastqc.zip...
Copying gs://transfer-amlproject/200924_MP8230_fastq/Reports/20200924_MP1_MP8230_S113_R2_001_fastqc.html...
Copying gs://transfer-amlproject/200924_MP8230_fastq/Reports/20200924_MP1_MP8230_S113_R2_001_fastqc.zip...
Copying gs://transfer-amlproject/200924_MP8230_fastq/Reports/20200924_MP2_MP8230_S114_R1_001_fastqc.html...
Copying gs://transfer-amlproject/200924_MP8230_fastq/Reports/20200924_MP2_MP8230_S114_R1_001_fastqc.zip...
Copying gs://transfer-amlproject/200924_MP8230_fastq/Reports/20200924_MP2_MP8230_S114_R2_001_fastqc.html...
Copying 

In [273]:
! gsutil -m cp gs://transfer-amlproject/200924_MP8230_fastq/*  gs://transfer-amlproject/$project/

Omitting prefix "gs://transfer-amlproject/200924_MP8230_fastq/Reports/". (Did you mean to do cp -r?)
Omitting prefix "gs://transfer-amlproject/200924_MP8230_fastq/multiqc_data/". (Did you mean to do cp -r?)
Copying gs://transfer-amlproject/200924_MP8230_fastq/20200924_MP1_MP8230_S113_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/200924_MP8230_fastq/20200924_MP1_MP8230_S113_R2_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/200924_MP8230_fastq/20200924_MP2_MP8230_S114_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/200924_MP8230_fastq/20200924_MP2_MP8230_S114_R2_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/200924_MP8230_fastq/20200924_MP3_MP8230_S115_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/200924_MP8230_fastq/20200924_MP3_MP8230_S115_R2_001.fastq.gz [Content-Type=

### analysis

In [274]:
! mkdir ../../data/$project && mkdir ../../data/$project/fastqs && gsutil -m cp gs://transfer-amlproject/$project/* ../../data/$project/fastqs

Copying gs://transfer-amlproject/chipseq_MEFF2CMEF2D_degraded/20200924_MP1_MP8230_S113_R1_001.fastq.gz...
Copying gs://transfer-amlproject/chipseq_MEFF2CMEF2D_degraded/20200924_MP1_MP8230_S113_R2_001.fastq.gz...
Copying gs://transfer-amlproject/chipseq_MEFF2CMEF2D_degraded/20200924_MP2_MP8230_S114_R1_001.fastq.gz...
Copying gs://transfer-amlproject/chipseq_MEFF2CMEF2D_degraded/20200924_MP2_MP8230_S114_R2_001.fastq.gz...
Copying gs://transfer-amlproject/chipseq_MEFF2CMEF2D_degraded/20200924_MP3_MP8230_S115_R1_001.fastq.gz...
Copying gs://transfer-amlproject/chipseq_MEFF2CMEF2D_degraded/20200924_MP3_MP8230_S115_R2_001.fastq.gz...
Copying gs://transfer-amlproject/chipseq_MEFF2CMEF2D_degraded/20200924_MP4_MP8230_S116_R1_001.fastq.gz...
Copying gs://transfer-amlproject/chipseq_MEFF2CMEF2D_degraded/20200924_MP4_MP8230_S116_R2_001.fastq.gz...
Copying gs://transfer-amlproject/chipseq_MEFF2CMEF2D_degraded/20200924_MP5_MP8230_S117_R1_001.fastq.gz...
Copying gs://transfer-amlproject/chipseq_MEFF2

In [275]:
! rm ../../data/$project/fastqs/multiqc_report.html

In [276]:
a = ! ls ../../data/$project/fastqs
a

['20200924_MP1_MP8230_S113_R1_001.fastq.gz',
 '20200924_MP1_MP8230_S113_R2_001.fastq.gz',
 '20200924_MP2_MP8230_S114_R1_001.fastq.gz',
 '20200924_MP2_MP8230_S114_R2_001.fastq.gz',
 '20200924_MP3_MP8230_S115_R1_001.fastq.gz',
 '20200924_MP3_MP8230_S115_R2_001.fastq.gz',
 '20200924_MP4_MP8230_S116_R1_001.fastq.gz',
 '20200924_MP4_MP8230_S116_R2_001.fastq.gz',
 '20200924_MP5_MP8230_S117_R1_001.fastq.gz',
 '20200924_MP5_MP8230_S117_R2_001.fastq.gz',
 '20200924_MP6_MP8230_S118_R1_001.fastq.gz',
 '20200924_MP6_MP8230_S118_R2_001.fastq.gz',
 '20200924_MP7_MP8230_S119_R1_001.fastq.gz',
 '20200924_MP7_MP8230_S119_R2_001.fastq.gz',
 '20200924_MP8_MP8230_S120_R1_001.fastq.gz',
 '20200924_MP8_MP8230_S120_R2_001.fastq.gz']

In [277]:
rename = {
"20200924_MP1_MP8230_S113": "mp893-MV411_RNP_AAVS1-H3K27AC-r3",
"20200924_MP2_MP8230_S114": "mp894-MV411_RNP_AAVS1-H3K27AC-r4",
"20200924_MP3_MP8230_S115": "mp895-MV411_RNP_MEF2C-H3K27AC-r1",
"20200924_MP4_MP8230_S116": "mp896-MV411_RNP_MEF2C-H3K27AC-r2",
"20200924_MP5_MP8230_S117": "mp897-MV411_RNP_MEF2D-H3K27AC-r3",
"20200924_MP6_MP8230_S118": "mp898-MV411_RNP_MEF2D-H3K27AC-r4",
"20200924_MP7_MP8230_S119": "mp899-MV411_RNP_MEF2C_MEF2D-H3K27AC-r1",
"20200924_MP8_MP8230_S120": "mp900-MV411_RNP_MEF2C_MEF2D-H3K27AC-r2",}

In [278]:
for val in a:
    rep = val
    for k,v in rename.items():
        rep = rep.replace(k,v)
    !mv ../../data/$project/fastqs/$val ../../data/$project/fastqs/$rep

In [279]:
from gsheets import Sheets
sheets = Sheets.from_files('~/.client_secret.json', '~/.storage.json')
url="https://docs.google.com/spreadsheets/d/1yFLjYB1McU530JnLgL0QIMAKIkVl3kl0_LCHje2gk8U"
gsheet = sheets.get(url).sheets[2].to_frame()

In [280]:
gsheet

Unnamed: 0,id,cell line,replicate,protein,quality,paired_end,matching input name,processed,name,previous name,...,ratio to droso,unique mapped reads(droso),scaling factor,Total QC,folderNarrow,folderCompensated,folderQC,folderBroad,folder Bigwig,folder diffPeaks
0,mp100,U937,1.0,INPUT,,n,,Y,mp100-U937-INPUT-r1,,...,,,,https://storage.cloud.google.com/amlproject/Ch...,https://console.cloud.google.com/storage/brows...,,https://console.cloud.google.com/storage/brows...,https://console.cloud.google.com/storage/brows...,https://console.cloud.google.com/storage/brows...,
1,mp101,NOMO1,1.0,INPUT,,n,,Y,mp101-NOMO1-INPUT-r1,,...,,,,,,,,,,
2,mp102,UT7,1.0,INPUT,,n,,Y,mp102-UT7-INPUT-r1,,...,,,,,,,,,,
3,mp106,MV411,1.0,MYB,x,n,INPUT_MV411,Y,mp106-MV411-MYB-r1,,...,,,,,,,,,,
4,mp109,M6,1.0,CEBPA,x,n,INPUT_M6,Y,mp109-M6-CEBPA-r1,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
325,mp897,MV411_RNP_MEF2D,3.0,H3K27AC,,y,INPUT_MV411,,mp897-MV411_RNP_MEF2D-H3K27AC-r3,20200924_MP5_MP8230_S117,...,,,,,,,,,,
326,mp898,MV411_RNP_MEF2D,4.0,H3K27AC,,y,INPUT_MV411,,mp898-MV411_RNP_MEF2D-H3K27AC-r4,20200924_MP6_MP8230_S118,...,,,,,,,,,,
327,mp899,MV411_RNP_MEF2C_MEF2D,1.0,H3K27AC,,y,INPUT_MV411,,mp899-MV411_RNP_MEF2C_MEF2D-H3K27AC-r1,20200924_MP7_MP8230_S119,...,,,,,,,,,,
328,mp900,MV411_RNP_MEF2C_MEF2D,2.0,H3K27AC,,y,INPUT_MV411,,mp900-MV411_RNP_MEF2C_MEF2D-H3K27AC-r2,20200924_MP8_MP8230_S120,...,,,,,,,,,,


In [281]:
a = ! ls ../../data/$project/fastqs
a

['mp893-MV411_RNP_AAVS1-H3K27AC-r3_R1_001.fastq.gz',
 'mp893-MV411_RNP_AAVS1-H3K27AC-r3_R2_001.fastq.gz',
 'mp894-MV411_RNP_AAVS1-H3K27AC-r4_R1_001.fastq.gz',
 'mp894-MV411_RNP_AAVS1-H3K27AC-r4_R2_001.fastq.gz',
 'mp895-MV411_RNP_MEF2C-H3K27AC-r1_R1_001.fastq.gz',
 'mp895-MV411_RNP_MEF2C-H3K27AC-r1_R2_001.fastq.gz',
 'mp896-MV411_RNP_MEF2C-H3K27AC-r2_R1_001.fastq.gz',
 'mp896-MV411_RNP_MEF2C-H3K27AC-r2_R2_001.fastq.gz',
 'mp897-MV411_RNP_MEF2D-H3K27AC-r3_R1_001.fastq.gz',
 'mp897-MV411_RNP_MEF2D-H3K27AC-r3_R2_001.fastq.gz',
 'mp898-MV411_RNP_MEF2D-H3K27AC-r4_R1_001.fastq.gz',
 'mp898-MV411_RNP_MEF2D-H3K27AC-r4_R2_001.fastq.gz',
 'mp899-MV411_RNP_MEF2C_MEF2D-H3K27AC-r1_R1_001.fastq.gz',
 'mp899-MV411_RNP_MEF2C_MEF2D-H3K27AC-r1_R2_001.fastq.gz',
 'mp900-MV411_RNP_MEF2C_MEF2D-H3K27AC-r2_R1_001.fastq.gz',
 'mp900-MV411_RNP_MEF2C_MEF2D-H3K27AC-r2_R2_001.fastq.gz']

In [282]:
df = {
"fastq_1": [],
"fastq_2": [],
"antibody": [],
"group": [],
"replicate": [],
"control": []
}
for val in h.grouped(a,2):
    row = gsheet[gsheet.id==val[0].split('/')[-1].split('-')[0]]
    df['group'].append("MV4"+row['name'].values[0].split('-r')[0].split('-MV4')[1])
    df['replicate'].append(row['name'].values[0].split('-r')[1])
    df['fastq_1'].append(project+"/fastqs/"+val[0])
    df['fastq_2'].append(project+"/fastqs/"+val[1])
    df['antibody'].append(row['protein'].values[0])
    df['control'].append("INPUT")
df['group'].append('INPUT')
df['replicate'].append(1)
df['fastq_1'].append('ref/mp845-MV411-INPUT-r2_R1.fastq.gz')
df['fastq_2'].append('ref/mp845-MV411-INPUT-r2_R2.fastq.gz')
df['antibody'].append("")
df['control'].append("")
df = pd.DataFrame(df)

In [283]:
df

Unnamed: 0,fastq_1,fastq_2,antibody,group,replicate,control
0,chipseq_MEFF2CMEF2D_degraded/fastqs/mp893-MV41...,chipseq_MEFF2CMEF2D_degraded/fastqs/mp893-MV41...,H3K27AC,MV411_RNP_AAVS1-H3K27AC,3,INPUT
1,chipseq_MEFF2CMEF2D_degraded/fastqs/mp894-MV41...,chipseq_MEFF2CMEF2D_degraded/fastqs/mp894-MV41...,H3K27AC,MV411_RNP_AAVS1-H3K27AC,4,INPUT
2,chipseq_MEFF2CMEF2D_degraded/fastqs/mp895-MV41...,chipseq_MEFF2CMEF2D_degraded/fastqs/mp895-MV41...,H3K27AC,MV411_RNP_MEF2C-H3K27AC,1,INPUT
3,chipseq_MEFF2CMEF2D_degraded/fastqs/mp896-MV41...,chipseq_MEFF2CMEF2D_degraded/fastqs/mp896-MV41...,H3K27AC,MV411_RNP_MEF2C-H3K27AC,2,INPUT
4,chipseq_MEFF2CMEF2D_degraded/fastqs/mp897-MV41...,chipseq_MEFF2CMEF2D_degraded/fastqs/mp897-MV41...,H3K27AC,MV411_RNP_MEF2D-H3K27AC,3,INPUT
5,chipseq_MEFF2CMEF2D_degraded/fastqs/mp898-MV41...,chipseq_MEFF2CMEF2D_degraded/fastqs/mp898-MV41...,H3K27AC,MV411_RNP_MEF2D-H3K27AC,4,INPUT
6,chipseq_MEFF2CMEF2D_degraded/fastqs/mp899-MV41...,chipseq_MEFF2CMEF2D_degraded/fastqs/mp899-MV41...,H3K27AC,MV411_RNP_MEF2C_MEF2D-H3K27AC,1,INPUT
7,chipseq_MEFF2CMEF2D_degraded/fastqs/mp900-MV41...,chipseq_MEFF2CMEF2D_degraded/fastqs/mp900-MV41...,H3K27AC,MV411_RNP_MEF2C_MEF2D-H3K27AC,2,INPUT
8,ref/mp845-MV411-INPUT-r2_R1.fastq.gz,ref/mp845-MV411-INPUT-r2_R2.fastq.gz,,INPUT,1,


In [284]:
df[df.columns[[3,4,0,1,2,5]]].to_csv('../nextflow/'+project+'_design.csv',index=False)

In [6]:
! cd ../../data/ && sudo ../nextflow log ## to get access to the previous runs

TIMESTAMP          	DURATION      	RUN NAME              	STATUS	REVISION ID	SESSION ID                          	COMMAND                                                                                                                                                                                                                                                          
2020-02-10 22:39:18	8m 25s        	stupefied_crick       	ERR   	21be314954 	76ea5df0-153c-4e71-a59d-52c6112fda84	nextflow run nf-core/chipseq --paired_end --seq_center DFCI --email jkobject@gmail.com --narrow_peak --input design.csv --genome GRCh38 --skip_preseq --max_cpus 24 -profile docker -w work                                                                      
2020-02-10 22:40:37	1m 6s         	ridiculous_hilbert    	ERR   	21be314954 	75004903-035d-4504-ab80-cab74b5acac4	nextflow run nf-core/chipseq --paired_end --seq_center DFCI --email jkobject@gmail.com --narrow_peak --input design.csv --genome GRCh38 --sk

In [7]:
#process chips
! cd ../../data/ && sudo ../nextflow run nf-core/chipseq --paired_end --seq_center 'DFCI' --email 'jkobject@gmail.com' --input ../AMLproject/nextflow/$project_design.csv --genome GRCh38 --skip_preseq --max_cpus 16 -profile docker -w work -resume exotic_bartik

N E X T F L O W  ~  version 19.10.0
Launching `nf-core/chipseq` [wise_legentil] - revision: 21be314954 [master]
NOTE: Your local project version looks outdated - a different revision is available in the remote repository [0f487ed76d]
-[2m--------------------------------------------------[0m-
                                        [0;32m,--.[0;30m/[0;32m,-.[0m
[0;34m        ___     __   __   __   ___     [0;32m/,-._.--~'[0m
[0;34m  |\ | |__  __ /  ` /  \ |__) |__         [0;33m}  {[0m
[0;34m  | \| |       \__, \__/ |  \ |___     [0;32m\`-._,-`-,[0m
                                        [0;32m`._,._,'[0m
[0;35m  nf-core/chipseq v1.1.0[0m
-[2m--------------------------------------------------[0m-
Run Name            : wise_legentil
Data Type           : Paired-End
Design File         : ../AMLproject/nextflow/additional_degraded_v1_design.csv
Genome              : GRCh38
Fasta File          : s3://ngi-igenomes/igenomes//Homo_sapiens/NCBI/GRCh38/Sequence/WholeGenomeF

[28A
executor >  local (1)[K
[eb/23cd77] process > CheckDesign (additional_deg... [100%] 1 of 1, cached: 1 ✔[K
[c9/ec8de4] process > MakeTSSBED (genes.bed)         [100%] 1 of 1, cached: 1 ✔[K
[0d/f433a4] process > MakeGenomeFilter (genome.fa)   [100%] 1 of 1, cached: 1 ✔[K
[fe/2e0c27] process > FastQC (MV411_RNP_SPI1-H3K2... [100%] 17 of 17, cached:...[K
[68/a565e7] process > TrimGalore (MV411_RNP_MYB-H... [100%] 17 of 17, cached:...[K
[a2/0c2388] process > BWAMem (MV411_RNP_RUNX1-H3K... [100%] 17 of 17, cached:...[K
[13/4f49fa] process > SortBAM (INPUT_R1_T1)          [100%] 17 of 17, cached:...[K
[-        ] process > MergeBAM                       -[K
[-        ] process > MergeBAMFilter                 -[K
[-        ] process > MergeBAMRemoveOrphan           -[K
[-        ] process > Preseq                         -[K
[-        ] process > CollectMultipleMetrics         -[K
[-        ] process > BigWig                         -[K
[-        ] process > PlotProfile   

[28A
executor >  local (6)[K
[eb/23cd77] process > CheckDesign (additional_deg... [100%] 1 of 1, cached: 1 ✔[K
[c9/ec8de4] process > MakeTSSBED (genes.bed)         [100%] 1 of 1, cached: 1 ✔[K
[0d/f433a4] process > MakeGenomeFilter (genome.fa)   [100%] 1 of 1, cached: 1 ✔[K
[fe/2e0c27] process > FastQC (MV411_RNP_SPI1-H3K2... [100%] 17 of 17, cached:...[K
[68/a565e7] process > TrimGalore (MV411_RNP_MYB-H... [100%] 17 of 17, cached:...[K
[a2/0c2388] process > BWAMem (MV411_RNP_RUNX1-H3K... [100%] 17 of 17, cached:...[K
[13/4f49fa] process > SortBAM (INPUT_R1_T1)          [100%] 17 of 17, cached:...[K
[fc/cf6140] process > MergeBAM (MV411_RNP_SPI1-H3... [ 88%] 14 of 16, cached: 11[K
[-        ] process > MergeBAMFilter                 -[K
[-        ] process > MergeBAMRemoveOrphan           -[K
[-        ] process > Preseq                         -[K
[-        ] process > CollectMultipleMetrics         -[K
[-        ] process > BigWig                         -[K
[-        

[28A
executor >  local (11)[K
[eb/23cd77] process > CheckDesign (additional_deg... [100%] 1 of 1, cached: 1 ✔[K
[c9/ec8de4] process > MakeTSSBED (genes.bed)         [100%] 1 of 1, cached: 1 ✔[K
[0d/f433a4] process > MakeGenomeFilter (genome.fa)   [100%] 1 of 1, cached: 1 ✔[K
[fe/2e0c27] process > FastQC (MV411_RNP_SPI1-H3K2... [100%] 17 of 17, cached:...[K
[68/a565e7] process > TrimGalore (MV411_RNP_MYB-H... [100%] 17 of 17, cached:...[K
[a2/0c2388] process > BWAMem (MV411_RNP_RUNX1-H3K... [100%] 17 of 17, cached:...[K
[13/4f49fa] process > SortBAM (INPUT_R1_T1)          [100%] 17 of 17, cached:...[K
[5a/6aaef3] process > MergeBAM (MV411_RNP_RUNX2-H... [100%] 17 of 17, cached:...[K
[84/574208] process > MergeBAMFilter (MV411_RNP_S... [ 50%] 2 of 4[K
[-        ] process > MergeBAMRemoveOrphan           -[K
[-        ] process > Preseq                         -[K
[-        ] process > CollectMultipleMetrics         -[K
[-        ] process > BigWig                         -

[28A
executor >  local (15)[K
[eb/23cd77] process > CheckDesign (additional_deg... [100%] 1 of 1, cached: 1 ✔[K
[c9/ec8de4] process > MakeTSSBED (genes.bed)         [100%] 1 of 1, cached: 1 ✔[K
[0d/f433a4] process > MakeGenomeFilter (genome.fa)   [100%] 1 of 1, cached: 1 ✔[K
[fe/2e0c27] process > FastQC (MV411_RNP_SPI1-H3K2... [100%] 17 of 17, cached:...[K
[68/a565e7] process > TrimGalore (MV411_RNP_MYB-H... [100%] 17 of 17, cached:...[K
[a2/0c2388] process > BWAMem (MV411_RNP_RUNX1-H3K... [100%] 17 of 17, cached:...[K
[13/4f49fa] process > SortBAM (INPUT_R1_T1)          [100%] 17 of 17, cached:...[K
[5a/6aaef3] process > MergeBAM (MV411_RNP_RUNX2-H... [100%] 17 of 17, cached:...[K
[e1/d4ac46] process > MergeBAMFilter (MV411_RNP_A... [ 63%] 5 of 8[K
[-        ] process > MergeBAMRemoveOrphan           -[K
[-        ] process > Preseq                         -[K
[-        ] process > CollectMultipleMetrics         -[K
[-        ] process > BigWig                         -

[28A
executor >  local (18)[K
[eb/23cd77] process > CheckDesign (additional_deg... [100%] 1 of 1, cached: 1 ✔[K
[c9/ec8de4] process > MakeTSSBED (genes.bed)         [100%] 1 of 1, cached: 1 ✔[K
[0d/f433a4] process > MakeGenomeFilter (genome.fa)   [100%] 1 of 1, cached: 1 ✔[K
[fe/2e0c27] process > FastQC (MV411_RNP_SPI1-H3K2... [100%] 17 of 17, cached:...[K
[68/a565e7] process > TrimGalore (MV411_RNP_MYB-H... [100%] 17 of 17, cached:...[K
[a2/0c2388] process > BWAMem (MV411_RNP_RUNX1-H3K... [100%] 17 of 17, cached:...[K
[13/4f49fa] process > SortBAM (INPUT_R1_T1)          [100%] 17 of 17, cached:...[K
[5a/6aaef3] process > MergeBAM (MV411_RNP_RUNX2-H... [100%] 17 of 17, cached:...[K
[77/a464ff] process > MergeBAMFilter (MV411_RNP_I... [ 91%] 10 of 11[K
[-        ] process > MergeBAMRemoveOrphan           -[K
[-        ] process > Preseq                         -[K
[-        ] process > CollectMultipleMetrics         -[K
[-        ] process > BigWig                         

[28A
executor >  local (23)[K
[eb/23cd77] process > CheckDesign (additional_deg... [100%] 1 of 1, cached: 1 ✔[K
[c9/ec8de4] process > MakeTSSBED (genes.bed)         [100%] 1 of 1, cached: 1 ✔[K
[0d/f433a4] process > MakeGenomeFilter (genome.fa)   [100%] 1 of 1, cached: 1 ✔[K
[fe/2e0c27] process > FastQC (MV411_RNP_SPI1-H3K2... [100%] 17 of 17, cached:...[K
[68/a565e7] process > TrimGalore (MV411_RNP_MYB-H... [100%] 17 of 17, cached:...[K
[a2/0c2388] process > BWAMem (MV411_RNP_RUNX1-H3K... [100%] 17 of 17, cached:...[K
[13/4f49fa] process > SortBAM (INPUT_R1_T1)          [100%] 17 of 17, cached:...[K
[5a/6aaef3] process > MergeBAM (MV411_RNP_RUNX2-H... [100%] 17 of 17, cached:...[K
[e0/a67acd] process > MergeBAMFilter (MV411_RNP_M... [ 88%] 14 of 16[K
[-        ] process > MergeBAMRemoveOrphan           -[K
[-        ] process > Preseq                         -[K
[-        ] process > CollectMultipleMetrics         -[K
[-        ] process > BigWig                         

[28A
executor >  local (28)[K
[eb/23cd77] process > CheckDesign (additional_deg... [100%] 1 of 1, cached: 1 ✔[K
[c9/ec8de4] process > MakeTSSBED (genes.bed)         [100%] 1 of 1, cached: 1 ✔[K
[0d/f433a4] process > MakeGenomeFilter (genome.fa)   [100%] 1 of 1, cached: 1 ✔[K
[fe/2e0c27] process > FastQC (MV411_RNP_SPI1-H3K2... [100%] 17 of 17, cached:...[K
[68/a565e7] process > TrimGalore (MV411_RNP_MYB-H... [100%] 17 of 17, cached:...[K
[a2/0c2388] process > BWAMem (MV411_RNP_RUNX1-H3K... [100%] 17 of 17, cached:...[K
[13/4f49fa] process > SortBAM (INPUT_R1_T1)          [100%] 17 of 17, cached:...[K
[5a/6aaef3] process > MergeBAM (MV411_RNP_RUNX2-H... [100%] 17 of 17, cached:...[K
[48/4b5e62] process > MergeBAMFilter (MV411_RNP_R... [100%] 17 of 17 ✔[K
[57/199f77] process > MergeBAMRemoveOrphan (MV411... [ 50%] 2 of 4[K
[-        ] process > Preseq                         -[K
[-        ] process > CollectMultipleMetrics         -[K
[-        ] process > BigWig           

[28A
executor >  local (33)[K
[eb/23cd77] process > CheckDesign (additional_deg... [100%] 1 of 1, cached: 1 ✔[K
[c9/ec8de4] process > MakeTSSBED (genes.bed)         [100%] 1 of 1, cached: 1 ✔[K
[0d/f433a4] process > MakeGenomeFilter (genome.fa)   [100%] 1 of 1, cached: 1 ✔[K
[fe/2e0c27] process > FastQC (MV411_RNP_SPI1-H3K2... [100%] 17 of 17, cached:...[K
[68/a565e7] process > TrimGalore (MV411_RNP_MYB-H... [100%] 17 of 17, cached:...[K
[a2/0c2388] process > BWAMem (MV411_RNP_RUNX1-H3K... [100%] 17 of 17, cached:...[K
[13/4f49fa] process > SortBAM (INPUT_R1_T1)          [100%] 17 of 17, cached:...[K
[5a/6aaef3] process > MergeBAM (MV411_RNP_RUNX2-H... [100%] 17 of 17, cached:...[K
[48/4b5e62] process > MergeBAMFilter (MV411_RNP_R... [100%] 17 of 17 ✔[K
[63/892aee] process > MergeBAMRemoveOrphan (MV411... [ 78%] 7 of 9[K
[-        ] process > Preseq                         -[K
[-        ] process > CollectMultipleMetrics         -[K
[-        ] process > BigWig           

[28A
executor >  local (38)[K
[eb/23cd77] process > CheckDesign (additional_deg... [100%] 1 of 1, cached: 1 ✔[K
[c9/ec8de4] process > MakeTSSBED (genes.bed)         [100%] 1 of 1, cached: 1 ✔[K
[0d/f433a4] process > MakeGenomeFilter (genome.fa)   [100%] 1 of 1, cached: 1 ✔[K
[fe/2e0c27] process > FastQC (MV411_RNP_SPI1-H3K2... [100%] 17 of 17, cached:...[K
[68/a565e7] process > TrimGalore (MV411_RNP_MYB-H... [100%] 17 of 17, cached:...[K
[a2/0c2388] process > BWAMem (MV411_RNP_RUNX1-H3K... [100%] 17 of 17, cached:...[K
[13/4f49fa] process > SortBAM (INPUT_R1_T1)          [100%] 17 of 17, cached:...[K
[5a/6aaef3] process > MergeBAM (MV411_RNP_RUNX2-H... [100%] 17 of 17, cached:...[K
[48/4b5e62] process > MergeBAMFilter (MV411_RNP_R... [100%] 17 of 17 ✔[K
[ba/00ce48] process > MergeBAMRemoveOrphan (MV411... [ 79%] 11 of 14[K
[-        ] process > Preseq                         -[K
[-        ] process > CollectMultipleMetrics         -[K
[-        ] process > BigWig         

[28A
executor >  local (41)[K
[eb/23cd77] process > CheckDesign (additional_deg... [100%] 1 of 1, cached: 1 ✔[K
[c9/ec8de4] process > MakeTSSBED (genes.bed)         [100%] 1 of 1, cached: 1 ✔[K
[0d/f433a4] process > MakeGenomeFilter (genome.fa)   [100%] 1 of 1, cached: 1 ✔[K
[fe/2e0c27] process > FastQC (MV411_RNP_SPI1-H3K2... [100%] 17 of 17, cached:...[K
[68/a565e7] process > TrimGalore (MV411_RNP_MYB-H... [100%] 17 of 17, cached:...[K
[a2/0c2388] process > BWAMem (MV411_RNP_RUNX1-H3K... [100%] 17 of 17, cached:...[K
[13/4f49fa] process > SortBAM (INPUT_R1_T1)          [100%] 17 of 17, cached:...[K
[5a/6aaef3] process > MergeBAM (MV411_RNP_RUNX2-H... [100%] 17 of 17, cached:...[K
[48/4b5e62] process > MergeBAMFilter (MV411_RNP_R... [100%] 17 of 17 ✔[K
[c0/52c2db] process > MergeBAMRemoveOrphan (MV411... [ 94%] 15 of 16[K
[-        ] process > Preseq                         -[K
[-        ] process > CollectMultipleMetrics         -[K
[-        ] process > BigWig         

[28A
executor >  local (46)[K
[eb/23cd77] process > CheckDesign (additional_deg... [100%] 1 of 1, cached: 1 ✔[K
[c9/ec8de4] process > MakeTSSBED (genes.bed)         [100%] 1 of 1, cached: 1 ✔[K
[0d/f433a4] process > MakeGenomeFilter (genome.fa)   [100%] 1 of 1, cached: 1 ✔[K
[fe/2e0c27] process > FastQC (MV411_RNP_SPI1-H3K2... [100%] 17 of 17, cached:...[K
[68/a565e7] process > TrimGalore (MV411_RNP_MYB-H... [100%] 17 of 17, cached:...[K
[a2/0c2388] process > BWAMem (MV411_RNP_RUNX1-H3K... [100%] 17 of 17, cached:...[K
[13/4f49fa] process > SortBAM (INPUT_R1_T1)          [100%] 17 of 17, cached:...[K
[5a/6aaef3] process > MergeBAM (MV411_RNP_RUNX2-H... [100%] 17 of 17, cached:...[K
[48/4b5e62] process > MergeBAMFilter (MV411_RNP_R... [100%] 17 of 17 ✔[K
[dc/6cb451] process > MergeBAMRemoveOrphan (MV411... [ 94%] 16 of 17[K
[-        ] process > Preseq                         -[K
[18/e3205c] process > CollectMultipleMetrics (MV4... [100%] 1 of 1[K
[fa/7ca520] process > Big

[c1/b1a581] process > PhantomPeakQualTools (MV411... [ 67%] 2 of 3[K
[-        ] process > PlotFingerprint                -[K
[-        ] process > MACSCallPeak                   -[K
[-        ] process > AnnotatePeaks                  -[K
[-        ] process > PeakQC                         -[K
[-        ] process > ConsensusPeakSet               -[K
[-        ] process > ConsensusPeakSetAnnotate       -[K
[-        ] process > ConsensusPeakSetDESeq          -[K
[-        ] process > IGV                            -[K
[7e/9c912a] process > get_software_versions          [100%] 1 of 1 ✔[K
[-        ] process > MultiQC                        -[K
[8a/7334d7] process > output_documentation           [100%] 1 of 1, cached: 1 ✔[K
[28A
executor >  local (51)[K
[eb/23cd77] process > CheckDesign (additional_deg... [100%] 1 of 1, cached: 1 ✔[K
[c9/ec8de4] process > MakeTSSBED (genes.bed)         [100%] 1 of 1, cached: 1 ✔[K
[0d/f433a4] process > MakeGenomeFilter (genome.fa)   [1

[28A
executor >  local (56)[K
[eb/23cd77] process > CheckDesign (additional_deg... [100%] 1 of 1, cached: 1 ✔[K
[c9/ec8de4] process > MakeTSSBED (genes.bed)         [100%] 1 of 1, cached: 1 ✔[K
[0d/f433a4] process > MakeGenomeFilter (genome.fa)   [100%] 1 of 1, cached: 1 ✔[K
[fe/2e0c27] process > FastQC (MV411_RNP_SPI1-H3K2... [100%] 17 of 17, cached:...[K
[68/a565e7] process > TrimGalore (MV411_RNP_MYB-H... [100%] 17 of 17, cached:...[K
[a2/0c2388] process > BWAMem (MV411_RNP_RUNX1-H3K... [100%] 17 of 17, cached:...[K
[13/4f49fa] process > SortBAM (INPUT_R1_T1)          [100%] 17 of 17, cached:...[K
[5a/6aaef3] process > MergeBAM (MV411_RNP_RUNX2-H... [100%] 17 of 17, cached:...[K
[48/4b5e62] process > MergeBAMFilter (MV411_RNP_R... [100%] 17 of 17 ✔[K
[dc/6cb451] process > MergeBAMRemoveOrphan (MV411... [100%] 17 of 17 ✔[K
[-        ] process > Preseq                         -[K
[36/03ab9d] process > CollectMultipleMetrics (MV4... [ 80%] 4 of 5[K
[26/8106f4] process > B

[28A
executor >  local (60)[K
[eb/23cd77] process > CheckDesign (additional_deg... [100%] 1 of 1, cached: 1 ✔[K
[c9/ec8de4] process > MakeTSSBED (genes.bed)         [100%] 1 of 1, cached: 1 ✔[K
[0d/f433a4] process > MakeGenomeFilter (genome.fa)   [100%] 1 of 1, cached: 1 ✔[K
[fe/2e0c27] process > FastQC (MV411_RNP_SPI1-H3K2... [100%] 17 of 17, cached:...[K
[68/a565e7] process > TrimGalore (MV411_RNP_MYB-H... [100%] 17 of 17, cached:...[K
[a2/0c2388] process > BWAMem (MV411_RNP_RUNX1-H3K... [100%] 17 of 17, cached:...[K
[13/4f49fa] process > SortBAM (INPUT_R1_T1)          [100%] 17 of 17, cached:...[K
[5a/6aaef3] process > MergeBAM (MV411_RNP_RUNX2-H... [100%] 17 of 17, cached:...[K
[48/4b5e62] process > MergeBAMFilter (MV411_RNP_R... [100%] 17 of 17 ✔[K
[dc/6cb451] process > MergeBAMRemoveOrphan (MV411... [100%] 17 of 17 ✔[K
[-        ] process > Preseq                         -[K
[36/03ab9d] process > CollectMultipleMetrics (MV4... [100%] 5 of 5[K
[2d/982381] process > B

[28A
executor >  local (65)[K
[eb/23cd77] process > CheckDesign (additional_deg... [100%] 1 of 1, cached: 1 ✔[K
[c9/ec8de4] process > MakeTSSBED (genes.bed)         [100%] 1 of 1, cached: 1 ✔[K
[0d/f433a4] process > MakeGenomeFilter (genome.fa)   [100%] 1 of 1, cached: 1 ✔[K
[fe/2e0c27] process > FastQC (MV411_RNP_SPI1-H3K2... [100%] 17 of 17, cached:...[K
[68/a565e7] process > TrimGalore (MV411_RNP_MYB-H... [100%] 17 of 17, cached:...[K
[a2/0c2388] process > BWAMem (MV411_RNP_RUNX1-H3K... [100%] 17 of 17, cached:...[K
[13/4f49fa] process > SortBAM (INPUT_R1_T1)          [100%] 17 of 17, cached:...[K
[5a/6aaef3] process > MergeBAM (MV411_RNP_RUNX2-H... [100%] 17 of 17, cached:...[K
[48/4b5e62] process > MergeBAMFilter (MV411_RNP_R... [100%] 17 of 17 ✔[K
[dc/6cb451] process > MergeBAMRemoveOrphan (MV411... [100%] 17 of 17 ✔[K
[-        ] process > Preseq                         -[K
[6f/0c32f5] process > CollectMultipleMetrics (INP... [100%] 6 of 6[K
[2d/982381] process > B

[28A
executor >  local (70)[K
[eb/23cd77] process > CheckDesign (additional_deg... [100%] 1 of 1, cached: 1 ✔[K
[c9/ec8de4] process > MakeTSSBED (genes.bed)         [100%] 1 of 1, cached: 1 ✔[K
[0d/f433a4] process > MakeGenomeFilter (genome.fa)   [100%] 1 of 1, cached: 1 ✔[K
[fe/2e0c27] process > FastQC (MV411_RNP_SPI1-H3K2... [100%] 17 of 17, cached:...[K
[68/a565e7] process > TrimGalore (MV411_RNP_MYB-H... [100%] 17 of 17, cached:...[K
[a2/0c2388] process > BWAMem (MV411_RNP_RUNX1-H3K... [100%] 17 of 17, cached:...[K
[13/4f49fa] process > SortBAM (INPUT_R1_T1)          [100%] 17 of 17, cached:...[K
[5a/6aaef3] process > MergeBAM (MV411_RNP_RUNX2-H... [100%] 17 of 17, cached:...[K
[48/4b5e62] process > MergeBAMFilter (MV411_RNP_R... [100%] 17 of 17 ✔[K
[dc/6cb451] process > MergeBAMRemoveOrphan (MV411... [100%] 17 of 17 ✔[K
[-        ] process > Preseq                         -[K
[77/85e402] process > CollectMultipleMetrics (MV4... [100%] 7 of 7[K
[83/f433d6] process > B

[28A
executor >  local (74)[K
[eb/23cd77] process > CheckDesign (additional_deg... [100%] 1 of 1, cached: 1 ✔[K
[c9/ec8de4] process > MakeTSSBED (genes.bed)         [100%] 1 of 1, cached: 1 ✔[K
[0d/f433a4] process > MakeGenomeFilter (genome.fa)   [100%] 1 of 1, cached: 1 ✔[K
[fe/2e0c27] process > FastQC (MV411_RNP_SPI1-H3K2... [100%] 17 of 17, cached:...[K
[68/a565e7] process > TrimGalore (MV411_RNP_MYB-H... [100%] 17 of 17, cached:...[K
[a2/0c2388] process > BWAMem (MV411_RNP_RUNX1-H3K... [100%] 17 of 17, cached:...[K
[13/4f49fa] process > SortBAM (INPUT_R1_T1)          [100%] 17 of 17, cached:...[K
[5a/6aaef3] process > MergeBAM (MV411_RNP_RUNX2-H... [100%] 17 of 17, cached:...[K
[48/4b5e62] process > MergeBAMFilter (MV411_RNP_R... [100%] 17 of 17 ✔[K
[dc/6cb451] process > MergeBAMRemoveOrphan (MV411... [100%] 17 of 17 ✔[K
[-        ] process > Preseq                         -[K
[f7/870fa3] process > CollectMultipleMetrics (MV4... [100%] 8 of 8[K
[be/8c5392] process > B

[28A
executor >  local (79)[K
[eb/23cd77] process > CheckDesign (additional_deg... [100%] 1 of 1, cached: 1 ✔[K
[c9/ec8de4] process > MakeTSSBED (genes.bed)         [100%] 1 of 1, cached: 1 ✔[K
[0d/f433a4] process > MakeGenomeFilter (genome.fa)   [100%] 1 of 1, cached: 1 ✔[K
[fe/2e0c27] process > FastQC (MV411_RNP_SPI1-H3K2... [100%] 17 of 17, cached:...[K
[68/a565e7] process > TrimGalore (MV411_RNP_MYB-H... [100%] 17 of 17, cached:...[K
[a2/0c2388] process > BWAMem (MV411_RNP_RUNX1-H3K... [100%] 17 of 17, cached:...[K
[13/4f49fa] process > SortBAM (INPUT_R1_T1)          [100%] 17 of 17, cached:...[K
[5a/6aaef3] process > MergeBAM (MV411_RNP_RUNX2-H... [100%] 17 of 17, cached:...[K
[48/4b5e62] process > MergeBAMFilter (MV411_RNP_R... [100%] 17 of 17 ✔[K
[dc/6cb451] process > MergeBAMRemoveOrphan (MV411... [100%] 17 of 17 ✔[K
[-        ] process > Preseq                         -[K
[eb/85c928] process > CollectMultipleMetrics (MV4... [100%] 9 of 9[K
[4e/c46492] process > B

[28A
executor >  local (84)[K
[eb/23cd77] process > CheckDesign (additional_deg... [100%] 1 of 1, cached: 1 ✔[K
[c9/ec8de4] process > MakeTSSBED (genes.bed)         [100%] 1 of 1, cached: 1 ✔[K
[0d/f433a4] process > MakeGenomeFilter (genome.fa)   [100%] 1 of 1, cached: 1 ✔[K
[fe/2e0c27] process > FastQC (MV411_RNP_SPI1-H3K2... [100%] 17 of 17, cached:...[K
[68/a565e7] process > TrimGalore (MV411_RNP_MYB-H... [100%] 17 of 17, cached:...[K
[a2/0c2388] process > BWAMem (MV411_RNP_RUNX1-H3K... [100%] 17 of 17, cached:...[K
[13/4f49fa] process > SortBAM (INPUT_R1_T1)          [100%] 17 of 17, cached:...[K
[5a/6aaef3] process > MergeBAM (MV411_RNP_RUNX2-H... [100%] 17 of 17, cached:...[K
[48/4b5e62] process > MergeBAMFilter (MV411_RNP_R... [100%] 17 of 17 ✔[K
[dc/6cb451] process > MergeBAMRemoveOrphan (MV411... [100%] 17 of 17 ✔[K
[-        ] process > Preseq                         -[K
[af/724a3d] process > CollectMultipleMetrics (MV4... [ 91%] 10 of 11[K
[60/3850df] process >

[28A
executor >  local (89)[K
[eb/23cd77] process > CheckDesign (additional_deg... [100%] 1 of 1, cached: 1 ✔[K
[c9/ec8de4] process > MakeTSSBED (genes.bed)         [100%] 1 of 1, cached: 1 ✔[K
[0d/f433a4] process > MakeGenomeFilter (genome.fa)   [100%] 1 of 1, cached: 1 ✔[K
[fe/2e0c27] process > FastQC (MV411_RNP_SPI1-H3K2... [100%] 17 of 17, cached:...[K
[68/a565e7] process > TrimGalore (MV411_RNP_MYB-H... [100%] 17 of 17, cached:...[K
[a2/0c2388] process > BWAMem (MV411_RNP_RUNX1-H3K... [100%] 17 of 17, cached:...[K
[13/4f49fa] process > SortBAM (INPUT_R1_T1)          [100%] 17 of 17, cached:...[K
[5a/6aaef3] process > MergeBAM (MV411_RNP_RUNX2-H... [100%] 17 of 17, cached:...[K
[48/4b5e62] process > MergeBAMFilter (MV411_RNP_R... [100%] 17 of 17 ✔[K
[dc/6cb451] process > MergeBAMRemoveOrphan (MV411... [100%] 17 of 17 ✔[K
[-        ] process > Preseq                         -[K
[be/e4f2a1] process > CollectMultipleMetrics (MV4... [100%] 12 of 12[K
[eb/3a1856] process >

[28A
executor >  local (94)[K
[eb/23cd77] process > CheckDesign (additional_deg... [100%] 1 of 1, cached: 1 ✔[K
[c9/ec8de4] process > MakeTSSBED (genes.bed)         [100%] 1 of 1, cached: 1 ✔[K
[0d/f433a4] process > MakeGenomeFilter (genome.fa)   [100%] 1 of 1, cached: 1 ✔[K
[fe/2e0c27] process > FastQC (MV411_RNP_SPI1-H3K2... [100%] 17 of 17, cached:...[K
[68/a565e7] process > TrimGalore (MV411_RNP_MYB-H... [100%] 17 of 17, cached:...[K
[a2/0c2388] process > BWAMem (MV411_RNP_RUNX1-H3K... [100%] 17 of 17, cached:...[K
[13/4f49fa] process > SortBAM (INPUT_R1_T1)          [100%] 17 of 17, cached:...[K
[5a/6aaef3] process > MergeBAM (MV411_RNP_RUNX2-H... [100%] 17 of 17, cached:...[K
[48/4b5e62] process > MergeBAMFilter (MV411_RNP_R... [100%] 17 of 17 ✔[K
[dc/6cb451] process > MergeBAMRemoveOrphan (MV411... [100%] 17 of 17 ✔[K
[-        ] process > Preseq                         -[K
[58/9c859d] process > CollectMultipleMetrics (MV4... [100%] 13 of 13[K
[eb/3a1856] process >

[28A
executor >  local (99)[K
[eb/23cd77] process > CheckDesign (additional_deg... [100%] 1 of 1, cached: 1 ✔[K
[c9/ec8de4] process > MakeTSSBED (genes.bed)         [100%] 1 of 1, cached: 1 ✔[K
[0d/f433a4] process > MakeGenomeFilter (genome.fa)   [100%] 1 of 1, cached: 1 ✔[K
[fe/2e0c27] process > FastQC (MV411_RNP_SPI1-H3K2... [100%] 17 of 17, cached:...[K
[68/a565e7] process > TrimGalore (MV411_RNP_MYB-H... [100%] 17 of 17, cached:...[K
[a2/0c2388] process > BWAMem (MV411_RNP_RUNX1-H3K... [100%] 17 of 17, cached:...[K
[13/4f49fa] process > SortBAM (INPUT_R1_T1)          [100%] 17 of 17, cached:...[K
[5a/6aaef3] process > MergeBAM (MV411_RNP_RUNX2-H... [100%] 17 of 17, cached:...[K
[48/4b5e62] process > MergeBAMFilter (MV411_RNP_R... [100%] 17 of 17 ✔[K
[dc/6cb451] process > MergeBAMRemoveOrphan (MV411... [100%] 17 of 17 ✔[K
[-        ] process > Preseq                         -[K
[39/7b314d] process > CollectMultipleMetrics (MV4... [100%] 14 of 14[K
[e1/c25e9c] process >

[28A
executor >  local (104)[K
[eb/23cd77] process > CheckDesign (additional_deg... [100%] 1 of 1, cached: 1 ✔[K
[c9/ec8de4] process > MakeTSSBED (genes.bed)         [100%] 1 of 1, cached: 1 ✔[K
[0d/f433a4] process > MakeGenomeFilter (genome.fa)   [100%] 1 of 1, cached: 1 ✔[K
[fe/2e0c27] process > FastQC (MV411_RNP_SPI1-H3K2... [100%] 17 of 17, cached:...[K
[68/a565e7] process > TrimGalore (MV411_RNP_MYB-H... [100%] 17 of 17, cached:...[K
[a2/0c2388] process > BWAMem (MV411_RNP_RUNX1-H3K... [100%] 17 of 17, cached:...[K
[13/4f49fa] process > SortBAM (INPUT_R1_T1)          [100%] 17 of 17, cached:...[K
[5a/6aaef3] process > MergeBAM (MV411_RNP_RUNX2-H... [100%] 17 of 17, cached:...[K
[48/4b5e62] process > MergeBAMFilter (MV411_RNP_R... [100%] 17 of 17 ✔[K
[dc/6cb451] process > MergeBAMRemoveOrphan (MV411... [100%] 17 of 17 ✔[K
[-        ] process > Preseq                         -[K
[85/5ed89a] process > CollectMultipleMetrics (MV4... [ 94%] 15 of 16[K
[9c/f2630b] process 

[28A
executor >  local (109)[K
[eb/23cd77] process > CheckDesign (additional_deg... [100%] 1 of 1, cached: 1 ✔[K
[c9/ec8de4] process > MakeTSSBED (genes.bed)         [100%] 1 of 1, cached: 1 ✔[K
[0d/f433a4] process > MakeGenomeFilter (genome.fa)   [100%] 1 of 1, cached: 1 ✔[K
[fe/2e0c27] process > FastQC (MV411_RNP_SPI1-H3K2... [100%] 17 of 17, cached:...[K
[68/a565e7] process > TrimGalore (MV411_RNP_MYB-H... [100%] 17 of 17, cached:...[K
[a2/0c2388] process > BWAMem (MV411_RNP_RUNX1-H3K... [100%] 17 of 17, cached:...[K
[13/4f49fa] process > SortBAM (INPUT_R1_T1)          [100%] 17 of 17, cached:...[K
[5a/6aaef3] process > MergeBAM (MV411_RNP_RUNX2-H... [100%] 17 of 17, cached:...[K
[48/4b5e62] process > MergeBAMFilter (MV411_RNP_R... [100%] 17 of 17 ✔[K
[dc/6cb451] process > MergeBAMRemoveOrphan (MV411... [100%] 17 of 17 ✔[K
[-        ] process > Preseq                         -[K
[7b/722dae] process > CollectMultipleMetrics (MV4... [100%] 17 of 17 ✔[K
[a1/2654ae] proces

[28A
executor >  local (114)[K
[eb/23cd77] process > CheckDesign (additional_deg... [100%] 1 of 1, cached: 1 ✔[K
[c9/ec8de4] process > MakeTSSBED (genes.bed)         [100%] 1 of 1, cached: 1 ✔[K
[0d/f433a4] process > MakeGenomeFilter (genome.fa)   [100%] 1 of 1, cached: 1 ✔[K
[fe/2e0c27] process > FastQC (MV411_RNP_SPI1-H3K2... [100%] 17 of 17, cached:...[K
[68/a565e7] process > TrimGalore (MV411_RNP_MYB-H... [100%] 17 of 17, cached:...[K
[a2/0c2388] process > BWAMem (MV411_RNP_RUNX1-H3K... [100%] 17 of 17, cached:...[K
[13/4f49fa] process > SortBAM (INPUT_R1_T1)          [100%] 17 of 17, cached:...[K
[5a/6aaef3] process > MergeBAM (MV411_RNP_RUNX2-H... [100%] 17 of 17, cached:...[K
[48/4b5e62] process > MergeBAMFilter (MV411_RNP_R... [100%] 17 of 17 ✔[K
[dc/6cb451] process > MergeBAMRemoveOrphan (MV411... [100%] 17 of 17 ✔[K
[-        ] process > Preseq                         -[K
[7b/722dae] process > CollectMultipleMetrics (MV4... [100%] 17 of 17 ✔[K
[a1/2654ae] proces

[28A
executor >  local (120)[K
[eb/23cd77] process > CheckDesign (additional_deg... [100%] 1 of 1, cached: 1 ✔[K
[c9/ec8de4] process > MakeTSSBED (genes.bed)         [100%] 1 of 1, cached: 1 ✔[K
[0d/f433a4] process > MakeGenomeFilter (genome.fa)   [100%] 1 of 1, cached: 1 ✔[K
[fe/2e0c27] process > FastQC (MV411_RNP_SPI1-H3K2... [100%] 17 of 17, cached:...[K
[68/a565e7] process > TrimGalore (MV411_RNP_MYB-H... [100%] 17 of 17, cached:...[K
[a2/0c2388] process > BWAMem (MV411_RNP_RUNX1-H3K... [100%] 17 of 17, cached:...[K
[13/4f49fa] process > SortBAM (INPUT_R1_T1)          [100%] 17 of 17, cached:...[K
[5a/6aaef3] process > MergeBAM (MV411_RNP_RUNX2-H... [100%] 17 of 17, cached:...[K
[48/4b5e62] process > MergeBAMFilter (MV411_RNP_R... [100%] 17 of 17 ✔[K
[dc/6cb451] process > MergeBAMRemoveOrphan (MV411... [100%] 17 of 17 ✔[K
[-        ] process > Preseq                         -[K
[7b/722dae] process > CollectMultipleMetrics (MV4... [100%] 17 of 17 ✔[K
[a1/2654ae] proces

[28A
executor >  local (125)[K
[eb/23cd77] process > CheckDesign (additional_deg... [100%] 1 of 1, cached: 1 ✔[K
[c9/ec8de4] process > MakeTSSBED (genes.bed)         [100%] 1 of 1, cached: 1 ✔[K
[0d/f433a4] process > MakeGenomeFilter (genome.fa)   [100%] 1 of 1, cached: 1 ✔[K
[fe/2e0c27] process > FastQC (MV411_RNP_SPI1-H3K2... [100%] 17 of 17, cached:...[K
[68/a565e7] process > TrimGalore (MV411_RNP_MYB-H... [100%] 17 of 17, cached:...[K
[a2/0c2388] process > BWAMem (MV411_RNP_RUNX1-H3K... [100%] 17 of 17, cached:...[K
[13/4f49fa] process > SortBAM (INPUT_R1_T1)          [100%] 17 of 17, cached:...[K
[5a/6aaef3] process > MergeBAM (MV411_RNP_RUNX2-H... [100%] 17 of 17, cached:...[K
[48/4b5e62] process > MergeBAMFilter (MV411_RNP_R... [100%] 17 of 17 ✔[K
[dc/6cb451] process > MergeBAMRemoveOrphan (MV411... [100%] 17 of 17 ✔[K
[-        ] process > Preseq                         -[K
[7b/722dae] process > CollectMultipleMetrics (MV4... [100%] 17 of 17 ✔[K
[a1/2654ae] proces

[28A
executor >  local (129)[K
[eb/23cd77] process > CheckDesign (additional_deg... [100%] 1 of 1, cached: 1 ✔[K
[c9/ec8de4] process > MakeTSSBED (genes.bed)         [100%] 1 of 1, cached: 1 ✔[K
[0d/f433a4] process > MakeGenomeFilter (genome.fa)   [100%] 1 of 1, cached: 1 ✔[K
[fe/2e0c27] process > FastQC (MV411_RNP_SPI1-H3K2... [100%] 17 of 17, cached:...[K
[68/a565e7] process > TrimGalore (MV411_RNP_MYB-H... [100%] 17 of 17, cached:...[K
[a2/0c2388] process > BWAMem (MV411_RNP_RUNX1-H3K... [100%] 17 of 17, cached:...[K
[13/4f49fa] process > SortBAM (INPUT_R1_T1)          [100%] 17 of 17, cached:...[K
[5a/6aaef3] process > MergeBAM (MV411_RNP_RUNX2-H... [100%] 17 of 17, cached:...[K
[48/4b5e62] process > MergeBAMFilter (MV411_RNP_R... [100%] 17 of 17 ✔[K
[dc/6cb451] process > MergeBAMRemoveOrphan (MV411... [100%] 17 of 17 ✔[K
[-        ] process > Preseq                         -[K
[7b/722dae] process > CollectMultipleMetrics (MV4... [100%] 17 of 17 ✔[K
[a1/2654ae] proces

[28A
executor >  local (133)[K
[eb/23cd77] process > CheckDesign (additional_deg... [100%] 1 of 1, cached: 1 ✔[K
[c9/ec8de4] process > MakeTSSBED (genes.bed)         [100%] 1 of 1, cached: 1 ✔[K
[0d/f433a4] process > MakeGenomeFilter (genome.fa)   [100%] 1 of 1, cached: 1 ✔[K
[fe/2e0c27] process > FastQC (MV411_RNP_SPI1-H3K2... [100%] 17 of 17, cached:...[K
[68/a565e7] process > TrimGalore (MV411_RNP_MYB-H... [100%] 17 of 17, cached:...[K
[a2/0c2388] process > BWAMem (MV411_RNP_RUNX1-H3K... [100%] 17 of 17, cached:...[K
[13/4f49fa] process > SortBAM (INPUT_R1_T1)          [100%] 17 of 17, cached:...[K
[5a/6aaef3] process > MergeBAM (MV411_RNP_RUNX2-H... [100%] 17 of 17, cached:...[K
[48/4b5e62] process > MergeBAMFilter (MV411_RNP_R... [100%] 17 of 17 ✔[K
[dc/6cb451] process > MergeBAMRemoveOrphan (MV411... [100%] 17 of 17 ✔[K
[-        ] process > Preseq                         -[K
[7b/722dae] process > CollectMultipleMetrics (MV4... [100%] 17 of 17 ✔[K
[a1/2654ae] proces

[28A
executor >  local (138)[K
[eb/23cd77] process > CheckDesign (additional_deg... [100%] 1 of 1, cached: 1 ✔[K
[c9/ec8de4] process > MakeTSSBED (genes.bed)         [100%] 1 of 1, cached: 1 ✔[K
[0d/f433a4] process > MakeGenomeFilter (genome.fa)   [100%] 1 of 1, cached: 1 ✔[K
[fe/2e0c27] process > FastQC (MV411_RNP_SPI1-H3K2... [100%] 17 of 17, cached:...[K
[68/a565e7] process > TrimGalore (MV411_RNP_MYB-H... [100%] 17 of 17, cached:...[K
[a2/0c2388] process > BWAMem (MV411_RNP_RUNX1-H3K... [100%] 17 of 17, cached:...[K
[13/4f49fa] process > SortBAM (INPUT_R1_T1)          [100%] 17 of 17, cached:...[K
[5a/6aaef3] process > MergeBAM (MV411_RNP_RUNX2-H... [100%] 17 of 17, cached:...[K
[48/4b5e62] process > MergeBAMFilter (MV411_RNP_R... [100%] 17 of 17 ✔[K
[dc/6cb451] process > MergeBAMRemoveOrphan (MV411... [100%] 17 of 17 ✔[K
[-        ] process > Preseq                         -[K
[7b/722dae] process > CollectMultipleMetrics (MV4... [100%] 17 of 17 ✔[K
[a1/2654ae] proces

[28A
executor >  local (143)[K
[eb/23cd77] process > CheckDesign (additional_deg... [100%] 1 of 1, cached: 1 ✔[K
[c9/ec8de4] process > MakeTSSBED (genes.bed)         [100%] 1 of 1, cached: 1 ✔[K
[0d/f433a4] process > MakeGenomeFilter (genome.fa)   [100%] 1 of 1, cached: 1 ✔[K
[fe/2e0c27] process > FastQC (MV411_RNP_SPI1-H3K2... [100%] 17 of 17, cached:...[K
[68/a565e7] process > TrimGalore (MV411_RNP_MYB-H... [100%] 17 of 17, cached:...[K
[a2/0c2388] process > BWAMem (MV411_RNP_RUNX1-H3K... [100%] 17 of 17, cached:...[K
[13/4f49fa] process > SortBAM (INPUT_R1_T1)          [100%] 17 of 17, cached:...[K
[5a/6aaef3] process > MergeBAM (MV411_RNP_RUNX2-H... [100%] 17 of 17, cached:...[K
[48/4b5e62] process > MergeBAMFilter (MV411_RNP_R... [100%] 17 of 17 ✔[K
[dc/6cb451] process > MergeBAMRemoveOrphan (MV411... [100%] 17 of 17 ✔[K
[-        ] process > Preseq                         -[K
[7b/722dae] process > CollectMultipleMetrics (MV4... [100%] 17 of 17 ✔[K
[a1/2654ae] proces

[28A
executor >  local (148)[K
[eb/23cd77] process > CheckDesign (additional_deg... [100%] 1 of 1, cached: 1 ✔[K
[c9/ec8de4] process > MakeTSSBED (genes.bed)         [100%] 1 of 1, cached: 1 ✔[K
[0d/f433a4] process > MakeGenomeFilter (genome.fa)   [100%] 1 of 1, cached: 1 ✔[K
[fe/2e0c27] process > FastQC (MV411_RNP_SPI1-H3K2... [100%] 17 of 17, cached:...[K
[68/a565e7] process > TrimGalore (MV411_RNP_MYB-H... [100%] 17 of 17, cached:...[K
[a2/0c2388] process > BWAMem (MV411_RNP_RUNX1-H3K... [100%] 17 of 17, cached:...[K
[13/4f49fa] process > SortBAM (INPUT_R1_T1)          [100%] 17 of 17, cached:...[K
[5a/6aaef3] process > MergeBAM (MV411_RNP_RUNX2-H... [100%] 17 of 17, cached:...[K
[48/4b5e62] process > MergeBAMFilter (MV411_RNP_R... [100%] 17 of 17 ✔[K
[dc/6cb451] process > MergeBAMRemoveOrphan (MV411... [100%] 17 of 17 ✔[K
[-        ] process > Preseq                         -[K
[7b/722dae] process > CollectMultipleMetrics (MV4... [100%] 17 of 17 ✔[K
[a1/2654ae] proces

[28A
executor >  local (153)[K
[eb/23cd77] process > CheckDesign (additional_deg... [100%] 1 of 1, cached: 1 ✔[K
[c9/ec8de4] process > MakeTSSBED (genes.bed)         [100%] 1 of 1, cached: 1 ✔[K
[0d/f433a4] process > MakeGenomeFilter (genome.fa)   [100%] 1 of 1, cached: 1 ✔[K
[fe/2e0c27] process > FastQC (MV411_RNP_SPI1-H3K2... [100%] 17 of 17, cached:...[K
[68/a565e7] process > TrimGalore (MV411_RNP_MYB-H... [100%] 17 of 17, cached:...[K
[a2/0c2388] process > BWAMem (MV411_RNP_RUNX1-H3K... [100%] 17 of 17, cached:...[K
[13/4f49fa] process > SortBAM (INPUT_R1_T1)          [100%] 17 of 17, cached:...[K
[5a/6aaef3] process > MergeBAM (MV411_RNP_RUNX2-H... [100%] 17 of 17, cached:...[K
[48/4b5e62] process > MergeBAMFilter (MV411_RNP_R... [100%] 17 of 17 ✔[K
[dc/6cb451] process > MergeBAMRemoveOrphan (MV411... [100%] 17 of 17 ✔[K
[-        ] process > Preseq                         -[K
[7b/722dae] process > CollectMultipleMetrics (MV4... [100%] 17 of 17 ✔[K
[a1/2654ae] proces

[28A
executor >  local (158)[K
[eb/23cd77] process > CheckDesign (additional_deg... [100%] 1 of 1, cached: 1 ✔[K
[c9/ec8de4] process > MakeTSSBED (genes.bed)         [100%] 1 of 1, cached: 1 ✔[K
[0d/f433a4] process > MakeGenomeFilter (genome.fa)   [100%] 1 of 1, cached: 1 ✔[K
[fe/2e0c27] process > FastQC (MV411_RNP_SPI1-H3K2... [100%] 17 of 17, cached:...[K
[68/a565e7] process > TrimGalore (MV411_RNP_MYB-H... [100%] 17 of 17, cached:...[K
[a2/0c2388] process > BWAMem (MV411_RNP_RUNX1-H3K... [100%] 17 of 17, cached:...[K
[13/4f49fa] process > SortBAM (INPUT_R1_T1)          [100%] 17 of 17, cached:...[K
[5a/6aaef3] process > MergeBAM (MV411_RNP_RUNX2-H... [100%] 17 of 17, cached:...[K
[48/4b5e62] process > MergeBAMFilter (MV411_RNP_R... [100%] 17 of 17 ✔[K
[dc/6cb451] process > MergeBAMRemoveOrphan (MV411... [100%] 17 of 17 ✔[K
[-        ] process > Preseq                         -[K
[7b/722dae] process > CollectMultipleMetrics (MV4... [100%] 17 of 17 ✔[K
[a1/2654ae] proces

[28A
executor >  local (162)[K
[eb/23cd77] process > CheckDesign (additional_deg... [100%] 1 of 1, cached: 1 ✔[K
[c9/ec8de4] process > MakeTSSBED (genes.bed)         [100%] 1 of 1, cached: 1 ✔[K
[0d/f433a4] process > MakeGenomeFilter (genome.fa)   [100%] 1 of 1, cached: 1 ✔[K
[fe/2e0c27] process > FastQC (MV411_RNP_SPI1-H3K2... [100%] 17 of 17, cached:...[K
[68/a565e7] process > TrimGalore (MV411_RNP_MYB-H... [100%] 17 of 17, cached:...[K
[a2/0c2388] process > BWAMem (MV411_RNP_RUNX1-H3K... [100%] 17 of 17, cached:...[K
[13/4f49fa] process > SortBAM (INPUT_R1_T1)          [100%] 17 of 17, cached:...[K
[5a/6aaef3] process > MergeBAM (MV411_RNP_RUNX2-H... [100%] 17 of 17, cached:...[K
[48/4b5e62] process > MergeBAMFilter (MV411_RNP_R... [100%] 17 of 17 ✔[K
[dc/6cb451] process > MergeBAMRemoveOrphan (MV411... [100%] 17 of 17 ✔[K
[-        ] process > Preseq                         -[K
[7b/722dae] process > CollectMultipleMetrics (MV4... [100%] 17 of 17 ✔[K
[a1/2654ae] proces

[31A
executor >  local (163)[K
[eb/23cd77] process > CheckDesign (additional_deg... [100%] 1 of 1, cached: 1 ✔[K
[c9/ec8de4] process > MakeTSSBED (genes.bed)         [100%] 1 of 1, cached: 1 ✔[K
[0d/f433a4] process > MakeGenomeFilter (genome.fa)   [100%] 1 of 1, cached: 1 ✔[K
[fe/2e0c27] process > FastQC (MV411_RNP_SPI1-H3K2... [100%] 17 of 17, cached:...[K
[68/a565e7] process > TrimGalore (MV411_RNP_MYB-H... [100%] 17 of 17, cached:...[K
[a2/0c2388] process > BWAMem (MV411_RNP_RUNX1-H3K... [100%] 17 of 17, cached:...[K
[13/4f49fa] process > SortBAM (INPUT_R1_T1)          [100%] 17 of 17, cached:...[K
[5a/6aaef3] process > MergeBAM (MV411_RNP_RUNX2-H... [100%] 17 of 17, cached:...[K
[48/4b5e62] process > MergeBAMFilter (MV411_RNP_R... [100%] 17 of 17 ✔[K
[dc/6cb451] process > MergeBAMRemoveOrphan (MV411... [100%] 17 of 17 ✔[K
[-        ] process > Preseq                         -[K
[7b/722dae] process > CollectMultipleMetrics (MV4... [100%] 17 of 17 ✔[K
[a1/2654ae] proces

In [None]:
! gsutil -m cp -r gs://workamlproject/MEF2CMEF2D/output/* gs://amlproject/Chip/chipseq_MEF2CMEF2D_degraded/

In [None]:
# get scaling values
norm, mapped = h.getSpikeInControlScales(refgenome="../../data/ref/reference_droso.fna",
fastQfolder='../../data/'+project+'/fastqs/',
pairedEnd=True, cores=12,
tofilter=True,
totrim=True,
tomap=True,
results="../../data/"+project+"/",
pathtotrim_galore="../../TrimGalore-0.6.5/trim_galore")
norm, mapped

computing scales from the excel sheet

In [None]:
scales = [1.00,
1.00,
0.98,
1.08,
0.74,
0.99,
1.13,
0.94]

In [None]:
#! mkdir ../../data/$project/bwa 
#!mkdir ../../data/$project/bwa/mergedLibrary
#!gsutil cp gs://amlproject/Chip/$project/bwa/mergedLibrary/*.ba* ../../data/$project/bwa/mergedLibrary/
#! gsutil -m cp -r gs://amlproject/Chip/$project/bwa/mergedLibrary/bigwig/ ../../data/$project/bwa/mergedLibrary/
! gsutil  -m cp -r gs://amlproject/Chip/$project/bwa/mergedLibrary/macs/ ../../data/$project/bwa/mergedLibrary/

In [None]:
bams = ! ls ../../data/$project/bwa/mergedLibrary/*.bam
bams

### on scalled data

In [None]:
! mkdir ../../data/$project/diffPeaks/
! mkdir ../../data/$project/diffData/
! mkdir ../../data/$project/droso_aligned
! mv ../../data/$project/mp* ../../data/$project/droso_aligned

In [None]:
wigs = ! ls ../../data/$project/bwa/mergedLibrary/bigwig/*.bigWig
wigs

In [None]:
# diffPeak on scaled data (full reprocessing)
for i in range(len(bams[3:])):
    if i < 3:
        continue
    bam1 = bams[1+(i%2)]
    bam2 = bams[3+i]
    print(bam1,bam2)
    print(chip.fullDiffPeak(bam1,bam2, control1=bams[0], scaling=[1.0, scales[2+i]], directory = "../../data/"+project+"/diffData/", res_directory = "../../data/"+project+"/diffPeaks/",pairedend=True))

In [None]:
initscales = ! cat ../../data/$project/bwa/mergedLibrary/bigwig/scale/*.txt
initscales

In [None]:
rescales = [val*float(initscales[1+i]) for i, val in enumerate(scales)]
rescales

In [None]:
chip.bigWigFrom(bams[3:], 
                genome='GRCh38',scaling=rescales[2:],
               numthreads=8)

In [None]:
!mkdir ../../data/$project/recalib_bigwig/ 
!mv bigwig/* ../../data/$project/recalib_bigwig/
!cp ../../data/$project/bwa/mergedLibrary/bigwig/*AAVS1*.bigWig ../../data/$project/recalib_bigwig/

In [None]:
os.popen('for i in $(ls ../../data/'+project+'/diffPeaks/*.bed); \
            do echo $(wc -l $i); \
            done').read().split('\n')

In [None]:
bw = ! ls ../../data/$project/recalib_bigwig/*
bw

In [None]:
! gsutil -m cp -r ../../data/$project/droso_aligned gs://amlproject/Chip/$project/
! gsutil -m cp -r ../../data/$project/recalib_bigwig gs://amlproject/Chip/$project/

In [None]:
!mkdir ../results/$project/
!mkdir ../results/$project/plots
!mkdir ../results/$project/plots/heatmaps/

In [None]:
# GENOME WIDE comparison

In [None]:
peaks = ! ls ../../data/$project/bwa/mergedLibrary/macs/broadPeak/*.broadPeak

In [None]:
names = ["AAVS1", "AAVS1_v2", "MEF2C","MEF2C_v2","MEF2C-MEF2D","MEF2C-MEF2D_v2", "MEF2D","MEF2D_v2"]

In [None]:
for i, val in enumerate(bw):
    if i <0:
        continue
    name = names[i]
    print(name)
    chip.getPeaksAt(peaks[i], bigwigs = val, bigwignames= name, peaknames=['Macs2_Peaks'], window=3000, folder="", title=name, numthreads=8, refpoint="center", name='../../data/'+project+'/peakplot/'+name+'_mat.pdf', withDeeptools=True, torecompute=True, vmax=2.5, legendLoc="lower-left")

In [None]:
h.createFoldersFor('../results/'+project+'/plots/scaled/heatmaps/')
! cp ../../data/$project/*.pdf ../results/$project/plots/scaled/heatmaps/

In [None]:
cond1peak = ! ls ../../data/$project/diffPeaks/*cond1.bed
cond2peak = ! ls ../../data/$project/diffPeaks/*cond2.bed
commonpeak = ! ls ../../data/$project/diffPeaks/*common.bed
cond1peak

In [None]:
for i in range(int(len(bw[2:]))):
    if i <1:
        continue
    name1 = bw[i%2]
    name2 = bw[i+2]
    a = int(i/2)+3 if i%2 else int(i/2)
    peak = [cond1peak[a], commonpeak[a], cond2peak[a]]
   # for val in peak:
      #  chip.dropWeirdChromosomes(val)
    name = names[i+2]
    print(name,name1,name2,a)
    chip.getPeaksAt(peak, [name1, name2], bigwignames=['DMSO', 'VHL'], peaknames=['DMSO_peaks', 'common', 'VHL_peaks'], window=3000, folder="", title=name, numthreads=7, refpoint="center", name='../../data/'+project+'/diffPeaks/'+name+'_mat.pdf', withDeeptools=True, torecompute=False)

In [None]:
! mkdir ../results/additional_degraded_v1/plots/scaled/diffPeaks/
! cp ../../data/additional_degraded_v1/diffPeaks/*.pdf ../results/additional_degraded_v1/plots/scaled/diffPeaks/

### on unscalled data

In [None]:
bams = !ls ../../data/$project/bwa/mergedLibrary/mp*.bam
bams

In [None]:
! mkdir ../../data/$project/diffPeaks_unscaled

In [None]:
#on unscalled data 
for i in range(int(len(bams[3:]))):
    if i < 4:
        continue
    name1 = bams[1+i%2]
    name2 = bams[3+i]
    print(name1,name2)
    chip.fullDiffPeak(name1,name2, control1='../../data/'+project+'/bwa/mergedLibrary/INPUT_R1.mLb.clN.sorted.bam', directory = "../../data/"+project+"/diffData_unscaled/", res_directory = "../../data/"+project+"/diffPeaks_unscaled/",pairedend=False)

In [None]:
bw = ! ls ../../data/$project/bwa/mergedLibrary/bigwig/*.bigWig
bw

In [None]:
cond1peak = ! ls ../../data/$project/diffPeaks_unscaled/*cond1.bed
cond2peak = ! ls ../../data/$project/diffPeaks_unscaled/*cond2.bed
commonpeak = ! ls ../../data/$project/diffPeaks_unscaled/*common.bed
commonpeak

In [None]:
for i in range(int(len(bw[2:]))):
    name1 = bw[i%2]
    name2 = bw[2+i]
    a = int(i/2)+3 if i%2 else int(i/2)
    peak = [cond1peak[a], commonpeak[a], cond2peak[a]]
    #for val in peak:
     #   chip.dropWeirdChromosomes(val)
    name = names[i+2]
    print(name1,name2)
    chip.getPeaksAt(peak, [name1, name2], bigwignames=['DMSO', 'VHL'], peaknames=['DMSO_peaks', 'common', 'VHL_peaks'], window=3000, folder="", title=name, numthreads=7, torecompute=True, refpoint='center', name='../../data/'+project+'/diffPeaks_unscaled/'+name+'_mat.pdf', withDeeptools=True)

In [None]:
h.createFoldersFor('../results/'+project+'/plots/unscaled/diffPeaks/')
! cp ../../data/$project/diffPeaks_unscaled/*.pdf ../results/$project/plots/unscaled/diffPeaks/

! gsutil -m cp gs://amlproject/Chip/$project/bwa/mergedLibrary/deepTools/**.pdf ../results/$project/plots/

In [None]:
! gsutil -m cp -r ../../data/$project/diffPeaks gs://amlproject/Chip/$project/
! gsutil -m cp -r ../../data/$project/diffData gs://amlproject/Chip/$project/
! gsutil -m cp -r ../../data/$project/diffPeaks_unscaled gs://amlproject/Chip/$project/
! gsutil -m cp -r ../../data/$project/diffData_unscaled gs://amlproject/Chip/$project/
! gsutil -m cp -r ../../data/$project/peakplot gs://amlproject/Chip/$project/

## v8

In [238]:
project="chipseq_IRF8_degraded"

In [None]:
fastq = ! gsutil ls gs://transfer-amlproject/201023_MP8292_fastq/
fastq

In [240]:
mkdir ../data/$project/ && mkdir ../data/$project/qc/

mkdir: cannot create directory ‘../data/chipseq_IRF8_degraded/’: File exists


In [None]:
!gsutil -m cp gs://transfer-amlproject/201023_MP8292_fastq/multiqc_report.html ../data/$project/qc/
!gsutil -m cp -r gs://transfer-amlproject/201023_MP8292_fastq/Reports/ ../data/$project/qc/
!gsutil -m cp -r gs://transfer-amlproject/201023_MP8292_fastq/multiqc_data/ ../data/$project/qc/

In [None]:
! gsutil -m cp gs://transfer-amlproject/201023_MP8292_fastq/*  gs://transfer-amlproject/$project/

### analysis

In [None]:
rename = {
"20201023_IRF8_DMSO_MP8292_S119": "mp901-MV411_IRF8_NT_DMSO-IRF8-r1",
"20201023_IRF8_VHL_MP8292_S125": "mp902-MV411_IRF8_NT_VHL-IRF8-r1",
"20201023_MED1_DMSO_MP8292_S122": "mp903-MV411_IRF8_NT_DMSO-MED1-r1",
"20201023_MED1_VHL_MP8292_S128": "mp904-MV411_IRF8_NT_VHL-MED1-r1",
"20201023_MEF2C_DMSO_MP8292_S121": "mp905-MV411_IRF8_NT_DMSO-MEF2C-r1",
"20201023_MEF2C_VHL_MP8292_S127": "mp906-MV411_IRF8_NT_VHL-MEF2C-r1",
"20201023_MEF2D_DMSO_MP8292_S120": "mp907-MV411_IRF8_NT_DMSO-MEF2D-r1",
"20201023_MEF2D_VHL_MP8292_S126": "mp908-MV411_IRF8_NT_VHL-MEF2D-r1",
"20201023_MYC_DMSO_MP8292_S123": "mp909-MV411_IRF8_NT_DMSO-MYC-r1",
"20201023_MYC_VHL_MP8292_S129": "mp910-MV411_IRF8_NT_VHL-MYC-r1",
"20201023_POL_II_total_DMSO_MP8292_S124": "mp911-MV411_IRF8_NT_DMSO-POLII_total-r1",
"20201023_POL_II_total_VHL_MP8292_S130": "mp912-MV411_IRF8_NT_VHL-POLII_total-r1",}

In [None]:
for val in fastq:
    rep = val
    for k,v in rename.items():
        rep = rep.replace(k,v)
    rep = rep.replace('transfer-amlproject/201023_MP8292_fastq/',"amlproject/Chip/"+project+'/fastqs')
    !gsutil cp $val $rep

In [None]:
from gsheets import Sheets
sheets = Sheets.from_files('~/.client_secret.json', '~/.storage.json')
url="https://docs.google.com/spreadsheets/d/1yFLjYB1McU530JnLgL0QIMAKIkVl3kl0_LCHje2gk8U"
gsheet = sheets.get(url).sheets[2].to_frame()

In [None]:
fastq = ! gsutil ls gs://amlproject/Chip/$project/fastqs/
fastq

In [None]:
df = {
"fastq_1": [],
"fastq_2": [],
"antibody": [],
"group": [],
"replicate": [],
"control": []
}
for val in h.grouped(fastq,2):
    row = gsheet[gsheet.id==val[0].split('/')[-1].split('-')[0]]
    df['group'].append("MV4"+row['name'].values[0].split('-r')[0].split('-MV4')[1])
    df['replicate'].append(row['name'].values[0].split('-r')[1])
    df['fastq_1'].append(val[0])
    df['fastq_2'].append(val[1])
    df['antibody'].append(row['protein'].values[0])
    df['control'].append("INPUT")
df['group'].append('INPUT')
df['replicate'].append(1)
df['fastq_1'].append('gs://amlproject/Chip/IRF2BP2_degraded_rep3/fastqs/mp845-MV411-INPUT-r2_R1.fastq.gz')
df['fastq_2'].append('gs://amlproject/Chip/IRF2BP2_degraded_rep3/fastqs/mp845-MV411-INPUT-r2_R2.fastq.gz')
df['antibody'].append("")
df['control'].append("")
df = pd.DataFrame(df)

In [None]:
df

In [None]:
df[df.columns[[3,4,0,1,2,5]]].to_csv('../nextflow/'+project+'_design.csv',index=False)

### about

0. you need to have a google project set up with a billing account
1. you need to activte your APIs this way: https://cloud.google.com/life-sciences/docs/tutorials/nextflow?hl=fr
2. nextflow needs to be installed with this installation command `export NXF_MODE=google && curl https://get.nextflow.io | bash`

In [None]:
! cd ../../nextflow log ## to get access to the previous runs

In [None]:
#process chips 
! ../../nextflow run jkobject/chipseq \
    --paired_end \
    --seq_center 'DFCI' \
    --email 'jkobject@gmail.com' \
    --input ../nextflow/chipseq_IRF8_degraded_design.csv \
    --genome GRCh38 \
    --max_cpus 16 \
    -profile jkcloud \ #my profile for the cloud (to edit for your account)
    -w gs://workamlproject/IRF8res \ #where the withinput will be stored
    --spiking \ #I have spiking using drosophilia genome (default)
    --outdir gs://workamlproject/IRF8 \ #specify aa bucket+folderr where the results will be
    --tracedir ../nextflow/IRF8info/ \ #you need to specify a local place for that

In [243]:
h.createFoldersFor('../../data/'+project+'/bwa/mergedLibrary/')
!gsutil -m cp gs://amlproject/Chip/$project/bwa/mergedLibrary/*.ba* ../../data/$project/bwa/mergedLibrary/
! gsutil -m cp -r gs://amlproject/Chip/$project/bwa/mergedLibrary/bigwig/ ../../data/$project/bwa/mergedLibrary/
! gsutil  -m cp -r gs://amlproject/Chip/$project/bwa/mergedLibrary/macs/ ../../data/$project/bwa/mergedLibrary/

Copying gs://amlproject/Chip/chipseq_IRF8_degraded/bwa/mergedLibrary/INPUT_R1.mLb.clN.sorted.bam...
Copying gs://amlproject/Chip/chipseq_IRF8_degraded/bwa/mergedLibrary/INPUT_R1.mLb.clN.sorted.bam.bai...
Copying gs://amlproject/Chip/chipseq_IRF8_degraded/bwa/mergedLibrary/MV411_IRF8_NT_DMSO-IRF8_R1.mLb.clN.sorted.bam...
Copying gs://amlproject/Chip/chipseq_IRF8_degraded/bwa/mergedLibrary/MV411_IRF8_NT_DMSO-IRF8_R1.mLb.clN.sorted.bam.bai...
/ [4 files][  7.5 GiB/  7.5 GiB]  120.9 MiB/s                                   
==> NOTE: You are performing a sequence of gsutil operations that may
run significantly faster if you instead use gsutil -m cp ... Please
see the -m section under "gsutil help options" for further information
about when gsutil -m can be advantageous.

Copying gs://amlproject/Chip/chipseq_IRF8_degraded/bwa/mergedLibrary/MV411_IRF8_NT_DMSO-MED1_R1.mLb.clN.sorted.bam...
Copying gs://amlproject/Chip/chipseq_IRF8_degraded/bwa/mergedLibrary/MV411_IRF8_NT_DMSO-MED1_R1.mLb.clN.s

Copying gs://amlproject/Chip/chipseq_IRF8_degraded/bwa/mergedLibrary/macs/broadPeak/MV411_IRF8_NT_DMSO-MEF2C_R1_peaks.gappedPeak...
Copying gs://amlproject/Chip/chipseq_IRF8_degraded/bwa/mergedLibrary/macs/broadPeak/MV411_IRF8_NT_DMSO-MEF2C_R1_peaks.xls...
Copying gs://amlproject/Chip/chipseq_IRF8_degraded/bwa/mergedLibrary/macs/broadPeak/MV411_IRF8_NT_DMSO-MEF2D_R1_peaks.annotatePeaks.txt...
Copying gs://amlproject/Chip/chipseq_IRF8_degraded/bwa/mergedLibrary/macs/broadPeak/MV411_IRF8_NT_DMSO-MEF2D_R1_peaks.broadPeak...
Copying gs://amlproject/Chip/chipseq_IRF8_degraded/bwa/mergedLibrary/macs/broadPeak/MV411_IRF8_NT_DMSO-MEF2D_R1_peaks.gappedPeak...
Copying gs://amlproject/Chip/chipseq_IRF8_degraded/bwa/mergedLibrary/macs/broadPeak/MV411_IRF8_NT_DMSO-MEF2D_R1_peaks.xls...
Copying gs://amlproject/Chip/chipseq_IRF8_degraded/bwa/mergedLibrary/macs/broadPeak/MV411_IRF8_NT_DMSO-MYC_R1_peaks.annotatePeaks.txt...
Copying gs://amlproject/Chip/chipseq_IRF8_degraded/bwa/mergedLibrary/macs/broad

Copying gs://amlproject/Chip/chipseq_IRF8_degraded/bwa/mergedLibrary/macs/broadPeak/consensus/POLII_total/POLII_total.consensus_peaks.boolean.txt...
Copying gs://amlproject/Chip/chipseq_IRF8_degraded/bwa/mergedLibrary/macs/broadPeak/consensus/POLII_total/POLII_total.consensus_peaks.featureCounts.txt...
Copying gs://amlproject/Chip/chipseq_IRF8_degraded/bwa/mergedLibrary/macs/broadPeak/consensus/POLII_total/POLII_total.consensus_peaks.featureCounts.txt.summary...
Copying gs://amlproject/Chip/chipseq_IRF8_degraded/bwa/mergedLibrary/macs/broadPeak/consensus/POLII_total/POLII_total.consensus_peaks.saf...
Copying gs://amlproject/Chip/chipseq_IRF8_degraded/bwa/mergedLibrary/macs/broadPeak/qc/MV411_IRF8_NT_DMSO-IRF8_R1_peaks.FRiP_mqc.tsv...
Copying gs://amlproject/Chip/chipseq_IRF8_degraded/bwa/mergedLibrary/macs/broadPeak/qc/MV411_IRF8_NT_DMSO-MED1_R1_peaks.count_mqc.tsv...
Copying gs://amlproject/Chip/chipseq_IRF8_degraded/bwa/mergedLibrary/macs/broadPeak/qc/MV411_IRF8_NT_DMSO-IRF8_R1_peaks

computing scales from the excel sheet

In [244]:
scales = [
        1.00,
        0.79,
        0.59,
        1.00,
        0.77,
        1.00,
        1.00,
        0.72,
        0.91,
        1.00,
        1.00,
        0.23,
         ]

In [245]:
bams = ! ls ../../data/$project/bwa/mergedLibrary/*.bam
bams

['../../data/chipseq_IRF8_degraded/bwa/mergedLibrary/INPUT_R1.mLb.clN.sorted.bam',
 '../../data/chipseq_IRF8_degraded/bwa/mergedLibrary/MV411_IRF8_NT_DMSO-IRF8_R1.mLb.clN.sorted.bam',
 '../../data/chipseq_IRF8_degraded/bwa/mergedLibrary/MV411_IRF8_NT_DMSO-MED1_R1.mLb.clN.sorted.bam',
 '../../data/chipseq_IRF8_degraded/bwa/mergedLibrary/MV411_IRF8_NT_DMSO-MEF2C_R1.mLb.clN.sorted.bam',
 '../../data/chipseq_IRF8_degraded/bwa/mergedLibrary/MV411_IRF8_NT_DMSO-MEF2D_R1.mLb.clN.sorted.bam',
 '../../data/chipseq_IRF8_degraded/bwa/mergedLibrary/MV411_IRF8_NT_DMSO-MYC_R1.mLb.clN.sorted.bam',
 '../../data/chipseq_IRF8_degraded/bwa/mergedLibrary/MV411_IRF8_NT_DMSO-POLII_total_R1.mLb.clN.sorted.bam',
 '../../data/chipseq_IRF8_degraded/bwa/mergedLibrary/MV411_IRF8_NT_VHL-IRF8_R1.mLb.clN.sorted.bam',
 '../../data/chipseq_IRF8_degraded/bwa/mergedLibrary/MV411_IRF8_NT_VHL-MED1_R1.mLb.clN.sorted.bam',
 '../../data/chipseq_IRF8_degraded/bwa/mergedLibrary/MV411_IRF8_NT_VHL-MEF2C_R1.mLb.clN.sorted.bam',
 '

### on scalled data

In [246]:
! mkdir ../../data/$project/diffPeaks/ && ! mkdir ../../data/$project/diffData/

In [247]:
wigs = ! ls ../../data/$project/bwa/mergedLibrary/bigwig/*.bigWig
wigs

['../../data/chipseq_IRF8_degraded/bwa/mergedLibrary/bigwig/INPUT_R1.bigWig',
 '../../data/chipseq_IRF8_degraded/bwa/mergedLibrary/bigwig/MV411_IRF8_NT_DMSO-IRF8_R1.bigWig',
 '../../data/chipseq_IRF8_degraded/bwa/mergedLibrary/bigwig/MV411_IRF8_NT_DMSO-MED1_R1.bigWig',
 '../../data/chipseq_IRF8_degraded/bwa/mergedLibrary/bigwig/MV411_IRF8_NT_DMSO-MEF2C_R1.bigWig',
 '../../data/chipseq_IRF8_degraded/bwa/mergedLibrary/bigwig/MV411_IRF8_NT_DMSO-MEF2D_R1.bigWig',
 '../../data/chipseq_IRF8_degraded/bwa/mergedLibrary/bigwig/MV411_IRF8_NT_DMSO-MYC_R1.bigWig',
 '../../data/chipseq_IRF8_degraded/bwa/mergedLibrary/bigwig/MV411_IRF8_NT_DMSO-POLII_total_R1.bigWig',
 '../../data/chipseq_IRF8_degraded/bwa/mergedLibrary/bigwig/MV411_IRF8_NT_VHL-IRF8_R1.bigWig',
 '../../data/chipseq_IRF8_degraded/bwa/mergedLibrary/bigwig/MV411_IRF8_NT_VHL-MED1_R1.bigWig',
 '../../data/chipseq_IRF8_degraded/bwa/mergedLibrary/bigwig/MV411_IRF8_NT_VHL-MEF2C_R1.bigWig',
 '../../data/chipseq_IRF8_degraded/bwa/mergedLibrary

In [249]:
# diffPeak on scaled data (full reprocessing)
for i in range(int(len(bams)/2)):
    if i < 0:
        continue
    bam1 = bams[1+i]
    bam2 = bams[7+i]
    print(bam1,bam2)
    print(chip.fullDiffPeak(bam1,bam2, control1=bams[0], scaling=[scales[(i*2)], scales[(i*2)+1]], directory = "../../data/"+project+"/diffData/", res_directory = "../../data/"+project+"/diffPeaks/",pairedend=True))

../../data/chipseq_IRF8_degraded/bwa/mergedLibrary/MV411_IRF8_NT_DMSO-IRF8_R1.mLb.clN.sorted.bam ../../data/chipseq_IRF8_degraded/bwa/mergedLibrary/MV411_IRF8_NT_VHL-IRF8_R1.mLb.clN.sorted.bam
doing diff from ../../data/chipseq_IRF8_degraded/bwa/mergedLibrary/MV411_IRF8_NT_DMSO-IRF8_R1.mLb.clN.sorted.bam and ../../data/chipseq_IRF8_degraded/bwa/mergedLibrary/MV411_IRF8_NT_VHL-IRF8_R1.mLb.clN.sorted.bam
computing the fragment avg size
200
computing the scaling values
b'INFO  @ Thu, 05 Nov 2020 00:06:25: \n# Command line: callpeak -B -t ../../data/chipseq_IRF8_degraded/bwa/mergedLibrary/MV411_IRF8_NT_DMSO-IRF8_R1.mLb.clN.sorted.bam -c ../../data/chipseq_IRF8_degraded/bwa/mergedLibrary/INPUT_R1.mLb.clN.sorted.bam --nomodel --extsize 200 -n MV411_IRF8_NT_DMSO-IRF8_R1 --outdir ../../data/chipseq_IRF8_degraded/diffData/ -f BAMPE\n# ARGUMENTS LIST:\n# name = MV411_IRF8_NT_DMSO-IRF8_R1\n# format = BAMPE\n# ChIP-seq file = [\'../../data/chipseq_IRF8_degraded/bwa/mergedLibrary/MV411_IRF8_NT_DMSO

b'INFO  @ Thu, 05 Nov 2020 00:30:30: \n# Command line: callpeak -B -t ../../data/chipseq_IRF8_degraded/bwa/mergedLibrary/MV411_IRF8_NT_VHL-IRF8_R1.mLb.clN.sorted.bam -c ../../data/chipseq_IRF8_degraded/bwa/mergedLibrary/INPUT_R1.mLb.clN.sorted.bam --nomodel --extsize 200 -n MV411_IRF8_NT_VHL-IRF8_R1 --outdir ../../data/chipseq_IRF8_degraded/diffData/ -f BAMPE\n# ARGUMENTS LIST:\n# name = MV411_IRF8_NT_VHL-IRF8_R1\n# format = BAMPE\n# ChIP-seq file = [\'../../data/chipseq_IRF8_degraded/bwa/mergedLibrary/MV411_IRF8_NT_VHL-IRF8_R1.mLb.clN.sorted.bam\']\n# control file = [\'../../data/chipseq_IRF8_degraded/bwa/mergedLibrary/INPUT_R1.mLb.clN.sorted.bam\']\n# effective genome size = 2.70e+09\n# band width = 300\n# model fold = [5, 50]\n# qvalue cutoff = 5.00e-02\n# The maximum gap between significant sites is assigned as the read length/tag size.\n# The minimum length of peaks is assigned as the predicted fragment length "d".\n# Larger dataset will be scaled towards smaller dataset.\n# Range

212
computing the scaling values
b'INFO  @ Thu, 05 Nov 2020 01:12:21: \n# Command line: callpeak -B -t ../../data/chipseq_IRF8_degraded/bwa/mergedLibrary/MV411_IRF8_NT_DMSO-MED1_R1.mLb.clN.sorted.bam -c ../../data/chipseq_IRF8_degraded/bwa/mergedLibrary/INPUT_R1.mLb.clN.sorted.bam --nomodel --extsize 212 -n MV411_IRF8_NT_DMSO-MED1_R1 --outdir ../../data/chipseq_IRF8_degraded/diffData/ -f BAMPE\n# ARGUMENTS LIST:\n# name = MV411_IRF8_NT_DMSO-MED1_R1\n# format = BAMPE\n# ChIP-seq file = [\'../../data/chipseq_IRF8_degraded/bwa/mergedLibrary/MV411_IRF8_NT_DMSO-MED1_R1.mLb.clN.sorted.bam\']\n# control file = [\'../../data/chipseq_IRF8_degraded/bwa/mergedLibrary/INPUT_R1.mLb.clN.sorted.bam\']\n# effective genome size = 2.70e+09\n# band width = 300\n# model fold = [5, 50]\n# qvalue cutoff = 5.00e-02\n# The maximum gap between significant sites is assigned as the read length/tag size.\n# The minimum length of peaks is assigned as the predicted fragment length "d".\n# Larger dataset will be sca

CompletedProcess(args='macs2 bdgdiff --t1 ../../data/chipseq_IRF8_degraded/diffData/MV411_IRF8_NT_DMSO-MED1_R1_treat_pileup.bdg --c1 ../../data/chipseq_IRF8_degraded/diffData/MV411_IRF8_NT_DMSO-MED1_R1_control_lambda.bdg --t2 ../../data/chipseq_IRF8_degraded/diffData/MV411_IRF8_NT_VHL-MED1_R1_treat_pileup.bdg --c2 ../../data/chipseq_IRF8_degraded/diffData/MV411_IRF8_NT_VHL-MED1_R1_control_lambda.bdg --d1 35135377 --d2 24558248 -g 60 -l 212 --o-prefix MV411_IRF8_NT_DMSO-MED1_R1_treat_pileup_vs_MV411_IRF8_NT_VHL-MED1_R1_treat_pileup --outdir ../../data/chipseq_IRF8_degraded/diffPeaks/', returncode=0, stdout=b'', stderr=b'INFO  @ Thu, 05 Nov 2020 02:02:35: Read and build treatment 1 bedGraph... \nINFO  @ Thu, 05 Nov 2020 02:03:16: Read and build control 1 bedGraph... \nINFO  @ Thu, 05 Nov 2020 02:07:01: Read and build treatment 2 bedGraph... \nINFO  @ Thu, 05 Nov 2020 02:07:49: Read and build control 2 bedGraph... \nINFO  @ Thu, 05 Nov 2020 02:20:00: Write peaks... \nINFO  @ Thu, 05 Nov 2

b'INFO  @ Thu, 05 Nov 2020 02:46:19: \n# Command line: callpeak -B -t ../../data/chipseq_IRF8_degraded/bwa/mergedLibrary/MV411_IRF8_NT_VHL-MEF2C_R1.mLb.clN.sorted.bam -c ../../data/chipseq_IRF8_degraded/bwa/mergedLibrary/INPUT_R1.mLb.clN.sorted.bam --nomodel --extsize 204 -n MV411_IRF8_NT_VHL-MEF2C_R1 --outdir ../../data/chipseq_IRF8_degraded/diffData/ -f BAMPE\n# ARGUMENTS LIST:\n# name = MV411_IRF8_NT_VHL-MEF2C_R1\n# format = BAMPE\n# ChIP-seq file = [\'../../data/chipseq_IRF8_degraded/bwa/mergedLibrary/MV411_IRF8_NT_VHL-MEF2C_R1.mLb.clN.sorted.bam\']\n# control file = [\'../../data/chipseq_IRF8_degraded/bwa/mergedLibrary/INPUT_R1.mLb.clN.sorted.bam\']\n# effective genome size = 2.70e+09\n# band width = 300\n# model fold = [5, 50]\n# qvalue cutoff = 5.00e-02\n# The maximum gap between significant sites is assigned as the read length/tag size.\n# The minimum length of peaks is assigned as the predicted fragment length "d".\n# Larger dataset will be scaled towards smaller dataset.\n# R

184
computing the scaling values
b'INFO  @ Thu, 05 Nov 2020 03:28:34: \n# Command line: callpeak -B -t ../../data/chipseq_IRF8_degraded/bwa/mergedLibrary/MV411_IRF8_NT_DMSO-MEF2D_R1.mLb.clN.sorted.bam -c ../../data/chipseq_IRF8_degraded/bwa/mergedLibrary/INPUT_R1.mLb.clN.sorted.bam --nomodel --extsize 184 -n MV411_IRF8_NT_DMSO-MEF2D_R1 --outdir ../../data/chipseq_IRF8_degraded/diffData/ -f BAMPE\n# ARGUMENTS LIST:\n# name = MV411_IRF8_NT_DMSO-MEF2D_R1\n# format = BAMPE\n# ChIP-seq file = [\'../../data/chipseq_IRF8_degraded/bwa/mergedLibrary/MV411_IRF8_NT_DMSO-MEF2D_R1.mLb.clN.sorted.bam\']\n# control file = [\'../../data/chipseq_IRF8_degraded/bwa/mergedLibrary/INPUT_R1.mLb.clN.sorted.bam\']\n# effective genome size = 2.70e+09\n# band width = 300\n# model fold = [5, 50]\n# qvalue cutoff = 5.00e-02\n# The maximum gap between significant sites is assigned as the read length/tag size.\n# The minimum length of peaks is assigned as the predicted fragment length "d".\n# Larger dataset will be

CompletedProcess(args='macs2 bdgdiff --t1 ../../data/chipseq_IRF8_degraded/diffData/MV411_IRF8_NT_DMSO-MEF2D_R1_treat_pileup.bdg --c1 ../../data/chipseq_IRF8_degraded/diffData/MV411_IRF8_NT_DMSO-MEF2D_R1_control_lambda.bdg --t2 ../../data/chipseq_IRF8_degraded/diffData/MV411_IRF8_NT_VHL-MEF2D_R1_treat_pileup.bdg --c2 ../../data/chipseq_IRF8_degraded/diffData/MV411_IRF8_NT_VHL-MEF2D_R1_control_lambda.bdg --d1 18035712 --d2 22098080 -g 60 -l 184 --o-prefix MV411_IRF8_NT_DMSO-MEF2D_R1_treat_pileup_vs_MV411_IRF8_NT_VHL-MEF2D_R1_treat_pileup --outdir ../../data/chipseq_IRF8_degraded/diffPeaks/', returncode=0, stdout=b'', stderr=b'INFO  @ Thu, 05 Nov 2020 04:15:49: Read and build treatment 1 bedGraph... \nINFO  @ Thu, 05 Nov 2020 04:16:26: Read and build control 1 bedGraph... \nINFO  @ Thu, 05 Nov 2020 04:20:12: Read and build treatment 2 bedGraph... \nINFO  @ Thu, 05 Nov 2020 04:20:44: Read and build control 2 bedGraph... \nINFO  @ Thu, 05 Nov 2020 04:32:11: Write peaks... \nINFO  @ Thu, 05

b'INFO  @ Thu, 05 Nov 2020 04:58:27: \n# Command line: callpeak -B -t ../../data/chipseq_IRF8_degraded/bwa/mergedLibrary/MV411_IRF8_NT_VHL-MYC_R1.mLb.clN.sorted.bam -c ../../data/chipseq_IRF8_degraded/bwa/mergedLibrary/INPUT_R1.mLb.clN.sorted.bam --nomodel --extsize 205 -n MV411_IRF8_NT_VHL-MYC_R1 --outdir ../../data/chipseq_IRF8_degraded/diffData/ -f BAMPE\n# ARGUMENTS LIST:\n# name = MV411_IRF8_NT_VHL-MYC_R1\n# format = BAMPE\n# ChIP-seq file = [\'../../data/chipseq_IRF8_degraded/bwa/mergedLibrary/MV411_IRF8_NT_VHL-MYC_R1.mLb.clN.sorted.bam\']\n# control file = [\'../../data/chipseq_IRF8_degraded/bwa/mergedLibrary/INPUT_R1.mLb.clN.sorted.bam\']\n# effective genome size = 2.70e+09\n# band width = 300\n# model fold = [5, 50]\n# qvalue cutoff = 5.00e-02\n# The maximum gap between significant sites is assigned as the read length/tag size.\n# The minimum length of peaks is assigned as the predicted fragment length "d".\n# Larger dataset will be scaled towards smaller dataset.\n# Range for

236
computing the scaling values
b'INFO  @ Thu, 05 Nov 2020 05:41:59: \n# Command line: callpeak -B -t ../../data/chipseq_IRF8_degraded/bwa/mergedLibrary/MV411_IRF8_NT_DMSO-POLII_total_R1.mLb.clN.sorted.bam -c ../../data/chipseq_IRF8_degraded/bwa/mergedLibrary/INPUT_R1.mLb.clN.sorted.bam --nomodel --extsize 236 -n MV411_IRF8_NT_DMSO-POLII_total_R1 --outdir ../../data/chipseq_IRF8_degraded/diffData/ -f BAMPE\n# ARGUMENTS LIST:\n# name = MV411_IRF8_NT_DMSO-POLII_total_R1\n# format = BAMPE\n# ChIP-seq file = [\'../../data/chipseq_IRF8_degraded/bwa/mergedLibrary/MV411_IRF8_NT_DMSO-POLII_total_R1.mLb.clN.sorted.bam\']\n# control file = [\'../../data/chipseq_IRF8_degraded/bwa/mergedLibrary/INPUT_R1.mLb.clN.sorted.bam\']\n# effective genome size = 2.70e+09\n# band width = 300\n# model fold = [5, 50]\n# qvalue cutoff = 5.00e-02\n# The maximum gap between significant sites is assigned as the read length/tag size.\n# The minimum length of peaks is assigned as the predicted fragment length "d".\n

CompletedProcess(args='macs2 bdgdiff --t1 ../../data/chipseq_IRF8_degraded/diffData/MV411_IRF8_NT_DMSO-POLII_total_R1_treat_pileup.bdg --c1 ../../data/chipseq_IRF8_degraded/diffData/MV411_IRF8_NT_DMSO-POLII_total_R1_control_lambda.bdg --t2 ../../data/chipseq_IRF8_degraded/diffData/MV411_IRF8_NT_VHL-POLII_total_R1_treat_pileup.bdg --c2 ../../data/chipseq_IRF8_degraded/diffData/MV411_IRF8_NT_VHL-POLII_total_R1_control_lambda.bdg --d1 19113190 --d2 83363130 -g 60 -l 236 --o-prefix MV411_IRF8_NT_DMSO-POLII_total_R1_treat_pileup_vs_MV411_IRF8_NT_VHL-POLII_total_R1_treat_pileup --outdir ../../data/chipseq_IRF8_degraded/diffPeaks/', returncode=0, stdout=b'', stderr=b'INFO  @ Thu, 05 Nov 2020 06:31:03: Read and build treatment 1 bedGraph... \nINFO  @ Thu, 05 Nov 2020 06:31:38: Read and build control 1 bedGraph... \nINFO  @ Thu, 05 Nov 2020 06:35:13: Read and build treatment 2 bedGraph... \nINFO  @ Thu, 05 Nov 2020 06:35:48: Read and build control 2 bedGraph... \nINFO  @ Thu, 05 Nov 2020 06:47:

In [253]:
initscales = ! cat ../../data/$project/bwa/mergedLibrary/bigwig/scale/*.txt
initscales

['0.00744797',
 '0.0305914',
 '0.0238996',
 '0.0282355',
 '0.0274099',
 '0.0240789',
 '0.0259882',
 '0.0313712',
 '0.0202025',
 '0.0284237',
 '0.0310627',
 '0.0266872',
 '0.0258712']

In [252]:
rescales = [val*float(initscales[1+i]) for i, val in enumerate(scales)]
rescales

[0.0305914,
 0.018880684000000002,
 0.016658944999999998,
 0.0274099,
 0.018540753,
 0.0259882,
 0.0313712,
 0.014545800000000001,
 0.025865567,
 0.0310627,
 0.0266872,
 0.005950376]

In [257]:
chip.bigWigFrom(bams[1:], 
                genome='GRCh38',scaling=rescales,
               numthreads=8)

In [264]:
!mkdir ../../data/$project/recalib_bigwig/

In [265]:
!mv bigwig/* ../../data/$project/recalib_bigwig/

In [259]:
os.popen('for i in $(ls ../../data/'+project+'/diffPeaks/*.bed); \
            do echo $(wc -l $i); \
            done').read().split('\n')

['696 ../../data/chipseq_IRF8_degraded/diffPeaks/MV411_IRF8_NT_DMSO-IRF8_R1_treat_pileup_vs_MV411_IRF8_NT_VHL-IRF8_R1_treat_pileup_c3.0_common.bed',
 '36431 ../../data/chipseq_IRF8_degraded/diffPeaks/MV411_IRF8_NT_DMSO-IRF8_R1_treat_pileup_vs_MV411_IRF8_NT_VHL-IRF8_R1_treat_pileup_c3.0_cond1.bed',
 '198 ../../data/chipseq_IRF8_degraded/diffPeaks/MV411_IRF8_NT_DMSO-IRF8_R1_treat_pileup_vs_MV411_IRF8_NT_VHL-IRF8_R1_treat_pileup_c3.0_cond2.bed',
 '12195 ../../data/chipseq_IRF8_degraded/diffPeaks/MV411_IRF8_NT_DMSO-MED1_R1_treat_pileup_vs_MV411_IRF8_NT_VHL-MED1_R1_treat_pileup_c3.0_common.bed',
 '78 ../../data/chipseq_IRF8_degraded/diffPeaks/MV411_IRF8_NT_DMSO-MED1_R1_treat_pileup_vs_MV411_IRF8_NT_VHL-MED1_R1_treat_pileup_c3.0_cond1.bed',
 '2236 ../../data/chipseq_IRF8_degraded/diffPeaks/MV411_IRF8_NT_DMSO-MED1_R1_treat_pileup_vs_MV411_IRF8_NT_VHL-MED1_R1_treat_pileup_c3.0_cond2.bed',
 '7037 ../../data/chipseq_IRF8_degraded/diffPeaks/MV411_IRF8_NT_DMSO-MEF2C_R1_treat_pileup_vs_MV411_IRF8_N

In [266]:
bw = ! ls ../../data/$project/recalib_bigwig/*
bw

['../../data/chipseq_IRF8_degraded/recalib_bigwig/MV411_IRF8_NT_DMSO-IRF8_R1.bw',
 '../../data/chipseq_IRF8_degraded/recalib_bigwig/MV411_IRF8_NT_DMSO-MED1_R1.bw',
 '../../data/chipseq_IRF8_degraded/recalib_bigwig/MV411_IRF8_NT_DMSO-MEF2C_R1.bw',
 '../../data/chipseq_IRF8_degraded/recalib_bigwig/MV411_IRF8_NT_DMSO-MEF2D_R1.bw',
 '../../data/chipseq_IRF8_degraded/recalib_bigwig/MV411_IRF8_NT_DMSO-MYC_R1.bw',
 '../../data/chipseq_IRF8_degraded/recalib_bigwig/MV411_IRF8_NT_DMSO-POLII_total_R1.bw',
 '../../data/chipseq_IRF8_degraded/recalib_bigwig/MV411_IRF8_NT_VHL-IRF8_R1.bw',
 '../../data/chipseq_IRF8_degraded/recalib_bigwig/MV411_IRF8_NT_VHL-MED1_R1.bw',
 '../../data/chipseq_IRF8_degraded/recalib_bigwig/MV411_IRF8_NT_VHL-MEF2C_R1.bw',
 '../../data/chipseq_IRF8_degraded/recalib_bigwig/MV411_IRF8_NT_VHL-MEF2D_R1.bw',
 '../../data/chipseq_IRF8_degraded/recalib_bigwig/MV411_IRF8_NT_VHL-MYC_R1.bw',
 '../../data/chipseq_IRF8_degraded/recalib_bigwig/MV411_IRF8_NT_VHL-POLII_total_R1.bw']

In [261]:
!mkdir ../results/$project/
!mkdir ../results/$project/plots
!mkdir ../results/$project/plots/heatmaps/

In [262]:
# GENOME WIDE comparison

In [263]:
peaks = ! ls ../../data/$project/bwa/mergedLibrary/macs/broadPeak/*.broadPeak

In [268]:
names = ["IRF8","MED1","MEF2C","MEF2D","MYC","POLII_total"]

In [270]:
for i, val in enumerate(bw):
    if i <1:
        continue
    name = names[i-6]+'_wIRF8ko' if i//6 else names[i]+'_DMSO'
    print(name)
    chip.getPeaksAt(peaks[i], bigwigs = val, bigwignames= name, peaknames=['Macs2_Peaks'], window=3000, folder="", title=name, numthreads=8, refpoint="center", name='../../data/'+project+'/'+name+'_mat.pdf', withDeeptools=True, torecompute=True, legendLoc="lower-left")

MED1_DMSO
CompletedProcess(args='computeMatrix reference-point -S ../../data/chipseq_IRF8_degraded/recalib_bigwig/MV411_IRF8_NT_DMSO-MED1_R1.bw  --referencePoint center --regionsFileName ../../data/chipseq_IRF8_degraded/bwa/mergedLibrary/macs/broadPeak/MV411_IRF8_NT_DMSO-MED1_R1_peaks.broadPeak --missingDataAsZero --outFileName ../../data/chipseq_IRF8_degraded/MED1_DMSO_mat.gz --upstream 3000 --downstream 3000 --numberOfProcessors 8 && plotHeatmap --matrixFile ../../data/chipseq_IRF8_degraded/MED1_DMSO_mat.gz --outFileName ../../data/chipseq_IRF8_degraded/MED1_DMSO_mat.pdf --refPointLabel center --legendLocation lower-left --regionsLabel Macs2_Peaks --plotTitle MED1_DMSO', returncode=0, stdout=b'', stderr=b'')
MEF2C_DMSO
CompletedProcess(args='computeMatrix reference-point -S ../../data/chipseq_IRF8_degraded/recalib_bigwig/MV411_IRF8_NT_DMSO-MEF2C_R1.bw  --referencePoint center --regionsFileName ../../data/chipseq_IRF8_degraded/bwa/mergedLibrary/macs/broadPeak/MV411_IRF8_NT_DMSO-MEF2C_

In [271]:
h.createFoldersFor('../results/'+project+'/plots/scaled/heatmaps/')
! cp ../../data/$project/*.pdf ../results/$project/plots/scaled/heatmaps/

In [272]:
cond1peak = ! ls ../../data/$project/diffPeaks/*cond1.bed
cond2peak = ! ls ../../data/$project/diffPeaks/*cond2.bed
commonpeak = ! ls ../../data/$project/diffPeaks/*common.bed
cond1peak

['../../data/chipseq_IRF8_degraded/diffPeaks/MV411_IRF8_NT_DMSO-IRF8_R1_treat_pileup_vs_MV411_IRF8_NT_VHL-IRF8_R1_treat_pileup_c3.0_cond1.bed',
 '../../data/chipseq_IRF8_degraded/diffPeaks/MV411_IRF8_NT_DMSO-MED1_R1_treat_pileup_vs_MV411_IRF8_NT_VHL-MED1_R1_treat_pileup_c3.0_cond1.bed',
 '../../data/chipseq_IRF8_degraded/diffPeaks/MV411_IRF8_NT_DMSO-MEF2C_R1_treat_pileup_vs_MV411_IRF8_NT_VHL-MEF2C_R1_treat_pileup_c3.0_cond1.bed',
 '../../data/chipseq_IRF8_degraded/diffPeaks/MV411_IRF8_NT_DMSO-MEF2D_R1_treat_pileup_vs_MV411_IRF8_NT_VHL-MEF2D_R1_treat_pileup_c3.0_cond1.bed',
 '../../data/chipseq_IRF8_degraded/diffPeaks/MV411_IRF8_NT_DMSO-MYC_R1_treat_pileup_vs_MV411_IRF8_NT_VHL-MYC_R1_treat_pileup_c3.0_cond1.bed',
 '../../data/chipseq_IRF8_degraded/diffPeaks/MV411_IRF8_NT_DMSO-POLII_total_R1_treat_pileup_vs_MV411_IRF8_NT_VHL-POLII_total_R1_treat_pileup_c3.0_cond1.bed']

In [273]:
for i in range(int(len(bw)/2)):
    if i <0:
        continue
    name1 = bw[i]
    name2 = bw[i+6]
    peak = [cond1peak[i], commonpeak[i], cond2peak[i]]
   # for val in peak:
      #  chip.dropWeirdChromosomes(val)
    name = names[i]
    print(name1,name2)
    chip.getPeaksAt(peak, [name1, name2], bigwignames=['DMSO', 'VHL'], peaknames=['DMSO_peaks', 'common', 'VHL_peaks'], window=3000, folder="", title=name, numthreads=8, refpoint="center", name='../../data/'+project+'/diffPeaks/'+name+'_mat.pdf', withDeeptools=True, torecompute=False)

../../data/chipseq_IRF8_degraded/recalib_bigwig/MV411_IRF8_NT_DMSO-IRF8_R1.bw ../../data/chipseq_IRF8_degraded/recalib_bigwig/MV411_IRF8_NT_VHL-IRF8_R1.bw
CompletedProcess(args='computeMatrix reference-point -S ../../data/chipseq_IRF8_degraded/recalib_bigwig/MV411_IRF8_NT_DMSO-IRF8_R1.bw ../../data/chipseq_IRF8_degraded/recalib_bigwig/MV411_IRF8_NT_VHL-IRF8_R1.bw  --referencePoint center --regionsFileName ../../data/chipseq_IRF8_degraded/diffPeaks/MV411_IRF8_NT_DMSO-IRF8_R1_treat_pileup_vs_MV411_IRF8_NT_VHL-IRF8_R1_treat_pileup_c3.0_cond1.bed ../../data/chipseq_IRF8_degraded/diffPeaks/MV411_IRF8_NT_DMSO-IRF8_R1_treat_pileup_vs_MV411_IRF8_NT_VHL-IRF8_R1_treat_pileup_c3.0_common.bed ../../data/chipseq_IRF8_degraded/diffPeaks/MV411_IRF8_NT_DMSO-IRF8_R1_treat_pileup_vs_MV411_IRF8_NT_VHL-IRF8_R1_treat_pileup_c3.0_cond2.bed --missingDataAsZero --outFileName ../../data/chipseq_IRF8_degraded/diffPeaks/IRF8_mat.gz --upstream 3000 --downstream 3000 --numberOfProcessors 8 && plotHeatmap --matrixF

In [274]:
! mkdir ../results/$project/plots/scaled/diffPeaks/
! cp ../../data/$project/diffPeaks/*.pdf ../results/$project/plots/scaled/diffPeaks/

### on unscalled data

In [275]:
! mkdir ../../data/$project/diffPeaks_unscaled

In [276]:
#on unscalled data 
for i in range(int(len(bams)/2)):
    if i < 0:
        continue
    name1 = bams[i]
    name2 = bams[6+i]
    print(name1,name2)
    chip.fullDiffPeak(name1,name2, control1='../../data/'+project+'/bwa/mergedLibrary/INPUT_R1.mLb.clN.sorted.bam', directory = "../../data/"+project+"/diffData_unscaled/", res_directory = "../../data/"+project+"/diffPeaks_unscaled/",pairedend=False)

../../data/chipseq_IRF8_degraded/bwa/mergedLibrary/INPUT_R1.mLb.clN.sorted.bam ../../data/chipseq_IRF8_degraded/bwa/mergedLibrary/MV411_IRF8_NT_DMSO-POLII_total_R1.mLb.clN.sorted.bam
doing diff from ../../data/chipseq_IRF8_degraded/bwa/mergedLibrary/INPUT_R1.mLb.clN.sorted.bam and ../../data/chipseq_IRF8_degraded/bwa/mergedLibrary/MV411_IRF8_NT_DMSO-POLII_total_R1.mLb.clN.sorted.bam
computing the fragment avg size
183
computing the scaling values
b'INFO  @ Thu, 05 Nov 2020 19:18:41: \n# Command line: callpeak -B -t ../../data/chipseq_IRF8_degraded/bwa/mergedLibrary/INPUT_R1.mLb.clN.sorted.bam -c ../../data/chipseq_IRF8_degraded/bwa/mergedLibrary/INPUT_R1.mLb.clN.sorted.bam --nomodel --extsize 183 -n INPUT_R1 --outdir ../../data/chipseq_IRF8_degraded/diffData_unscaled/ -f BAM\n# ARGUMENTS LIST:\n# name = INPUT_R1\n# format = BAM\n# ChIP-seq file = [\'../../data/chipseq_IRF8_degraded/bwa/mergedLibrary/INPUT_R1.mLb.clN.sorted.bam\']\n# control file = [\'../../data/chipseq_IRF8_degraded/bw

b'INFO  @ Thu, 05 Nov 2020 19:37:33: \n# Command line: callpeak -B -t ../../data/chipseq_IRF8_degraded/bwa/mergedLibrary/MV411_IRF8_NT_DMSO-POLII_total_R1.mLb.clN.sorted.bam -c ../../data/chipseq_IRF8_degraded/bwa/mergedLibrary/INPUT_R1.mLb.clN.sorted.bam --nomodel --extsize 183 -n MV411_IRF8_NT_DMSO-POLII_total_R1 --outdir ../../data/chipseq_IRF8_degraded/diffData_unscaled/ -f BAM\n# ARGUMENTS LIST:\n# name = MV411_IRF8_NT_DMSO-POLII_total_R1\n# format = BAM\n# ChIP-seq file = [\'../../data/chipseq_IRF8_degraded/bwa/mergedLibrary/MV411_IRF8_NT_DMSO-POLII_total_R1.mLb.clN.sorted.bam\']\n# control file = [\'../../data/chipseq_IRF8_degraded/bwa/mergedLibrary/INPUT_R1.mLb.clN.sorted.bam\']\n# effective genome size = 2.70e+09\n# band width = 300\n# model fold = [5, 50]\n# qvalue cutoff = 5.00e-02\n# The maximum gap between significant sites is assigned as the read length/tag size.\n# The minimum length of peaks is assigned as the predicted fragment length "d".\n# Larger dataset will be sca

200
computing the scaling values
b'INFO  @ Thu, 05 Nov 2020 20:06:14: \n# Command line: callpeak -B -t ../../data/chipseq_IRF8_degraded/bwa/mergedLibrary/MV411_IRF8_NT_DMSO-IRF8_R1.mLb.clN.sorted.bam -c ../../data/chipseq_IRF8_degraded/bwa/mergedLibrary/INPUT_R1.mLb.clN.sorted.bam --nomodel --extsize 200 -n MV411_IRF8_NT_DMSO-IRF8_R1 --outdir ../../data/chipseq_IRF8_degraded/diffData_unscaled/ -f BAM\n# ARGUMENTS LIST:\n# name = MV411_IRF8_NT_DMSO-IRF8_R1\n# format = BAM\n# ChIP-seq file = [\'../../data/chipseq_IRF8_degraded/bwa/mergedLibrary/MV411_IRF8_NT_DMSO-IRF8_R1.mLb.clN.sorted.bam\']\n# control file = [\'../../data/chipseq_IRF8_degraded/bwa/mergedLibrary/INPUT_R1.mLb.clN.sorted.bam\']\n# effective genome size = 2.70e+09\n# band width = 300\n# model fold = [5, 50]\n# qvalue cutoff = 5.00e-02\n# The maximum gap between significant sites is assigned as the read length/tag size.\n# The minimum length of peaks is assigned as the predicted fragment length "d".\n# Larger dataset will b

../../data/chipseq_IRF8_degraded/bwa/mergedLibrary/MV411_IRF8_NT_DMSO-MED1_R1.mLb.clN.sorted.bam ../../data/chipseq_IRF8_degraded/bwa/mergedLibrary/MV411_IRF8_NT_VHL-MED1_R1.mLb.clN.sorted.bam
doing diff from ../../data/chipseq_IRF8_degraded/bwa/mergedLibrary/MV411_IRF8_NT_DMSO-MED1_R1.mLb.clN.sorted.bam and ../../data/chipseq_IRF8_degraded/bwa/mergedLibrary/MV411_IRF8_NT_VHL-MED1_R1.mLb.clN.sorted.bam
computing the fragment avg size
212
computing the scaling values
b'INFO  @ Thu, 05 Nov 2020 20:44:32: \n# Command line: callpeak -B -t ../../data/chipseq_IRF8_degraded/bwa/mergedLibrary/MV411_IRF8_NT_DMSO-MED1_R1.mLb.clN.sorted.bam -c ../../data/chipseq_IRF8_degraded/bwa/mergedLibrary/INPUT_R1.mLb.clN.sorted.bam --nomodel --extsize 212 -n MV411_IRF8_NT_DMSO-MED1_R1 --outdir ../../data/chipseq_IRF8_degraded/diffData_unscaled/ -f BAM\n# ARGUMENTS LIST:\n# name = MV411_IRF8_NT_DMSO-MED1_R1\n# format = BAM\n# ChIP-seq file = [\'../../data/chipseq_IRF8_degraded/bwa/mergedLibrary/MV411_IRF8_NT

b'INFO  @ Thu, 05 Nov 2020 20:58:13: \n# Command line: callpeak -B -t ../../data/chipseq_IRF8_degraded/bwa/mergedLibrary/MV411_IRF8_NT_VHL-MED1_R1.mLb.clN.sorted.bam -c ../../data/chipseq_IRF8_degraded/bwa/mergedLibrary/INPUT_R1.mLb.clN.sorted.bam --nomodel --extsize 212 -n MV411_IRF8_NT_VHL-MED1_R1 --outdir ../../data/chipseq_IRF8_degraded/diffData_unscaled/ -f BAM\n# ARGUMENTS LIST:\n# name = MV411_IRF8_NT_VHL-MED1_R1\n# format = BAM\n# ChIP-seq file = [\'../../data/chipseq_IRF8_degraded/bwa/mergedLibrary/MV411_IRF8_NT_VHL-MED1_R1.mLb.clN.sorted.bam\']\n# control file = [\'../../data/chipseq_IRF8_degraded/bwa/mergedLibrary/INPUT_R1.mLb.clN.sorted.bam\']\n# effective genome size = 2.70e+09\n# band width = 300\n# model fold = [5, 50]\n# qvalue cutoff = 5.00e-02\n# The maximum gap between significant sites is assigned as the read length/tag size.\n# The minimum length of peaks is assigned as the predicted fragment length "d".\n# Larger dataset will be scaled towards smaller dataset.\n# 

../../data/chipseq_IRF8_degraded/bwa/mergedLibrary/MV411_IRF8_NT_DMSO-MEF2C_R1.mLb.clN.sorted.bam ../../data/chipseq_IRF8_degraded/bwa/mergedLibrary/MV411_IRF8_NT_VHL-MEF2C_R1.mLb.clN.sorted.bam
doing diff from ../../data/chipseq_IRF8_degraded/bwa/mergedLibrary/MV411_IRF8_NT_DMSO-MEF2C_R1.mLb.clN.sorted.bam and ../../data/chipseq_IRF8_degraded/bwa/mergedLibrary/MV411_IRF8_NT_VHL-MEF2C_R1.mLb.clN.sorted.bam
computing the fragment avg size
204
computing the scaling values
b'INFO  @ Thu, 05 Nov 2020 21:24:16: \n# Command line: callpeak -B -t ../../data/chipseq_IRF8_degraded/bwa/mergedLibrary/MV411_IRF8_NT_DMSO-MEF2C_R1.mLb.clN.sorted.bam -c ../../data/chipseq_IRF8_degraded/bwa/mergedLibrary/INPUT_R1.mLb.clN.sorted.bam --nomodel --extsize 204 -n MV411_IRF8_NT_DMSO-MEF2C_R1 --outdir ../../data/chipseq_IRF8_degraded/diffData_unscaled/ -f BAM\n# ARGUMENTS LIST:\n# name = MV411_IRF8_NT_DMSO-MEF2C_R1\n# format = BAM\n# ChIP-seq file = [\'../../data/chipseq_IRF8_degraded/bwa/mergedLibrary/MV411_

b'INFO  @ Thu, 05 Nov 2020 21:37:57: \n# Command line: callpeak -B -t ../../data/chipseq_IRF8_degraded/bwa/mergedLibrary/MV411_IRF8_NT_VHL-MEF2C_R1.mLb.clN.sorted.bam -c ../../data/chipseq_IRF8_degraded/bwa/mergedLibrary/INPUT_R1.mLb.clN.sorted.bam --nomodel --extsize 204 -n MV411_IRF8_NT_VHL-MEF2C_R1 --outdir ../../data/chipseq_IRF8_degraded/diffData_unscaled/ -f BAM\n# ARGUMENTS LIST:\n# name = MV411_IRF8_NT_VHL-MEF2C_R1\n# format = BAM\n# ChIP-seq file = [\'../../data/chipseq_IRF8_degraded/bwa/mergedLibrary/MV411_IRF8_NT_VHL-MEF2C_R1.mLb.clN.sorted.bam\']\n# control file = [\'../../data/chipseq_IRF8_degraded/bwa/mergedLibrary/INPUT_R1.mLb.clN.sorted.bam\']\n# effective genome size = 2.70e+09\n# band width = 300\n# model fold = [5, 50]\n# qvalue cutoff = 5.00e-02\n# The maximum gap between significant sites is assigned as the read length/tag size.\n# The minimum length of peaks is assigned as the predicted fragment length "d".\n# Larger dataset will be scaled towards smaller dataset.

184
computing the scaling values
b'INFO  @ Thu, 05 Nov 2020 22:02:35: \n# Command line: callpeak -B -t ../../data/chipseq_IRF8_degraded/bwa/mergedLibrary/MV411_IRF8_NT_DMSO-MEF2D_R1.mLb.clN.sorted.bam -c ../../data/chipseq_IRF8_degraded/bwa/mergedLibrary/INPUT_R1.mLb.clN.sorted.bam --nomodel --extsize 184 -n MV411_IRF8_NT_DMSO-MEF2D_R1 --outdir ../../data/chipseq_IRF8_degraded/diffData_unscaled/ -f BAM\n# ARGUMENTS LIST:\n# name = MV411_IRF8_NT_DMSO-MEF2D_R1\n# format = BAM\n# ChIP-seq file = [\'../../data/chipseq_IRF8_degraded/bwa/mergedLibrary/MV411_IRF8_NT_DMSO-MEF2D_R1.mLb.clN.sorted.bam\']\n# control file = [\'../../data/chipseq_IRF8_degraded/bwa/mergedLibrary/INPUT_R1.mLb.clN.sorted.bam\']\n# effective genome size = 2.70e+09\n# band width = 300\n# model fold = [5, 50]\n# qvalue cutoff = 5.00e-02\n# The maximum gap between significant sites is assigned as the read length/tag size.\n# The minimum length of peaks is assigned as the predicted fragment length "d".\n# Larger dataset wi

../../data/chipseq_IRF8_degraded/bwa/mergedLibrary/MV411_IRF8_NT_DMSO-MYC_R1.mLb.clN.sorted.bam ../../data/chipseq_IRF8_degraded/bwa/mergedLibrary/MV411_IRF8_NT_VHL-MYC_R1.mLb.clN.sorted.bam
doing diff from ../../data/chipseq_IRF8_degraded/bwa/mergedLibrary/MV411_IRF8_NT_DMSO-MYC_R1.mLb.clN.sorted.bam and ../../data/chipseq_IRF8_degraded/bwa/mergedLibrary/MV411_IRF8_NT_VHL-MYC_R1.mLb.clN.sorted.bam
computing the fragment avg size
205
computing the scaling values
b'INFO  @ Thu, 05 Nov 2020 22:40:36: \n# Command line: callpeak -B -t ../../data/chipseq_IRF8_degraded/bwa/mergedLibrary/MV411_IRF8_NT_DMSO-MYC_R1.mLb.clN.sorted.bam -c ../../data/chipseq_IRF8_degraded/bwa/mergedLibrary/INPUT_R1.mLb.clN.sorted.bam --nomodel --extsize 205 -n MV411_IRF8_NT_DMSO-MYC_R1 --outdir ../../data/chipseq_IRF8_degraded/diffData_unscaled/ -f BAM\n# ARGUMENTS LIST:\n# name = MV411_IRF8_NT_DMSO-MYC_R1\n# format = BAM\n# ChIP-seq file = [\'../../data/chipseq_IRF8_degraded/bwa/mergedLibrary/MV411_IRF8_NT_DMSO-M

b'INFO  @ Thu, 05 Nov 2020 22:54:33: \n# Command line: callpeak -B -t ../../data/chipseq_IRF8_degraded/bwa/mergedLibrary/MV411_IRF8_NT_VHL-MYC_R1.mLb.clN.sorted.bam -c ../../data/chipseq_IRF8_degraded/bwa/mergedLibrary/INPUT_R1.mLb.clN.sorted.bam --nomodel --extsize 205 -n MV411_IRF8_NT_VHL-MYC_R1 --outdir ../../data/chipseq_IRF8_degraded/diffData_unscaled/ -f BAM\n# ARGUMENTS LIST:\n# name = MV411_IRF8_NT_VHL-MYC_R1\n# format = BAM\n# ChIP-seq file = [\'../../data/chipseq_IRF8_degraded/bwa/mergedLibrary/MV411_IRF8_NT_VHL-MYC_R1.mLb.clN.sorted.bam\']\n# control file = [\'../../data/chipseq_IRF8_degraded/bwa/mergedLibrary/INPUT_R1.mLb.clN.sorted.bam\']\n# effective genome size = 2.70e+09\n# band width = 300\n# model fold = [5, 50]\n# qvalue cutoff = 5.00e-02\n# The maximum gap between significant sites is assigned as the read length/tag size.\n# The minimum length of peaks is assigned as the predicted fragment length "d".\n# Larger dataset will be scaled towards smaller dataset.\n# Rang

In [277]:
bw = ! ls ../../data/$project/bwa/mergedLibrary/bigwig/*.bigWig
bw

['../../data/chipseq_IRF8_degraded/bwa/mergedLibrary/bigwig/INPUT_R1.bigWig',
 '../../data/chipseq_IRF8_degraded/bwa/mergedLibrary/bigwig/MV411_IRF8_NT_DMSO-IRF8_R1.bigWig',
 '../../data/chipseq_IRF8_degraded/bwa/mergedLibrary/bigwig/MV411_IRF8_NT_DMSO-MED1_R1.bigWig',
 '../../data/chipseq_IRF8_degraded/bwa/mergedLibrary/bigwig/MV411_IRF8_NT_DMSO-MEF2C_R1.bigWig',
 '../../data/chipseq_IRF8_degraded/bwa/mergedLibrary/bigwig/MV411_IRF8_NT_DMSO-MEF2D_R1.bigWig',
 '../../data/chipseq_IRF8_degraded/bwa/mergedLibrary/bigwig/MV411_IRF8_NT_DMSO-MYC_R1.bigWig',
 '../../data/chipseq_IRF8_degraded/bwa/mergedLibrary/bigwig/MV411_IRF8_NT_DMSO-POLII_total_R1.bigWig',
 '../../data/chipseq_IRF8_degraded/bwa/mergedLibrary/bigwig/MV411_IRF8_NT_VHL-IRF8_R1.bigWig',
 '../../data/chipseq_IRF8_degraded/bwa/mergedLibrary/bigwig/MV411_IRF8_NT_VHL-MED1_R1.bigWig',
 '../../data/chipseq_IRF8_degraded/bwa/mergedLibrary/bigwig/MV411_IRF8_NT_VHL-MEF2C_R1.bigWig',
 '../../data/chipseq_IRF8_degraded/bwa/mergedLibrary

In [278]:
cond1peak = ! ls ../../data/$project/diffPeaks_unscaled/*cond1.bed
cond2peak = ! ls ../../data/$project/diffPeaks_unscaled/*cond2.bed
commonpeak = ! ls ../../data/$project/diffPeaks_unscaled/*common.bed
commonpeak

['../../data/chipseq_IRF8_degraded/diffPeaks_unscaled/INPUT_R1_treat_pileup_vs_MV411_IRF8_NT_DMSO-POLII_total_R1_treat_pileup_c3.0_common.bed',
 '../../data/chipseq_IRF8_degraded/diffPeaks_unscaled/MV411_IRF8_NT_DMSO-IRF8_R1_treat_pileup_vs_MV411_IRF8_NT_VHL-IRF8_R1_treat_pileup_c3.0_common.bed',
 '../../data/chipseq_IRF8_degraded/diffPeaks_unscaled/MV411_IRF8_NT_DMSO-MED1_R1_treat_pileup_vs_MV411_IRF8_NT_VHL-MED1_R1_treat_pileup_c3.0_common.bed',
 '../../data/chipseq_IRF8_degraded/diffPeaks_unscaled/MV411_IRF8_NT_DMSO-MEF2C_R1_treat_pileup_vs_MV411_IRF8_NT_VHL-MEF2C_R1_treat_pileup_c3.0_common.bed',
 '../../data/chipseq_IRF8_degraded/diffPeaks_unscaled/MV411_IRF8_NT_DMSO-MEF2D_R1_treat_pileup_vs_MV411_IRF8_NT_VHL-MEF2D_R1_treat_pileup_c3.0_common.bed',
 '../../data/chipseq_IRF8_degraded/diffPeaks_unscaled/MV411_IRF8_NT_DMSO-MYC_R1_treat_pileup_vs_MV411_IRF8_NT_VHL-MYC_R1_treat_pileup_c3.0_common.bed']

In [279]:
for i in range(int((len(bw)-1)/2)):
    name1 = bw[i]
    name2 = bw[6+i]
    peak = [cond1peak[i], commonpeak[i], cond2peak[i]]
    #for val in peak:
     #   chip.dropWeirdChromosomes(val)
    name = names[i]
    print(name1,name2)
    chip.getPeaksAt(peak, [name1, name2], bigwignames=['DMSO', 'VHL'], peaknames=['DMSO_peaks', 'common', 'VHL_peaks'], window=3000, folder="", title=name, numthreads=8, torecompute=True, refpoint='center', name='../../data/'+project+'/diffPeaks_unscaled/'+name+'_mat.pdf', withDeeptools=True)

../../data/chipseq_IRF8_degraded/bwa/mergedLibrary/bigwig/INPUT_R1.bigWig ../../data/chipseq_IRF8_degraded/bwa/mergedLibrary/bigwig/MV411_IRF8_NT_DMSO-POLII_total_R1.bigWig
CompletedProcess(args='computeMatrix reference-point -S ../../data/chipseq_IRF8_degraded/bwa/mergedLibrary/bigwig/INPUT_R1.bigWig ../../data/chipseq_IRF8_degraded/bwa/mergedLibrary/bigwig/MV411_IRF8_NT_DMSO-POLII_total_R1.bigWig  --referencePoint center --regionsFileName ../../data/chipseq_IRF8_degraded/diffPeaks_unscaled/INPUT_R1_treat_pileup_vs_MV411_IRF8_NT_DMSO-POLII_total_R1_treat_pileup_c3.0_cond2.bed --missingDataAsZero --outFileName ../../data/chipseq_IRF8_degraded/diffPeaks_unscaled/IRF8_mat.gz --upstream 3000 --downstream 3000 --numberOfProcessors 8 && plotHeatmap --matrixFile ../../data/chipseq_IRF8_degraded/diffPeaks_unscaled/IRF8_mat.gz --outFileName ../../data/chipseq_IRF8_degraded/diffPeaks_unscaled/IRF8_mat.pdf --refPointLabel center --regionsLabel VHL_peaks --plotTitle IRF8', returncode=0, stdout=b'

CompletedProcess(args='computeMatrix reference-point -S ../../data/chipseq_IRF8_degraded/bwa/mergedLibrary/bigwig/MV411_IRF8_NT_DMSO-MYC_R1.bigWig ../../data/chipseq_IRF8_degraded/bwa/mergedLibrary/bigwig/MV411_IRF8_NT_VHL-MYC_R1.bigWig  --referencePoint center --regionsFileName ../../data/chipseq_IRF8_degraded/diffPeaks_unscaled/MV411_IRF8_NT_DMSO-MYC_R1_treat_pileup_vs_MV411_IRF8_NT_VHL-MYC_R1_treat_pileup_c3.0_cond1.bed ../../data/chipseq_IRF8_degraded/diffPeaks_unscaled/MV411_IRF8_NT_DMSO-MYC_R1_treat_pileup_vs_MV411_IRF8_NT_VHL-MYC_R1_treat_pileup_c3.0_common.bed ../../data/chipseq_IRF8_degraded/diffPeaks_unscaled/MV411_IRF8_NT_DMSO-MYC_R1_treat_pileup_vs_MV411_IRF8_NT_VHL-MYC_R1_treat_pileup_c3.0_cond2.bed --missingDataAsZero --outFileName ../../data/chipseq_IRF8_degraded/diffPeaks_unscaled/POLII_total_mat.gz --upstream 3000 --downstream 3000 --numberOfProcessors 8 && plotHeatmap --matrixFile ../../data/chipseq_IRF8_degraded/diffPeaks_unscaled/POLII_total_mat.gz --outFileName ../

In [280]:
h.createFoldersFor('../results/'+project+'/plots/unscaled/diffPeaks/')
! cp ../../data/$project/diffPeaks_unscaled/*.pdf ../results/$project/plots/unscaled/diffPeaks/

! gsutil -m cp gs://amlproject/Chip/$project/bwa/mergedLibrary/deepTools/**.pdf ../results/$project/plots/

Copying gs://amlproject/Chip/chipseq_IRF8_degraded/bwa/mergedLibrary/deepTools/plotFingerprint/MV411_IRF8_NT_DMSO-IRF8_R1.plotFingerprint.pdf...
Copying gs://amlproject/Chip/chipseq_IRF8_degraded/bwa/mergedLibrary/deepTools/plotFingerprint/MV411_IRF8_NT_DMSO-MED1_R1.plotFingerprint.pdf...
Copying gs://amlproject/Chip/chipseq_IRF8_degraded/bwa/mergedLibrary/deepTools/plotFingerprint/MV411_IRF8_NT_DMSO-MEF2C_R1.plotFingerprint.pdf...
Copying gs://amlproject/Chip/chipseq_IRF8_degraded/bwa/mergedLibrary/deepTools/plotFingerprint/MV411_IRF8_NT_DMSO-MEF2D_R1.plotFingerprint.pdf...
Copying gs://amlproject/Chip/chipseq_IRF8_degraded/bwa/mergedLibrary/deepTools/plotFingerprint/MV411_IRF8_NT_DMSO-MYC_R1.plotFingerprint.pdf...
Copying gs://amlproject/Chip/chipseq_IRF8_degraded/bwa/mergedLibrary/deepTools/plotFingerprint/MV411_IRF8_NT_DMSO-POLII_total_R1.plotFingerprint.pdf...
Copying gs://amlproject/Chip/chipseq_IRF8_degraded/bwa/mergedLibrary/deepTools/plotFingerprint/MV411_IRF8_NT_VHL-IRF8_R1.p

In [281]:
! gsutil -m cp -r ../../data/$project/diffPeaks gs://amlproject/Chip/$project/
! gsutil -m cp -r ../../data/$project/diffData gs://amlproject/Chip/$project/
! gsutil -m cp -r ../../data/$project/diffPeaks_unscaled gs://amlproject/Chip/$project/
! gsutil -m cp -r ../../data/$project/diffData_unscaled gs://amlproject/Chip/$project/
! gsutil -m cp -r ../../data/$project/peakplot gs://amlproject/Chip/$project/

Copying file://../../data/chipseq_IRF8_degraded/diffPeaks/MV411_IRF8_NT_DMSO-MEF2D_R1_treat_pileup_vs_MV411_IRF8_NT_VHL-MEF2D_R1_treat_pileup_c3.0_cond1.bed [Content-Type=application/octet-stream]...
Copying file://../../data/chipseq_IRF8_degraded/diffPeaks/MV411_IRF8_NT_DMSO-MEF2C_R1_treat_pileup_vs_MV411_IRF8_NT_VHL-MEF2C_R1_treat_pileup_c3.0_cond2.bed [Content-Type=application/octet-stream]...
Copying file://../../data/chipseq_IRF8_degraded/diffPeaks/MED1_mat.gz [Content-Type=application/octet-stream]...
Copying file://../../data/chipseq_IRF8_degraded/diffPeaks/IRF8_mat.pdf [Content-Type=application/pdf]...
Copying file://../../data/chipseq_IRF8_degraded/diffPeaks/MV411_IRF8_NT_DMSO-POLII_total_R1_treat_pileup_vs_MV411_IRF8_NT_VHL-POLII_total_R1_treat_pileup_c3.0_cond2.bed [Content-Type=application/octet-stream]...
Copying file://../../data/chipseq_IRF8_degraded/diffPeaks/MV411_IRF8_NT_DMSO-IRF8_R1_treat_pileup_vs_MV411_IRF8_NT_VHL-IRF8_R1_treat_pileup_c3.0_common.bed [Content-Type=

Copying file://../../data/chipseq_IRF8_degraded/diffData/MV411_IRF8_NT_VHL-MYC_R1_treat_pileup.bdg [Content-Type=application/octet-stream]...
Copying file://../../data/chipseq_IRF8_degraded/diffData/MV411_IRF8_NT_DMSO-MEF2D_R1_control_lambda.bdg [Content-Type=application/octet-stream]...
Copying file://../../data/chipseq_IRF8_degraded/diffData/MV411_IRF8_NT_DMSO-IRF8_R1_summits.bed [Content-Type=application/octet-stream]...
Copying file://../../data/chipseq_IRF8_degraded/diffData/MV411_IRF8_NT_VHL-POLII_total_R1_peaks.xls [Content-Type=application/vnd.ms-excel]...
Copying file://../../data/chipseq_IRF8_degraded/diffData/MV411_IRF8_NT_VHL-MEF2C_R1_control_lambda.bdg [Content-Type=application/octet-stream]...
Copying file://../../data/chipseq_IRF8_degraded/diffData/MV411_IRF8_NT_DMSO-POLII_total_R1_peaks.xls [Content-Type=application/vnd.ms-excel]...
Copying file://../../data/chipseq_IRF8_degraded/diffData/MV411_IRF8_NT_VHL-MEF2D_R1_peaks.xls [Content-Type=application/vnd.ms-excel]...
Co

Copying file://../../data/chipseq_IRF8_degraded/diffPeaks_unscaled/MEF2D_mat.gz [Content-Type=application/octet-stream]...
Copying file://../../data/chipseq_IRF8_degraded/diffPeaks_unscaled/MV411_IRF8_NT_DMSO-MEF2C_R1_treat_pileup_vs_MV411_IRF8_NT_VHL-MEF2C_R1_treat_pileup_c3.0_cond1.bed [Content-Type=application/octet-stream]...
Copying file://../../data/chipseq_IRF8_degraded/diffPeaks_unscaled/MV411_IRF8_NT_DMSO-MED1_R1_treat_pileup_vs_MV411_IRF8_NT_VHL-MED1_R1_treat_pileup_c3.0_common.bed [Content-Type=application/octet-stream]...
Copying file://../../data/chipseq_IRF8_degraded/diffPeaks_unscaled/POLII_total_mat.gz [Content-Type=application/octet-stream]...
Copying file://../../data/chipseq_IRF8_degraded/diffPeaks_unscaled/MV411_IRF8_NT_DMSO-MYC_R1_treat_pileup_vs_MV411_IRF8_NT_VHL-MYC_R1_treat_pileup_c3.0_cond1.bed [Content-Type=application/octet-stream]...
Copying file://../../data/chipseq_IRF8_degraded/diffPeaks_unscaled/IRF8_mat.gz [Content-Type=application/octet-stream]...
Copy

Copying file://../../data/chipseq_IRF8_degraded/diffData_unscaled/MV411_IRF8_NT_DMSO-POLII_total_R1_control_lambda.bdg [Content-Type=application/octet-stream]...
Copying file://../../data/chipseq_IRF8_degraded/diffData_unscaled/MV411_IRF8_NT_DMSO-POLII_total_R1_peaks.narrowPeak [Content-Type=application/octet-stream]...
Copying file://../../data/chipseq_IRF8_degraded/diffData_unscaled/MV411_IRF8_NT_VHL-MEF2C_R1_peaks.xls [Content-Type=application/vnd.ms-excel]...
Copying file://../../data/chipseq_IRF8_degraded/diffData_unscaled/MV411_IRF8_NT_DMSO-MYC_R1_peaks.narrowPeak [Content-Type=application/octet-stream]...
Copying file://../../data/chipseq_IRF8_degraded/diffData_unscaled/MV411_IRF8_NT_DMSO-MYC_R1_summits.bed [Content-Type=application/octet-stream]...
Copying file://../../data/chipseq_IRF8_degraded/diffData_unscaled/MV411_IRF8_NT_VHL-IRF8_R1_summits.bed [Content-Type=application/octet-stream]...
Copying file://../../data/chipseq_IRF8_degraded/diffData_unscaled/MV411_IRF8_NT_VHL-ME