In [1]:
import os
import pandas as pd
import sys
import numpy as np
import sys
sys.path.insert(0, '../..')
import itertools

from JKBio import TerraFunction as terra
from ccle_processing.src.CCLE_postp_function import *
from JKBio import Helper as h
from JKBio.epigenetics import ChIP_helper as chip

import dalmatian as dm
import pyBigWig

from bokeh.plotting import *
from IPython.display import IFrame
import igv

from sklearn.manifold import MDS, TSNE
from sklearn.decomposition import PCA
from sklearn.preprocessing import scale
from sklearn.cluster import AgglomerativeClustering
from sklearn.mixture import GaussianMixture

output_notebook()
%load_ext autoreload
%autoreload 2

you need to have installed JKBio in the same folder as ccle_processing


# ChIP

In [3]:
project="IRF2BP2_degron_ChIP"

In [None]:
!gsutil ls gs://amlproject/Chip/

In [None]:
# install bwa
! mkdir ../../ref
! curl ftp://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/001/215/GCF_000001215.4_Release_6_plus_ISO1_MT/GCF_000001215.4_Release_6_plus_ISO1_MT_genomic.fna.gz -o ../../ref/reference_droso.fna.gz
! gunzip ref/reference_droso.fna.gz
! bwa index -a bwtsw ../../ref/reference_droso.fna

## V1

### analysis

In [None]:
rename1 = {
 'gs://transfer-amlproject/20191211_10_MP7613_S8_R1_001.fastq.gz':"gs://amlproject/Chip/IRF2BP2_degraded_rep1/fastqs/mp779-MV411_IRF2BP2_DMSO_6h-FLAG_IRF2BP2-r1_1.fastq.gz",
 'gs://transfer-amlproject/20191211_10_MP7613_S8_R2_001.fastq.gz':"gs://amlproject/Chip/IRF2BP2_degraded_rep1/fastqs/mp779-MV411_IRF2BP2_DMSO_6h-FLAG_IRF2BP2-r1_2.fastq.gz",
 'gs://transfer-amlproject/20191211_11_MP7613_S9_R1_001.fastq.gz':"gs://amlproject/Chip/IRF2BP2_degraded_rep1/fastqs/mp780-MV411_IRF2BP2_DMSO_6h-MED1-r1_1.fastq.gz",
 'gs://transfer-amlproject/20191211_11_MP7613_S9_R2_001.fastq.gz':"gs://amlproject/Chip/IRF2BP2_degraded_rep1/fastqs/mp780-MV411_IRF2BP2_DMSO_6h-MED1-r1_2.fastq.gz",
 'gs://transfer-amlproject/20191211_12_MP7613_S10_R1_001.fastq.gz':"gs://amlproject/Chip/IRF2BP2_degraded_rep1/fastqs/mp781-MV411_IRF2BP2_DMSO_6h-POLII_total-r1_1.fastq.gz",
 'gs://transfer-amlproject/20191211_12_MP7613_S10_R2_001.fastq.gz':"gs://amlproject/Chip/IRF2BP2_degraded_rep1/fastqs/mp781-MV411_IRF2BP2_DMSO_6h-POLII_total-r1_2.fastq.gz",
 'gs://transfer-amlproject/20191211_13_MP7613_S11_R1_001.fastq.gz':"gs://amlproject/Chip/IRF2BP2_degraded_rep1/fastqs/mp782-MV411_IRF2BP2_DMSO_6h-POLII_S2-r1_1.fastq.gz",
 'gs://transfer-amlproject/20191211_13_MP7613_S11_R2_001.fastq.gz':"gs://amlproject/Chip/IRF2BP2_degraded_rep1/fastqs/mp782-MV411_IRF2BP2_DMSO_6h-POLII_S2-r1_2.fastq.gz",
 'gs://transfer-amlproject/20191211_14_MP7613_S12_R1_001.fastq.gz':"gs://amlproject/Chip/IRF2BP2_degraded_rep1/fastqs/mp783-MV411_IRF2BP2_DMSO_6h-POLII_S5-r1_1.fastq.gz",
 'gs://transfer-amlproject/20191211_14_MP7613_S12_R2_001.fastq.gz':"gs://amlproject/Chip/IRF2BP2_degraded_rep1/fastqs/mp783-MV411_IRF2BP2_DMSO_6h-POLII_S5-r1_2.fastq.gz",
 'gs://transfer-amlproject/20191211_15_MP7613_S13_R1_001.fastq.gz':"gs://amlproject/Chip/IRF2BP2_degraded_rep1/fastqs/mp784-MV411_IRF2BP2_DMSO_6h-MYC-r1_1.fastq.gz",
 'gs://transfer-amlproject/20191211_15_MP7613_S13_R2_001.fastq.gz':"gs://amlproject/Chip/IRF2BP2_degraded_rep1/fastqs/mp784-MV411_IRF2BP2_DMSO_6h-MYC-r1_2.fastq.gz",
 'gs://transfer-amlproject/20191211_16_MP7613_S14_R1_001.fastq.gz':"gs://amlproject/Chip/IRF2BP2_degraded_rep1/fastqs/mp785-MV411_IRF2BP2_DMSO_6h-MYB-r1_1.fastq.gz",
 'gs://transfer-amlproject/20191211_16_MP7613_S14_R2_001.fastq.gz':"gs://amlproject/Chip/IRF2BP2_degraded_rep1/fastqs/mp785-MV411_IRF2BP2_DMSO_6h-MYB-r1_2.fastq.gz",
 'gs://transfer-amlproject/20191211_1_MP7613_S1_R1_001.fastq.gz':"gs://amlproject/Chip/IRF2BP2_degraded_rep1/fastqs/mp786-MV411_IRF2BP2_DMSO_6h-SPI1-r1_1.fastq.gz",
 'gs://transfer-amlproject/20191211_1_MP7613_S1_R2_001.fastq.gz':"gs://amlproject/Chip/IRF2BP2_degraded_rep1/fastqs/mp786-MV411_IRF2BP2_DMSO_6h-SPI1-r1_2.fastq.gz",
 'gs://transfer-amlproject/20191211_2_MP7613_S2_R1_001.fastq.gz':"gs://amlproject/Chip/IRF2BP2_degraded_rep1/fastqs/mp787-MV411_IRF2BP_VHL_6h-FLAG_IRF2BP2-r1_1.fastq.gz",
 'gs://transfer-amlproject/20191211_2_MP7613_S2_R2_001.fastq.gz':"gs://amlproject/Chip/IRF2BP2_degraded_rep1/fastqs/mp787-MV411_IRF2BP_VHL_6h-FLAG_IRF2BP2-r1_2.fastq.gz",
 'gs://transfer-amlproject/20191211_3_MP7613_S3_R1_001.fastq.gz':"gs://amlproject/Chip/IRF2BP2_degraded_rep1/fastqs/mp788-MV411_IRF2BP_VHL_6h-MED1-r1_1.fastq.gz",
 'gs://transfer-amlproject/20191211_3_MP7613_S3_R2_001.fastq.gz':"gs://amlproject/Chip/IRF2BP2_degraded_rep1/fastqs/mp788-MV411_IRF2BP_VHL_6h-MED1-r1_2.fastq.gz",
 'gs://transfer-amlproject/20191211_4_MP7613_S4_R1_001.fastq.gz':"gs://amlproject/Chip/IRF2BP2_degraded_rep1/fastqs/mp789-MV411_IRF2BP_VHL_6h-POLII_total-r1_1.fastq.gz",
 'gs://transfer-amlproject/20191211_4_MP7613_S4_R2_001.fastq.gz':"gs://amlproject/Chip/IRF2BP2_degraded_rep1/fastqs/mp789-MV411_IRF2BP_VHL_6h-POLII_total-r1_2.fastq.gz",
 'gs://transfer-amlproject/20191211_5R_MP7613_S15_R1_001.fastq.gz':"gs://amlproject/Chip/IRF2BP2_degraded_rep1/fastqs/mp790-MV411_IRF2BP_VHL_6h-POLII_S2-r1_1.fastq.gz",
 'gs://transfer-amlproject/20191211_5R_MP7613_S15_R2_001.fastq.gz':"gs://amlproject/Chip/IRF2BP2_degraded_rep1/fastqs/mp790-MV411_IRF2BP_VHL_6h-POLII_S2-r1_2.fastq.gz",
 'gs://transfer-amlproject/20191211_6R_MP7613_S16_R1_001.fastq.gz':"gs://amlproject/Chip/IRF2BP2_degraded_rep1/fastqs/mp791-MV411_IRF2BP_VHL_6h-POLII_S5-r1_1.fastq.gz",
 'gs://transfer-amlproject/20191211_6R_MP7613_S16_R2_001.fastq.gz':"gs://amlproject/Chip/IRF2BP2_degraded_rep1/fastqs/mp791-MV411_IRF2BP_VHL_6h-POLII_S5-r1_2.fastq.gz",
 'gs://transfer-amlproject/20191211_7_MP7613_S5_R1_001.fastq.gz':"gs://amlproject/Chip/IRF2BP2_degraded_rep1/fastqs/mp792-MV411_IRF2BP_VHL_6h-MYC-r1_1.fastq.gz",
 'gs://transfer-amlproject/20191211_7_MP7613_S5_R2_001.fastq.gz':"gs://amlproject/Chip/IRF2BP2_degraded_rep1/fastqs/mp792-MV411_IRF2BP_VHL_6h-MYC-r1_2.fastq.gz",
 'gs://transfer-amlproject/20191211_8_MP7613_S6_R1_001.fastq.gz':"gs://amlproject/Chip/IRF2BP2_degraded_rep1/fastqs/mp793-MV411_IRF2BP_VHL_6h-MYB-r1_1.fastq.gz",
 'gs://transfer-amlproject/20191211_8_MP7613_S6_R2_001.fastq.gz':"gs://amlproject/Chip/IRF2BP2_degraded_rep1/fastqs/mp793-MV411_IRF2BP_VHL_6h-MYB-r1_2.fastq.gz",
 'gs://transfer-amlproject/20191211_9_MP7613_S7_R1_001.fastq.gz':"gs://amlproject/Chip/IRF2BP2_degraded_rep1/fastqs/mp794-MV411_IRF2BP_VHL_6h-SPI1-r1_1.fastq.gz",
 'gs://transfer-amlproject/20191211_9_MP7613_S7_R2_001.fastq.gz':"gs://amlproject/Chip/IRF2BP2_degraded_rep1/fastqs/mp794-MV411_IRF2BP_VHL_6h-SPI1-r1_2.fastq.gz"}

In [None]:
for k, val in rename1.items():
    os.system('gsutil mv '+k+' '+val)

In [None]:
! gsutil -m cp -r gs://amlproject/Chip/IRF2BP2_degraded_rep1 ../../data/
inputfastq="gs://amlproject/Chip/fastqs/mp99-MV411-INPUT-r1.fastq.gz"
! gsutil cp $inputfastq ../../data/IRF2BP2_degraded_rep1/fastqs/
a = ! ls ../../data/IRF2BP2_degraded_rep1/fastqs/

In [None]:
from gsheets import Sheets
sheets = Sheets.from_files('~/.client_secret.json', '~/.storage.json')
url="https://docs.google.com/spreadsheets/d/1yFLjYB1McU530JnLgL0QIMAKIkVl3kl0_LCHje2gk8U"
gsheet = sheets.get(url).sheets[2].to_frame()

In [None]:
gsheet

In [None]:
df = {
"fastq1": [],
"fastq2": [],
"antibody": [],
"group": [],
"replicate": [],
"control": []
}
for val in helper.grouped(a[:-1],2):
    row = gsheet[gsheet.id==val[0].split('/')[-1].split('_')[0]]
    df['group'].append((row.id.values[0] + '_'+ row.name_replicate.values[0] +"_"+row.protein.values[0]))
    df['replicate'].append(1)
    df['fastq1'].append(val[0])
    #df['fastq2'].append(val[1])
    df['antibody'].append(row.protein.values[0])
    df['control'].append("INPUT")
df['group'].append('INPUT')
df['replicate'].append(1)
df['fastq1'].append(a[-1])
#df['fastq2'].append(val[1])
df['antibody'].append("INPUT")
df['control'].append("INPUT")
df = pd.DataFrame(df)

In [None]:
df[df.columns[[3,4,0,1,2,5]]].to_csv('../nextflow/design_IRF2BP2_degraded_rep1.csv')

In [None]:
#process chips
! sudo ./nextflow run nf-core/chipseq --single_end --seq_center 'DFCI' --email 'jkobject@gmail.com' --narrow_peak --input ../nextflow/design_IRF2BP2_degraded_rep1.csv --genome GRCh38 --skip_preseq --max_cpus 24 -profile docker -w work

In [None]:
#!gsutil cp results/* ../../data/IRF2BP2_degraded_rep1/ && sudo rm -r results && sudo rm -r work

In [None]:
# get scaling values
mappedreads, umappedreads_norm, mapped = chip.getSpikeInControlScales(refgenome="../../ref/reference_droso.fna",FastQfolder='../../data/IRF2BP2_degraded_rep1/fastqs/',pairedEnd=True, cores=8)
mappedreads, umappedreads_norm, mapped

computing scales from the excel sheet

In [None]:
scales = [[562285,1496707],
[1686168,7198567],
[3642441,2612624],
[3992589,3474812],
[3347901,3829477],
[6181136,989703],
[7523840,4173047],
[922482,1195857]]

In [None]:
scales = [[1.0, 0.3756814126], #IRF2BP2
[1.0, 0.2342366196], #MED1
[0.5546432407, 1.0], #MYB
[0.1601166841, 1.0], #MYC
[0.870315477, 1.0], #POL2S2
[1.0, 0.8742449687], #POL2S5
[0.7172728398, 1.0], #POL2TOT
[1.0, 0.7713982525]] #SPI1

### on scalled data


In [None]:
bams = !ls ../../data/IRF2BP2_degraded_rep1/diffData/*treat_pileup.bdg
bams = ['_'.join(i.split('/')[-1].split('_')[:-2]) for i in bams]
bams

In [None]:
# diffPeak on scalled data
sizes = [220, 191, 211, 204, 285, 222, 228, 194]
for i in range(int(len(bams)/2)):
    name1 = bams[i]
    name2 = bams[i+8]
    print(name1,name2)
    print(chip.diffPeak(name1,name2, directory= "../../data/IRF2BP2_degraded_rep1/diffData/", res_directory='../../data/IRF2BP2_degraded_rep1/diffPeaks/', scaling1=scales[i][1], scaling2=scales[i][0], size=size[i]))

In [None]:
# diffPeak on scalled data
for i in range(int(len(bams[1:])/2)-1):
    name1 = bams[1+i]
    name2 = bams[9+i]
    chip.fullDiffPeak(name1,name2, bams[0], scaling = scales[i],compute_size=True)

In [None]:
scales = [1.0,
1.0,
0.5546432407,
0.1601166841,
0.870315477,
1.0,
0.7172728398,
1.0,
0.3756814126,
0.2342366196,
1.0,
1.0,
1.0,
0.8742449687,
1.0,
0.7713982525]

In [None]:
chip.bigWigFrom(bams[1:], genome='GRCh38',scaling=scales)

In [None]:
! mv ../../data/recalib_bigwig_rep1/* ../../data/IRF2BP2_degraded_rep1/recalib_bigwig/
bw = ! ls ../../data/IRF2BP2_degraded_rep1/recalib_bigwig/*.bw
bw

In [None]:
cond1peak = ! ls ../../data/IRF2BP2_degraded_rep1/diffPeaks/*cond1.bed
cond2peak = ! ls ../../data/IRF2BP2_degraded_rep1/diffPeaks/*cond2.bed
commonpeak = ! ls ../../data/IRF2BP2_degraded_rep1/diffPeaks/*common.bed
commonpeak

In [None]:
names = ["FLAG_IRF2BP2","MED1","MYB","MYC","POLII_S2","POLII_S5","POLII_total","SPI1"]

In [None]:
for i in range(int(len(bw)/2)):
    if i<0:
        continue
    name1 = bw[i]
    name2 = bw[8+i]
    peak = [cond1peak[i], commonpeak[i], cond2peak[i]]
    for val in peak:
        chip.dropWeirdChromosomes(val)
    name = names[i]
    print(name1,name2)
    chip.getPeaksAt(peak, [name1, name2], bigwignames=['DMSO', 'VHL'], peaknames=['DMSO_peaks', 'common', 'VHL_peaks'], window=3000, refpoint='center', folder="", title=name, numthreads=7, torecompute=True, name='../../data/IRF2BP2_degraded_rep1/diffPeaks/'+name+'_mat.pdf', withDeeptools=True)

In [None]:
peaks = ! ls ../../data/IRF2BP2_degraded_rep1/bwa/mergedLibrary/macs/narrowPeak/*.narrowPeak
peaks

In [None]:
names = ["FLAG_IRF2BP2","MED1","MYB","MYC","POLII_S2","POLII_S5","POLII_total","SPI1"]
names.extend([i+'_VHL' for i in names])
names

In [None]:
for i in range(len(bw)):
    chip.getPeaksAt(peaks[i], bw[i], window=3000, folder="", title=names[i], numthreads=7, torecompute=True, onlyProfile=True, refpoint='center', name='../../data/IRF2BP2_degraded_rep1/diffPeaks/'+names[i]+'_mat_profile.pdf', withDeeptools=True)
    chip.getPeaksAt(peaks[i], bw[i], window=3000, folder="", title=names[i], numthreads=7, torecompute=True, refpoint='center',onlyProfile=True,name='../../data/IRF2BP2_degraded_rep1/diffPeaks/'+names[i]+'_mat_profile_clust3.pdf', withDeeptools=True, cluster=3)

### on unscalled data

In [None]:
bams = !ls ../../data/IRF2BP2_degraded_rep1/bwa/mergedLibrary/mp*.bam
bams

In [None]:
! mkdir ../../data/IRF2BP2_degraded_rep1/diffPeaks_unscaled

In [None]:
bams= list(bams)

In [None]:
bams

In [None]:
size

In [None]:
# on unscalled data 
for i in range(int((len(bams)-1)/2)-1):
    if i < 0:
        continue
    name1 = bams[1+i]
    name2 = bams[9+i]
    chip.fullDiffPeak(name1,name2, control1=bams[0], size=size[i], compute_size=False, directory = "../../data/IRF2BP2_degraded_rep1/diffData_unscaled/", res_directory = "../../data/IRF2BP2_degraded_rep1/diffPeaks_unscaled/",pairedend=False)

In [None]:
bw = ! ls ../../data/IRF2BP2_degraded_rep1/bwa/mergedLibrary/bigwig/*.bigWig

In [None]:
commonpeak = ! ls ../../data/IRF2BP2_degraded_rep1/diffPeaks_unscaled/*common.bed
cond1peak = ! ls ../../data/IRF2BP2_degraded_rep1/diffPeaks_unscaled/*cond1.bed
cond2peak = ! ls ../../data/IRF2BP2_degraded_rep1/diffPeaks_unscaled/*cond2.bed
cond2peak

In [None]:
names = ["FLAG_IRF2BP2","MED1","POLII_total","POLII_S2","POLII_S5","MYC","MYB","SPI1"]

In [None]:
for i in range(int(len(bw[1:])/2)-1):
    name1 = bw[1+i]
    name2 = bw[9+i]
    peak = [cond1peak[i], commonpeak[i], cond2peak[i]]
    for val in peak:
        chip.dropWeirdChromosomes(val)
    name = names[i]
    chip.getPeaksAt(peak, [name1, name2], bigwignames=['DMSO', 'VHL'], peaknames=['DMSO_peaks', 'common', 'VHL_peaks'], window=3000, title=name, numthreads=7, refpoint='center', folder="../../data/IRF2BP2_degraded_rep1/bwa/mergedLibrary/bigwig/", torecompute=True, name='../../data/IRF2BP2_degraded_rep1/diffPeaks_unscaled/'+name+'_mat.pdf', withDeeptools=True)

In [None]:
! gsutil -m cp -r ../../data/IRF2BP2_degraded_rep1 gs://amlproject/Chip/
# rm -r ../../data/IRF2BP2_degraded_rep1

## v2

In [None]:
# mkdir ../../data/IRF2BP2_degraded_rep2/ && mkdir ../../data/IRF2BP2_degraded_rep2/fastqs && gsutil -m cp "gs://transfer-amlproject/20191219_MP7659*" ../../data/IRF2BP2_degraded_rep2/fastqs/

### analysis

In [None]:
rename = {
"20191219_MP7659_1_S1_R1_001.fastq.gz":"mp795-MV411_IRF2BP2_DMSO_6h-FLAG_IRF2BP2-r2_R1.fastq.gz",
"20191219_MP7659_1_S1_R2_001.fastq.gz":"mp795-MV411_IRF2BP2_DMSO_6h-FLAG_IRF2BP2-r2_R2.fastq.gz",
"20191219_MP7659_2_S2_R1_001.fastq.gz":"mp796-MV411_IRF2BP2_DMSO_6h-MED1-r2_R1.fastq.gz",
"20191219_MP7659_2_S2_R2_001.fastq.gz":"mp796-MV411_IRF2BP2_DMSO_6h-MED1-r2_R2.fastq.gz",
"20191219_MP7659_3_S3_R1_001.fastq.gz":"mp797-MV411_IRF2BP2_DMSO_6h-POLII_total-r2_R1.fastq.gz",
"20191219_MP7659_3_S3_R2_001.fastq.gz":"mp797-MV411_IRF2BP2_DMSO_6h-POLII_total-r2_R2.fastq.gz",
"20191219_MP7659_4_S4_R1_001.fastq.gz":"mp798-MV411_IRF2BP2_DMSO_6h-POLII_S2-r2_R1.fastq.gz",
"20191219_MP7659_4_S4_R2_001.fastq.gz":"mp798-MV411_IRF2BP2_DMSO_6h-POLII_S2-r2_R2.fastq.gz",
"20191219_MP7659_5_S5_R1_001.fastq.gz":"mp799-MV411_IRF2BP2_DMSO_6h-POLII_S5-r2_R1.fastq.gz",
"20191219_MP7659_5_S5_R2_001.fastq.gz":"mp799-MV411_IRF2BP2_DMSO_6h-POLII_S5-r2_R2.fastq.gz",
"20191219_MP7659_6_S6_R1_001.fastq.gz":"mp800-MV411_IRF2BP2_DMSO_6h-MYC-r2_R1.fastq.gz",
"20191219_MP7659_6_S6_R2_001.fastq.gz":"mp800-MV411_IRF2BP2_DMSO_6h-MYC-r2_R2.fastq.gz",
"20191219_MP7659_7_S7_R1_001.fastq.gz":"mp801-MV411_IRF2BP2_DMSO_6h-MYB-r2_R1.fastq.gz",
"20191219_MP7659_7_S7_R2_001.fastq.gz":"mp801-MV411_IRF2BP2_DMSO_6h-MYB-r2_R2.fastq.gz",
"20191219_MP7659_8_S8_R1_001.fastq.gz":"mp802-MV411_IRF2BP2_DMSO_6h-SPI1-r2_R1.fastq.gz",
"20191219_MP7659_8_S8_R2_001.fastq.gz":"mp802-MV411_IRF2BP2_DMSO_6h-SPI1-r2_R2.fastq.gz",
"20191219_MP7659_9_S9_R1_001.fastq.gz":"mp803-MV411_IRF2BP_VHL_6h-FLAG_IRF2BP2-r2_R1.fastq.gz",
"20191219_MP7659_9_S9_R2_001.fastq.gz":"mp803-MV411_IRF2BP_VHL_6h-FLAG_IRF2BP2-r2_R2.fastq.gz",
"20191219_MP7659_10_S10_R1_001.fastq.gz":"mp804-MV411_IRF2BP_VHL_6h-MED1-r2_R1.fastq.gz",
"20191219_MP7659_10_S10_R2_001.fastq.gz":"mp804-MV411_IRF2BP_VHL_6h-MED1-r2_R2.fastq.gz",
"20191219_MP7659_11_S11_R1_001.fastq.gz":"mp805-MV411_IRF2BP_VHL_6h-POLII_total-r2_R1.fastq.gz",
"20191219_MP7659_11_S11_R2_001.fastq.gz":"mp805-MV411_IRF2BP_VHL_6h-POLII_total-r2_R2.fastq.gz",
"20191219_MP7659_12_S12_R1_001.fastq.gz":"mp806-MV411_IRF2BP_VHL_6h-POLII_S2-r2_R1.fastq.gz",
"20191219_MP7659_12_S12_R2_001.fastq.gz":"mp806-MV411_IRF2BP_VHL_6h-POLII_S2-r2_R2.fastq.gz",
"20191219_MP7659_13_S13_R1_001.fastq.gz":"mp807-MV411_IRF2BP_VHL_6h-POLII_S5-r2_R1.fastq.gz",
"20191219_MP7659_13_S13_R2_001.fastq.gz":"mp807-MV411_IRF2BP_VHL_6h-POLII_S5-r2_R2.fastq.gz",
"20191219_MP7659_14_S14_R1_001.fastq.gz":"mp808-MV411_IRF2BP_VHL_6h-MYC-r2_R1.fastq.gz",
"20191219_MP7659_14_S14_R2_001.fastq.gz":"mp808-MV411_IRF2BP_VHL_6h-MYC-r2_R2.fastq.gz",
"20191219_MP7659_15_S15_R1_001.fastq.gz":"mp809-MV411_IRF2BP_VHL_6h-MYB-r2_R1.fastq.gz",
"20191219_MP7659_15_S15_R2_001.fastq.gz":"mp809-MV411_IRF2BP_VHL_6h-MYB-r2_R2.fastq.gz",
"20191219_MP7659_16_S16_R1_001.fastq.gz":"mp810-MV411_IRF2BP_VHL_6h-SPI1-r2_R1.fastq.gz",
"20191219_MP7659_16_S16_R2_001.fastq.gz":"mp810-MV411_IRF2BP_VHL_6h-SPI1-r2_R2.fastq.gz"
}

In [None]:
for k,v in rename.items():
    ! mv ../../data/IRF2BP2_degraded_rep2/fastqs/$k ../../data/IRF2BP2_degraded_rep2/fastqs/$v


In [None]:
inputfastq="gs://amlproject/Chip/fastqs/mp99-MV411-INPUT-r1.fastq.gz"
! gsutil cp $inputfastq ../../data/IRF2BP2_degraded_rep2/fastqs
a = ! ls ../../data/IRF2BP2_degraded_rep2/fastqs

In [None]:
from gsheets import Sheets
sheets = Sheets.from_files('~/.client_secret.json', '~/.storage.json')
url="https://docs.google.com/spreadsheets/d/1yFLjYB1McU530JnLgL0QIMAKIkVl3kl0_LCHje2gk8U"
gsheet = sheets.get(url).sheets[2].to_frame()

In [None]:
df = {
"fastq1": [],
"fastq2": [],
"antibody": [],
"group": [],
"replicate": [],
"control": []
}
for val in helper.grouped(a[:-1],2):
    row = gsheet[gsheet.id==val[0].split('/')[-1].split('_')[0]]
    df['group'].append((row.id.values[0] + '_'+ row.name_replicate.values[0] +"_"+row.protein.values[0]))
    df['replicate'].append(1)
    df['fastq1'].append(val[0])
    #df['fastq2'].append(val[1])
    df['antibody'].append(row.protein.values[0])
    df['control'].append("INPUT")
df['group'].append('INPUT')
df['replicate'].append(1)
df['fastq1'].append(a[-1])
#df['fastq2'].append(val[1])
df['antibody'].append("INPUT")
df['control'].append("INPUT")
df = pd.DataFrame(df)

In [None]:
df[df.columns[[3,4,0,1,2,5]]].to_csv('../nextflow/IRF2BP2_degraded_rep2_design.csv')

In [None]:
#process chips
! sudo ./nextflow run nf-core/chipseq --single_end --seq_center 'DFCI' --email 'jkobject@gmail.com' --narrow_peak --input ../nextflow/IRF2BP2_degraded_rep2_design.csv --genome GRCh38 --skip_preseq --max_cpus 24 -profile docker -w work

In [None]:
#!gsutil cp results/* ../../data/IRF2BP2_degraded_rep2/ && sudo rm -r work && sudo rm -r results

In [None]:
# get scaling values
mappedreads, umappedreads_norm, mapped = chip.getSpikeInControlScales(refgenome="../../ref/reference_droso.fna",FastQfolder='../../data/IRF2BP2_degraded_rep2/fastqs/',pairedEnd=True, cores=8)
mappedreads, umappedreads_norm, mapped

computing scales from the excel sheet

In [None]:
scales = [[508878,637972],
[1929129,11595],
[669536,429562],
[1272730,810802],
[743859,893304],
[312888,1154119],
[1086031,880901],
[850181,1019640]]

In [None]:
scales = [[1.0, 0.7976494266],
[0.006010484524, 1.0],
[0.6415816327, 1.0],
[0.6370573492, 1.0],
[1.0, 0.8327053276],
[1.0, 0.2711054926],
[0.8111195721, 1.0],
[1.0, 0.8338050685]]

### on scalled data

In [None]:
# on scaled data
bams = ! ls ../../IRF2BP2_degraded_rep2/bwa/mergedLibrary/*.bam
for i in range(int(len(bams[1:])/2)-1):
    name1 = bams[1+val]
    name2 = bams[9+val]
    chip.fullDiffPeak(name1,name2,control1='../../data/IRF2BP2_degraded_rep2/bwa/mergedLibrary/INPUT_R1.mLb.clN.sorted.bam', directory = "../../data/IRF2BP2_degraded_rep2/diffData_unscaled/", res_directory = "../../data/IRF2BP2_degraded_rep2/diffPeaks_unscaled/", scaling = scales[val][::-1])

In [None]:
bams = ! ls ../../data/IRF2BP2_degraded_rep2/bwa/mergedLibrary/*.bam
bams = [i.split('/')[-1].split('.')[0] for i in bams]
bams

In [None]:
# diffPeak on scalled data
sizes = [220, 191, 228, 285, 222, 204, 211, 194]
for i in range(int(len(bams[1:])/2)-1):
    name1 = bams[1+i]
    name2 = bams[9+i]
    chip.diffPeak(name1,name2, directory= "../../data/IRF2BP2_degraded_rep2/diffData/", res_directory='../../data/IRF2BP2_degraded_rep2/diffPeaks/', scaling1=scales[i][1], scaling2=scales[i][0], size=size[i])

In [None]:
scales = [1.0,
0.006010484524,
0.6415816327,
0.6370573492,
1.0,
1.0,
0.8111195721,
1.0,
0.7976494266,
1.0,
1.0,
1.0,
0.8327053276,
0.2711054926,
1.0,
0.8338050685]

In [None]:
chip.bigWigFrom(bams[1:],genome='GRCh38',scaling=scales)

In [None]:
! mv ../../data/recalib_bigwig_rep2/* ../../data/IRF2BP2_degraded_rep2/recalib_bigwig/
bw = ! ls ../../data/IRF2BP2_degraded_rep2/recalib_bigwig/*.bw
bw

In [None]:
cond1peak = ! ls ../../data/IRF2BP2_degraded_rep2/diffPeaks/*cond1.bed
cond2peak = ! ls ../../data/IRF2BP2_degraded_rep2/diffPeaks/*cond2.bed
commonpeak = ! ls ../../data/IRF2BP2_degraded_rep2/diffPeaks/*common.bed
commonpeak

In [None]:
names = ["FLAG_IRF2BP2","MED1","POLII_total","POLII_S2","POLII_S5","MYC","MYB","SPI1"]

In [None]:
for i in range(int(len(bw)/2)):
    name1 = bw[i]
    name2 = bw[8+i]
    peak = [cond1peak[i], commonpeak[i], cond2peak[i]]
    for val in peak:
        chip.dropWeirdChromosomes(val)
    name = names[i]
    chip.getPeaksAt(peak, [name1, name2], bigwignames=['DMSO', 'VHL'], peaknames=['DMSO_peaks', 'common', 'VHL_peaks'], window=3000, folder="", title=name, numthreads=20, name='../../data/IRF2BP2_degraded_rep2/diffPeaks/'+name+'_mat.pdf', refpoint='center', withDeeptools=True, torecompute= True,)

In [None]:
peaks = ! ls ../../data/IRF2BP2_degraded_rep2/bwa/mergedLibrary/macs/narrowPeak/*.narrowPeak
peaks

In [None]:
names = ["FLAG_IRF2BP2","MED1","POLII_total","POLII_S2","POLII_S5","MYC","MYB","SPI1"]
names.extend([i+'_VHL' for i in names])
names

In [None]:
bw

In [None]:
for i in range(len(bw)-1):
    chip.getPeaksAt(peaks[i], bw[1+i], window=3000, folder="", title=names[i], numthreads=7, torecompute=True, onlyProfile=True, name='../../data/IRF2BP2_degraded_rep2/diffPeaks/'+names[i]+'_mat_profile.pdf', refpoint='center', withDeeptools=True)
    chip.getPeaksAt(peaks[i], bw[1+i], window=3000, folder="", title=names[i], numthreads=7, torecompute=True, refpoint='center', onlyProfile=True, name='../../data/IRF2BP2_degraded_rep2/diffPeaks/'+names[i]+'_mat_profile_clust3.pdf', withDeeptools=True, cluster=3)

### unscalled

In [None]:
bams = !ls ../../data/IRF2BP2_degraded_rep2/bwa/mergedLibrary/mp*.bam
bams

In [None]:
!mkdir  ../../data/IRF2BP2_degraded_rep2/diffPeaks_unscaled
bw = ! ../../data/IRF2BP2_degraded_rep2/bwa/mergedLibrary/bigwig/*.bigWig

In [None]:
# on unscalled data 
for i in range(int(len(bams)/2)):
    if i <0:
        continue
    name1 = bams[i]
    name2 = bams[8+i]
    chip.fullDiffPeak(name1,name2, control1='../../data/results3/bwa/mergedLibrary/INPUT_R1.mLb.clN.sorted.bam', directory = "../../data/IRF2BP2_degraded_rep2/diffData_unscaled/", res_directory = "../../data/IRF2BP2_degraded_rep2/diffPeaks_unscaled/", pairedend=False)

In [None]:
commonpeak = ! ls ../../data/IRF2BP2_degraded_rep2/diffPeaks_unscaled/*common.bed
cond1peak = ! ls ../../data/IRF2BP2_degraded_rep2/diffPeaks_unscaled/*cond1.bed
cond2peak = ! ls ../../data/IRF2BP2_degraded_rep2/diffPeaks_unscaled/*cond2.bed
cond2peak

In [None]:
names = ["FLAG_IRF2BP2","MED1","POLII_total","POLII_S2","POLII_S5","MYC","MYB","SPI1"]

In [None]:
for i in range(int(len(bw[1:])/2)):
    name1 = bw[1+i]
    name2 = bw[9+i]
    peak = [cond1peak[i], commonpeak[i], cond2peak[i]]
    for val in peak:
        chip.dropWeirdChromosomes(val)
    name = names[i]
    chip.getPeaksAt(peak, [name1, name2], bigwignames=['DMSO', 'VHL'], peaknames=['DMSO_peaks', 'common', 'VHL_peaks'], window=3000, title=name, numthreads=7, torecompute=True, refpoint='center', folder="", name='../../data/IRF2BP2_degraded_rep2/diffPeaks_unscaled/'+name+'_mat.pdf', withDeeptools=True)

In [None]:
! gsutil -m cp -r "../../data/IRF2BP2_degraded_rep2" gs://amlproject/Chip/

## v3

In [None]:
! gsutil mv gs://transfer-amlproject/*MP7781*  gs://transfer-amlproject/IRF2BP2_v3/

### analysis

In [None]:
! mkdir ../../data/IRF2BP2_degraded_rep3 && mkdir ../../data/IRF2BP2_degraded_rep3/fastqs && gsutil -m cp gs://transfer-amlproject/IRF2BP2_v3/* ../../data/IRF2BP2_degraded_rep3/fastqs/

In [None]:
rename = {
"20200203_1_MP7781_S67_R1_001.fastq.gz":"mp831-MV411_IRF2BP_DMSO_6h-CDK8-r1_R1.fastq.gz",
"20200203_1_MP7781_S67_R2_001.fastq.gz":"mp831-MV411_IRF2BP_DMSO_6h-CDK8-r1_R2.fastq.gz",
"20200203_2_MP7781_S68_R1_001.fastq.gz":"mp832-MV411_IRF2BP_DMSO_6h-BRD4-r1_R1.fastq.gz",
"20200203_2_MP7781_S68_R2_001.fastq.gz":"mp832-MV411_IRF2BP_DMSO_6h-BRD4-r1_R2.fastq.gz",
"20200203_3_MP7781_S69_R1_001.fastq.gz":"mp833-MV411_IRF2BP_DMSO_6h-IRF8-r1_R1.fastq.gz",
"20200203_3_MP7781_S69_R2_001.fastq.gz":"mp833-MV411_IRF2BP_DMSO_6h-IRF8-r1_R2.fastq.gz",
"20200203_4_MP7781_S70_R1_001.fastq.gz":"mp834-MV411_IRF2BP_DMSO_6h-SMC1-r1_R1.fastq.gz",
"20200203_4_MP7781_S70_R2_001.fastq.gz":"mp834-MV411_IRF2BP_DMSO_6h-SMC1-r1_R2.fastq.gz",
"20200203_5_MP7781_S71_R2_001.fastq.gz":"mp835-MV411_IRF2BP_DMSO_6h-MED1-r3_R2.fastq.gz",
"20200203_5_MP7781_S71_R1_001.fastq.gz":"mp835-MV411_IRF2BP_DMSO_6h-MED1-r3_R1.fastq.gz",
"20200203_6_MP7781_S72_R1_001.fastq.gz":"mp836-MV411_IRF2BP_DMSO_6h-ZEB2-r1_R1.fastq.gz",
"20200203_6_MP7781_S72_R2_001.fastq.gz":"mp836-MV411_IRF2BP_DMSO_6h-ZEB2-r1_R2.fastq.gz",
"20200203_7_MP7781_S73_R1_001.fastq.gz":"mp837-MV411_IRF2BP_DMSO_6h-CEBPA-r1_R1.fastq.gz",
"20200203_7_MP7781_S73_R2_001.fastq.gz":"mp837-MV411_IRF2BP_DMSO_6h-CEBPA-r1_R2.fastq.gz",
"20200203_8_MP7781_S74_R1_001.fastq.gz":"mp838-MV411_IRF2BP_VHL_6h-CDK8-r1_R1.fastq.gz",
"20200203_8_MP7781_S74_R2_001.fastq.gz":"mp838-MV411_IRF2BP_VHL_6h-CDK8-r1_R2.fastq.gz",
"20200203_9_MP7781_S75_R1_001.fastq.gz":"mp839-MV411_IRF2BP_VHL_6h-BRD4-r1_R1.fastq.gz",
"20200203_9_MP7781_S75_R2_001.fastq.gz":"mp839-MV411_IRF2BP_VHL_6h-BRD4-r1_R2.fastq.gz",
"20200203_10_MP7781_S76_R2_001.fastq.gz":"mp840-MV411_IRF2BP_VHL_6h-IRF8-r1_R2.fastq.gz",
"20200203_10_MP7781_S76_R1_001.fastq.gz":"mp840-MV411_IRF2BP_VHL_6h-IRF8-r1_R1.fastq.gz",
"20200203_11_MP7781_S77_R1_001.fastq.gz":"mp841-MV411_IRF2BP_VHL_6h-SMC1-r1_R1.fastq.gz",
"20200203_11_MP7781_S77_R2_001.fastq.gz":"mp841-MV411_IRF2BP_VHL_6h-SMC1-r1_R2.fastq.gz",
"20200203_12_MP7781_S78_R1_001.fastq.gz":"mp842-MV411_IRF2BP_VHL_6h-MED1-r3_R1.fastq.gz",
"20200203_12_MP7781_S78_R2_001.fastq.gz":"mp842-MV411_IRF2BP_VHL_6h-MED1-r3_R2.fastq.gz",
"20200203_13_MP7781_S79_R1_001.fastq.gz":"mp843-MV411_IRF2BP_VHL_6h-ZEB2-r1_R1.fastq.gz",
"20200203_13_MP7781_S79_R2_001.fastq.gz":"mp843-MV411_IRF2BP_VHL_6h-ZEB2-r1_R2.fastq.gz",
"20200203_14_MP7781_S80_R2_001.fastq.gz":"mp844-MV411_IRF2BP_VHL_6h-CEBPA-r1_R2.fastq.gz",
"20200203_14_MP7781_S80_R1_001.fastq.gz":"mp844-MV411_IRF2BP_VHL_6h-CEBPA-r1_R1.fastq.gz",
"20200203_Input_MP7781_S81_R1_001.fastq.gz":"mp845-MV411_IRF2BP2_-INPUT-r1_R1.fastq.gz",
"20200203_Input_MP7781_S81_R2_001.fastq.gz":"mp845-MV411_IRF2BP2_-INPUT-r1_R2.fastq.gz"
}

In [None]:
for k,v in rename.items():
    ! mv ../../data/IRF2BP2_degraded_rep3/fastqs/$k ../../data/IRF2BP2_degraded_rep3/fastqs/$v

In [None]:
from gsheets import Sheets
sheets = Sheets.from_files('~/.client_secret.json', '~/.storage.json')
url="https://docs.google.com/spreadsheets/d/1yFLjYB1McU530JnLgL0QIMAKIkVl3kl0_LCHje2gk8U"
gsheet = sheets.get(url).sheets[2].to_frame()

In [None]:
a = ! ls ../../data/IRF2BP2_degraded_rep3/fastqs

In [None]:
gsheet

In [None]:
df = {
"fastq_1": [],
"fastq_2": [],
"antibody": [],
"group": [],
"replicate": [],
"control": []
}
for val in helper.grouped(a[:-2],2):
    row = gsheet[gsheet.id==val[0].split('/')[-1].split('-')[0]]
    df['group'].append(row['name'].values[0])
    df['replicate'].append(1)
    df['fastq_1'].append(val[0])
    df['fastq_2'].append(val[1])
    df['antibody'].append(row['protein'].values[0])
    df['control'].append("INPUT")
df['group'].append('INPUT')
df['replicate'].append(1)
df['fastq_1'].append(a[-2])
df['fastq_2'].append(a[-1])
df['antibody'].append("")
df['control'].append("")
df = pd.DataFrame(df)

In [None]:
df

In [None]:
df[df.columns[[3,4,0,1,2,5]]].to_csv('../nextflow/IRF2BP2_degraded_rep3_design.csv',index=False)

In [None]:
#process chips
! sudo ../nextflow run nf-core/chipseq --paired_end --seq_center 'DFCI' --email \
'jkobject@gmail.com' --narrow_peak --input ../nextflow/../../data/IRF2BP2_degraded_rep3_design.csv --genome GRCh38 --skip_preseq \
--max_cpus 24 -profile docker -w work

In [None]:
ls

In [None]:
!cp -r results/* ../../data/IRF2BP2_degraded_rep3/ && sudo rm -r work && sudo rm -r results

In [None]:
ls ../TrimGalore-0.6.5/trim_galore

In [None]:
# get scaling values
mappedreads, umappedreads_norm = chip.getSpikeInControlScales(refgenome="../../ref/reference_droso.fna",
                                                                      fastQfolder='../../data/IRF2BP2_degraded_rep3/fastqs',
                                                                      pairedEnd=True, cores=8,
                                                                      tofilter=False,
                                                                      totrim=False,
                                                                      tomap=False,
                                                                      pathtotrim_galore="../TrimGalore-0.6.5/trim_galore")
mappedreads, umappedreads_norm

computing scales from the excel sheet

In [None]:
scales = [[536923,632558],
[601370,681405],
[2402198,1676203],
[417892,216192],
[1544590,1350802],
[1174994,1376726],
[289635,240366]]

In [None]:
scales= [[1.0, 0.8488122828],
[1.0, 0.8825441551],
[0.6977788675, 1.0],
[0.5173394083, 1.0],
[0.8745375796, 1.0],
[1.0, 0.8534697536],
[0.8298927961, 1.0]]

In [None]:
bams = ! ls ../../data/IRF2BP2_degraded_rep3/bwa/mergedLibrary/*.bam #../../data/results3/bwa/mergedLibrary/*.bam
bams

In [None]:
bams[7]

In [None]:
bams = [bam1.split('/')[-1].split('.')[0] for bam1 in bams]

### on scalled data

In [None]:
# diffPeak on scaled data
size=[206, 218, 189, 194, 217, 217, 176]
for i in range(int(len(bams)/2)):
    if i<0:
        continue
    name1 = bams[i]
    name2 = bams[7+i]
    print(name1,name2)
    chip.diffPeak(name1, name2, directory= "../../data/IRF2BP2_degraded_rep3/diffData/", res_directory='../../data/IRF2BP2_degraded_rep3/diffPeaks/', scaling1=scales[i][1], scaling2=scales[i][0], size=size[i])

In [None]:
os.popen('for i in $(ls ../../data/IRF2BP2_degraded_rep3/diffPeaks/*.bed); \
            do echo $(wc -l $i); \
            done').read().split('\n')

In [None]:
# diffPeak on scaled data
for i in range(int(len(bams)/2)):
    if i <0:
        continue
    name1 = bams[i]
    name2 = bams[7+i]
    print(name1,name2)
    chip.fullDiffPeak(name1,name2, control1='../../data/IRF2BP2_degraded_rep3/INPUT_R1.mLb.clN.sorted.bam', directory = "../../data/IRF2BP2_degraded_rep3/diffData/", res_directory = "../../data/IRF2BP2_degraded_rep3/diffPeaks/", isTF=True, compute_size=True, pairedend=False)

In [None]:
scales = [1.0,
1.0,
0.6977788675,
0.5173394083,
0.8745375796,
1.0,
0.8298927961,
0.8488122828,
0.8825441551,
1.0,
1.0,
1.0,
0.8534697536,
1.0]

In [None]:
bams

In [None]:
chip.bigWigFrom(bams[1:], 
                genome='GRCh38',scaling=scales,
               numthreads=8)

In [None]:
! mv diffPeaks ../../data/IRF2BP2_degraded_rep3
! mv diffData ../../data/IRF2BP2_degraded_rep3

In [None]:
!mv bigwig ../../data/recalib_bigwig_3 

In [None]:
os.popen('for i in $(ls ../../data/IRF2BP2_degraded_rep3/diffPeaks); \
            do echo $(wc -l "../../data/IRF2BP2_degraded_rep3/diffPeaks/"$i); \
            done').read().split('\n')

In [None]:
bw = ! ls ../../data/recalib_bigwig_3/*.bw
bw

In [None]:
cond1peak = ! ls ../../data/IRF2BP2_degraded_rep3/diffPeaks/*cond1.bed
cond2peak = ! ls ../../data/IRF2BP2_degraded_rep3/diffPeaks/*cond2.bed
commonpeak = ! ls ../../data/IRF2BP2_degraded_rep3/diffPeaks/*common.bed
cond1peak

In [None]:
names = ["CDK8","BRD4","IRF8","SMC1","MED1","ZEB2","CEBPA"]

In [None]:
for i in range(int(len(bw)/2)):
    if i<0:
        continue
    name1 = bw[i]
    name2 = bw[7+i]
    peak = [cond1peak[i], commonpeak[i], cond2peak[i]]
    #for val in peak:
        #chip.dropWeirdChromosomes(val)
    name = names[i]
    chip.getPeaksAt(peak, [name1, name2], bigwignames=['DMSO', 'VHL'], refpoint='center', peaknames=['DMSO_peaks', 'common', 'VHL_peaks'], window=3000, folder="", title=name, numthreads=14, name='../../data/IRF2BP2_degraded_rep3/diffPeaks/'+name+'_mat.pdf', withDeeptools=True, torecompute=True)

In [None]:
names = ["CDK8","BRD4","IRF8","SMC1","MED1","ZEB2","CEBPA"]
names.extend([i+'_VHL' for i in names])
names

In [None]:
peaks = ! ls ../../data/IRF2BP2_degraded_rep3/bwa/mergedLibrary/macs/narrowPeak/*.narrowPeak
peaks

In [None]:
for i in range(len(bw)):
    chip.getPeaksAt(peaks[i], bw[i], window=3000, folder="", title=names[i], numthreads=7, torecompute=True, onlyProfile=True, refpoint='center', name='../../data/IRF2BP2_degraded_rep3/diffPeaks/'+names[i]+'_mat_profile.pdf', withDeeptools=True)
    chip.getPeaksAt(peaks[i], bw[i], window=3000, folder="", title=names[i], numthreads=7, torecompute=True, refpoint='center', onlyProfile=True,name='../../data/IRF2BP2_degraded_rep3/diffPeaks/'+names[i]+'_mat_profile_clust3.pdf', withDeeptools=True, cluster=3)

### on unscalled data

In [None]:
bams = !ls ../../data/IRF2BP2_degraded_rep3/bwa/mergedLibrary/mp*.bam
bams

In [None]:
! mkdir ../../data/IRF2BP2_degraded_rep3/diffPeaks_unscaled

In [None]:
#on unscalled data 
for i in range(int(len(bams)/2)):
    if i < 0:
        continue
    name1 = bams[i]
    name2 = bams[7+i]
    print(name1,name2)
    chip.fullDiffPeak(name1,name2, control1='../../data/results3/bwa/mergedLibrary/INPUT_R1.mLb.clN.sorted.bam', directory = "../../data/IRF2BP2_degraded_rep3/diffData_unscaled/", res_directory = "../../data/IRF2BP2_degraded_rep3/diffPeaks_unscaled/",pairedend=False)

In [None]:
bw = ! ls ../../data/IRF2BP2_degraded_rep3/bwa/mergedLibrary/bigwig/*.bigWig
bw

In [None]:
cond1peak = ! ls ../../data/IRF2BP2_degraded_rep3/diffPeaks_unscaled/*cond1.bed
cond2peak = ! ls ../../data/IRF2BP2_degraded_rep3/diffPeaks_unscaled/*cond2.bed
commonpeak = ! ls ../../data/IRF2BP2_degraded_rep3/diffPeaks_unscaled/*common.bed
cond1peak

In [None]:
names = ["CDK8","BRD4","IRF8","SMC1","MED1","ZEB2","CEBPA"]

In [None]:
for i in range(int((len(bw)-1)/2)):
    if i <0:
        continue
    name1 = bw[1+i]
    name2 = bw[8+i]
    peak = [cond1peak[i], commonpeak[i], cond2peak[i]]
    #for val in peak:
        #chip.dropWeirdChromosomes(val)
    name = names[i]
    print(name1,name2)
    chip.getPeaksAt(peak, [name1, name2], bigwignames=['DMSO', 'VHL'], peaknames=['DMSO_peaks', 'common', 'VHL_peaks'], window=3000, folder="", title=name, numthreads=14, torecompute=True, refpoint="center", name='../../data/IRF2BP2_degraded_rep3/diffPeaks_unscaled/'+name+'_mat.pdf', withDeeptools=True)

In [None]:
! gsutil -m cp -r "../../data/IRF2BP2_degraded_rep3/" gs://amlproject/Chip/

## histones V1&2

In [None]:
bamfolder="../../data/IRF2BP2_degraded_histones/fastqs/"

In [None]:
! mkdir ../../data/IRF2BP2_degraded_histones && mkdir ../../data/IRF2BP2_degraded_histones/fastqs && gsutil cp "gs://transfer-amlproject/*.fastq.gz" $bamfolder

In [None]:
! gsutil -m cp gs://transfer-amlproject/*MP7693* gs://transfer-amlproject/IRF2BP2_hist/ && gsutil -m rm gs://transfer-amlproject/*MP7693*


### analysis

In [None]:
rename = {
    "20200103_1_MP7693_S6_R1_001.fastq.gz":"mp811-MV411_IRF2BP2_DMSO_6h-H3K27ac-r1_R1.fastq.gz",
    "20200103_1_MP7693_S6_R2_001.fastq.gz":"mp811-MV411_IRF2BP2_DMSO_6h-H3K27ac-r1.fastq.gz",
    "20200103_2_MP7693_S7_R1_001.fastq.gz":"mp812-MV411_IRF2BP2_DMSO_6h-H3K27ac-r2_R1.fastq.gz",
    "20200103_2_MP7693_S7_R2_001.fastq.gz":"mp812-MV411_IRF2BP2_DMSO_6h-H3K27ac-r2.fastq.gz",
    "20200103_3_MP7693_S8_R1_001.fastq.gz":"mp813-MV411_IRF2BP2_DMSO_6h-H3K27me3-r1_R1.fastq.gz",
    "20200103_3_MP7693_S8_R2_001.fastq.gz":"mp813-MV411_IRF2BP2_DMSO_6h-H3K27me3-r1.fastq.gz",
    "20200103_4_MP7693_S9_R1_001.fastq.gz":"mp814-MV411_IRF2BP2_DMSO_6h-H3K27me3-r2_R1.fastq.gz",
    "20200103_4_MP7693_S9_R2_001.fastq.gz":"mp814-MV411_IRF2BP2_DMSO_6h-H3K27me3-r2.fastq.gz",
    "20200103_5_MP7693_S10_R1_001.fastq.gz":"mp815-MV411_IRF2BP2_DMSO_6h-H3K4me1-r1_R1.fastq.gz",
    "20200103_5_MP7693_S10_R2_001.fastq.gz":"mp815-MV411_IRF2BP2_DMSO_6h-H3K4me1-r1.fastq.gz",
    "20200103_6_MP7693_S11_R1_001.fastq.gz":"mp816-MV411_IRF2BP2_DMSO_6h-H3K4me1-r2_R1.fastq.gz",
    "20200103_6_MP7693_S11_R2_001.fastq.gz":"mp816-MV411_IRF2BP2_DMSO_6h-H3K4me1-r2.fastq.gz",
    "20200103_7_MP7693_S12_R1_001.fastq.gz":"mp817-MV411_IRF2BP2_DMSO_6h-H3K4me3-r1_R1.fastq.gz",
    "20200103_7_MP7693_S12_R2_001.fastq.gz":"mp817-MV411_IRF2BP2_DMSO_6h-H3K4me3-r1.fastq.gz",
    "20200103_8_MP7693_S13_R1_001.fastq.gz":"mp818-MV411_IRF2BP2_DMSO_6h-H3K4me3-r2_R1.fastq.gz",
    "20200103_8_MP7693_S13_R2_001.fastq.gz":"mp818-MV411_IRF2BP2_DMSO_6h-H3K4me3-r2.fastq.gz",
    "20200103_9_MP7693_S14_R1_001.fastq.gz":"mp819-MV411_IRF2BP2_DMSO_6h-H3K79me2-r1_R1.fastq.gz",
    "20200103_9_MP7693_S14_R2_001.fastq.gz":"mp819-MV411_IRF2BP2_DMSO_6h-H3K79me2-r1.fastq.gz",
    "20200103_10_MP7693_S15_R1_001.fastq.gz":"mp820-MV411_IRF2BP2_DMSO_6h-H3K79me2-r2_R1.fastq.gz",
    "20200103_10_MP7693_S15_R2_001.fastq.gz":"mp820-MV411_IRF2BP2_DMSO_6h-H3K79me2-r2.fastq.gz",
    "20200103_11_MP7693_S16_R1_001.fastq.gz":"mp821-MV411_IRF2BP2_VHL_6h-H3K27ac-r1_R1.fastq.gz",
    "20200103_11_MP7693_S16_R2_001.fastq.gz":"mp821-MV411_IRF2BP2_VHL_6h-H3K27ac-r1.fastq.gz",
    "20200103_12_MP7693_S17_R1_001.fastq.gz":"mp822-MV411_IRF2BP2_VHL_6h-H3K27ac-r2_R1.fastq.gz",
    "20200103_12_MP7693_S17_R2_001.fastq.gz":"mp822-MV411_IRF2BP2_VHL_6h-H3K27ac-r2.fastq.gz",
    "20200103_13_MP7693_S18_R1_001.fastq.gz":"mp823-MV411_IRF2BP2_VHL_6h-H3K27me3-r1_R1.fastq.gz",
    "20200103_13_MP7693_S18_R2_001.fastq.gz":"mp823-MV411_IRF2BP2_VHL_6h-H3K27me3-r1.fastq.gz",
    "20200103_14_MP7693_S19_R1_001.fastq.gz":"mp824-MV411_IRF2BP2_VHL_6h-H3K27me3-r2_R1.fastq.gz",
    "20200103_14_MP7693_S19_R2_001.fastq.gz":"mp824-MV411_IRF2BP2_VHL_6h-H3K27me3-r2.fastq.gz",
    "20200103_15_MP7693_S20_R1_001.fastq.gz":"mp825-MV411_IRF2BP2_VHL_6h-H3K4me1-r1_R1.fastq.gz",
    "20200103_15_MP7693_S20_R2_001.fastq.gz":"mp825-MV411_IRF2BP2_VHL_6h-H3K4me1-r1.fastq.gz",
    "20200103_16_MP7693_S21_R1_001.fastq.gz":"mp826-MV411_IRF2BP2_VHL_6h-H3K4me1-r2_R1.fastq.gz",
    "20200103_16_MP7693_S21_R2_001.fastq.gz":"mp826-MV411_IRF2BP2_VHL_6h-H3K4me1-r2.fastq.gz",
    "20200103_17_MP7693_S22_R1_001.fastq.gz":"mp827-MV411_IRF2BP2_VHL_6h-H3K4me3-r1_R1.fastq.gz",
    "20200103_17_MP7693_S22_R2_001.fastq.gz":"mp827-MV411_IRF2BP2_VHL_6h-H3K4me3-r1.fastq.gz",
    "20200103_18_MP7693_S23_R1_001.fastq.gz":"mp828-MV411_IRF2BP2_VHL_6h-H3K4me3-r2_R1.fastq.gz",
    "20200103_18_MP7693_S23_R2_001.fastq.gz":"mp828-MV411_IRF2BP2_VHL_6h-H3K4me3-r2.fastq.gz",
    "20200103_19_MP7693_S24_R1_001.fastq.gz":"mp829-MV411_IRF2BP2_VHL_6h-H3K79me2-r1_R1.fastq.gz",
    "20200103_19_MP7693_S24_R2_001.fastq.gz":"mp829-MV411_IRF2BP2_VHL_6h-H3K79me2-r1.fastq.gz",
    "20200103_20_MP7693_S25_R1_001.fastq.gz":"mp830-MV411_IRF2BP2_VHL_6h-H3K79me2-r2_R1.fastq.gz",
    "20200103_20_MP7693_S25_R2_001.fastq.gz":"mp830-MV411_IRF2BP2_VHL_6h-H3K79me2-r2.fastq.gz"
}

In [None]:
! gsutil -m cp gs://transfer-amlproject/IRF2BP2_hist/* ../../data/IRF2BP2_degraded_histones/fastqs/

In [None]:
for k,v in rename.items():
    ! mv $bamfolder$k $bamfolder$v

In [None]:
inputfastq="gs://amlproject/Chip/fastqs/mp99-MV411-INPUT-r1.fastq.gz"
! gsutil cp $inputfastq $bamfolder

In [None]:
a = ! ls $bamfolder

In [None]:
from gsheets import Sheets
sheets = Sheets.from_files('~/.client_secret.json', '~/.storage.json')
url="https://docs.google.com/spreadsheets/d/1yFLjYB1McU530JnLgL0QIMAKIkVl3kl0_LCHje2gk8U"
gsheet = sheets.get(url).sheets[2].to_frame()

In [None]:
df = {
"fastq1": [],
"fastq2": [],
"antibody": [],
"group": [],
"replicate": [],
"control": []
}
for val in helper.grouped(a[:-1],2):
    row = gsheet[gsheet.id==val[0].split('/')[-1].split('_')[0]]
    df['group'].append((row.id.values[0] + '_'+ row.name_replicate.values[0] +"_"+row.protein.values[0]))
    df['replicate'].append(1)
    df['fastq1'].append(val[0])
    #df['fastq2'].append(val[1])
    df['antibody'].append(row.protein.values[0])
    df['control'].append("INPUT")
df['group'].append('INPUT')
df['replicate'].append(1)
df['fastq1'].append(a[-1])
#df['fastq2'].append(val[1])
df['antibody'].append("INPUT")
df['control'].append("INPUT")
df = pd.DataFrame(df)

In [None]:
df[df.columns[[3,4,0,1,2,5]]].to_csv('../nextflow/IRF2BP2_degraded_histones_design.csv')

In [None]:
#process chips
 ! sudo ./nextflow run nf-core/chipseq --single_end --seq_center 'DFCI' --email 'jkobject@gmail.com' --narrow_peak --input ../nextflow/IRF2BP2_degraded_histones_design.csv --genome GRCh38 --skip_preseq --max_cpus 24 -profile docker -w work

In [None]:
! mv results/* ../../data/IRF2BP2_degraded_histones/

In [None]:
bams = ! ls ../../data/IRF2BP2_degraded_hist/bwa/mergedLibrary/*.bam
bams = [i.split('/')[-1].split('.')[0] for i in bams]
bams

In [None]:
scales = [[191079,278272],
[274625,494562],
[1094016,2067804],
[1328914,969565],
[237779,132422],
[162971,174092],
[115788,187078],
[134269,495924],
[171890,225315],
[188370,199911]]

In [None]:
scales = [[1.0,0.6866626897],
[1.0,0.5552893267],
[1.0,0.5290714207],
[0.7295919826,1.0],
[0.5569120906,1.0],
[1.0,0.9361199825],
[1.0,0.6189290029],
[1.0,0.2707451142],
[1.0,0.762887513],
[1.0,0.9422693098]]

### on scalled data

In [None]:
# diffPeak on scaled data
size=[206, 213, 47, 272, 229, 235, 190, 196, 287, 288]
for i in range(int(len(bams)/2)):
    name1 = bams[1+i]
    name2 = bams[11+i]
    print(name1,name2)
    chip.diffPeak(name1, name2, directory= "../../data/IRF2BP2_degraded_hist/diffData/", res_directory='../../data/IRF2BP2_degraded_hist/diffPeaks/', scaling1=scales[i][1], scaling2=scales[i][0], size=size[i])

In [None]:
# diffPeak on scaled data
for i in range(int(len(bams)/2)):
    name1 = bams[1+i]
    name2 = bams[11+i]
    print(name1,name2)
    chip.fullDiffPeak(name1,name2, bams[0], scaling = scales[i], directory='../../data/IRF2BP2_degraded_hist/diffData/',
res_directory="../../data/IRF2BP2_degraded_hist/diffPeaks/", isTF=False, compute_size=True, pairedend=False)

In [None]:
scales = [1.0,
1.0,
1.0,
0.7295919826,
0.5569120906,
1.0,
1.0,
1.0,
1.0,
1.0,
0.6866626897,
0.5552893267,
0.5290714207,
1.0,
1.0,
0.9361199825,
0.6189290029,
0.2707451142,
0.762887513,
0.9422693098,]

In [None]:
chip.bigWigFrom(bams[1:],genome='GRCh38',scaling=scales)

In [None]:
! mv ../../recalib_bigwig_hist/* ../../data/IRF2BP2_degraded_histones/recalib_bigwig/
bw = ! ls ../../data/IRF2BP2_degraded_histones/recalib_bigwig/*.bw
bw

In [None]:
! rm ../../data/IRF2BP2_degraded_hist/diffData/mp*_R1*

In [None]:
cond1peak = ! ls ../../data/IRF2BP2_degraded_hist/diffPeaks/*cond1.bed
cond2peak = ! ls ../../data/IRF2BP2_degraded_hist/diffPeaks/*cond2.bed
commonpeak = ! ls ../../data/IRF2BP2_degraded_hist/diffPeaks/*common.bed
cond1peak

In [None]:
names = ["H3K27ac", "H3K27ac_v2","H3K27me3","H3K27me3_v2","H3K4me1","H3K4me1_v2", "H3K4me3", "H3K4me3_v2", "H3K79me2", "H3K79me2_v2"]

In [None]:
for i in range(int(len(bw)/2)):
    if i < 0 : 
        continue
    print(i)
    name1 = bw[i]
    name2 = bw[10+i]
    peak = [cond1peak[i], commonpeak[i], cond2peak[i]]
    #for val in peak:
        #chip.dropWeirdChromosomes(val)
    name = names[i]
    print(name1,name2)
    chip.getPeaksAt(peak, [name1, name2], torecompute= True, bigwignames=['DMSO', 'VHL'], peaknames=['DMSO_peaks', 'common', 'VHL_peaks'], window=3000, folder="", title=name, numthreads=7, refpoint='center', name='../../data/IRF2BP2_degraded_hist/diffPeaks/'+name+'_mat.pdf', withDeeptools=True)

In [None]:
names = ["H3K27ac", "H3K27ac_v2","H3K27me3","H3K27me3_v2","H3K4me1","H3K4me1_v2", "H3K4me3", "H3K4me3_v2", "H3K79me2", "H3K79me2_v2"]
names.extend([i+'_VHL' for i in names])
names

In [None]:
peaks = ! ls ../../data/IRF2BP2_degraded_histones/bwa/mergedLibrary/macs/broadPeak/*.broadPeak 
peaks

In [None]:
for i in range(len(bw)):
    chip.getPeaksAt(peaks[i], bw[i], window=3000, folder="", title=names[i], numthreads=7, torecompute=True, onlyProfile=True, refpoint='center', name='../../data/IRF2BP2_degraded_hist/diffPeaks/'+names[i]+'_mat_profile.pdf', withDeeptools=True)
    chip.getPeaksAt(peaks[i], bw[i], window=3000, folder="", title=names[i], numthreads=7, torecompute=True, refpoint='center', onlyProfile=True,name='../../data/IRF2BP2_degraded_hist/diffPeaks/'+names[i]+'_mat_profile_clust3.pdf', withDeeptools=True, cluster=3)

### on unscalled data

In [None]:
bams = !ls ../../data/IRF2BP2_degraded_hist/bwa/mergedLibrary/mp*.bam
bams

In [None]:
bw = ! ls ../../data/IRF2BP2_degraded_histones/bwa/mergedLibrary/bigwig/*.bigWig
bw

In [None]:
# on unscalled data 
for i in range(int((len(bams)-1)/2)):
    name1 = bams[1+i]
    name2 = bams[11+i]
    chip.fullDiffPeak(name1,name2, control1='../../data/IRF2BP2_degraded_hist/INPUT_R1.mLb.clN.sorted.bam', directory = "../../data/IRF2BP2_degraded_hist/diffData_unscaled/", res_directory = "../../data/IRF2BP2_degraded_hist/diffPeaks_unscaled/",isTF=False, compute_size=True, pairedend=False)

In [None]:
cond1peak = ! ls ../../data/IRF2BP2_degraded_hist/diffPeaks_unscaled/*cond1.bed
cond2peak = ! ls ../../data/IRF2BP2_degraded_hist/diffPeaks_unscaled/*cond2.bed
commonpeak = ! ls ../../data/IRF2BP2_degraded_hist/diffPeaks_unscaled/*common.bed
cond2peak

In [None]:
names = ["H3K27ac", "H3K27ac_v2","H3K27me3","H3K27me3_v2","H3K4me1","H3K4me1_v2", "H3K4me3", "H3K4me3_v2", "H3K79me2", "H3K79me2_v2"]

In [None]:
for i in range(int(len(bw)/2)):
    if i < 0:
        continue
    name1 = bw[1+i]
    name2 = bw[11+i]
    peak = [cond1peak[i], commonpeak[i], cond2peak[i]]
    #for val in peak:
        #chip.dropWeirdChromosomes(val)
    name = names[i]
    print(name1,name2)
    chip.getPeaksAt(peak, [name1, name2], bigwignames=['DMSO', 'VHL'], peaknames=['DMSO_peaks', 'common', 'VHL_peaks'], window=3000, folder="", title=name, numthreads=10, refpoint='center', name='../../data/IRF2BP2_degraded_hist/diffPeaks_unscaled/'+name+'_mat.pdf', withDeeptools=True, torecompute=True)

In [None]:
! gsutil -m cp -r "../../data/IRF2BP2_degraded_hist/" gs://amlproject/Chip/

In [None]:
## on local
! scp runningchip2.us-east1-b.aml-chipseq-rnaseq-259419:"~/data/IRF2BP2_degraded_rep3/diffPeaks_unscaled/*.pdf" unscaled/v3/
! scp runningchip2.us-east1-b.aml-chipseq-rnaseq-259419:"~/data/IRF2BP2_degraded_rep3/diffPeaks/*.pdf" scaled/v3/
! scp runningchip2.us-east1-b.aml-chipseq-rnaseq-259419:"~/data/IRF2BP2_degraded_rep1/diffPeaks_unscaled/*.pdf" unscaled/v3/
! scp runningchip2.us-east1-b.aml-chipseq-rnaseq-259419:"~/data/IRF2BP2_degraded_rep1/diffPeaks/*.pdf" scaled/v1/
! scp runningchip2.us-east1-b.aml-chipseq-rnaseq-259419:"~/data/IRF2BP2_degraded_hist/diffPeaks_unscaled/*.pdf" unscaled/v1/
! scp runningchip2.us-east1-b.aml-chipseq-rnaseq-259419:"~/data/IRF2BP2_degraded_hist/diffPeaks/*.pdf" unscaled/v1/
! scp runningchip2.us-east1-b.aml-chipseq-rnaseq-259419:"~/data/IRF2BP2_degraded_rep2/diffPeaks_unscaled/*.pdf" unscaled/v2/
! scp runningchip2.us-east1-b.aml-chipseq-rnaseq-259419:"~/data/IRF2BP2_degraded_rep2/diffPeaks/*.pdf" scaled/v2/

## v4

In [None]:
! gsutil -m mv gs://transfer-amlproject/*MP7868*  gs://transfer-amlproject/IRF2BP2_v4/

### analysis

In [None]:
! mkdir ../../data/IRF2BP2_degraded_rep4 && mkdir ../../data/IRF2BP2_degraded_rep4/fastqs && gsutil -m cp gs://transfer-amlproject/IRF2BP2_v4/* ../../data/IRF2BP2_degraded_rep4/fastqs

In [None]:
a = ! ls ../../data/IRF2BP2_degraded_rep4/fastqs
a

In [None]:
rename = {
"20200302_1_MP7868_S51_R1_001.fastq.gz":"mp846-MV411_IRF2BP_DMSO_6h-MED1-r4_R1.fastq.gz", 
"20200302_1_MP7868_S51_R2_001.fastq.gz":"mp846-MV411_IRF2BP_DMSO_6h-MED1-r4_R2.fastq.gz", 
"20200302_2_MP7868_S52_R1_001.fastq.gz":"mp847-MV411_IRF2BP_DMSO_6h-MED1-r5_R1.fastq.gz", 
"20200302_2_MP7868_S52_R2_001.fastq.gz":"mp847-MV411_IRF2BP_DMSO_6h-MED1-r5_R2.fastq.gz", 
"20200302_3_MP7868_S53_R1_001.fastq.gz":"mp848-MV411_IRF2BP_DMSO_6h-FLAG_IRF2BP2-r3_R1.fastq.gz", 
"20200302_3_MP7868_S53_R2_001.fastq.gz":"mp848-MV411_IRF2BP_DMSO_6h-FLAG_IRF2BP2-r3_R2.fastq.gz",
"20200302_4_MP7868_S54_R1_001.fastq.gz":"mp849-MV411_IRF2BP_DMSO_6h-POLII_total-r3_R1.fastq.gz",
"20200302_4_MP7868_S54_R2_001.fastq.gz":"mp849-MV411_IRF2BP_DMSO_6h-POLII_total-r3_R2.fastq.gz",
"20200302_5_MP7868_S55_R1_001.fastq.gz":"mp850-MV411_IRF2BP_DMSO_6h-POLII_S2-r3_R1.fastq.gz",
"20200302_5_MP7868_S55_R2_001.fastq.gz":"mp850-MV411_IRF2BP_DMSO_6h-POLII_S2-r3_R2.fastq.gz",
"20200302_6_MP7868_S56_R1_001.fastq.gz":"mp851-MV411_IRF2BP_DMSO_6h-POLII_S5-r3_R1.fastq.gz",
"20200302_6_MP7868_S56_R2_001.fastq.gz":"mp851-MV411_IRF2BP_DMSO_6h-POLII_S5-r3_R2.fastq.gz",
"20200302_7_MP7868_S57_R1_001.fastq.gz":"mp852-MV411_IRF2BP_VHL_6h-MED1-r4_R1.fastq.gz",
"20200302_7_MP7868_S57_R2_001.fastq.gz":"mp852-MV411_IRF2BP_VHL_6h-MED1-r4_R2.fastq.gz",
"20200302_8_MP7868_S58_R1_001.fastq.gz":"mp853-MV411_IRF2BP_VHL_6h-MED1-r5_R1.fastq.gz",
"20200302_8_MP7868_S58_R2_001.fastq.gz":"mp853-MV411_IRF2BP_VHL_6h-MED1-r5_R2.fastq.gz",
"20200302_9_MP7868_S59_R1_001.fastq.gz":"mp854-MV411_IRF2BP_VHL_6h-FLAG_IRF2BP2-r3_R1.fastq.gz",
"20200302_9_MP7868_S59_R2_001.fastq.gz":"mp854-MV411_IRF2BP_VHL_6h-FLAG_IRF2BP2-r3_R2.fastq.gz", 
"20200302_10_MP7868_S60_R1_001.fastq.gz":"mp855-MV411_IRF2BP_VHL_6h-POLII_total-r3_R1.fastq.gz",
"20200302_10_MP7868_S60_R2_001.fastq.gz":"mp855-MV411_IRF2BP_VHL_6h-POLII_total-r3_R2.fastq.gz",
"20200302_11_MP7868_S61_R1_001.fastq.gz":"mp856-MV411_IRF2BP_VHL_6h-POLII_S2-r3_R1.fastq.gz",
"20200302_11_MP7868_S61_R2_001.fastq.gz":"mp856-MV411_IRF2BP_VHL_6h-POLII_S2-r3_R2.fastq.gz",
"20200302_12_MP7868_S62_R1_001.fastq.gz":"mp857-MV411_IRF2BP_VHL_6h-POLII_S5-r3_R1.fastq.gz",
"20200302_12_MP7868_S62_R2_001.fastq.gz":"mp857-MV411_IRF2BP_VHL_6h-POLII_S5-r3_R2.fastq.gz"
}

In [None]:
for k,v in rename.items():
    ! mv ../../data/IRF2BP2_degraded_rep4/fastqs/$k ../../data/IRF2BP2_degraded_rep4/fastqs/$v

In [None]:
from gsheets import Sheets
sheets = Sheets.from_files('~/.client_secret.json', '~/.storage.json')
url="https://docs.google.com/spreadsheets/d/1yFLjYB1McU530JnLgL0QIMAKIkVl3kl0_LCHje2gk8U"
gsheet = sheets.get(url).sheets[2].to_frame()

In [None]:
gsheet

In [None]:
df = {
"fastq_1": [],
"fastq_2": [],
"antibody": [],
"group": [],
"replicate": [],
"control": []
}
for val in h.grouped(a,2):
    row = gsheet[gsheet.id==val[0].split('/')[-1].split('-')[0]]
    df['group'].append(row['name'].values[0])
    df['replicate'].append(1)
    df['fastq_1'].append(val[0])
    df['fastq_2'].append(val[1])
    df['antibody'].append(row['protein'].values[0])
    df['control'].append("INPUT")
df['group'].append('INPUT')
df['replicate'].append(1)
df['fastq_1'].append('IRF2BP2_degraded_rep3/mp845-MV411_IRF2BP2_-INPUT-r1_R1.fastq.gz')
df['fastq_2'].append('IRF2BP2_degraded_rep3/mp845-MV411_IRF2BP2_-INPUT-r1_R2.fastq.gz')
df['antibody'].append("")
df['control'].append("")
df = pd.DataFrame(df)

In [None]:
df

In [None]:
df[df.columns[[3,4,0,1,2,5]]].to_csv('../nextflow/IRF2BP2_degraded_rep4_design.csv',index=False)

In [None]:
#process chips
! sudo ../nextflow run nf-core/chipseq --paired_end --seq_center 'DFCI' --email \
'jkobject@gmail.com' --narrow_peak --input ../nextflow/IRF2BP2_degraded_rep4_design.csv --genome GRCh38 --skip_preseq \
--max_cpus 24 -profile docker -w work

In [None]:
!cp -r results/* ../../data/IRF2BP2_degraded_rep4/ && sudo rm -r work && sudo rm -r results

In [None]:
ls ../../TrimGalore-0.6.5/trim_galore

In [None]:
# get scaling values
mappedreads, umappedreads_norm = chip.getSpikeInControlScales(refgenome="../../data/ref/reference_droso.fna",
fastQfolder='../../data/IRF2BP2_degraded_rep4/fastqs/',
pairedEnd=True, cores=12,
tofilter=True,
totrim=False,
tomap=True,
results="../../data/IRF2BP2_degraded_rep4/",
pathtotrim_galore="../../TrimGalore-0.6.5/trim_galore")
mappedreads, umappedreads_norm

computing scales from the excel sheet

In [None]:
bams = ! ls ../../data/IRF2BP2_degraded_rep4/bwa/mergedLibrary/*.bam
bams

In [None]:
bams[6]

### on scalled data

In [None]:
bams = [bam1.split('/')[-1].split('.')[0] for bam1 in bams]

In [None]:
# diffPeak on scaled data
size=[ 208, 214, 207, 234, 296, 231]
for i in range(int(len(bams)/2)-1):
    name1 = bams[1+i]
    name2 = bams[7+i]
    print(name1,name2)
    chip.diffPeak(name1, name2, directory= "../../data/IRF2BP2_degraded_rep4/diffData/", res_directory='../../data/IRF2BP2_degraded_rep4/diffPeaks/', scaling1=scales[i][1], scaling2=scales[i][0], size=size[i])

In [None]:
# diffPeak on scaled data
for i in range(int(len(bams[1:])/2)):
    name1 = bams[1+i]
    name2 = bams[7+i]
    chip.fullDiffPeak(name1,name2, control1='../../data/IRF2BP2_degraded_rep3/bwa/mergedLibrary/mp845-MV411_IRF2BP2-INPUT-r1.mLb.clN.sorted.bam', directory = "../../data/IRF2BP2_degraded_rep4/diffData/", res_directory = "../../data/IRF2BP2_degraded_rep4/diffPeaks/",pairedend=False)

In [None]:
scales = [1.0,
1.0,
1.0,
1.0,
1.0,
0.626304048,
0.2628507876,
0.9021192519,
0.1869653476,
0.5500321887,
0.6046056203,
1.0]

In [None]:
bams

In [None]:
chip.bigWigFrom(bams[1:], 
                genome='GRCh38',scaling=scales,
               numthreads=8)

In [None]:
!mv bigwig ../../data/IRF2BP2_degraded_rep4/recalib_bigwig/

In [None]:
os.popen('for i in $(ls ../../data/IRF2BP2_degraded_rep4/diffPeaks/*.bed); \
            do echo $(wc -l $i); \
            done').read().split('\n')

In [None]:
bw = ! ls ../../data/IRF2BP2_degraded_rep4/recalib_bigwig/*.bw
bw

In [None]:
cond1peak = ! ls ../../data/IRF2BP2_degraded_rep4/diffPeaks/*cond1.bed
cond2peak = ! ls ../../data/IRF2BP2_degraded_rep4/diffPeaks/*cond2.bed
commonpeak = ! ls ../../data/IRF2BP2_degraded_rep4/diffPeaks/*common.bed
cond1peak

In [None]:
names = ["MED1","MED1_v2","FLAG_IRF2BP2","POLII_total","POLII_S2","POLII_S5"]

In [None]:
for i in range(int(len(bw)/2)):
    if i <0:
        continue
    name1 = bw[i]
    name2 = bw[i+6]
    peak = [cond1peak[i], commonpeak[i], cond2peak[i]]
   # for val in peak:
      #  chip.dropWeirdChromosomes(val)
    name = names[i]
    print(name1,name2)
    chip.getPeaksAt(peak, [name1, name2], bigwignames=['DMSO', 'VHL'], peaknames=['DMSO_peaks', 'common', 'VHL_peaks'], window=3000, folder="", title=name, numthreads=7, refpoint="center", name='../../data/IRF2BP2_degraded_rep4/diffPeaks/'+name+'_mat.pdf', withDeeptools=True, torecompute=True)

In [None]:
peaks = ! ls ../../data/results4/bwa/mergedLibrary/macs/narrowPeak/*.narrowPeak
peaks

In [None]:
names = ["MED1","MED1_v2","FLAG_IRF2BP2","POLII_total","POLII_S2","POLII_S5"]
names.extend([i+'_VHL' for i in names])
names

In [None]:
for i in range(len(bw)):
    chip.getPeaksAt(peaks[i], bw[i], window=3000, folder="", title=names[i], numthreads=7, torecompute=True, onlyProfile=True, name='../../data/IRF2BP2_degraded_rep4/diffPeaks/'+names[i]+'_mat_profile.pdf', withDeeptools=True, refpoint="center")
    chip.getPeaksAt(peaks[i], bw[i], window=3000, folder="", title=names[i], numthreads=7, torecompute=True, onlyProfile=True,name='../../data/IRF2BP2_degraded_rep4/diffPeaks/'+names[i]+'_mat_profile_clust3.pdf', withDeeptools=True, cluster=3, refpoint="center")

### on unscalled data

In [None]:
bams = !ls ../../data/results4/bwa/mergedLibrary/mp*.bam
bams

In [None]:
! mkdir ../../data/IRF2BP2_degraded_rep4/diffPeaks_unscaled

In [None]:
#on unscalled data 
for i in range(int(len(bams)/2)):
    if i < 5:
        continue
    name1 = bams[i]
    name2 = bams[6+i]
    print(name1,name2)
    chip.fullDiffPeak(name1,name2, control1='../../data/results4/bwa/mergedLibrary/INPUT_R1.mLb.clN.sorted.bam', directory = "../../data/IRF2BP2_degraded_rep4/diffData_unscaled/", res_directory = "../../data/IRF2BP2_degraded_rep4/diffPeaks_unscaled/",pairedend=False)

In [None]:
bw = ! ls ../../data/results4/bwa/mergedLibrary/bigwig/*.bigWig
bw

In [None]:
cond1peak = ! ls ../../data/IRF2BP2_degraded_rep4/diffPeaks_unscaled/*cond1.bed
cond2peak = ! ls ../../data/IRF2BP2_degraded_rep4/diffPeaks_unscaled/*cond2.bed
commonpeak = ! ls ../../data/IRF2BP2_degraded_rep4/diffPeaks_unscaled/*common.bed
commonpeak

In [None]:
names = ["MED1","MED1_v2","FLAG_IRF2BP2","POLII_total","POLII_S2","POLII_S5"]

In [None]:
for i in range(int((len(bw)-1)/2)):
    name1 = bw[1+i]
    name2 = bw[7+i]
    peak = [cond1peak[i], commonpeak[i], cond2peak[i]]
    #for val in peak:
     #   chip.dropWeirdChromosomes(val)
    name = names[i]
    print(name1,name2)
    chip.getPeaksAt(peak, [name1, name2], bigwignames=['DMSO', 'VHL'], peaknames=['DMSO_peaks', 'common', 'VHL_peaks'], window=3000, folder="", title=name, numthreads=7, torecompute=True, refpoint='center', name='../../data/IRF2BP2_degraded_rep4/diffPeaks_unscaled/'+name+'_mat.pdf', withDeeptools=True)

In [None]:
! gsutil -m cp -r "../../data/IRF2BP2_degraded_rep4" gs://amlproject/Chip/

## Copying data

In [30]:
mkdir ../results/$project/diffPeaks_unscaled/

In [31]:
! cp ../../data/*/diffPeaks_unscaled/*.bed ../results/$project/diffPeaks_unscaled/

In [35]:
mkdir ../results/$project/diffPeaks_scaled/

In [36]:
! cp ../../data/*/diffPeaks/*.bed ../results/$project/diffPeaks_scaled/

## v5

In [4]:
project="additional_degraded_v1"

In [37]:
! gsutil ls gs://transfer-amlproject/200723_MP8095_fastq/

gs://transfer-amlproject/200723_MP8095_fastq/20200723_10_MP8095_S220_R1_001.fastq.gz
gs://transfer-amlproject/200723_MP8095_fastq/20200723_10_MP8095_S220_R2_001.fastq.gz
gs://transfer-amlproject/200723_MP8095_fastq/20200723_11_MP8095_S221_R1_001.fastq.gz
gs://transfer-amlproject/200723_MP8095_fastq/20200723_11_MP8095_S221_R2_001.fastq.gz
gs://transfer-amlproject/200723_MP8095_fastq/20200723_12_MP8095_S222_R1_001.fastq.gz
gs://transfer-amlproject/200723_MP8095_fastq/20200723_12_MP8095_S222_R2_001.fastq.gz
gs://transfer-amlproject/200723_MP8095_fastq/20200723_13_MP8095_S223_R1_001.fastq.gz
gs://transfer-amlproject/200723_MP8095_fastq/20200723_13_MP8095_S223_R2_001.fastq.gz
gs://transfer-amlproject/200723_MP8095_fastq/20200723_14_MP8095_S224_R1_001.fastq.gz
gs://transfer-amlproject/200723_MP8095_fastq/20200723_14_MP8095_S224_R2_001.fastq.gz
gs://transfer-amlproject/200723_MP8095_fastq/20200723_15_MP8095_S225_R1_001.fastq.gz
gs://transfer-amlproject/200723_MP8095_fastq/20200723_

In [42]:
mkdir ../data/$project/qc/

In [43]:
!gsutil -m cp gs://transfer-amlproject/200723_MP8095_fastq/multiqc_report.html ../data/$project/qc/
!gsutil -m cp -r gs://transfer-amlproject/200723_MP8095_fastq/Reports/ ../data/$project/qc/
!gsutil -m cp -r gs://transfer-amlproject/200723_MP8095_fastq/multiqc_data/ ../data/$project/qc/

Copying gs://transfer-amlproject/200723_MP8095_fastq/multiqc_report.html...
/ [1/1 files][  1.5 MiB/  1.5 MiB] 100% Done                                    
Operation completed over 1 objects/1.5 MiB.                                      
Copying gs://transfer-amlproject/200723_MP8095_fastq/Reports/20200723_10_MP8095_S220_R1_001_fastqc.html...
Copying gs://transfer-amlproject/200723_MP8095_fastq/Reports/20200723_10_MP8095_S220_R2_001_fastqc.html...
Copying gs://transfer-amlproject/200723_MP8095_fastq/Reports/20200723_10_MP8095_S220_R1_001_fastqc.zip...
Copying gs://transfer-amlproject/200723_MP8095_fastq/Reports/20200723_10_MP8095_S220_R2_001_fastqc.zip...
Copying gs://transfer-amlproject/200723_MP8095_fastq/Reports/20200723_11_MP8095_S221_R1_001_fastqc.html...
Copying gs://transfer-amlproject/200723_MP8095_fastq/Reports/20200723_11_MP8095_S221_R1_001_fastqc.zip...
Copying gs://transfer-amlproject/200723_MP8095_fastq/Reports/20200723_11_MP8095_S221_R2_001_fastqc.zip...
Copying gs://tra

/ [6/6 files][  2.2 MiB/  2.2 MiB] 100% Done                                    
Operation completed over 6 objects/2.2 MiB.                                      


In [46]:
! gsutil -m cp gs://transfer-amlproject/200723_MP8095_fastq/*  gs://transfer-amlproject/$project/

Omitting prefix "gs://transfer-amlproject/200723_MP8095_fastq/Reports/". (Did you mean to do cp -r?)
Omitting prefix "gs://transfer-amlproject/200723_MP8095_fastq/multiqc_data/". (Did you mean to do cp -r?)
Copying gs://transfer-amlproject/200723_MP8095_fastq/20200723_10_MP8095_S220_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/200723_MP8095_fastq/20200723_10_MP8095_S220_R2_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/200723_MP8095_fastq/20200723_11_MP8095_S221_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/200723_MP8095_fastq/20200723_11_MP8095_S221_R2_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/200723_MP8095_fastq/20200723_12_MP8095_S222_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/200723_MP8095_fastq/20200723_12_MP8095_S222_R2_001.fastq.gz [Content-Type=applic

### analysis

In [48]:
! mkdir ../../data/$project && mkdir ../../data/$project/fastqs && gsutil -m cp gs://transfer-amlproject/$project/* ../../data/$project/fastqs

Copying gs://transfer-amlproject/additional_degraded_v1/20200723_10_MP8095_S220_R1_001.fastq.gz...
Copying gs://transfer-amlproject/additional_degraded_v1/20200723_10_MP8095_S220_R2_001.fastq.gz...
Copying gs://transfer-amlproject/additional_degraded_v1/20200723_11_MP8095_S221_R1_001.fastq.gz...
Copying gs://transfer-amlproject/additional_degraded_v1/20200723_11_MP8095_S221_R2_001.fastq.gz...
Copying gs://transfer-amlproject/additional_degraded_v1/20200723_12_MP8095_S222_R2_001.fastq.gz...
Copying gs://transfer-amlproject/additional_degraded_v1/20200723_13_MP8095_S223_R1_001.fastq.gz...
Copying gs://transfer-amlproject/additional_degraded_v1/20200723_12_MP8095_S222_R1_001.fastq.gz...
Copying gs://transfer-amlproject/additional_degraded_v1/20200723_13_MP8095_S223_R2_001.fastq.gz...
Copying gs://transfer-amlproject/additional_degraded_v1/20200723_14_MP8095_S224_R1_001.fastq.gz...
Copying gs://transfer-amlproject/additional_degraded_v1/20200723_14_MP8095_S224_R2_001.fastq.gz...
Copying gs

In [50]:
! rm ../../data/$project/fastqs/multiqc_report.html

In [3]:
a = ! ls ../../data/$project/fastqs
a

['mp858-MV411_RNP_AAVS1-H3K27AC-r1_R1_001.fastq.gz',
 'mp858-MV411_RNP_AAVS1-H3K27AC-r1_R2_001.fastq.gz',
 'mp859-MV411_RNP_AAVS1-H3K27AC-r2_R1_001.fastq.gz',
 'mp859-MV411_RNP_AAVS1-H3K27AC-r2_R2_001.fastq.gz',
 'mp860-MV411_RNP_RUNX1-H3K27AC-r1_R1_001.fastq.gz',
 'mp860-MV411_RNP_RUNX1-H3K27AC-r1_R2_001.fastq.gz',
 'mp861-MV411_RNP_RUNX1-H3K27AC-r2_R1_001.fastq.gz',
 'mp861-MV411_RNP_RUNX1-H3K27AC-r2_R2_001.fastq.gz',
 'mp862-MV411_RNP_RUNX2-H3K27AC-r1_R1_001.fastq.gz',
 'mp862-MV411_RNP_RUNX2-H3K27AC-r1_R2_001.fastq.gz',
 'mp863-MV411_RNP_RUNX2-H3K27AC-r2_R1_001.fastq.gz',
 'mp863-MV411_RNP_RUNX2-H3K27AC-r2_R2_001.fastq.gz',
 'mp864-MV411_RNP_RUNX1_RUNX2-H3K27AC-r1_R1_001.fastq.gz',
 'mp864-MV411_RNP_RUNX1_RUNX2-H3K27AC-r1_R2_001.fastq.gz',
 'mp865-MV411_RNP_RUNX1_RUNX2-H3K27AC-r2_R1_001.fastq.gz',
 'mp865-MV411_RNP_RUNX1_RUNX2-H3K27AC-r2_R2_001.fastq.gz',
 'mp866-MV411_RNP_MEF2D-H3K27AC-r1_R1_001.fastq.gz',
 'mp866-MV411_RNP_MEF2D-H3K27AC-r1_R2_001.fastq.gz',
 'mp867-MV411_RNP_MEF2

In [67]:
rename = {
"20200723_1_MP8095_S211": "mp858-MV411_RNP_AAVS1-H3K27AC-r1",
"20200723_2_MP8095_S212": "mp859-MV411_RNP_AAVS1-H3K27AC-r2",
"20200723_3_MP8095_S213": "mp860-MV411_RNP_RUNX1-H3K27AC-r1",
"20200723_4_MP8095_S214": "mp861-MV411_RNP_RUNX1-H3K27AC-r2",
"20200723_5_MP8095_S215": "mp862-MV411_RNP_RUNX2-H3K27AC-r1",
"20200723_6_MP8095_S216": "mp863-MV411_RNP_RUNX2-H3K27AC-r2",
"20200723_7_MP8095_S217": "mp864-MV411_RNP_RUNX1_RUNX2-H3K27AC-r1",
"20200723_8_MP8095_S218": "mp865-MV411_RNP_RUNX1_RUNX2-H3K27AC-r2",
"20200723_9_MP8095_S219": "mp866-MV411_RNP_MEF2D-H3K27AC-r1",
"20200723_10_MP8095_S220": "mp867-MV411_RNP_MEF2D-H3K27AC-r2",
"20200723_11_MP8095_S221": "mp868-MV411_RNP_IRF8-H3K27AC-r1",
"20200723_12_MP8095_S222": "mp869-MV411_RNP_IRF8-H3K27AC-r2",
"20200723_13_MP8095_S223": "mp870-MV411_RNP_MYB-H3K27AC-r1",
"20200723_14_MP8095_S224": "mp871-MV411_RNP_MYB-H3K27AC-r2",
"20200723_15_MP8095_S225": "mp872-MV411_RNP_SPI1-H3K27AC-r1",
"20200723_16_MP8095_S226": "mp873-MV411_RNP_SPI1-H3K27AC-r2",
"20200723_1S_MP8095_S209": "mp874-MV411_MEF2D_NT_SC_63-FLAG_MEF2D-r2",
"20200723_2S_MP8095_S210": "mp875-MV411_MEF2C_NT-FLAG_MEF2C-r1"}

In [68]:
for val in a:
    rep = val
    for k,v in rename.items():
        rep = rep.replace(k,v)
    !mv ../../data/$project/fastqs/$val ../../data/$project/fastqs/$rep

In [69]:
from gsheets import Sheets
sheets = Sheets.from_files('~/.client_secret.json', '~/.storage.json')
url="https://docs.google.com/spreadsheets/d/1yFLjYB1McU530JnLgL0QIMAKIkVl3kl0_LCHje2gk8U"
gsheet = sheets.get(url).sheets[2].to_frame()

In [70]:
gsheet

Unnamed: 0,id,cell line,replicate,protein,quality,paired_end,matching input name,processed,name,previous name,...,ratio to droso,unique mapped reads(droso),scaling factor,Total QC,folderNarrow,folderCompensated,folderQC,folderBroad,folder Bigwig,folder diffPeaks
0,mp100,U937,1,INPUT,,n,,Y,mp100-U937-INPUT-r1,,...,,,,https://storage.cloud.google.com/amlproject/Ch...,https://console.cloud.google.com/storage/brows...,,https://console.cloud.google.com/storage/brows...,https://console.cloud.google.com/storage/brows...,https://console.cloud.google.com/storage/brows...,
1,mp101,NOMO1,1,INPUT,,n,,Y,mp101-NOMO1-INPUT-r1,,...,,,,,,,,,,
2,mp102,UT7,1,INPUT,,n,,Y,mp102-UT7-INPUT-r1,,...,,,,,,,,,,
3,mp106,MV411,1,MYB,x,n,INPUT_MV411,Y,mp106-MV411-MYB-r1,,...,,,,,,,,,,
4,mp109,M6,1,CEBPA,x,n,INPUT_M6,Y,mp109-M6-CEBPA-r1,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
299,mp871,MV411_RNP_MYB,2,H3K27AC,,y,INPUT_MV411,,mp871-MV411_RNP_MYB-H3K27AC-r2,20200723_14_MP8095_S224,...,,,,,,,,,,
300,mp872,MV411_RNP_SPI1,1,H3K27AC,,y,INPUT_MV411,,mp872-MV411_RNP_SPI1-H3K27AC-r1,20200723_15_MP8095_S225,...,,,,,,,,,,
301,mp873,MV411_RNP_SPI1,2,H3K27AC,,y,INPUT_MV411,,mp873-MV411_RNP_SPI1-H3K27AC-r2,20200723_16_MP8095_S226,...,,,,,,,,,,
302,mp874,MV411_MEF2D_NT_SC_63,2,FLAG_MEF2D,,y,INPUT_MV411,,mp874-MV411_MEF2D_NT_SC_63-FLAG_MEF2D-r2,20200723_1S_MP8095_S209,...,,,,,,,,,,


In [71]:
a = ! ls ../../data/$project/fastqs
a

['mp858-MV411_RNP_AAVS1-H3K27AC-r1_R1_001.fastq.gz',
 'mp858-MV411_RNP_AAVS1-H3K27AC-r1_R2_001.fastq.gz',
 'mp859-MV411_RNP_AAVS1-H3K27AC-r2_R1_001.fastq.gz',
 'mp859-MV411_RNP_AAVS1-H3K27AC-r2_R2_001.fastq.gz',
 'mp860-MV411_RNP_RUNX1-H3K27AC-r1_R1_001.fastq.gz',
 'mp860-MV411_RNP_RUNX1-H3K27AC-r1_R2_001.fastq.gz',
 'mp861-MV411_RNP_RUNX1-H3K27AC-r2_R1_001.fastq.gz',
 'mp861-MV411_RNP_RUNX1-H3K27AC-r2_R2_001.fastq.gz',
 'mp862-MV411_RNP_RUNX2-H3K27AC-r1_R1_001.fastq.gz',
 'mp862-MV411_RNP_RUNX2-H3K27AC-r1_R2_001.fastq.gz',
 'mp863-MV411_RNP_RUNX2-H3K27AC-r2_R1_001.fastq.gz',
 'mp863-MV411_RNP_RUNX2-H3K27AC-r2_R2_001.fastq.gz',
 'mp864-MV411_RNP_RUNX1_RUNX2-H3K27AC-r1_R1_001.fastq.gz',
 'mp864-MV411_RNP_RUNX1_RUNX2-H3K27AC-r1_R2_001.fastq.gz',
 'mp865-MV411_RNP_RUNX1_RUNX2-H3K27AC-r2_R1_001.fastq.gz',
 'mp865-MV411_RNP_RUNX1_RUNX2-H3K27AC-r2_R2_001.fastq.gz',
 'mp866-MV411_RNP_MEF2D-H3K27AC-r1_R1_001.fastq.gz',
 'mp866-MV411_RNP_MEF2D-H3K27AC-r1_R2_001.fastq.gz',
 'mp867-MV411_RNP_MEF2

In [73]:
for val in a[-4:]:
    !gsutil cp ../../data/$project/fastqs/$val gs://amlproject/Chip/fastqs/
    !rm ../../data/$project/fastqs/$val

Copying file://../../data/additional_degraded_v1/fastqs/mp874-MV411_MEF2D_NT_SC_63-FLAG_MEF2D-r2_R1_001.fastq.gz [Content-Type=application/octet-stream]...
==> NOTE: You are uploading one or more large file(s), which would run          
significantly faster if you enable parallel composite uploads. This
feature can be enabled by editing the
"parallel_composite_upload_threshold" value in your .boto
configuration file. However, note that if you do this large files will
be uploaded as `composite objects
<https://cloud.google.com/storage/docs/composite-objects>`_,which
means that any user who downloads such objects will need to have a
compiled crcmod installed (see "gsutil help crcmod"). This is because
without a compiled crcmod, computing checksums on composite objects is
so slow that gsutil disables downloads of composite objects.

/ [1 files][  1.7 GiB/  1.7 GiB]   98.9 MiB/s                                   
Operation completed over 1 objects/1.7 GiB.                                  

In [76]:
gcp.patternRN({'mp845-MV411_IRF2BP2_-INPUT-r1':'mp845-MV411-INPUT-r2'},'gs://amlproject/Chip/',wildcards=['**','.*'], test=False)

found 2 files to rename


In [87]:
df = {
"fastq_1": [],
"fastq_2": [],
"antibody": [],
"group": [],
"replicate": [],
"control": []
}
for val in h.grouped(a[:-4],2):
    row = gsheet[gsheet.id==val[0].split('/')[-1].split('-')[0]]
    df['group'].append("MV4"+row['name'].values[0].split('-r')[0].split('-MV4')[1])
    df['replicate'].append(row['name'].values[0].split('-r')[1])
    df['fastq_1'].append(project+"/fastqs/"+val[0])
    df['fastq_2'].append(project+"/fastqs/"+val[1])
    df['antibody'].append(row['protein'].values[0])
    df['control'].append("INPUT")
df['group'].append('INPUT')
df['replicate'].append(1)
df['fastq_1'].append('ref/mp845-MV411-INPUT-r2_R1.fastq.gz')
df['fastq_2'].append('ref/mp845-MV411-INPUT-r2_R2.fastq.gz')
df['antibody'].append("")
df['control'].append("")
df = pd.DataFrame(df)

In [88]:
df

Unnamed: 0,fastq_1,fastq_2,antibody,group,replicate,control
0,additional_degraded_v1/fastqs/mp858-MV411_RNP_...,additional_degraded_v1/fastqs/mp858-MV411_RNP_...,H3K27AC,MV411_RNP_AAVS1-H3K27AC,1,INPUT
1,additional_degraded_v1/fastqs/mp859-MV411_RNP_...,additional_degraded_v1/fastqs/mp859-MV411_RNP_...,H3K27AC,MV411_RNP_AAVS1-H3K27AC,2,INPUT
2,additional_degraded_v1/fastqs/mp860-MV411_RNP_...,additional_degraded_v1/fastqs/mp860-MV411_RNP_...,H3K27AC,MV411_RNP_RUNX1-H3K27AC,1,INPUT
3,additional_degraded_v1/fastqs/mp861-MV411_RNP_...,additional_degraded_v1/fastqs/mp861-MV411_RNP_...,H3K27AC,MV411_RNP_RUNX1-H3K27AC,2,INPUT
4,additional_degraded_v1/fastqs/mp862-MV411_RNP_...,additional_degraded_v1/fastqs/mp862-MV411_RNP_...,H3K27AC,MV411_RNP_RUNX2-H3K27AC,1,INPUT
5,additional_degraded_v1/fastqs/mp863-MV411_RNP_...,additional_degraded_v1/fastqs/mp863-MV411_RNP_...,H3K27AC,MV411_RNP_RUNX2-H3K27AC,2,INPUT
6,additional_degraded_v1/fastqs/mp864-MV411_RNP_...,additional_degraded_v1/fastqs/mp864-MV411_RNP_...,H3K27AC,MV411_RNP_RUNX1_RUNX2-H3K27AC,1,INPUT
7,additional_degraded_v1/fastqs/mp865-MV411_RNP_...,additional_degraded_v1/fastqs/mp865-MV411_RNP_...,H3K27AC,MV411_RNP_RUNX1_RUNX2-H3K27AC,2,INPUT
8,additional_degraded_v1/fastqs/mp866-MV411_RNP_...,additional_degraded_v1/fastqs/mp866-MV411_RNP_...,H3K27AC,MV411_RNP_MEF2D-H3K27AC,1,INPUT
9,additional_degraded_v1/fastqs/mp867-MV411_RNP_...,additional_degraded_v1/fastqs/mp867-MV411_RNP_...,H3K27AC,MV411_RNP_MEF2D-H3K27AC,2,INPUT


In [89]:
df[df.columns[[3,4,0,1,2,5]]].to_csv('../nextflow/additional_degraded_v1_design.csv',index=False)

In [6]:
! cd ../../data/ && sudo ../nextflow log ## to get access to the previous runs

TIMESTAMP          	DURATION      	RUN NAME              	STATUS	REVISION ID	SESSION ID                          	COMMAND                                                                                                                                                                                                                                                          
2020-02-10 22:39:18	8m 25s        	stupefied_crick       	ERR   	21be314954 	76ea5df0-153c-4e71-a59d-52c6112fda84	nextflow run nf-core/chipseq --paired_end --seq_center DFCI --email jkobject@gmail.com --narrow_peak --input design.csv --genome GRCh38 --skip_preseq --max_cpus 24 -profile docker -w work                                                                      
2020-02-10 22:40:37	1m 6s         	ridiculous_hilbert    	ERR   	21be314954 	75004903-035d-4504-ab80-cab74b5acac4	nextflow run nf-core/chipseq --paired_end --seq_center DFCI --email jkobject@gmail.com --narrow_peak --input design.csv --genome GRCh38 --sk

In [7]:
#process chips
! cd ../../data/ && sudo ../nextflow run nf-core/chipseq --paired_end --seq_center 'DFCI' --email 'jkobject@gmail.com' --input ../AMLproject/nextflow/additional_degraded_v1_design.csv --genome GRCh38 --skip_preseq --max_cpus 16 -profile docker -w work -resume exotic_bartik

N E X T F L O W  ~  version 19.10.0
Launching `nf-core/chipseq` [wise_legentil] - revision: 21be314954 [master]
NOTE: Your local project version looks outdated - a different revision is available in the remote repository [0f487ed76d]
-[2m--------------------------------------------------[0m-
                                        [0;32m,--.[0;30m/[0;32m,-.[0m
[0;34m        ___     __   __   __   ___     [0;32m/,-._.--~'[0m
[0;34m  |\ | |__  __ /  ` /  \ |__) |__         [0;33m}  {[0m
[0;34m  | \| |       \__, \__/ |  \ |___     [0;32m\`-._,-`-,[0m
                                        [0;32m`._,._,'[0m
[0;35m  nf-core/chipseq v1.1.0[0m
-[2m--------------------------------------------------[0m-
Run Name            : wise_legentil
Data Type           : Paired-End
Design File         : ../AMLproject/nextflow/additional_degraded_v1_design.csv
Genome              : GRCh38
Fasta File          : s3://ngi-igenomes/igenomes//Homo_sapiens/NCBI/GRCh38/Sequence/WholeGenomeF

[28A
executor >  local (1)[K
[eb/23cd77] process > CheckDesign (additional_deg... [100%] 1 of 1, cached: 1 ✔[K
[c9/ec8de4] process > MakeTSSBED (genes.bed)         [100%] 1 of 1, cached: 1 ✔[K
[0d/f433a4] process > MakeGenomeFilter (genome.fa)   [100%] 1 of 1, cached: 1 ✔[K
[fe/2e0c27] process > FastQC (MV411_RNP_SPI1-H3K2... [100%] 17 of 17, cached:...[K
[68/a565e7] process > TrimGalore (MV411_RNP_MYB-H... [100%] 17 of 17, cached:...[K
[a2/0c2388] process > BWAMem (MV411_RNP_RUNX1-H3K... [100%] 17 of 17, cached:...[K
[13/4f49fa] process > SortBAM (INPUT_R1_T1)          [100%] 17 of 17, cached:...[K
[-        ] process > MergeBAM                       -[K
[-        ] process > MergeBAMFilter                 -[K
[-        ] process > MergeBAMRemoveOrphan           -[K
[-        ] process > Preseq                         -[K
[-        ] process > CollectMultipleMetrics         -[K
[-        ] process > BigWig                         -[K
[-        ] process > PlotProfile   

[28A
executor >  local (6)[K
[eb/23cd77] process > CheckDesign (additional_deg... [100%] 1 of 1, cached: 1 ✔[K
[c9/ec8de4] process > MakeTSSBED (genes.bed)         [100%] 1 of 1, cached: 1 ✔[K
[0d/f433a4] process > MakeGenomeFilter (genome.fa)   [100%] 1 of 1, cached: 1 ✔[K
[fe/2e0c27] process > FastQC (MV411_RNP_SPI1-H3K2... [100%] 17 of 17, cached:...[K
[68/a565e7] process > TrimGalore (MV411_RNP_MYB-H... [100%] 17 of 17, cached:...[K
[a2/0c2388] process > BWAMem (MV411_RNP_RUNX1-H3K... [100%] 17 of 17, cached:...[K
[13/4f49fa] process > SortBAM (INPUT_R1_T1)          [100%] 17 of 17, cached:...[K
[fc/cf6140] process > MergeBAM (MV411_RNP_SPI1-H3... [ 88%] 14 of 16, cached: 11[K
[-        ] process > MergeBAMFilter                 -[K
[-        ] process > MergeBAMRemoveOrphan           -[K
[-        ] process > Preseq                         -[K
[-        ] process > CollectMultipleMetrics         -[K
[-        ] process > BigWig                         -[K
[-        

[28A
executor >  local (11)[K
[eb/23cd77] process > CheckDesign (additional_deg... [100%] 1 of 1, cached: 1 ✔[K
[c9/ec8de4] process > MakeTSSBED (genes.bed)         [100%] 1 of 1, cached: 1 ✔[K
[0d/f433a4] process > MakeGenomeFilter (genome.fa)   [100%] 1 of 1, cached: 1 ✔[K
[fe/2e0c27] process > FastQC (MV411_RNP_SPI1-H3K2... [100%] 17 of 17, cached:...[K
[68/a565e7] process > TrimGalore (MV411_RNP_MYB-H... [100%] 17 of 17, cached:...[K
[a2/0c2388] process > BWAMem (MV411_RNP_RUNX1-H3K... [100%] 17 of 17, cached:...[K
[13/4f49fa] process > SortBAM (INPUT_R1_T1)          [100%] 17 of 17, cached:...[K
[5a/6aaef3] process > MergeBAM (MV411_RNP_RUNX2-H... [100%] 17 of 17, cached:...[K
[84/574208] process > MergeBAMFilter (MV411_RNP_S... [ 50%] 2 of 4[K
[-        ] process > MergeBAMRemoveOrphan           -[K
[-        ] process > Preseq                         -[K
[-        ] process > CollectMultipleMetrics         -[K
[-        ] process > BigWig                         -

[28A
executor >  local (15)[K
[eb/23cd77] process > CheckDesign (additional_deg... [100%] 1 of 1, cached: 1 ✔[K
[c9/ec8de4] process > MakeTSSBED (genes.bed)         [100%] 1 of 1, cached: 1 ✔[K
[0d/f433a4] process > MakeGenomeFilter (genome.fa)   [100%] 1 of 1, cached: 1 ✔[K
[fe/2e0c27] process > FastQC (MV411_RNP_SPI1-H3K2... [100%] 17 of 17, cached:...[K
[68/a565e7] process > TrimGalore (MV411_RNP_MYB-H... [100%] 17 of 17, cached:...[K
[a2/0c2388] process > BWAMem (MV411_RNP_RUNX1-H3K... [100%] 17 of 17, cached:...[K
[13/4f49fa] process > SortBAM (INPUT_R1_T1)          [100%] 17 of 17, cached:...[K
[5a/6aaef3] process > MergeBAM (MV411_RNP_RUNX2-H... [100%] 17 of 17, cached:...[K
[e1/d4ac46] process > MergeBAMFilter (MV411_RNP_A... [ 63%] 5 of 8[K
[-        ] process > MergeBAMRemoveOrphan           -[K
[-        ] process > Preseq                         -[K
[-        ] process > CollectMultipleMetrics         -[K
[-        ] process > BigWig                         -

[28A
executor >  local (18)[K
[eb/23cd77] process > CheckDesign (additional_deg... [100%] 1 of 1, cached: 1 ✔[K
[c9/ec8de4] process > MakeTSSBED (genes.bed)         [100%] 1 of 1, cached: 1 ✔[K
[0d/f433a4] process > MakeGenomeFilter (genome.fa)   [100%] 1 of 1, cached: 1 ✔[K
[fe/2e0c27] process > FastQC (MV411_RNP_SPI1-H3K2... [100%] 17 of 17, cached:...[K
[68/a565e7] process > TrimGalore (MV411_RNP_MYB-H... [100%] 17 of 17, cached:...[K
[a2/0c2388] process > BWAMem (MV411_RNP_RUNX1-H3K... [100%] 17 of 17, cached:...[K
[13/4f49fa] process > SortBAM (INPUT_R1_T1)          [100%] 17 of 17, cached:...[K
[5a/6aaef3] process > MergeBAM (MV411_RNP_RUNX2-H... [100%] 17 of 17, cached:...[K
[77/a464ff] process > MergeBAMFilter (MV411_RNP_I... [ 91%] 10 of 11[K
[-        ] process > MergeBAMRemoveOrphan           -[K
[-        ] process > Preseq                         -[K
[-        ] process > CollectMultipleMetrics         -[K
[-        ] process > BigWig                         

[28A
executor >  local (23)[K
[eb/23cd77] process > CheckDesign (additional_deg... [100%] 1 of 1, cached: 1 ✔[K
[c9/ec8de4] process > MakeTSSBED (genes.bed)         [100%] 1 of 1, cached: 1 ✔[K
[0d/f433a4] process > MakeGenomeFilter (genome.fa)   [100%] 1 of 1, cached: 1 ✔[K
[fe/2e0c27] process > FastQC (MV411_RNP_SPI1-H3K2... [100%] 17 of 17, cached:...[K
[68/a565e7] process > TrimGalore (MV411_RNP_MYB-H... [100%] 17 of 17, cached:...[K
[a2/0c2388] process > BWAMem (MV411_RNP_RUNX1-H3K... [100%] 17 of 17, cached:...[K
[13/4f49fa] process > SortBAM (INPUT_R1_T1)          [100%] 17 of 17, cached:...[K
[5a/6aaef3] process > MergeBAM (MV411_RNP_RUNX2-H... [100%] 17 of 17, cached:...[K
[e0/a67acd] process > MergeBAMFilter (MV411_RNP_M... [ 88%] 14 of 16[K
[-        ] process > MergeBAMRemoveOrphan           -[K
[-        ] process > Preseq                         -[K
[-        ] process > CollectMultipleMetrics         -[K
[-        ] process > BigWig                         

[28A
executor >  local (28)[K
[eb/23cd77] process > CheckDesign (additional_deg... [100%] 1 of 1, cached: 1 ✔[K
[c9/ec8de4] process > MakeTSSBED (genes.bed)         [100%] 1 of 1, cached: 1 ✔[K
[0d/f433a4] process > MakeGenomeFilter (genome.fa)   [100%] 1 of 1, cached: 1 ✔[K
[fe/2e0c27] process > FastQC (MV411_RNP_SPI1-H3K2... [100%] 17 of 17, cached:...[K
[68/a565e7] process > TrimGalore (MV411_RNP_MYB-H... [100%] 17 of 17, cached:...[K
[a2/0c2388] process > BWAMem (MV411_RNP_RUNX1-H3K... [100%] 17 of 17, cached:...[K
[13/4f49fa] process > SortBAM (INPUT_R1_T1)          [100%] 17 of 17, cached:...[K
[5a/6aaef3] process > MergeBAM (MV411_RNP_RUNX2-H... [100%] 17 of 17, cached:...[K
[48/4b5e62] process > MergeBAMFilter (MV411_RNP_R... [100%] 17 of 17 ✔[K
[57/199f77] process > MergeBAMRemoveOrphan (MV411... [ 50%] 2 of 4[K
[-        ] process > Preseq                         -[K
[-        ] process > CollectMultipleMetrics         -[K
[-        ] process > BigWig           

[28A
executor >  local (33)[K
[eb/23cd77] process > CheckDesign (additional_deg... [100%] 1 of 1, cached: 1 ✔[K
[c9/ec8de4] process > MakeTSSBED (genes.bed)         [100%] 1 of 1, cached: 1 ✔[K
[0d/f433a4] process > MakeGenomeFilter (genome.fa)   [100%] 1 of 1, cached: 1 ✔[K
[fe/2e0c27] process > FastQC (MV411_RNP_SPI1-H3K2... [100%] 17 of 17, cached:...[K
[68/a565e7] process > TrimGalore (MV411_RNP_MYB-H... [100%] 17 of 17, cached:...[K
[a2/0c2388] process > BWAMem (MV411_RNP_RUNX1-H3K... [100%] 17 of 17, cached:...[K
[13/4f49fa] process > SortBAM (INPUT_R1_T1)          [100%] 17 of 17, cached:...[K
[5a/6aaef3] process > MergeBAM (MV411_RNP_RUNX2-H... [100%] 17 of 17, cached:...[K
[48/4b5e62] process > MergeBAMFilter (MV411_RNP_R... [100%] 17 of 17 ✔[K
[63/892aee] process > MergeBAMRemoveOrphan (MV411... [ 78%] 7 of 9[K
[-        ] process > Preseq                         -[K
[-        ] process > CollectMultipleMetrics         -[K
[-        ] process > BigWig           

[28A
executor >  local (38)[K
[eb/23cd77] process > CheckDesign (additional_deg... [100%] 1 of 1, cached: 1 ✔[K
[c9/ec8de4] process > MakeTSSBED (genes.bed)         [100%] 1 of 1, cached: 1 ✔[K
[0d/f433a4] process > MakeGenomeFilter (genome.fa)   [100%] 1 of 1, cached: 1 ✔[K
[fe/2e0c27] process > FastQC (MV411_RNP_SPI1-H3K2... [100%] 17 of 17, cached:...[K
[68/a565e7] process > TrimGalore (MV411_RNP_MYB-H... [100%] 17 of 17, cached:...[K
[a2/0c2388] process > BWAMem (MV411_RNP_RUNX1-H3K... [100%] 17 of 17, cached:...[K
[13/4f49fa] process > SortBAM (INPUT_R1_T1)          [100%] 17 of 17, cached:...[K
[5a/6aaef3] process > MergeBAM (MV411_RNP_RUNX2-H... [100%] 17 of 17, cached:...[K
[48/4b5e62] process > MergeBAMFilter (MV411_RNP_R... [100%] 17 of 17 ✔[K
[ba/00ce48] process > MergeBAMRemoveOrphan (MV411... [ 79%] 11 of 14[K
[-        ] process > Preseq                         -[K
[-        ] process > CollectMultipleMetrics         -[K
[-        ] process > BigWig         

[28A
executor >  local (41)[K
[eb/23cd77] process > CheckDesign (additional_deg... [100%] 1 of 1, cached: 1 ✔[K
[c9/ec8de4] process > MakeTSSBED (genes.bed)         [100%] 1 of 1, cached: 1 ✔[K
[0d/f433a4] process > MakeGenomeFilter (genome.fa)   [100%] 1 of 1, cached: 1 ✔[K
[fe/2e0c27] process > FastQC (MV411_RNP_SPI1-H3K2... [100%] 17 of 17, cached:...[K
[68/a565e7] process > TrimGalore (MV411_RNP_MYB-H... [100%] 17 of 17, cached:...[K
[a2/0c2388] process > BWAMem (MV411_RNP_RUNX1-H3K... [100%] 17 of 17, cached:...[K
[13/4f49fa] process > SortBAM (INPUT_R1_T1)          [100%] 17 of 17, cached:...[K
[5a/6aaef3] process > MergeBAM (MV411_RNP_RUNX2-H... [100%] 17 of 17, cached:...[K
[48/4b5e62] process > MergeBAMFilter (MV411_RNP_R... [100%] 17 of 17 ✔[K
[c0/52c2db] process > MergeBAMRemoveOrphan (MV411... [ 94%] 15 of 16[K
[-        ] process > Preseq                         -[K
[-        ] process > CollectMultipleMetrics         -[K
[-        ] process > BigWig         

[28A
executor >  local (46)[K
[eb/23cd77] process > CheckDesign (additional_deg... [100%] 1 of 1, cached: 1 ✔[K
[c9/ec8de4] process > MakeTSSBED (genes.bed)         [100%] 1 of 1, cached: 1 ✔[K
[0d/f433a4] process > MakeGenomeFilter (genome.fa)   [100%] 1 of 1, cached: 1 ✔[K
[fe/2e0c27] process > FastQC (MV411_RNP_SPI1-H3K2... [100%] 17 of 17, cached:...[K
[68/a565e7] process > TrimGalore (MV411_RNP_MYB-H... [100%] 17 of 17, cached:...[K
[a2/0c2388] process > BWAMem (MV411_RNP_RUNX1-H3K... [100%] 17 of 17, cached:...[K
[13/4f49fa] process > SortBAM (INPUT_R1_T1)          [100%] 17 of 17, cached:...[K
[5a/6aaef3] process > MergeBAM (MV411_RNP_RUNX2-H... [100%] 17 of 17, cached:...[K
[48/4b5e62] process > MergeBAMFilter (MV411_RNP_R... [100%] 17 of 17 ✔[K
[dc/6cb451] process > MergeBAMRemoveOrphan (MV411... [ 94%] 16 of 17[K
[-        ] process > Preseq                         -[K
[18/e3205c] process > CollectMultipleMetrics (MV4... [100%] 1 of 1[K
[fa/7ca520] process > Big

[c1/b1a581] process > PhantomPeakQualTools (MV411... [ 67%] 2 of 3[K
[-        ] process > PlotFingerprint                -[K
[-        ] process > MACSCallPeak                   -[K
[-        ] process > AnnotatePeaks                  -[K
[-        ] process > PeakQC                         -[K
[-        ] process > ConsensusPeakSet               -[K
[-        ] process > ConsensusPeakSetAnnotate       -[K
[-        ] process > ConsensusPeakSetDESeq          -[K
[-        ] process > IGV                            -[K
[7e/9c912a] process > get_software_versions          [100%] 1 of 1 ✔[K
[-        ] process > MultiQC                        -[K
[8a/7334d7] process > output_documentation           [100%] 1 of 1, cached: 1 ✔[K
[28A
executor >  local (51)[K
[eb/23cd77] process > CheckDesign (additional_deg... [100%] 1 of 1, cached: 1 ✔[K
[c9/ec8de4] process > MakeTSSBED (genes.bed)         [100%] 1 of 1, cached: 1 ✔[K
[0d/f433a4] process > MakeGenomeFilter (genome.fa)   [1

[28A
executor >  local (56)[K
[eb/23cd77] process > CheckDesign (additional_deg... [100%] 1 of 1, cached: 1 ✔[K
[c9/ec8de4] process > MakeTSSBED (genes.bed)         [100%] 1 of 1, cached: 1 ✔[K
[0d/f433a4] process > MakeGenomeFilter (genome.fa)   [100%] 1 of 1, cached: 1 ✔[K
[fe/2e0c27] process > FastQC (MV411_RNP_SPI1-H3K2... [100%] 17 of 17, cached:...[K
[68/a565e7] process > TrimGalore (MV411_RNP_MYB-H... [100%] 17 of 17, cached:...[K
[a2/0c2388] process > BWAMem (MV411_RNP_RUNX1-H3K... [100%] 17 of 17, cached:...[K
[13/4f49fa] process > SortBAM (INPUT_R1_T1)          [100%] 17 of 17, cached:...[K
[5a/6aaef3] process > MergeBAM (MV411_RNP_RUNX2-H... [100%] 17 of 17, cached:...[K
[48/4b5e62] process > MergeBAMFilter (MV411_RNP_R... [100%] 17 of 17 ✔[K
[dc/6cb451] process > MergeBAMRemoveOrphan (MV411... [100%] 17 of 17 ✔[K
[-        ] process > Preseq                         -[K
[36/03ab9d] process > CollectMultipleMetrics (MV4... [ 80%] 4 of 5[K
[26/8106f4] process > B

[28A
executor >  local (60)[K
[eb/23cd77] process > CheckDesign (additional_deg... [100%] 1 of 1, cached: 1 ✔[K
[c9/ec8de4] process > MakeTSSBED (genes.bed)         [100%] 1 of 1, cached: 1 ✔[K
[0d/f433a4] process > MakeGenomeFilter (genome.fa)   [100%] 1 of 1, cached: 1 ✔[K
[fe/2e0c27] process > FastQC (MV411_RNP_SPI1-H3K2... [100%] 17 of 17, cached:...[K
[68/a565e7] process > TrimGalore (MV411_RNP_MYB-H... [100%] 17 of 17, cached:...[K
[a2/0c2388] process > BWAMem (MV411_RNP_RUNX1-H3K... [100%] 17 of 17, cached:...[K
[13/4f49fa] process > SortBAM (INPUT_R1_T1)          [100%] 17 of 17, cached:...[K
[5a/6aaef3] process > MergeBAM (MV411_RNP_RUNX2-H... [100%] 17 of 17, cached:...[K
[48/4b5e62] process > MergeBAMFilter (MV411_RNP_R... [100%] 17 of 17 ✔[K
[dc/6cb451] process > MergeBAMRemoveOrphan (MV411... [100%] 17 of 17 ✔[K
[-        ] process > Preseq                         -[K
[36/03ab9d] process > CollectMultipleMetrics (MV4... [100%] 5 of 5[K
[2d/982381] process > B

[28A
executor >  local (65)[K
[eb/23cd77] process > CheckDesign (additional_deg... [100%] 1 of 1, cached: 1 ✔[K
[c9/ec8de4] process > MakeTSSBED (genes.bed)         [100%] 1 of 1, cached: 1 ✔[K
[0d/f433a4] process > MakeGenomeFilter (genome.fa)   [100%] 1 of 1, cached: 1 ✔[K
[fe/2e0c27] process > FastQC (MV411_RNP_SPI1-H3K2... [100%] 17 of 17, cached:...[K
[68/a565e7] process > TrimGalore (MV411_RNP_MYB-H... [100%] 17 of 17, cached:...[K
[a2/0c2388] process > BWAMem (MV411_RNP_RUNX1-H3K... [100%] 17 of 17, cached:...[K
[13/4f49fa] process > SortBAM (INPUT_R1_T1)          [100%] 17 of 17, cached:...[K
[5a/6aaef3] process > MergeBAM (MV411_RNP_RUNX2-H... [100%] 17 of 17, cached:...[K
[48/4b5e62] process > MergeBAMFilter (MV411_RNP_R... [100%] 17 of 17 ✔[K
[dc/6cb451] process > MergeBAMRemoveOrphan (MV411... [100%] 17 of 17 ✔[K
[-        ] process > Preseq                         -[K
[6f/0c32f5] process > CollectMultipleMetrics (INP... [100%] 6 of 6[K
[2d/982381] process > B

[28A
executor >  local (70)[K
[eb/23cd77] process > CheckDesign (additional_deg... [100%] 1 of 1, cached: 1 ✔[K
[c9/ec8de4] process > MakeTSSBED (genes.bed)         [100%] 1 of 1, cached: 1 ✔[K
[0d/f433a4] process > MakeGenomeFilter (genome.fa)   [100%] 1 of 1, cached: 1 ✔[K
[fe/2e0c27] process > FastQC (MV411_RNP_SPI1-H3K2... [100%] 17 of 17, cached:...[K
[68/a565e7] process > TrimGalore (MV411_RNP_MYB-H... [100%] 17 of 17, cached:...[K
[a2/0c2388] process > BWAMem (MV411_RNP_RUNX1-H3K... [100%] 17 of 17, cached:...[K
[13/4f49fa] process > SortBAM (INPUT_R1_T1)          [100%] 17 of 17, cached:...[K
[5a/6aaef3] process > MergeBAM (MV411_RNP_RUNX2-H... [100%] 17 of 17, cached:...[K
[48/4b5e62] process > MergeBAMFilter (MV411_RNP_R... [100%] 17 of 17 ✔[K
[dc/6cb451] process > MergeBAMRemoveOrphan (MV411... [100%] 17 of 17 ✔[K
[-        ] process > Preseq                         -[K
[77/85e402] process > CollectMultipleMetrics (MV4... [100%] 7 of 7[K
[83/f433d6] process > B

[28A
executor >  local (74)[K
[eb/23cd77] process > CheckDesign (additional_deg... [100%] 1 of 1, cached: 1 ✔[K
[c9/ec8de4] process > MakeTSSBED (genes.bed)         [100%] 1 of 1, cached: 1 ✔[K
[0d/f433a4] process > MakeGenomeFilter (genome.fa)   [100%] 1 of 1, cached: 1 ✔[K
[fe/2e0c27] process > FastQC (MV411_RNP_SPI1-H3K2... [100%] 17 of 17, cached:...[K
[68/a565e7] process > TrimGalore (MV411_RNP_MYB-H... [100%] 17 of 17, cached:...[K
[a2/0c2388] process > BWAMem (MV411_RNP_RUNX1-H3K... [100%] 17 of 17, cached:...[K
[13/4f49fa] process > SortBAM (INPUT_R1_T1)          [100%] 17 of 17, cached:...[K
[5a/6aaef3] process > MergeBAM (MV411_RNP_RUNX2-H... [100%] 17 of 17, cached:...[K
[48/4b5e62] process > MergeBAMFilter (MV411_RNP_R... [100%] 17 of 17 ✔[K
[dc/6cb451] process > MergeBAMRemoveOrphan (MV411... [100%] 17 of 17 ✔[K
[-        ] process > Preseq                         -[K
[f7/870fa3] process > CollectMultipleMetrics (MV4... [100%] 8 of 8[K
[be/8c5392] process > B

[28A
executor >  local (79)[K
[eb/23cd77] process > CheckDesign (additional_deg... [100%] 1 of 1, cached: 1 ✔[K
[c9/ec8de4] process > MakeTSSBED (genes.bed)         [100%] 1 of 1, cached: 1 ✔[K
[0d/f433a4] process > MakeGenomeFilter (genome.fa)   [100%] 1 of 1, cached: 1 ✔[K
[fe/2e0c27] process > FastQC (MV411_RNP_SPI1-H3K2... [100%] 17 of 17, cached:...[K
[68/a565e7] process > TrimGalore (MV411_RNP_MYB-H... [100%] 17 of 17, cached:...[K
[a2/0c2388] process > BWAMem (MV411_RNP_RUNX1-H3K... [100%] 17 of 17, cached:...[K
[13/4f49fa] process > SortBAM (INPUT_R1_T1)          [100%] 17 of 17, cached:...[K
[5a/6aaef3] process > MergeBAM (MV411_RNP_RUNX2-H... [100%] 17 of 17, cached:...[K
[48/4b5e62] process > MergeBAMFilter (MV411_RNP_R... [100%] 17 of 17 ✔[K
[dc/6cb451] process > MergeBAMRemoveOrphan (MV411... [100%] 17 of 17 ✔[K
[-        ] process > Preseq                         -[K
[eb/85c928] process > CollectMultipleMetrics (MV4... [100%] 9 of 9[K
[4e/c46492] process > B

[28A
executor >  local (84)[K
[eb/23cd77] process > CheckDesign (additional_deg... [100%] 1 of 1, cached: 1 ✔[K
[c9/ec8de4] process > MakeTSSBED (genes.bed)         [100%] 1 of 1, cached: 1 ✔[K
[0d/f433a4] process > MakeGenomeFilter (genome.fa)   [100%] 1 of 1, cached: 1 ✔[K
[fe/2e0c27] process > FastQC (MV411_RNP_SPI1-H3K2... [100%] 17 of 17, cached:...[K
[68/a565e7] process > TrimGalore (MV411_RNP_MYB-H... [100%] 17 of 17, cached:...[K
[a2/0c2388] process > BWAMem (MV411_RNP_RUNX1-H3K... [100%] 17 of 17, cached:...[K
[13/4f49fa] process > SortBAM (INPUT_R1_T1)          [100%] 17 of 17, cached:...[K
[5a/6aaef3] process > MergeBAM (MV411_RNP_RUNX2-H... [100%] 17 of 17, cached:...[K
[48/4b5e62] process > MergeBAMFilter (MV411_RNP_R... [100%] 17 of 17 ✔[K
[dc/6cb451] process > MergeBAMRemoveOrphan (MV411... [100%] 17 of 17 ✔[K
[-        ] process > Preseq                         -[K
[af/724a3d] process > CollectMultipleMetrics (MV4... [ 91%] 10 of 11[K
[60/3850df] process >

[28A
executor >  local (89)[K
[eb/23cd77] process > CheckDesign (additional_deg... [100%] 1 of 1, cached: 1 ✔[K
[c9/ec8de4] process > MakeTSSBED (genes.bed)         [100%] 1 of 1, cached: 1 ✔[K
[0d/f433a4] process > MakeGenomeFilter (genome.fa)   [100%] 1 of 1, cached: 1 ✔[K
[fe/2e0c27] process > FastQC (MV411_RNP_SPI1-H3K2... [100%] 17 of 17, cached:...[K
[68/a565e7] process > TrimGalore (MV411_RNP_MYB-H... [100%] 17 of 17, cached:...[K
[a2/0c2388] process > BWAMem (MV411_RNP_RUNX1-H3K... [100%] 17 of 17, cached:...[K
[13/4f49fa] process > SortBAM (INPUT_R1_T1)          [100%] 17 of 17, cached:...[K
[5a/6aaef3] process > MergeBAM (MV411_RNP_RUNX2-H... [100%] 17 of 17, cached:...[K
[48/4b5e62] process > MergeBAMFilter (MV411_RNP_R... [100%] 17 of 17 ✔[K
[dc/6cb451] process > MergeBAMRemoveOrphan (MV411... [100%] 17 of 17 ✔[K
[-        ] process > Preseq                         -[K
[be/e4f2a1] process > CollectMultipleMetrics (MV4... [100%] 12 of 12[K
[eb/3a1856] process >

[28A
executor >  local (94)[K
[eb/23cd77] process > CheckDesign (additional_deg... [100%] 1 of 1, cached: 1 ✔[K
[c9/ec8de4] process > MakeTSSBED (genes.bed)         [100%] 1 of 1, cached: 1 ✔[K
[0d/f433a4] process > MakeGenomeFilter (genome.fa)   [100%] 1 of 1, cached: 1 ✔[K
[fe/2e0c27] process > FastQC (MV411_RNP_SPI1-H3K2... [100%] 17 of 17, cached:...[K
[68/a565e7] process > TrimGalore (MV411_RNP_MYB-H... [100%] 17 of 17, cached:...[K
[a2/0c2388] process > BWAMem (MV411_RNP_RUNX1-H3K... [100%] 17 of 17, cached:...[K
[13/4f49fa] process > SortBAM (INPUT_R1_T1)          [100%] 17 of 17, cached:...[K
[5a/6aaef3] process > MergeBAM (MV411_RNP_RUNX2-H... [100%] 17 of 17, cached:...[K
[48/4b5e62] process > MergeBAMFilter (MV411_RNP_R... [100%] 17 of 17 ✔[K
[dc/6cb451] process > MergeBAMRemoveOrphan (MV411... [100%] 17 of 17 ✔[K
[-        ] process > Preseq                         -[K
[58/9c859d] process > CollectMultipleMetrics (MV4... [100%] 13 of 13[K
[eb/3a1856] process >

[28A
executor >  local (99)[K
[eb/23cd77] process > CheckDesign (additional_deg... [100%] 1 of 1, cached: 1 ✔[K
[c9/ec8de4] process > MakeTSSBED (genes.bed)         [100%] 1 of 1, cached: 1 ✔[K
[0d/f433a4] process > MakeGenomeFilter (genome.fa)   [100%] 1 of 1, cached: 1 ✔[K
[fe/2e0c27] process > FastQC (MV411_RNP_SPI1-H3K2... [100%] 17 of 17, cached:...[K
[68/a565e7] process > TrimGalore (MV411_RNP_MYB-H... [100%] 17 of 17, cached:...[K
[a2/0c2388] process > BWAMem (MV411_RNP_RUNX1-H3K... [100%] 17 of 17, cached:...[K
[13/4f49fa] process > SortBAM (INPUT_R1_T1)          [100%] 17 of 17, cached:...[K
[5a/6aaef3] process > MergeBAM (MV411_RNP_RUNX2-H... [100%] 17 of 17, cached:...[K
[48/4b5e62] process > MergeBAMFilter (MV411_RNP_R... [100%] 17 of 17 ✔[K
[dc/6cb451] process > MergeBAMRemoveOrphan (MV411... [100%] 17 of 17 ✔[K
[-        ] process > Preseq                         -[K
[39/7b314d] process > CollectMultipleMetrics (MV4... [100%] 14 of 14[K
[e1/c25e9c] process >

[28A
executor >  local (104)[K
[eb/23cd77] process > CheckDesign (additional_deg... [100%] 1 of 1, cached: 1 ✔[K
[c9/ec8de4] process > MakeTSSBED (genes.bed)         [100%] 1 of 1, cached: 1 ✔[K
[0d/f433a4] process > MakeGenomeFilter (genome.fa)   [100%] 1 of 1, cached: 1 ✔[K
[fe/2e0c27] process > FastQC (MV411_RNP_SPI1-H3K2... [100%] 17 of 17, cached:...[K
[68/a565e7] process > TrimGalore (MV411_RNP_MYB-H... [100%] 17 of 17, cached:...[K
[a2/0c2388] process > BWAMem (MV411_RNP_RUNX1-H3K... [100%] 17 of 17, cached:...[K
[13/4f49fa] process > SortBAM (INPUT_R1_T1)          [100%] 17 of 17, cached:...[K
[5a/6aaef3] process > MergeBAM (MV411_RNP_RUNX2-H... [100%] 17 of 17, cached:...[K
[48/4b5e62] process > MergeBAMFilter (MV411_RNP_R... [100%] 17 of 17 ✔[K
[dc/6cb451] process > MergeBAMRemoveOrphan (MV411... [100%] 17 of 17 ✔[K
[-        ] process > Preseq                         -[K
[85/5ed89a] process > CollectMultipleMetrics (MV4... [ 94%] 15 of 16[K
[9c/f2630b] process 

[28A
executor >  local (109)[K
[eb/23cd77] process > CheckDesign (additional_deg... [100%] 1 of 1, cached: 1 ✔[K
[c9/ec8de4] process > MakeTSSBED (genes.bed)         [100%] 1 of 1, cached: 1 ✔[K
[0d/f433a4] process > MakeGenomeFilter (genome.fa)   [100%] 1 of 1, cached: 1 ✔[K
[fe/2e0c27] process > FastQC (MV411_RNP_SPI1-H3K2... [100%] 17 of 17, cached:...[K
[68/a565e7] process > TrimGalore (MV411_RNP_MYB-H... [100%] 17 of 17, cached:...[K
[a2/0c2388] process > BWAMem (MV411_RNP_RUNX1-H3K... [100%] 17 of 17, cached:...[K
[13/4f49fa] process > SortBAM (INPUT_R1_T1)          [100%] 17 of 17, cached:...[K
[5a/6aaef3] process > MergeBAM (MV411_RNP_RUNX2-H... [100%] 17 of 17, cached:...[K
[48/4b5e62] process > MergeBAMFilter (MV411_RNP_R... [100%] 17 of 17 ✔[K
[dc/6cb451] process > MergeBAMRemoveOrphan (MV411... [100%] 17 of 17 ✔[K
[-        ] process > Preseq                         -[K
[7b/722dae] process > CollectMultipleMetrics (MV4... [100%] 17 of 17 ✔[K
[a1/2654ae] proces

[28A
executor >  local (114)[K
[eb/23cd77] process > CheckDesign (additional_deg... [100%] 1 of 1, cached: 1 ✔[K
[c9/ec8de4] process > MakeTSSBED (genes.bed)         [100%] 1 of 1, cached: 1 ✔[K
[0d/f433a4] process > MakeGenomeFilter (genome.fa)   [100%] 1 of 1, cached: 1 ✔[K
[fe/2e0c27] process > FastQC (MV411_RNP_SPI1-H3K2... [100%] 17 of 17, cached:...[K
[68/a565e7] process > TrimGalore (MV411_RNP_MYB-H... [100%] 17 of 17, cached:...[K
[a2/0c2388] process > BWAMem (MV411_RNP_RUNX1-H3K... [100%] 17 of 17, cached:...[K
[13/4f49fa] process > SortBAM (INPUT_R1_T1)          [100%] 17 of 17, cached:...[K
[5a/6aaef3] process > MergeBAM (MV411_RNP_RUNX2-H... [100%] 17 of 17, cached:...[K
[48/4b5e62] process > MergeBAMFilter (MV411_RNP_R... [100%] 17 of 17 ✔[K
[dc/6cb451] process > MergeBAMRemoveOrphan (MV411... [100%] 17 of 17 ✔[K
[-        ] process > Preseq                         -[K
[7b/722dae] process > CollectMultipleMetrics (MV4... [100%] 17 of 17 ✔[K
[a1/2654ae] proces

[28A
executor >  local (120)[K
[eb/23cd77] process > CheckDesign (additional_deg... [100%] 1 of 1, cached: 1 ✔[K
[c9/ec8de4] process > MakeTSSBED (genes.bed)         [100%] 1 of 1, cached: 1 ✔[K
[0d/f433a4] process > MakeGenomeFilter (genome.fa)   [100%] 1 of 1, cached: 1 ✔[K
[fe/2e0c27] process > FastQC (MV411_RNP_SPI1-H3K2... [100%] 17 of 17, cached:...[K
[68/a565e7] process > TrimGalore (MV411_RNP_MYB-H... [100%] 17 of 17, cached:...[K
[a2/0c2388] process > BWAMem (MV411_RNP_RUNX1-H3K... [100%] 17 of 17, cached:...[K
[13/4f49fa] process > SortBAM (INPUT_R1_T1)          [100%] 17 of 17, cached:...[K
[5a/6aaef3] process > MergeBAM (MV411_RNP_RUNX2-H... [100%] 17 of 17, cached:...[K
[48/4b5e62] process > MergeBAMFilter (MV411_RNP_R... [100%] 17 of 17 ✔[K
[dc/6cb451] process > MergeBAMRemoveOrphan (MV411... [100%] 17 of 17 ✔[K
[-        ] process > Preseq                         -[K
[7b/722dae] process > CollectMultipleMetrics (MV4... [100%] 17 of 17 ✔[K
[a1/2654ae] proces

[28A
executor >  local (125)[K
[eb/23cd77] process > CheckDesign (additional_deg... [100%] 1 of 1, cached: 1 ✔[K
[c9/ec8de4] process > MakeTSSBED (genes.bed)         [100%] 1 of 1, cached: 1 ✔[K
[0d/f433a4] process > MakeGenomeFilter (genome.fa)   [100%] 1 of 1, cached: 1 ✔[K
[fe/2e0c27] process > FastQC (MV411_RNP_SPI1-H3K2... [100%] 17 of 17, cached:...[K
[68/a565e7] process > TrimGalore (MV411_RNP_MYB-H... [100%] 17 of 17, cached:...[K
[a2/0c2388] process > BWAMem (MV411_RNP_RUNX1-H3K... [100%] 17 of 17, cached:...[K
[13/4f49fa] process > SortBAM (INPUT_R1_T1)          [100%] 17 of 17, cached:...[K
[5a/6aaef3] process > MergeBAM (MV411_RNP_RUNX2-H... [100%] 17 of 17, cached:...[K
[48/4b5e62] process > MergeBAMFilter (MV411_RNP_R... [100%] 17 of 17 ✔[K
[dc/6cb451] process > MergeBAMRemoveOrphan (MV411... [100%] 17 of 17 ✔[K
[-        ] process > Preseq                         -[K
[7b/722dae] process > CollectMultipleMetrics (MV4... [100%] 17 of 17 ✔[K
[a1/2654ae] proces

[28A
executor >  local (129)[K
[eb/23cd77] process > CheckDesign (additional_deg... [100%] 1 of 1, cached: 1 ✔[K
[c9/ec8de4] process > MakeTSSBED (genes.bed)         [100%] 1 of 1, cached: 1 ✔[K
[0d/f433a4] process > MakeGenomeFilter (genome.fa)   [100%] 1 of 1, cached: 1 ✔[K
[fe/2e0c27] process > FastQC (MV411_RNP_SPI1-H3K2... [100%] 17 of 17, cached:...[K
[68/a565e7] process > TrimGalore (MV411_RNP_MYB-H... [100%] 17 of 17, cached:...[K
[a2/0c2388] process > BWAMem (MV411_RNP_RUNX1-H3K... [100%] 17 of 17, cached:...[K
[13/4f49fa] process > SortBAM (INPUT_R1_T1)          [100%] 17 of 17, cached:...[K
[5a/6aaef3] process > MergeBAM (MV411_RNP_RUNX2-H... [100%] 17 of 17, cached:...[K
[48/4b5e62] process > MergeBAMFilter (MV411_RNP_R... [100%] 17 of 17 ✔[K
[dc/6cb451] process > MergeBAMRemoveOrphan (MV411... [100%] 17 of 17 ✔[K
[-        ] process > Preseq                         -[K
[7b/722dae] process > CollectMultipleMetrics (MV4... [100%] 17 of 17 ✔[K
[a1/2654ae] proces

[28A
executor >  local (133)[K
[eb/23cd77] process > CheckDesign (additional_deg... [100%] 1 of 1, cached: 1 ✔[K
[c9/ec8de4] process > MakeTSSBED (genes.bed)         [100%] 1 of 1, cached: 1 ✔[K
[0d/f433a4] process > MakeGenomeFilter (genome.fa)   [100%] 1 of 1, cached: 1 ✔[K
[fe/2e0c27] process > FastQC (MV411_RNP_SPI1-H3K2... [100%] 17 of 17, cached:...[K
[68/a565e7] process > TrimGalore (MV411_RNP_MYB-H... [100%] 17 of 17, cached:...[K
[a2/0c2388] process > BWAMem (MV411_RNP_RUNX1-H3K... [100%] 17 of 17, cached:...[K
[13/4f49fa] process > SortBAM (INPUT_R1_T1)          [100%] 17 of 17, cached:...[K
[5a/6aaef3] process > MergeBAM (MV411_RNP_RUNX2-H... [100%] 17 of 17, cached:...[K
[48/4b5e62] process > MergeBAMFilter (MV411_RNP_R... [100%] 17 of 17 ✔[K
[dc/6cb451] process > MergeBAMRemoveOrphan (MV411... [100%] 17 of 17 ✔[K
[-        ] process > Preseq                         -[K
[7b/722dae] process > CollectMultipleMetrics (MV4... [100%] 17 of 17 ✔[K
[a1/2654ae] proces

[28A
executor >  local (138)[K
[eb/23cd77] process > CheckDesign (additional_deg... [100%] 1 of 1, cached: 1 ✔[K
[c9/ec8de4] process > MakeTSSBED (genes.bed)         [100%] 1 of 1, cached: 1 ✔[K
[0d/f433a4] process > MakeGenomeFilter (genome.fa)   [100%] 1 of 1, cached: 1 ✔[K
[fe/2e0c27] process > FastQC (MV411_RNP_SPI1-H3K2... [100%] 17 of 17, cached:...[K
[68/a565e7] process > TrimGalore (MV411_RNP_MYB-H... [100%] 17 of 17, cached:...[K
[a2/0c2388] process > BWAMem (MV411_RNP_RUNX1-H3K... [100%] 17 of 17, cached:...[K
[13/4f49fa] process > SortBAM (INPUT_R1_T1)          [100%] 17 of 17, cached:...[K
[5a/6aaef3] process > MergeBAM (MV411_RNP_RUNX2-H... [100%] 17 of 17, cached:...[K
[48/4b5e62] process > MergeBAMFilter (MV411_RNP_R... [100%] 17 of 17 ✔[K
[dc/6cb451] process > MergeBAMRemoveOrphan (MV411... [100%] 17 of 17 ✔[K
[-        ] process > Preseq                         -[K
[7b/722dae] process > CollectMultipleMetrics (MV4... [100%] 17 of 17 ✔[K
[a1/2654ae] proces

[28A
executor >  local (143)[K
[eb/23cd77] process > CheckDesign (additional_deg... [100%] 1 of 1, cached: 1 ✔[K
[c9/ec8de4] process > MakeTSSBED (genes.bed)         [100%] 1 of 1, cached: 1 ✔[K
[0d/f433a4] process > MakeGenomeFilter (genome.fa)   [100%] 1 of 1, cached: 1 ✔[K
[fe/2e0c27] process > FastQC (MV411_RNP_SPI1-H3K2... [100%] 17 of 17, cached:...[K
[68/a565e7] process > TrimGalore (MV411_RNP_MYB-H... [100%] 17 of 17, cached:...[K
[a2/0c2388] process > BWAMem (MV411_RNP_RUNX1-H3K... [100%] 17 of 17, cached:...[K
[13/4f49fa] process > SortBAM (INPUT_R1_T1)          [100%] 17 of 17, cached:...[K
[5a/6aaef3] process > MergeBAM (MV411_RNP_RUNX2-H... [100%] 17 of 17, cached:...[K
[48/4b5e62] process > MergeBAMFilter (MV411_RNP_R... [100%] 17 of 17 ✔[K
[dc/6cb451] process > MergeBAMRemoveOrphan (MV411... [100%] 17 of 17 ✔[K
[-        ] process > Preseq                         -[K
[7b/722dae] process > CollectMultipleMetrics (MV4... [100%] 17 of 17 ✔[K
[a1/2654ae] proces

[28A
executor >  local (148)[K
[eb/23cd77] process > CheckDesign (additional_deg... [100%] 1 of 1, cached: 1 ✔[K
[c9/ec8de4] process > MakeTSSBED (genes.bed)         [100%] 1 of 1, cached: 1 ✔[K
[0d/f433a4] process > MakeGenomeFilter (genome.fa)   [100%] 1 of 1, cached: 1 ✔[K
[fe/2e0c27] process > FastQC (MV411_RNP_SPI1-H3K2... [100%] 17 of 17, cached:...[K
[68/a565e7] process > TrimGalore (MV411_RNP_MYB-H... [100%] 17 of 17, cached:...[K
[a2/0c2388] process > BWAMem (MV411_RNP_RUNX1-H3K... [100%] 17 of 17, cached:...[K
[13/4f49fa] process > SortBAM (INPUT_R1_T1)          [100%] 17 of 17, cached:...[K
[5a/6aaef3] process > MergeBAM (MV411_RNP_RUNX2-H... [100%] 17 of 17, cached:...[K
[48/4b5e62] process > MergeBAMFilter (MV411_RNP_R... [100%] 17 of 17 ✔[K
[dc/6cb451] process > MergeBAMRemoveOrphan (MV411... [100%] 17 of 17 ✔[K
[-        ] process > Preseq                         -[K
[7b/722dae] process > CollectMultipleMetrics (MV4... [100%] 17 of 17 ✔[K
[a1/2654ae] proces

[28A
executor >  local (153)[K
[eb/23cd77] process > CheckDesign (additional_deg... [100%] 1 of 1, cached: 1 ✔[K
[c9/ec8de4] process > MakeTSSBED (genes.bed)         [100%] 1 of 1, cached: 1 ✔[K
[0d/f433a4] process > MakeGenomeFilter (genome.fa)   [100%] 1 of 1, cached: 1 ✔[K
[fe/2e0c27] process > FastQC (MV411_RNP_SPI1-H3K2... [100%] 17 of 17, cached:...[K
[68/a565e7] process > TrimGalore (MV411_RNP_MYB-H... [100%] 17 of 17, cached:...[K
[a2/0c2388] process > BWAMem (MV411_RNP_RUNX1-H3K... [100%] 17 of 17, cached:...[K
[13/4f49fa] process > SortBAM (INPUT_R1_T1)          [100%] 17 of 17, cached:...[K
[5a/6aaef3] process > MergeBAM (MV411_RNP_RUNX2-H... [100%] 17 of 17, cached:...[K
[48/4b5e62] process > MergeBAMFilter (MV411_RNP_R... [100%] 17 of 17 ✔[K
[dc/6cb451] process > MergeBAMRemoveOrphan (MV411... [100%] 17 of 17 ✔[K
[-        ] process > Preseq                         -[K
[7b/722dae] process > CollectMultipleMetrics (MV4... [100%] 17 of 17 ✔[K
[a1/2654ae] proces

[28A
executor >  local (158)[K
[eb/23cd77] process > CheckDesign (additional_deg... [100%] 1 of 1, cached: 1 ✔[K
[c9/ec8de4] process > MakeTSSBED (genes.bed)         [100%] 1 of 1, cached: 1 ✔[K
[0d/f433a4] process > MakeGenomeFilter (genome.fa)   [100%] 1 of 1, cached: 1 ✔[K
[fe/2e0c27] process > FastQC (MV411_RNP_SPI1-H3K2... [100%] 17 of 17, cached:...[K
[68/a565e7] process > TrimGalore (MV411_RNP_MYB-H... [100%] 17 of 17, cached:...[K
[a2/0c2388] process > BWAMem (MV411_RNP_RUNX1-H3K... [100%] 17 of 17, cached:...[K
[13/4f49fa] process > SortBAM (INPUT_R1_T1)          [100%] 17 of 17, cached:...[K
[5a/6aaef3] process > MergeBAM (MV411_RNP_RUNX2-H... [100%] 17 of 17, cached:...[K
[48/4b5e62] process > MergeBAMFilter (MV411_RNP_R... [100%] 17 of 17 ✔[K
[dc/6cb451] process > MergeBAMRemoveOrphan (MV411... [100%] 17 of 17 ✔[K
[-        ] process > Preseq                         -[K
[7b/722dae] process > CollectMultipleMetrics (MV4... [100%] 17 of 17 ✔[K
[a1/2654ae] proces

[28A
executor >  local (162)[K
[eb/23cd77] process > CheckDesign (additional_deg... [100%] 1 of 1, cached: 1 ✔[K
[c9/ec8de4] process > MakeTSSBED (genes.bed)         [100%] 1 of 1, cached: 1 ✔[K
[0d/f433a4] process > MakeGenomeFilter (genome.fa)   [100%] 1 of 1, cached: 1 ✔[K
[fe/2e0c27] process > FastQC (MV411_RNP_SPI1-H3K2... [100%] 17 of 17, cached:...[K
[68/a565e7] process > TrimGalore (MV411_RNP_MYB-H... [100%] 17 of 17, cached:...[K
[a2/0c2388] process > BWAMem (MV411_RNP_RUNX1-H3K... [100%] 17 of 17, cached:...[K
[13/4f49fa] process > SortBAM (INPUT_R1_T1)          [100%] 17 of 17, cached:...[K
[5a/6aaef3] process > MergeBAM (MV411_RNP_RUNX2-H... [100%] 17 of 17, cached:...[K
[48/4b5e62] process > MergeBAMFilter (MV411_RNP_R... [100%] 17 of 17 ✔[K
[dc/6cb451] process > MergeBAMRemoveOrphan (MV411... [100%] 17 of 17 ✔[K
[-        ] process > Preseq                         -[K
[7b/722dae] process > CollectMultipleMetrics (MV4... [100%] 17 of 17 ✔[K
[a1/2654ae] proces

[31A
executor >  local (163)[K
[eb/23cd77] process > CheckDesign (additional_deg... [100%] 1 of 1, cached: 1 ✔[K
[c9/ec8de4] process > MakeTSSBED (genes.bed)         [100%] 1 of 1, cached: 1 ✔[K
[0d/f433a4] process > MakeGenomeFilter (genome.fa)   [100%] 1 of 1, cached: 1 ✔[K
[fe/2e0c27] process > FastQC (MV411_RNP_SPI1-H3K2... [100%] 17 of 17, cached:...[K
[68/a565e7] process > TrimGalore (MV411_RNP_MYB-H... [100%] 17 of 17, cached:...[K
[a2/0c2388] process > BWAMem (MV411_RNP_RUNX1-H3K... [100%] 17 of 17, cached:...[K
[13/4f49fa] process > SortBAM (INPUT_R1_T1)          [100%] 17 of 17, cached:...[K
[5a/6aaef3] process > MergeBAM (MV411_RNP_RUNX2-H... [100%] 17 of 17, cached:...[K
[48/4b5e62] process > MergeBAMFilter (MV411_RNP_R... [100%] 17 of 17 ✔[K
[dc/6cb451] process > MergeBAMRemoveOrphan (MV411... [100%] 17 of 17 ✔[K
[-        ] process > Preseq                         -[K
[7b/722dae] process > CollectMultipleMetrics (MV4... [100%] 17 of 17 ✔[K
[a1/2654ae] proces

In [10]:
!cp -r ../../data/results/* ../../data/$project/ && sudo rm -r ../data/results && sudo rm -r ..data/work

^C


In [11]:
ls ../../TrimGalore-0.6.5/trim_galore

[0m[01;32m../../TrimGalore-0.6.5/trim_galore[0m*


In [14]:
# get scaling values
norm, mapped = h.getSpikeInControlScales(refgenome="../../data/ref/reference_droso.fna",
fastQfolder='../../data/'+project+'/fastqs/',
pairedEnd=True, cores=12,
tofilter=True,
totrim=True,
tomap=True,
results="../../data/"+project+"/",
pathtotrim_galore="../../TrimGalore-0.6.5/trim_galore")
norm, mapped

using all files from folder
need to be name_*1, name_*2
[('mp858-MV411_RNP_AAVS1-H3K27AC-r1_R1_001.fastq.gz', 'mp858-MV411_RNP_AAVS1-H3K27AC-r1_R2_001.fastq.gz'), ('mp859-MV411_RNP_AAVS1-H3K27AC-r2_R1_001.fastq.gz', 'mp859-MV411_RNP_AAVS1-H3K27AC-r2_R2_001.fastq.gz'), ('mp860-MV411_RNP_RUNX1-H3K27AC-r1_R1_001.fastq.gz', 'mp860-MV411_RNP_RUNX1-H3K27AC-r1_R2_001.fastq.gz'), ('mp861-MV411_RNP_RUNX1-H3K27AC-r2_R1_001.fastq.gz', 'mp861-MV411_RNP_RUNX1-H3K27AC-r2_R2_001.fastq.gz'), ('mp862-MV411_RNP_RUNX2-H3K27AC-r1_R1_001.fastq.gz', 'mp862-MV411_RNP_RUNX2-H3K27AC-r1_R2_001.fastq.gz'), ('mp863-MV411_RNP_RUNX2-H3K27AC-r2_R1_001.fastq.gz', 'mp863-MV411_RNP_RUNX2-H3K27AC-r2_R2_001.fastq.gz'), ('mp864-MV411_RNP_RUNX1_RUNX2-H3K27AC-r1_R1_001.fastq.gz', 'mp864-MV411_RNP_RUNX1_RUNX2-H3K27AC-r1_R2_001.fastq.gz'), ('mp865-MV411_RNP_RUNX1_RUNX2-H3K27AC-r2_R1_001.fastq.gz', 'mp865-MV411_RNP_RUNX1_RUNX2-H3K27AC-r2_R2_001.fastq.gz'), ('mp866-MV411_RNP_MEF2D-H3K27AC-r1_R1_001.fastq.gz', 'mp866-MV411_RNP_M

({'mp858-MV411_RNP_AAVS1-H3K27AC-r1_R1_001_val_1': 0.6644280581189461,
  'mp859-MV411_RNP_AAVS1-H3K27AC-r2_R1_001_val_1': 0.5408483387497395,
  'mp860-MV411_RNP_RUNX1-H3K27AC-r1_R1_001_val_1': 0.9306982320552297,
  'mp861-MV411_RNP_RUNX1-H3K27AC-r2_R1_001_val_1': 0.4595249033344818,
  'mp862-MV411_RNP_RUNX2-H3K27AC-r1_R1_001_val_1': 1.0,
  'mp863-MV411_RNP_RUNX2-H3K27AC-r2_R1_001_val_1': 0.45660285309024545,
  'mp864-MV411_RNP_RUNX1_RUNX2-H3K27AC-r1_R1_001_val_1': 0.500947891528239,
  'mp865-MV411_RNP_RUNX1_RUNX2-H3K27AC-r2_R1_001_val_1': 0.3734932073757382,
  'mp866-MV411_RNP_MEF2D-H3K27AC-r1_R1_001_val_1': 0.37678768740112,
  'mp867-MV411_RNP_MEF2D-H3K27AC-r2_R1_001_val_1': 0.397505335892133,
  'mp868-MV411_RNP_IRF8-H3K27AC-r1_R1_001_val_1': 0.20011420092694476,
  'mp869-MV411_RNP_IRF8-H3K27AC-r2_R1_001_val_1': 0.2631428229278934,
  'mp870-MV411_RNP_MYB-H3K27AC-r1_R1_001_val_1': 0.5015211299972058,
  'mp871-MV411_RNP_MYB-H3K27AC-r2_R1_001_val_1': 0.32322855597358185,
  'mp872-MV411_R

computing scales from the excel sheet

In [119]:
scales = [0.3011826465, # 'MV411_RNP_IRF8-H3K27AC_R1',
0.4865371752, # 'MV411_RNP_IRF8-H3K27AC_R2',
0.5670857556, # 'MV411_RNP_MEF2D-H3K27AC_R1',
0.7349663619, # 'MV411_RNP_MEF2D-H3K27AC_R2',
0.7548163023, # 'MV411_RNP_MYB-H3K27AC_R1',
0.5976325206, # 'MV411_RNP_MYB-H3K27AC_R2',
1.400750948, # 'MV411_RNP_RUNX1-H3K27AC_R1',
0.849637265, # 'MV411_RNP_RUNX1-H3K27AC_R2',
0.7539535476, # 'MV411_RNP_RUNX1_RUNX2-H3K27AC_R1',
0.6905692051, # 'MV411_RNP_RUNX1_RUNX2-H3K27AC_R2',
1.50505384, # 'MV411_RNP_RUNX2-H3K27AC_R1',
0.8442345485, # 'MV411_RNP_RUNX2-H3K27AC_R2',
0.5019100631, # 'MV411_RNP_SPI1-H3K27AC_R1',
0.8688220473, # 'MV411_RNP_SPI1-H3K27AC_R2'
         ]

In [78]:
bams = ! ls ../../data/$project/bwa/mergedLibrary/*.bam
bams

['../../data/additional_degraded_v1/bwa/mergedLibrary/INPUT_R1.mLb.clN.sorted.bam',
 '../../data/additional_degraded_v1/bwa/mergedLibrary/MV411_RNP_AAVS1-H3K27AC_R1.mLb.clN.sorted.bam',
 '../../data/additional_degraded_v1/bwa/mergedLibrary/MV411_RNP_AAVS1-H3K27AC_R2.mLb.clN.sorted.bam',
 '../../data/additional_degraded_v1/bwa/mergedLibrary/MV411_RNP_IRF8-H3K27AC_R1.mLb.clN.sorted.bam',
 '../../data/additional_degraded_v1/bwa/mergedLibrary/MV411_RNP_IRF8-H3K27AC_R2.mLb.clN.sorted.bam',
 '../../data/additional_degraded_v1/bwa/mergedLibrary/MV411_RNP_MEF2D-H3K27AC_R1.mLb.clN.sorted.bam',
 '../../data/additional_degraded_v1/bwa/mergedLibrary/MV411_RNP_MEF2D-H3K27AC_R2.mLb.clN.sorted.bam',
 '../../data/additional_degraded_v1/bwa/mergedLibrary/MV411_RNP_MYB-H3K27AC_R1.mLb.clN.sorted.bam',
 '../../data/additional_degraded_v1/bwa/mergedLibrary/MV411_RNP_MYB-H3K27AC_R2.mLb.clN.sorted.bam',
 '../../data/additional_degraded_v1/bwa/mergedLibrary/MV411_RNP_RUNX1-H3K27AC_R1.mLb.clN.sorted.bam',
 '..

In [26]:
bams[6]

'../../data/additional_degraded_v1/bwa/mergedLibrary/MV411_RNP_MEF2D-H3K27AC_R2.mLb.clN.sorted.bam'

### on scalled data

In [21]:
! mkdir ../../data/$project/diffPeaks/ && ! mkdir ../../data/$project/diffData/

In [32]:
! mkdir ../../data/$project/droso_aligned

In [33]:
! mv ../../data/$project/mp* ../../data/$project/droso_aligned

In [24]:
wigs = ! ls ../../data/$project/bwa/mergedLibrary/bigwig/*.bigWig

In [5]:
ls ../../data/$project/bwa/mergedLibrary/bigwig/

INPUT_R1.mLb.clN.bigWig
MV411_RNP_AAVS1-H3K27AC_R1.mLb.clN.bdg
MV411_RNP_AAVS1-H3K27AC_R1.mLb.clN.bigWig
MV411_RNP_AAVS1-H3K27AC_R2.mLb.clN.bdg
MV411_RNP_AAVS1-H3K27AC_R2.mLb.clN.bigWig
MV411_RNP_IRF8-H3K27AC_R1.mLb.clN.bdg
MV411_RNP_IRF8-H3K27AC_R1.mLb.clN.bigWig
MV411_RNP_IRF8-H3K27AC_R2.mLb.clN.bdg
MV411_RNP_IRF8-H3K27AC_R2.mLb.clN.bigWig
MV411_RNP_MEF2D-H3K27AC_R1.mLb.clN.bdg
MV411_RNP_MEF2D-H3K27AC_R1.mLb.clN.bigWig
MV411_RNP_MEF2D-H3K27AC_R2.mLb.clN.bdg
MV411_RNP_MEF2D-H3K27AC_R2.mLb.clN.bigWig
MV411_RNP_MYB-H3K27AC_R1.mLb.clN.bdg
MV411_RNP_MYB-H3K27AC_R1.mLb.clN.bigWig
MV411_RNP_MYB-H3K27AC_R2.mLb.clN.bdg
MV411_RNP_MYB-H3K27AC_R2.mLb.clN.bigWig
MV411_RNP_RUNX1-H3K27AC_R1.mLb.clN.bdg
MV411_RNP_RUNX1-H3K27AC_R1.mLb.clN.bigWig
MV411_RNP_RUNX1-H3K27AC_R2.mLb.clN.bdg
MV411_RNP_RUNX1-H3K27AC_R2.mLb.clN.bigWig
MV411_RNP_RUNX1_RUNX2-H3K27AC_R1.mLb.clN.bdg
MV411_RNP_RUNX1_RUNX2-H3K27AC_R1.mLb.clN.bigWig
MV411_RNP_RUNX1_RUNX2-H3K27AC_R2.mLb.clN.bdg
MV411_RNP_RUNX1_

In [80]:
! gsutil -m cp -r ../../data/$project/bwa/mergedLibrary/bigwig/ gs://amlproject/Chip/$project/bwa/mergedLibrary/
! gsutil -m cp -r ../../data/$project/droso_aligned gs://amlproject/Chip/$project/
! gsutil -m cp -r ../../data/$project/recalib_bigwig gs://amlproject/Chip/$project/

Copying file://../../data/additional_degraded_v1/bwa/mergedLibrary/bigwig/MV411_RNP_IRF8-H3K27AC_R2.mLb.clN.bigWig [Content-Type=application/octet-stream]...
Copying file://../../data/additional_degraded_v1/bwa/mergedLibrary/bigwig/MV411_RNP_MYB-H3K27AC_R1.mLb.clN.bigWig [Content-Type=application/octet-stream]...
==> NOTE: You are uploading one or more large file(s), which would run
significantly faster if you enable parallel composite uploads. This
feature can be enabled by editing the
"parallel_composite_upload_threshold" value in your .boto
configuration file. However, note that if you do this large files will
be uploaded as `composite objects
<https://cloud.google.com/storage/docs/composite-objects>`_,which
means that any user who downloads such objects will need to have a
compiled crcmod installed (see "gsutil help crcmod"). This is because
without a compiled crcmod, computing checksums on composite objects is
so slow that gsutil disables downloads of composite objects.

Copying f

Copying file://../../data/additional_degraded_v1/bwa/mergedLibrary/bigwig/scale/MV411_RNP_MYB-H3K27AC_R1.mLb.clN.scale_factor.txt [Content-Type=text/plain]...
/ [50/50 files][ 22.6 GiB/ 22.6 GiB] 100% Done  83.7 MiB/s ETA 00:00:00         
Operation completed over 50 objects/22.6 GiB.                                    
Copying file://../../data/additional_degraded_v1/droso_aligned/mp862-MV411_RNP_RUNX2-H3K27AC-r1_R1_001_val_1.sorted.bam [Content-Type=application/octet-stream]...
Copying file://../../data/additional_degraded_v1/droso_aligned/mp860-MV411_RNP_RUNX1-H3K27AC-r1_R1_001.sorted.bam.bai [Content-Type=application/octet-stream]...
Copying file://../../data/additional_degraded_v1/droso_aligned/mp870-MV411_RNP_MYB-H3K27AC-r1_R1_001.sorted.bam.bai [Content-Type=application/octet-stream]...
==> NOTE: You are uploading one or more large file(s), which would run          
significantly faster if you enable parallel composite uploads. This
feature can be enabled by editing the
"paralle

Copying file://../../data/additional_degraded_v1/droso_aligned/mp873-MV411_RNP_SPI1-H3K27AC-r2_R1_001_val_1.sorted.bam.idxstat [Content-Type=application/octet-stream]...
Copying file://../../data/additional_degraded_v1/droso_aligned/mp865-MV411_RNP_RUNX1_RUNX2-H3K27AC-r2_R1_001_val_1.sorted.bam [Content-Type=application/octet-stream]...
Copying file://../../data/additional_degraded_v1/droso_aligned/mp858-MV411_RNP_AAVS1-H3K27AC-r1_R1_001_val_1.sorted.bam.bai [Content-Type=application/octet-stream]...
Copying file://../../data/additional_degraded_v1/droso_aligned/mp870-MV411_RNP_MYB-H3K27AC-r1_R1_001_val_1.sorted.bam.bai [Content-Type=application/octet-stream]...
Copying file://../../data/additional_degraded_v1/droso_aligned/mp871-MV411_RNP_MYB-H3K27AC-r2_R2_001_val_2.fq.gz [Content-Type=application/octet-stream]...
Copying file://../../data/additional_degraded_v1/droso_aligned/mp858-MV411_RNP_AAVS1-H3K27AC-r1_R1_001.sorted.bam [Content-Type=application/octet-stream]...
Copying file://.

Copying file://../../data/additional_degraded_v1/droso_aligned/mp872-MV411_RNP_SPI1-H3K27AC-r1_R1_001_val_1.sorted.bam [Content-Type=application/octet-stream]...
Copying file://../../data/additional_degraded_v1/droso_aligned/mp864-MV411_RNP_RUNX1_RUNX2-H3K27AC-r1_R1_001.sorted.bam [Content-Type=application/octet-stream]...
Copying file://../../data/additional_degraded_v1/droso_aligned/mp868-MV411_RNP_IRF8-H3K27AC-r1_R1_001.sorted.bam.bai [Content-Type=application/octet-stream]...
Copying file://../../data/additional_degraded_v1/droso_aligned/mp868-MV411_RNP_IRF8-H3K27AC-r1_R1_001_val_1.fq.gz [Content-Type=application/octet-stream]...
Copying file://../../data/additional_degraded_v1/droso_aligned/mp873-MV411_RNP_SPI1-H3K27AC-r2_R1_001.sorted.bam.idxstat [Content-Type=application/octet-stream]...
Copying file://../../data/additional_degraded_v1/droso_aligned/mp868-MV411_RNP_IRF8-H3K27AC-r1_R1_001.fastq.gz_trimming_report.txt [Content-Type=text/plain]...
Copying file://../../data/addition

Copying file://../../data/additional_degraded_v1/droso_aligned/mp861-MV411_RNP_RUNX1-H3K27AC-r2_R2_001_val_2.fq.gz [Content-Type=application/octet-stream]...
Copying file://../../data/additional_degraded_v1/droso_aligned/mp872-MV411_RNP_SPI1-H3K27AC-r1_R1_001.sorted.bam [Content-Type=application/octet-stream]...
Copying file://../../data/additional_degraded_v1/droso_aligned/mp862-MV411_RNP_RUNX2-H3K27AC-r1_R1_001_val_1.sorted.bam.bai [Content-Type=application/octet-stream]...
Copying file://../../data/additional_degraded_v1/droso_aligned/mp870-MV411_RNP_MYB-H3K27AC-r1_R1_001.sorted.bam.idxstat [Content-Type=application/octet-stream]...
Copying file://../../data/additional_degraded_v1/droso_aligned/mp868-MV411_RNP_IRF8-H3K27AC-r1_R2_001.fastq.gz_trimming_report.txt [Content-Type=text/plain]...
Copying file://../../data/additional_degraded_v1/droso_aligned/mp864-MV411_RNP_RUNX1_RUNX2-H3K27AC-r1_R1_001.fastq.gz_trimming_report.txt [Content-Type=text/plain]...
Copying file://../../data/add

In [65]:
for val in wigs:
    bedg = val[:-6]+'bdg'
    ! bigWigToBedGraph $val $bedg

In [66]:
bdg=! ls ../../data/additional_degraded_v1/bwa/mergedLibrary/bigwig/*.bdg
bdg

['../../data/additional_degraded_v1/bwa/mergedLibrary/bigwig/INPUT_R1.mLb.clN.bdg',
 '../../data/additional_degraded_v1/bwa/mergedLibrary/bigwig/MV411_RNP_AAVS1-H3K27AC_R1.mLb.clN.bdg',
 '../../data/additional_degraded_v1/bwa/mergedLibrary/bigwig/MV411_RNP_AAVS1-H3K27AC_R2.mLb.clN.bdg',
 '../../data/additional_degraded_v1/bwa/mergedLibrary/bigwig/MV411_RNP_IRF8-H3K27AC_R1.mLb.clN.bdg',
 '../../data/additional_degraded_v1/bwa/mergedLibrary/bigwig/MV411_RNP_IRF8-H3K27AC_R2.mLb.clN.bdg',
 '../../data/additional_degraded_v1/bwa/mergedLibrary/bigwig/MV411_RNP_MEF2D-H3K27AC_R1.mLb.clN.bdg',
 '../../data/additional_degraded_v1/bwa/mergedLibrary/bigwig/MV411_RNP_MEF2D-H3K27AC_R2.mLb.clN.bdg',
 '../../data/additional_degraded_v1/bwa/mergedLibrary/bigwig/MV411_RNP_MYB-H3K27AC_R1.mLb.clN.bdg',
 '../../data/additional_degraded_v1/bwa/mergedLibrary/bigwig/MV411_RNP_MYB-H3K27AC_R2.mLb.clN.bdg',
 '../../data/additional_degraded_v1/bwa/mergedLibrary/bigwig/MV411_RNP_RUNX1-H3K27AC_R1.mLb.clN.bdg',
 '..

In [73]:
# diffPeak on scaled data 
size= 240
for i in range(int(len(bdg)-3)):
    name1 = bdg[1+i%2]
    name2 = bdg[3+i]
    print(name1,name2)
    print(chip.diffPeak(name1, name2, control1=bdg[0], control2=bdg[0], res_directory="../../data/"+project+"/diffPeaks/", scaling1=1, scaling2=scales[i], size=size))

../../data/additional_degraded_v1/bwa/mergedLibrary/bigwig/MV411_RNP_AAVS1-H3K27AC_R1.mLb.clN.bdg ../../data/additional_degraded_v1/bwa/mergedLibrary/bigwig/MV411_RNP_IRF8-H3K27AC_R1.mLb.clN.bdg
doing differential peak binding
CompletedProcess(args='macs2 bdgdiff --t1 ../../data/additional_degraded_v1/bwa/mergedLibrary/bigwig/MV411_RNP_AAVS1-H3K27AC_R1.mLb.clN.bdg --c1 ../../data/additional_degraded_v1/bwa/mergedLibrary/bigwig/INPUT_R1.mLb.clN.bdg --t2 ../../data/additional_degraded_v1/bwa/mergedLibrary/bigwig/MV411_RNP_IRF8-H3K27AC_R1.mLb.clN.bdg --c2 ../../data/additional_degraded_v1/bwa/mergedLibrary/bigwig/INPUT_R1.mLb.clN.bdg --d1 1 --d2 0.3011826465 -g 60 -l 240 --o-prefix MV411_RNP_AAVS1-H3K27AC_R1_vs_MV411_RNP_IRF8-H3K27AC_R1 --outdir ../../data/additional_degraded_v1/diffPeaks/', returncode=0, stdout=b'', stderr=b'INFO  @ Wed, 09 Sep 2020 20:39:48: Read and build treatment 1 bedGraph... \nINFO  @ Wed, 09 Sep 2020 20:40:17: Read and build control 1 bedGraph... \nINFO  @ Wed, 09

CompletedProcess(args='macs2 bdgdiff --t1 ../../data/additional_degraded_v1/bwa/mergedLibrary/bigwig/MV411_RNP_AAVS1-H3K27AC_R2.mLb.clN.bdg --c1 ../../data/additional_degraded_v1/bwa/mergedLibrary/bigwig/INPUT_R1.mLb.clN.bdg --t2 ../../data/additional_degraded_v1/bwa/mergedLibrary/bigwig/MV411_RNP_RUNX1-H3K27AC_R2.mLb.clN.bdg --c2 ../../data/additional_degraded_v1/bwa/mergedLibrary/bigwig/INPUT_R1.mLb.clN.bdg --d1 1 --d2 0.849637265 -g 60 -l 240 --o-prefix MV411_RNP_AAVS1-H3K27AC_R2_vs_MV411_RNP_RUNX1-H3K27AC_R2 --outdir ../../data/additional_degraded_v1/diffPeaks/', returncode=0, stdout=b'', stderr=b'INFO  @ Wed, 09 Sep 2020 21:50:20: Read and build treatment 1 bedGraph... \nINFO  @ Wed, 09 Sep 2020 21:51:00: Read and build control 1 bedGraph... \nINFO  @ Wed, 09 Sep 2020 21:53:11: Read and build treatment 2 bedGraph... \nINFO  @ Wed, 09 Sep 2020 21:54:06: Read and build control 2 bedGraph... \nINFO  @ Wed, 09 Sep 2020 22:01:04: Write peaks... \nINFO  @ Wed, 09 Sep 2020 22:01:04: Done

In [148]:
bams

['../../data/additional_degraded_v1/bwa/mergedLibrary/INPUT_R1.mLb.clN.sorted.bam',
 '../../data/additional_degraded_v1/bwa/mergedLibrary/MV411_RNP_AAVS1-H3K27AC_R1.mLb.clN.sorted.bam',
 '../../data/additional_degraded_v1/bwa/mergedLibrary/MV411_RNP_AAVS1-H3K27AC_R2.mLb.clN.sorted.bam',
 '../../data/additional_degraded_v1/bwa/mergedLibrary/MV411_RNP_IRF8-H3K27AC_R1.mLb.clN.sorted.bam',
 '../../data/additional_degraded_v1/bwa/mergedLibrary/MV411_RNP_IRF8-H3K27AC_R2.mLb.clN.sorted.bam',
 '../../data/additional_degraded_v1/bwa/mergedLibrary/MV411_RNP_MEF2D-H3K27AC_R1.mLb.clN.sorted.bam',
 '../../data/additional_degraded_v1/bwa/mergedLibrary/MV411_RNP_MEF2D-H3K27AC_R2.mLb.clN.sorted.bam',
 '../../data/additional_degraded_v1/bwa/mergedLibrary/MV411_RNP_MYB-H3K27AC_R1.mLb.clN.sorted.bam',
 '../../data/additional_degraded_v1/bwa/mergedLibrary/MV411_RNP_MYB-H3K27AC_R2.mLb.clN.sorted.bam',
 '../../data/additional_degraded_v1/bwa/mergedLibrary/MV411_RNP_RUNX1-H3K27AC_R1.mLb.clN.sorted.bam',
 '..

In [154]:
bams[10]

'../../data/additional_degraded_v1/bwa/mergedLibrary/MV411_RNP_RUNX1-H3K27AC_R2.mLb.clN.sorted.bam'

In [None]:
# diffPeak on scaled data (full reprocessing)
size= 240
for i in range(len(bams)-3):
    if i < 8:
        continue
    bam1 = bams[1+i%2]
    bam2 = bams[3+i]
    print(bam1,bam2)
    print(chip.fullDiffPeak(bam1,bam2, control1=bams[0], compute_size=False, size=size, scaling=[scales[i],1], directory = "../../data/"+project+"/diffData/", res_directory = "../../data/"+project+"/diffPeaks/",pairedend=True))

../../data/additional_degraded_v1/bwa/mergedLibrary/MV411_RNP_AAVS1-H3K27AC_R1.mLb.clN.sorted.bam ../../data/additional_degraded_v1/bwa/mergedLibrary/MV411_RNP_RUNX1_RUNX2-H3K27AC_R1.mLb.clN.sorted.bam
doing diff from ../../data/additional_degraded_v1/bwa/mergedLibrary/MV411_RNP_AAVS1-H3K27AC_R1.mLb.clN.sorted.bam and ../../data/additional_degraded_v1/bwa/mergedLibrary/MV411_RNP_RUNX1_RUNX2-H3K27AC_R1.mLb.clN.sorted.bam
using default|given size
computing the scaling values
b'INFO  @ Mon, 14 Sep 2020 15:06:40: \n# Command line: callpeak -B -t ../../data/additional_degraded_v1/bwa/mergedLibrary/MV411_RNP_AAVS1-H3K27AC_R1.mLb.clN.sorted.bam -c ../../data/additional_degraded_v1/bwa/mergedLibrary/INPUT_R1.mLb.clN.sorted.bam --nomodel --extsize 240 -n MV411_RNP_AAVS1-H3K27AC_R1 --outdir ../../data/additional_degraded_v1/diffData/ -f BAMPE\n# ARGUMENTS LIST:\n# name = MV411_RNP_AAVS1-H3K27AC_R1\n# format = BAMPE\n# ChIP-seq file = [\'../../data/additional_degraded_v1/bwa/mergedLibrary/MV411_R

b'INFO  @ Mon, 14 Sep 2020 15:31:11: \n# Command line: callpeak -B -t ../../data/additional_degraded_v1/bwa/mergedLibrary/MV411_RNP_RUNX1_RUNX2-H3K27AC_R1.mLb.clN.sorted.bam -c ../../data/additional_degraded_v1/bwa/mergedLibrary/INPUT_R1.mLb.clN.sorted.bam --nomodel --extsize 240 -n MV411_RNP_RUNX1_RUNX2-H3K27AC_R1 --outdir ../../data/additional_degraded_v1/diffData/ -f BAMPE\n# ARGUMENTS LIST:\n# name = MV411_RNP_RUNX1_RUNX2-H3K27AC_R1\n# format = BAMPE\n# ChIP-seq file = [\'../../data/additional_degraded_v1/bwa/mergedLibrary/MV411_RNP_RUNX1_RUNX2-H3K27AC_R1.mLb.clN.sorted.bam\']\n# control file = [\'../../data/additional_degraded_v1/bwa/mergedLibrary/INPUT_R1.mLb.clN.sorted.bam\']\n# effective genome size = 2.70e+09\n# band width = 300\n# model fold = [5, 50]\n# qvalue cutoff = 5.00e-02\n# The maximum gap between significant sites is assigned as the read length/tag size.\n# The minimum length of peaks is assigned as the predicted fragment length "d".\n# Larger dataset will be scaled 

b'INFO  @ Mon, 14 Sep 2020 16:11:37: \n# Command line: callpeak -B -t ../../data/additional_degraded_v1/bwa/mergedLibrary/MV411_RNP_AAVS1-H3K27AC_R2.mLb.clN.sorted.bam -c ../../data/additional_degraded_v1/bwa/mergedLibrary/INPUT_R1.mLb.clN.sorted.bam --nomodel --extsize 240 -n MV411_RNP_AAVS1-H3K27AC_R2 --outdir ../../data/additional_degraded_v1/diffData/ -f BAMPE\n# ARGUMENTS LIST:\n# name = MV411_RNP_AAVS1-H3K27AC_R2\n# format = BAMPE\n# ChIP-seq file = [\'../../data/additional_degraded_v1/bwa/mergedLibrary/MV411_RNP_AAVS1-H3K27AC_R2.mLb.clN.sorted.bam\']\n# control file = [\'../../data/additional_degraded_v1/bwa/mergedLibrary/INPUT_R1.mLb.clN.sorted.bam\']\n# effective genome size = 2.70e+09\n# band width = 300\n# model fold = [5, 50]\n# qvalue cutoff = 5.00e-02\n# The maximum gap between significant sites is assigned as the read length/tag size.\n# The minimum length of peaks is assigned as the predicted fragment length "d".\n# Larger dataset will be scaled towards smaller dataset.

In [34]:
initscales = ! cat ../../data/$project/bwa/mergedLibrary/bigwig/scale/*.txt

In [75]:
scales = [val*float(initscales[3+i]) for i, val in enumerate(scales)]

In [76]:
scales

[0.0055422425878344,
 0.0095582660753916,
 0.0099217323799776,
 0.0123312656199582,
 0.01928382044626971,
 0.00920760471838008,
 0.028572797987493598,
 0.00965697715399,
 0.01991590914591828,
 0.01796301710614069,
 0.034577257425544,
 0.0109291227710616,
 0.00907162286248202,
 0.015778937847629487]

In [79]:
chip.bigWigFrom(bams[3:], 
                genome='GRCh38',scaling=scales,
               numthreads=8)

In [88]:
ls ../../data/$project/recalib_bigwig/

MV411_RNP_IRF8-H3K27AC_R1.bw   MV411_RNP_RUNX1-H3K27AC_R2.bw
MV411_RNP_IRF8-H3K27AC_R2.bw   MV411_RNP_RUNX1_RUNX2-H3K27AC_R1.bw
MV411_RNP_MEF2D-H3K27AC_R1.bw  MV411_RNP_RUNX1_RUNX2-H3K27AC_R2.bw
MV411_RNP_MEF2D-H3K27AC_R2.bw  MV411_RNP_RUNX2-H3K27AC_R1.bw
MV411_RNP_MYB-H3K27AC_R1.bw    MV411_RNP_RUNX2-H3K27AC_R2.bw
MV411_RNP_MYB-H3K27AC_R2.bw    MV411_RNP_SPI1-H3K27AC_R1.bw
MV411_RNP_RUNX1-H3K27AC_R1.bw  MV411_RNP_SPI1-H3K27AC_R2.bw


In [84]:
!mv bigwig/* ../../data/$project/recalib_bigwig/

In [91]:
!cp ../../data/$project/bwa/mergedLibrary/bigwig/MV411_RNP_AAVS1-*.bigWig ../../data/$project/recalib_bigwig/

In [130]:
os.popen('for i in $(ls ../../data/'+project+'/diffPeaks/*.bed); \
            do echo $(wc -l $i); \
            done').read().split('\n')

['10158 ../../data/additional_degraded_v1/diffPeaks/MV411_RNP_AAVS1-H3K27AC_R1_treat_pileup_vs_MV411_RNP_IRF8-H3K27AC_R1_treat_pileup_c3.0_common.bed',
 '44772 ../../data/additional_degraded_v1/diffPeaks/MV411_RNP_AAVS1-H3K27AC_R1_treat_pileup_vs_MV411_RNP_IRF8-H3K27AC_R1_treat_pileup_c3.0_cond1.bed',
 '138 ../../data/additional_degraded_v1/diffPeaks/MV411_RNP_AAVS1-H3K27AC_R1_treat_pileup_vs_MV411_RNP_IRF8-H3K27AC_R1_treat_pileup_c3.0_cond2.bed',
 '43929 ../../data/additional_degraded_v1/diffPeaks/MV411_RNP_AAVS1-H3K27AC_R1_treat_pileup_vs_MV411_RNP_MEF2D-H3K27AC_R1_treat_pileup_c3.0_common.bed',
 '31829 ../../data/additional_degraded_v1/diffPeaks/MV411_RNP_AAVS1-H3K27AC_R1_treat_pileup_vs_MV411_RNP_MEF2D-H3K27AC_R1_treat_pileup_c3.0_cond1.bed',
 '13 ../../data/additional_degraded_v1/diffPeaks/MV411_RNP_AAVS1-H3K27AC_R1_treat_pileup_vs_MV411_RNP_MEF2D-H3K27AC_R1_treat_pileup_c3.0_cond2.bed',
 '42999 ../../data/additional_degraded_v1/diffPeaks/MV411_RNP_AAVS1-H3K27AC_R1_treat_pileup_vs

In [92]:
bw = ! ls ../../data/additional_degraded_v1/recalib_bigwig/*
bw

['../../data/additional_degraded_v1/recalib_bigwig/MV411_RNP_AAVS1-H3K27AC_R1.mLb.clN.bigWig',
 '../../data/additional_degraded_v1/recalib_bigwig/MV411_RNP_AAVS1-H3K27AC_R2.mLb.clN.bigWig',
 '../../data/additional_degraded_v1/recalib_bigwig/MV411_RNP_IRF8-H3K27AC_R1.bw',
 '../../data/additional_degraded_v1/recalib_bigwig/MV411_RNP_IRF8-H3K27AC_R2.bw',
 '../../data/additional_degraded_v1/recalib_bigwig/MV411_RNP_MEF2D-H3K27AC_R1.bw',
 '../../data/additional_degraded_v1/recalib_bigwig/MV411_RNP_MEF2D-H3K27AC_R2.bw',
 '../../data/additional_degraded_v1/recalib_bigwig/MV411_RNP_MYB-H3K27AC_R1.bw',
 '../../data/additional_degraded_v1/recalib_bigwig/MV411_RNP_MYB-H3K27AC_R2.bw',
 '../../data/additional_degraded_v1/recalib_bigwig/MV411_RNP_RUNX1-H3K27AC_R1.bw',
 '../../data/additional_degraded_v1/recalib_bigwig/MV411_RNP_RUNX1-H3K27AC_R2.bw',
 '../../data/additional_degraded_v1/recalib_bigwig/MV411_RNP_RUNX1_RUNX2-H3K27AC_R1.bw',
 '../../data/additional_degraded_v1/recalib_bigwig/MV411_RNP_RU

In [None]:
# GENOME WIDE only

In [98]:
peaks = ! ls ../../data/$project/bwa/mergedLibrary/macs/broadPeak/*.broadPeak

In [106]:
names = ["AAVS1", "AAVS1_v2", "IRF8","IRF8_v2","MEF2D","MEF2D_v2","MYB","MYB_v2","RUNX1","RUNX1_v2","RUNX1_RUNX2","RUNX1_RUNX2_v2","RUNX2","RUNX2_v2", "SPI1","SPI1_v2"]

In [107]:
for i, val in enumerate(bw):
    if i <0:
        continue
    name = names[i]
    print(name)
    chip.getPeaksAt(peaks[i], val, peaknames=['Macs2_Peaks'], window=3000, folder="", title=name, numthreads=8, refpoint="center", name='../../data/'+project+'/diffData/'+name+'_mat.pdf', withDeeptools=True, torecompute=True)

AAVS1
CompletedProcess(args='computeMatrix reference-point -S ../../data/additional_degraded_v1/recalib_bigwig/MV411_RNP_AAVS1-H3K27AC_R1.mLb.clN.bigWig  --referencePoint center --regionsFileName ../../data/additional_degraded_v1/bwa/mergedLibrary/macs/broadPeak/MV411_RNP_AAVS1-H3K27AC_R1_peaks.broadPeak  --missingDataAsZero --outFileName ../../data/additional_degraded_v1/diffPeaks/AAVS1_mat.gz --upstream 3000 --downstream 3000 --numberOfProcessors 8 && plotHeatmap --matrixFile ../../data/additional_degraded_v1/diffPeaks/AAVS1_mat.gz --outFileName ../../data/additional_degraded_v1/diffPeaks/AAVS1_mat.pdf --refPointLabel center --regionsLabel Macs2_Peaks  --plotTitle AAVS1', returncode=0, stdout=b'', stderr=b'')
AAVS1_v2
CompletedProcess(args='computeMatrix reference-point -S ../../data/additional_degraded_v1/recalib_bigwig/MV411_RNP_AAVS1-H3K27AC_R2.mLb.clN.bigWig  --referencePoint center --regionsFileName ../../data/additional_degraded_v1/bwa/mergedLibrary/macs/broadPeak/MV411_RNP_AAV

CompletedProcess(args='computeMatrix reference-point -S ../../data/additional_degraded_v1/recalib_bigwig/MV411_RNP_RUNX2-H3K27AC_R1.bw  --referencePoint center --regionsFileName ../../data/additional_degraded_v1/bwa/mergedLibrary/macs/broadPeak/MV411_RNP_RUNX2-H3K27AC_R1_peaks.broadPeak  --missingDataAsZero --outFileName ../../data/additional_degraded_v1/diffPeaks/RUNX2_mat.gz --upstream 3000 --downstream 3000 --numberOfProcessors 8 && plotHeatmap --matrixFile ../../data/additional_degraded_v1/diffPeaks/RUNX2_mat.gz --outFileName ../../data/additional_degraded_v1/diffPeaks/RUNX2_mat.pdf --refPointLabel center --regionsLabel Macs2_Peaks  --plotTitle RUNX2', returncode=0, stdout=b'', stderr=b'')
RUNX2_v2
CompletedProcess(args='computeMatrix reference-point -S ../../data/additional_degraded_v1/recalib_bigwig/MV411_RNP_RUNX2-H3K27AC_R2.bw  --referencePoint center --regionsFileName ../../data/additional_degraded_v1/bwa/mergedLibrary/macs/broadPeak/MV411_RNP_RUNX2-H3K27AC_R2_peaks.broadPeak 

In [133]:
!mkdir ../results/$project/
!mkdir ../results/$project/plots

mkdir: cannot create directory ‘../results/additional_degraded_v1/plots’: File exists


In [134]:
! cp ../../data/additional_degraded_v1/*.pdf ../results/$project/plots/

In [147]:
cond1peak = ! ls ../../data/$project/diffPeaks/*cond1.bed
cond2peak = ! ls ../../data/$project/diffPeaks/*cond2.bed
commonpeak = ! ls ../../data/$project/diffPeaks/*common.bed
cond1peak

['../../data/additional_degraded_v1/diffPeaks/MV411_RNP_AAVS1-H3K27AC_R1_treat_pileup_vs_MV411_RNP_IRF8-H3K27AC_R1_treat_pileup_c3.0_cond1.bed',
 '../../data/additional_degraded_v1/diffPeaks/MV411_RNP_AAVS1-H3K27AC_R1_treat_pileup_vs_MV411_RNP_MEF2D-H3K27AC_R1_treat_pileup_c3.0_cond1.bed',
 '../../data/additional_degraded_v1/diffPeaks/MV411_RNP_AAVS1-H3K27AC_R1_treat_pileup_vs_MV411_RNP_MYB-H3K27AC_R1_treat_pileup_c3.0_cond1.bed',
 '../../data/additional_degraded_v1/diffPeaks/MV411_RNP_AAVS1-H3K27AC_R1_treat_pileup_vs_MV411_RNP_RUNX1-H3K27AC_R1_treat_pileup_c3.0_cond1.bed',
 '../../data/additional_degraded_v1/diffPeaks/MV411_RNP_AAVS1-H3K27AC_R2_treat_pileup_vs_MV411_RNP_IRF8-H3K27AC_R2_treat_pileup_c3.0_cond1.bed',
 '../../data/additional_degraded_v1/diffPeaks/MV411_RNP_AAVS1-H3K27AC_R2_treat_pileup_vs_MV411_RNP_MEF2D-H3K27AC_R2_treat_pileup_c3.0_cond1.bed',
 '../../data/additional_degraded_v1/diffPeaks/MV411_RNP_AAVS1-H3K27AC_R2_treat_pileup_vs_MV411_RNP_MYB-H3K27AC_R2_treat_pileup_c

In [None]:
names = ["IRF8", "MEF2D", "MYB","RUNX1","RUNX1_RUNX2","RUNX2", "SPI1", "MEF2D_v2", "IRF8_v2", "MYB_v2", "RUNX1_v2", "SPI1_v2", "RUNX2_v2","RUNX1_RUNX2_v2"]

In [143]:
bw

['../../data/additional_degraded_v1/recalib_bigwig/MV411_RNP_AAVS1-H3K27AC_R1.mLb.clN.bigWig',
 '../../data/additional_degraded_v1/recalib_bigwig/MV411_RNP_AAVS1-H3K27AC_R2.mLb.clN.bigWig',
 '../../data/additional_degraded_v1/recalib_bigwig/MV411_RNP_IRF8-H3K27AC_R1.bw',
 '../../data/additional_degraded_v1/recalib_bigwig/MV411_RNP_IRF8-H3K27AC_R2.bw',
 '../../data/additional_degraded_v1/recalib_bigwig/MV411_RNP_MEF2D-H3K27AC_R1.bw',
 '../../data/additional_degraded_v1/recalib_bigwig/MV411_RNP_MEF2D-H3K27AC_R2.bw',
 '../../data/additional_degraded_v1/recalib_bigwig/MV411_RNP_MYB-H3K27AC_R1.bw',
 '../../data/additional_degraded_v1/recalib_bigwig/MV411_RNP_MYB-H3K27AC_R2.bw',
 '../../data/additional_degraded_v1/recalib_bigwig/MV411_RNP_RUNX1-H3K27AC_R1.bw',
 '../../data/additional_degraded_v1/recalib_bigwig/MV411_RNP_RUNX1-H3K27AC_R2.bw',
 '../../data/additional_degraded_v1/recalib_bigwig/MV411_RNP_RUNX1_RUNX2-H3K27AC_R1.bw',
 '../../data/additional_degraded_v1/recalib_bigwig/MV411_RNP_RU

In [146]:
for i in range(int(len(bw)/2)):
    if i <0:
        continue
    name1 = bw[i%2]
    name2 = bw[i+2]
    peak = [cond1peak[i], commonpeak[i], cond2peak[i]]
   # for val in peak:
      #  chip.dropWeirdChromosomes(val)
    name = names[i]
    print(name1,name2)
    chip.getPeaksAt(peak, [name1, name2], bigwignames=['DMSO', 'VHL'], peaknames=['DMSO_peaks', 'common', 'VHL_peaks'], window=3000, folder="", title=name, numthreads=7, refpoint="center", name='../../data/'+project+'/diffPeaks/'+name+'_mat.pdf', withDeeptools=True, torecompute=True)

../../data/additional_degraded_v1/recalib_bigwig/MV411_RNP_AAVS1-H3K27AC_R1.mLb.clN.bigWig ../../data/additional_degraded_v1/recalib_bigwig/MV411_RNP_IRF8-H3K27AC_R1.bw
../../data/additional_degraded_v1/recalib_bigwig/MV411_RNP_AAVS1-H3K27AC_R2.mLb.clN.bigWig ../../data/additional_degraded_v1/recalib_bigwig/MV411_RNP_IRF8-H3K27AC_R2.bw
CompletedProcess(args='computeMatrix reference-point -S ../../data/additional_degraded_v1/recalib_bigwig/MV411_RNP_AAVS1-H3K27AC_R2.mLb.clN.bigWig ../../data/additional_degraded_v1/recalib_bigwig/MV411_RNP_IRF8-H3K27AC_R2.bw  --referencePoint center --regionsFileName ../../data/additional_degraded_v1/diffPeaks/MV411_RNP_AAVS1-H3K27AC_R1_treat_pileup_vs_MV411_RNP_MEF2D-H3K27AC_R1_treat_pileup_c3.0_cond1.bed ../../data/additional_degraded_v1/diffPeaks/MV411_RNP_AAVS1-H3K27AC_R1_treat_pileup_vs_MV411_RNP_MEF2D-H3K27AC_R1_treat_pileup_c3.0_common.bed --missingDataAsZero --outFileName ../../data/additional_degraded_v1/diffPeaks/AAVS1_v2_mat.gz --upstream 3000

KeyboardInterrupt: 

In [71]:
peaks = ! ls ../../data/$project/bwa/mergedLibrary/macs/broadPeak/*.broadPeak
peaks

['../../data/additional_degraded_v1/bwa/mergedLibrary/macs/broadPeak/MV411_RNP_AAVS1-H3K27AC_R1_peaks.broadPeak',
 '../../data/additional_degraded_v1/bwa/mergedLibrary/macs/broadPeak/MV411_RNP_AAVS1-H3K27AC_R2_peaks.broadPeak',
 '../../data/additional_degraded_v1/bwa/mergedLibrary/macs/broadPeak/MV411_RNP_IRF8-H3K27AC_R1_peaks.broadPeak',
 '../../data/additional_degraded_v1/bwa/mergedLibrary/macs/broadPeak/MV411_RNP_IRF8-H3K27AC_R2_peaks.broadPeak',
 '../../data/additional_degraded_v1/bwa/mergedLibrary/macs/broadPeak/MV411_RNP_MEF2D-H3K27AC_R1_peaks.broadPeak',
 '../../data/additional_degraded_v1/bwa/mergedLibrary/macs/broadPeak/MV411_RNP_MEF2D-H3K27AC_R2_peaks.broadPeak',
 '../../data/additional_degraded_v1/bwa/mergedLibrary/macs/broadPeak/MV411_RNP_MYB-H3K27AC_R1_peaks.broadPeak',
 '../../data/additional_degraded_v1/bwa/mergedLibrary/macs/broadPeak/MV411_RNP_MYB-H3K27AC_R2_peaks.broadPeak',
 '../../data/additional_degraded_v1/bwa/mergedLibrary/macs/broadPeak/MV411_RNP_RUNX1-H3K27AC_R

In [75]:
names = ["IRF8","MEF2D","MYB","RUNX1","RUNX1_RUNX2","RUNX2","SPI1"]
names.extend([i+'_r2' for i in names])
names.sort()
names

['IRF8',
 'IRF8_r2',
 'MEF2D',
 'MEF2D_r2',
 'MYB',
 'MYB_r2',
 'RUNX1',
 'RUNX1_RUNX2',
 'RUNX1_RUNX2_r2',
 'RUNX1_r2',
 'RUNX2',
 'RUNX2_r2',
 'SPI1',
 'SPI1_r2']

In [77]:
for i in range(len(bw)):
    chip.getPeaksAt(peaks[i], bw[i], window=3000, folder="", title=names[i], numthreads=8, torecompute=True, onlyProfile=False, name='../../data/'+project+'/diffPeaks/'+names[i]+'_mat_profile.pdf', withDeeptools=True, refpoint="center")
    chip.getPeaksAt(peaks[i], bw[i], window=3000, folder="", title=names[i], numthreads=8, torecompute=True, onlyProfile=True,name='../../data/'+project+'/diffPeaks/'+names[i]+'_mat_profile_clust3.pdf', withDeeptools=True, cluster=3, refpoint="center")

CompletedProcess(args='computeMatrix reference-point -S ../../data/additional_degraded_v1/recalib_bigwig/MV411_RNP_IRF8-H3K27AC_R1.bw  --referencePoint center --regionsFileName ../../data/additional_degraded_v1/bwa/mergedLibrary/macs/broadPeak/MV411_RNP_AAVS1-H3K27AC_R1_peaks.broadPeak  --missingDataAsZero --outFileName ../../data/additional_degraded_v1/diffPeaks/IRF8_mat_profile.gz --upstream 3000 --downstream 3000 --numberOfProcessors 8 && plotHeatmap --matrixFile ../../data/additional_degraded_v1/diffPeaks/IRF8_mat_profile.gz --outFileName ../../data/additional_degraded_v1/diffPeaks/IRF8_mat_profile.pdf --refPointLabel center --plotTitle IRF8', returncode=0, stdout=b'', stderr=b'')
CompletedProcess(args='computeMatrix reference-point -S ../../data/additional_degraded_v1/recalib_bigwig/MV411_RNP_IRF8-H3K27AC_R1.bw  --referencePoint center --regionsFileName ../../data/additional_degraded_v1/bwa/mergedLibrary/macs/broadPeak/MV411_RNP_AAVS1-H3K27AC_R1_peaks.broadPeak  --missingDataAsZer

KeyboardInterrupt: 

In [139]:
! ls ../../data/additional_degraded_v1/diffPeaks

MV411_RNP_AAVS1-H3K27AC_R1_treat_pileup_vs_MV411_RNP_IRF8-H3K27AC_R1_treat_pileup_c3.0_common.bed
MV411_RNP_AAVS1-H3K27AC_R1_treat_pileup_vs_MV411_RNP_IRF8-H3K27AC_R1_treat_pileup_c3.0_cond1.bed
MV411_RNP_AAVS1-H3K27AC_R1_treat_pileup_vs_MV411_RNP_IRF8-H3K27AC_R1_treat_pileup_c3.0_cond2.bed
MV411_RNP_AAVS1-H3K27AC_R1_treat_pileup_vs_MV411_RNP_MEF2D-H3K27AC_R1_treat_pileup_c3.0_common.bed
MV411_RNP_AAVS1-H3K27AC_R1_treat_pileup_vs_MV411_RNP_MEF2D-H3K27AC_R1_treat_pileup_c3.0_cond1.bed
MV411_RNP_AAVS1-H3K27AC_R1_treat_pileup_vs_MV411_RNP_MEF2D-H3K27AC_R1_treat_pileup_c3.0_cond2.bed
MV411_RNP_AAVS1-H3K27AC_R1_treat_pileup_vs_MV411_RNP_MYB-H3K27AC_R1_treat_pileup_c3.0_common.bed
MV411_RNP_AAVS1-H3K27AC_R1_treat_pileup_vs_MV411_RNP_MYB-H3K27AC_R1_treat_pileup_c3.0_cond1.bed
MV411_RNP_AAVS1-H3K27AC_R1_treat_pileup_vs_MV411_RNP_MYB-H3K27AC_R1_treat_pileup_c3.0_cond2.bed
MV411_RNP_AAVS1-H3K27AC_R1_treat_pileup_vs_MV411_RNP_RUNX1-H3K27AC_R1_treat_pileup_c3.0_common.bed
MV411_RNP_AAVS1

### on unscalled data

In [None]:
bams = !ls ../../data/$project/bwa/mergedLibrary/mp*.bam
bams

In [None]:
! mkdir ../../data/$project/diffPeaks_unscaled

In [None]:
#on unscalled data 
for i in range(int(len(bams)/2)):
    if i < 5:
        continue
    name1 = bams[i]
    name2 = bams[6+i]
    print(name1,name2)
    chip.fullDiffPeak(name1,name2, control1='../../data/'+project+'/bwa/mergedLibrary/INPUT_R1.mLb.clN.sorted.bam', directory = "../../data/"+project+"/diffData_unscaled/", res_directory = "../../data/"+project+"/diffPeaks_unscaled/",pairedend=False)

In [None]:
bw = ! ls ../../data/$project/bwa/mergedLibrary/bigwig/*.bigWig
bw

In [None]:
cond1peak = ! ls ../../data/$project/diffPeaks_unscaled/*cond1.bed
cond2peak = ! ls ../../data/$project/diffPeaks_unscaled/*cond2.bed
commonpeak = ! ls ../../data/$project/diffPeaks_unscaled/*common.bed
commonpeak

In [None]:
names = ["MED1","MED1_v2","FLAG_IRF2BP2","POLII_total","POLII_S2","POLII_S5"]

In [None]:
for i in range(int((len(bw)-1)/2)):
    name1 = bw[1+i]
    name2 = bw[7+i]
    peak = [cond1peak[i], commonpeak[i], cond2peak[i]]
    #for val in peak:
     #   chip.dropWeirdChromosomes(val)
    name = names[i]
    print(name1,name2)
    chip.getPeaksAt(peak, [name1, name2], bigwignames=['DMSO', 'VHL'], peaknames=['DMSO_peaks', 'common', 'VHL_peaks'], window=3000, folder="", title=name, numthreads=7, torecompute=True, refpoint='center', name='../../data/'+project+'/diffPeaks_unscaled/'+name+'_mat.pdf', withDeeptools=True)

In [None]:
! gsutil -m cp -r "../../data/additional_degraded_v1" gs://amlproject/Chip/

In [None]:
! cp ../../$project/*_mat.pdf ../results/$project/unscaled/
! cp ../../$project/*_mat.pdf ../results/$project/scaled/

! cp ../../$project/*_mat.pdf ../results/$project/unscaled/
! cp ../../$project/*_mat.pdf ../results/$project/unscaled/

! cp -r ../data/$project/bwa/mergedLibrary/deepTools/plot/* ../results/$project/plots