In [2]:
#Erica Hildebrand
#Running Noam's Insulation Model script on SRCAP degron Hi-C libraries

#Make sure this notebook is running in the insulation-model conda environment.
#To build that environment, run these steps in this order:
#  1. create the env from the updated insulation-temp yml file
#  2. add pandas (conda install pandas)
#  3. add cooltools (pip install git+https://github.com/mirnylab/cooltools.git)
#  4. add jupyter if you want to run in a notebook (conda install jupyter)

import cooler
import cooltools
import os
from cooltools.io import cool2cworld
import glob
import bioframe

In [3]:
#Input and output locations - replace these with your own paths
path2018 = "/nl/umw_job_dekker/cshare/users/eh37w/SRCAP-Degron/coolers/2018/library"
txtpath = "/nl/umw_job_dekker/users/eh37w/SRCAP-Degron/txtdata_sortedFiles"
outpath = '/nl/umw_job_dekker/users/eh37w/SRCAP-Degron/insulation_model'

#bin size that appears in the C-<binsize> directory names and in the file names
binsize = 10000

#short condition label -> full library name used in directory and file names
#NOTE(review): the two 8z R1 entries point at '...2-22-R2-T1' names while the
#other R1 entries use '...2-22-R1-T1' - confirm this matches the names on disk
long_names = {
    'WTCtrl5T_R1' : 'SD-HiC-Dpn-G4mESC-5T-SRCAP-GFP-TIR1-ctrl-2-22-R1-T1',
    'WTAux5T_R1' : 'SD-HiC-Dpn-G4mESC-5T-SRCAP-GFP-TIR1-auxin-12hr-2-22-R1-T1',
    'DegCtrl4x_R1' : 'SD-HiC-Dpn-G4mESC-4x-SRCAP-AID-GFP-TIR1-ctrl-2-22-R1-T1',
    'DegAux4x_R1' : 'SD-HiC-Dpn-G4mESC-4x-SRCAP-AID-GFP-TIR1-auxin-12hr-2-22-R1-T1',
    'DegCtrl8z_R1' : 'SD-HiC-Dpn-G4mESC-8z-SRCAP-AID-GFP-TIR1-ctrl-2-22-R2-T1',
    'DegAux8z_R1' : 'SD-HiC-Dpn-G4mESC-8z-SRCAP-AID-GFP-TIR1-auxin-12hr-2-22-R2-T1',
    'WTCtrl5T_R2' : 'SD-HiC-Dpn-G4mESC-5T-SRCAP-GFP-TIR1-ctrl-3-04-R2-T1',
    'WTAux5T_R2' : 'SD-HiC-Dpn-G4mESC-5T-SRCAP-GFP-TIR1-auxin-12hr-3-04-R2-T1',
    'DegCtrl4x_R2' : 'SD-HiC-Dpn-G4mESC-4x-SRCAP-AID-GFP-TIR1-ctrl-3-04-R2-T1',
    'DegAux4x_R2' : 'SD-HiC-Dpn-G4mESC-4x-SRCAP-AID-GFP-TIR1-auxin-12hr-3-04-R2-T1',
    'DegCtrl8z_R2' : 'SD-HiC-Dpn-G4mESC-8z-SRCAP-AID-GFP-TIR1-ctrl-3-04-R2-T1',
    'DegAux8z_R2' : 'SD-HiC-Dpn-G4mESC-8z-SRCAP-AID-GFP-TIR1-auxin-12hr-3-04-R2-T1',
}

#conditions to process, in the order the labels are listed in long_names
conditions = list(long_names)

In [4]:
#look up the mm10 chromosome sizes once and share the result between both names
chromsizes = bioframe.fetch_chromsizes('mm10')
mm10 = chromsizes  #alias kept so anything that refers to mm10 still works
#chromosome names, in the order bioframe returns them
chromosomes = list(chromsizes.index)
print(chromosomes)

['chr1', 'chr2', 'chr3', 'chr4', 'chr5', 'chr6', 'chr7', 'chr8', 'chr9', 'chr10', 'chr11', 'chr12', 'chr13', 'chr14', 'chr15', 'chr16', 'chr17', 'chr18', 'chr19', 'chrX', 'chrY', 'chrM']


In [5]:
#run insulation model - testing...seems to be taking a long time...
for cond in conditions:
    out_dir = "{}/data/{}/C-{}".format(outpath, long_names[cond], binsize)
    #print(out_dir)
    os.makedirs(out_dir, exist_ok = True)
    for chrom in chromosomes:
        t = '{}/{}/{}/{}_{}_{}_mm10_iced.matrix.gz'.format(txtpath, long_names[cond], 'C-{}'.format(binsize), long_names[cond], chrom, binsize)
        #print(t)
        out_fname = "{}/{}_{}_{}_mm10_iced".format(out_dir, long_names[cond], chrom, binsize)
        #print(out_fname)
        #update this bsub job to send email to your user name/save logs in your folders
        !bsub -q short -W 04:00 -e /home/eh37w/lsf_jobs/LSB_%J.err -o /home/eh37w/lsf_jobs/LSB_%J.log \
        -n 2 -R span[hosts=1] -R select[ib] -R rusage[mem=10000] -N -u erica.hildebrand@umassmed.edu \
        "python /home/eh37w/bin/git/insulation-temp/optinspot_cli.py -inf $t -fmt cworld -decay powerlaw -max 0.1p -dist 0-2e6 -nonoise -chr $chrom -outf $out_fname"
        #!echo $out_fname
        #!python /home/eh37w/bin/git/insulation-temp/optinspot_cli.py -inf $t -fmt cworld -decay powerlaw -max 0.1p -dist 0-2e6 -nonoise -chr $chrom -outf $out_fname

Job <844839> is submitted to queue <short>.
Job <844843> is submitted to queue <short>.
Job <844847> is submitted to queue <short>.
Job <844851> is submitted to queue <short>.
Job <844856> is submitted to queue <short>.
Job <844860> is submitted to queue <short>.
Job <844864> is submitted to queue <short>.
Job <844868> is submitted to queue <short>.
Job <844872> is submitted to queue <short>.
Job <844876> is submitted to queue <short>.
Job <844880> is submitted to queue <short>.
Job <844884> is submitted to queue <short>.
Job <844888> is submitted to queue <short>.
Job <844892> is submitted to queue <short>.
Job <844896> is submitted to queue <short>.
Job <844900> is submitted to queue <short>.
Job <844905> is submitted to queue <short>.
Job <844910> is submitted to queue <short>.
Job <844914> is submitted to queue <short>.
Job <844917> is submitted to queue <short>.
Job <844922> is submitted to queue <short>.
Job <844926> is submitted to queue <short>.
Job <844930> is submitted to que

Job <845598> is submitted to queue <short>.
Job <845602> is submitted to queue <short>.
Job <845606> is submitted to queue <short>.
Job <845610> is submitted to queue <short>.
Job <845614> is submitted to queue <short>.
Job <845618> is submitted to queue <short>.
Job <845622> is submitted to queue <short>.
Job <845627> is submitted to queue <short>.
Job <845632> is submitted to queue <short>.
Job <845635> is submitted to queue <short>.
Job <845639> is submitted to queue <short>.
Job <845644> is submitted to queue <short>.
Job <845648> is submitted to queue <short>.
Job <845652> is submitted to queue <short>.
Job <845656> is submitted to queue <short>.
Job <845660> is submitted to queue <short>.
Job <845664> is submitted to queue <short>.
Job <845668> is submitted to queue <short>.
Job <845672> is submitted to queue <short>.
Job <845677> is submitted to queue <short>.
Job <845681> is submitted to queue <short>.
Job <845685> is submitted to queue <short>.
Job <845689> is submitted to que

In [None]:
#The mitochondrial chromosome (chrM) jobs exit without running; that is OK as long as the rest work.
#One chrY job also failed, so the concatenation below uses every chromosome except chrM and chrY, which should be fine.

In [6]:
#concatenate individual chromosomes into genome-wide insulation model file
#chrY and chrM are left out because not all of their per-chromosome jobs finished;
#for the mm10 list printed above this selects the same chromosomes as chromosomes[0:20]
concat_chroms = [chrom for chrom in chromosomes if chrom not in ('chrY', 'chrM')]
for cond in conditions:
    out_dir = "{}/data/{}/C-{}".format(outpath, long_names[cond], binsize)
    out_fname = "{}/{}_{}_mm10_iced.inspot.tab".format(out_dir, long_names[cond], binsize)
    #open in write mode so re-running this cell rebuilds the genome-wide file
    #instead of appending a second copy of every chromosome to it
    with open(out_fname, 'wb') as out_file:
        for chrom in concat_chroms:
            in_fname = "{}/{}_{}_{}_mm10_iced.inspot.tab".format(out_dir, long_names[cond], chrom, binsize)
            if not os.path.exists(in_fname):
                #best effort: report the missing chromosome and carry on with the rest
                print("missing, skipped: {}".format(in_fname))
                continue
            #copy the bytes unchanged, in chromosome order
            with open(in_fname, 'rb') as in_file:
                out_file.write(in_file.read())