In [None]:
import os
import shlex
import sys
from string import Template

import pybedtools as bedtools


In [None]:
# --- Template files (paths are relative to the notebook's working directory) ---
gencode_config_template = "gencode_config_TEMPLATE_V4.txt"
reg_config_template = "regulatory_config_TEMPLATE.txt"
submit_template_file = "TEMPLATE_characterize.txt"

# --- Output and scratch locations ---
out_folder = os.path.join("..", "data", "quantification")
# NOTE(review): cluster- and user-specific absolute path; adjust when running elsewhere.
tmp_root = os.path.join("/", "nobackup", "lab_bsf", "users", "dbarreca")
tmp_folder = os.path.join(tmp_root, "tmp_characterize")
# exist_ok avoids the check-then-create race and makes re-running this cell safe.
os.makedirs(tmp_folder, exist_ok=True)

# `suffix` selects both the input consensus set and the output folder name.
suffix = "ALL"
characterization_folder = os.path.join(out_folder, 'characterization_{}_V4'.format(suffix))
os.makedirs(characterization_folder, exist_ok=True)

base_bed_file = os.path.join(out_folder, "consensus_set_{}.bed".format(suffix))

# --- Reference annotation files ---
resources_folder = os.path.join("..", "references")

gencode_file = os.path.join(resources_folder, "gencode.v31.basic.annotation.gtf")
reg_build_file = os.path.join(resources_folder, "homo_sapiens.GRCh38.Regulatory_Build.regulatory_features.20190329.gtf")
chrom_file = os.path.join(resources_folder, "hg38.chrom.sizes")

# --- Values substituted into the gencode config template ---
# (presumably distances in base pairs -- confirm against the template)
tss_size = 100
proximal_size_up = 1000
proximal_size_dn = 500
distal_size = 10000

# Thread count passed to each `uropa -t` call and to the submit template.
threads = 4

In [None]:
def _read_template(path):
    """Read the file at ``path`` and return its text wrapped in a ``string.Template``."""
    with open(path) as handle:
        return Template(handle.read())


def _write_uropa_job(kind, template, values, base_name, work_dir, threads):
    """Render one UROPA config file and return the shell command that runs it.

    Parameters
    ----------
    kind : str
        Prefix used in every generated file name ("gencode" or "reg").
    template : string.Template
        Config template to fill in.
    values : dict
        Placeholder values passed to ``template.substitute``.
    base_name : str
        Job identifier appended to the file names.
    work_dir : str
        Absolute directory that receives the config, output prefix and log.
    threads : int
        Value for UROPA's ``-t`` option.

    Returns
    -------
    str
        The ``uropa ...`` command line, with every path shell-quoted.
    """
    config_file = os.path.join(work_dir, "{}_{}.json".format(kind, base_name))
    with open(config_file, 'w') as out:
        out.write(template.substitute(values))
    # shlex.quote leaves ordinary paths untouched and protects paths that
    # contain spaces or shell metacharacters.
    return "uropa -p {prefix} -i {config_file} -t {threads} -l {log}".format(
        prefix=shlex.quote("{}/{}_{}".format(work_dir, kind, base_name)),
        config_file=shlex.quote(config_file),
        threads=threads,
        log=shlex.quote("{}/uropa.{}.{}.log".format(work_dir, kind, base_name)),
    )


gencode_template = _read_template(gencode_config_template)
reg_template = _read_template(reg_config_template)
submit_template = _read_template(submit_template_file)

# Absolute paths that do not change between iterations.
work_dir = os.path.abspath(characterization_folder)
base_bed_abs = os.path.abspath(base_bed_file)
chrom_abs = os.path.abspath(chrom_file)
gencode_abs = os.path.abspath(gencode_file)
reg_build_abs = os.path.abspath(reg_build_file)
tmp_abs = os.path.abspath(tmp_folder)

permutations = 100
# i == -1 characterizes the original consensus set; i in 0..permutations-1
# characterizes a shuffled copy of it, using i as the shuffle seed.
for i in range(-1, permutations):
    if i == -1:
        out_bed = base_bed_file
        base_name = "characterize_base"
        cmd1 = ''  # nothing to shuffle for the base set
    else:
        out_bed = os.path.join(characterization_folder, "consensus_set_{}.bed".format(i))
        base_name = "characterize_{}".format(i)
        cmd1 = "bedtools shuffle -i {base_bed} -g {chrom_file} -seed {seed}|bedtools sort -faidx {chrom_file} > {out_bed}".format(
            base_bed=shlex.quote(base_bed_abs),
            chrom_file=shlex.quote(chrom_abs),
            seed=i,
            out_bed=shlex.quote(os.path.abspath(out_bed)),
        )

    # The config templates receive the paths already wrapped in double quotes.
    bed_value = '"{}"'.format(os.path.abspath(out_bed))

    cmd2 = _write_uropa_job(
        "gencode",
        gencode_template,
        {
            'TSS_flanking': tss_size,
            'TSS_proximal_upstream': proximal_size_up,
            'TSS_proximal_downstream': proximal_size_dn,
            'distal_distance': distal_size,
            'gtf_file': '"{}"'.format(gencode_abs),
            'bed_file': bed_value,
        },
        base_name,
        work_dir,
        threads,
    )

    cmd3 = _write_uropa_job(
        "reg",
        reg_template,
        {
            'gtf_file': '"{}"'.format(reg_build_abs),
            'bed_file': bed_value,
        },
        base_name,
        work_dir,
        threads,
    )

    # One submit script per job, chaining the (optional) shuffle and both UROPA runs.
    submit_script = os.path.join(characterization_folder, "{}.sub".format(base_name))
    with open(submit_script, 'w') as out:
        out.write(submit_template.substitute({
            'threads': threads,
            'workdir': work_dir,
            'job_name': base_name,
            'tempdir': tmp_abs,
            'cmd1': cmd1,
            'cmd2': cmd2,
            'cmd3': cmd3,
        }))