# Compute observed vs. expected fold change in CTCF binding sites

This notebook generates the job file used to run a script that first computes the observed fold change of mutations in CTCF binding sites versus their flanks. Then, 1,000 times, the script simulates the expected distribution of mutations inside CTCF binding sites and computes the expected fold change. Finally, the observed fold change is compared with the distribution of expected fold changes.

In [5]:
# Root directory of the project; every path in this notebook is built as
# f'{main_dir}/ctcf/...'.
# NOTE(review): left empty here, so those paths resolve under the filesystem
# root ('/ctcf/...') — set this to the real project location before running.
main_dir = ''

In [11]:
# Input files are read from, and simulation results written to, the same data folder.
ctcf_dir = f'{main_dir}/ctcf/data'
output_dir = ctcf_dir

# Signature table handed to the script through its -sigf option.
sigs_f = f'{main_dir}/ctcf/code/COSMIC_v3.2_SBS_GRCh38.txt'

In [7]:
# Job map file that this notebook writes.
map_f = f'{main_dir}/ctcf/code/expected_fold_change.map'
# Script invoked by every job line of the map file.
code_f = f'{main_dir}/ctcf/code/expected_fold_change.py'

In [8]:
# Header of the job map file, one entry per written line. Entries ending in
# '\n' leave a blank line after their section once each entry is written on
# its own line.
params_section = [
    '[params]',
    'cores=1',
    'memory=100G\n',
]
# Commands listed under [pre]: load conda's shell hook, then activate the environment.
# NOTE(review): the conda path assumes a miniconda3 install under /home/$USER.
pre_section = [
    '[pre]',
    '. "/home/$USER/miniconda3/etc/profile.d/conda.sh"',
    'conda activate hotspots_framework\n',
]
# The job command lines are appended after the '[jobs]' marker.
info = [*params_section, *pre_section, '[jobs]']

In [9]:
# Signatures to run for each cancer type; both cancer types use the same pair.
# A fresh list is built per key so the entries stay independent.
run_dict = {
    ctype_key: ['SBS17b', 'SBS17a']
    for ctype_key in ('COADREAD', 'ESOPHA_STOMACH')
}

In [10]:
# Simulation settings; each one is forwarded to the script's command line.
seed = 61_254         # value of --seed
nsim = 1_000          # value of -nsim (number of simulations)
feature_size = 600    # value of -fs; presumably the binding-site span in bp — TODO confirm
window_size = 2_000   # value of -ws; presumably site plus flanks in bp — TODO confirm

In [12]:
# Write the job map: the header entries from `info`, one per line, followed by
# one command line per (cancer type, signature) pair in `run_dict`.
with open(map_f, 'w') as ofd:
    ofd.writelines(f'{line}\n' for line in info)
    for ctype, signatures in run_dict.items():
        # One input file per cancer type, shared by all of its signatures.
        ctcf_f = f'{ctcf_dir}/{ctype}_ctcf_maxprob_length2000.in.txt'
        for signature in signatures:
            # Each (cancer type, signature) pair gets its own JSON output path.
            output_f = f'{output_dir}/{ctype}_{signature}_simulations_ctcf_maxprob_length2000.in.json'
            command = (
                f'python {code_f}'
                f' -f {ctcf_f} -fs {feature_size} -ws {window_size} -sig {signature} -sigf {sigs_f} -nsim {nsim} -o {output_f} --seed {seed}\n'
            )
            ofd.write(command)