# Hotspots per bin

This code intersects filtered hotspots located in mappable bins with sets of genomic bins of different lengths (autosomes only; driver regions removed).

Hotspots in each cancer type are assigned to a signature by maximum likelihood.

In [1]:
# Cancer-type labels; one job command is generated per label and bin size.
cancer_types = [
    "BRCA", "BLADDER_URI", "COADREAD", "ESOPHA_STOMACH",
    "NSCLC", "PROSTATE", "SKCM",
]

In [2]:
# Bin sizes in base pairs; only the 1 Mbp binning is run here.
bins = [1_000_000]

In [3]:
# Root directory prefixed to every path built below. While it is empty, the
# derived paths all start with '/', so set it before running the notebook.
main_dir = ""

In [4]:
# Components of the per-cancer-type hotspot file name
# ({ctype}_sigsprobs_{data_type}_n{samples_threshold}_{alternates}.normsum.txt).
data_type = 'cancertypes_filtered_nodrivers'
samples_threshold = '2'
alternates = 'altsplit'

# Directory that holds those hotspot files.
hotspots_dir = '{}/signatures/assign_hotspots_to_sigs/data'.format(main_dir)

In [5]:
# Directory used as the prefix of every job's JSON output path.
output_dir = '{}/genomic_bins/data/hotspots_per_bin'.format(main_dir)

### Run the intersection of hotspots (within mappable megabases) with the defined bins

In [6]:
# Script invoked by every job line.
code_f = '{}/genomic_bins/code/hotspots_per_bin.py'.format(main_dir)
# Job map file written by this notebook (header sections + one command per job).
map_f = '{}/genomic_bins/code/hotspots_per_bin.map'.format(main_dir)

In [7]:
# Header lines of the job map file, grouped by section. Each entry is written
# followed by a newline, so an entry ending in '\n' leaves a blank line after it.
info = (
    # Resources requested for each job.
    ['[params]', 'cores=1', 'memory=16G\n']
    # Commands run before the jobs: load conda and activate the environment.
    + [
        '[pre]',
        '. "/home/$USER/miniconda3/etc/profile.d/conda.sh"',
        'conda activate hotspots_framework\n',
    ]
    # Section marker after which the job command lines are appended.
    + ['[jobs]']
)

In [8]:
# Write the job map file: the header lines first, then one command line per
# (cancer type, bin size) pair.
with open(map_f, 'w') as ofd:
    ofd.writelines(f'{line}\n' for line in info)

    for ctype in cancer_types:
        # Hotspot file for this cancer type.
        input_f = (
            f'{hotspots_dir}/{ctype}_sigsprobs_{data_type}'
            f'_n{samples_threshold}_{alternates}.normsum.txt'
        )

        for bin_size in bins:
            # Bin size expressed in kb, as used in the file names below.
            kb = int(bin_size / 1000)
            bins_f = (
                f'{main_dir}/genomic_bins/data/hg38_{kb}kb_bin'
                '.nodrivers.filtered.mappable_positions.autosomes.binids.txt'
            )
            output_f = f'{output_dir}/{ctype}.{kb}kb.hotspots_per_bin.json'

            # NOTE(review): '-h' is conventionally the help flag; presumably
            # hotspots_per_bin.py declares it as the hotspots input option.
            # TODO confirm against the script.
            command = f'python {code_f} -h {input_f} -b {bins_f} -o {output_f}'
            print(command, file=ofd)