# Count hotspots on each experiment

In [1]:
import os

In [2]:
# Root folder of the project; the paths in the following cells are built as
# f'{main_dir}/...'. NOTE: while this is left empty, those paths start with '/'.
main_dir: str = ""

## Propensity: 100 samples, 100-300 muts/sample

In [3]:
# Both files live in <main_dir>/code:
#   code_file - the Python script that every generated job line invokes
#   map_file  - the job map that this notebook writes
code_file = '{}/code/hotspot_counts_perposition.py'.format(main_dir)
map_file = '{}/code/hotspot_counts.map'.format(main_dir)

In [4]:
# Experiment parameters; together they form the per-experiment folder name
# '<iterations>iter_<sample_size>samples_<cutoff>muts'.
iterations, sample_size = 1000, 100
# Mutations-per-sample cutoffs: 100 to 300 inclusive, in steps of 25.
mutations_cutoffs = list(range(100, 301, 25))

In [5]:
# Three sibling folders under <main_dir>/data:
#   subsamples -> subsampling files passed to the script with -s
#   hotspots   -> hotspot folders passed to the script with -i
#   counts     -> where the per-signature count tables (-o) are written
subsamples_path, input_path, output_path = (
    f'{main_dir}/data/{subdir}' for subdir in ('subsamples', 'hotspots', 'counts')
)

In [6]:
# Header of the job map, one entry per output line. Entries that already end
# in '\n' leave a blank line after their section once the writer appends its
# own newline.
info = (
    # [params]: settings applied to each job (presumably read by the job
    # scheduler that consumes the map file -- confirm against its docs).
    ['[params]', 'cores=1', 'memory=24G\n']
    # [pre]: shell lines that source conda and activate the environment.
    + [
        '[pre]',
        '. "/home/$USER/miniconda3/etc/profile.d/conda.sh"',
        'conda activate hotspots_framework\n',
    ]
    # [jobs]: the generated `python ...` command lines are appended after this.
    + ['[jobs]']
)

In [7]:
# Cancer type -> list of signatures for which a job line is generated.
# Update this mapping if the mutation burden per sample is set to 600.
data_to_run = {
    'BLADDER_URI': ['SBS2', 'SBS13', 'SBS5', 'SBS1'],
    'BRCA': ['SBS5', 'SBS1', 'SBS2', 'SBS13', 'SBS3'],
    'COADREAD': [
        'SBS1', 'SBS40', 'SBS17b', 'SBS5', 'SBS18', 'SBS93', 'SBS17a',
    ],
    'ESOPHA_STOMACH': ['SBS17b', 'SBS17a', 'SBS5', 'SBS1'],
    'NSCLC': ['SBS4', 'SBS5', 'SBS40', 'SBS2', 'SBS13', 'SBS1'],
    'PROSTATE': ['SBS5', 'SBS1', 'SBS8'],
    'SKCM': ['SBS7a', 'SBS7b'],
}
# Live view over the keys above: the cancer types that will be iterated.
ctypes_to_run = data_to_run.keys()

In [8]:
# Write the job map: first the header lines from `info`, then one
# `python <code_file> ...` command per (mutation cutoff, cancer type, signature).
# Output folders are created on the way so each job's -o target has a parent.
with open(map_file, 'w') as ofd:
    # Header: each entry on its own line; entries already ending in '\n'
    # therefore leave a blank line after them.
    ofd.writelines(f'{line}\n' for line in info)

    for mcutoff in mutations_cutoffs:
        # Subsamples, hotspots and counts all use the same per-experiment folder name.
        experiment = f'{iterations}iter_{sample_size}samples_{mcutoff}muts'
        subsamples_dir = f'{subsamples_path}/{experiment}'
        hotspots_dir = f'{input_path}/{experiment}'
        output_dir = f'{output_path}/{experiment}'
        # exist_ok=True already tolerates an existing directory, so no
        # os.path.exists() pre-check is needed (and none can go stale).
        os.makedirs(output_dir, exist_ok=True)

        for cancertype in ctypes_to_run:
            ctype_output = os.path.join(output_dir, cancertype)
            os.makedirs(ctype_output, exist_ok=True)

            for signature in data_to_run[cancertype]:
                # -s: subsampling file for this signature
                subsample = f'{subsamples_dir}/{cancertype}/subsampling_{signature}.txt'
                # -i: hotspots folder of the cancer type (shared by all its signatures)
                hotspots = f'{hotspots_dir}/{cancertype}'
                # -o: per-signature counts table
                output_f = f'{ctype_output}/{cancertype}_hotspot_counts_{signature}.tsv'

                # The space before the newline is kept so the generated file is unchanged.
                ofd.write(f'python {code_file} -s {subsample} -i {hotspots} -o {output_f} \n')