# Probability of signatures in hotspots

Calculate the probability of each mutational signature within a hotspot, using the signature probability vectors of the mutations that make up the hotspot.

In [1]:
import os

#### Data

In [2]:
# Project root. Every path in this notebook is built as f'{main_dir}/...',
# so an empty value makes those paths start at the filesystem root ('/...').
main_dir = ""

In [3]:
# Root folder of the SigProfiler output; the per-cancer-type signature files
# are looked up underneath it when the jobs are written.
sigs_dir = f'{main_dir}/signatures/sigprofiler/output/mutations_total'

# Signature type label; it appears in the SigProfiler folder and file names.
sigstype = 'SBS96'

In [4]:
# Dataset settings; all three are interpolated into the hotspots folder name
# and into the names of the output files.
data_type = 'cancertypes_filtered_nodrivers'
samples_threshold = '2'  # kept as a string: it is only used inside path names here
alternates = 'altsplit'

# Folder holding one '<ctype>.results.tsv.gz' hotspots file per cancer type.
hotspots_dir = (
    f'{main_dir}/hotspots/data/{data_type}'
    f'/hotspots_n{samples_threshold}_{alternates}'
)

In [5]:
# Cancer types to process. One job per entry is written to the map file,
# in exactly this order, so the order is kept as originally listed.
ctypes_to_analyze = [
    'ALL', 'BILIARY_TRACT', 'BLADDER_URI', 'BOWEL', 'BRCA', 'BRAIN',
    'CERVIX', 'COADREAD', 'ES', 'ESOPHA_STOMACH', 'GBM', 'HEAD_NECK',
    'KIDNEY', 'LIVER', 'LUNG', 'MBL', 'NSCLC', 'OV', 'PANCREAS',
    'PROSTATE', 'SCLC', 'SKCM', 'SKIN', 'ST', 'THYROID', 'UCEC',
]

In [6]:
# Sanity check: number of cancer types in the list (shown as the cell output).
len(ctypes_to_analyze)

26

#### Run

In [7]:
# Working directory of this step; the script to run, the generated job map
# and the output folder are all resolved relative to it.
run_dir = f'{main_dir}/signatures/assign_hotspots_to_sigs'

In [8]:
# Script that every job in the map file invokes.
code = os.path.join(run_dir, 'code', 'assign_hotspots_to_sigs.py')

# Job map generated by this notebook. The file name contains no placeholders,
# so it is a plain string literal rather than an f-string.
# NOTE(review): 'nodrivers' is hard-coded here while data_type is configurable;
# confirm whether the map name should follow data_type.
map_file = os.path.join(run_dir, 'code', 'assign_hotspots_to_sigs_nodrivers.map')

# Folder passed to the jobs as the destination of their output files.
output_dir = os.path.join(run_dir, 'data')

In [9]:
# Header lines of the job map, grouped by section. Each entry is written on
# its own line; an entry ending in '\n' therefore leaves a blank line after
# its section. The meaning of the sections is defined by the tool that
# consumes the .map file (not visible here).
info = (
    # Resources requested per job
    ['[params]', 'cores=1', 'memory=8G\n']
    # Environment set-up commands
    + [
        '[pre]',
        '. "/home/$USER/miniconda3/etc/profile.d/conda.sh"',
        'conda activate hotspots_framework\n',
    ]
    # Marker after which the job commands are appended
    + ['[jobs]']
)

In [10]:
# Generate the job map: the header lines first, then one command per
# (cancer type, probability mode) combination.
with open(map_file, 'w') as ofd:

    # Header: one entry per line (entries ending in '\n' leave a blank line after them)
    ofd.writelines(f'{line}\n' for line in info)

    for ctype in ctypes_to_analyze:

        # Both input files depend only on the cancer type, so they are
        # resolved once here rather than once per probability mode.

        # Signatures file
        context_probs_f = os.path.join(
            sigs_dir,
            f'{ctype}_{sigstype}',
            sigstype,
            'Suggested_Solution',
            f'COSMIC_{sigstype}_Decomposed_Solution',
            'Activities',
            'Decomposed_Mutation_Probabilities.txt',
        )

        # Hotspots file
        input_file_hotspots = os.path.join(hotspots_dir, f'{ctype}.results.tsv.gz')

        # 'sum' for sum of probabilities or 'normsum' for normalized sum of probabilities
        for p_mode in ['normsum']:

            # Output file, tagged with the dataset settings and the probability mode
            output_name = f'{ctype}_sigsprobs_{data_type}_n{samples_threshold}_{alternates}.{p_mode}.txt'
            output_f = os.path.join(output_dir, output_name)

            # One job per line.
            # NOTE(review): '-h' is conventionally a help flag; confirm that
            # assign_hotspots_to_sigs.py defines it as the hotspots input.
            command = f'python {code} -s {context_probs_f} -h {input_file_hotspots} -o {output_f} -p {p_mode}'
            ofd.write(command + '\n')
