# Assign mutations to signatures

For a given mutation in a given patient, compute the probability that the mutation arose from each of the signatures that are active in the sample. This code generates one vector of mutational probabilities per individual mutation.

The signatures used here are the COSMIC decomposition of the signatures extracted from all mutations in the cancer type.

We run this code for three sets of mutations: 1) total, 2) inside hotspots and 3) outside hotspots.

In [1]:
import os
from collections import defaultdict

import pandas as pd

In [2]:
# Root directory of the project: every input and output path in this notebook is built from it.
# NOTE(review): empty here — presumably it must be set to the actual location before running; confirm.
main_dir = ''

In [3]:
# Working directory of this step; its `code` and `data` subfolders are used in the "Run" cells
run_dir = f'{main_dir}/signatures/assign_muts_to_sigs'

In [4]:
# Signature type: selects the sigprofiler result folders and is passed to the script through `-st`
sigstype = 'SBS96'
# Name of the subfolder of `vcf_files` from which the mutations of each cancer type are read
sigstype_b = 'SNV'

## 1) Total mutations

In [5]:
# Label of this set of mutations; used in the names of the map file and of the output files
mtype = 'total'

In [6]:
# Input mutations from sigprofilermatrixgenerator (total mutations).
# The mutations of each cancer type are read from '<cancer type>/output/vcf_files/<sigstype_b>' inside it.
muts_dir = f'{main_dir}/signatures/sigprofilermatrixgenerator/output/mutations_total'

In [7]:
# Input signatures from sigprofilerextractor (extraction run on the total mutations).
# Its entries are scanned to find the cancer types to process.
sigs_dir = f'{main_dir}/signatures/sigprofiler/output/mutations_total'

#### Run 

In [8]:
# Results of every job are written to the `data` folder of the run directory
output_dir = os.path.join(run_dir, 'data')

# The script to execute and the map file listing its jobs both live in the `code` folder
code, map_file = (
    os.path.join(run_dir, 'code', fname)
    for fname in ('assign_muts_to_sigs.py', f'1_assign_muts_to_sigs_{mtype}.map')
)

In [9]:
# Header lines of the map file. Each item is written followed by a newline,
# so an item that already ends in '\n' leaves a blank line after its section.
info = [
    # Resources requested for the jobs
    *('[params]', 'cores=1', 'memory=8G\n'),
    # Make conda available and activate the environment the script runs in
    *(
        '[pre]',
        '. "/home/$USER/miniconda3/etc/profile.d/conda.sh"',
        'conda activate hotspots_framework\n',
    ),
    # The command lines of the jobs are appended after this line
    '[jobs]',
]

In [10]:
# Generate the map file: the header lines in `info`, followed by one command
# line per cancer type that has signature results in `sigs_dir`.

# Result folders are named '<ctype>_<sigstype>' (the same name is rebuilt for
# `context_probs_f` below). Match that exact suffix and strip it by its real
# length: a substring test also accepts unrelated names, and a hard-coded
# 6-character slice is only correct for five-character types such as 'SBS96'.
suffix = f'_{sigstype}'

# List the cancer types before opening the map file, so that a missing or
# unreadable `sigs_dir` does not leave a header-only map file behind.
# The `with` block closes the directory iterator; sorting makes the order of
# the jobs reproducible between runs.
with os.scandir(sigs_dir) as entries:
    ctypes = sorted(
        entry.name[:-len(suffix)]
        for entry in entries
        if entry.is_dir() and entry.name.endswith(suffix)
    )

with open(map_file, 'w') as ofd:

    # Header
    ofd.writelines(f'{line}\n' for line in info)

    for ctype in ctypes:

        # Define signatures file
        context_probs_f = os.path.join(
            sigs_dir, f'{ctype}_{sigstype}', sigstype, 'Suggested_Solution',
            f'COSMIC_{sigstype}_Decomposed_Solution', 'Activities', 'Decomposed_Mutation_Probabilities.txt')

        # Define mutations directory
        muts_ctype_dir = os.path.join(muts_dir, ctype, 'output', 'vcf_files', sigstype_b)

        # Define output file
        output_f = os.path.join(output_dir, f'{ctype}_{sigstype}_{mtype}.txt')

        # Write one job per cancer type
        ofd.write(f'python {code} -p {context_probs_f} -m {muts_ctype_dir} -st {sigstype} -o {output_f}\n')

## 2) Inside hotspot mutations

In [11]:
# Label of this set of mutations (inside hotspots); used in the names of the map file and of the output files
mtype = 'in'

In [12]:
# Input mutations from sigprofilermatrixgenerator (mutations inside hotspots).
# The mutations of each cancer type are read from '<cancer type>/output/vcf_files/<sigstype_b>' inside it.
muts_dir = f'{main_dir}/signatures/sigprofilermatrixgenerator/output/mutations_in_hotspots'

In [13]:
# Input signatures from sigprofilerextractor.
# The signatures extracted from the total mutations are used for this subset of mutations as well.
sigs_dir = f'{main_dir}/signatures/sigprofiler/output/mutations_total'

#### Run 

In [14]:
# Results of every job are written to the `data` folder of the run directory
output_dir = os.path.join(run_dir, 'data')

# The script to execute and the map file listing its jobs both live in the `code` folder
code, map_file = (
    os.path.join(run_dir, 'code', fname)
    for fname in ('assign_muts_to_sigs.py', f'1_assign_muts_to_sigs_{mtype}.map')
)

In [15]:
# Header lines of the map file. Each item is written followed by a newline,
# so an item that already ends in '\n' leaves a blank line after its section.
info = [
    # Resources requested for the jobs
    *('[params]', 'cores=1', 'memory=8G\n'),
    # Make conda available and activate the environment the script runs in
    *(
        '[pre]',
        '. "/home/$USER/miniconda3/etc/profile.d/conda.sh"',
        'conda activate hotspots_framework\n',
    ),
    # The command lines of the jobs are appended after this line
    '[jobs]',
]

In [16]:
# Generate the map file for the mutations inside hotspots: the header lines in
# `info`, followed by one command line per cancer type that has signature
# results in `sigs_dir`.

# Result folders are named '<ctype>_<sigstype>' (the same name is rebuilt for
# `context_probs_f` below). Match that exact suffix and strip it by its real
# length: a substring test also accepts unrelated names, and a hard-coded
# 6-character slice is only correct for five-character types such as 'SBS96'.
suffix = f'_{sigstype}'

# List the cancer types before opening the map file, so that a missing or
# unreadable `sigs_dir` does not leave a header-only map file behind.
# The `with` block closes the directory iterator; sorting makes the order of
# the jobs reproducible between runs.
with os.scandir(sigs_dir) as entries:
    ctypes = sorted(
        entry.name[:-len(suffix)]
        for entry in entries
        if entry.is_dir() and entry.name.endswith(suffix)
    )

with open(map_file, 'w') as ofd:

    # Header
    ofd.writelines(f'{line}\n' for line in info)

    for ctype in ctypes:

        # Define signatures file
        context_probs_f = os.path.join(
            sigs_dir, f'{ctype}_{sigstype}', sigstype, 'Suggested_Solution',
            f'COSMIC_{sigstype}_Decomposed_Solution', 'Activities', 'Decomposed_Mutation_Probabilities.txt')

        # Define mutations directory
        muts_ctype_dir = os.path.join(muts_dir, ctype, 'output', 'vcf_files', sigstype_b)

        # Define output file
        output_f = os.path.join(output_dir, f'{ctype}_{sigstype}_{mtype}.txt')

        # Write one job per cancer type
        ofd.write(f'python {code} -p {context_probs_f} -m {muts_ctype_dir} -st {sigstype} -o {output_f}\n')

## 3) Outside hotspot mutations

In [17]:
# Label of this set of mutations (outside hotspots); used in the names of the map file and of the output files
mtype = 'out'

In [18]:
# Input mutations from sigprofilermatrixgenerator (mutations outside hotspots).
# The mutations of each cancer type are read from '<cancer type>/output/vcf_files/<sigstype_b>' inside it.
muts_dir = f'{main_dir}/signatures/sigprofilermatrixgenerator/output/mutations_out_hotspots'

In [19]:
# Input signatures from sigprofilerextractor.
# The signatures extracted from the total mutations are used for this subset of mutations as well.
sigs_dir = f'{main_dir}/signatures/sigprofiler/output/mutations_total'

#### Run 

In [20]:
# Results of every job are written to the `data` folder of the run directory
output_dir = os.path.join(run_dir, 'data')

# The script to execute and the map file listing its jobs both live in the `code` folder
code, map_file = (
    os.path.join(run_dir, 'code', fname)
    for fname in ('assign_muts_to_sigs.py', f'1_assign_muts_to_sigs_{mtype}.map')
)

In [21]:
# Header lines of the map file. Each item is written followed by a newline,
# so an item that already ends in '\n' leaves a blank line after its section.
info = [
    # Resources requested for the jobs
    *('[params]', 'cores=1', 'memory=8G\n'),
    # Make conda available and activate the environment the script runs in
    *(
        '[pre]',
        '. "/home/$USER/miniconda3/etc/profile.d/conda.sh"',
        'conda activate hotspots_framework\n',
    ),
    # The command lines of the jobs are appended after this line
    '[jobs]',
]

In [22]:
# Generate the map file for the mutations outside hotspots: the header lines in
# `info`, followed by one command line per cancer type that has signature
# results in `sigs_dir`.

# Result folders are named '<ctype>_<sigstype>' (the same name is rebuilt for
# `context_probs_f` below). Match that exact suffix and strip it by its real
# length: a substring test also accepts unrelated names, and a hard-coded
# 6-character slice is only correct for five-character types such as 'SBS96'.
suffix = f'_{sigstype}'

# List the cancer types before opening the map file, so that a missing or
# unreadable `sigs_dir` does not leave a header-only map file behind.
# The `with` block closes the directory iterator; sorting makes the order of
# the jobs reproducible between runs.
with os.scandir(sigs_dir) as entries:
    ctypes = sorted(
        entry.name[:-len(suffix)]
        for entry in entries
        if entry.is_dir() and entry.name.endswith(suffix)
    )

with open(map_file, 'w') as ofd:

    # Header
    ofd.writelines(f'{line}\n' for line in info)

    for ctype in ctypes:

        # Define signatures file
        context_probs_f = os.path.join(
            sigs_dir, f'{ctype}_{sigstype}', sigstype, 'Suggested_Solution',
            f'COSMIC_{sigstype}_Decomposed_Solution', 'Activities', 'Decomposed_Mutation_Probabilities.txt')

        # Define mutations directory
        muts_ctype_dir = os.path.join(muts_dir, ctype, 'output', 'vcf_files', sigstype_b)

        # Define output file
        output_f = os.path.join(output_dir, f'{ctype}_{sigstype}_{mtype}.txt')

        # Write one job per cancer type
        ofd.write(f'python {code} -p {context_probs_f} -m {muts_ctype_dir} -st {sigstype} -o {output_f}\n')