# Create a qmap to run mSigAct in parallel for each sample, with sample-specific signatures

In [2]:
import json
import os

In [65]:
# Load the per-sample signature selection (sample ID -> list of signature names).
file = './msigact/sigs_per_sample.json'
# Open through a context manager so the file handle is closed as soon as
# the JSON has been parsed instead of being left to the garbage collector.
with open(file, 'rb') as json_handle:
    sigs_per_sample_dict = json.load(json_handle)
sigs_per_sample_dict

{'AU2198': ['SBS1', 'SBS2', 'SBS31', 'SBS45'],
 'AU2199': ['SBS1', 'SBS2', 'SBS18', 'SBS31', 'SBS40', 'SBS45'],
 'AU2200': ['SBS1', 'SBS2', 'SBS5', 'SBS18', 'SBS31', 'SBS45'],
 'AU2202': ['SBS1', 'SBS2', 'SBS18', 'SBS31', 'SBS40'],
 'AZ4609': ['SBS5', 'SBS18', 'SBS31', 'SBS40', 'SBS45'],
 'AZ4610': ['SBS5', 'SBS31', 'SBS40', 'SBS45'],
 'AZ4611': ['SBS5', 'SBS31', 'SBS40', 'SBS45'],
 'AZ4612': ['SBS1', 'SBS31', 'SBS40'],
 'AZ4613': ['SBS2', 'SBS13', 'SBS31', 'SBS40'],
 'AZ4614': ['SBS5', 'SBS18', 'SBS31', 'SBS40'],
 'AZ6342': ['SBS5', 'SBS18', 'SBS31', 'SBS40'],
 'AZ6371': ['SBS1', 'SBS18', 'SBS31', 'SBS40']}

### Create folders per sample

In [4]:
# Create one directory per sample under ./msigact/, each holding one
# sub-directory per tested signature (SBS31 and SBS35).
path = './msigact/'
for sample in sigs_per_sample_dict.keys():
    for sig in ['SBS31','SBS35']:
        # makedirs also creates the sample directory when it is missing, and
        # exist_ok=True makes re-running the cell a no-op instead of an error.
        os.makedirs(os.path.join(path, sample, sig), exist_ok=True)

### Create tables with selected signatures per sample

In [37]:
# Build one create_matrix_sigs.py command per (sample, tested signature) pair.
# Each command requests the sample's own signatures, minus SBS31/SBS35, plus
# the single signature under test appended at the end.
python_script = '../../../wgs_analysis/tumor_vs_blood/signature_analysis/scripts/create_matrix_sigs.py'
cosmic_sigs_file = '../../../wgs_analysis/tumor_vs_blood/signature_analysis/data/COSMIC_v3.3.1_SBS_GRCh38.txt'
path = '../msigact/'
tested_sigs = ['SBS31','SBS35']
commands = []
for sample, sample_sigs in sigs_per_sample_dict.items():
    # Build a NEW list instead of calling .remove()/.append() on the list
    # stored in sigs_per_sample_dict: mutating it in place silently rewrites
    # the loaded dictionary and makes this cell depend on its own side effects.
    background_sigs = [sig for sig in sample_sigs if sig not in tested_sigs]
    for test_sig in tested_sigs:
        out_file = path + sample + '/sigs_' + test_sig +'.tsv'
        sigs_in_string = ','.join(background_sigs + [test_sig])
        command = 'python '+python_script+' --cosmic_sigs_file '+cosmic_sigs_file+' --out_file '+out_file+' --sigs '+sigs_in_string
        commands.append(command)
commands

['python ../../../wgs_analysis/tumor_vs_blood/signature_analysis/scripts/create_matrix_sigs.py --cosmic_sigs_file ../../../wgs_analysis/tumor_vs_blood/signature_analysis/data/COSMIC_v3.3.1_SBS_GRCh38.txt --out_file ../msigact/AU2198/sigs_SBS31.tsv --sigs SBS1,SBS2,SBS45,SBS31',
 'python ../../../wgs_analysis/tumor_vs_blood/signature_analysis/scripts/create_matrix_sigs.py --cosmic_sigs_file ../../../wgs_analysis/tumor_vs_blood/signature_analysis/data/COSMIC_v3.3.1_SBS_GRCh38.txt --out_file ../msigact/AU2198/sigs_SBS35.tsv --sigs SBS1,SBS2,SBS45,SBS35',
 'python ../../../wgs_analysis/tumor_vs_blood/signature_analysis/scripts/create_matrix_sigs.py --cosmic_sigs_file ../../../wgs_analysis/tumor_vs_blood/signature_analysis/data/COSMIC_v3.3.1_SBS_GRCh38.txt --out_file ../msigact/AU2199/sigs_SBS31.tsv --sigs SBS1,SBS2,SBS18,SBS40,SBS45,SBS31',
 'python ../../../wgs_analysis/tumor_vs_blood/signature_analysis/scripts/create_matrix_sigs.py --cosmic_sigs_file ../../../wgs_analysis/tumor_vs_blood/

In [38]:
# Header lines of the qmap file: the [pre] section sources conda and activates
# the msigact environment, [params] requests 1 core and 8G of memory, and
# [jobs] introduces the command list that follows.
qmap_pre_params = [
    '[pre]',
    '. "/home/$USER/miniconda3/etc/profile.d/conda.sh"',
    'conda activate msigact',
    '[params]',
    'cores = 1',
    'memory = 8G',
    '[jobs]',
]
# Full file content: header first, then one job command per entry.
qmap_file = [*qmap_pre_params, *commands]

In [39]:
# Save the qmap file: every entry of qmap_file becomes one line.

with open('./qmap_files/01_msigact_signature_tables_per_sample.qmap', 'w') as qmap_out:
    qmap_out.writelines('%s\n' % entry for entry in qmap_file)

## Qmap for parallelizing the mSigAct script per sample

In [29]:
# Build one run_pipeline.R command per (sample, tested signature) pair.
# Arguments passed to the R script, in order: count matrix, signature table,
# output directory, and a trailing '1' (presumably the number of cores,
# matching 'cores = 1' in the qmap header -- TODO confirm against run_pipeline.R).
rscript = '../../../wgs_analysis/tumor_vs_blood/signature_analysis/scripts/run_pipeline.R'
base_path = '/workspace/projects/sjd_pediatric_tumors/' # change at the end
repo_path = base_path+'second-tumors-children/duplex_analysis/signature_analysis/msigact/'
commands = []
for sample in sigs_per_sample_dict.keys():
    sample_dir = repo_path + sample + '/'
    # The count matrix is shared by both tested signatures of a sample.
    count_matrix = sample_dir + 'count_matrix.tsv'
    for sig in ['SBS31','SBS35']:
        sigs_matrix = sample_dir + 'sigs_' + sig + '.tsv'
        output = sample_dir + sig + '/'
        command = ' '.join(['Rscript', rscript, count_matrix, sigs_matrix, output, '1'])
        commands.append(command)
commands

['Rscript ../../../wgs_analysis/tumor_vs_blood/signature_analysis/scripts/run_pipeline.R /workspace/projects/sjd_pediatric_tumors/second-tumors-children/duplex_analysis/signature_analysis/msigact/AU2198/count_matrix.tsv /workspace/projects/sjd_pediatric_tumors/second-tumors-children/duplex_analysis/signature_analysis/msigact/AU2198/sigs_SBS31.tsv /workspace/projects/sjd_pediatric_tumors/second-tumors-children/duplex_analysis/signature_analysis/msigact/AU2198/SBS31/ 1',
 'Rscript ../../../wgs_analysis/tumor_vs_blood/signature_analysis/scripts/run_pipeline.R /workspace/projects/sjd_pediatric_tumors/second-tumors-children/duplex_analysis/signature_analysis/msigact/AU2198/count_matrix.tsv /workspace/projects/sjd_pediatric_tumors/second-tumors-children/duplex_analysis/signature_analysis/msigact/AU2198/sigs_SBS35.tsv /workspace/projects/sjd_pediatric_tumors/second-tumors-children/duplex_analysis/signature_analysis/msigact/AU2198/SBS35/ 1',
 'Rscript ../../../wgs_analysis/tumor_vs_blood/signa

In [30]:
# Header lines of the qmap file: [pre] sets up the conda environment,
# [params] requests 1 core and 8G of memory, [jobs] precedes the commands.
qmap_pre_params = [
    '[pre]',
    '. "/home/$USER/miniconda3/etc/profile.d/conda.sh"',
    'conda activate msigact',
    '[params]',
    'cores = 1',
    'memory = 8G',
    '[jobs]',
]
# Header followed by the Rscript commands built in the previous cell.
qmap_file = [*qmap_pre_params, *commands]

In [31]:
# Save the qmap file: every entry of qmap_file becomes one line.

with open('./qmap_files/02_msigact_script_per_sample.qmap', 'w') as qmap_out:
    qmap_out.writelines('%s\n' % entry for entry in qmap_file)

# Specificity test

### qmap to create signature tables

In [67]:
# Load the synthetic specificity samples (sample ID -> {signature name: count}).
# A context manager closes the file handle right after parsing.
with open('./synthetic_samples_specificity/samples_dict.json','rb') as json_handle:
    samples_info = json.load(json_handle)
samples_info

{'AU2198': {'SBS1': 63, 'SBS2': 55, 'SBS45': 330},
 'AU2199': {'SBS2': 57, 'SBS18': 562},
 'AU2200': {'SBS1': 15, 'SBS2': 13, 'SBS5': 225, 'SBS18': 108, 'SBS45': 85},
 'AU2202': {'SBS1': 14, 'SBS18': 48, 'SBS40': 115},
 'AZ4609': {'SBS5': 316, 'SBS18': 283, 'SBS40': 507},
 'AZ4610': {'SBS5': 319, 'SBS40': 464, 'SBS45': 159},
 'AZ4611': {'SBS5': 246, 'SBS40': 179, 'SBS45': 87},
 'AZ4612': {'SBS1': 32, 'SBS40': 744},
 'AZ4613': {'SBS2': 26, 'SBS40': 372},
 'AZ4614': {'SBS18': 95, 'SBS40': 238},
 'AZ6342': {'SBS5': 105, 'SBS18': 59},
 'AZ6371': {'SBS1': 5, 'SBS40': 57}}

In [52]:
# Build one create_matrix_sigs.py command per synthetic sample: the sample's
# own signatures plus SBS31 appended at the end, then write them to a qmap file.
python_script = '../../../wgs_analysis/tumor_vs_blood/signature_analysis/scripts/create_matrix_sigs.py'
cosmic_sigs_file = '../../../wgs_analysis/tumor_vs_blood/signature_analysis/data/COSMIC_v3.3.1_SBS_GRCh38.txt'
out_path = '../synthetic_samples_specificity/'
commands = []
for sample, sig_counts in samples_info.items():
    # Only the signature names (the dict keys) are needed here.
    sigs_to_string = ','.join(sig_counts.keys())
    out_file = out_path + sample + '/sigs_SBS31.tsv'
    command = ' '.join([
        'python', python_script,
        '--cosmic_sigs_file', cosmic_sigs_file,
        '--out_file', out_file,
        '--sigs', sigs_to_string + ',SBS31',
    ])
    commands.append(command)
qmap_pre_params = [
    '[pre]',
    '. "/home/$USER/miniconda3/etc/profile.d/conda.sh"',
    'conda activate msigact',
    '[params]',
    'cores = 1',
    'memory = 8G',
    '[jobs]',
]
qmap_file = [*qmap_pre_params, *commands]
print(commands)
# Save qmap file, one entry per line.
# NOTE(review): 'signatrue' in the file name looks like a typo for 'signature';
# kept as-is because renaming it would change the output path.
with open('./qmap_files/03_msigact_signatrue_tables_synthetic_specificity.qmap', 'w') as qmap_out:
    qmap_out.writelines('%s\n' % entry for entry in qmap_file)

['python ../../../wgs_analysis/tumor_vs_blood/signature_analysis/scripts/create_matrix_sigs.py --cosmic_sigs_file ../../../wgs_analysis/tumor_vs_blood/signature_analysis/data/COSMIC_v3.3.1_SBS_GRCh38.txt --out_file ../synthetic_samples_specificity/AU2198/sigs_SBS31.tsv --sigs SBS1,SBS2,SBS45,SBS31', 'python ../../../wgs_analysis/tumor_vs_blood/signature_analysis/scripts/create_matrix_sigs.py --cosmic_sigs_file ../../../wgs_analysis/tumor_vs_blood/signature_analysis/data/COSMIC_v3.3.1_SBS_GRCh38.txt --out_file ../synthetic_samples_specificity/AU2199/sigs_SBS31.tsv --sigs SBS2,SBS18,SBS31', 'python ../../../wgs_analysis/tumor_vs_blood/signature_analysis/scripts/create_matrix_sigs.py --cosmic_sigs_file ../../../wgs_analysis/tumor_vs_blood/signature_analysis/data/COSMIC_v3.3.1_SBS_GRCh38.txt --out_file ../synthetic_samples_specificity/AU2200/sigs_SBS31.tsv --sigs SBS1,SBS2,SBS5,SBS18,SBS45,SBS31', 'python ../../../wgs_analysis/tumor_vs_blood/signature_analysis/scripts/create_matrix_sigs.py

### qmap to run mSigAct

In [51]:
# Build one run_pipeline.R command per synthetic specificity sample, create
# each sample's SBS31 output directory, and write the commands to a qmap file.
rscript = '../../../wgs_analysis/tumor_vs_blood/signature_analysis/scripts/run_pipeline.R'
base_path = '/workspace/projects/sjd_pediatric_tumors/' # change this path at the end
path = base_path+'second-tumors-children/duplex_analysis/signature_analysis/synthetic_samples_specificity/'
commands = []
for sample in samples_info.keys():
    count_matrix = path + sample + '/count_matrix.tsv'
    sigs_matrix = path + sample + '/sigs_SBS31.tsv'
    output = path + sample + '/SBS31/'
    # exist_ok=True replaces the separate exists() check, which could race
    # with another process creating the same directory.
    os.makedirs(output, exist_ok=True)
    # Trailing '4' is presumably the number of cores, matching 'cores = 4'
    # below -- TODO confirm against run_pipeline.R.
    command = 'Rscript '+rscript+' '+count_matrix+' '+sigs_matrix+' '+output+' 4'
    commands.append(command)
print(commands)
# qmap header: conda setup, then 4 cores / 16G of memory per job.
qmap_pre_params = ['[pre]','. "/home/$USER/miniconda3/etc/profile.d/conda.sh"','conda activate msigact','[params]','cores = 4','memory = 16G','[jobs]']
qmap_file = qmap_pre_params + commands
# Save qmap file, one entry per line.
with open('./qmap_files/04_msigact_script_synthetic_specificity.qmap', 'w') as f:
    for item in qmap_file:
        f.write('%s\n' % item)

['Rscript ../../../wgs_analysis/tumor_vs_blood/signature_analysis/scripts/run_pipeline.R /workspace/projects/sjd_pediatric_tumors/second-tumors-children/duplex_analysis/signature_analysis/synthetic_samples_specificity/AU2198/count_matrix.tsv /workspace/projects/sjd_pediatric_tumors/second-tumors-children/duplex_analysis/signature_analysis/synthetic_samples_specificity/AU2198/sigs_SBS31.tsv /workspace/projects/sjd_pediatric_tumors/second-tumors-children/duplex_analysis/signature_analysis/synthetic_samples_specificity/AU2198/SBS31/ 4', 'Rscript ../../../wgs_analysis/tumor_vs_blood/signature_analysis/scripts/run_pipeline.R /workspace/projects/sjd_pediatric_tumors/second-tumors-children/duplex_analysis/signature_analysis/synthetic_samples_specificity/AU2199/count_matrix.tsv /workspace/projects/sjd_pediatric_tumors/second-tumors-children/duplex_analysis/signature_analysis/synthetic_samples_specificity/AU2199/sigs_SBS31.tsv /workspace/projects/sjd_pediatric_tumors/second-tumors-children/dupl

# Sensitivity test (SBS31 injections, confidence intervals)

### qmap to create signature tables

In [68]:
# Load the two synthetic-sample dictionaries used by the sensitivity test.
# Context managers close each file handle right after parsing.
with open('./synthetic_samples_sensitibity/all_samples_dict.json','rb') as json_handle:
    all_samples_dict = json.load(json_handle)
with open('./synthetic_samples_sensitibity/case1_samples_dict.json','rb') as json_handle:
    pt1_samples_dict = json.load(json_handle)

In [69]:
# Build one create_matrix_sigs.py command per synthetic sensitivity sample
# (first every sample in all_samples_dict, then every sample in
# pt1_samples_dict) and write them to a qmap file.
python_script = '../../../wgs_analysis/tumor_vs_blood/signature_analysis/scripts/create_matrix_sigs.py'
cosmic_sigs_file = '../../../wgs_analysis/tumor_vs_blood/signature_analysis/data/COSMIC_v3.3.1_SBS_GRCh38.txt'
out_path = '../synthetic_samples_sensitibity/'
commands = []
# A single loop over both dictionaries replaces two copy-pasted loops; the
# order of the generated commands is unchanged.
for samples_dict in (all_samples_dict, pt1_samples_dict):
    for sample, sample_entries in samples_dict.items():
        # Signature names are read from the keys of the 'SBS31_0' entry
        # (presumably the zero-injection baseline -- TODO confirm).
        sigs_to_string = ','.join(sample_entries['SBS31_0'].keys())
        command = 'python '+python_script+' --cosmic_sigs_file '+cosmic_sigs_file+' --out_file '+out_path+sample+'/sigs_SBS31.tsv --sigs '+sigs_to_string+',SBS31'
        commands.append(command)

# qmap header: conda setup, then 1 core / 8G of memory per job.
qmap_pre_params = ['[pre]','. "/home/$USER/miniconda3/etc/profile.d/conda.sh"','conda activate msigact','[params]','cores = 1','memory = 8G','[jobs]']
qmap_file = qmap_pre_params + commands
# Save qmap file, one entry per line.
print(commands)
with open('./qmap_files/05_msigact_signature_tables_synthetic_sensitibity_injections.qmap', 'w') as f:
    for item in qmap_file:
        f.write('%s\n' % item)

['python ../../../wgs_analysis/tumor_vs_blood/signature_analysis/scripts/create_matrix_sigs.py --cosmic_sigs_file ../../../wgs_analysis/tumor_vs_blood/signature_analysis/data/COSMIC_v3.3.1_SBS_GRCh38.txt --out_file ../synthetic_samples_sensitibity/AU2198/sigs_SBS31.tsv --sigs SBS1,SBS2,SBS45,SBS31', 'python ../../../wgs_analysis/tumor_vs_blood/signature_analysis/scripts/create_matrix_sigs.py --cosmic_sigs_file ../../../wgs_analysis/tumor_vs_blood/signature_analysis/data/COSMIC_v3.3.1_SBS_GRCh38.txt --out_file ../synthetic_samples_sensitibity/AZ4609/sigs_SBS31.tsv --sigs SBS5,SBS18,SBS40,SBS31', 'python ../../../wgs_analysis/tumor_vs_blood/signature_analysis/scripts/create_matrix_sigs.py --cosmic_sigs_file ../../../wgs_analysis/tumor_vs_blood/signature_analysis/data/COSMIC_v3.3.1_SBS_GRCh38.txt --out_file ../synthetic_samples_sensitibity/AZ4610/sigs_SBS31.tsv --sigs SBS5,SBS40,SBS45,SBS31', 'python ../../../wgs_analysis/tumor_vs_blood/signature_analysis/scripts/create_matrix_sigs.py --c

### qmap to run mSigAct

In [70]:
# Build one run_pipeline.R command per synthetic sensitivity sample (first
# all_samples_dict, then pt1_samples_dict), create each sample's SBS31 output
# directory, and write the commands to a qmap file.
rscript = '../../../wgs_analysis/tumor_vs_blood/signature_analysis/scripts/run_pipeline.R'
base_path = '/workspace/projects/sjd_pediatric_tumors/'
path = base_path + 'second-tumors-children/duplex_analysis/signature_analysis/synthetic_samples_sensitibity/'
commands = []
# A single loop over both dictionaries replaces two copy-pasted loops; the
# order of the generated commands is unchanged.
for samples_dict in (all_samples_dict, pt1_samples_dict):
    for sample in samples_dict.keys():
        count_matrix = path + sample + '/count_matrix.tsv'
        sigs_matrix = path + sample + '/sigs_SBS31.tsv'
        output = path + sample + '/SBS31/'
        # exist_ok=True replaces the separate exists() check, which could race
        # with another process creating the same directory.
        os.makedirs(output, exist_ok=True)
        # Trailing '4' is presumably the number of cores, matching 'cores = 4'
        # below -- TODO confirm against run_pipeline.R.
        command = 'Rscript '+rscript+' '+count_matrix+' '+sigs_matrix+' '+output+' 4'
        commands.append(command)
print(commands)
# qmap header: conda setup, then 4 cores / 16G of memory per job.
qmap_pre_params = ['[pre]','. "/home/$USER/miniconda3/etc/profile.d/conda.sh"','conda activate msigact','[params]','cores = 4','memory = 16G','[jobs]']
qmap_file = qmap_pre_params + commands
# Save qmap file, one entry per line.
with open('./qmap_files/06_msigact_script_per_sample_synthetic_sensitibity_injections.qmap', 'w') as f:
    for item in qmap_file:
        f.write('%s\n' % item)

['Rscript ../../../wgs_analysis/tumor_vs_blood/signature_analysis/scripts/run_pipeline.R /workspace/projects/sjd_pediatric_tumors/second-tumors-children/duplex_analysis/signature_analysis/synthetic_samples_sensitibity/AU2198/count_matrix.tsv /workspace/projects/sjd_pediatric_tumors/second-tumors-children/duplex_analysis/signature_analysis/synthetic_samples_sensitibity/AU2198/sigs_SBS31.tsv /workspace/projects/sjd_pediatric_tumors/second-tumors-children/duplex_analysis/signature_analysis/synthetic_samples_sensitibity/AU2198/SBS31/ 4', 'Rscript ../../../wgs_analysis/tumor_vs_blood/signature_analysis/scripts/run_pipeline.R /workspace/projects/sjd_pediatric_tumors/second-tumors-children/duplex_analysis/signature_analysis/synthetic_samples_sensitibity/AZ4609/count_matrix.tsv /workspace/projects/sjd_pediatric_tumors/second-tumors-children/duplex_analysis/signature_analysis/synthetic_samples_sensitibity/AZ4609/sigs_SBS31.tsv /workspace/projects/sjd_pediatric_tumors/second-tumors-children/dupl