# Prepare qmap files for post-processing variant caller results

In [2]:
import json
import pandas as pd
import os

In [3]:
# Load the case -> sample-ID mapping. The context manager closes the file handle
# as soon as parsing finishes instead of leaving it open until garbage collection.
with open("../../../cases_ids.json", "rb") as cases_file:
    samples = json.load(cases_file)
samples['case3']

{'normal': 'AQ5174',
 'tumor1': 'AQ5180',
 'tumor2': 'AQ5186',
 'sex': 'female',
 'kidney': 'AX4954',
 'liver': 'AX4955',
 'pancreas': 'AX4956',
 'heart': 'AX4957',
 'clone1': 'AX4958',
 'clone2': 'AX4961',
 'mother': 'AW8063',
 'father': 'AW8064',
 'lung': 'AX4962',
 'medulla': 'AX4963',
 'spleen': 'AX4964',
 'brain': 'AX4965',
 'bma': 'AX4966'}

In [6]:
# Collect every directory needed for the processed files, parents listed before children.

path = './output/' #change accordingly to another directory if needed
pt = 'case3'
pt_folder = path + pt + '/'
folders = [path, pt_folder]

# These two values are the same for every clone, so they are set once up front.
caller = 'strelka'
normal = samples[pt]['normal']

for t in ('clone1', 'clone2'):
    tumor = samples[pt][t]
    t_vs_n = '_vs_'.join([tumor, normal])
    t_folder = pt_folder + t_vs_n + '/'
    vcf_folder = t_folder + 'vcf_processing/'
    # The caller folder is stored without a trailing slash.
    caller_folder = vcf_folder + caller
    folders.extend([t_folder, vcf_folder, caller_folder])
folders

['./output/',
 './output/case3/',
 './output/case3/AX4958_vs_AQ5174/',
 './output/case3/AX4958_vs_AQ5174/vcf_processing/',
 './output/case3/AX4958_vs_AQ5174/vcf_processing/strelka',
 './output/case3/AX4961_vs_AQ5174/',
 './output/case3/AX4961_vs_AQ5174/vcf_processing/',
 './output/case3/AX4961_vs_AQ5174/vcf_processing/strelka']

In [7]:
# Create every output directory.
# os.makedirs(..., exist_ok=True) replaces the exists()-then-mkdir() pair: it is a
# no-op when the directory already exists, removes the race between the check and
# the creation, and also creates missing parent directories (e.g. if `path` is
# changed to a nested location whose parents do not exist yet).
for folder in folders:
    os.makedirs(folder, exist_ok=True)

# Somatic processing

In [11]:
# Root directories used by the cells below. The '/path/to/...' values are
# placeholders: point them at the real pipeline output locations before running.
root_in_hmf = "/path/to/hmf_pipeline/output/"
root_in_sarek = "/path/to/sarek/output/"
# Root under which the processed results are written.
root_out = "./output/"

## Process Strelka VCFs

In [12]:
# Build one Strelka VCF post-processing command per clone-vs-normal comparison.

python_file = '../../tumor_vs_blood/vcf_postprocessing/python_scripts/process_strelka_v2.9.10_vcf.py'

commands = []

#Strelka
pt = 'case3'
tumor1 = samples[pt]['clone1']
tumor2 = samples[pt]['clone2']
normal = samples[pt]['normal']

# A single loop replaces the two copy-pasted stanzas (which also carried a stray,
# unused `tumor = ...` assignment), so both comparisons are guaranteed to be
# built the same way. The resulting command strings are unchanged.
for tumor in (tumor1, tumor2):
    sample = tumor + '_vs_' + normal
    in_file = os.path.join(root_in_sarek, 'variant_calling', 'strelka', sample,
                           sample + '.strelka.somatic_snvs.vcf.gz')
    # The trailing '/' is kept so the -o argument ends with a path separator,
    # exactly as before (presumably the script concatenates file names onto it;
    # TODO confirm against process_strelka_v2.9.10_vcf.py).
    out_dir = os.path.join(root_out, pt, sample, 'vcf_processing', 'strelka' + '/')
    command = ' '.join(['python', python_file,
                        '-i', in_file,
                        '-o', out_dir,
                        '-t_id', tumor,
                        '-n_id', normal])
    commands.append(command)
commands

['python ../../tumor_vs_blood/vcf_postprocessing/python_scripts/process_strelka_v2.9.10_vcf.py -i /path/to/sarek/output/variant_calling/strelka/AX4958_vs_AQ5174/AX4958_vs_AQ5174.strelka.somatic_snvs.vcf.gz -o ./output/case3/AX4958_vs_AQ5174/vcf_processing/strelka/ -t_id AX4958 -n_id AQ5174',
 'python ../../tumor_vs_blood/vcf_postprocessing/python_scripts/process_strelka_v2.9.10_vcf.py -i /path/to/sarek/output/variant_calling/strelka/AX4961_vs_AQ5174/AX4961_vs_AQ5174.strelka.somatic_snvs.vcf.gz -o ./output/case3/AX4961_vs_AQ5174/vcf_processing/strelka/ -t_id AX4961 -n_id AQ5174']

In [13]:
# Header lines of the qmap file: the [pre] section sources conda and activates the
# environment, [params] sets cores and memory, and [jobs] opens the job list.
qmap_pre_params = [
    '[pre]',
    '. "/home/$USER/miniconda3/etc/profile.d/conda.sh"',
    'conda activate process_vc',
    '[params]',
    'cores = 1',
    'memory = 20G',
    '[jobs]',
]
# The job commands follow directly after the header.
qmap_file = qmap_pre_params + commands
qmap_file

['[pre]',
 '. "/home/$USER/miniconda3/etc/profile.d/conda.sh"',
 'conda activate process_vc',
 '[params]',
 'cores = 1',
 'memory = 20G',
 '[jobs]',
 'python ../../tumor_vs_blood/vcf_postprocessing/python_scripts/process_strelka_v2.9.10_vcf.py -i /path/to/sarek/output/variant_calling/strelka/AX4958_vs_AQ5174/AX4958_vs_AQ5174.strelka.somatic_snvs.vcf.gz -o ./output/case3/AX4958_vs_AQ5174/vcf_processing/strelka/ -t_id AX4958 -n_id AQ5174',
 'python ../../tumor_vs_blood/vcf_postprocessing/python_scripts/process_strelka_v2.9.10_vcf.py -i /path/to/sarek/output/variant_calling/strelka/AX4961_vs_AQ5174/AX4961_vs_AQ5174.strelka.somatic_snvs.vcf.gz -o ./output/case3/AX4961_vs_AQ5174/vcf_processing/strelka/ -t_id AX4961 -n_id AQ5174']

In [14]:
# Write the qmap file to disk, one entry per line.

with open('./strelka_process.qmap', 'w') as qmap_handle:
    qmap_handle.writelines('%s\n' % line for line in qmap_file)