# Prepare qmaps for variant callers results postprocessing

In [1]:
import json
import pandas as pd
import os

In [2]:
samples = json.load( open( "../../../cases_ids.json", "rb" ) )
samples

{'case1': {'normal': 'AQ5175',
  'tumor1': 'AQ5181',
  'tumor2': 'AQ5187',
  'sex': 'female'},
 'case2': {'normal': 'AQ5176',
  'tumor1': 'AQ5182',
  'tumor2': 'AQ5188',
  'sex': 'male'},
 'case3': {'normal': 'AQ5174',
  'tumor1': 'AQ5180',
  'tumor2': 'AQ5186',
  'sex': 'female',
  'kidney': 'AX4954',
  'liver': 'AX4955',
  'pancreas': 'AX4956',
  'heart': 'AX4957',
  'clone1': 'AX4958',
  'clone2': 'AX4961',
  'mother': 'AW8063',
  'father': 'AW8064',
  'lung': 'AX4962',
  'medulla': 'AX4963',
  'spleen': 'AX4964',
  'brain': 'AX4965',
  'bma': 'AX4966'},
 'case4': {'normal': 'AW8061',
  'tumor1': 'AW8050',
  'tumor2': 'AW8051',
  'sex': 'female'}}

# Create folder tree for output


In [3]:
# create a list with all the necessary folders for all the processed files

path = './output/' #change accordingly to another directory if needed
folders = []
folders.append(path)
for pt in samples.keys():
    pt_folder = path+pt+'/'
    folders.append(pt_folder)
    for t in ['tumor1','tumor2']:
        tumor = samples[pt][t]
        normal = samples[pt]['normal']
        t_vs_n = tumor + '_vs_' + normal
        t_folder = pt_folder + t_vs_n + '/'
        vcf_folder = t_folder +'vcf_processing/'
        folders.append(t_folder)
        folders.append(vcf_folder)
        for caller in ['mutect','strelka','sage','intersect']:
            caller_folder = vcf_folder + caller
            folders.append(caller_folder)
        vep_folder = t_folder +'process_vep_output/'
        vep2_folder = t_folder +'vep_processing'
        cnv_folder = t_folder +'process_cnv/'
        purple_folder = cnv_folder +'purple/'
        sv_folder = t_folder +'process_sv/'
        gridds_folder = sv_folder +'gridds/'
        folders.append(vep_folder)
        folders.append(vep2_folder)
        folders.append(cnv_folder)
        folders.append(purple_folder)
        folders.append(sv_folder)
        folders.append(gridds_folder)
        filter_folder = t_folder +'filter_and_annot/'
        folders.append(filter_folder)
    n_folder = pt_folder+samples[pt]['normal']+'/'
    folders.append(n_folder)
    for step in ['vcf_processing/','vep_processing/', 'process_vep_output/','filter_and_annot/']:
        step_folder = n_folder + step
        haplo_folder = step_folder+'haplotype_caller/'
        folders.append(step_folder)
        folders.append(haplo_folder)
    sv_folder = n_folder+'process_sv/'
    gripss_folder = sv_folder+'gripss/'
    t1_folder = gripss_folder+'t1/'
    t2_folder = gripss_folder+'t2/'
    folders.append(sv_folder)
    folders.append(gripss_folder)
    folders.append(t1_folder)
    folders.append(t2_folder)
folders

['./output/',
 './output/case1/',
 './output/case1/AQ5181_vs_AQ5175/',
 './output/case1/AQ5181_vs_AQ5175/vcf_processing/',
 './output/case1/AQ5181_vs_AQ5175/vcf_processing/mutect',
 './output/case1/AQ5181_vs_AQ5175/vcf_processing/strelka',
 './output/case1/AQ5181_vs_AQ5175/vcf_processing/sage',
 './output/case1/AQ5181_vs_AQ5175/vcf_processing/intersect',
 './output/case1/AQ5181_vs_AQ5175/process_vep_output/',
 './output/case1/AQ5181_vs_AQ5175/vep_processing',
 './output/case1/AQ5181_vs_AQ5175/process_cnv/',
 './output/case1/AQ5181_vs_AQ5175/process_cnv/purple/',
 './output/case1/AQ5181_vs_AQ5175/process_sv/',
 './output/case1/AQ5181_vs_AQ5175/process_sv/gridds/',
 './output/case1/AQ5181_vs_AQ5175/filter_and_annot/',
 './output/case1/AQ5187_vs_AQ5175/',
 './output/case1/AQ5187_vs_AQ5175/vcf_processing/',
 './output/case1/AQ5187_vs_AQ5175/vcf_processing/mutect',
 './output/case1/AQ5187_vs_AQ5175/vcf_processing/strelka',
 './output/case1/AQ5187_vs_AQ5175/vcf_processing/sage',
 './output/c

In [4]:
#create folders
for folder in folders:
    if not os.path.exists(folder):
        os.mkdir(folder)

# Somatic processing

## Process Mutect and SAGE vcf files

In [5]:
# These paths need to be accordongly adapted

root_in_hmf = '/path/to/hmf_pipeline/output/'
root_in_sarek = '/path/to/sarek/output/'
root_out = '../output/'

In [6]:
#commands for process vcfs from mutect and sage

python_file = '../python_scripts/process_mutect_vcf.py'

commands = []

#SAGE
for pt in samples.keys():
    tumor1 = samples[pt]['tumor1']
    tumor2 = samples[pt]['tumor2']
    normal = samples[pt]['normal']
    sample = tumor1 + '_vs_' + normal
    in_file = os.path.join(root_in_hmf,'sage_somatic',tumor1+'.sage.somatic.filtered.vcf.gz')
    out_dir = os.path.join(root_out,pt,sample,'vcf_processing','sage'+'/')
    command = 'python ' + python_file + ' -i ' + in_file + ' -o ' + out_dir + ' -t_id ' + tumor1 + ' -n_id ' + normal
    commands.append(command)
    sample = tumor2 + '_vs_' + normal
    in_file = os.path.join(root_in_hmf,'sage_somatic',tumor2+'.sage.somatic.filtered.vcf.gz')
    out_dir = os.path.join(root_out,pt,sample,'vcf_processing','sage'+'/')
    command = 'python ' + python_file + ' -i ' + in_file + ' -o ' + out_dir + ' -t_id ' + tumor2 + ' -n_id ' + normal
    commands.append(command)
    
#Mutect
for pt in samples.keys():
    tumor1 = samples[pt]['tumor1']
    tumor2 = samples[pt]['tumor2']
    normal = samples[pt]['normal']
    sample = tumor1 + '_vs_' + normal
    in_file = os.path.join(root_in_sarek,'variant_calling','mutect2',sample,sample+'.mutect2.filtered.vcf.gz')
    out_dir = os.path.join(root_out,pt,sample,'vcf_processing','mutect'+'/')
    command = 'python ' + python_file + ' -i ' + in_file + ' -o ' + out_dir + ' -t_id ' + tumor1 + ' -n_id ' + normal
    commands.append(command)
    tumor = samples[pt]['tumor2']
    sample = tumor2 + '_vs_' + normal
    in_file = os.path.join(root_in_sarek,'variant_calling','mutect2',sample,sample+'.mutect2.filtered.vcf.gz')
    out_dir = os.path.join(root_out,pt,sample,'vcf_processing','mutect'+'/')
    command = 'python ' + python_file + ' -i ' + in_file + ' -o ' + out_dir + ' -t_id ' + tumor2 + ' -n_id ' + normal
    commands.append(command)
commands

['python ../python_scripts/process_mutect_vcf.py -i /path/to/hmf_pipeline/output/sage_somatic/AQ5181.sage.somatic.filtered.vcf.gz -o ../output/case1/AQ5181_vs_AQ5175/vcf_processing/sage/ -t_id AQ5181 -n_id AQ5175',
 'python ../python_scripts/process_mutect_vcf.py -i /path/to/hmf_pipeline/output/sage_somatic/AQ5187.sage.somatic.filtered.vcf.gz -o ../output/case1/AQ5187_vs_AQ5175/vcf_processing/sage/ -t_id AQ5187 -n_id AQ5175',
 'python ../python_scripts/process_mutect_vcf.py -i /path/to/hmf_pipeline/output/sage_somatic/AQ5182.sage.somatic.filtered.vcf.gz -o ../output/case2/AQ5182_vs_AQ5176/vcf_processing/sage/ -t_id AQ5182 -n_id AQ5176',
 'python ../python_scripts/process_mutect_vcf.py -i /path/to/hmf_pipeline/output/sage_somatic/AQ5188.sage.somatic.filtered.vcf.gz -o ../output/case2/AQ5188_vs_AQ5176/vcf_processing/sage/ -t_id AQ5188 -n_id AQ5176',
 'python ../python_scripts/process_mutect_vcf.py -i /path/to/hmf_pipeline/output/sage_somatic/AQ5180.sage.somatic.filtered.vcf.gz -o ../outp

In [7]:
qmap_pre_params = ['[pre]','. "/home/$USER/miniconda3/etc/profile.d/conda.sh"','conda activate process_vc','[params]','cores = 1','memory = 20G','[jobs]']
qmap_file = qmap_pre_params + commands

In [8]:
#Save qmap file

with open('./qmap_files/01_mutect_sage_process.qmap', 'w') as f:
    for item in qmap_file:
        f.write('%s\n' % item)

## Process strelka vcf

In [9]:
#commands for process vcfs from strelka

python_file = '../python_scripts/process_strelka_v2.9.10_vcf.py'

commands = []

#Strelka
for pt in samples.keys():
    tumor1 = samples[pt]['tumor1']
    tumor2 = samples[pt]['tumor2']
    normal = samples[pt]['normal']
    sample = tumor1 + '_vs_' + normal
    in_file = os.path.join(root_in_sarek,'variant_calling','strelka',sample,sample+'.strelka.somatic_snvs.vcf.gz')
    out_dir = os.path.join(root_out,pt,sample,'vcf_processing','strelka'+'/')
    command = 'python ' + python_file + ' -i ' + in_file + ' -o ' + out_dir + ' -t_id ' + tumor1 + ' -n_id ' + normal
    commands.append(command)
    tumor = samples[pt]['tumor2']
    sample = tumor2 + '_vs_' + normal
    in_file = os.path.join(root_in_sarek,'variant_calling','strelka',sample,sample+'.strelka.somatic_snvs.vcf.gz')
    out_dir = os.path.join(root_out,pt,sample,'vcf_processing','strelka'+'/')
    command = 'python ' + python_file + ' -i ' + in_file + ' -o ' + out_dir + ' -t_id ' + tumor2 + ' -n_id ' + normal
    commands.append(command)
commands

['python ../python_scripts/process_strelka_v2.9.10_vcf.py -i /path/to/sarek/output/variant_calling/strelka/AQ5181_vs_AQ5175/AQ5181_vs_AQ5175.strelka.somatic_snvs.vcf.gz -o ../output/case1/AQ5181_vs_AQ5175/vcf_processing/strelka/ -t_id AQ5181 -n_id AQ5175',
 'python ../python_scripts/process_strelka_v2.9.10_vcf.py -i /path/to/sarek/output/variant_calling/strelka/AQ5187_vs_AQ5175/AQ5187_vs_AQ5175.strelka.somatic_snvs.vcf.gz -o ../output/case1/AQ5187_vs_AQ5175/vcf_processing/strelka/ -t_id AQ5187 -n_id AQ5175',
 'python ../python_scripts/process_strelka_v2.9.10_vcf.py -i /path/to/sarek/output/variant_calling/strelka/AQ5182_vs_AQ5176/AQ5182_vs_AQ5176.strelka.somatic_snvs.vcf.gz -o ../output/case2/AQ5182_vs_AQ5176/vcf_processing/strelka/ -t_id AQ5182 -n_id AQ5176',
 'python ../python_scripts/process_strelka_v2.9.10_vcf.py -i /path/to/sarek/output/variant_calling/strelka/AQ5188_vs_AQ5176/AQ5188_vs_AQ5176.strelka.somatic_snvs.vcf.gz -o ../output/case2/AQ5188_vs_AQ5176/vcf_processing/strelka/ 

In [10]:
qmap_pre_params = ['[pre]','. "/home/$USER/miniconda3/etc/profile.d/conda.sh"','conda activate process_vc','[params]','cores = 1','memory = 20G','[jobs]']
qmap_file = qmap_pre_params + commands
qmap_file

['[pre]',
 '. "/home/$USER/miniconda3/etc/profile.d/conda.sh"',
 'conda activate process_vc',
 '[params]',
 'cores = 1',
 'memory = 20G',
 '[jobs]',
 'python ../python_scripts/process_strelka_v2.9.10_vcf.py -i /path/to/sarek/output/variant_calling/strelka/AQ5181_vs_AQ5175/AQ5181_vs_AQ5175.strelka.somatic_snvs.vcf.gz -o ../output/case1/AQ5181_vs_AQ5175/vcf_processing/strelka/ -t_id AQ5181 -n_id AQ5175',
 'python ../python_scripts/process_strelka_v2.9.10_vcf.py -i /path/to/sarek/output/variant_calling/strelka/AQ5187_vs_AQ5175/AQ5187_vs_AQ5175.strelka.somatic_snvs.vcf.gz -o ../output/case1/AQ5187_vs_AQ5175/vcf_processing/strelka/ -t_id AQ5187 -n_id AQ5175',
 'python ../python_scripts/process_strelka_v2.9.10_vcf.py -i /path/to/sarek/output/variant_calling/strelka/AQ5182_vs_AQ5176/AQ5182_vs_AQ5176.strelka.somatic_snvs.vcf.gz -o ../output/case2/AQ5182_vs_AQ5176/vcf_processing/strelka/ -t_id AQ5182 -n_id AQ5176',
 'python ../python_scripts/process_strelka_v2.9.10_vcf.py -i /path/to/sarek/outp

In [11]:
#Save qmap file

with open('./qmap_files/02_strelka_process.qmap', 'w') as f:
    for item in qmap_file:
        f.write('%s\n' % item)

## Intersect 3 callers and output by chromosome

In [12]:
#commands for process vcfs

python_file = '../python_scripts/intersect_callers.py'

commands = []

for pt in samples.keys():
    tumor = samples[pt]['tumor1']
    normal = samples[pt]['normal']
    sample = tumor + '_vs_' + normal
    in_file_sage = os.path.join(root_out,pt,sample,'vcf_processing','sage',sample+'_process.maf.gz')
    in_file_mutect = os.path.join(root_out,pt,sample,'vcf_processing','mutect',sample+'_process.maf.gz')
    in_file_strelka = os.path.join(root_out,pt,sample,'vcf_processing','strelka',sample+'_process.maf.gz')
    out_dir = os.path.join(root_out,pt,sample,'vcf_processing','intersect'+'/')
    command = 'python ' + python_file + ' -sa ' + in_file_sage + ' -mu ' + in_file_mutect + ' -st ' + in_file_strelka + ' -o ' + out_dir + ' -sn ' + sample + ' -c '
    commands.append(command)
    tumor = samples[pt]['tumor2']
    sample = tumor + '_vs_' + normal
    in_file_sage = os.path.join(root_out,pt,sample,'vcf_processing','sage',sample+'_process.maf.gz')
    in_file_mutect = os.path.join(root_out,pt,sample,'vcf_processing','mutect',sample+'_process.maf.gz')
    in_file_strelka = os.path.join(root_out,pt,sample,'vcf_processing','strelka',sample+'_process.maf.gz')
    out_dir = os.path.join(root_out,pt,sample,'vcf_processing','intersect'+'/')
    command = 'python ' + python_file + ' -sa ' + in_file_sage + ' -mu ' + in_file_mutect + ' -st ' + in_file_strelka + ' -o ' + out_dir + ' -sn ' + sample + ' -c '
    commands.append(command)
    
commands

['python ../python_scripts/intersect_callers.py -sa ../output/case1/AQ5181_vs_AQ5175/vcf_processing/sage/AQ5181_vs_AQ5175_process.maf.gz -mu ../output/case1/AQ5181_vs_AQ5175/vcf_processing/mutect/AQ5181_vs_AQ5175_process.maf.gz -st ../output/case1/AQ5181_vs_AQ5175/vcf_processing/strelka/AQ5181_vs_AQ5175_process.maf.gz -o ../output/case1/AQ5181_vs_AQ5175/vcf_processing/intersect/ -sn AQ5181_vs_AQ5175 -c ',
 'python ../python_scripts/intersect_callers.py -sa ../output/case1/AQ5187_vs_AQ5175/vcf_processing/sage/AQ5187_vs_AQ5175_process.maf.gz -mu ../output/case1/AQ5187_vs_AQ5175/vcf_processing/mutect/AQ5187_vs_AQ5175_process.maf.gz -st ../output/case1/AQ5187_vs_AQ5175/vcf_processing/strelka/AQ5187_vs_AQ5175_process.maf.gz -o ../output/case1/AQ5187_vs_AQ5175/vcf_processing/intersect/ -sn AQ5187_vs_AQ5175 -c ',
 'python ../python_scripts/intersect_callers.py -sa ../output/case2/AQ5182_vs_AQ5176/vcf_processing/sage/AQ5182_vs_AQ5176_process.maf.gz -mu ../output/case2/AQ5182_vs_AQ5176/vcf_proc

In [13]:
qmap_pre_params = ['[pre]','. "/home/$USER/miniconda3/etc/profile.d/conda.sh"','conda activate process_vc','[params]','cores = 1','memory = 20G','[jobs]']
qmap_file = qmap_pre_params + commands
qmap_file

['[pre]',
 '. "/home/$USER/miniconda3/etc/profile.d/conda.sh"',
 'conda activate process_vc',
 '[params]',
 'cores = 1',
 'memory = 20G',
 '[jobs]',
 'python ../python_scripts/intersect_callers.py -sa ../output/case1/AQ5181_vs_AQ5175/vcf_processing/sage/AQ5181_vs_AQ5175_process.maf.gz -mu ../output/case1/AQ5181_vs_AQ5175/vcf_processing/mutect/AQ5181_vs_AQ5175_process.maf.gz -st ../output/case1/AQ5181_vs_AQ5175/vcf_processing/strelka/AQ5181_vs_AQ5175_process.maf.gz -o ../output/case1/AQ5181_vs_AQ5175/vcf_processing/intersect/ -sn AQ5181_vs_AQ5175 -c ',
 'python ../python_scripts/intersect_callers.py -sa ../output/case1/AQ5187_vs_AQ5175/vcf_processing/sage/AQ5187_vs_AQ5175_process.maf.gz -mu ../output/case1/AQ5187_vs_AQ5175/vcf_processing/mutect/AQ5187_vs_AQ5175_process.maf.gz -st ../output/case1/AQ5187_vs_AQ5175/vcf_processing/strelka/AQ5187_vs_AQ5175_process.maf.gz -o ../output/case1/AQ5187_vs_AQ5175/vcf_processing/intersect/ -sn AQ5187_vs_AQ5175 -c ',
 'python ../python_scripts/inters

In [14]:
#Save qmap file

with open('./qmap_files/03_intersect.qmap', 'w') as f:
    for item in qmap_file:
        f.write('%s\n' % item)

## Run VEP 101 from intersect

In [15]:
gnomad_url = '/path/to/gnomad/data/v3.0.0/hg38/'
#run vep command

chroms = list(range(1,23))
chroms = ['chr'+str(chrom) for chrom in chroms]
sex_chroms = ['chrX','chrY']
chroms = chroms + sex_chroms

vep_commands = []
for pt in samples.keys():
    tumor1 = samples[pt]['tumor1']
    normal = samples[pt]['normal']
    sample = tumor1 + '_vs_' + normal
    for chrom in chroms:
        pt_url = os.path.join(root_out,pt,sample,'vcf_processing','intersect',sample + '_' + chrom + '.maf.gz')
        chr_file = 'gnomad.genomes.r3.0.sites.'+chrom+'.vcf.bgz'
        gnomad_chr_file = os.path.join(gnomad_url,chr_file)
        out_file = pt_url.replace('vcf_processing/intersect/', 'vep_processing/')
        out_file = out_file.replace('.maf.gz', '_vep.txt')
        vep_command = 'vep -i '+ pt_url + ' -o STDOUT -tab --assembly GRCh38 --no_stats --cache --symbol --protein --canonical --offline --af_1kg --dir /path/to/vep  --custom ' + gnomad_chr_file + ',gnomADg,vcf,exact,0,AF,NFE > ' + out_file
        vep_commands.append(vep_command)
    tumor2 = samples[pt]['tumor2']
    sample = tumor2 + '_vs_' + normal
    for chrom in chroms:
        pt_url = os.path.join(root_out,pt,sample,'vcf_processing','intersect',sample + '_' + chrom + '.maf.gz')
        chr_file = 'gnomad.genomes.r3.0.sites.'+chrom+'.vcf.bgz'
        gnomad_chr_file = os.path.join(gnomad_url,chr_file)
        out_file = pt_url.replace('vcf_processing/intersect/', 'vep_processing/')
        out_file = out_file.replace('.maf.gz', '_vep.txt')
        vep_command = 'vep -i '+ pt_url + ' -o STDOUT -tab --assembly GRCh38 --no_stats --cache --symbol --protein --canonical --offline --af_1kg --dir /path/to/vep  --custom ' + gnomad_chr_file + ',gnomADg,vcf,exact,0,AF,NFE > ' + out_file
        vep_commands.append(vep_command)
        
vep_commands

['vep -i ../output/case1/AQ5181_vs_AQ5175/vcf_processing/intersect/AQ5181_vs_AQ5175_chr1.maf.gz -o STDOUT -tab --assembly GRCh38 --no_stats --cache --symbol --protein --canonical --offline --af_1kg --dir /path/to/vep  --custom /path/to/gnomad/data/v3.0.0/hg38/gnomad.genomes.r3.0.sites.chr1.vcf.bgz,gnomADg,vcf,exact,0,AF,NFE > ../output/case1/AQ5181_vs_AQ5175/vep_processing/AQ5181_vs_AQ5175_chr1_vep.txt',
 'vep -i ../output/case1/AQ5181_vs_AQ5175/vcf_processing/intersect/AQ5181_vs_AQ5175_chr2.maf.gz -o STDOUT -tab --assembly GRCh38 --no_stats --cache --symbol --protein --canonical --offline --af_1kg --dir /path/to/vep  --custom /path/to/gnomad/data/v3.0.0/hg38/gnomad.genomes.r3.0.sites.chr2.vcf.bgz,gnomADg,vcf,exact,0,AF,NFE > ../output/case1/AQ5181_vs_AQ5175/vep_processing/AQ5181_vs_AQ5175_chr2_vep.txt',
 'vep -i ../output/case1/AQ5181_vs_AQ5175/vcf_processing/intersect/AQ5181_vs_AQ5175_chr3.maf.gz -o STDOUT -tab --assembly GRCh38 --no_stats --cache --symbol --protein --canonical --off

In [16]:
qmap_pre_params = ['[pre]','. "/home/$USER/miniconda3/etc/profile.d/conda.sh"','conda activate vep101','[params]','cores = 1','memory = 8G','[jobs]']
qmap_file = qmap_pre_params + vep_commands
qmap_file
with open('./qmap_files/04_vep_gnomad_v3.qmap', 'w') as f:
    for item in qmap_file:
        f.write('%s\n' % item)

## process_vep101.py

In [17]:
#run process_vep command

python_script = '../python_scripts/process_vep101.py'
process_vep_commands = []
for pt in samples.keys():
    tumor1 = samples[pt]['tumor1']
    tumor2 = samples[pt]['tumor2']
    normal = samples[pt]['normal']
    t1_vs_n = tumor1 + '_vs_' + normal
    t2_vs_n = tumor2 + '_vs_' + normal
    maf_url = os.path.join(root_out,pt,t1_vs_n,'vcf_processing','intersect/')
    vep_url = os.path.join(root_out,pt,t1_vs_n,'vep_processing/')
    command = 'python '+python_script+' --path_input_vep '+vep_url+' --path_input_maf '+maf_url+' --file_name '+t1_vs_n +' --cores 8'
    process_vep_commands.append(command)
    maf_url = os.path.join(root_out,pt,t2_vs_n,'vcf_processing','intersect/')
    vep_url = os.path.join(root_out,pt,t2_vs_n,'vep_processing/')
    command = 'python '+python_script+' --path_input_vep '+vep_url+' --path_input_maf '+maf_url+' --file_name '+t2_vs_n +' --cores 8'
    process_vep_commands.append(command)
    
    
qmap_pre_params = ['[pre]','. "/home/$USER/miniconda3/etc/profile.d/conda.sh"','conda activate process_vc','[params]','cores = 8','memory = 15G','[jobs]']
qmap_file = qmap_pre_params + process_vep_commands
qmap_file

['[pre]',
 '. "/home/$USER/miniconda3/etc/profile.d/conda.sh"',
 'conda activate process_vc',
 '[params]',
 'cores = 8',
 'memory = 15G',
 '[jobs]',
 'python ../python_scripts/process_vep101.py --path_input_vep ../output/case1/AQ5181_vs_AQ5175/vep_processing/ --path_input_maf ../output/case1/AQ5181_vs_AQ5175/vcf_processing/intersect/ --file_name AQ5181_vs_AQ5175 --cores 8',
 'python ../python_scripts/process_vep101.py --path_input_vep ../output/case1/AQ5187_vs_AQ5175/vep_processing/ --path_input_maf ../output/case1/AQ5187_vs_AQ5175/vcf_processing/intersect/ --file_name AQ5187_vs_AQ5175 --cores 8',
 'python ../python_scripts/process_vep101.py --path_input_vep ../output/case2/AQ5182_vs_AQ5176/vep_processing/ --path_input_maf ../output/case2/AQ5182_vs_AQ5176/vcf_processing/intersect/ --file_name AQ5182_vs_AQ5176 --cores 8',
 'python ../python_scripts/process_vep101.py --path_input_vep ../output/case2/AQ5188_vs_AQ5176/vep_processing/ --path_input_maf ../output/case2/AQ5188_vs_AQ5176/vcf_pr

In [18]:
with open('./qmap_files/05_process_vep.qmap', 'w') as f:
    for item in qmap_file:
        f.write('%s\n' % item)

Before proceding, we need to calculate the CCF and establish the threshold for clonality.\
This is performed in this notebook: ```TMB_and_CCF_analysis.ipynb```

##  filter_and_annot.py

In [19]:
#run process_vep command

ccf_json_path = 'ccf_thresholds.json'
ccf_dict = json.load(open(ccf_json_path,'r'))
process_vep_commands = []
for pt in samples.keys():
    tumor = samples[pt]['tumor1']
    normal = samples[pt]['normal']
    t_vs_n = tumor + '_vs_' + normal
    #tumor1
    sample = pt + '_t1'
    ccf = ccf_dict[sample]
    vep_url = os.path.join(root_out,pt,t_vs_n,'process_vep_output/')
    cnv_url = root_in_hmf+'purple/'+tumor+'.purple.cnv.somatic.tsv'
    purity_url = root_in_hmf+'purple/'+tumor+'.purple.purity.tsv'
    output_url = os.path.join(root_out,pt,t_vs_n,'filter_and_annot/')
    script =  '../python_scripts/filter_and_annot_muts.py'
    command = 'python '+script+' -i '+vep_url+' -o '+output_url+' -t_id '+tumor + ' -n_id ' +normal + ' -ccf '+str(ccf) + ' -c intersect' + ' -cnv ' +cnv_url + ' -pur '+purity_url
    process_vep_commands.append(command)
    #tumor2
    tumor = samples[pt]['tumor2']
    t_vs_n = tumor + '_vs_' + normal
    sample = pt + '_t2'
    ccf = ccf_dict[sample]
    vep_url = os.path.join(root_out,pt,t_vs_n,'process_vep_output/')
    cnv_url = root_in_hmf +'purple/'+tumor+'.purple.cnv.somatic.tsv'
    purity_url = root_in_hmf +'purple/'+tumor+'.purple.purity.tsv'
    output_url = os.path.join(root_out,pt,t_vs_n,'filter_and_annot/')
    command = 'python '+script+' -i '+vep_url+' -o '+output_url+' -t_id '+tumor + ' -n_id ' +normal + ' -ccf '+str(ccf) + ' -c intersect'+ ' -cnv ' +cnv_url + ' -pur '+purity_url
    process_vep_commands.append(command)
    
    
qmap_pre_params = ['[pre]','. "/home/$USER/miniconda3/etc/profile.d/conda.sh"','conda activate process_vc','[params]','cores = 8','memory = 15G','[jobs]']
qmap_file = qmap_pre_params + process_vep_commands
qmap_file

['[pre]',
 '. "/home/$USER/miniconda3/etc/profile.d/conda.sh"',
 'conda activate process_vc',
 '[params]',
 'cores = 8',
 'memory = 15G',
 '[jobs]',
 'python ../python_scripts/filter_and_annot_muts.py -i ../output/case1/AQ5181_vs_AQ5175/process_vep_output/ -o ../output/case1/AQ5181_vs_AQ5175/filter_and_annot/ -t_id AQ5181 -n_id AQ5175 -ccf 0.6057787222048805 -c intersect -cnv /path/to/hmf_pipeline/output/purple/AQ5181.purple.cnv.somatic.tsv -pur /path/to/hmf_pipeline/output/purple/AQ5181.purple.purity.tsv',
 'python ../python_scripts/filter_and_annot_muts.py -i ../output/case1/AQ5187_vs_AQ5175/process_vep_output/ -o ../output/case1/AQ5187_vs_AQ5175/filter_and_annot/ -t_id AQ5187 -n_id AQ5175 -ccf 0.5363311165234896 -c intersect -cnv /path/to/hmf_pipeline/output/purple/AQ5187.purple.cnv.somatic.tsv -pur /path/to/hmf_pipeline/output/purple/AQ5187.purple.purity.tsv',
 'python ../python_scripts/filter_and_annot_muts.py -i ../output/case2/AQ5182_vs_AQ5176/process_vep_output/ -o ../output/ca

In [20]:
with open('./qmap_files/06_filter_and_annot.qmap', 'w') as f:
    for item in qmap_file:
        f.write('%s\n' % item)

## Process vcf from GRIDDS (SV)

In [21]:
python_file = '../python_scripts/process_gridds.py'
genomic_positions = '../data/genomic_positions_ensembl.txt.gz'
canonical_transcripts = '../data/ensembl_canonical_transcripts.tsv'

commands = []
for pt in samples.keys():
    #tumor1
    tumor = samples[pt]['tumor1']
    normal = samples[pt]['normal']
    input_vcf = root_in_hmf+'purple/'+tumor+'.purple.sv.vcf.gz'
    output_dir = root_out+tumor+'_vs_'+normal+'/process_sv/gridds/'
    command = 'python '+python_file+' -i '+input_vcf+' -o '+output_dir+' -gp '+genomic_positions+' -ct '+canonical_transcripts+' -t_id '+tumor+' -n_id '+normal
    commands.append(command)
    #tumor2
    tumor = samples[pt]['tumor2']
    input_vcf = root_in_hmf+'purple/'+tumor+'.purple.sv.vcf.gz'
    output_dir = root_out+pt+'/'+tumor+'_vs_'+normal+'/process_sv/gridds/'
    command = 'python '+python_file+' -i '+input_vcf+' -o '+output_dir+' -gp '+genomic_positions+' -ct '+canonical_transcripts+' -t_id '+tumor+' -n_id '+normal
    commands.append(command)
commands

['python ../python_scripts/process_gridds.py -i /path/to/hmf_pipeline/output/purple/AQ5181.purple.sv.vcf.gz -o ../output/AQ5181_vs_AQ5175/process_sv/gridds/ -gp ../data/genomic_positions_ensembl.txt.gz -ct ../data/ensembl_canonical_transcripts.tsv -t_id AQ5181 -n_id AQ5175',
 'python ../python_scripts/process_gridds.py -i /path/to/hmf_pipeline/output/purple/AQ5187.purple.sv.vcf.gz -o ../output/case1/AQ5187_vs_AQ5175/process_sv/gridds/ -gp ../data/genomic_positions_ensembl.txt.gz -ct ../data/ensembl_canonical_transcripts.tsv -t_id AQ5187 -n_id AQ5175',
 'python ../python_scripts/process_gridds.py -i /path/to/hmf_pipeline/output/purple/AQ5182.purple.sv.vcf.gz -o ../output/AQ5182_vs_AQ5176/process_sv/gridds/ -gp ../data/genomic_positions_ensembl.txt.gz -ct ../data/ensembl_canonical_transcripts.tsv -t_id AQ5182 -n_id AQ5176',
 'python ../python_scripts/process_gridds.py -i /path/to/hmf_pipeline/output/purple/AQ5188.purple.sv.vcf.gz -o ../output/case2/AQ5188_vs_AQ5176/process_sv/gridds/ -gp

In [22]:
qmap_pre_params = ['[pre]','. "/home/$USER/miniconda3/etc/profile.d/conda.sh"','conda activate process_vc','[params]','cores = 1','memory = 8G','[jobs]']
qmap_file = qmap_pre_params + commands
qmap_file

['[pre]',
 '. "/home/$USER/miniconda3/etc/profile.d/conda.sh"',
 'conda activate process_vc',
 '[params]',
 'cores = 1',
 'memory = 8G',
 '[jobs]',
 'python ../python_scripts/process_gridds.py -i /path/to/hmf_pipeline/output/purple/AQ5181.purple.sv.vcf.gz -o ../output/AQ5181_vs_AQ5175/process_sv/gridds/ -gp ../data/genomic_positions_ensembl.txt.gz -ct ../data/ensembl_canonical_transcripts.tsv -t_id AQ5181 -n_id AQ5175',
 'python ../python_scripts/process_gridds.py -i /path/to/hmf_pipeline/output/purple/AQ5187.purple.sv.vcf.gz -o ../output/case1/AQ5187_vs_AQ5175/process_sv/gridds/ -gp ../data/genomic_positions_ensembl.txt.gz -ct ../data/ensembl_canonical_transcripts.tsv -t_id AQ5187 -n_id AQ5175',
 'python ../python_scripts/process_gridds.py -i /path/to/hmf_pipeline/output/purple/AQ5182.purple.sv.vcf.gz -o ../output/AQ5182_vs_AQ5176/process_sv/gridds/ -gp ../data/genomic_positions_ensembl.txt.gz -ct ../data/ensembl_canonical_transcripts.tsv -t_id AQ5182 -n_id AQ5176',
 'python ../python

In [23]:
#Save qmap file

with open('./qmap_files/07_process_gridds.qmap', 'w') as f:
    for item in qmap_file:
        f.write('%s\n' % item)

## Process cnv vcf from purple

In [24]:
python_file = '../python_scripts/process_cnv_purple.py'

commands = []
for pt in samples.keys():
    #tumor1
    tumor = samples[pt]['tumor1']
    normal = samples[pt]['normal']
    input_tsv = root_in_hmf+'purple/'+tumor+'.purple.cnv.gene.tsv'
    output_dir = root_out+pt+'/'+tumor+'_vs_'+normal+'/process_cnv/purple/'
    command = 'python '+python_file+' -i '+input_tsv+' -o '+output_dir+' -t_id '+tumor+' -n_id '+normal
    commands.append(command)
    #tumor2
    tumor = samples[pt]['tumor2']
    input_tsv = root_in_hmf+'purple/'+tumor+'.purple.cnv.gene.tsv'
    output_dir = root_out+pt+'/'+tumor+'_vs_'+normal+'/process_cnv/purple/'
    command = 'python '+python_file+' -i '+input_tsv+' -o '+output_dir+' -t_id '+tumor+' -n_id '+normal
    commands.append(command)
commands

['python ../python_scripts/process_cnv_purple.py -i /path/to/hmf_pipeline/output/purple/AQ5181.purple.cnv.gene.tsv -o ../output/case1/AQ5181_vs_AQ5175/process_cnv/purple/ -t_id AQ5181 -n_id AQ5175',
 'python ../python_scripts/process_cnv_purple.py -i /path/to/hmf_pipeline/output/purple/AQ5187.purple.cnv.gene.tsv -o ../output/case1/AQ5187_vs_AQ5175/process_cnv/purple/ -t_id AQ5187 -n_id AQ5175',
 'python ../python_scripts/process_cnv_purple.py -i /path/to/hmf_pipeline/output/purple/AQ5182.purple.cnv.gene.tsv -o ../output/case2/AQ5182_vs_AQ5176/process_cnv/purple/ -t_id AQ5182 -n_id AQ5176',
 'python ../python_scripts/process_cnv_purple.py -i /path/to/hmf_pipeline/output/purple/AQ5188.purple.cnv.gene.tsv -o ../output/case2/AQ5188_vs_AQ5176/process_cnv/purple/ -t_id AQ5188 -n_id AQ5176',
 'python ../python_scripts/process_cnv_purple.py -i /path/to/hmf_pipeline/output/purple/AQ5180.purple.cnv.gene.tsv -o ../output/case3/AQ5180_vs_AQ5174/process_cnv/purple/ -t_id AQ5180 -n_id AQ5174',
 'pyt

In [25]:
qmap_pre_params = ['[pre]','. "/home/$USER/miniconda3/etc/profile.d/conda.sh"','conda activate process_vc','[params]','cores = 1','memory = 8G','[jobs]']
qmap_file = qmap_pre_params + commands
qmap_file

['[pre]',
 '. "/home/$USER/miniconda3/etc/profile.d/conda.sh"',
 'conda activate process_vc',
 '[params]',
 'cores = 1',
 'memory = 8G',
 '[jobs]',
 'python ../python_scripts/process_cnv_purple.py -i /path/to/hmf_pipeline/output/purple/AQ5181.purple.cnv.gene.tsv -o ../output/case1/AQ5181_vs_AQ5175/process_cnv/purple/ -t_id AQ5181 -n_id AQ5175',
 'python ../python_scripts/process_cnv_purple.py -i /path/to/hmf_pipeline/output/purple/AQ5187.purple.cnv.gene.tsv -o ../output/case1/AQ5187_vs_AQ5175/process_cnv/purple/ -t_id AQ5187 -n_id AQ5175',
 'python ../python_scripts/process_cnv_purple.py -i /path/to/hmf_pipeline/output/purple/AQ5182.purple.cnv.gene.tsv -o ../output/case2/AQ5182_vs_AQ5176/process_cnv/purple/ -t_id AQ5182 -n_id AQ5176',
 'python ../python_scripts/process_cnv_purple.py -i /path/to/hmf_pipeline/output/purple/AQ5188.purple.cnv.gene.tsv -o ../output/case2/AQ5188_vs_AQ5176/process_cnv/purple/ -t_id AQ5188 -n_id AQ5176',
 'python ../python_scripts/process_cnv_purple.py -i /pat

In [26]:
#Save qmap file

with open('./qmap_files/08_process_cnv_purple.qmap', 'w') as f:
    for item in qmap_file:
        f.write('%s\n' % item)

# Germline processing

## HaplotypeCaller vcf processing

In [27]:
#commands for process vcfs from HMF

python_file = '../python_scripts/process_haplotype_caller.py'

commands = []

#SAGE
for pt in samples.keys():
    tumor = samples[pt]['tumor1']
    normal = samples[pt]['normal']
    sample = tumor + '_vs_' + normal
    in_file = os.path.join(root_in_hmf,normal,'germline_caller',normal+'.germline.vcf.gz')
    out_dir = os.path.join(root_out,pt,normal,'vcf_processing','haplotype_caller'+'/')
    command = 'python ' + python_file + ' -i ' + in_file + ' -o ' + out_dir + ' -n_id ' + normal + ' -c 24'
    commands.append(command)
    
commands

['python ../python_scripts/process_haplotype_caller.py -i /path/to/hmf_pipeline/output/AQ5175/germline_caller/AQ5175.germline.vcf.gz -o ../output/case1/AQ5175/vcf_processing/haplotype_caller/ -n_id AQ5175 -c 24',
 'python ../python_scripts/process_haplotype_caller.py -i /path/to/hmf_pipeline/output/AQ5176/germline_caller/AQ5176.germline.vcf.gz -o ../output/case2/AQ5176/vcf_processing/haplotype_caller/ -n_id AQ5176 -c 24',
 'python ../python_scripts/process_haplotype_caller.py -i /path/to/hmf_pipeline/output/AQ5174/germline_caller/AQ5174.germline.vcf.gz -o ../output/case3/AQ5174/vcf_processing/haplotype_caller/ -n_id AQ5174 -c 24',
 'python ../python_scripts/process_haplotype_caller.py -i /path/to/hmf_pipeline/output/AW8061/germline_caller/AW8061.germline.vcf.gz -o ../output/case4/AW8061/vcf_processing/haplotype_caller/ -n_id AW8061 -c 24']

In [28]:
qmap_pre_params = ['[pre]','. "/home/$USER/miniconda3/etc/profile.d/conda.sh"','conda activate process_vc','[params]','cores = 24','memory = 100G','[jobs]']
qmap_file = qmap_pre_params + commands
qmap_file

['[pre]',
 '. "/home/$USER/miniconda3/etc/profile.d/conda.sh"',
 'conda activate process_vc',
 '[params]',
 'cores = 24',
 'memory = 100G',
 '[jobs]',
 'python ../python_scripts/process_haplotype_caller.py -i /path/to/hmf_pipeline/output/AQ5175/germline_caller/AQ5175.germline.vcf.gz -o ../output/case1/AQ5175/vcf_processing/haplotype_caller/ -n_id AQ5175 -c 24',
 'python ../python_scripts/process_haplotype_caller.py -i /path/to/hmf_pipeline/output/AQ5176/germline_caller/AQ5176.germline.vcf.gz -o ../output/case2/AQ5176/vcf_processing/haplotype_caller/ -n_id AQ5176 -c 24',
 'python ../python_scripts/process_haplotype_caller.py -i /path/to/hmf_pipeline/output/AQ5174/germline_caller/AQ5174.germline.vcf.gz -o ../output/case3/AQ5174/vcf_processing/haplotype_caller/ -n_id AQ5174 -c 24',
 'python ../python_scripts/process_haplotype_caller.py -i /path/to/hmf_pipeline/output/AW8061/germline_caller/AW8061.germline.vcf.gz -o ../output/case4/AW8061/vcf_processing/haplotype_caller/ -n_id AW8061 -c 24

In [29]:
#Save qmap file

with open('./qmap_files/09_haplocall_germline_process.qmap', 'w') as f:
    for item in qmap_file:
        f.write('%s\n' % item)

## Run vep on HaplotypeCaller

In [30]:
#run vep command

vep_commands = []
for pt in samples.keys():
    tumor1 = samples[pt]['tumor1']
    normal = samples[pt]['normal']
    for chrom in chroms:
        pt_url = os.path.join(root_out,pt,normal,'vcf_processing','haplotype_caller',normal + '_' + chrom + '.maf.gz')
        chr_file = 'gnomad.genomes.r3.0.sites.'+chrom+'.vcf.bgz'
        gnomad_chr_file = os.path.join(gnomad_url,chr_file)
        out_file = pt_url.replace('vcf_processing/haplotype_caller/', 'vep_processing/haplotype_caller/')
        out_file = out_file.replace('.maf.gz', '_vep.txt')
        vep_command = 'vep -i '+ pt_url + ' -o STDOUT -tab --assembly GRCh38 --no_stats --cache --symbol --protein --canonical --offline --af_1kg --dir /path/to/vep  --custom ' + gnomad_chr_file + ',gnomADg,vcf,exact,0,AF,NFE > ' + out_file
        vep_commands.append(vep_command)
        
vep_commands

['vep -i ../output/case1/AQ5175/vcf_processing/haplotype_caller/AQ5175_chr1.maf.gz -o STDOUT -tab --assembly GRCh38 --no_stats --cache --symbol --protein --canonical --offline --af_1kg --dir /path/to/vep  --custom /path/to/gnomad/data/v3.0.0/hg38/gnomad.genomes.r3.0.sites.chr1.vcf.bgz,gnomADg,vcf,exact,0,AF,NFE > ../output/case1/AQ5175/vep_processing/haplotype_caller/AQ5175_chr1_vep.txt',
 'vep -i ../output/case1/AQ5175/vcf_processing/haplotype_caller/AQ5175_chr2.maf.gz -o STDOUT -tab --assembly GRCh38 --no_stats --cache --symbol --protein --canonical --offline --af_1kg --dir /path/to/vep  --custom /path/to/gnomad/data/v3.0.0/hg38/gnomad.genomes.r3.0.sites.chr2.vcf.bgz,gnomADg,vcf,exact,0,AF,NFE > ../output/case1/AQ5175/vep_processing/haplotype_caller/AQ5175_chr2_vep.txt',
 'vep -i ../output/case1/AQ5175/vcf_processing/haplotype_caller/AQ5175_chr3.maf.gz -o STDOUT -tab --assembly GRCh38 --no_stats --cache --symbol --protein --canonical --offline --af_1kg --dir /path/to/vep  --custom /p

In [31]:
qmap_pre_params = ['[pre]','. "/home/$USER/miniconda3/etc/profile.d/conda.sh"','conda activate vep101','[params]','cores = 1','memory = 8G','[jobs]']
qmap_file = qmap_pre_params + vep_commands
qmap_file

['[pre]',
 '. "/home/$USER/miniconda3/etc/profile.d/conda.sh"',
 'conda activate vep101',
 '[params]',
 'cores = 1',
 'memory = 8G',
 '[jobs]',
 'vep -i ../output/case1/AQ5175/vcf_processing/haplotype_caller/AQ5175_chr1.maf.gz -o STDOUT -tab --assembly GRCh38 --no_stats --cache --symbol --protein --canonical --offline --af_1kg --dir /path/to/vep  --custom /path/to/gnomad/data/v3.0.0/hg38/gnomad.genomes.r3.0.sites.chr1.vcf.bgz,gnomADg,vcf,exact,0,AF,NFE > ../output/case1/AQ5175/vep_processing/haplotype_caller/AQ5175_chr1_vep.txt',
 'vep -i ../output/case1/AQ5175/vcf_processing/haplotype_caller/AQ5175_chr2.maf.gz -o STDOUT -tab --assembly GRCh38 --no_stats --cache --symbol --protein --canonical --offline --af_1kg --dir /path/to/vep  --custom /path/to/gnomad/data/v3.0.0/hg38/gnomad.genomes.r3.0.sites.chr2.vcf.bgz,gnomADg,vcf,exact,0,AF,NFE > ../output/case1/AQ5175/vep_processing/haplotype_caller/AQ5175_chr2_vep.txt',
 'vep -i ../output/case1/AQ5175/vcf_processing/haplotype_caller/AQ5175_c

In [32]:
with open('./qmap_files/10_run_vep_gnomad_v3_haplocall.qmap', 'w') as f:
    for item in qmap_file:
        f.write('%s\n' % item)

## Process vep output from HaplotypeCaller

In [33]:
#run process_vep command

python_file = '../python_scripts/process_vep101_haplocall.py'
process_vep_commands = []
for pt in samples.keys():
    for chrom in chroms:
        normal = samples[pt]['normal']
        file_name = normal + '_' + chrom + '_vep.txt'
        maf_url = os.path.join(root_out,pt,normal,'vcf_processing','haplotype_caller/')
        vep_url = os.path.join(root_out,pt,normal,'vep_processing','haplotype_caller/')
        command = 'python '+python_file+' --path_input_vep '+vep_url+' --path_input_maf '+maf_url+' --file_name '+file_name +' -c 16'
        process_vep_commands.append(command)

qmap_pre_params = ['[pre]','. "/home/$USER/miniconda3/etc/profile.d/conda.sh"','conda activate process_vc','[params]','cores = 16','memory = 100G','[jobs]']
qmap_file = qmap_pre_params + process_vep_commands
qmap_file

['[pre]',
 '. "/home/$USER/miniconda3/etc/profile.d/conda.sh"',
 'conda activate process_vc',
 '[params]',
 'cores = 16',
 'memory = 100G',
 '[jobs]',
 'python ../python_scripts/process_vep101_haplocall.py --path_input_vep ../output/case1/AQ5175/vep_processing/haplotype_caller/ --path_input_maf ../output/case1/AQ5175/vcf_processing/haplotype_caller/ --file_name AQ5175_chr1_vep.txt -c 16',
 'python ../python_scripts/process_vep101_haplocall.py --path_input_vep ../output/case1/AQ5175/vep_processing/haplotype_caller/ --path_input_maf ../output/case1/AQ5175/vcf_processing/haplotype_caller/ --file_name AQ5175_chr2_vep.txt -c 16',
 'python ../python_scripts/process_vep101_haplocall.py --path_input_vep ../output/case1/AQ5175/vep_processing/haplotype_caller/ --path_input_maf ../output/case1/AQ5175/vcf_processing/haplotype_caller/ --file_name AQ5175_chr3_vep.txt -c 16',
 'python ../python_scripts/process_vep101_haplocall.py --path_input_vep ../output/case1/AQ5175/vep_processing/haplotype_caller

In [34]:
with open('./qmap_files/11_process_vep_haplocall.qmap', 'w') as f:
    for item in qmap_file:
        f.write('%s\n' % item)

## Filter and prepare germline alterations table (HaplotypeCaller)

In [35]:
python_file = '../python_scripts/filter_and_annot_muts.py'

commands = []
for pt in samples.keys():
    #tumor1
    normal = samples[pt]['normal']
    input_dir =  root_out+pt+'/'+normal+'/process_vep_output/haplotype_caller/'
    output_dir = root_out+pt+'/'+normal+'/filter_and_annot/haplotype_caller/'
    command = 'python '+python_file+' -i '+input_dir+' -o '+output_dir+' -n_id '+normal+' -c hc --gnomad_threshold 0.01'
    commands.append(command)
commands

['python ../python_scripts/filter_and_annot_muts.py -i ../output/case1/AQ5175/process_vep_output/haplotype_caller/ -o ../output/case1/AQ5175/filter_and_annot/haplotype_caller/ -n_id AQ5175 -c hc --gnomad_threshold 0.01',
 'python ../python_scripts/filter_and_annot_muts.py -i ../output/case2/AQ5176/process_vep_output/haplotype_caller/ -o ../output/case2/AQ5176/filter_and_annot/haplotype_caller/ -n_id AQ5176 -c hc --gnomad_threshold 0.01',
 'python ../python_scripts/filter_and_annot_muts.py -i ../output/case3/AQ5174/process_vep_output/haplotype_caller/ -o ../output/case3/AQ5174/filter_and_annot/haplotype_caller/ -n_id AQ5174 -c hc --gnomad_threshold 0.01',
 'python ../python_scripts/filter_and_annot_muts.py -i ../output/case4/AW8061/process_vep_output/haplotype_caller/ -o ../output/case4/AW8061/filter_and_annot/haplotype_caller/ -n_id AW8061 -c hc --gnomad_threshold 0.01']

In [36]:
qmap_pre_params = ['[pre]','. "/home/$USER/miniconda3/etc/profile.d/conda.sh"','conda activate process_vc','[params]','cores = 1','memory = 8G','[jobs]']
qmap_file = qmap_pre_params + commands
qmap_file

['[pre]',
 '. "/home/$USER/miniconda3/etc/profile.d/conda.sh"',
 'conda activate process_vc',
 '[params]',
 'cores = 1',
 'memory = 8G',
 '[jobs]',
 'python ../python_scripts/filter_and_annot_muts.py -i ../output/case1/AQ5175/process_vep_output/haplotype_caller/ -o ../output/case1/AQ5175/filter_and_annot/haplotype_caller/ -n_id AQ5175 -c hc --gnomad_threshold 0.01',
 'python ../python_scripts/filter_and_annot_muts.py -i ../output/case2/AQ5176/process_vep_output/haplotype_caller/ -o ../output/case2/AQ5176/filter_and_annot/haplotype_caller/ -n_id AQ5176 -c hc --gnomad_threshold 0.01',
 'python ../python_scripts/filter_and_annot_muts.py -i ../output/case3/AQ5174/process_vep_output/haplotype_caller/ -o ../output/case3/AQ5174/filter_and_annot/haplotype_caller/ -n_id AQ5174 -c hc --gnomad_threshold 0.01',
 'python ../python_scripts/filter_and_annot_muts.py -i ../output/case4/AW8061/process_vep_output/haplotype_caller/ -o ../output/case4/AW8061/filter_and_annot/haplotype_caller/ -n_id AW8061 

In [37]:
#Save qmap file

with open('./qmap_files/12_filter_and_annot_germline_haplocall.qmap', 'w') as f:
    for item in qmap_file:
        f.write('%s\n' % item)

## Process vcf from GRIPSS (Germline SV)

In [38]:
python_file = '../python_scripts/process_gridds.py'
genomic_positions = '../data/genomic_positions_ensembl.txt.gz'
canonical_transcripts = '../data/ensembl_canonical_transcripts.tsv'

commands = []
for pt in samples.keys():
    #tumor1
    tumor = samples[pt]['tumor1']
    normal = samples[pt]['normal']
    input_vcf = root_in_hmf+'gripss_germline/'+normal+'.gripss.filtered.germline.vcf.gz'
    output_dir = root_out+pt+'/'+normal+'/process_sv/gripss/t1/'
    command = 'python '+python_file+' -i '+input_vcf+' -o '+output_dir+' -gp '+genomic_positions+' -ct '+canonical_transcripts+' -n_id '+normal+' --is_germline'
    commands.append(command)
    #tumor2
    tumor = samples[pt]['tumor2']
    input_vcf = root_in_hmf+'gripss_germline/'+normal+'.gripss.filtered.germline.vcf.gz'
    output_dir = root_out+pt+'/'+normal+'/process_sv/gripss/t2/'
    command = 'python '+python_file+' -i '+input_vcf+' -o '+output_dir+' -gp '+genomic_positions+' -ct '+canonical_transcripts+' -n_id '+normal+' --is_germline'
    commands.append(command)
commands

['python ../python_scripts/process_gridds.py -i /path/to/hmf_pipeline/output/gripss_germline/AQ5175.gripss.filtered.germline.vcf.gz -o ../output/case1/AQ5175/process_sv/gripss/t1/ -gp ../data/genomic_positions_ensembl.txt.gz -ct ../data/ensembl_canonical_transcripts.tsv -n_id AQ5175 --is_germline',
 'python ../python_scripts/process_gridds.py -i /path/to/hmf_pipeline/output/gripss_germline/AQ5175.gripss.filtered.germline.vcf.gz -o ../output/case1/AQ5175/process_sv/gripss/t2/ -gp ../data/genomic_positions_ensembl.txt.gz -ct ../data/ensembl_canonical_transcripts.tsv -n_id AQ5175 --is_germline',
 'python ../python_scripts/process_gridds.py -i /path/to/hmf_pipeline/output/gripss_germline/AQ5176.gripss.filtered.germline.vcf.gz -o ../output/case2/AQ5176/process_sv/gripss/t1/ -gp ../data/genomic_positions_ensembl.txt.gz -ct ../data/ensembl_canonical_transcripts.tsv -n_id AQ5176 --is_germline',
 'python ../python_scripts/process_gridds.py -i /path/to/hmf_pipeline/output/gripss_germline/AQ5176.

In [39]:
qmap_pre_params = ['[pre]','. "/home/$USER/miniconda3/etc/profile.d/conda.sh"','conda activate process_vc','[params]','cores = 1','memory = 8G','[jobs]']
qmap_file = qmap_pre_params + commands
qmap_file

['[pre]',
 '. "/home/$USER/miniconda3/etc/profile.d/conda.sh"',
 'conda activate process_vc',
 '[params]',
 'cores = 1',
 'memory = 8G',
 '[jobs]',
 'python ../python_scripts/process_gridds.py -i /path/to/hmf_pipeline/output/gripss_germline/AQ5175.gripss.filtered.germline.vcf.gz -o ../output/case1/AQ5175/process_sv/gripss/t1/ -gp ../data/genomic_positions_ensembl.txt.gz -ct ../data/ensembl_canonical_transcripts.tsv -n_id AQ5175 --is_germline',
 'python ../python_scripts/process_gridds.py -i /path/to/hmf_pipeline/output/gripss_germline/AQ5175.gripss.filtered.germline.vcf.gz -o ../output/case1/AQ5175/process_sv/gripss/t2/ -gp ../data/genomic_positions_ensembl.txt.gz -ct ../data/ensembl_canonical_transcripts.tsv -n_id AQ5175 --is_germline',
 'python ../python_scripts/process_gridds.py -i /path/to/hmf_pipeline/output/gripss_germline/AQ5176.gripss.filtered.germline.vcf.gz -o ../output/case2/AQ5176/process_sv/gripss/t1/ -gp ../data/genomic_positions_ensembl.txt.gz -ct ../data/ensembl_canonic

In [40]:
#Save qmap file

with open('./qmap_files/13_process_gripss_germline.qmap', 'w') as f:
    for item in qmap_file:
        f.write('%s\n' % item)