# Create a QMAP file to filter the annotated melanoma sample with a Python script

In [1]:
# Create a file with this information for all samples

## Read the files and configure paths and commands

In [12]:
import os, sys

# Import files from path

# Directory with the VEP output for the melanoma sample (annotated variants tables)
path_mel_annot = "/workspace/projects/sjd_melos/vep/vep_output_files/melanoma/"

# Directory with the VEP input for the melanoma sample (variants tables)
path_mel_mut = "/workspace/projects/sjd_melos/vep/vep_input_files/melanoma/"

# Scan each directory exactly once; os.listdir returns the names in arbitrary order
files_mel_annot = os.listdir(path_mel_annot)
files_mel_mut = os.listdir(path_mel_mut)

# Print the stored listings (the very lists the later cells rely on) rather than
# re-reading the directories, so what is shown here is what the job builder sees
print('These are the files annotated from melanoma sample: \n', files_mel_annot)
print('These are the files with variants from melanoma sample: \n', files_mel_mut)

These are the files annotated from melanoma sample: 
 ['chr1.tsv.gz', 'chr8.tsv.gz', 'chr12.tsv.gz', 'chr9.tsv.gz', 'chr5.tsv.gz', 'chr11.tsv.gz', 'chr2.tsv.gz', 'chr7.tsv.gz', 'chrX.tsv.gz', 'chr6.tsv.gz', 'chr13.tsv.gz', 'chr14.tsv.gz', 'chr15.tsv.gz', 'chr16.tsv.gz', 'chr17.tsv.gz', 'chr18.tsv.gz', 'chr19.tsv.gz', 'chr20.tsv.gz', 'chr21.tsv.gz', 'chr22.tsv.gz', 'chr3.tsv.gz', 'chr10.tsv.gz', 'chrY.tsv.gz', 'chr4.tsv.gz']
These are the files with variants from melanoma sample: 
 ['chr1.tsv.gz', 'chrY.tsv.gz', 'chr2.tsv.gz', 'chr3.tsv.gz', 'chr4.tsv.gz', 'chr5.tsv.gz', 'chr6.tsv.gz', 'chr7.tsv.gz', 'chr8.tsv.gz', 'chr9.tsv.gz', 'chr10.tsv.gz', 'chr11.tsv.gz', 'chr12.tsv.gz', 'chr13.tsv.gz', 'chr14.tsv.gz', 'chr15.tsv.gz', 'chr16.tsv.gz', 'chr17.tsv.gz', 'chr18.tsv.gz', 'chr19.tsv.gz', 'chr20.tsv.gz', 'chr21.tsv.gz', 'chr22.tsv.gz', 'chrX.tsv.gz']


In [13]:
# Destination directory for the processed files produced by the QMAP run.
# It is used as a plain string prefix when the job commands are built, so the trailing "/" matters.
path_mel_out = "/workspace/projects/sjd_melos/vep/vep_filtering_output/melanoma/"

In [14]:
# Building blocks of each job command, listed in the order they appear on the command line.
# The surrounding spaces are part of the strings because the pieces are joined by plain concatenation.

# Interpreter call followed by the filtering script it runs
command = "python3 "
script = "/workspace/projects/sjd_melos/scripts/VEP_filtering_Melanoma.py"

# Command-line flags passed to the script, each followed by a file path
param_annot = " --input_annot "
param_mut = " --input_mut "
param_out = " --output_file "

In [15]:
# Per-chromosome file names: autosomes 1-22 first, then the sex chromosomes X and Y
autosome_labels = [str(number) for number in range(1, 23)]
chrom = ['chr' + label + '.tsv.gz' for label in autosome_labels + ['X', 'Y']]
chrom

['chr1.tsv.gz',
 'chr2.tsv.gz',
 'chr3.tsv.gz',
 'chr4.tsv.gz',
 'chr5.tsv.gz',
 'chr6.tsv.gz',
 'chr7.tsv.gz',
 'chr8.tsv.gz',
 'chr9.tsv.gz',
 'chr10.tsv.gz',
 'chr11.tsv.gz',
 'chr12.tsv.gz',
 'chr13.tsv.gz',
 'chr14.tsv.gz',
 'chr15.tsv.gz',
 'chr16.tsv.gz',
 'chr17.tsv.gz',
 'chr18.tsv.gz',
 'chr19.tsv.gz',
 'chr20.tsv.gz',
 'chr21.tsv.gz',
 'chr22.tsv.gz',
 'chrX.tsv.gz',
 'chrY.tsv.gz']

## Apply paths and commands to samples to create jobs

In [16]:
# Build one job command per chromosome for the melanoma sample.
# Every command points at two inputs (the annotated table and the variants table), so a job is
# only emitted when the file is present in BOTH input directories; checking only the annotated
# directory would let through commands that reference a variants file that is not there.
serie_mel = []
skipped_mel = []
for c in chrom:
    if c in files_mel_annot and c in files_mel_mut:
        # <command><script> --input_annot <annot file> --input_mut <variants file> --output_file <output file>
        serie_mel.append(command + script + param_annot + path_mel_annot + c + param_mut + path_mel_mut + c + param_out + path_mel_out + c)
    else:
        skipped_mel.append(c)

# Make omissions visible instead of silently producing fewer jobs than chromosomes
if skipped_mel:
    print('Chromosomes skipped because an input file is missing: \n', skipped_mel)
serie_mel

['python3 /workspace/projects/sjd_melos/scripts/VEP_filtering_Melanoma.py --input_annot /workspace/projects/sjd_melos/vep/vep_output_files/melanoma/chr1.tsv.gz --input_mut /workspace/projects/sjd_melos/vep/vep_input_files/melanoma/chr1.tsv.gz --output_file /workspace/projects/sjd_melos/vep/vep_filtering_output/melanoma/chr1.tsv.gz',
 'python3 /workspace/projects/sjd_melos/scripts/VEP_filtering_Melanoma.py --input_annot /workspace/projects/sjd_melos/vep/vep_output_files/melanoma/chr2.tsv.gz --input_mut /workspace/projects/sjd_melos/vep/vep_input_files/melanoma/chr2.tsv.gz --output_file /workspace/projects/sjd_melos/vep/vep_filtering_output/melanoma/chr2.tsv.gz',
 'python3 /workspace/projects/sjd_melos/scripts/VEP_filtering_Melanoma.py --input_annot /workspace/projects/sjd_melos/vep/vep_output_files/melanoma/chr3.tsv.gz --input_mut /workspace/projects/sjd_melos/vep/vep_input_files/melanoma/chr3.tsv.gz --output_file /workspace/projects/sjd_melos/vep/vep_filtering_output/melanoma/chr3.tsv.

In [17]:
# Collapse the list of commands into a single text with one command per line
# (str.join places the separator between consecutive elements of the list)
line_break = '\n'
mel_result = line_break.join(serie_mel)
print(mel_result)

python3 /workspace/projects/sjd_melos/scripts/VEP_filtering_Melanoma.py --input_annot /workspace/projects/sjd_melos/vep/vep_output_files/melanoma/chr1.tsv.gz --input_mut /workspace/projects/sjd_melos/vep/vep_input_files/melanoma/chr1.tsv.gz --output_file /workspace/projects/sjd_melos/vep/vep_filtering_output/melanoma/chr1.tsv.gz
python3 /workspace/projects/sjd_melos/scripts/VEP_filtering_Melanoma.py --input_annot /workspace/projects/sjd_melos/vep/vep_output_files/melanoma/chr2.tsv.gz --input_mut /workspace/projects/sjd_melos/vep/vep_input_files/melanoma/chr2.tsv.gz --output_file /workspace/projects/sjd_melos/vep/vep_filtering_output/melanoma/chr2.tsv.gz
python3 /workspace/projects/sjd_melos/scripts/VEP_filtering_Melanoma.py --input_annot /workspace/projects/sjd_melos/vep/vep_output_files/melanoma/chr3.tsv.gz --input_mut /workspace/projects/sjd_melos/vep/vep_input_files/melanoma/chr3.tsv.gz --output_file /workspace/projects/sjd_melos/vep/vep_filtering_output/melanoma/chr3.tsv.gz
python3

## Add header and join all samples

In [18]:
# Header of the QMAP file: the [pre] section (conda activation), the [params] section
# (cores and memory) and the [jobs] marker that the job commands follow.
header_sections = [
    '[pre] \n. /home/$USER/miniconda3/etc/profile.d/conda.sh \nconda activate melos',
    '[params]',
    'cores = 1',
    'memory = 8G',
    '[jobs]',
]
# Join into the final text; note that the result does not end with a newline
header = '\n'.join(header_sections)
print(header)

[pre] 
. /home/$USER/miniconda3/etc/profile.d/conda.sh 
conda activate melos
[params]
cores = 1
memory = 8G
[jobs]


In [13]:
# Full text of the file to export: header first, then the job commands.
# The newline between the two parts is needed because header ends with '[jobs]' and no
# trailing line break, so without it the first command would be glued to that line.
qmap = '\n'.join([header, mel_result])
print(qmap)

[pre]
[params]
cores = 1
memory = 8G
[jobs]
python3 /workspace/projects/sjd_melos/scripts/VEP_filtering_Melanoma.py --input_annot /workspace/projects/sjd_melos/vep/vep_output_files/melanoma/chr1.tsv.gz --input_mut /workspace/projects/sjd_melos/vep/vep_input_files/melanoma/chr1.tsv.gz --output_file /workspace/projects/sjd_melos/vep/vep_filtering_output/melanoma/chr1.tsv.gz
python3 /workspace/projects/sjd_melos/scripts/VEP_filtering_Melanoma.py --input_annot /workspace/projects/sjd_melos/vep/vep_output_files/melanoma/chr2.tsv.gz --input_mut /workspace/projects/sjd_melos/vep/vep_input_files/melanoma/chr2.tsv.gz --output_file /workspace/projects/sjd_melos/vep/vep_filtering_output/melanoma/chr2.tsv.gz
python3 /workspace/projects/sjd_melos/scripts/VEP_filtering_Melanoma.py --input_annot /workspace/projects/sjd_melos/vep/vep_output_files/melanoma/chr3.tsv.gz --input_mut /workspace/projects/sjd_melos/vep/vep_input_files/melanoma/chr3.tsv.gz --output_file /workspace/projects/sjd_melos/vep/vep_f

In [None]:
## TODO: check the format of the generated QMAP text before saving it

## Save as a QMAP file

In [38]:
# NOTE: the export below is disabled (commented out); uncomment it to write the QMAP file to disk.
# with open('/workspace/projects/sjd_melos/vep/vep_filtering_output/VEP_processing_python_script_qmap.qmap', 'w') as f:
    # for item in qmap:
        # f.write(item) #this respects the format from previous code
# NOTE(review): qmap is a single string, so the loop above iterates over it character by character;
# a single f.write(qmap) would write the same content.