# VEP conversion of common mutations table: lung sample

In [1]:
# Needed basic packages
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
from matplotlib_venn import venn3
import time

from tqdm import tqdm_notebook
from tqdm.notebook import tqdm_notebook
tqdm_notebook.pandas()
from tqdm.notebook import tqdm
tqdm.pandas()
from tqdm.notebook import tqdm_notebook
tqdm_notebook.pandas()

In [2]:
# Load the lung mutation table: a tab-separated, gzip-compressed file whose first row is the header.
# NOTE(review): the previous comment described this as the CLONAL mutations shared by the three
# callers, said all melanoma mutations are considered clonal, and referred to `mel_mut_df`.
# The file name says "Lung ... all_mutations" -- confirm which mutation set this file holds.
clmut_df = pd.read_csv(
    '/workspace/projects/sjd_melos/MAFs_tables/Lung_CCF_Purple_all_mutations.tsv.gz',
    sep="\t",
    header=0,
)

In [3]:
# The chromosome column must be named '#CHROM' (leading '#' included) so that VEP
# recognizes the exported table as a VCF file.
clmut_df = clmut_df.rename({'CHROM': '#CHROM'}, axis='columns')

In [6]:
# Preview the rows whose '#CHROM' value is 'chr3'
chr3_mask = clmut_df['#CHROM'].eq('chr3')
clmut_df.loc[chr3_mask]

Unnamed: 0,#CHROM,POS,ID,REF,ALT,QUAL,FILTER,INFO,FORMAT,NORMAL,...,t_ref_reads,n_ref_reads,VAF,mut_type,Caller_intersec,SAMPLE,mut,CN,CCF,clonality
3252,chr3,75321,.,C,A,.,PASS,"AS_FilterStatus=SITE;AS_SB_TABLE=114,108|34,36...",GT:AD:AF:DP:F1R2:F2R1:FAD:SB,"0/0:138,0:7.290e-03:138:68,0:65,0:134,0:66,72,0,0",...,84,138,0.454545,SNV,Mutect_Strelka,sarcoma_lung,chr3_75321_C_A,1.8046,1.047545,clonal
3253,chr3,113106,.,T,C,.,PASS,"AS_FilterStatus=SITE;AS_SB_TABLE=118,147|17,14...",GT:AD:AF:DP:F1R2:F2R1:FAD:SB,"0/0:132,0:7.616e-03:132:57,0:64,0:128,0:57,75,0,0",...,133,132,0.189024,SNV,Mutect_Strelka,sarcoma_lung,chr3_113106_T_C,1.8046,0.435626,clonal
3254,chr3,275198,.,T,G,.,PASS,"AS_FilterStatus=SITE;AS_SB_TABLE=92,126|40,40;...",GT:AD:AF:DP:F1R2:F2R1:FAD:SB,"0/0:131,0:7.541e-03:131:58,0:67,0:130,0:53,78,0,0",...,87,131,0.479042,SNV,Mutect_Strelka,sarcoma_lung,chr3_275198_T_G,1.8046,1.104000,clonal
3255,chr3,295753,.,G,C,.,PASS,"AS_FilterStatus=SITE;AS_SB_TABLE=122,108|33,27...",GT:AD:AF:DP:F1R2:F2R1:FAD:SB,"0/0:125,0:7.840e-03:125:59,0:63,0:124,0:69,56,0,0",...,105,125,0.363636,SNV,Mutect_Strelka,sarcoma_lung,chr3_295753_G_C,1.8046,0.838036,clonal
3256,chr3,432189,.,C,G,.,PASS,"AS_FilterStatus=SITE;AS_SB_TABLE=104,85|38,38;...",GT:AD:AF:DP:F1R2:F2R1:FAD:SB,"0/0:108,0:9.100e-03:108:46,0:49,0:105,0:53,55,0,0",...,81,108,0.484076,SNV,Mutect_Strelka,sarcoma_lung,chr3_432189_C_G,1.8046,1.115603,clonal
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4641,chr3,196113352,.,C,G,.,PASS,"AS_FilterStatus=SITE;AS_SB_TABLE=74,50|43,17;D...",GT:AD:AF:DP:F1R2:F2R1:FAD:SB,"0/0:99,0:9.856e-03:99:37,0:38,0:97,0:55,44,0,0",...,25,99,0.705882,SNV,Mutect_Strelka,sarcoma_lung,chr3_196113352_C_G,1.3944,1.337224,clonal
4642,chr3,196667867,.,G,T,.,PASS,"AS_FilterStatus=SITE;AS_SB_TABLE=78,68|40,49;D...",GT:AD:AF:DP:F1R2:F2R1:FAD:SB,"0/0:121,0:8.286e-03:121:57,0:60,0:117,0:66,55,0,0",...,25,121,0.780702,SNV,Mutect_Strelka,sarcoma_lung,chr3_196667867_G_T,1.3944,1.478961,clonal
4643,chr3,196789208,.,T,G,.,PASS,"AS_FilterStatus=SITE;AS_SB_TABLE=86,91|43,51;D...",GT:AD:AF:DP:F1R2:F2R1:FAD:SB,"0/0:124,0:7.872e-03:124:58,0:64,0:123,0:63,61,0,0",...,53,124,0.639456,SNV,Mutect_Strelka,sarcoma_lung,chr3_196789208_T_G,1.3944,1.211385,clonal
4644,chr3,197998949,.,A,C,.,PASS,"AS_FilterStatus=SITE;AS_SB_TABLE=121,132|2,2;D...",GT:AD:AF:DP:F1R2:F2R1:FAD:SB,"0/0:121,0:7.946e-03:121:47,0:63,0:118,0:61,60,0,0",...,132,121,0.029412,SNV,Mutect_Strelka,sarcoma_lung,chr3_197998949_A_C,1.3944,0.055718,subclonal


In [5]:
# Build the list of chromosome labels: chr1 ... chr22 followed by chrX and chrY
autosome_labels = [str(number) for number in range(1, 23)]
chroms = ['chr' + label for label in autosome_labels + ['X', 'Y']]
chroms

['chr1',
 'chr2',
 'chr3',
 'chr4',
 'chr5',
 'chr6',
 'chr7',
 'chr8',
 'chr9',
 'chr10',
 'chr11',
 'chr12',
 'chr13',
 'chr14',
 'chr15',
 'chr16',
 'chr17',
 'chr18',
 'chr19',
 'chr20',
 'chr21',
 'chr22',
 'chrX',
 'chrY']

In [7]:
# Split the mutation table by chromosome and write one gzip-compressed,
# tab-separated file per entry of `chroms` into the VEP input folder.
import os

path = '/workspace/projects/sjd_melos/vep/vep_input_files/lung/'

# to_csv does not create missing folders, so make sure the output directory
# exists before writing (no-op when it is already there).
os.makedirs(path, exist_ok=True)

for chrom in chroms:
    file_name = os.path.join(path, chrom + '.tsv.gz')
    chrom_df = clmut_df[clmut_df['#CHROM'] == chrom]
    # index=False keeps the pandas row index out of the file, so '#CHROM' is the
    # first header field. A chromosome with no rows still yields a header-only file.
    chrom_df.to_csv(file_name, sep='\t', index=False, compression="gzip")