# VEP conversion of common mutations table: melanoma sample

In [1]:
# Needed basic packages
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
from matplotlib_venn import venn3
import time

# tqdm setup: each `.pandas()` call below is presumably what hooks tqdm's
# progress reporting into pandas — confirm against the tqdm documentation.
# NOTE(review): this cell repeats itself. The `tqdm_notebook` name imported from
# `tqdm` on the next line is immediately rebound by the `tqdm.notebook` import that
# follows it, and the final import / `.pandas()` pair is a verbatim duplicate of the
# first pair. One import plus one `.pandas()` call is presumably enough — confirm
# before pruning.
from tqdm import tqdm_notebook
from tqdm.notebook import tqdm_notebook
tqdm_notebook.pandas()
from tqdm.notebook import tqdm
tqdm.pandas()
from tqdm.notebook import tqdm_notebook
tqdm_notebook.pandas()

In [2]:
# Load the table of mutations shared by the three variant callers in each tumor.
# (Author's note: same data as mel_mut_df.)
clmut_df = pd.read_csv(
    '/workspace/projects/sjd_melos/MAFs_tables/Mel_CCF_Purple_common_mutations.tsv.gz',
    sep="\t",
    header=0,
)

In [3]:
# The chromosome column must be headed '#CHROM' (leading '#' included) so that
# VEP recognizes the exported table as a VCF file.
clmut_df = clmut_df.rename({'CHROM': '#CHROM'}, axis='columns')

In [4]:
# Preview the mutations located on chr2.
clmut_df.loc[clmut_df['#CHROM'].eq('chr2')]

Unnamed: 0,#CHROM,POS,ID,REF,ALT,QUAL,FILTER,INFO,FORMAT,NORMAL,...,n_alt_reads,t_ref_reads,n_ref_reads,VAF,mut_type,Caller_intersec,SAMPLE,mut,CN,CCF
84773,chr2,10601,.,C,T,.,PASS,"AS_FilterStatus=SITE;AS_SB_TABLE=103,44|6,5;DP...",GT:AD:AF:DP:F1R2:F2R1:FAD:SB,"0/0:70,0:0.014:70:28,0:41,0:70,0:49,21,0,0",...,0,77,70,0.125000,SNV,Mutect_Strelka,melanoma,chr2_10601_C_T,2.9845,1.164729
84774,chr2,14193,.,C,T,.,PASS,"AS_FilterStatus=SITE;AS_SB_TABLE=98,100|25,22;...",GT:AD:AF:DP:F1R2:F2R1:FAD:SB,"0/0:97,0:0.010:97:53,0:41,0:95,0:50,47,0,0",...,0,101,97,0.317568,SNV,Mutect_Strelka,melanoma,chr2_14193_C_T,2.9845,2.959042
84775,chr2,15846,.,CC,TT,925,PASS,LPS=34205;LPS_RC=36;RC=TGTTTC;RC_IDX=2;RC_LF=G...,GT:ABQ:AD:AF:DP:RABQ:RAD:RC_CNT:RC_IPC:RC_JIT:...,"./.:0:114,0:0:114:3678,0:117,0:0,0,0,0,0,114,1...",...,0,104,114,0.262000,MNV,Mutect_SAGE,melanoma,chr2_15846_CC_TT,2.9845,2.441272
84776,chr2,17393,.,C,T,.,PASS,"AS_FilterStatus=SITE;AS_SB_TABLE=129,156|11,9;...",GT:AD:AF:DP:F1R2:F2R1:FAD:SB,"0/0:148,0:6.720e-03:148:68,0:75,0:146,0:58,90,0,0",...,0,137,148,0.127389,SNV,Mutect_Strelka,melanoma,chr2_17393_C_T,2.9845,1.186985
84777,chr2,18012,.,C,T,.,PASS,"AS_FilterStatus=SITE;AS_SB_TABLE=111,129|16,20...",GT:AD:AF:DP:F1R2:F2R1:FAD:SB,"0/0:123,0:7.953e-03:123:63,0:56,0:121,0:50,73,0,0",...,0,117,123,0.235294,SNV,Mutect_Strelka,melanoma,chr2_18012_C_T,2.9845,2.192431
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
179372,chr2,242099852,.,C,T,.,PASS,"AS_FilterStatus=SITE;AS_SB_TABLE=118,129|7,8;D...",GT:AD:AF:DP:F1R2:F2R1:FAD:SB,"0/0:130,0:7.687e-03:130:56,0:69,0:127,0:62,68,0,0",...,0,117,130,0.113636,SNV,Mutect_Strelka,melanoma,chr2_242099852_C_T,2.2287,0.972958
179373,chr2,242101882,.,C,T,.,PASS,"AS_FilterStatus=SITE;AS_SB_TABLE=106,103|11,7;...",GT:AD:AF:DP:F1R2:F2R1:FAD:SB,"0/0:113,0:8.976e-03:113:60,0:49,0:109,0:54,59,0,0",...,0,96,113,0.157895,SNV,Mutect_Strelka,melanoma,chr2_242101882_C_T,2.2287,1.351900
179374,chr2,242113935,.,C,T,.,PASS,"AS_FilterStatus=SITE;AS_SB_TABLE=115,113|12,3;...",GT:AD:AF:DP:F1R2:F2R1:FAD:SB,"0/0:124,0:8.101e-03:124:49,0:67,0:121,0:66,58,0,0",...,0,104,124,0.126050,SNV,Mutect_Strelka,melanoma,chr2_242113935_C_T,2.2287,1.079248
179375,chr2,242128047,.,C,T,.,PASS,"AS_FilterStatus=SITE;AS_SB_TABLE=80,92|7,6;DP=...",GT:AD:AF:DP:F1R2:F2R1:FAD:SB,"0/0:90,0:0.011:90:45,0:43,0:90,0:38,52,0,0",...,0,82,90,0.136842,SNV,Mutect_Strelka,melanoma,chr2_242128047_C_T,2.2287,1.171647


In [5]:
# Build the list of chromosome labels: autosomes chr1..chr22, then chrX and chrY.
chroms = [f'chr{label}' for label in [*range(1, 23), 'X', 'Y']]
chroms

['chr1',
 'chr2',
 'chr3',
 'chr4',
 'chr5',
 'chr6',
 'chr7',
 'chr8',
 'chr9',
 'chr10',
 'chr11',
 'chr12',
 'chr13',
 'chr14',
 'chr15',
 'chr16',
 'chr17',
 'chr18',
 'chr19',
 'chr20',
 'chr21',
 'chr22',
 'chrX',
 'chrY']

In [6]:
# Split the mutation table by chromosome and write one gzip-compressed,
# tab-separated file per chromosome to <path>/<chrom>.tsv.gz.
# A file is written for every label in `chroms`, even when no rows match.

path = '/workspace/projects/sjd_melos/vep/vep_input_files/melanoma/'

for chrom in chroms:
    file_name = path + chrom + '.tsv.gz'
    chrom_df = clmut_df[clmut_df['#CHROM'] == chrom]
    # index=False: `index` is a boolean flag; passing None only worked because it is
    # falsy. The row index is omitted so '#CHROM' stays the first column of the file.
    chrom_df.to_csv(file_name, sep='\t', index=False, compression='gzip')