In [1]:
import pandas as pd
import numpy as np
import os
# Raise pandas' row-display limit so tables of up to 150 rows print without truncation.
pd.options.display.max_rows = 150

In [2]:
# Names for the '|'-separated fields of a VEP annotation entry. Each name is
# stripped and passed through str.capitalize(), so e.g. "HGVSc" becomes "Hgvsc"
# and "Feature_id" stays "Feature_id".
vep_cols = [
    field.strip().capitalize()
    for field in (
        "Allele|Annotation|Impact|Gene_name|Gene_id|Feature_type|Feature_id|Biotype|EXON"
        "|INTRON|HGVSc|HGVSp|cDNA_position|CDS_position|Protein_position|Amino_acids"
        "|Codons|Existing_variation|DISTANCE|STRAND|FLAGS|SYMBOL_SOURCE|HGNC_ID|CANONICAL"
        "|CCDS|HGVS_OFFSET"
    ).split("|")
]

In [3]:
# Read the VEP-annotated VCF: the first 6 lines are skipped, the next line is
# used as the header, and only the first 10 columns are kept.
vep_df = pd.read_table('./cftr.grch37.vep.vcf', header=0, skiprows=6, usecols=range(10))

# INFO holds "ANN=" followed by comma-separated annotation entries. Split them
# into columns and stack, giving one row per (variant row, entry) pair.
# str.split(expand=True) replaces the per-row `.apply(pd.Series, 1)`, whose stray
# positional `1` was silently taken as `convert_dtype`.
info_df = vep_df["INFO"].str.replace("ANN=", "").str.split(",", expand=True).stack()

# Each entry is a '|'-separated record; one column per field.
info_df = info_df.str.split("|", expand=True)

# Drop the entry-position level so the index lines up with vep_df's row labels again.
info_df.index = info_df.index.droplevel(-1)
info_df.columns = vep_cols

# The join repeats each variant row once per annotation entry (index labels are
# therefore duplicated), then only the columns used for the comparison are kept.
vep_df = vep_df.join(info_df)
vep_df = vep_df[['POS', 'REF', 'ALT', 'Feature_id', 'Annotation', 'Impact', 'Hgvsc']]
del info_df
vep_df.head()

Unnamed: 0,POS,REF,ALT,Feature_id,Annotation,Impact,Hgvsc
0,117105737,C,A,ENST00000546407,upstream_gene_variant,MODIFIER,
1,117105737,C,G,ENST00000546407,upstream_gene_variant,MODIFIER,
2,117105737,C,T,ENST00000546407,upstream_gene_variant,MODIFIER,
3,117105737,C,CA,ENST00000546407,upstream_gene_variant,MODIFIER,
4,117105737,C,CG,ENST00000546407,upstream_gene_variant,MODIFIER,


In [4]:
# Names for the '|'-separated fields of a SnpEff annotation entry, normalized
# the same way as the VEP names (strip + str.capitalize(), so "Feature_ID"
# becomes "Feature_id" and "HGVSc" becomes "Hgvsc").
snpeff_cols = [
    field.strip().capitalize()
    for field in (
        "Allele|Annotation|Impact|Gene_name|Gene_ID|Feature_type|Feature_ID|Biotype|Rank|HGVSc|HGVSp"
        "|cDNA_position|CDS_position|Protein_position|Distance|Errors"
    ).split("|")
]

In [5]:
# Read the SnpEff-annotated VCF: the first 9 lines are skipped, the next line is
# used as the header, and only the first 10 columns are kept.
snpeff_df = pd.read_table('./cftr.grch37.snpeff.vcf', header=0, skiprows=9, usecols=range(10))

# snpeff includes two other INFO fields that we don't need: keep only the first
# ';'-separated entry of INFO.
info_df = snpeff_df["INFO"].str.split(";").str[0]

# Split the comma-separated annotation entries into columns and stack, giving one
# row per (variant row, entry) pair. str.split(expand=True) replaces the per-row
# `.apply(pd.Series, 1)`, whose stray positional `1` was silently taken as
# `convert_dtype`.
info_df = info_df.str.replace("ANN=", "").str.split(",", expand=True).stack()

# Each entry is a '|'-separated record; one column per field.
info_df = info_df.str.split("|", expand=True)

# Drop the entry-position level so the index lines up with snpeff_df's row labels again.
info_df.index = info_df.index.droplevel(-1)
info_df.columns = snpeff_cols

# The join repeats each variant row once per annotation entry (index labels are
# therefore duplicated), then only the columns used for the comparison are kept.
snpeff_df = snpeff_df.join(info_df)
snpeff_df = snpeff_df[['POS', 'REF', 'ALT', 'Feature_id', 'Annotation', 'Impact', 'Hgvsc']]
del info_df
snpeff_df.head()

Unnamed: 0,POS,REF,ALT,Feature_id,Annotation,Impact,Hgvsc
0,117105737,C,A,ENST00000546407,upstream_gene_variant,MODIFIER,n.-101C>A
0,117105737,C,A,ENSG00000214684-ENSG00000001626,intergenic_region,MODIFIER,n.117105737C>A
1,117105737,C,G,ENST00000546407,upstream_gene_variant,MODIFIER,n.-101C>G
1,117105737,C,G,ENSG00000214684-ENSG00000001626,intergenic_region,MODIFIER,n.117105737C>G
2,117105737,C,T,ENST00000546407,upstream_gene_variant,MODIFIER,n.-101C>T


In [6]:
#From http://snpeff.sourceforge.net/VCFannotationformat_v1.0.pdf with additions:
#VEP: non_coding_transcript_exon_variant, non_coding_transcript_variant, protein_altering_variant,
#incomplete_terminal_codon_variant, NMD_transcript_variant
#Snpeff: conservative_inframe_deletion, conservative_inframe_insertion, structural_interaction_variant, 5_prime_UTR_truncation
#
#NOTE: a few terms (splice_branch_variant, stop_retained_variant, initiator_codon_variant,
#coding_sequence_variant) are listed twice; only the FIRST occurrence determines a term's rank.

ranked_terms = ["chromosome_number_variation","exon_loss_variant","frameshift_variant","stop_gained","stop_lost",
                "start_lost","splice_acceptor_variant","splice_donor_variant","rare_amino_acid_variant","missense_variant",
                "inframe_insertion","conservative_inframe_insertion", "disruptive_inframe_insertion","inframe_deletion","conservative_inframe_deletion", "disruptive_inframe_deletion",
                "5_prime_UTR_truncation+exon_loss_variant","5_prime_UTR_truncation","3_prime_UTR_truncation+exon_loss","splice_branch_variant",
                "splice_region_variant","splice_branch_variant","stop_retained_variant","initiator_codon_variant",
                "synonymous_variant","initiator_codon_variant+non_canonical_start_codon","stop_retained_variant",
                "5_prime_UTR_variant","3_prime_UTR_variant","5_prime_UTR_premature_start_codon_gain_variant",
                "structural_interaction_variant","coding_sequence_variant", "protein_altering_variant","upstream_gene_variant","downstream_gene_variant",
                "TF_binding_site_variant","regulatory_region_variant","miRNA","custom","sequence_feature",
                "conserved_intron_variant","intron_variant","intragenic_variant","conserved_intergenic_variant",
                "intergenic_region","coding_sequence_variant","non_coding_exon_variant","non_coding_transcript_exon_variant",
                "nc_transcript_variant","non_coding_transcript_variant","NMD_transcript_variant", "incomplete_terminal_codon_variant", "gene_variant","chromosome"]

# term -> position of its first occurrence in ranked_terms (exactly what
# ranked_terms.index(term) returns), precomputed so each lookup is a dict access
# instead of a linear scan. Built once here: re-run this cell if ranked_terms is edited.
_term_positions = {}
for _position, _term in enumerate(ranked_terms):
    _term_positions.setdefault(_term, _position)

def term_rank(term):
    """Return the position of `term` in `ranked_terms` (smaller = listed earlier).

    Intended as a `key=` function, e.g. `min(terms, key=term_rank)` picks the
    term that appears earliest in the list.

    Raises:
        ValueError: if `term` is not one of the entries of `ranked_terms`.
    """
    try:
        return _term_positions[term]
    except (KeyError, TypeError):
        raise ValueError("%r is not a known annotation term (not in ranked_terms)" % (term,))

In [7]:
def _first_ranked_term(annotation):
    """Return the '&'-joined term of `annotation` that appears earliest in ranked_terms."""
    return min(annotation.split("&"), key=term_rank)

# Only the Annotation column is needed, so map over it directly instead of a
# row-wise DataFrame.apply(axis=1), which builds a Series object for every row.
vep_df["effect"] = vep_df["Annotation"].map(_first_ranked_term)
snpeff_df["effect"] = snpeff_df["Annotation"].map(_first_ranked_term)

In [8]:
def _tool_comparison(vep_values, snpeff_values):
    """Name the two Series after their tool and stack them as the rows of one frame."""
    vep_values.name = "VEP"
    snpeff_values.name = "SnpEff"
    return pd.DataFrame([vep_values, snpeff_values])

# Number of rows per selected effect term; terms seen by only one tool are shown as 0.
vc_vep = vep_df.groupby(['effect']).size()
vc_snpeff = snpeff_df.groupby(['effect']).size()
vc_df = _tool_comparison(vc_vep, vc_snpeff)
print("Annotations\n")
print(vc_df.T.fillna(0))

# Number of rows per impact level.
impact_vep = vep_df.groupby(['Impact']).size()
impact_snpeff = snpeff_df.groupby(['Impact']).size()
impact_df = _tool_comparison(impact_vep, impact_snpeff)
print("\nImpacts")
print(impact_df.T)

# Number of non-null values in each column.
counts_vep = vep_df.count()
counts_snpeff = snpeff_df.count()
counts_df = _tool_comparison(counts_vep, counts_snpeff)
print("\nCounts")
print(counts_df.T)

Annotations

                                                     VEP    SnpEff
3_prime_UTR_variant                               6654.0    6680.0
5_prime_UTR_premature_start_codon_gain_variant       0.0     286.0
5_prime_UTR_variant                               9987.0   10259.0
coding_sequence_variant                             28.0       0.0
conservative_inframe_deletion                        0.0    4619.0
conservative_inframe_insertion                       0.0    8650.0
disruptive_inframe_deletion                          0.0    8990.0
disruptive_inframe_insertion                         0.0   17211.0
downstream_gene_variant                          99177.0   99185.0
exon_loss_variant                                    0.0       7.0
frameshift_variant                              113706.0  113693.0
incomplete_terminal_codon_variant                    8.0       0.0
inframe_deletion                                 13604.0       0.0
inframe_insertion                                

This example is interesting. Here VEP assigns the fairly general `protein_altering_variant` annotation, whereas SnpEff breaks it down further to `disruptive_inframe_insertion`.

In [9]:
# First VEP row whose selected effect contains 'protein_altering_variant'.
vep_df.loc[vep_df['effect'].str.contains('protein_altering_variant')].head(1)

Unnamed: 0,POS,REF,ALT,Feature_id,Annotation,Impact,Hgvsc,effect
19511,117120152,C,CCGA,ENST00000003084,protein_altering_variant,MODERATE,ENST00000003084.6:c.4_5insCGA,protein_altering_variant


In [10]:
# All VEP annotation rows sharing index label 19511 (label-based lookup;
# `.ix` is deprecated and was removed in pandas 1.0).
vep_df.loc[19511]

Unnamed: 0,POS,REF,ALT,Feature_id,Annotation,Impact,Hgvsc,effect
19511,117120152,C,CCGA,ENST00000003084,protein_altering_variant,MODERATE,ENST00000003084.6:c.4_5insCGA,protein_altering_variant
19511,117120152,C,CCGA,ENST00000426809,protein_altering_variant,MODERATE,ENST00000426809.1:c.4_5insCGA,protein_altering_variant
19511,117120152,C,CCGA,ENST00000446805,intron_variant,MODIFIER,ENST00000446805.1:c.-191+404_-191+405insCGA,intron_variant
19511,117120152,C,CCGA,ENST00000454343,protein_altering_variant,MODERATE,ENST00000454343.1:c.4_5insCGA,protein_altering_variant
19511,117120152,C,CCGA,ENST00000546407,intron_variant&non_coding_transcript_variant,MODIFIER,ENST00000546407.1:n.166+4290_166+4291insCGA,intron_variant


In [11]:
# All SnpEff annotation rows sharing index label 19511 (label-based lookup;
# `.ix` is deprecated and was removed in pandas 1.0).
snpeff_df.loc[19511]

Unnamed: 0,POS,REF,ALT,Feature_id,Annotation,Impact,Hgvsc,effect
19511,117120152,C,CCGA,ENST00000003084,disruptive_inframe_insertion,MODERATE,c.4_5insCGA,disruptive_inframe_insertion
19511,117120152,C,CCGA,ENST00000454343,disruptive_inframe_insertion,MODERATE,c.4_5insCGA,disruptive_inframe_insertion
19511,117120152,C,CCGA,ENST00000426809,disruptive_inframe_insertion,MODERATE,c.4_5insCGA,disruptive_inframe_insertion
19511,117120152,C,CCGA,ENST00000546407,intron_variant,MODIFIER,n.166+4290_166+4291insCGA,intron_variant
19511,117120152,C,CCGA,ENST00000446805,intron_variant,MODIFIER,c.-191+404_-191+405insCGA,intron_variant


This example is interesting because it shows two things:
1. SnpEff provides "structural_interaction_variant" annotations, which VEP does not provide.
2. SnpEff is a bit more granular when annotating inframe variants.

In [12]:
# First SnpEff row whose selected effect contains 'structural_interaction_variant'.
snpeff_df.loc[snpeff_df['effect'].str.contains('structural_interaction_variant')].head(1)

Unnamed: 0,POS,REF,ALT,Feature_id,Annotation,Impact,Hgvsc,effect
58085,117182112,TTAA,T,2PZG:B_388-B_567:ENST00000003084,structural_interaction_variant,HIGH,c.1160_1162delTAA,structural_interaction_variant


In [13]:
# All VEP annotation rows sharing index label 58085 (label-based lookup;
# `.ix` is deprecated and was removed in pandas 1.0).
vep_df.loc[58085]

Unnamed: 0,POS,REF,ALT,Feature_id,Annotation,Impact,Hgvsc,effect
58085,117182112,TTAA,T,ENST00000003084,inframe_deletion,MODERATE,ENST00000003084.6:c.1160_1162delTAA,inframe_deletion
58085,117182112,TTAA,T,ENST00000426809,inframe_deletion,MODERATE,ENST00000426809.1:c.1070_1072delTAA,inframe_deletion
58085,117182112,TTAA,T,ENST00000454343,inframe_deletion,MODERATE,ENST00000454343.1:c.1160_1162delTAA,inframe_deletion


In [14]:
# All SnpEff annotation rows sharing index label 58085 (label-based lookup;
# `.ix` is deprecated and was removed in pandas 1.0).
snpeff_df.loc[58085]

Unnamed: 0,POS,REF,ALT,Feature_id,Annotation,Impact,Hgvsc,effect
58085,117182112,TTAA,T,2PZG:B_388-B_567:ENST00000003084,structural_interaction_variant,HIGH,c.1160_1162delTAA,structural_interaction_variant
58085,117182112,TTAA,T,ENST00000003084,disruptive_inframe_deletion,MODERATE,c.1160_1162delTAA,disruptive_inframe_deletion
58085,117182112,TTAA,T,ENST00000454343,disruptive_inframe_deletion,MODERATE,c.1160_1162delTAA,disruptive_inframe_deletion
58085,117182112,TTAA,T,ENST00000426809,disruptive_inframe_deletion,MODERATE,c.1070_1072delTAA,disruptive_inframe_deletion


Similarly to the structural interaction variant example, SnpEff provides an additional annotation called `sequence_feature`.

In [15]:
# Fourth SnpEff row (position 3) whose selected effect contains 'sequence_feature'.
snpeff_df.loc[snpeff_df['effect'].str.contains('sequence_feature')].iloc[3:4]

Unnamed: 0,POS,REF,ALT,Feature_id,Annotation,Impact,Hgvsc,effect
19449,117120148,C,CA,ENST00000003084,sequence_feature,LOW,c.-1_1insA,sequence_feature


In [16]:
# All VEP annotation rows sharing index label 19449 (label-based lookup;
# `.ix` is deprecated and was removed in pandas 1.0).
vep_df.loc[19449]

Unnamed: 0,POS,REF,ALT,Feature_id,Annotation,Impact,Hgvsc,effect
19449,117120148,C,CA,ENST00000003084,5_prime_UTR_variant,MODIFIER,ENST00000003084.6:c.1dupA,5_prime_UTR_variant
19449,117120148,C,CA,ENST00000426809,upstream_gene_variant,MODIFIER,,upstream_gene_variant
19449,117120148,C,CA,ENST00000446805,intron_variant,MODIFIER,ENST00000446805.1:c.-191+401dupA,intron_variant
19449,117120148,C,CA,ENST00000454343,5_prime_UTR_variant,MODIFIER,ENST00000454343.1:c.1dupA,5_prime_UTR_variant
19449,117120148,C,CA,ENST00000546407,intron_variant&non_coding_transcript_variant,MODIFIER,ENST00000546407.1:n.166+4287dupA,intron_variant


In [17]:
# All SnpEff annotation rows sharing index label 19449 (label-based lookup;
# `.ix` is deprecated and was removed in pandas 1.0).
snpeff_df.loc[19449]

Unnamed: 0,POS,REF,ALT,Feature_id,Annotation,Impact,Hgvsc,effect
19449,117120148,C,CA,ENST00000003084,frameshift_variant&start_lost,HIGH,c.1dupA,frameshift_variant
19449,117120148,C,CA,ENST00000454343,frameshift_variant&start_lost,HIGH,c.1dupA,frameshift_variant
19449,117120148,C,CA,ENST00000426809,frameshift_variant&start_lost,HIGH,c.1dupA,frameshift_variant
19449,117120148,C,CA,ENST00000003084,sequence_feature,LOW,c.-1_1insA,sequence_feature
19449,117120148,C,CA,ENST00000546407,intron_variant,MODIFIER,n.166+4287dupA,intron_variant
19449,117120148,C,CA,ENST00000446805,intron_variant,MODIFIER,c.-191+401dupA,intron_variant


Here's a consequential mismatch. VEP annotates this deletion of a T on ENST00000446805 as `incomplete_terminal_codon_variant&coding_sequence_variant` (collapsed here to `coding_sequence_variant`) with a `LOW` impact, whereas SnpEff annotates it as a frameshift with a `HIGH` impact.

In [18]:
# First four VEP rows whose selected effect contains 'coding_sequence_variant'.
vep_df.loc[vep_df['effect'].str.contains('coding_sequence_variant')].head(4)

Unnamed: 0,POS,REF,ALT,Feature_id,Annotation,Impact,Hgvsc,effect
32771,117171030,CT,C,ENST00000446805,incomplete_terminal_codon_variant&coding_seque...,LOW,ENST00000446805.1:c.109delT,coding_sequence_variant
32774,117171031,T,A,ENST00000446805,incomplete_terminal_codon_variant&coding_seque...,LOW,ENST00000446805.1:c.109T>A,coding_sequence_variant
32775,117171031,T,G,ENST00000446805,incomplete_terminal_codon_variant&coding_seque...,LOW,ENST00000446805.1:c.109T>G,coding_sequence_variant
32776,117171031,T,C,ENST00000446805,incomplete_terminal_codon_variant&coding_seque...,LOW,ENST00000446805.1:c.109T>C,coding_sequence_variant


In [36]:
# All VEP annotation rows sharing index label 32771 (label-based lookup;
# `.ix` is deprecated and was removed in pandas 1.0).
vep_df.loc[32771]

Unnamed: 0,POS,REF,ALT,Feature_id,Annotation,Impact,Hgvsc,effect,normalized_effect
32771,117171030,CT,C,ENST00000003084,frameshift_variant,HIGH,ENST00000003084.6:c.352delT,frameshift_variant,frameshift_variant
32771,117171030,CT,C,ENST00000426809,frameshift_variant,HIGH,ENST00000426809.1:c.352delT,frameshift_variant,frameshift_variant
32771,117171030,CT,C,ENST00000446805,incomplete_terminal_codon_variant&coding_seque...,LOW,ENST00000446805.1:c.109delT,coding_sequence_variant,coding_sequence_variant
32771,117171030,CT,C,ENST00000454343,frameshift_variant,HIGH,ENST00000454343.1:c.352delT,frameshift_variant,frameshift_variant


In [37]:
# All SnpEff annotation rows sharing index label 32771 (label-based lookup;
# `.ix` is deprecated and was removed in pandas 1.0).
snpeff_df.loc[32771]

Unnamed: 0,POS,REF,ALT,Feature_id,Annotation,Impact,Hgvsc,effect,normalized_effect
32771,117171030,CT,C,ENST00000003084,frameshift_variant,HIGH,c.352delT,frameshift_variant,frameshift_variant
32771,117171030,CT,C,ENST00000446805,frameshift_variant&splice_region_variant,HIGH,c.109delT,frameshift_variant,frameshift_variant
32771,117171030,CT,C,ENST00000454343,frameshift_variant,HIGH,c.352delT,frameshift_variant,frameshift_variant
32771,117171030,CT,C,ENST00000426809,frameshift_variant,HIGH,c.352delT,frameshift_variant,frameshift_variant


In [21]:
# Remove the rows whose selected effect is one of the two SnpEff-only annotation
# classes discussed above, so the remaining rows can be compared with VEP.
# .copy() makes the result an independent frame: columns are assigned to
# snpeff_df later, which on a bare boolean-mask slice triggers pandas'
# SettingWithCopyWarning.
snpeff_df = snpeff_df[~snpeff_df['effect'].str.contains('structural_interaction_variant|sequence_feature')].copy()

In [22]:
# Maps each selected effect term to the term used for the normalized comparison.
# Most terms are kept as they are...
collapse_map = {term: term for term in (
    '3_prime_UTR_variant',
    '5_prime_UTR_premature_start_codon_gain_variant',
    '5_prime_UTR_variant',
    'coding_sequence_variant',
    'downstream_gene_variant',
    'exon_loss_variant',
    'frameshift_variant',
    'incomplete_terminal_codon_variant',
    'inframe_deletion',
    'inframe_insertion',
    'initiator_codon_variant',
    'intergenic_region',
    'intron_variant',
    'missense_variant',
    'non_coding_transcript_exon_variant',
    'non_coding_transcript_variant',
    'splice_acceptor_variant',
    'splice_donor_variant',
    'splice_region_variant',
    'start_lost',
    'stop_gained',
    'stop_lost',
    'stop_retained_variant',
    'synonymous_variant',
    'upstream_gene_variant',
)}
# ...while the conservative/disruptive inframe terms and protein_altering_variant
# are folded into the plain inframe_insertion / inframe_deletion terms.
collapse_map.update({
    'conservative_inframe_deletion': 'inframe_deletion',
    'conservative_inframe_insertion': 'inframe_insertion',
    'disruptive_inframe_deletion': 'inframe_deletion',
    'disruptive_inframe_insertion': 'inframe_insertion',
    'protein_altering_variant': 'inframe_insertion',
})

In [23]:
def _collapse_effect(effect):
    """Look up the normalized term for one effect; an effect missing from collapse_map raises KeyError."""
    return collapse_map[effect]

# Add the normalized term next to each tool's selected effect.
vep_df['normalized_effect'] = vep_df['effect'].apply(_collapse_effect)
snpeff_df['normalized_effect'] = snpeff_df['effect'].apply(_collapse_effect)

In [24]:
# Number of rows per normalized effect term for each tool, shown side by side;
# terms seen by only one tool are shown as 0.
vc_vep = vep_df.groupby(['normalized_effect']).size().rename("VEP")
vc_snpeff = snpeff_df.groupby(['normalized_effect']).size().rename("SnpEff")
vc_df = pd.DataFrame([vc_vep, vc_snpeff])
print("Annotations\n")
print(vc_df.T.fillna(0))

Annotations

                                                     VEP    SnpEff
3_prime_UTR_variant                               6654.0    6680.0
5_prime_UTR_premature_start_codon_gain_variant       0.0     286.0
5_prime_UTR_variant                               9987.0   10259.0
coding_sequence_variant                             28.0       0.0
downstream_gene_variant                          99177.0   99185.0
exon_loss_variant                                    0.0       7.0
frameshift_variant                              113706.0  113693.0
incomplete_terminal_codon_variant                    8.0       0.0
inframe_deletion                                 13604.0   13609.0
inframe_insertion                                25795.0   25861.0
initiator_codon_variant                              0.0       8.0
intergenic_region                                    0.0    1403.0
intron_variant                                  417918.0  416946.0
missense_variant                                 

In [25]:
# Pair the two tools' annotations for the same variant and feature. The outer
# join keeps rows that only one tool produced; the non-key columns get a
# _vep / _snpeff suffix.
effect_df = vep_df.merge(
    snpeff_df,
    how='outer',
    on=['POS', 'REF', 'ALT', "Feature_id"],
    suffixes=('_vep', '_snpeff'),
)

In [26]:
# Preview the first five merged rows.
effect_df.head(n=5)

Unnamed: 0,POS,REF,ALT,Feature_id,Annotation_vep,Impact_vep,Hgvsc_vep,effect_vep,normalized_effect_vep,Annotation_snpeff,Impact_snpeff,Hgvsc_snpeff,effect_snpeff,normalized_effect_snpeff
0,117105737,C,A,ENST00000546407,upstream_gene_variant,MODIFIER,,upstream_gene_variant,upstream_gene_variant,upstream_gene_variant,MODIFIER,n.-101C>A,upstream_gene_variant,upstream_gene_variant
1,117105737,C,G,ENST00000546407,upstream_gene_variant,MODIFIER,,upstream_gene_variant,upstream_gene_variant,upstream_gene_variant,MODIFIER,n.-101C>G,upstream_gene_variant,upstream_gene_variant
2,117105737,C,T,ENST00000546407,upstream_gene_variant,MODIFIER,,upstream_gene_variant,upstream_gene_variant,upstream_gene_variant,MODIFIER,n.-101C>T,upstream_gene_variant,upstream_gene_variant
3,117105737,C,CA,ENST00000546407,upstream_gene_variant,MODIFIER,,upstream_gene_variant,upstream_gene_variant,upstream_gene_variant,MODIFIER,n.-101_-100insA,upstream_gene_variant,upstream_gene_variant
4,117105737,C,CG,ENST00000546407,upstream_gene_variant,MODIFIER,,upstream_gene_variant,upstream_gene_variant,upstream_gene_variant,MODIFIER,n.-101_-100insG,upstream_gene_variant,upstream_gene_variant


In [27]:
# True where both tools report the same impact. Vectorized column comparison
# instead of a row-wise apply; rows where either side is missing (NaN from the
# outer merge) compare as False, exactly as before.
effect_df['Impact_match'] = effect_df['Impact_vep'] == effect_df['Impact_snpeff']

In [28]:
# True where both tools selected the same effect term. Vectorized column
# comparison instead of a row-wise apply; rows where either side is missing
# (NaN from the outer merge) compare as False, exactly as before.
effect_df['effect_match'] = effect_df['effect_vep'] == effect_df['effect_snpeff']

In [29]:
# True where both tools agree after normalization. Vectorized column comparison
# instead of a row-wise apply; rows where either side is missing (NaN from the
# outer merge) compare as False, exactly as before.
effect_df['normalized_effect_match'] = effect_df['normalized_effect_vep'] == effect_df['normalized_effect_snpeff']

In [30]:
# Percentage of merged rows with matching / non-matching impact, to 2 decimals.
effect_df['Impact_match'].value_counts().div(effect_df['Impact_match'].size).mul(100).round(2)

True     99.29
False     0.71
Name: Impact_match, dtype: float64

In [31]:
# Number of merged rows for every (VEP impact, SnpEff impact) combination.
(
    effect_df
    .groupby(['Impact_vep', 'Impact_snpeff'])['Impact_match']
    .count()
)

Impact_vep  Impact_snpeff
HIGH        HIGH             122728
            LOW                 307
            MODERATE             64
            MODIFIER             55
LOW         HIGH               1286
            LOW               24976
            MODERATE             19
            MODIFIER            633
MODERATE    HIGH                243
            LOW                  19
            MODERATE          70377
            MODIFIER              3
MODIFIER    HIGH                 76
            LOW                2054
            MODERATE              5
            MODIFIER         645789
Name: Impact_match, dtype: int64

In [32]:
# Percentage of merged rows with matching / non-matching effect terms, to 2 decimals.
effect_df['effect_match'].value_counts().div(effect_df['effect_match'].size).mul(100).round(2)

True     94.57
False     5.43
Name: effect_match, dtype: float64

In [33]:
# Number of merged rows for every (VEP effect, SnpEff effect) combination, as a one-column frame.
effect_df.groupby(['effect_vep', 'effect_snpeff'])['effect_match'].count().to_frame()

Unnamed: 0_level_0,Unnamed: 1_level_0,effect_match
effect_vep,effect_snpeff,Unnamed: 2_level_1
3_prime_UTR_variant,3_prime_UTR_variant,6648
3_prime_UTR_variant,downstream_gene_variant,12
3_prime_UTR_variant,frameshift_variant,6
3_prime_UTR_variant,non_coding_transcript_variant,6
3_prime_UTR_variant,splice_region_variant,74
3_prime_UTR_variant,stop_lost,8
5_prime_UTR_variant,5_prime_UTR_premature_start_codon_gain_variant,279
5_prime_UTR_variant,5_prime_UTR_variant,9944
5_prime_UTR_variant,conservative_inframe_insertion,1
5_prime_UTR_variant,exon_loss_variant,3


In [34]:
# Percentage of merged rows with matching / non-matching normalized effect terms, to 2 decimals.
effect_df['normalized_effect_match'].value_counts().div(effect_df['normalized_effect_match'].size).mul(100).round(2)

True     99.08
False     0.92
Name: normalized_effect_match, dtype: float64

In [35]:
# Number of merged rows for every (VEP, SnpEff) combination of normalized effects.
# Count the normalized match column so the resulting column label agrees with
# the grouping (it was labelled `effect_match` before). Both match columns are
# fully populated booleans, so the counts themselves are the same.
pd.DataFrame(effect_df.groupby(['normalized_effect_vep', 'normalized_effect_snpeff'])['normalized_effect_match'].count())

Unnamed: 0_level_0,Unnamed: 1_level_0,effect_match
normalized_effect_vep,normalized_effect_snpeff,Unnamed: 2_level_1
3_prime_UTR_variant,3_prime_UTR_variant,6648
3_prime_UTR_variant,downstream_gene_variant,12
3_prime_UTR_variant,frameshift_variant,6
3_prime_UTR_variant,non_coding_transcript_variant,6
3_prime_UTR_variant,splice_region_variant,74
3_prime_UTR_variant,stop_lost,8
5_prime_UTR_variant,5_prime_UTR_premature_start_codon_gain_variant,279
5_prime_UTR_variant,5_prime_UTR_variant,9944
5_prime_UTR_variant,exon_loss_variant,3
5_prime_UTR_variant,frameshift_variant,34
