In [3]:
import pandas as pd
import numpy as np

### El objetivo de este script es reunir todos los datos de variantes curadas en DisGeNET, ya que el dataset por sí solo no brinda toda la información que se necesita para estudiar mutaciones en proteínas

## Datasets: variant - disease association curated; Summary of Curated VDAs

In [4]:
# Curated variant-disease associations taken from the DisGeNET downloads page
# (https://www.disgenet.org/downloads; October 2020).
vda = pd.read_csv('curated_variant_disease_associations.tsv', sep='\t')
# Normalise the headers: lower-case, then turn spaces, hyphens and slashes into underscores.
vda_headers = vda.columns.str.lower()
for separator in (' ', '-', '/'):
    vda_headers = vda_headers.str.replace(separator, '_')
vda.columns = vda_headers
vda = vda.rename(columns={'snpid': 'snp_id'})

In [67]:
# Report how many distinct SNP identifiers the curated associations contain.
n_unique_snps = len(vda.snp_id.unique())
print(f'snps unicos en el dataset: {n_unique_snps}')

snps unicos en el dataset: 168051


In [5]:
# Summary of curated VDAs, obtained by searching the DisGeNET web browser and downloading the
# full result (https://www.disgenet.org/browser/0/0/0/0/_a/_b./; February 2021).
# It carries extra information that complements the downloads-page table.
sumvda = pd.read_csv('summaryofvdas_CURATED.tsv', sep='\t')
# Normalise the headers: lower-case, then turn spaces, hyphens and slashes into underscores.
sumvda_headers = sumvda.columns.str.lower()
for separator in (' ', '-', '/'):
    sumvda_headers = sumvda_headers.str.replace(separator, '_')
sumvda.columns = sumvda_headers
sumvda = sumvda.rename(columns={'variant': 'snp_id', 'chr': 'chromosome', 'gene': 'gene_name'})
# Keep only the columns that are carried into the combined table.
sumvda_keep = [
    'snp_id', 'gene_name', 'gene_id', 'consequence', 'alleles', 'class',
    'af_exome', 'alt_ref_exome', 'af_genome', 'alt_ref_genome',
]
sumvda = sumvda[sumvda_keep]

In [6]:
# Join both tables side by side (axis=1 concatenates column-wise).
# pd.concat pairs rows purely by index label, so this is only valid when both files list the
# same variants in the same order. Verify that before concatenating: once the duplicated
# snp_id column is dropped below, a misalignment would no longer be detectable.
if not vda['snp_id'].equals(sumvda['snp_id']):
    raise ValueError(
        'vda and sumvda are not row-aligned on snp_id; '
        'a column-wise concat would pair unrelated rows'
    )
disgenet_vdas = pd.concat([vda, sumvda], axis=1)
# Drop the repeated column name (snp_id appears in both inputs); the first occurrence is kept.
disgenet_vdas = disgenet_vdas.loc[:, ~disgenet_vdas.columns.duplicated()]

## Mappings con uniprot y gene id

In [7]:
# UniProt mappings: UniProt accession <-> Entrez gene id
# (https://www.disgenet.org/downloads, Mappings).
uniprots = (
    pd.read_csv('mapa_geneid_4_uniprot_crossref.tsv.gz', sep='\t', compression='gzip')
    .rename(columns={'UniProtKB': 'uniprot_acc', 'GENEID': 'gene_id'})
)

In [8]:
# Mappings of DisGeNET variants (dbSNP identifiers) to NCBI Entrez identifiers according to
# the dbSNP database (https://www.disgenet.org/downloads, Variant-Gene Mappings File).
variant_gene_renames = {
    'snpId': 'snp_id',
    'geneId': 'gene_id',
    'geneSymbol': 'gene_name',
    'sourceId': 'source_id',
}
variant_gene = pd.read_csv('variant_to_gene_mappings.tsv.gz', sep='\t', compression='gzip')
variant_gene = variant_gene.rename(columns=variant_gene_renames)

In [9]:
# Attach the UniProt accessions. No `on=` is given, so pandas joins on whatever column names
# the two frames share, using its default inner join.
variant_gene = pd.merge(variant_gene, uniprots)

In [10]:
# Left-join the gene/UniProt mappings onto the combined VDA table so that every VDA row is
# kept even when no mapping exists for its (snp_id, gene_name) pair.
disgenet_total = pd.merge(
    disgenet_vdas,
    variant_gene,
    how='left',
    on=['snp_id', 'gene_name'],
)
disgenet_total

Unnamed: 0,snp_id,chromosome,position,dsi,dpi,diseaseid,diseasename,diseasetype,diseaseclass,diseasesemantictype,...,consequence,alleles,class,af_exome,alt_ref_exome,af_genome,alt_ref_genome,gene_id_y,source_id,uniprot_acc
0,rs1000005,21,33060745,,,C0200638,Eosinophil count procedure,phenotype,,Laboratory Procedure,...,intron variant,G/C,snv,,,0.505584,G/C,,,
1,rs10000770,4,142693109,1.000,0.04,C0023467,"Leukemia, Myelocytic, Acute",disease,C04,Neoplastic Process,...,intron variant,C/T,snv,,,0.108425,C/T,8821.0,DBSNP,O15327
2,rs10000770,4,142693109,1.000,0.04,C0023467,"Leukemia, Myelocytic, Acute",disease,C04,Neoplastic Process,...,intron variant,C/T,snv,,,0.108425,C/T,8821.0,VEP,O15327
3,rs1000091588,11,68935374,1.000,0.12,C1858517,SPINAL MUSCULAR ATROPHY WITH RESPIRATORY DISTR...,disease,C16;C08;C10,Disease or Syndrome,...,stop gained,C/T,snv,,,0.000007,C/T,3508.0,VEP,P38935
4,rs1000091588,11,68935374,1.000,0.12,C1858517,SPINAL MUSCULAR ATROPHY WITH RESPIRATORY DISTR...,disease,C16;C08;C10,Disease or Syndrome,...,stop gained,C/T,snv,,,0.000007,C/T,3508.0,DBSNP,P38935
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
433635,rs999921351,8,86628939,1.000,0.12,C1849792,Achromatopsia 3,disease,C23;C11;C10,Disease or Syndrome,...,stop gained,C/T,snv,,,,,54714.0,VEP,Q9NQW8
433636,rs999943,6,33656956,1.000,0.08,C0028754,Obesity,disease,C23;C18,Disease or Syndrome,...,intron variant,A/G,snv,,,0.282840,A/G,3710.0,VEP,Q14573
433637,rs999943,6,33656956,1.000,0.08,C0028754,Obesity,disease,C23;C18,Disease or Syndrome,...,intron variant,A/G,snv,,,0.282840,A/G,3710.0,DBSNP,Q14573
433638,rs999944,2,64822719,1.000,0.08,C0520679,"Sleep Apnea, Obstructive",disease,C08;C10,Disease or Syndrome,...,intergenic variant,A/G,snv,,,0.833959,A/G,,,


In [11]:
# Author's note: gene_id is already present (gene_id_x) and alt_ref_genome repeats the
# alleles column, so both redundant columns are dropped and gene_id_x gets its plain name back.
disgenet_total = (
    disgenet_total
    .drop(columns=['gene_id_y', 'alt_ref_genome'])
    .rename(columns={'gene_id_x': 'gene_id'})
)

In [12]:
# Inspect the column names left after dropping/renaming the merge leftovers.
disgenet_total.columns

Index(['snp_id', 'chromosome', 'position', 'dsi', 'dpi', 'diseaseid',
       'diseasename', 'diseasetype', 'diseaseclass', 'diseasesemantictype',
       'score', 'ei', 'yearinitial', 'yearfinal', 'nofpmids', 'source',
       'gene_name', 'gene_id', 'consequence', 'alleles', 'class', 'af_exome',
       'alt_ref_exome', 'af_genome', 'source_id', 'uniprot_acc'],
      dtype='object')

In [13]:
def _first_gene_id(value):
    """Return the first ';'-separated gene id in ``value`` as a float, or NaN when it is missing."""
    # `value != np.nan` is always True (NaN never compares equal to anything), so missing
    # values must be detected with pd.isna instead.
    if pd.isna(value):
        return np.nan
    return float(str(value).split(';')[0])


# Keep only the first gene id of each entry. The column stays float because NaN cannot be
# stored in a plain integer column.
disgenet_total['gene_id'] = disgenet_total['gene_id'].map(_first_gene_id).astype(float)

In [14]:
# Display the gene_id column after the conversion to float.
disgenet_total.gene_id

0         101930746.0
1              8821.0
2              8821.0
3              3508.0
4              3508.0
             ...     
433635        54714.0
433636         3710.0
433637         3710.0
433638            NaN
433639         4887.0
Name: gene_id, Length: 433640, dtype: float64

In [15]:
# Summarise dtypes and non-null counts of the combined table.
disgenet_total.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 433640 entries, 0 to 433639
Data columns (total 26 columns):
 #   Column               Non-Null Count   Dtype  
---  ------               --------------   -----  
 0   snp_id               433640 non-null  object 
 1   chromosome           433640 non-null  object 
 2   position             433640 non-null  int64  
 3   dsi                  331869 non-null  float64
 4   dpi                  301020 non-null  float64
 5   diseaseid            433640 non-null  object 
 6   diseasename          433640 non-null  object 
 7   diseasetype          433640 non-null  object 
 8   diseaseclass         275362 non-null  object 
 9   diseasesemantictype  433640 non-null  object 
 10  score                433640 non-null  float64
 11  ei                   289998 non-null  float64
 12  yearinitial          289998 non-null  float64
 13  yearfinal            289998 non-null  float64
 14  nofpmids             433640 non-null  int64  
 15  source           

In [60]:
# True if any fully identical row appears more than once in the combined table.
disgenet_total.duplicated().any()

False

In [16]:
# Keep the first 100 rows of the combined table as a small sample.
head = disgenet_total.iloc[:100]

## Dividir la tabla total en distintos subsets: Genes; Diseases; Stats

In [17]:
# Gene-centric subset: variant identity, gene/protein identifiers and genomic location.
gene_columns = [
    'snp_id', 'uniprot_acc', 'gene_name', 'gene_id', 'chromosome',
    'position', 'alleles', 'class', 'source', 'consequence',
]
disgenet_total_genes = disgenet_total[gene_columns]

In [18]:
# Disease-centric subset: variant/gene identifiers plus the disease annotation columns.
disease_columns = [
    'snp_id', 'gene_name', 'gene_id',
    'diseaseid', 'diseasename', 'diseasetype', 'diseaseclass', 'diseasesemantictype',
    'source', 'consequence', 'alleles', 'class', 'af_exome',
]
disgenet_total_disease = disgenet_total[disease_columns]

In [19]:
# Statistics subset: DisGeNET scores, publication-year/count columns and allele frequencies.
stat_columns = [
    'snp_id', 'alleles',
    'dsi', 'dpi', 'score', 'ei',
    'yearinitial', 'yearfinal', 'nofpmids',
    'af_exome', 'alt_ref_exome', 'af_genome', 'source_id',
]
disgenet_total_stat = disgenet_total[stat_columns]

In [61]:
# Count rows of the gene subset that repeat an earlier row exactly.
disgenet_total_genes.duplicated().sum()

253260

_________

## Procesar tablas de disgenet con las consultas en VEP

In [20]:
# Author's note: this table has one SNP per row.
# The file's own first line is skipped and replaced by the column names below.
cols1 = [
    'snp_id',
    'allele_string',
    'start_genome',
    'end_genome',
    'chromosome',
    'assembly',
    'most_severe_consequence',
    'transcript_consequences',
]
rsid_data = pd.read_csv('rsid_data.txt', sep='\t', skiprows=1, names=cols1)

In [21]:
# Display the per-SNP table.
rsid_data

Unnamed: 0,snp_id,allele_string,start_genome,end_genome,chromosome,assembly,most_severe_consequence,transcript_consequences
0,rs10015979,A/G,3107715,3107715,4,GRCh38,intron_variant,-
1,rs10023020,G/A,121803449,121803449,4,GRCh38,intron_variant,-
2,rs10030552,C/A/G,140051110,140051110,4,GRCh38,intron_variant,-
3,rs10038058,A/G/T,111107582,111107582,5,GRCh38,intron_variant,-
4,rs10038177,C/A/T,111100751,111100751,5,GRCh38,intron_variant,-
...,...,...,...,...,...,...,...,...
6828,rs927292,C/G,68792124,68792124,14,GRCh38,intron_variant,-
6829,rs9283429,C/T,240398647,240398647,1,GRCh38,intron_variant,-
6830,rs9287233,A/C/T,240398427,240398427,1,GRCh38,intron_variant,-
6831,rs9287237,G/A/T,240433914,240433914,1,GRCh38,intron_variant,-


In [25]:
# True if any row of rsid_data is missing its snp_id.
rsid_data.snp_id.isnull().any()

False

In [26]:
# Number of rows per snp_id, most frequent first.
rsid_data.snp_id.value_counts()

rs254271        10
rs587776589     10
rs1555791188    10
rs527236094     10
rs869312187     10
                ..
rs869025662      1
rs1555534147     1
rs1555611110     1
rs1567552637     1
rs1057519989     1
Name: snp_id, Length: 5794, dtype: int64

In [27]:
# Show every row recorded for one of the SNPs that appears multiple times.
is_example_snp = rsid_data['snp_id'] == 'rs587776589'
rsid_data.loc[is_example_snp]

Unnamed: 0,snp_id,allele_string,start_genome,end_genome,chromosome,assembly,most_severe_consequence,transcript_consequences
4839,rs587776589,CCGGAAGCAGGCC/CC,54128344,54128356,19,GRCh38,frameshift_variant,-
4840,rs587776589,CCGGAAGCAGGCC/CC,54128465,54128477,CHR_HSCHR19LRC_COX1_CTG3_1,GRCh38,frameshift_variant,-
4841,rs587776589,CCGGAAGCAGGCC/CC,54128465,54128477,CHR_HSCHR19LRC_COX2_CTG3_1,GRCh38,frameshift_variant,-
4842,rs587776589,CCGGAAGCAGGCC/CC,54128465,54128477,CHR_HSCHR19LRC_LRC_I_CTG3_1,GRCh38,frameshift_variant,-
4843,rs587776589,CCGGAAGCAGGCC/CC,54128465,54128477,CHR_HSCHR19LRC_LRC_J_CTG3_1,GRCh38,frameshift_variant,-
4844,rs587776589,CCGGAAGCAGGCC/CC,54128465,54128477,CHR_HSCHR19LRC_LRC_S_CTG3_1,GRCh38,frameshift_variant,-
4845,rs587776589,CCGGAAGCAGGCC/CC,54128465,54128477,CHR_HSCHR19LRC_LRC_T_CTG3_1,GRCh38,frameshift_variant,-
4846,rs587776589,CCGGAAGCAGGCC/CC,54128465,54128477,CHR_HSCHR19LRC_PGF1_CTG3_1,GRCh38,frameshift_variant,-
4847,rs587776589,CCGGAAGCAGGCC/CC,54128465,54128477,CHR_HSCHR19LRC_PGF2_CTG3_1,GRCh38,frameshift_variant,-
4848,rs587776589,CCGGAAGCAGGCC/CC,54128465,54128477,CHR_HSCHR19_4_CTG3_1,GRCh38,frameshift_variant,-


In [22]:
# Author's note: this table is broken down per allele; e.g. a C/A/G/T SNP has three rows.
# The file's own first line is skipped and replaced by the column names below.
cols2 = [
    'snp_id', 'allele_string', 'type', 'ensembl_gene',
    'allele_alt', 'from_to_aa', 'cdna_start', 'cdna_end',
    'codons', 'impact', 'gene_name', 'cds_start',
    'cds_end', 'aa_start', 'aa_end', 'consequence',
]
allele_data = pd.read_csv('allele_data.txt', sep='\t', skiprows=1, names=cols2)

In [24]:
# Compare how many distinct SNPs each VEP-derived table covers.
allele_unique = len(allele_data.snp_id.unique())
rsid_unique = len(rsid_data.snp_id.unique())
print(f'allele_data snps unicos:{allele_unique}, rsid_data snps unicos:{rsid_unique}')

allele_data snps unicos:5665, rsid_data snps unicos:5794


## Separo el from y to aa en cols

In [29]:
# Split "from/to" amino-acid strings into two columns with a vectorised split.
# Entries without a '/' (including the '-' placeholder) keep their text in from_aa and get
# NaN in to_aa; missing values propagate as NaN instead of raising.
aa_parts = allele_data['from_to_aa'].str.split('/')
allele_data['from_aa'] = aa_parts.str[0]
allele_data['to_aa'] = aa_parts.str[1]

In [30]:
# Synonymous variants: copy from_aa into to_aa wherever from_aa is not the '-' placeholder.
# A single .loc assignment on a boolean mask replaces the row-by-row chained assignment
# (`allele_data.to_aa[i] = ...`), which raises SettingWithCopyWarning and does not write back
# to the frame when pandas Copy-on-Write is enabled.
is_synonymous = allele_data['consequence'] == 'synonymous_variant'
has_residue = allele_data['from_aa'] != '-'
allele_data.loc[is_synonymous & has_residue, 'to_aa'] = allele_data['from_aa']

## Mergear *disgenet_total_genes* con *allele_data* para poder agregar los cambios en proteina

In [31]:
# Summarise dtypes and non-null counts of allele_data after adding from_aa / to_aa.
allele_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 11236 entries, 0 to 11235
Data columns (total 18 columns):
 #   Column         Non-Null Count  Dtype 
---  ------         --------------  ----- 
 0   snp_id         11236 non-null  object
 1   allele_string  11236 non-null  object
 2   type           11236 non-null  object
 3   ensembl_gene   11236 non-null  object
 4   allele_alt     11236 non-null  object
 5   from_to_aa     11236 non-null  object
 6   cdna_start     11236 non-null  object
 7   cdna_end       11236 non-null  object
 8   codons         11236 non-null  object
 9   impact         11236 non-null  object
 10  gene_name      11236 non-null  object
 11  cds_start      11236 non-null  object
 12  cds_end        11236 non-null  object
 13  aa_start       11236 non-null  object
 14  aa_end         11236 non-null  object
 15  consequence    11236 non-null  object
 16  from_aa        11236 non-null  object
 17  to_aa          5661 non-null   object
dtypes: object(18)
memory usage

In [32]:
# Summarise dtypes and non-null counts of the gene subset.
disgenet_total_genes.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 433640 entries, 0 to 433639
Data columns (total 10 columns):
 #   Column       Non-Null Count   Dtype  
---  ------       --------------   -----  
 0   snp_id       433640 non-null  object 
 1   uniprot_acc  350276 non-null  object 
 2   gene_name    398088 non-null  object 
 3   gene_id      398088 non-null  float64
 4   chromosome   433640 non-null  object 
 5   position     433640 non-null  int64  
 6   alleles      433640 non-null  object 
 7   class        433640 non-null  object 
 8   source       433640 non-null  object 
 9   consequence  433640 non-null  object 
dtypes: float64(1), int64(1), object(8)
memory usage: 36.4+ MB


In [64]:
# Display the gene subset with exact repeated rows removed (first occurrence kept);
# the result is only shown, not stored.
disgenet_total_genes.drop_duplicates()

Unnamed: 0,snp_id,uniprot_acc,gene_name,gene_id,chromosome,position,alleles,class,source,consequence
0,rs1000005,,LINC00945,101930746.0,21,33060745,G/C,snv,GWASCAT,intron variant
1,rs10000770,O15327,INPP4B,8821.0,4,142693109,C/T,snv,GWASCAT,intron variant
3,rs1000091588,P38935,IGHMBP2,3508.0,11,68935374,C/T,snv,CLINVAR,stop gained
5,rs1000096,P57682,KLF3,51274.0,4,38691214,C/T,snv,GWASCAT,intron variant
7,rs10001106,,,,4,10125817,T/A;C,snv,GWASDB,intergenic variant
...,...,...,...,...,...,...,...,...,...,...
433632,rs9999118,Q8NB90,SPATA5,166378.0,4,123130312,A/G,snv,GWASCAT;GWASDB,intron variant
433634,rs999921351,Q9NQW8,CNGB3,54714.0,8,86628939,C/T,snv,CLINVAR,stop gained
433636,rs999943,Q14573,ITPR3,3710.0,6,33656956,A/G,snv,GWASCAT;GWASDB,intron variant
433638,rs999944,,,,2,64822719,A/G,snv,GWASCAT,intergenic variant


In [79]:
# Snippet for generating genomic coordinates as "chromosome:start:end" for the first 10 rows
# of rsid_data.
# NOTE(review): the loop is wrapped in a triple-quoted string, so it does not execute; the
# cell only evaluates that string literal.
'''for i in range(10):
    print(str(rsid_data.chromosome[i]) + ':' + str(rsid_data.start_genome[i]) + ':' + str(rsid_data.end_genome[i]))'''

4:3107715:3107715
4:121803449:121803449
4:140051110:140051110
5:111107582:111107582
5:111100751:111100751
5:171411280:171411280
5:111102658:111102658
5:111090753:111090753
3:49855463:49855463
5:111117147:111117147


In [35]:
# List the columns available in allele_data.
allele_data.columns

Index(['snp_id', 'allele_string', 'type', 'ensembl_gene', 'allele_alt',
       'from_to_aa', 'cdna_start', 'cdna_end', 'codons', 'impact', 'gene_name',
       'cds_start', 'cds_end', 'aa_start', 'aa_end', 'consequence', 'from_aa',
       'to_aa'],
      dtype='object')

In [39]:
# Independent copy of the allele columns that will be merged into the gene subset.
allele_columns = [
    'snp_id', 'gene_name', 'type', 'allele_string', 'allele_alt',
    'from_aa', 'to_aa', 'aa_start', 'aa_end', 'consequence',
]
allele_subset = allele_data[allele_columns].copy()

In [48]:
# Left-join the per-allele protein changes onto the gene subset by snp_id. Columns present in
# both frames (gene_name, consequence) receive pandas' default _x / _y suffixes.
disgenet_total_genes_aa = pd.merge(
    disgenet_total_genes,
    allele_subset,
    how='left',
    on=['snp_id'],
)
disgenet_total_genes_aa

Unnamed: 0,snp_id,uniprot_acc,gene_name_x,gene_id,chromosome,position,alleles,class,source,consequence_x,gene_name_y,type,allele_string,allele_alt,from_aa,to_aa,aa_start,aa_end,consequence_y
0,rs1000005,,LINC00945,101930746.0,21,33060745,G/C,snv,GWASCAT,intron variant,,,,,,,,,
1,rs10000770,O15327,INPP4B,8821.0,4,142693109,C/T,snv,GWASCAT,intron variant,,,,,,,,,
2,rs10000770,O15327,INPP4B,8821.0,4,142693109,C/T,snv,GWASCAT,intron variant,,,,,,,,,
3,rs1000091588,P38935,IGHMBP2,3508.0,11,68935374,C/T,snv,CLINVAR,stop gained,,,,,,,,,
4,rs1000091588,P38935,IGHMBP2,3508.0,11,68935374,C/T,snv,CLINVAR,stop gained,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
461779,rs999921351,Q9NQW8,CNGB3,54714.0,8,86628939,C/T,snv,CLINVAR,stop gained,,,,,,,,,
461780,rs999943,Q14573,ITPR3,3710.0,6,33656956,A/G,snv,GWASCAT;GWASDB,intron variant,,,,,,,,,
461781,rs999943,Q14573,ITPR3,3710.0,6,33656956,A/G,snv,GWASCAT;GWASDB,intron variant,,,,,,,,,
461782,rs999944,,,,2,64822719,A/G,snv,GWASCAT,intergenic variant,,,,,,,,,


In [55]:
# True if the merged table contains any exact repeated row.
disgenet_total_genes_aa.duplicated().any()

True

In [54]:
# Count rows of the merged table that repeat an earlier row exactly.
disgenet_total_genes_aa.duplicated().sum()

275391

In [56]:
# Display the merged table without exact repeated rows.
# NOTE(review): the result is not assigned, so disgenet_total_genes_aa itself still contains
# the duplicates — confirm whether that is intended.
disgenet_total_genes_aa.drop_duplicates()

Unnamed: 0,snp_id,uniprot_acc,gene_name_x,gene_id,chromosome,position,alleles,class,source,consequence_x,gene_name_y,type,allele_string,allele_alt,from_aa,to_aa,aa_start,aa_end,consequence_y
0,rs1000005,,LINC00945,101930746.0,21,33060745,G/C,snv,GWASCAT,intron variant,,,,,,,,,
1,rs10000770,O15327,INPP4B,8821.0,4,142693109,C/T,snv,GWASCAT,intron variant,,,,,,,,,
3,rs1000091588,P38935,IGHMBP2,3508.0,11,68935374,C/T,snv,CLINVAR,stop gained,,,,,,,,,
5,rs1000096,P57682,KLF3,51274.0,4,38691214,C/T,snv,GWASCAT,intron variant,,,,,,,,,
7,rs10001106,,,,4,10125817,T/A;C,snv,GWASDB,intergenic variant,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
461776,rs9999118,Q8NB90,SPATA5,166378.0,4,123130312,A/G,snv,GWASCAT;GWASDB,intron variant,,,,,,,,,
461778,rs999921351,Q9NQW8,CNGB3,54714.0,8,86628939,C/T,snv,CLINVAR,stop gained,,,,,,,,,
461780,rs999943,Q14573,ITPR3,3710.0,6,33656956,A/G,snv,GWASCAT;GWASDB,intron variant,,,,,,,,,
461782,rs999944,,,,2,64822719,A/G,snv,GWASCAT,intergenic variant,,,,,,,,,


### con isin()

In [46]:
# Rows of allele_subset whose snp_id also occurs in the gene subset.
# Author's note: every SNP in allele_data is expected to be present in the DisGeNET table.
snp_in_disgenet = allele_subset['snp_id'].isin(disgenet_total_genes['snp_id'])
allele_subset[snp_in_disgenet]

Unnamed: 0,snp_id,gene_name,type,allele_string,allele_alt,from_aa,to_aa,aa_start,aa_end,consequence
0,rs10015979,HTT,protein_coding,A/G,G,-,,-,-,intron_variant
1,rs10023020,EXOSC9,protein_coding,G/A,A,-,,-,-,intron_variant
2,rs10030552,MAML3,protein_coding,C/A/G,A,-,,-,-,intron_variant
3,rs10030552,MAML3,protein_coding,C/A/G,G,-,,-,-,intron_variant
4,rs10038058,WDR36,protein_coding,A/G/T,G,-,,-,-,intron_variant
...,...,...,...,...,...,...,...,...,...,...
11231,rs9287233,-,lncRNA,A/C/T,T,-,,-,-,downstream_gene_variant
11232,rs9287237,FMN2,protein_coding,G/A/T,A,-,,-,-,intron_variant
11233,rs9287237,FMN2,protein_coding,G/A/T,T,-,,-,-,intron_variant
11234,rs9287838,FIGN,protein_coding,G/A/C,A,-,,-,-,intron_variant


In [47]:
# Same combination expressed as an index join on snp_id; the author expects it to match the
# merge-based result.
genes_by_snp = disgenet_total_genes.set_index('snp_id')
alleles_by_snp = allele_subset.set_index('snp_id')
genes_by_snp.join(alleles_by_snp, lsuffix='_left', rsuffix='_right')

Unnamed: 0_level_0,uniprot_acc,gene_name_left,gene_id,chromosome,position,alleles,class,source,consequence_left,gene_name_right,type,allele_string,allele_alt,from_aa,to_aa,aa_start,aa_end,consequence_right
snp_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
rs1000005,,LINC00945,101930746.0,21,33060745,G/C,snv,GWASCAT,intron variant,,,,,,,,,
rs10000770,O15327,INPP4B,8821.0,4,142693109,C/T,snv,GWASCAT,intron variant,,,,,,,,,
rs10000770,O15327,INPP4B,8821.0,4,142693109,C/T,snv,GWASCAT,intron variant,,,,,,,,,
rs1000091588,P38935,IGHMBP2,3508.0,11,68935374,C/T,snv,CLINVAR,stop gained,,,,,,,,,
rs1000091588,P38935,IGHMBP2,3508.0,11,68935374,C/T,snv,CLINVAR,stop gained,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
rs999921351,Q9NQW8,CNGB3,54714.0,8,86628939,C/T,snv,CLINVAR,stop gained,,,,,,,,,
rs999943,Q14573,ITPR3,3710.0,6,33656956,A/G,snv,GWASCAT;GWASDB,intron variant,,,,,,,,,
rs999943,Q14573,ITPR3,3710.0,6,33656956,A/G,snv,GWASCAT;GWASDB,intron variant,,,,,,,,,
rs999944,,,,2,64822719,A/G,snv,GWASCAT,intergenic variant,,,,,,,,,
