In [1]:
import pandas as pd
import vcf

In [2]:
# columns retained from the ANNOVAR output (selected by name):
# variant coordinates, alleles and the gnomAD genome-wide frequency
cols = ['Chr', 'Start', 'End', 'Ref', 'Alt', 'gnomAD_genome_ALL']

# read the ANNOVAR-annotated GTEx eQTL table (tab-delimited) and keep an
# independent copy restricted to the columns listed above
gtex = pd.read_csv('gtex_eqtls.hg19_multianno.txt', sep='\t').loc[:, cols].copy()

In [3]:
# open a reader on the combined VCF; it is queried per-site later on
combined_vcf_path = '/data3/16p12_WGS/phasing/whatshap/combined.vcf.gz'
vcfr = vcf.VCFReader(filename=combined_vcf_path)

In [4]:
# Annotate every GTEx eQTL row with the matching record from the VCF:
# the VCF ALT allele(s), the FILTER field and each sample's GT call.
# Rows for which no record with the same REF is found keep the '.'
# placeholder in all of the added columns.

# make placeholder columns
gtex['vcf_alt'] = '.'
gtex['filter'] = '.'
for samp in vcfr.samples:
    gtex[samp] = '.'

# sites where more than one VCF record matched, as (chrom, start, end, n_records)
multi_match_sites = []

# label previous chrom (this is to help print progress)
previous_chrom = ''

# for each row in gtex, get FILTER and sample GTs
# (only the columns needed for the lookup are iterated)
for i, chrom, start, end, ref in zip(gtex.index, gtex['Chr'], gtex['Start'],
                                     gtex['End'], gtex['Ref']):
    # print progress
    if chrom != previous_chrom:
        print(chrom)
        previous_chrom = chrom

    # number of VCF records at this site whose REF matches the GTEx REF
    num = 0

    # get record(s) from vcf
    # NOTE(review): start-1 presumably converts the 1-based GTEx position to
    # the 0-based, half-open interval fetch() expects -- confirm against the
    # installed PyVCF version.
    for r in vcfr.fetch(chrom, start - 1, end):
        # if ref doesn't equal gtex ref then skip
        if r.REF != ref:
            continue

        # format alternate allele(s) as a comma-separated string
        gtex.at[i, 'vcf_alt'] = ','.join(str(a) for a in r.ALT)

        # NOTE(review): FILTER is stored exactly as PyVCF returns it (it looks
        # like a list / None rather than a string) -- confirm this is the
        # representation wanted in the output table.
        gtex.at[i, 'filter'] = r.FILTER

        # copy each sample's genotype into that sample's column
        for call in r.samples:
            gtex.at[i, call.sample] = call['GT']

        num += 1

    # More than one matching record: the values written last win. Report the
    # site and keep going -- stopping here would leave every remaining row
    # unannotated while the table still gets written out afterwards.
    if num > 1:
        print(chrom, start, end, num)
        multi_match_sites.append((chrom, start, end, num))

if multi_match_sites:
    print(len(multi_match_sites),
          'site(s) matched more than one VCF record; the last match was kept')
            

chr1
chr2
chr3
chr4
chr5
chr6
chr7
chr8
chr9
chr10
chr11
chr12
chr13
chr14
chr15
chr16
chr17
chr18
chr19
chr20
chr21
chr22
chrX


In [5]:
# save the annotated table as a tab-separated file, without the row index
gtex.to_csv('gtex_eqtls_annotated_gts.tsv', index=False, sep='\t')

In [8]:
# rows still carrying the '.' placeholder, i.e. no VCF record was written for them
gtex.loc[gtex['vcf_alt'].eq('.')]

Unnamed: 0,Chr,Start,End,Ref,Alt,gnomAD_genome_ALL,vcf_alt,filter,SG001,SG003,...,SG046,SG037,SG038,SG039,SG040,SG151,SG149,SG148,SG152,SG150
39,chr1,1179227,1179227,G,A,0.0143,.,.,.,.,...,.,.,.,.,.,.,.,.,.,.
852,chr1,5813923,5813923,G,A,0.0273,.,.,.,.,...,.,.,.,.,.,.,.,.,.,.
853,chr1,5814537,5814537,C,T,0.0262,.,.,.,.,...,.,.,.,.,.,.,.,.,.,.
855,chr1,6398522,6398522,G,C,0.0226,.,.,.,.,...,.,.,.,.,.,.,.,.,.,.
883,chr1,7363581,7363581,G,A,0.0313,.,.,.,.,...,.,.,.,.,.,.,.,.,.,.
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
287273,chrX,155230932,155230932,A,G,0.4370,.,.,.,.,...,.,.,.,.,.,.,.,.,.,.
287274,chrX,155232838,155232838,A,C,0.2245,.,.,.,.,...,.,.,.,.,.,.,.,.,.,.
287275,chrX,155233098,155233098,T,C,0.5313,.,.,.,.,...,.,.,.,.,.,.,.,.,.,.
287276,chrX,155234730,155234730,T,C,0.2516,.,.,.,.,...,.,.,.,.,.,.,.,.,.,.


In [11]:
# (rows, columns) of the annotated table
gtex.shape

(287278, 40)