In [2]:
import pandas as pd

In [5]:
# load in annovar annotated GTEx eQTLs
gtex = pd.read_csv('gtex_eqtls_annotated_gts.tsv', sep='\t')

# remove rows with no vcf record
gtex = gtex[gtex.vcf_alt != '.'].copy()

In [30]:
samples = [s for s in gtex.columns if s.startswith('SG')]

# create dict to store values
d = {}
for samp in samples:
    d[samp] = 0
    
# iterate through all gtex eqtls
for i, row in gtex.iterrows():
    # get alt allele defined by gtex
    alt = row['Alt']
    
    # get frequency of alternat allele
    freq_alt = row['gnomAD_genome_ALL']
    if freq_alt == '.':
        freq_alt = 0
    else:
        freq_alt = float(freq_alt)
    
    # get allele number of alternate allele
    vcf_alt = row['vcf_alt']
    vcf_alt = vcf_alt.split(',')
    alt_number = 0
    for j in range(len(vcf_alt)):
        allele = vcf_alt[j]
        if allele == alt:
            alt_number = j + 1

    # if alt allele not in vcf then skip
    if alt_number == 0:
        continue
    
    # is alt of ref the minor allele?
    if freq_alt < 0.5:
        maf_number = str(alt_number)
    else:
        maf_number = str(0)
    
    # for each sample, does it have the minor allele
    for samp in samples:
        gt = row[samp]
        has_minor_eqtl = maf_number in gt
        if has_minor_eqtl:
            d[samp] = d[samp] + 1

In [40]:
df = pd.DataFrame(pd.Series(d), columns=['minor_eqtl'])

df.index.name = 'sample'
df.to_csv('minor_allele_summary.tsv', sep='\t')

In [41]:
df

Unnamed: 0_level_0,minor_eqtl
sample,Unnamed: 1_level_1
SG001,104471
SG003,104984
SG006,101937
SG007,102677
SG002,106468
SG011,108101
SG025,101238
SG022,105410
SG024,104920
SG021,104531
