In [None]:
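# function to determine the type of a variant
# inputs: reference base(s) and alternate base(s) from a VCF record
# outputs: type of variant: 'substitution', 'insertion', 'deletion' or 'unknown'
# currently, only these three types are identified; every other variant is reported as 'unknown'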

def get_variant_type(REF, ALT):
    """Classify a VCF variant from its REF and ALT allele strings.

    Args:
        REF: reference allele string from the VCF record.
        ALT: alternate allele string from the VCF record (a single allele).

    Returns:
        'substitution' when both alleles are one base long,
        'insertion' when REF is one base and ALT is longer,
        'deletion' when REF is longer than ALT,
        'unknown' for everything else, including any allele that is not a
        plain nucleotide string (comma-separated multi-allelic ALT, symbolic
        alleles such as '<DEL>', '*', '.').
    """
    nucleotides = set('ACGTN')

    # Lengths are only meaningful for plain base strings; without this guard
    # 'A,T' or '<DEL>' would be reported as an insertion purely because the
    # text is longer than one character.
    for allele in (REF, ALT):
        if not set(allele.upper()) <= nucleotides:
            return 'unknown'

    if len(REF) == 1 and len(ALT) == 1:
        return 'substitution'

    if len(REF) == 1 and len(ALT) > 1:
        return 'insertion'

    if len(REF) > len(ALT):
        return 'deletion'

    # e.g. equal-length multi-base changes, or insertions with a multi-base REF
    return 'unknown'

In [None]:
# function to put variant into hgvs notation
# inputs:  chromosome, variant position, reference base(s), and alternate base(s) from VCF, 
# and variant type (from function get_variant_type)
# outputs: hgvs notation
# currently, only some variant types covered

def get_hgvs_notation(chromosome, position, REF, ALT, varianttype):
    """Build a genomic (g.) HGVS string for a VCF variant.

    Args:
        chromosome: chromosome name, used as the HGVS reference prefix.
        position: 1-based VCF POS of the first REF base (int).
        REF: reference allele string from the VCF record.
        ALT: alternate allele string from the VCF record.
        varianttype: 'substitution', 'insertion', 'deletion' or anything
            else, as produced by get_variant_type.

    Returns:
        The HGVS string. Variant types that cannot be expressed here yield
        the fixed placeholder '1:g.25362501C>A'.

    Note:
        Positions are taken as given by the VCF record; no shifting of
        indels within repeats is attempted, since that would need the
        reference sequence.
    """
    # placeholder; ideally, all variant types are put into proper hgvs notation
    placeholder = '1:g.25362501C>A'
    prefix = '{}:g.'.format(chromosome)

    if varianttype == 'substitution':
        return '{}{}{}>{}'.format(prefix, position, REF, ALT)

    if varianttype == 'insertion':
        # ALT[0] is the anchor base shared with REF; the inserted bases sit
        # between the anchor and the next reference position.
        return '{}{}_{}ins{}'.format(prefix, position, position + 1, ALT[1:])

    if varianttype == 'deletion' and len(ALT) < len(REF):
        last_deleted = position + len(REF) - 1

        if REF.upper().startswith(ALT.upper()):
            # The leading bases shared with ALT are kept, not deleted, so the
            # deleted range starts right after them.
            first_deleted = position + len(ALT)
            if first_deleted == last_deleted:
                return '{}{}del'.format(prefix, first_deleted)
            return '{}{}_{}del'.format(prefix, first_deleted, last_deleted)

        if ALT and set(ALT.upper()) <= set('ACGTN'):
            # ALT is not a prefix of REF: the whole REF span is replaced.
            return '{}{}_{}delins{}'.format(prefix, position, last_deleted, ALT)

    return placeholder

In [None]:
# function to process Ensembl VEP hgvs API response from POST request
# input: response from API, already parsed by json.loads(), as a list with one dict per variant
# outputs: lists of gene symbol, most severe consequence, and minor allele frequency, one entry per variant in the input data

def _first_value(entries, key, default='unknown'):
    """Return entries[i][key] for the first entry containing key, else default."""
    return next((entry[key] for entry in entries or () if key in entry), default)


def get_info(data):
    """Extract gene, consequence and minor allele frequency per variant.

    Args:
        data: sequence of per-variant dicts (the parsed API response).

    Returns:
        Tuple (gene, effect, minoralfreq) of three lists, each with one entry
        per item of data, in the same order. A value that is not present for
        a variant is reported as the string 'unknown'.
    """
    gene = []
    effect = []
    minoralfreq = []

    for record in data:
        # Take the first transcript consequence that carries a gene symbol;
        # a missing or empty list simply yields 'unknown'.
        gene.append(
            _first_value(record.get('transcript_consequences'), 'gene_symbol')
        )

        effect.append(record.get('most_severe_consequence', 'unknown'))

        # Not every co-located variant carries a frequency, so scan for the
        # first one that does instead of inspecting fixed positions.
        minoralfreq.append(
            _first_value(record.get('colocated_variants'), 'minor_allele_freq')
        )

    return gene, effect, minoralfreq