In [1]:
import pandas as pd
import numpy as np
import math
from collections import defaultdict
from os import environ
import pickle
import fileinput
import sys
import datetime
from mapping_func import create_exon_pos_table, find_chrom_bps, protein_pos_to_hmm_state_and_aa
from calc_exac_freq_func import create_alt_codon, exac_validation_checks, retrieve_codon_seq, codon_table
from indels_func import is_indel, table_editing, indel_type
from entropy_func import JSD_background, JSD, SE, JSdiv
from af_format_calc import format_af, calculate_af_adj
from IPython.core.display import HTML
HTML("<style>.container { width:100% !important; }</style>")

In [2]:
#Work around "The history saving thread hit an unexpected error" when running on the server,
#as described here: https://github.com/ipython/ipython/issues/1342
#The workaround sets the history file of the shell's HistoryManager to ':memory:'.
#NOTE(review): IPython.config is the pre-4.0 location of Config (later moved to traitlets.config) - confirm
#the IPython version on the server before reusing this cell elsewhere.
from IPython.config.loader import Config
from IPython.core.interactiveshell import InteractiveShell

cfg = Config()
cfg.HistoryManager.hist_file = ':memory:'
#Fetch the shell instance, passing it the configuration above
ip = InteractiveShell.instance(config=cfg)



In [24]:
#Getting path
curr_dir = !pwd

#Reading the list of filtered domains
with open(curr_dir[0]+"/../5.domains_stats/filtered10_list.pik", 'rb') as handle:
    filtered_domains_list10 = pickle.load(handle)
filtered_domains_list10.sort()

#Getting the domain index as environment variable called "idx"
try:
    domain_index = int(environ['idx'])
except:
    domain_index = 2

domain_name = filtered_domains_list10[domain_index]

print domain_name

MHC_II_beta


In [25]:
#Reading the HMMER matches table of the domain (chrom_num is kept as a string)
in_path = curr_dir[0]+"/../3.parse_HMMER/hmm_domains/pfam-v30/"
filename = domain_name+".csv"
domain_data = pd.read_csv(in_path+filename, sep='\t', index_col=0, dtype={"chrom_num": str})

#Sort the domain data and renumber the rows
sorted_domain_data = domain_data.sort_values(by=["chrom_num", "gene", "TargetStart"]).reset_index(drop=True)

#Get the canonic protein ids of the domain (a pickled object that is indexed by Ensembl gene id)
with open(curr_dir[0]+"/../4.parse_Uniprot/domains_canonic_prot/pfam-v30/"+domain_name+"_canonic_prot.pik", 'rb') as handle:
    canonic_protein = pickle.load(handle)

#The chromosome names that are analyzed: 1-22 followed by X and Y
chromosome_names = list(map(str, range(1, 23))) + ["X", "Y"]

In [26]:
#Counting how many domain instances are excluded because their chromosome name is not one of chromosome_names.
chrom_names_list = sorted_domain_data["chrom_num"].tolist()
strange_chrom_sum = sum(1 for name in chrom_names_list if name not in chromosome_names)
print("Leaving outside "+str(strange_chrom_sum)+" out of "+str(len(chrom_names_list)))

Leaving outside 108 out of 124


In [27]:
def stop_codon_notation(aa):
    """Converting the HMMER 'X' char to '*' for stop codon notation uniformity.

    Only the upper-case 'X' is converted; any other value is returned unchanged.
    """
    return "*" if (aa == 'X') else aa

In [28]:
def validate_HMMER_hg19_codon(bp_ref, aa, chrom_pos_list, protein_pos, hmm_state):
    """Validation: checking that the codon sequence retrieved from hg19 matches the HMMER amino-acid.

    bp_ref is upper-cased and translated with codon_table. When the translation differs from aa,
    two lines are printed: the location arguments, and an error message prefixed with this function's name.
    Nothing is returned and nothing is raised on a mismatch.
    """
    ref_codon = bp_ref.upper()
    translated_aa = codon_table[ref_codon]

    #Nothing to report when the translation agrees with HMMER
    if (translated_aa == aa):
        return

    #For error logging
    functionNameAsString = sys._getframe().f_code.co_name

    print("chrom_pos_list = "+str(chrom_pos_list)+" protein_pos = "+str(protein_pos)+" hmm_state = "+str(hmm_state))
    print(functionNameAsString+" Error: hg19 codon sequence retrieved "+ref_codon+"="+translated_aa+" doesn't match HMMER amino-acid "+aa)

In [8]:
def validate_exac_aa(exac_prot_data, exac_alt_aa, alt_aa, chrom_pos):
    """Validation: checking if the calculated alt aa matches the ExAC alt aa.

    The comparison is made only when exac_prot_data is truthy. A mismatch is reported with a
    printed error line (prefixed with this function's name); nothing is returned or raised.
    """
    #Without ExAC protein data there is nothing to compare against
    if (not exac_prot_data):
        return

    if (exac_alt_aa != alt_aa):
        #For error logging
        functionNameAsString = sys._getframe().f_code.co_name
        print(functionNameAsString+" "+ str(chrom_pos)+" Error: the ExAC alt aa "+exac_alt_aa+" doesn't match my alt aa calculation "+alt_aa)

In [18]:
def change_ref_aa(res_dict, alterations_af_dict, alterations_af_adj_dict, aa, aa_sum, aa_adj_sum, bp_af_dict, bp_af_adj_dict):
    """Changing the ref aa and updating all relevant dictionaries.

    All the updates are done in place on the given dictionaries; nothing is returned.

    res_dict: the results dictionary; its "aa_ref", "bp_ref", "af" and "af_adj" entries are read and overwritten.
    alterations_af_dict / alterations_af_adj_dict: alt aa -> list of frequencies (plain / adjusted).
        The old ref aa is added to them and aa is removed from them.
    aa: the amino-acid that becomes the new reference.
    aa_sum / aa_adj_sum: the summed frequency (plain / adjusted) of aa.
    bp_af_dict / bp_af_adj_dict: alt codon -> frequency (plain / adjusted).
        The old ref codon is added to them and the new ref codon is removed from them.

    A KeyError is raised by the final deletions if aa, or the codon that ends up as
    res_dict["bp_ref"], is missing from the corresponding dictionaries.
    """
    
    #Adding the reference allele to the alterations dicts, with the frequency left over
    #after summing all the alterations (the inner sum flattens the per-aa lists into one list)
    old_ref = res_dict["aa_ref"]
    sum_of_all_alt = sum(sum(alterations_af_dict.values(), []))
    sum_of_all_alt_adj = sum(sum(alterations_af_adj_dict.values(), []))
    alterations_af_dict[old_ref] = [1 - sum_of_all_alt]
    alterations_af_adj_dict[old_ref] = [1 - sum_of_all_alt_adj]

    #Updating the aa to be the ref
    res_dict["aa_ref"] = aa

    #Finding the codon that codes aa with the highest adjusted frequency and updating bp_ref
    #(because of the >=, a later codon with an equal frequency replaces an earlier one)
    old_bp_ref = res_dict["bp_ref"]
    max_af = 0
    for codon in (bp_af_adj_dict.keys()):
        if (codon_table[codon.upper()] == aa):
            if (bp_af_adj_dict[codon] >= max_af):
                max_af = bp_af_adj_dict[codon]
                res_dict["bp_ref"] = codon
                
    #Adding the old ref bp to the bp dicts
    #NOTE(review): the value stored is whatever res_dict["af"] / res_dict["af_adj"] hold at call time. In
    #calc_exac_maf_data that is the running sum of the alterations visited before this one, so it depends
    #on the iteration order - confirm this is the intended frequency for the old reference codon.
    bp_af_dict[old_bp_ref] = res_dict["af"]
    bp_af_adj_dict[old_bp_ref] = res_dict["af_adj"]
    
    #Updating the frequencies in res_dict to the complement of the new ref aa frequency,
    #i.e. the combined frequency of everything that is not the new ref aa
    res_dict["af"] =(1 - aa_sum)
    res_dict["af_adj"] = (1 - aa_adj_sum)

    #Deleting the new ref aa from the alterations dicts
    del alterations_af_dict[aa]
    del alterations_af_adj_dict[aa]
    
    #Deleting the new ref codon from the bps dicts
    del bp_af_dict[res_dict["bp_ref"]]
    del bp_af_adj_dict[res_dict["bp_ref"]]

In [21]:
def change_syn_ref_bp(new_bp_ref, old_bp_ref, res_dict, bp_af_dict, bp_af_adj_dict):
    """Switching the ref codon to a synonymous codon and updating the codon frequency dicts in place.

    res_dict["bp_ref"] is set to new_bp_ref. In both codon -> frequency dicts, old_bp_ref is added
    with the frequency left over after summing all the current entries, and new_bp_ref is removed.
    Nothing is returned; a KeyError is raised if new_bp_ref is missing from one of the dicts.
    """
    #The high freq. codon becomes the ref
    res_dict["bp_ref"] = new_bp_ref

    #The left-over frequency of each dict is computed before any of them is modified
    freq_tables = (bp_af_adj_dict, bp_af_dict)
    leftovers = [1 - sum(table.values()) for table in freq_tables]

    #The old ref joins the alterations with the left-over frequency
    for table, leftover in zip(freq_tables, leftovers):
        table[old_bp_ref] = leftover

    #The new ref is no longer an alteration
    for table in freq_tables:
        del table[new_bp_ref]

In [20]:
#A function that returns a dict with the MAF info for the protein position and corresponding chromosomal location
def calc_exac_maf_data(chrom_pos_list, chrom_gene_table, indels_table, protein_pos, aa, chrom_raw_data, chrom, hmm_state):
    """Collecting the ExAC alterations data of one protein position (one codon).

    chrom_pos_list: the chromosome positions of the codon bases; the index in the list is used as the
        position inside the codon.
    chrom_gene_table: the ExAC entries of the gene (a DataFrame with the columns that are read below,
        e.g. "pos", "filter", "ref", "alt", "AF", "AN", "AN_Adj", "AC_Adj", ...).
    indels_table: not used by this function (kept for the callers).
    protein_pos: the protein position, saved in the result and passed to the validation helpers.
    aa: the HMMER amino-acid of the position.
    chrom_raw_data, chrom: passed to retrieve_codon_seq / create_alt_codon; chrom is also saved in the result.
    hmm_state: used only for the validation error message.

    Returns a tuple (res_dict, indels_cnt, errors_cnt, filter_cnt): the results dictionary, and the numbers
    of entries that were skipped as indels, that had validation errors, and that did not have a PASS filter.
    """

    #Pairs of (results key, ExAC column) that are copied as-is for every alteration that is counted
    copied_columns = (("an_adj", "AN_Adj"), ("an_afr", "AN_AFR"), ("an_amr", "AN_AMR"), ("an_eas", "AN_EAS"),
                      ("an_fin", "AN_FIN"), ("an_nfe", "AN_NFE"), ("an_oth", "AN_OTH"), ("an_sas", "AN_SAS"),
                      ("ac_adj", "AC_Adj"), ("ac_afr", "AC_AFR"), ("ac_amr", "AC_AMR"), ("ac_eas", "AC_EAS"),
                      ("ac_fin", "AC_FIN"), ("ac_het", "AC_Het"), ("ac_hom", "AC_Hom"), ("ac_nfe", "AC_NFE"),
                      ("ac_oth", "AC_OTH"), ("ac_sas", "AC_SAS"))

    #Initializing the results dictionary
    res_dict = {}
    res_dict["chrom"] = chrom
    res_dict["chrom_pos"] = chrom_pos_list
    res_dict["prot_pos"] = protein_pos
    res_dict["aa_ref"] = stop_codon_notation(aa)
    res_dict["bp_ref"] = retrieve_codon_seq(chrom_pos_list, chrom_raw_data, chrom)
    #an counters lists
    an_dict = {k: [] for k in ["an", "an_adj", "an_afr", "an_amr", "an_eas", "an_fin", "an_nfe", "an_oth", "an_sas"]}
    res_dict.update(an_dict)
    #ac counters lists
    ac_dict = {k: [] for k in ["ac_adj", "ac_afr", "ac_amr", "ac_eas", "ac_fin", "ac_het", "ac_hom", "ac_nfe", "ac_oth", "ac_sas"]}
    res_dict.update(ac_dict)
    res_dict["SIFT"] = []
    res_dict["PolyPhen"] = []
    res_dict["clin_sig"] = []
    res_dict["SwissProt"] = []
    res_dict["ens_prot"] = []

    #Initializing more variables
    indels_cnt = 0
    errors_cnt = 0
    filter_cnt = 0
    inframe_ids = []
    alterations_af_dict = defaultdict(list)
    alterations_af_adj_dict = defaultdict(list)
    bp_af_dict = dict()
    bp_af_adj_dict = dict()
    bp_list = [] #Will not update when changing ref aa/bp, has to correspond to the population prob.

    #Validation: checking that the returned codon sequence from hg19 match the HMMER amino-acid
    #NOTE(review): the raw HMMER aa is passed here, not the '*'-normalized res_dict["aa_ref"] - confirm that
    #this is intended for positions where HMMER reports 'X'.
    validate_HMMER_hg19_codon(res_dict["bp_ref"], aa, chrom_pos_list, protein_pos, hmm_state)

    #Going over all 3 codon positions
    for alt_codon_pos, chrom_pos in enumerate(chrom_pos_list):

        #Retreiving relevant ExAC entries (no entries for this position - nothing is added)
        chrom_alter_table = chrom_gene_table[chrom_gene_table["pos"] == chrom_pos]
        chrom_alter_table = chrom_alter_table.reset_index(drop=True)

        #In case there are several alterations for that position, iterating
        for _, chrom_alter in chrom_alter_table.iterrows():

            #Skipping alterations that were filtered out by VQSR
            if (chrom_alter["filter"] != "PASS"):
                filter_cnt += 1
                continue

            #Extracting ref and alt
            exac_ref_bp = chrom_alter["ref"]
            exac_alt_bp = chrom_alter["alt"]

            #Check if indel - skip (we assume the whole protein may not function and don't add those to the MAF count)
            if (is_indel(exac_ref_bp, exac_alt_bp, chrom_alter) != indel_type.NO_INDEL):
                indels_cnt += 1
                continue

            #Perform validation checks (comparing ExAC and HMMER data)
            (exac_prot_data, exac_alt_aa, exac_alt_codon, errors) = exac_validation_checks(chrom_alter, protein_pos, aa, alt_codon_pos, chrom_pos, res_dict["bp_ref"])
            if (errors):
                errors_cnt += 1

            #Extracting ExAC allele frequency data
            af = chrom_alter["AF"]
            an = int(chrom_alter["AN"])
            an_adj = int(chrom_alter["AN_Adj"])
            ac_adj = chrom_alter["AC_Adj"]

            #Calculating the alteration relevant data (an alt codon that isn't 3 bps long is skipped)
            alt_codon = create_alt_codon (exac_ref_bp, exac_alt_bp, res_dict["bp_ref"], alt_codon_pos, chrom_raw_data)
            if (len(alt_codon) != 3):
                continue
            alt_aa = codon_table[alt_codon.upper()]

            #Validation: ExAC aa match the calculated alt
            validate_exac_aa(exac_prot_data, exac_alt_aa, alt_aa, chrom_pos)

            #Calculating the allele frequency adjusted
            af_adj = calculate_af_adj(an_adj, ac_adj)

            #Saving the bp with the frequency (for both syn and nonsyn)
            bp_af_dict[alt_codon] = af
            bp_af_adj_dict[alt_codon] = af_adj
            bp_list.append(alt_codon)

            #Saving the allele numbers and allele counts
            res_dict["an"].append(an)
            for res_key, exac_column in copied_columns:
                res_dict[res_key].append(chrom_alter[exac_column])

            #Non-synonymous(!!!) - logging the alteration in the dictionary
            if (alt_aa != res_dict["aa_ref"]):
                alterations_af_dict[alt_aa].append(float(af))
                alterations_af_adj_dict[alt_aa].append(af_adj)

            #Saving the SIFT and PolyPhen scores (for both syn and nonsyn)
            res_dict["SIFT"].append(chrom_alter["SIFT"])
            res_dict["PolyPhen"].append(chrom_alter["PolyPhen"])
            res_dict["clin_sig"].append(chrom_alter["clin_sig"])

            #Saving SwissProt id and Ensembl prot id
            res_dict["SwissProt"].append(chrom_alter["SWISSPROT"])
            res_dict["ens_prot"].append(chrom_alter["ENSP"])

    #Calculating the overall MAF from the alteration dicts
    res_dict["af"] = 0
    res_dict["af_adj"] = 0

    res_dict["aa_ref_orig"] = res_dict["aa_ref"]
    #Iterating over a snapshot of the keys, since change_ref_aa adds and removes keys of this dict
    for candidate_aa in list(alterations_af_dict.keys()):
        aa_sum = sum(alterations_af_dict[candidate_aa])
        aa_adj_sum = sum(alterations_af_adj_dict[candidate_aa])

        #Checking if any alteration is above 0.5, and changing the ref accordingly
        if (aa_sum > 0.5):

            #Update all relevent information to switching a ref aa
            change_ref_aa(res_dict, alterations_af_dict, alterations_af_adj_dict, candidate_aa, aa_sum, aa_adj_sum, bp_af_dict, bp_af_adj_dict)
            break
        else:
            res_dict["af"] += aa_sum
            res_dict["af_adj"] += aa_adj_sum

        #Fix the AF format
        #NOTE(review): this runs once per alteration inside the loop, and is skipped when the ref was
        #switched (break) or when there are no alterations - confirm it shouldn't run once after the loop.
        res_dict["af"] = format_af(res_dict["af"])
        res_dict["af_adj"] = format_af(res_dict["af_adj"])

    #Checking if any syn aleration bp is above 0.5 (nonsyn were already checked), and changing ref bp accordingly.
    #Iterating over a snapshot of the keys, since change_syn_ref_bp adds and removes keys of this dict.
    for codon in list(bp_af_adj_dict.keys()):
        #The codon is upper-cased for the lookup, like in every other codon_table lookup
        if ((codon_table[codon.upper()] == res_dict["aa_ref"]) and (bp_af_adj_dict[codon] > 0.5)):
            new_bp_ref = codon
            old_bp_ref = res_dict["bp_ref"]
            change_syn_ref_bp(new_bp_ref, old_bp_ref, res_dict, bp_af_dict, bp_af_adj_dict)

    res_dict["alterations_af_adj_dict"] = alterations_af_adj_dict
    res_dict["bp_af_dict"] = bp_af_dict
    res_dict["bp_af_adj_dict"] = bp_af_adj_dict
    res_dict["bp_list"] = bp_list

    return (res_dict, indels_cnt, errors_cnt, filter_cnt)

In [29]:
%%time

chrom_path = curr_dir[0]+"/../1.parse_ExAC/parsed/"
chrom_filename = "parsed_chrom"
states_dict = defaultdict(list)
print "Starting...."

#For error logging
functionNameAsString = sys._getframe().f_code.co_name

#A list of all the ens genes
domain_ens_genes_all = []

#A list to count indels per gene
domain_ens_genes_indels = []

#A list to count validation errors per gene
domain_ens_genes_errors = []

#A list to count ExAC filtered-out per gene
domain_ens_genes_filter = []

for chrom in chromosome_names:
    
    #Filtering the domain data relevant to this chromosome
    domain_chrom_data = sorted_domain_data[sorted_domain_data["chrom_num"] == chrom]
    
    #Loading the ExAC parsed data of this chromosome
    fields = ["chrom", "pos", "ref", "alt", "filter", "AC", "AC_AFR", "AC_AMR", "AC_Adj", "AC_EAS", "AC_FIN", "AC_Het", "AC_Hom", "AC_NFE", "AC_OTH", "AC_SAS", 
              "AF", "AN", "AN_AFR", "AN_AMR", "AN_Adj", "AN_EAS", "AN_FIN", "AN_NFE", "AN_OTH", "AN_SAS", "gene", "feature", "feature_type", "conseq",
              "prot_pos", "amino_acids", "codons", "strand", "ENSP", "SWISSPROT", "exon", "intron", "domains", "SIFT", "PolyPhen","clin_sig"]
    chrom_csv = pd.read_csv(chrom_path+chrom_filename+chrom+".csv", sep='\t', index_col=0, usecols=fields,
                           dtype={"AC": int, "AC_AFR": int, "AC_AMR": int, "AC_Adj": int, "AC_EAS": int,
                               "AC_FIN": int, "AC_Het": int, "AC_Hom": int, "AC_NFE": int, "AC_OTH": int, "AC_SAS": int, "AF": float, "AN": int, "AN_AFR": int,
                               "AN_AMR": int, "AN_Adj": int, "AN_EAS": int, "AN_FIN": int, "AN_NFE": int, "AN_OTH": int, "AN_SAS": int, "prot_pos": str})
    chrom_csv = chrom_csv.sort_values(by=["pos"])
    chrom_csv = chrom_csv.reset_index(drop=True)
    chrom_csv.fillna('', inplace=True)
    chrom_csv["comments"] = ""
    
    #Getting a list of all the relevant ensembl gene ids for this chromosome
    domain_ens_genes = (domain_chrom_data["gene"]).unique()
    domain_ens_genes_all.extend(domain_ens_genes)
    
    #For each ensembl gene in the domain data - finding all the ExAC alterations
    for ens_gene in domain_ens_genes:
        
        #Filtering the domain data for this gene according to the canonical protein id
        canonic_prot = canonic_protein[ens_gene]
        canonic_prot_t = canonic_prot[:canonic_prot.find(".")] #Trimming the ".#" at the end
        domain_gene_table = domain_chrom_data[domain_chrom_data["prot"] == canonic_prot]
        #Making sure that if two HMM-matches overlaps, the higher bit score will come first in the table
        domain_gene_table = domain_gene_table.sort_values(by="BitScore", ascending=False)
        domain_gene_name = domain_gene_table["hugoSymbol"].unique()[0]
        if (len(domain_gene_table["hugoSymbol"].unique()) > 1):
            print functionNameAsString+" Error: "+ens_gene+": more than one Hugo symbol"  #sanity check
        
        #Creating a table of the exons for this gene, according to the canonical protein
        chrom_raw_data = domain_gene_table["chromosome"].unique()[0] #there should be only one element here
        if (len(domain_gene_table["chromosome"].unique()) > 1):
            print functionNameAsString+" Error: "+ens_gene+": more than one chromosome raw data" #sanity check
        targetid = domain_gene_table["#TargetID"].unique()[0]
        exon_table = create_exon_pos_table(chrom_raw_data, targetid)
        
        #Filtering the chromosome data to the gene exons region
        exons_start_pos = min(exon_table["start_pos"][0],exon_table["start_pos"][len(exon_table)-1]) #in case of complelemt, the minimal position could be at the last row
        exons_end_pos = max(exon_table["end_pos"][0],exon_table["end_pos"][len(exon_table)-1]) #in case of complelemt, the maximal position could be at the first row
        chrom_gene_table = chrom_csv[chrom_csv["pos"] >= int(exons_start_pos)][chrom_csv["pos"] <= int(exons_end_pos)][chrom_csv["ENSP"] == canonic_prot_t]
        chrom_gene_table = chrom_gene_table.reset_index(drop=True)
        
        #Handling indels
        indels_table = table_editing(chrom_gene_table)
        
        #A counter for indels inside the domain
        protein_indels_cnt = 0
        #A counter for validation errors inside the domain
        protein_errors_cnt = 0
        #A counter for ExAC filter-out inside the domain
        protein_filter_cnt = 0
        
        #Iterating over the amino-acids of the protein
        prot_len = int(domain_gene_table["length"].unique()[0])
        
        for protein_pos in range(1,prot_len+1):
    
            #Trying to match HMM-state, and retreive the aa from HMMER results
            (hmm_state, aa) = protein_pos_to_hmm_state_and_aa(protein_pos, domain_gene_table) #TODO: what happens when two matches overlap? maybe sort to the best bit score?
                
            #If there's a match to HMM-state: find the corresponding codon bps chromosome positions
            if (hmm_state > 0):
                chrom_pos_list =find_chrom_bps(protein_pos, exon_table, chrom_raw_data)
                
                #Analysis of the amino-acid MAF and realted data, returned in a dictionary
                (info_dict, indels_cnt, errors_cnt, filter_cnt) = calc_exac_maf_data(chrom_pos_list, chrom_gene_table, indels_table, protein_pos, aa, chrom_raw_data, chrom, hmm_state)
                info_dict["ens_gene"] = ens_gene
                
                #Adding the dictionary to the HMM-state list
                states_dict[hmm_state].append(info_dict)
                
                #Adding the indels to the global counter
                protein_indels_cnt += indels_cnt
                
                #Adding the errors to the global counter
                protein_errors_cnt += errors_cnt
                
                #Adding the filtered to the global counter
                protein_filter_cnt += filter_cnt
        
        domain_ens_genes_indels.append(protein_indels_cnt)
        domain_ens_genes_errors.append(protein_errors_cnt)
        domain_ens_genes_filter.append(protein_filter_cnt)
        print "Finished protein "+ens_gene 
                                
    print "Finished chromosome "+chrom

!mkdir -p domains_states_dicts/pfam-v30/$domain_name
with open(curr_dir[0]+"/domains_states_dicts/pfam-v30/"+domain_name+"/"+domain_name+"_hmm_states_dict_"+datetime.date.today().strftime("%m.%d.%y")+".pik", 'wb') as handle:
    pickle.dump(states_dict, handle, protocol=pickle.HIGHEST_PROTOCOL)

Starting....
Finished chromosome 1
Finished chromosome 2
Finished chromosome 3
Finished chromosome 4
Finished chromosome 5
Finished protein ENSG00000179344.12
Finished protein ENSG00000196126.6
Finished protein ENSG00000198502.5
Finished protein ENSG00000223865.6
Finished protein ENSG00000232629.4
Finished protein ENSG00000241106.2
Finished protein ENSG00000242574.4
Finished chromosome 6
Finished chromosome 7
Finished chromosome 8
Finished chromosome 9
Finished chromosome 10
Finished chromosome 11
Finished chromosome 12
Finished chromosome 13
Finished chromosome 14
Finished chromosome 15
Finished chromosome 16
Finished chromosome 17
Finished chromosome 18
Finished chromosome 19
Finished chromosome 20
Finished chromosome 21
Finished chromosome 22
Finished chromosome X
Finished chromosome Y
CPU times: user 4min 45s, sys: 3min 33s, total: 8min 19s
Wall time: 11min 3s


In [23]:
#Inspecting the info dictionaries that were collected for HMM-state 84 (one dictionary per matching protein position)
#NOTE(review): the saved output below lists a chromosome 1 gene, while the run above reports proteins only on
#chromosome 6 - the output looks like it was left from an earlier run; confirm by re-running.
states_dict[84]

[{'PolyPhen': [],
  'SIFT': [],
  'SwissProt': [],
  'aa_ref': 'P',
  'aa_ref_orig': 'P',
  'ac_adj': [],
  'ac_afr': [],
  'ac_amr': [],
  'ac_eas': [],
  'ac_fin': [],
  'ac_het': [],
  'ac_hom': [],
  'ac_nfe': [],
  'ac_oth': [],
  'ac_sas': [],
  'af': 0,
  'af_adj': 0,
  'alterations_af_adj_dict': defaultdict(list, {}),
  'an': [],
  'an_adj': [],
  'an_afr': [],
  'an_amr': [],
  'an_eas': [],
  'an_fin': [],
  'an_nfe': [],
  'an_oth': [],
  'an_sas': [],
  'bp_af_adj_dict': {},
  'bp_af_dict': {},
  'bp_list': [],
  'bp_ref': 'CCA',
  'chrom': '1',
  'chrom_pos': (51913765, 51913764, 51913763),
  'clin_sig': [],
  'ens_gene': 'ENSG00000085832.12',
  'ens_prot': [],
  'prot_pos': 202},
 {'PolyPhen': ['probably_damaging(1)', ''],
  'SIFT': ['deleterious(0)', ''],
  'SwissProt': ['EPS15_HUMAN', 'EPS15_HUMAN'],
  'aa_ref': 'P',
  'aa_ref_orig': 'P',
  'ac_adj': [1, 1],
  'ac_afr': [0, 0],
  'ac_amr': [0, 0],
  'ac_eas': [0, 0],
  'ac_fin': [0, 0],
  'ac_het': [1, 1],
  'ac_hom': [

In [19]:
#Total number of ExAC entries with validation errors, summed over the per-gene counters
sum(domain_ens_genes_errors)

0

In [20]:
#Total number of ExAC entries that were skipped as indels, summed over the per-gene counters
sum(domain_ens_genes_indels)

1

In [21]:
#Total number of ExAC entries that were skipped because their filter isn't PASS, summed over the per-gene counters
sum(domain_ens_genes_filter)

9

In [22]:
#The largest AN value among the ExAC entries in chrom_gene_table
#NOTE(review): chrom_gene_table is the table that the main loop left behind for the last gene it processed,
#so this value describes that gene only - confirm that this is what should be inspected here.
max(chrom_gene_table["AN"])

121412