In [None]:
import pandas as pd
import numpy as np
import sys, os
from collections import defaultdict

llpsprots = "../../llps_human_all_proteins.csv"
datafile  = "../../cosmic/CosmicMutantExport.tsv.gz"

mapfile_ENSP = "../llps_uniprot2ENSP.tab.txt"
mapfile_ENST = "../llps_uniprot2ENST.tab.txt"

def load_mapping(mapfile):
    mapdict = defaultdict(lambda: False)
    with open(mapfile) as infmt:
        next(infmt)
        for line in infmt:
            arr = line.strip().split("\t")
            if mapdict[arr[1]]:
                print(f"WARNING: {arr[1]} already in dict")
            else:
                mapdict[arr[1]] = True
    return mapdict
                
ENSP_dict = load_mapping(mapfile_ENSP)
ENST_dict = load_mapping(mapfile_ENST)

In [None]:
# read COSMIC file line by line, to heavy to load entirely in memory

import gzip

select = ["Gene name", "Accession Number", "HGNC ID", "Primary site", 
          "Primary histology", "Genome-wide screen", "GENOMIC_MUTATION_ID", "LEGACY_MUTATION_ID",
          "MUTATION_ID", "Mutation CDS", "Mutation AA", "Mutation Description", "GRCh", 
          "Mutation genome position", "SNP", "Mutation somatic status", "Pubmed_PMID", "Age",
          "HGVSP", "HGVSC", "HGVSG"]

all_cols = list()
with gzip.open(datafile) as ifile, open("COSMIC_crop.txt", 'w') as ofile:
    headers = next(ifile).decode().rstrip('\n').split("\t")
    ix = [headers.index(x) for x in select]
    ofile.write("\t".join(select)+"\n")
    for i,line in enumerate(ifile):
        try:
            arr = line.decode().rstrip("\n").split("\t")

            cols = [arr[i] for i in ix]
            # print(cols[18], cols[19], cols[20])
            if cols[18] != "p.?" and cols[18] != "":
                ENSP_id = cols[18].split(".")[0]
            if cols[19] != "":
                ENST_id = cols[19].split(".")[0]

            # Filter only mutations in our LLPS dataset
            if ENSP_dict[ENSP_id] or ENST_dict[ENST_id]:
                ofile.write("\t".join(cols)+"\n")        
        except:
            print(f"Error at line {i}: {line}")
            continue

In [1]:
from collections import defaultdict

mut_dict = defaultdict(list)

with open("COSMIC_crop.txt") as ifile:
    headers = next(ifile)
    for line in ifile:
        arr = line.rstrip("\n").split("\t")
        mut_dict[arr[11]].append(arr)
        

In [2]:
for k in mut_dict:
    print(f"{k}: {len(mut_dict[k])} mutations")

Substitution - coding silent: 901732 mutations
Unknown: 10361611 mutations
Substitution - Missense: 3349351 mutations
Substitution - Nonsense: 276396 mutations
Deletion - Frameshift: 140620 mutations
: 6561 mutations
Insertion - Frameshift: 79493 mutations
Deletion - In frame: 48375 mutations
Complex - frameshift: 2332 mutations
Frameshift: 1850 mutations
Insertion - In frame: 13431 mutations
Nonstop extension: 2467 mutations
Complex - compound substitution: 8 mutations
Whole gene deletion: 2088 mutations
Complex - deletion inframe: 364 mutations
Complex - insertion inframe: 52 mutations


In [3]:
import pandas as pd
import re 
import numpy as np

headers = ["Gene name", "Accession Number", "HGNC ID", "Primary site", 
          "Primary histology", "Genome-wide screen", "GENOMIC_MUTATION_ID", "LEGACY_MUTATION_ID",
          "MUTATION_ID", "Mutation CDS", "Mutation AA", "Mutation Description", "GRCh", 
          "Mutation genome position", "SNP", "Mutation somatic status", "Pubmed_PMID", "Age",
          "HGVSP", "HGVSC", "HGVSG"]



In [17]:
def separar_en_cols(df, column, conseq, conseq_regex, override=False):
    '''
    recibe un DataFrame, el nombre de una columna auxiliar (column)
    y un string con el tipo de consecuencia (conseq). La col. auxiliar
    es una tupla con los elementos implicados en una mutacion
    como la siguiente (aa1, start_pos, aa2, end_pos, aa/s_nuevos).
    Devuelve el DataFrame df con estas 5 nuevas columnas
    '''
    
    df_crop = df[df[column].str.contains(conseq_regex)].copy()
      
    if override:
        df_crop['aux'] = df_crop[column].str.findall(conseq_regex).str[0]
    else:
        df_crop['aux'] = df_crop[column].str.findall('^([A-Z][a-z]{2})(\d+)_?([A-Z][a-z]{2})?(\d+)?'+conseq_regex+'(.*)$').str[0]
        
    if conseq == "missense" or conseq == "nonsense":
        df_crop['start_aa'] = df_crop['aux'].map(lambda x: x[1])
        df_crop['end_aa'] = df_crop['start_aa']
        df_crop['from'] = df_crop['aux'].map(lambda x: x[0])        
        df_crop['to'] = df_crop['aux'].map(lambda x: x[2])
    else:
    
        # start position
        df_crop['start_aa'] = df_crop['aux'].map(lambda x: x[1])

        # end position
        df_crop['end_aa'] = df_crop['aux'].map(lambda x: int(x[3]) if x[3] != '' else np.nan)

        # from: es el/los aa que cambian
        df_crop['from'] = df_crop['aux'].map(lambda x: x[0] + x[2]) # concateno si existe mas de un aa que cambia (o sea, si es un rango)

        # to: aa/s nuevos
        if conseq == "nonsense":
            df_crop['to'] = "Ter"
        elif conseq == "deletion":
            df_crop['to'] = ""
        else:
            df_crop['to'] = df_crop['aux'].map(lambda x: x[4] if x[4] != '' else np.nan)

    # consecuencia de la mutacion
    df_crop['consequence'] = conseq

    df_crop = df_crop.drop(columns=['aux'])

    return df_crop[['cambio', 'start_aa', 'end_aa', 'from', 'to', 'consequence']]

def seq3(seq):
    
    protein_letters_1to3 = {
        "A": "Ala",
        "C": "Cys",
        "D": "Asp",
        "E": "Glu",
        "F": "Phe",
        "G": "Gly",
        "H": "His",
        "I": "Ile",
        "K": "Lys",
        "L": "Leu",
        "M": "Met",
        "N": "Asn",
        "P": "Pro",
        "Q": "Gln",
        "R": "Arg",
        "S": "Ser",
        "T": "Thr",
        "V": "Val",
        "W": "Trp",
        "Y": "Tyr",
        "B": "Asx",
        "X": "Xaa",
        "Z": "Glx",
        "J": "Xle",
        "U": "Sec",
        "O": "Pyl",
        "*": "Ter"
    }
    
    return "".join(protein_letters_1to3.get(aa, "Xaa") for aa in seq)

In [23]:

def separar_en_cols_missense(df, column, conseq, conseq_regex, override=False):
    
    df_crop = df[df[column].str.contains(conseq_regex)].copy()
      
    if override:
        df_crop['aux'] = df_crop[column].str.findall(conseq_regex).str[0]
    else:
        df_crop['aux'] = df_crop[column].str.findall('^([A-Z][a-z]{2})(\d+)_?([A-Z][a-z]{2})?(\d+)?'+conseq_regex+'(.*)$').str[0]
            
    # start position
    df_crop['start_aa'] = df_crop['aux'].map(lambda x: x[1])
    df_crop.start_aa = df_crop.start_aa.apply(int)
    
    # end position
    df_crop['end_aa'] = df_crop['start_aa']
    # df_crop['end_aa'] = df_crop['aux'].map(lambda x: int(x[1]) if x[1] != '' else np.nan)
    
    # from: es el/los aa que cambian
    df_crop['from'] = df_crop['aux'].map(lambda x: seq3(x[0])) # concateno si existe mas de un aa que cambia (o sea, si es un rango)
    df_crop['from'] = df_crop['from'].apply(str)
    
    df_crop['to'] = df_crop['aux'].map(lambda x: seq3(x[2]))
    df_crop['to'] = df_crop['to'].apply(str)
        
    # consecuencia de la mutacion
    df_crop['consequence'] = conseq

    df_crop = df_crop.drop(columns=['aux'])

    return df_crop[['cambio', 'start_aa', 'end_aa', 'from', 'to', 'consequence']]

## Process MISSENSE

In [6]:
df_tmp = pd.DataFrame(mut_dict["Substitution - Missense"], columns=headers)

# Subset mutations with "p." only
df_tmp['cambio'] = df_tmp["HGVSP"].map(lambda x: re.findall('p\..*$', x))
df_tmp['cambio'] = df_tmp.cambio.str[0]
df_tmp.cambio = df_tmp.cambio.str.lstrip('p.') 

# separate those that don't have HGVSP
ix_nulls = df_tmp["cambio"].isnull()
df_nulls = df_tmp[ix_nulls].copy()

# do classic missense processing
missense = separar_en_cols(df_tmp[~ix_nulls], "cambio", "missense", '^([A-Z][a-z]{2})(\d+)(?!Ter)([A-Z][a-z]{2})$', override=True)
df_tmp = df_tmp.drop(columns=["cambio"])

# Now process those that have NULL in HGVSP but have some information on "Mutation AA"
# small check in case something is not a missense mutation
check = df_nulls["Mutation AA"].map(lambda x: re.findall('p\.[A-Z]\d+[A-Z]$', x))
if np.sum(check.isnull()) > 0:
    print("Warning MISSENSE! some nulls or errors here")

# continue with mutation processing
ix_X = df_nulls["Mutation AA"].str.contains('p\.[A-Z]\d+X$')  # discard mutations to X
df_pass = df_nulls[~ix_X].copy()
df_pass["cambio"] = df_pass["Mutation AA"].str.lstrip('p.')
df_done = separar_en_cols_missense(df_pass, "cambio", "missense", '^([A-Z])(\d+)([A-Z])$', override=True)

  return func(self, *args, **kwargs)


In [7]:
all_missense = pd.concat((missense, df_done))
missense_df = pd.merge(df_tmp, all_missense, right_index=True, left_index=True)
print(missense.shape)
print(df_done.shape)
print(all_missense.shape)
print(missense_df.shape)



(3345113, 6)
(1519, 6)
(3346632, 6)
(3346632, 27)


In [11]:
missense_df.to_csv("llps_missense_cosmic.csv.gz", sep=",", header=True, index=False, compression='gzip')

In [12]:
del missense_df
del all_missense

## Process NONSENSE

In [34]:
### Process Nonsense

df_tmp = pd.DataFrame(mut_dict["Substitution - Nonsense"], columns=headers)

# Subset mutations with "p." only
df_tmp['cambio'] = df_tmp["HGVSP"].map(lambda x: re.findall('p\..*$', x))
df_tmp['cambio'] = df_tmp.cambio.str[0]
df_tmp.cambio = df_tmp.cambio.str.lstrip('p.') 

# # separate those that don't have HGVSP
ix_nulls = df_tmp["cambio"].isnull()
df_nulls = df_tmp[ix_nulls].copy()

# # do classic missense processing
nonsense = separar_en_cols(df_tmp[~ix_nulls], "cambio", "nonsense", "(?<=\d)Ter", override=False)
df_tmp = df_tmp.drop(columns=["cambio"])

# # Now process those that have NULL in HGVSP but have some information on "Mutation AA"
# # small check in case something is not a missense mutation
check = df_nulls["Mutation AA"].map(lambda x: re.findall('p\.[A-Z]\d+\*$', x))
if np.sum(check.isnull()) > 0:
    print("Warning NONSENSE! some nulls or errors here")

# # continue with mutation processing
df_nulls["cambio"] = df_nulls["Mutation AA"].str.lstrip('p.')
df_done = separar_en_cols_missense(df_nulls, "cambio", "nonsense", '^([A-Z])(\d+)(\*)$', override=True)

  return func(self, *args, **kwargs)


In [35]:
print(nonsense.shape)
print(df_done.shape)

(275729, 6)
(212, 6)


In [36]:
all_nonsense = pd.concat((nonsense, df_done))
nonsense_df = pd.merge(df_tmp, all_nonsense, right_index=True, left_index=True)

In [37]:
nonsense_df.to_csv("llps_nonsense_cosmic.csv.gz", sep=",", header=True, index=False, compression='gzip')

In [38]:
print(nonsense_df.shape)

(275941, 27)


In [39]:
del nonsense_df
del all_nonsense

# Process DELETIONS

In [166]:
### Process Deletions

df_tmp = pd.DataFrame(mut_dict["Deletion - In frame"], columns=headers)
df_tmp

# Subset mutations with "p." only
df_tmp['cambio'] = df_tmp["HGVSP"].map(lambda x: re.findall('p\..*$', x))
df_tmp['cambio'] = df_tmp.cambio.str[0]
df_tmp.cambio = df_tmp.cambio.str.lstrip('p.') 

# separate those that don't have HGVSP
ix_nulls = df_tmp["cambio"].isnull()
df_nulls = df_tmp[ix_nulls]
df_notnulls = df_tmp[~ix_nulls].copy()
ix_right = df_notnulls["cambio"].str.contains('^([A-Z][a-z]{2})(\d+)_?([A-Z][a-z]{2})?(\d+)?del$')
df_weird = df_notnulls[~ix_right].copy()

# # # do classic missense processing
deletions = separar_en_cols(df_notnulls[ix_right], "cambio", "deletion", '^([A-Z][a-z]{2})(\d+)_?([A-Z][a-z]{2})?(\d+)?del$', override=True)
# df_tmp = df_tmp.drop(columns=["cambio"])

# # # Now process those that have NULL in HGVSP but have some information on "Mutation AA"
# # # small check in case something is not a missense mutation
# check = df_nulls["Mutation AA"].map(lambda x: re.findall('p\.[A-Z]\d+\*$', x))
# if np.sum(check.isnull()) > 0:
#     print("Warning NONSENSE! some nulls or errors here")

# # # continue with mutation processing
# df_nulls["cambio"] = df_nulls["Mutation AA"].str.lstrip('p.')
# df_done = separar_en_cols_missense(df_nulls, "cambio", "nonsense", '^([A-Z])(\d+)(\*)$', override=True)

  return func(self, *args, **kwargs)


In [169]:
deletions[:50]

Unnamed: 0,cambio,start_aa,end_aa,from,to,consequence
0,Lys359del,359,,Lys,,deletion
1,Lys359del,359,,Lys,,deletion
2,Lys359del,359,,Lys,,deletion
3,Gln394del,394,,Gln,,deletion
4,Lys359del,359,,Lys,,deletion
5,Lys359del,359,,Lys,,deletion
7,Glu208del,208,,Glu,,deletion
8,Lys359del,359,,Lys,,deletion
9,Glu208del,208,,Glu,,deletion
10,Ala2del,2,,Ala,,deletion


In [165]:
def separar_en_cols(df, column, conseq, conseq_regex, override=False):
    '''
    recibe un DataFrame, el nombre de una columna auxiliar (column)
    y un string con el tipo de consecuencia (conseq). La col. auxiliar
    es una tupla con los elementos implicados en una mutacion
    como la siguiente (aa1, start_pos, aa2, end_pos, aa/s_nuevos).
    Devuelve el DataFrame df con estas 5 nuevas columnas
    '''
    
    df_crop = df[df[column].str.contains(conseq_regex)].copy()
      
    if override:
        df_crop['aux'] = df_crop[column].str.findall(conseq_regex).str[0]
    else:
        df_crop['aux'] = df_crop[column].str.findall('^([A-Z][a-z]{2})(\d+)_?([A-Z][a-z]{2})?(\d+)?'+conseq_regex+'(.*)$').str[0]
        
    if conseq == "missense" or conseq == "nonsense":
        df_crop['start_aa'] = df_crop['aux'].map(lambda x: x[1])
        df_crop['end_aa'] = df_crop['start_aa']
        df_crop['from'] = df_crop['aux'].map(lambda x: x[0])        
        df_crop['to'] = df_crop['aux'].map(lambda x: x[2])
    else:
    
        # start position
        df_crop['start_aa'] = df_crop['aux'].map(lambda x: x[1])

        # end position
        df_crop['end_aa'] = df_crop['aux'].map(lambda x: int(x[3]) if x[3] != '' else np.nan)

        # from: es el/los aa que cambian
        df_crop['from'] = df_crop['aux'].map(lambda x: x[0] + x[2]) # concateno si existe mas de un aa que cambia (o sea, si es un rango)

        # to: aa/s nuevos
        if conseq == "nonsense":
            df_crop['to'] = "Ter"
        elif conseq == "deletion":
            df_crop['to'] = ""
        else:
            df_crop['to'] = df_crop['aux'].map(lambda x: x[4] if x[4] != '' else np.nan)

    # consecuencia de la mutacion
    df_crop['consequence'] = conseq

    df_crop = df_crop.drop(columns=['aux'])

    return df_crop[['cambio', 'start_aa', 'end_aa', 'from', 'to', 'consequence']]


In [151]:
conseq_regex = 'del'
for e in df_notnulls[ix_right]["cambio"]:
    m = re.search('^([A-Z][a-z]{2})(\d+)_?([A-Z][a-z]{2})?(\d+)?del$', e)
    if m is not None:
        print(m.group(1), m.group(2), m.group(3), m.group(4))
    else:
        print(m)

Lys 359 None None
Lys 359 None None
Lys 359 None None
Gln 394 None None
Lys 359 None None
Lys 359 None None
Glu 208 None None
Lys 359 None None
Glu 208 None None
Ala 2 None None
Gln 394 None None
Glu 208 None None
Met 357 Lys 388
Lys 150 None None
Tyr 346 None None
Met 357 Lys 388
Met 357 Lys 388
Tyr 346 None None
Val 267 Asn 270
Lys 359 None None
Ile 1200 None None
Arg 331 None None
Thr 421 Pro 422
Met 357 Lys 388
Gly 35 Glu 38
Gly 35 Glu 38
His 354 Gln 357
Gln 161 None None
Cys 460 Ala 462
Glu 414 None None
Ser 337 Lys 340
Gln 161 None None
Tyr 346 None None
Leu 141 None None
Gln 161 None None
Leu 165 None None
Gln 161 None None
Gln 161 None None
Gln 161 None None
Lys 336 None None
Arg 331 None None
Pro 280 Ala 284
Glu 1923 None None
Glu 1923 None None
Glu 1923 None None
Glu 967 None None
Ile 1200 None None
Glu 967 None None
Glu 1388 None None
Glu 1923 None None
Glu 1620 None None
Thr 245 None None
Val 371 None None
Glu 1923 None None
Lys 1622 None None
Glu 1923 None None
Glu 1388 No

Gln 204 None None
Ile 240 None None
Ile 240 None None
Gln 204 None None
Gln 204 None None
Gln 204 None None
Gln 204 None None
Gln 203 Gln 204
Gln 204 None None
Gln 204 None None
Gln 204 None None
Gln 204 None None
Leu 116 Leu 122
Gln 204 None None
Gln 204 None None
Gln 204 None None
Gln 204 None None
Gln 204 None None
Gln 204 None None
Gln 204 None None
His 375 Pro 382
Gln 204 None None
Pro 88 None None
Gln 204 None None
Gln 204 None None
Gln 204 None None
Phe 1467 None None
Gln 204 None None
Gln 204 None None
Gln 204 None None
Gln 204 None None
Ile 820 None None
Gln 204 None None
Gln 204 None None
Gln 204 None None
Gln 204 None None
Gln 204 None None
His 375 Pro 382
Gln 204 None None
Lys 658 None None
Trp 896 None None
His 1703 Arg 1708
Glu 1301 None None
His 1703 Arg 1708
Lys 129 None None
Lys 129 None None
His 1703 Arg 1708
Glu 1509 None None
Gly 313 None None
His 523 None None
Gly 71 None None
Ser 68 None None
Arg 110 None None
Gly 327 None None
Lys 1595 None None
Phe 1467 None Non

Lys 297 None None
Gln 345 None None
Glu 104 None None
Leu 234 Gly 241
Asp 466 None None
Lys 101 None None
Val 730 None None
Glu 1464 None None
Asn 1768 Glu 1775
Gly 149 None None
Glu 1464 None None
Lys 297 None None
Asp 1264 None None
Thr 49 None None
Asn 60 Arg 63
Thr 49 None None
Gln 2194 None None
Glu 1414 None None
Glu 1464 None None
Lys 297 None None
Glu 631 None None
Thr 1529 None None
Ser 1206 None None
Ser 69 None None
Glu 64 None None
Glu 64 None None
Glu 598 None None
Pro 948 Thr 989
Glu 1464 None None
Glu 1464 None None
Leu 670 None None
Glu 1464 None None
Lys 297 None None
Pro 948 Thr 989
Glu 25 None None
Thr 1529 None None
Ser 141 None None
Lys 297 None None
Lys 455 None None
Arg 185 None None
Glu 1464 None None
Glu 1464 None None
Lys 297 None None
Lys 297 None None
Gln 309 None None
Ser 1206 None None
Lys 2225 None None
Glu 1464 None None
Tyr 313 None None
Asp 1264 None None
Arg 141 None None
Arg 141 None None
Ser 1110 None None
Ser 764 Gln 769
Ala 128 Glu 130
Leu 456 Non

Gln 507 Gln 510
Gln 649 Gln 650
Ser 45 None None
Leu 31 Ser 33
Leu 46 Ser 47
Gln 650 None None
Gln 772 None None
Thr 40 Ala 43
Gln 772 None None
Gln 647 Gln 650
Gln 507 Gln 510
Ala 5 Ala 80
Gln 507 Gln 510
Gln 507 Gln 510
Arg 178 Leu 180
Ala 5 Ala 80
Ser 45 None None
Gln 507 Gln 510
Gln 508 Gln 510
Gln 650 None None
Arg 281 Gly 283
Gly 38 Pro 44
Gln 507 Gln 510
Leu 724 None None
Gln 507 Gln 510
Gln 772 None None
Gln 507 Gln 510
Gln 507 Gln 510
Gln 772 None None
Gln 772 None None
Gln 509 Gln 510
Trp 25 Ser 33
Gln 772 None None
Gln 507 Gln 510
Ser 23 Ala 39
Ala 5 Ala 80
Ser 45 None None
Gln 507 Gln 510
Gln 650 None None
Ile 35 None None
Ser 45 None None
Gln 507 Gln 510
Ser 881 His 889
Ala 5 Ala 80
Leu 828 None None
Gln 772 None None
Asp 6 Ser 29
Gln 772 None None
Ser 45 None None
Gln 772 None None
Gln 507 Gln 510
Ala 5 Ala 80
Ala 5 Ala 80
Ala 5 Ala 80
Ser 45 None None
Ala 5 Ala 80
Ala 5 Ala 80
His 36 Ser 37
Ala 5 Ala 80
Gln 507 Gln 510
Gln 507 Gln 510
Gln 772 None None
Ser 23 Ser 33
Ser 

Gly 490 Gly 493
Gly 24 None None
Gly 490 Gly 493
Glu 1331 None None
Phe 28 None None
Gly 490 Gly 493
Gly 24 None None
Gly 490 Gly 493
Ser 700 None None
Gly 24 None None
Ser 508 Gly 512
Tyr 542 Gly 543
Glu 454 None None
Ser 553 None None
Lys 588 None None
Gly 490 Gly 493
Gly 243 Pro 244
Gly 490 Gly 493
Gly 490 Gly 493
Ser 508 Gly 512
Gly 490 Gly 493
Gly 24 None None
Ser 483 None None
Gln 371 None None
Gly 490 Gly 493
Lys 689 None None
Thr 55 Gly 61
Lys 16 Ala 17
Glu 464 None None
His 1287 None None
Glu 666 None None
His 1287 None None
Arg 875 None None
Phe 1234 Lys 1237
Lys 689 None None
Glu 639 None None
Val 558 None None
Glu 1212 None None
Glu 1331 None None
Val 558 None None
Lys 517 None None
Lys 1434 None None
Lys 689 None None
Leu 1101 None None
Lys 517 None None
Gly 243 Pro 244
Gln 424 None None
Thr 2407 None None
Gln 424 None None
Lys 344 None None
Val 1275 None None
Glu 1268 Glu 1273
Glu 1273 None None
Glu 1273 None None
Glu 1268 Glu 1273
Gln 424 None None
Gln 424 None None
Glu 

Pro 47 None None
Arg 25 None None
Glu 38 None None
Glu 38 None None
Glu 588 None None
Ala 223 Phe 225
Glu 38 None None
Ser 97 Ser 98
Gly 23 None None
Arg 32 Val 35
Lys 440 None None
Glu 38 None None
Gln 1819 None None
Glu 38 None None
Glu 38 None None
Glu 38 None None
Gly 273 None None
Glu 38 None None
Glu 38 None None
Asn 96 None None
Glu 38 None None
Glu 38 None None
Cys 1479 None None
Glu 38 None None
Ser 97 Ser 98
Gly 23 None None
Lys 94 None None
Leu 637 Asp 638
Ser 938 None None
Ala 549 Glu 552
Gly 273 None None
Glu 557 None None
Gln 72 None None
Ala 66 Ala 67
Thr 1872 None None
Ile 3908 None None
Ser 938 None None
Ser 261 None None
Gly 487 None None
Gln 173 None None
Arg 1478 None None
Cys 1479 None None
Lys 939 None None
Asp 346 Pro 350
Lys 1735 None None
Leu 184 None None
Leu 261 None None
Ser 261 None None
Glu 107 None None
Leu 184 None None
Glu 107 None None
Lys 809 None None
Pro 250 None None
Pro 250 None None
Thr 198 None None
Pro 250 None None
Asp 175 None None
Glu 286 No

Tyr 310 Leu 311
His 180 Glu 181
Lys 297 Leu 300
Lys 305 Thr 306
Tyr 182 Gln 187
His 180 Lys 189
Glu 192 Arg 195
Pro 298 Thr 306
Glu 181 Tyr 182
Thr 306 None None
Glu 181 Tyr 182
Phe 186 Gln 187
Lys 317 None None
Glu 181 None None
Lys 732 None None
Thr 306 None None
Glu 181 Tyr 182
Leu 300 Asp 308
His 180 Thr 184
Glu 334 Glu 336
Glu 334 Glu 336
Lys 189 None None
Leu 179 None None
Glu 178 Arg 179
Phe 186 Gln 187
Arg 304 None None
Asn 294 Pro 298
Ile 301 Lys 305
Lys 297 None None
Glu 188 None None
Asp 170 Lys 178
Met 293 Ser 295
Glu 178 Arg 179
Glu 334 Glu 336
Glu 334 Glu 336
Asn 108 None None
Phe 186 Lys 189
Val 131 Arg 139
Leu 303 Lys 305
Lys 189 None None
Ile 301 Leu 311
Val 132 Leu 143
His 180 Glu 181
Ile 135 None None
Asp 194 Tyr 197
Leu 179 Lys 189
Thr 306 None None
Asp 170 Glu 181
Val 175 His 180
Val 175 His 180
Glu 351 None None
Asp 170 None None
Pro 298 Leu 303
Ser 129 Leu 134
Arg 304 None None
Thr 184 Asp 194
Glu 181 Tyr 182
Lys 297 Leu 300
Phe 186 Arg 191
Glu 285 Ser 295
Gln 18

Gln 3203 Gln 3204
Ser 45 None None
Ala 5 Ala 80
Ser 45 None None
Ser 45 None None
Ser 3513 Gly 3519
Arg 3381 Gln 3387
Ala 5 Ala 80
Val 777 None None
Ser 3513 Gly 3522
Gln 1740 Gln 1741
Ile 35 Gly 38
Ala 5 Ala 80
Gln 1740 Gln 1741
Gln 1741 None None
Ala 43 Glu 53
Ala 784 None None
Gln 3395 None None
Ser 45 None None
Ser 45 None None
Gln 3380 None None
Gly 3521 Gly 3527
Val 777 None None
Ser 3513 Gly 3520
Lys 358 None None
Gln 1740 Gln 1741
Ala 106 Pro 107
Arg 3381 Gln 3387
Ser 264 None None
Ser 3513 None None
His 671 Leu 672
Gln 3204 None None
Gln 1740 Gln 1741
Lys 139 Asp 142
Gly 836 Pro 837
Ala 472 Glu 478
Gly 834 Pro 837
Gln 1741 None None
Arg 3381 Gln 3388
Ser 3513 Gly 3519
Gln 1740 Gln 1741
Val 777 None None
Val 777 None None
Lys 644 None None
Gln 3380 None None
Gln 1739 Gln 1741
Gly 3527 None None
Gln 1741 None None
Arg 3381 Gln 3388
Val 777 None None
Pro 2048 Pro 2050
Gly 3526 Gly 3527
Lys 155 None None
Gln 1741 None None
Glu 70 None None
Glu 70 None None
Glu 70 None None
Glu 48 

Leu 31 Ile 35
Ser 45 None None
Phe 1466 None None
Lys 353 None None
Lys 658 None None
Phe 1466 None None
Phe 1466 None None
Lys 185 Tyr 194
Ser 45 None None
Tyr 30 Ser 37
Phe 1466 None None
Gly 573 None None
Ile 35 Gly 38
Thr 40 Leu 46
Ser 45 Asp 58
Ile 35 Thr 41
Ala 5 Ala 80
Ser 45 None None
Ser 45 None None
Gly 38 Ala 39
Ser 45 None None
Ser 45 Gly 48
Asp 32 Ser 33
Ala 5 Ala 80
Ser 45 None None
Asn 284 None None
His 24 Gly 38
Gln 394 None None
Val 22 Ser 33
Trp 25 Asp 32
Gly 573 None None
Ser 45 None None
Ala 5 Ala 80
Ala 20 Ala 80
Tyr 30 Thr 40
Ser 45 None None
Gly 1138 Pro 1142
Ser 45 None None
Glu 1107 None None
Lys 1117 None None
Ser 45 None None
Val 22 Tyr 64
Ala 5 Ala 80
Trp 25 Asp 32
Ser 45 None None
Glu 979 None None
Gln 394 None None
Ser 45 None None
Thr 42 Gly 48
Ser 45 None None
Ala 5 Ala 80
Leu 31 Ile 35
Ser 45 None None
Val 22 Ser 33
Lys 150 None None
Ser 45 None None
Ser 45 None None
Ser 45 None None
Ser 45 None None
Glu 267 None None
Ala 5 Ala 80
Ser 45 None None
Ser 4

Phe 278 None None
Ile 66 None None
Ser 768 None None
Arg 272 Glu 275
Ile 941 None None
Ala 496 None None
Arg 272 Glu 275
Ala 496 None None
His 671 Leu 672
Lys 1332 None None
Lys 546 None None
Ala 496 None None
Tyr 421 Glu 428
Arg 272 Glu 275
Gln 273 None None
Arg 885 Asn 888
Lys 546 None None
Trp 818 Tyr 820
Glu 522 None None
Gln 1043 None None
Leu 387 None None
Glu 882 None None
Pro 1684 None None
Pro 550 None None
Leu 1321 None None
Pro 1684 None None
Cys 1499 None None
Arg 272 Glu 275
Lys 276 Cys 279
Glu 94 None None
Lys 30 None None
Glu 5 None None
Ala 496 None None
Lys 276 Cys 279
Arg 272 Glu 275
Glu 223 None None
Val 880 None None
Phe 22 None None
Pro 1684 None None
Pro 1684 None None
Asp 818 None None
Lys 689 None None
Arg 361 Phe 363
Glu 1331 None None
Thr 958 None None
Lys 235 None None
Asp 19 None None
Lys 588 None None
Asp 19 None None
Gly 243 Pro 244
Glu 117 Phe 122
Gly 472 None None
Gly 192 None None
Asp 19 None None
Asp 19 None None
Asp 19 None None
Glu 1334 None None
Asp

Glu 746 Ala 750
Phe 360 None None
Leu 747 Thr 751
Glu 746 Ala 750
Glu 368 None None
Glu 746 Ala 750
Glu 746 Ala 750
Glu 395 Glu 397
Asn 473 None None
Leu 747 Thr 751
Glu 368 None None
Ser 45 None None
His 190 None None
Leu 747 Thr 751
Asn 590 Val 591
Pro 1285 None None
Glu 746 Ala 750
Glu 746 Ala 750
Glu 87 None None
Gln 470 Pro 477
Val 305 None None
Pro 1285 None None
Glu 746 Ala 750
Glu 746 Ala 750
Glu 746 Ala 750
Pro 85 Tyr 91
Glu 746 Ala 750
Glu 143 None None
Glu 746 Ala 750
Glu 746 Ala 750
Leu 747 Thr 751
Glu 746 Ala 750
Glu 746 Ala 750
Glu 746 Ala 750
Pro 1285 None None
Thr 264 None None
Glu 746 Ala 750
Glu 746 Ala 750
Glu 746 Ala 750
Thr 768 None None
Glu 746 Ala 750
Glu 746 Ala 750
Glu 746 Ala 750
Glu 746 Ala 750
Glu 746 Ala 750
Glu 746 Ala 750
Asp 19 None None
Glu 746 Ala 750
Glu 746 Ala 750
Glu 746 Ala 750
Glu 746 Ala 750
Glu 746 Ala 750
Glu 746 Ala 750
Glu 746 Ala 750
Glu 746 Ala 750
Val 190 Arg 202
Glu 746 Ala 750
Glu 746 Ala 750
Val 305 None None
Glu 746 Ala 750
Glu 746 Al

Glu 746 Ala 750
Lys 959 Lys 960
Lys 959 Lys 960
Lys 1443 None None
Lys 1493 None None
Lys 959 Lys 960
Leu 747 Thr 751
Glu 746 Ala 750
Glu 746 Ala 750
Leu 67 None None
Lys 1233 None None
Glu 746 Ala 750
Glu 746 Ala 750
Phe 1640 None None
Lys 959 Lys 960
Lys 959 Lys 960
Glu 746 Ala 750
Glu 746 Ala 750
Leu 747 Thr 751
Glu 746 Ala 750
Leu 747 Ser 752
Glu 746 Ala 750
Ile 749 None None
Lys 959 Lys 960
Met 131 Gln 139
Asn 1682 None None
Lys 959 Lys 960
Lys 959 Lys 960
Glu 746 Ala 750
Glu 746 Ala 750
Ser 45 None None
Lys 959 Lys 960
Glu 746 Ala 750
Glu 746 Ala 750
Glu 746 Ala 750
Glu 746 Ala 750
Glu 746 Ala 750
Gln 438 Glu 439
Lys 959 Lys 960
Glu 746 Ala 750
Lys 959 Lys 960
Gly 162 Cys 168
Glu 746 Ala 750
Lys 1493 None None
Lys 959 Lys 960
Val 578 None None
Glu 746 Ala 750
Glu 746 Ala 750
Leu 747 Thr 751
Lys 959 Lys 960
Lys 308 None None
Glu 746 Ala 750
Glu 746 Ala 750
Glu 746 Ala 750
Lys 959 Lys 960
Glu 746 Ala 750
Glu 746 Ala 750
Glu 746 Ala 750
Glu 746 Ala 750
Lys 1750 None None
Glu 746 Ala

Glu 746 Ala 750
Glu 746 Ala 750
Glu 746 Ala 750
Glu 746 Ala 750
Glu 746 Ala 750
Glu 746 Ala 750
Trp 25 Asp 32
Glu 746 Ala 750
Glu 746 Ala 750
Pro 210 None None
Pro 210 None None
Glu 746 Ala 750
Glu 746 Ala 750
Glu 746 Ala 750
Glu 746 Ala 750
Glu 13 None None
Glu 746 Ala 750
Leu 1444 None None
Lys 337 None None
Pro 210 None None
Glu 619 None None
Val 22 Ser 33
Pro 210 None None
Pro 210 None None
Pro 210 None None
Leu 314 None None
Glu 746 Ala 750
Leu 747 Ala 750
Pro 210 None None
Lys 517 None None
Glu 746 Ala 750
Glu 619 None None
Glu 746 Ala 750
Glu 347 His 353
Pro 210 None None
Pro 210 None None
Glu 619 None None
Glu 746 Ala 750
Glu 746 Ala 750
Thr 567 Arg 569
Ser 752 Ile 759
Glu 746 Ala 750
Glu 746 Ala 750
Glu 746 Ala 750
Glu 619 None None
Glu 444 None None
Glu 619 None None
Gln 1237 None None
Glu 619 None None
Asp 19 None None
Glu 619 None None
Glu 619 None None
Glu 619 None None
Glu 746 Ala 750
Glu 746 Ala 750
Glu 746 Ala 750
Glu 746 Ala 750
Glu 619 None None
Glu 619 None None
Glu 

Glu 746 Ala 750
Glu 746 Ala 750
Glu 1740 None None
Glu 746 Ala 750
Pro 1261 None None
Glu 746 Ala 750
Glu 746 Ala 750
Glu 746 Ala 750
Glu 746 Ala 750
Ser 45 None None
Lys 33 None None
Glu 559 None None
Glu 746 Ala 750
Pro 677 None None
Glu 746 Ala 750
Glu 746 Ala 750
Glu 746 Ala 750
Glu 746 Ala 750
Leu 991 None None
Glu 746 Ala 750
Val 22 Gly 38
Glu 746 Ala 750
Glu 19 None None
Ile 172 None None
Glu 746 Ala 750
Glu 1740 None None
Gly 410 None None
Asn 343 None None
Glu 746 Ala 750
Glu 1740 None None
Glu 746 Ala 750
Pro 52 None None
Pro 362 None None
Glu 746 Ala 750
Glu 746 Ala 750
Glu 746 Ala 750
Glu 746 Ala 750
Glu 746 Ala 750
Glu 746 Ala 750
Glu 746 Ala 750
Glu 746 Ala 750
Glu 746 Glu 749
Glu 746 Ala 750
Glu 131 None None
Ser 45 None None
Glu 402 Glu 403
Phe 538 None None
Glu 746 Ala 750
Glu 746 Ala 750
Glu 431 Leu 434
Glu 746 Ser 752
Leu 747 Ser 752
Pro 362 None None
Glu 746 Ala 750
Glu 746 Ala 750
Glu 746 Ala 750
Glu 746 Ala 750
Pro 1261 None None
Glu 746 Ala 750
Lys 129 None None


Ser 45 None None
Leu 747 Thr 751
Ser 45 None None
Ser 45 Pro 52
Asp 32 Ser 47
Glu 746 None None
His 24 Ser 33
Ser 45 None None
Ser 45 None None
Glu 491 None None
Asp 883 Leu 884
Glu 40 None None
Leu 31 Ile 35
Asn 255 Val 256
Ser 45 None None
Glu 812 None None
Gly 65 None None
Ser 45 None None
Tyr 30 Ser 37
Gln 204 None None
Ile 35 Gly 38
Thr 40 Leu 46
Ser 45 Asp 58
Ile 35 Thr 41
Ala 5 Ala 80
Ser 45 None None
Gly 38 Ala 39
Ser 45 None None
Ser 45 Gly 48
Thr 680 Thr 683
Ser 202 None None
Ser 45 None None
Trp 25 Asp 32
Asp 32 Ser 33
Ala 5 Ala 80
Leu 747 Thr 751
Arg 1178 None None
Arg 2425 Phe 2427
Ser 45 None None
His 24 Gly 38
Val 22 Ser 33
Ala 5 Ala 80
Trp 25 Asp 32
Leu 747 Ala 750
Lys 267 None None
Glu 746 Ala 750
Glu 746 Ala 750
Glu 746 Ala 750
Glu 746 Ala 750
Glu 746 Ala 750
Glu 746 Ala 750
Glu 746 Thr 751
Glu 746 Ala 750
Glu 746 Ala 750
Glu 427 None None
Ser 45 None None
Glu 746 Ala 750
Glu 746 Ala 750
Glu 746 Ala 750
Pro 362 None None
Ala 5 Ala 80
Ala 20 Ala 80
Lys 2311 None None
T

Ala 321 None None
His 700 None None
Glu 746 Ala 750
Glu 746 Ala 750
Glu 746 Ala 750
Glu 746 Ala 750
Glu 746 Ala 750
Glu 746 Ala 750
Ala 5 Ala 80
Leu 747 Ser 752
Ser 45 None None
Ser 45 None None
Lys 1638 None None
Glu 746 Ala 750
Glu 746 Ala 750
Glu 746 Ala 750
Arg 793 None None
Asp 360 None None
Thr 42 Gly 48
Lys 360 None None
Leu 747 Ser 752
Arg 202 None None
Ile 770 Leu 771
Glu 746 Ala 750
Glu 746 Ala 750
Glu 746 Ala 750
Glu 746 Ala 750
Glu 746 Ala 750
Gln 676 None None
Glu 746 Ala 750
Glu 746 Ala 750
Glu 746 Ala 750
Glu 746 Ala 750
Glu 746 Ala 750
Glu 746 Ala 750
Glu 746 Ala 750
Glu 746 Ala 750
Glu 3377 None None
Arg 7 None None
Ser 227 None None
Gln 1237 None None
Lys 346 None None
Asp 168 Glu 169
Leu 747 Thr 751
Ser 45 None None
Glu 387 None None
Glu 746 Ala 750
Ser 227 None None
Asp 131 None None
Leu 747 Pro 753
Glu 746 Ala 750
Glu 746 Ala 750
Glu 746 Ala 750
Glu 746 Ala 750
Glu 746 Ala 750
Glu 746 Ala 750
Glu 746 Ala 750
Glu 746 Ala 750
Asp 32 Ile 35
Gln 238 None None
Ala 2373 

Ser 45 None None
Glu 22 None None
Lys 562 None None
Ile 216 None None
Ile 843 Asp 846
Glu 475 None None
Trp 896 None None
Gln 827 None None
Leu 747 Ser 752
Ala 5 Ala 80
Ser 44 None None
Lys 658 None None
Gly 38 Ala 39
Gln 1237 None None
Glu 1158 None None
Tyr 197 None None
Ser 45 None None
Pro 88 None None
Glu 746 Ala 750
Glu 746 Ala 750
Ser 276 Gly 279
Glu 746 Ala 750
Glu 746 Ala 750
Met 204 Ile 216
Tyr 124 None None
Glu 746 Ala 750
Glu 746 Ala 750
Glu 746 Ala 750
Glu 746 Ala 750
Glu 746 Ala 750
Glu 746 Ala 750
Glu 746 Ala 750
Glu 746 Ala 750
Glu 746 Ala 750
His 700 None None
His 408 None None
Ile 35 Gly 38
Gln 1189 None None
Leu 128 Val 129
Glu 746 Ala 750
Ser 45 None None
Ser 45 None None
Ala 5 Ala 80
Ala 5 Ala 80
Ala 5 Ala 80
Tyr 87 Lys 93
Ala 1627 Glu 1634
Glu 139 None None
Leu 747 Thr 751
Thr 3562 None None
Tyr 30 Thr 40
Leu 98 Trp 107
Glu 746 Ala 750
Glu 506 None None
Ile 217 Asp 221
Ser 45 None None
Glu 304 None None
Ile 35 Thr 41
Tyr 134 Ile 139
Glu 746 Ala 750
Glu 746 Ala 750

Val 118 Arg 119
Lys 1227 None None
Glu 27 None None
Leu 747 Thr 751
Asp 440 None None
Ser 45 None None
Thr 116 Arg 117
Thr 116 Arg 117
Pro 315 None None
Ser 45 None None
Gly 227 Glu 232
Ser 45 None None
Val 570 Gly 571
Leu 226 None None
Lys 609 None None
Glu 232 None None
His 24 Ser 33
Pro 512 Pro 515
Pro 138 Cys 143
Asp 32 Ser 47
Arg 136 Cys 137
Met 94 Lys 100
Lys 867 Glu 868
Ser 45 None None
Lys 650 Glu 654
Glu 268 None None
Asp 242 Arg 243
Met 54 Arg 58
Asp 242 Arg 243
Glu 277 None None
Val 118 Met 121
Thr 42 Gly 48
Thr 42 Gly 48
Ala 37 Ser 51
Gln 699 Lys 700
Ala 26 None None
Asp 402 Arg 403
Gln 97 Lys 100
Ser 45 None None
Ala 7 None None
Ser 202 None None
Ser 45 None None
Val 179 Tyr 181
Lys 25 None None
Ile 249 None None
Ser 45 Pro 52
Ser 45 None None
Arg 669 None None
Pro 1402 None None
Glu 746 Ala 750
Glu 746 Ala 750
His 36 Ser 37
Val 445 His 450
Val 445 His 450
Lys 207 None None
Ser 45 None None
Leu 226 Lys 266
Ile 216 None None
Asp 32 Ser 47
Val 118 Arg 119
Leu 106 Trp 107
Ile

Leu 1412 Ala 1414
Pro 44 Ser 45
Val 179 Tyr 181
Gln 1741 None None
Glu 746 Ala 750
Leu 747 Thr 751
Leu 747 Thr 751
Arg 1178 None None
Phe 386 Ala 389
Glu 746 Ala 750
Glu 746 Ala 750
Glu 746 Ala 750
Glu 746 Ala 750
Glu 746 Ala 750
Glu 746 Ala 750
Glu 746 Ala 750
Asp 242 Arg 243
Asp 242 Arg 243
Glu 746 Ala 750
Glu 746 Ala 750
Arg 117 None None
Ser 45 None None
Ala 43 None None
Pro 138 Cys 143
Asn 564 Pro 568
Ala 5 Ala 80
Glu 1469 Cys 1471
Ser 45 None None
Lys 517 None None
Ser 45 None None
Ser 45 None None
Leu 747 Thr 751
Ser 45 None None
Pro 152 None None
Ala 5 Ala 80
Tyr 87 Lys 93
Gln 874 None None
Val 179 None None
Arg 1178 None None
Asn 14373 None None
Asn 601 Asn 602
Gln 376 None None
Glu 746 Glu 749
Val 96 His 98
Phe 360 None None
Glu 503 None None
Ala 5 Ala 80
Gly 227 Glu 232
Glu 746 Ala 750
Pro 138 Cys 143
Ala 99 Pro 103
Thr 192 None None
Leu 226 None None
Lys 2253 None None
Asp 835 None None
Ile 216 None None
Ile 35 Gly 38
Arg 29 Ala 34
Asn 850 Pro 851
Glu 746 Ala 750
Glu 746 Al

Leu 747 Thr 751
Asn 92 None None
Leu 91 Met 94
Gln 238 None None
Gln 238 None None
Gly 115 Thr 116
Glu 462 Arg 465
Ala 20 Ala 80
Ala 5 Ala 80
Val 164 None None
Asn 321 None None
Ser 45 None None
His 129 Glu 132
Glu 746 Ala 750
Glu 746 Ala 750
Glu 746 Ala 750
Glu 746 Ala 750
Glu 746 Ala 750
Glu 746 Ala 750
Glu 746 Ala 750
Glu 746 Ala 750
Tyr 181 Pro 184
Ser 45 None None
Leu 747 Ser 752
Gln 238 None None
Ser 45 None None
Glu 746 Ala 750
Asp 61 None None
Gly 38 Ala 39
Phe 76 None None
Ser 274 Ser 290
Pro 152 None None
Ser 45 None None
His 24 Gly 38
Glu 746 Ala 750
Glu 746 Ala 750
Glu 746 Ala 750
Glu 746 Ala 750
Glu 746 Ala 750
Glu 746 Ala 750
Glu 746 Ala 750
Glu 746 Ala 750
Glu 746 Ala 750
His 450 Lys 459
Glu 746 Ala 750
Glu 121 None None
Glu 2220 None None
Ser 227 None None
Asn 92 None None
Pro 262 Ser 264
Ala 62 None None
Glu 746 Ala 750
Glu 746 Ala 750
Glu 746 Ala 750
Glu 746 Ala 750
Glu 746 Ala 750
His 140 None None
Ile 123 Tyr 124
Ser 45 None None
Glu 746 Thr 751
Glu 2636 None None
V

Ala 5 Ala 80
Ile 35 Gly 38
Cys 199 Ser 201
Pro 138 Cys 143
Ser 23 Ile 35
Val 22 Ser 33
Leu 31 Ile 35
Pro 138 Cys 143
Ile 216 None None
Ile 216 None None
Gly 3521 Gly 3527
Glu 746 Ala 750
Glu 746 Ala 750
Ala 5 Ala 80
Asn 1839 None None
Glu 746 Ala 750
Glu 746 Ala 750
Glu 462 Arg 465
Ile 836 None None
Glu 746 Ala 750
Glu 746 Ala 750
Leu 380 None None
Ile 390 None None
Glu 746 Ala 750
Glu 746 Ala 750
Glu 746 Ala 750
Glu 746 Ala 750
Glu 746 Ala 750
Ala 5 Ala 80
Glu 746 Ala 750
Val 179 None None
Tyr 87 Lys 93
Arg 577 Asp 578
Glu 503 None None
Ser 202 Gly 206
Glu 374 None None
Ala 459 Ala 473
Glu 2290 Arg 2291
Leu 747 Ser 752
Leu 31 Ser 33
His 700 None None
Ser 227 None None
Asn 226 Val 227
Tyr 87 Asn 92
Val 777 None None
Leu 747 Thr 751
Glu 451 Tyr 452
Glu 746 Ala 750
Glu 746 Ala 750
Glu 746 Ala 750
Glu 746 Ala 750
Cys 137 His 140
Ser 67 Tyr 68
Gln 204 None None
Leu 747 Thr 751
Gly 160 Glu 165
Ser 33 None None
Glu 746 Ala 750
Glu 746 Ala 750
Glu 746 Ala 750
Glu 746 Ala 750
Glu 746 Ala 750
G

Ser 45 None None
His 700 None None
Asp 131 None None
Ser 2599 Gly 2608
Leu 747 Ser 752
Leu 747 Ser 752
Gly 152 None None
Ala 5 Ala 80
Gln 556 Leu 562
Glu 746 Ala 750
Glu 746 Ala 750
Glu 746 Ala 750
Glu 746 Ala 750
Glu 746 Ala 750
Glu 746 Ala 750
Glu 746 Ala 750
Glu 746 Ala 750
Met 14 Ser 45
Glu 746 Ala 750
Ser 1050 None None
His 700 None None
Leu 747 Thr 751
Cys 137 His 140
Lys 703 None None
Gly 69 None None
Gly 69 None None
Gly 69 None None
Leu 747 Thr 751
Leu 747 Thr 751
Glu 746 Ala 750
Glu 746 Ala 750
Gln 1740 Gln 1741
Ala 99 Val 104
Val 118 Arg 119
Ile 2166 None None
Gly 223 None None
Val 235 Pro 239
Ala 5 Ala 80
Pro 138 Cys 143
Lys 745 Glu 749
Leu 213 Ile 215
Ile 212 None None
Ala 5 Ala 80
Gly 152 None None
Ile 35 None None
Glu 746 Ala 750
Glu 746 Ala 750
Asp 19 None None
Ser 625 Ser 632
Met 198 Asn 200
Gln 1328 None None
Cys 96 Ala 99
Thr 116 Arg 117
Thr 116 Arg 117
Ala 5 Ala 80
Ala 5 Ala 80
Ala 5 Ala 80
Lys 920 None None
Ile 216 None None
Phe 594 Tyr 597
Ile 836 None None
Thr 57

Gly 720 Val 724
Pro 138 Cys 143
Gln 1328 None None
Lys 396 None None
Gln 238 None None
Arg 669 None None
Gly 69 Phe 70
Glu 833 None None
Ser 45 None None
Glu 1202 None None
Glu 86 None None
Ala 5 Ala 80
Ser 45 None None
Thr 192 None None
Leu 226 None None
Ser 45 None None
Gly 2613 None None
Ser 45 None None
Glu 882 None None
Gln 421 None None
Tyr 30 Ser 33
Ser 45 None None
Ala 99 Pro 103
Ala 5 Ala 80
Lys 703 None None
Pro 212 Thr 213
Ser 45 None None
Ile 216 None None
Ala 5 Ala 80
Gln 153 None None
Ser 45 None None
Leu 213 None None
Asn 208 Arg 209
Arg 574 None None
Gly 206 None None
Ser 45 None None
Ile 216 None None
Gln 127 None None
Ser 45 None None
Asp 14 Leu 16
Asp 464 Tyr 467
Asp 464 Tyr 467
Gly 115 Thr 116
Pro 113 Pro 114
Leu 585 Asn 586
Glu 239 None None
Tyr 87 Asn 92
Pro 138 Cys 143
Ala 5 Ala 80
Glu 399 None None
Gln 105 None None
Gly 271 None None
Leu 3010 None None
Tyr 181 Pro 184
Val 74 Cys 199
Leu 7 Asp 58
Ile 149 None None
Ser 7 Gln 11
Thr 41 Ser 45
His 140 None None
Ile 

Ile 735 Tyr 741
Pro 138 Cys 143
Tyr 197 Met 198
Asp 580 Arg 583
Phe 70 Arg 71
Glu 267 None None
Lys 567 Leu 570
Pro 138 His 140
Ile 35 Gly 38
Asp 592 None None
Glu 520 None None
Ser 23 Ser 33
Pro 152 None None
Glu 54 None None
Arg 24 None None
Ser 45 None None
Val 179 None None
Pro 152 None None
Lys 575 Arg 577
Gly 271 Gly 275
Ala 5 Ala 80
Ser 45 None None
Asp 242 Arg 244
Val 179 None None
Gly 3527 None None
Ser 183 Ala 186
Pro 152 None None
Gln 376 None None
Val 777 None None
Ser 45 None None
Glu 658 Lys 665
Lys 857 None None
Pro 138 Cys 143
Pro 152 None None
Asn 259 None None
Arg 618 Phe 626
Lys 100 Thr 101
Pro 138 Cys 143
Ser 45 None None
Leu 213 None None
Glu 916 None None
Pro 152 None None
Arg 2425 Glu 2432
Met 844 Ser 847
Pro 114 None None
Tyr 195 None None
Gln 28 Gln 61
Val 22 Ser 33
Asn 453 Gln 455
Ile 216 None None
Ile 216 None None
Val 22 Gly 38
Ile 836 None None
Gly 710 None None
Ser 45 None None
Ala 24 Ala 26
Ser 45 None None
His 450 Glu 451
Ser 45 None None
Thr 217 Ser 221

Glu 399 None None
Tyr 197 None None
Thr 2911 None None
Ser 274 Ser 290
Ser 274 Ser 290
Asn 189 None None
Thr 116 Ala 122
Gln 238 None None
Gly 223 None None
Ser 45 None None
Arg 577 None None
Ser 45 None None
Ile 836 None None
Asp 16 None None
Gly 814 None None
Val 22 Gly 38
Ser 45 None None
Glu 451 Tyr 452
Val 577 None None
Pro 112 Val 134
Asn 92 None None
Glu 246 Leu 250
Glu 1430 None None
Glu 658 Lys 665
Ser 45 Pro 52
Lys 2083 None None
Ser 45 None None
Gln 352 None None
Gly 3517 Gly 3527
Asp 32 Ser 47
Glu 658 Glu 659
Glu 658 Glu 659
Glu 658 Glu 659
Lys 612 None None
Ser 441 None None
Phe 286 None None
Asp 131 None None
Ser 45 None None
Ala 5 Ala 80
Glu 249 None None
Gly 34 Ser 45
Asn 97 None None
Leu 137 Val 140
Ala 99 Cys 102
Pro 35 None None
Ile 655 Lys 657
Gly 3527 None None
Ser 45 None None
Ala 5 Ala 80
Met 282 None None
Thr 642 Lys 643
Ile 405 Leu 420
Gly 2113 None None
Phe 231 Asp 242
Asp 835 None None
Gly 302 Gly 303
Phe 173 His 175
Ser 146 None None
Phe 281 None None
Pro 13

Pro 138 Cys 143
Asn 447 Gly 451
Ser 45 None None
Pro 138 Cys 143
Leu 46 Ser 47
Pro 113 Pro 114
Tyr 195 Met 198
Met 198 Asn 200
Ser 163 Gly 164
Pro 138 Cys 143
Lys 459 None None
Lys 459 None None
Tyr 30 Ser 33
Lys 106 None None
Arg 2467 Gln 2474
Ile 35 Gly 38
Glu 485 None None
Lys 3390 Val 3391
Ser 45 None None
Ser 23 Ser 33
Gly 2613 None None
Gly 243 Pro 244
Ala 5 Ala 80
Met 37 His 39
Val 22 Gly 38
Ser 45 None None
Lys 72 None None
Gly 271 None None
Glu 651 None None
Ser 45 None None
Val 179 None None
Pro 89 Ala 90
Asn 200 Ser 201
Val 3794 None None
Ser 45 None None
Leu 213 Ile 215
Asn 92 None None
Gly 160 Glu 165
Leu 213 Ile 215
Ala 24 Ala 26
Glu 132 Val 133
Ala 5 Ala 80
Val 179 None None
Trp 25 Asp 32
Lys 917 Met 920
Val 422 None None
Pro 152 Gln 153
Asn 208 Arg 209
Ala 37 Ser 51
Pro 152 None None
Glu 131 None None
Arg 243 Glu 248
Leu 213 Ile 215
Gln 827 None None
Tyr 30 Ser 33
Tyr 30 Ser 33
Ile 215 Thr 217
Gln 200 His 202
Val 133 Glu 141
Ala 390 Gln 399
Ala 390 Gln 399
His 408 None 

Tyr 197 None None
Ile 639 Lys 641
Arg 117 None None
Glu 491 None None
Ile 216 None None
Lys 106 None None
Thr 192 Ile 193
Val 179 None None
Glu 165 Tyr 166
Lys 360 None None
Tyr 195 None None
Asp 14 Leu 16
Ser 45 None None
Gln 619 Gln 621
Pro 250 None None
Pro 19 Asn 20
Asp 1667 None None
Leu 225 Asn 229
Lys 167 None None
Pro 138 Cys 143
Glu 325 None None
Glu 616 None None
Leu 25 None None
Leu 1241 Gln 1242
Val 179 None None
Leu 1241 Gln 1242
Asn 79 None None
Glu 493 None None
Ser 617 None None
Arg 557 None None
Ser 45 None None
Glu 324 Glu 325
Val 186 Ser 188
Ser 23 Ile 35
Asp 131 None None
Asp 169 Val 177
Ser 579 None None
Lys 1583 None None
Pro 89 None None
Phe 286 None None
Gly 134 Arg 140
Ser 45 None None
Glu 658 Lys 665
Asp 670 None None
Ile 156 Gly 160
Ile 216 None None
Ile 216 None None
Leu 329 Val 332
Leu 329 Val 332
Pro 268 Leu 273
Cys 137 None None
Asp 835 None None
Gly 462 None None
Tyr 328 Ser 329
Val 22 Ser 33
Leu 31 Ser 33
Gly 240 Arg 241
Leu 65 His 66
Asn 92 None None
A

Gly 76 None None
Glu 1596 None None
Val 179 Glu 182
His 140 None None
Glu 43 None None
Glu 324 Glu 325
Thr 111 Pro 114
Leu 226 None None
Ser 33 Ser 37
Lys 210 None None
Glu 219 None None
Ala 238 Ser 240
Asn 224 Asn 229
Pro 799 Pro 800
Asn 545 None None
Leu 469 None None
Cys 190 Thr 191
Gly 2613 None None
Lys 2017 Ser 2018
Trp 13 Phe 16
Asp 19 None None
Tyr 589 Val 592
Ser 202 Cys 203
Ser 45 None None
Glu 281 None None
Glu 607 None None
Phe 577 None None
Ala 5 Ala 80
Ala 5 Ala 80
Lys 557 Leu 560
Gly 160 Glu 165
Leu 120 None None
Gly 240 Arg 241
Pro 21 None None
Gln 619 Gln 621
Ala 57 Ala 58
Leu 669 Arg 673
Ser 53 Asp 54
Leu 204 Ser 208
Leu 1587 Phe 1593
Asp 32 Ser 47
Asp 7 Phe 12
Asn 668 None None
Leu 926 None None
Ser 1043 None None
Gly 834 Pro 837
Lys 1263 None None
Lys 1263 None None
Ser 45 None None
Gln 126 Met 130
Thr 214 None None
Asp 835 None None
Asn 92 None None
Lys 1255 None None
Gln 619 Gln 621
Cys 97 His 101
Asn 208 Arg 210
Glu 1562 None None
Asp 131 None None
Ala 5 Ala 80
G

Asn 224 Asn 229
Ala 302 Leu 303
His 408 None None
Glu 125 None None
His 36 Glu 39
Leu 213 Ile 215
Leu 235 None None
Glu 458 None None
Ala 2122 None None
Asp 440 Lys 448
Met 14 Ser 45
Ser 1091 None None
Gln 424 None None
Glu 707 None None
Gly 243 Pro 244
Ser 45 None None
Ile 35 Gly 38
Ser 45 None None
Asn 196 None None
Lys 130 None None
Glu 325 None None
Glu 150 None None
Gly 240 Arg 241
Glu 325 None None
Glu 325 None None
Gly 130 None None
Tyr 124 Gln 126
Glu 222 None None
Lys 567 None None
Ala 5 Ala 80
Ala 5 Ala 80
Gln 1328 None None
Ile 216 None None
Glu 139 Arg 141
Glu 233 None None
Arg 119 Ala 120
Pro 418 None None
Glu 804 None None
Trp 2100 Gly 2102
Gly 223 Arg 228
His 520 None None
Ser 45 None None
Asn 103 Tyr 104
Leu 268 None None
Glu 125 None None
Lys 517 None None
Ala 5 Ala 80
Cys 49 None None
Ala 5 Ala 80
Asn 70 None None
Ala 97 Gly 103
Gln 127 None None
Leu 259 None None
Phe 222 Met 223
Glu 161 None None
Lys 816 None None
Ala 239 Gln 242
Glu 325 None None
Glu 141 Ser 144
Val

Ser 45 None None
Ala 5 Ala 80
Ala 5 Ala 80
Gly 93 His 100
His 139 Ser 144
Ser 1091 None None
Lys 567 Leu 570
Phe 90 None None
Ile 123 None None
His 47 Glu 48
Ile 123 None None
Ile 1150 None None
Pro 95 Arg 102
Glu 249 None None
Asn 486 Pro 490
Ala 5 Ala 80
Ala 5 Ala 80
Lys 268 None None
Glu 1361 None None
Lys 305 None None
Lys 210 None None
Gln 12 None None
Gln 12 None None
Thr 576 None None
Thr 360 None None
Gly 206 None None
Glu 126 Ser 128
Pro 45 Cys 50
Asp 131 None None
Ser 1036 None None
Lys 567 Leu 570
Glu 691 None None
Leu 884 Gln 892
Gly 38 Pro 44
Pro 610 None None
Ser 23 Ala 39
Gln 402 None None
Glu 110 None None
Ala 5 Ala 80
Tyr 30 Ser 33
Ile 571 Leu 573
Lys 569 None None
Lys 569 None None
Glu 240 None None
Phe 456 Gln 457
Ser 45 None None
Glu 249 None None
Asp 131 None None
Glu 1420 None None
Ser 1642 None None
Asn 233 None None
Asn 424 Lys 436
Glu 57 Leu 60
Ser 45 None None
Ser 45 None None
Ser 45 None None
Ser 44 None None
Gly 113 None None
Glu 275 None None
Ser 491 None N

Glu 131 None None
Asp 131 None None
Ser 1074 None None
Cys 1500 None None
Ile 216 None None
Pro 95 Arg 102
Gln 2210 None None
Pro 95 Arg 102
Ser 33 None None
Pro 44 Ser 45
Thr 641 Leu 644
Ser 14 None None
Ser 14 None None
Phe 777 None None
Lys 639 None None
Ser 1642 None None
Ala 37 Ser 51
Glu 161 None None
Glu 264 None None
Ser 45 None None
Glu 715 None None
Leu 411 Val 412
His 36 Ser 37
Asp 16 Thr 18
Ser 525 None None
Ile 216 None None
Leu 425 Val 426
Glu 275 None None
Leu 380 None None
Lys 245 None None
Asp 166 None None
Ile 139 None None
Lys 468 None None
Val 179 None None
Pro 1238 None None
Val 179 None None
Thr 18 Pro 19
Trp 25 Asp 32
Lys 1233 None None
Gly 220 Gly 223
Arg 465 Tyr 467
Asn 92 None None
Gly 308 None None
Ala 201 None None
Glu 715 None None
Arg 205 Glu 211
Asp 168 None None
Asp 242 Arg 244
Glu 29 None None
Asp 205 Met 208
Arg 150 None None
Pro 95 Arg 102
Leu 16 None None
Tyr 195 None None
Thr 23 Ala 29
Ser 1642 None None
Glu 161 None None
Pro 33 None None
Ser 23 Ala

Glu 1550 Glu 1552
Arg 151 None None
Ala 219 Glu 221
Glu 483 His 489
Glu 715 None None
Val 84 Tyr 88
Lys 299 Lys 301
Lys 1256 None None
Ile 119 None None
Met 8 Ala 80
Leu 1498 None None
Glu 152 Glu 153
Arg 1347 Glu 1348
Lys 1263 None None
Pro 685 None None
Glu 284 None None
Ile 123 None None
Ser 247 None None
Phe 16 None None
Pro 45 Cys 50
Lys 186 None None
Ala 5 Ala 80
Gly 207 None None
Glu 764 None None
Glu 47 None None
Glu 1230 None None
Gln 455 Lys 459
Glu 275 None None
Glu 199 None None
Ala 5 Ala 80
Asn 389 None None
Glu 286 None None
Gly 712 Phe 715
Leu 120 Ile 122
Pro 45 Cys 50
Pro 89 None None
Glu 126 None None
Lys 1332 None None
Asn 441 Thr 454
Pro 59 None None
Leu 276 None None
Asn 103 None None
Lys 157 None None
Cys 190 Ile 193
Gly 152 None None
Val 118 Arg 119
His 473 None None
Ser 37 Pro 52
Glu 691 None None
Gln 420 Gln 421
Asp 19 None None
Cys 97 Ile 100
Pro 95 Arg 102
Glu 1128 None None
Thr 142 None None
Ala 5 Ala 80
Leu 120 Ile 122
Asn 200 Ser 201
Gly 22 Ala 29
Glu 132 V

Asn 171 None None
Leu 46 None None
Leu 658 None None
Leu 411 Val 412
Arg 141 None None
Ser 43 Arg 49
Asn 113 None None
Ile 405 Leu 420
Leu 226 None None
Glu 195 None None
Ser 23 Ser 33
Glu 286 None None
Glu 1901 None None
Glu 451 Tyr 452
Arg 278 Glu 279
Lys 377 None None
Glu 106 None None
Met 484 Asn 486
Ala 5 Ala 80
Pro 95 Arg 102
Leu 573 None None
Asn 74 None None
Gln 421 None None
Pro 95 Arg 102
Pro 95 Arg 102
Ser 1642 None None
Gln 401 None None
Tyr 30 Ser 33
Ser 45 None None
Glu 619 None None
Lys 459 None None
Arg 255 None None
Arg 178 Leu 180
Asn 12 None None
Gly 273 None None
Ala 1309 None None
Glu 664 None None
Pro 534 Leu 536
Ser 45 None None
Phe 70 Arg 71
Ile 571 Arg 574
Ser 525 None None
Arg 117 None None
Glu 234 None None
Pro 95 Asp 97
Ser 367 Ile 373
Lys 389 None None
Pro 1864 None None
Glu 1781 Lys 1782
Val 1383 None None
Gly 289 Ala 296
Ser 1642 None None
Pro 660 Gly 661
Gln 557 Ser 559
Asp 464 Tyr 467
Asp 1083 None None
Met 473 Leu 474
Gln 188 None None
Pro 834 None Non

Asp 32 Ile 35
Lys 959 Lys 960
Asp 32 Ile 35
Gln 1356 Gly 1366
Leu 137 Trp 146
Glu 257 Gln 262
Asn 131 None None
Glu 428 None None
Glu 393 None None
Gly 34 Ser 71
Gly 34 Ser 71
Glu 152 Glu 153
Arg 250 None None
Tyr 126 Lys 132
Ala 182 Ala 184
Pro 177 Cys 182
Glu 1764 None None
Lys 297 None None
Asn 235 None None
Pro 177 His 178
Glu 128 None None
Pro 44 None None
Pro 44 None None
Gly 243 Pro 244
His 179 None None
Gln 1039 Pro 1042
Gly 271 None None
Lys 169 Cys 174
His 182 None None
Glu 233 Lys 240
Glu 422 None None
Glu 888 None None
Arg 158 Ala 159
Val 1450 None None
Tyr 234 Met 237
Asp 6 Thr 41
Asp 6 Thr 41
Lys 925 None None
Lys 679 None None
Lys 679 None None
Ala 5 Ala 80
Ala 5 Ala 80
Gly 783 Lys 785
Ala 5 Ala 80
Ala 5 Ala 80
Gly 153 None None
Phe 16 None None
Pro 559 None None
Gln 187 Gln 188
Leu 1719 None None
Cys 296 None None
Gln 401 None None
Tyr 30 Ser 33
Tyr 30 Ser 33
Ser 45 None None
Ser 274 None None
Glu 303 None None
Gly 247 None None
Pro 191 None None
Ala 281 None None
Thr 4

Glu 1124 None None
Val 555 Gln 556
Lys 550 Lys 558
Ser 23 Ile 35
Ser 23 Ile 35
Val 274 Pro 278
Lys 558 Val 559
Tyr 126 Asn 131
Val 157 Arg 158
Pro 551 Glu 554
Trp 557 Lys 558
Pro 551 Glu 554
Met 552 Glu 554
Asp 218 Arg 221
Pro 152 Pro 153
Glu 178 Arg 179
Thr 254 Arg 259
Val 22 Ser 33
Val 22 Ser 33
Cys 376 Ala 377
Glu 1602 None None
Gly 154 Thr 155
Thr 155 Arg 156
Thr 155 Arg 156
Val 560 None None
Tyr 220 Pro 223
Thr 379 Phe 380
Glu 1357 None None
Asp 131 None None
Glu 177 None None
Trp 557 Lys 558
Glu 158 Glu 165
Ser 274 Ser 290
Lys 58 None None
Gln 424 None None
Asn 564 Leu 576
Pro 577 Asp 579
Trp 557 Lys 558
Val 559 Glu 561
Leu 265 None None
Val 559 Val 560
Trp 557 Lys 558
His 179 None None
Glu 389 None None
Glu 235 None None
Arg 55 None None
Glu 1127 None None
Arg 175 Cys 176
Met 552 Trp 557
Ser 274 Ser 290
Val 569 Leu 576
Lys 234 None None
Glu 99 None None
Glu 1673 Arg 1676
Glu 27 Glu 28
Met 133 Lys 139
Ile 344 Asp 347
Ala 182 Ala 184
Ile 162 Tyr 163
Tyr 553 Gln 556
Glu 482 Glu 484

Asn 235 None None
His 966 None None
Asp 131 None None
Gln 556 Glu 561
Ser 45 None None
Tyr 553 Lys 558
Ala 5 Ala 80
Met 552 Gln 556
Trp 557 Lys 558
Pro 1261 None None
Leu 130 Met 133
Leu 265 None None
Lys 418 None None
Phe 145 Leu 146
Lys 546 None None
Phe 556 None None
Trp 557 Glu 561
Tyr 570 Leu 576
Trp 557 Lys 558
Trp 557 Lys 558
Ile 191 None None
Ile 33 None None
Ser 715 None None
Val 560 None None
Tyr 553 Lys 558
His 168 Glu 171
Lys 95 None None
Ile 232 Tyr 236
Ser 45 None None
Lys 1195 None None
Ser 45 None None
Met 552 Tyr 553
Tyr 553 Lys 558
Gly 78 None None
Gln 556 Trp 557
Ser 1198 None None
Asp 281 Arg 282
Asn 131 None None
Val 833 None None
Trp 13 Phe 16
Gly 260 None None
Trp 557 None None
Lys 992 None None
Ser 423 None None
Val 555 Pro 573
Ser 45 None None
Gly 262 None None
Asp 211 Arg 214
Trp 557 Lys 558
Lys 550 Lys 558
Gly 262 None None
Asp 644 Glu 648
Val 560 None None
Asp 1245 Gln 1286
Gly 59 None None
Glu 237 None None
Phe 1543 None None
Glu 271 Arg 273
Ile 33 None Non

Tyr 568 Thr 574
Ala 12 Gly 15
Trp 560 None None
Ile 33 None None
Asp 131 None None
Lys 550 Pro 551
Leu 201 His 214
Ala 76 None None
Asp 261 None None
Ala 1607 None None
Asn 564 Leu 576
Gln 186 Leu 193
Cys 176 His 179
Trp 557 Lys 558
Glu 119 None None
Asp 258 None None
Trp 557 Lys 558
Asn 1496 Asn 1497
Ala 138 Val 143
Ser 45 None None
Trp 557 Lys 558
Met 237 Asn 239
Met 552 Tyr 553
Ile 255 None None
Pro 177 Cys 182
Ser 215 Tyr 220
Asp 33 Phe 39
Lys 558 Glu 562
Gln 556 Val 559
Trp 557 Lys 558
Val 560 None None
Trp 557 Lys 558
His 178 Ser 183
Trp 557 Lys 558
Gly 78 None None
Ser 252 None None
Thr 211 Ser 215
His 1643 Arg 1648
Lys 21 None None
Arg 273 Cys 275
Ala 289 Ala 295
Gly 1125 None None
Trp 557 Glu 561
Lys 558 Glu 562
Leu 31 Asp 81
Tyr 553 Gln 556
Pro 551 Gln 556
Glu 305 Lys 308
Asp 148 Thr 150
Pro 58 His 115
Gln 556 Lys 558
Lys 550 Glu 554
Asp 419 None None
Ile 820 None None
Trp 557 Lys 558
Ser 45 None None
Ser 45 None None
Glu 1892 None None
Lys 87 None None
Lys 79 None None
Asn 5

Glu 917 None None
Pro 194 None None
Trp 958 Glu 959
Lys 558 Gly 565
Ser 1874 None None
Ala 1035 Pro 1038
Gly 864 Gln 865
His 107 Gly 119
Trp 557 Lys 558
Pro 577 Asp 579
Ile 294 None None
Glu 117 None None
Gly 271 None None
Glu 104 None None
Ser 274 Ser 290
Glu 71 None None
Ser 179 None None
Lys 550 Lys 558
Glu 89 None None
Lys 1263 None None
Glu 126 None None
Tyr 553 Lys 558
Tyr 553 Lys 558
Gln 103 None None
Lys 623 None None
Ser 1214 Asn 1217
Leu 116 Leu 122
Ser 991 Ser 992
Asn 564 Tyr 578
Leu 103 Leu 104
Pro 978 Pro 980
Arg 2454 None None
Tyr 553 Gln 556
Gln 905 None None
Ser 557 Gly 563
Gly 941 Ala 951
Asn 239 Ser 240
Gly 108 Phe 109
Ser 557 Gly 563
Tyr 234 None None
Val 218 None None
Gln 1152 Glu 1156
Ser 308 None None
Pro 577 Asp 579
Cys 141 Gln 144
Glu 350 None None
Lys 139 Thr 140
Glu 522 None None
Ala 56 None None
Glu 358 None None
Val 218 None None
Ser 45 None None
Pro 177 Cys 182
Thr 155 Ala 161
Glu 616 None None
Ala 208 Pro 212
Met 246 Pro 250
Asn 131 None None
Pro 191 None 

Glu 27 Glu 28
Asn 401 None None
Lys 533 None None
Pro 556 Pro 565
Ser 45 None None
Val 560 Leu 576
Val 560 Leu 576
Leu 145 None None
Ser 280 Pro 284
Asp 131 None None
Trp 557 Val 559
Lys 745 None None
Gly 262 None None
Ala 5 Ala 80
Val 157 Arg 158
Asn 239 Ser 240
Lys 1263 None None
Arg 337 Glu 343
Ser 557 Gly 563
Cys 49 None None
Glu 1129 None None
Gln 38 None None
Phe 910 None None
Ser 274 Ser 290
Ile 33 None None
Val 560 None None
Ile 574 Thr 578
Asp 32 None None
Ile 282 None None
Gly 262 Ser 269
Val 569 Leu 576
Glu 171 Val 172
Glu 48 None None
Trp 557 Lys 558
Val 559 Gly 565
Val 1292 Gln 1331
Pro 191 None None
Gln 556 Trp 557
Pro 1013 None None
Val 560 Leu 576
Leu 441 None None
Asp 131 None None
Tyr 553 Lys 558
Lys 22 None None
Thr 315 None None
Asp 579 None None
Thr 150 None None
Glu 1129 None None
Leu 130 Met 133
Ala 79 Ala 88
Leu 576 None None
Trp 557 Glu 561
Glu 1892 None None
Leu 576 None None
Thr 231 None None
Val 560 Leu 576
Val 216 Tyr 220
Glu 1049 None None
Pro 191 None Non

Val 555 Ile 571
Ala 8 None None
Glu 554 Lys 558
Arg 158 Ala 159
Lys 69 None None
Ala 5 Ala 80
Lys 1666 None None
Gln 713 None None
Glu 230 Phe 235
Glu 357 Gly 361
Glu 110 None None
Ala 5 Ala 80
Ile 35 Gly 38
Lys 558 Val 559
Pro 44 None None
Ser 483 None None
Ala 5 Ala 80
Ser 45 None None
Ser 557 Gly 563
Gly 262 Phe 270
Lys 1255 None None
Asn 131 None None
Asp 419 None None
Lys 1431 None None
Leu 201 None None
Trp 557 Glu 561
Glu 351 None None
Asp 579 None None
Arg 506 None None
Lys 703 None None
Leu 252 None None
Lys 219 None None
Pro 115 Gln 118
Glu 110 None None
Asp 419 None None
Ala 5 Ala 80
Ser 227 Ile 232
Pro 551 Val 555
Ala 291 None None
Thr 2676 Pro 2680
Glu 351 None None
Thr 231 None None
Pro 142 None None
Trp 557 Lys 558
Pro 58 His 115
Leu 684 Ile 685
Gly 4 Gly 17
Lys 550 Pro 551
Gln 74 None None
Ser 45 None None
Ala 5 Ala 80
Lys 558 Glu 562
Ala 5 Ala 80
Tyr 30 Ser 33
Asp 6 Thr 41
Pro 512 Pro 515
Arg 52 Pro 59
Pro 142 None None
Arg 52 Pro 59
Ser 45 None None
Trp 557 Glu 561
Se

Pro 662 Arg 664
Asp 32 Ser 33
Ala 5 Ala 80
Ser 45 None None
Ser 241 None None
Gly 108 None None
Asp 6 Thr 41
Leu 120 None None
Phe 212 His 214
Thr 2911 None None
Gln 556 Val 559
Val 157 None None
Glu 110 None None
Ala 5 Ala 80
Trp 557 Lys 558
Gly 1158 Leu 1159
Ala 5 Ala 80
Lys 550 Val 559
Ser 45 Gly 48
Val 22 Ser 33
Ser 45 None None
Ser 45 None None
Tyr 30 Ser 33
Ser 45 None None
Gly 38 Ala 39
Gly 291 Asn 292
Ser 45 None None
Arg 2027 None None
Val 560 None None
Met 1239 None None
Leu 31 Ile 35
Asp 918 None None
Lys 737 Glu 741
Ser 45 None None
Gln 556 Pro 573
Ala 5 Ala 80
Thr 41 Asn 51
Asn 40 None None
Glu 203 None None
Ser 45 None None
Asp 304 Glu 307
Phe 109 Arg 110
Asp 1567 Lys 1568
Pro 177 Cys 182
Thr 42 Gly 48
Pro 177 Cys 182
His 242 His 246
Ser 45 None None
Lys 550 Lys 558
Ser 274 Ser 290
Ala 43 None None
Ala 5 Ala 80
Ser 45 None None
Lys 655 None None
Ala 5 Ala 80
Trp 557 Lys 558
Met 552 Val 555
His 469 None None
Ile 35 Thr 41
Gln 905 None None
Glu 651 None None
Gly 103 None No

Glu 841 None None
Ser 41 None None
Val 427 Gly 428
Lys 703 None None
Ala 129 None None
Asn 447 Gly 451
Asp 32 His 36
Ser 45 None None
Ile 998 None None
Arg 248 Pro 250
Ser 45 None None
Thr 826 None None
Gln 93 Gln 95
Asp 898 Glu 901
Ala 43 Ser 47
Asp 32 Ile 35
Lys 378 None None
Ala 8 Ala 15
Asp 32 Ser 47
Ala 39 Thr 40
Ser 33 Ser 37
Cys 238 Met 243
Ser 184 None None
Gly 154 Arg 156
Ser 45 None None
Asp 131 None None
Glu 393 None None
Pro 397 None None
Ala 5 Ala 80
Ala 24 Ala 26
Ile 153 None None
Glu 204 Leu 206
Glu 72 Leu 74
His 14 Ile 20
Ser 23 Ile 35
Glu 454 None None
Ala 5 Ala 80
Ala 223 Ser 231
Val 22 Ser 33
Gly 34 Ser 71
Ser 45 None None
Ala 1558 None None
Ala 428 None None
Pro 386 None None
Val 196 Ala 205
Glu 2039 None None
Ala 125 Ile 128
Ser 45 None None
Pro 529 Asp 536
Asn 1171 None None
Arg 52 Pro 59
Glu 1141 None None
Glu 182 None None
Cys 100 Gly 114
Leu 638 None None
Ala 428 None None
Glu 218 None None
Glu 110 None None
Lys 1666 None None
Ala 214 None None
Thr 641 Leu 644


Ile 255 None None
Lys 76 None None
Arg 335 Phe 338
Gly 324 None None
Ser 45 None None
Ser 45 None None
Val 8 Gly 16
Leu 265 None None
Leu 133 None None
Arg 957 None None
Cys 238 None None
Gln 1216 None None
Glu 639 None None
Ser 557 Gly 563
Val 86 None None
Val 218 None None
Asp 274 Asp 277
Trp 25 Asp 32
Leu 220 None None
Tyr 30 Ser 37
Leu 31 Ile 35
Ser 542 None None
Ser 45 None None
Ala 5 Ala 80
Glu 710 None None
Glu 223 None None
Ala 5 Ala 80
Ser 96 None None
Gln 1216 None None
Glu 168 None None
Glu 14 Ala 16
Tyr 234 None None
Ser 45 None None
Glu 113 None None
Glu 14 None None
Ser 45 None None
Pro 44 Ser 60
Asp 1091 None None
Arg 156 Ile 162
Arg 24 Ile 30
Glu 218 None None
Glu 198 None None
Ser 145 None None
Val 25 Arg 26
Val 157 Arg 158
Phe 639 None None
Lys 300 None None
Val 448 Gly 451
Gln 847 None None
Pro 386 None None
Asp 131 None None
Gly 49 None None
Ala 5 Ala 80
Asp 1226 None None
Ser 45 None None
Leu 252 None None
Ala 24 Ala 26
Glu 131 None None
Arg 341 His 345
Asp 207 Val

Ala 5 Ala 80
His 47 None None
His 179 None None
Ser 45 None None
Glu 715 None None
Lys 62 None None
Ser 45 None None
Ser 542 None None
Lys 430 None None
Lys 517 None None
Gln 655 None None
Asp 6 Ser 29
Lys 24204 Ser 24208
His 504 None None
Ile 30 Tyr 31
Ile 162 Tyr 163
Met 14 Ser 45
Thr 90 Tyr 96
Ala 49 Pro 50
Ala 5 Ala 80
His 179 Asp 184
His 47 Asp 52
Ser 542 None None
Pro 192 None None
Glu 144 Arg 145
Ile 123 None None
Ile 255 None None
Ala 5 Ala 80
Ser 45 None None
Ser 45 None None
His 654 Glu 657
Glu 22410 None None
Gly 609 Ala 615
Ile 172 None None
Ser 45 None None
Gly 296 Phe 297
Pro 177 Cys 182
Pro 45 Cys 50
Pro 130 None None
Ile 195 Gly 199
Leu 862 None None
Leu 265 None None
Leu 133 None None
Gln 1238 None None
Lys 762 None None
His 520 None None
Met 238 Pro 239
Ala 5 Ala 80
Glu 62 None None
Pro 463 Gly 464
Ile 123 None None
Ile 255 None None
Trp 25 Asp 32
Pro 1222 Pro 1225
Ser 23 Ser 33
Glu 168 None None
Ile 63 Gly 67
Ser 45 None None
Glu 334 Tyr 340
Arg 19 None None
Arg 1169

Ala 5 Ala 80
Ser 45 None None
Leu 275 None None
Leu 46 Ser 47
Lys 186 None None
Ser 45 None None
Ala 5 Ala 80
Val 157 Ile 162
Glu 294 None None
Trp 25 Asp 32
Cys 176 His 179
Cys 44 His 47
Ser 274 Ser 290
Met 307 None None
Glu 22 None None
Ser 45 None None
Glu 50 Pro 51
Leu 708 Val 718
Lys 1329 None None
Gly 302 Gly 303
Lys 1048 None None
Ala 6 Val 11
Ala 138 Val 143
Glu 372 None None
Met 105 Asn 107
Met 237 Asn 239
Ile 123 None None
Ile 255 None None
Arg 52 Pro 59
Pro 16942 None None
Leu 98 None None
Gly 271 Gly 275
Tyr 30 Ser 33
Thr 155 Arg 156
Phe 79 Phe 113
Met 90 Pro 91
Asp 104 Ile 106
Glu 719 None None
Leu 31 Ser 33
Ser 45 None None
Ser 45 None None
Pro 58 His 115
Leu 31 Ile 35
His 469 None None
Glu 286 None None
Pro 45 Cys 50
Ser 32604 None None
Glu 388 None None
Gln 1238 None None
Lys 689 None None
Ser 45 Ser 47
Ser 45 None None
Pro 177 Cys 182
Lys 269 None None
Ser 45 None None
Gln 95 None None
Ser 45 None None
Ala 4558 Ala 4563
Asp 148 Thr 150
Asp 16 Thr 18
Asp 32 None None
Se

Pro 45 Cys 50
Glu 17518 None None
Leu 666 None None
Lys 110 None None
Ala 145 Tyr 147
Thr 772 Ala 773
His 619 None None
Lys 377 None None
Lys 206 None None
Ser 940 None None
Ser 7 Gln 11
Glu 424 None None
Phe 25 Ser 26
Pro 1261 None None
Leu 67 None None
Ser 261 None None
Val 25 None None
Glu 377 None None
Gly 427 Gly 429
Ala 5 Ala 80
Gly 262 None None
Ile 255 None None
Leu 265 None None
Gly 4 Gly 17
Asn 131 None None
Asn 107 Ser 108
Ala 5 Ala 80
Gln 95 None None
Pro 485 None None
Gly 262 None None
Gly 6 None None
Ala 5 Ala 80
Ala 5 Ala 80
Glu 388 None None
Met 90 Pro 91
Leu 133 None None
Cys 176 None None
Pro 177 Cys 182
Leu 1209 Gln 1216
Ala 5 Ala 80
Ser 129 None None
Val 108 None None
Glu 666 None None
Asp 149 Arg 150
Asp 149 Arg 150
Arg 429 None None
Leu 324 Pro 326
Ser 45 None None
Pro 177 Cys 182
Ser 45 None None
Gly 262 None None
Gly 108 Phe 109
Ala 5 Ala 80
Val 86 None None
Pro 59 None None
Glu 180 Asp 184
Gln 1216 None None
Gln 1216 None None
Glu 50 None None
Glu 168 None None

Phe 242 None None
Gly 240 None None
Val 218 None None
Ile 123 None None
Thr 124 None None
Ser 106 Tyr 107
Glu 275 None None
Ala 5 Ala 80
Glu 153 Asn 156
Leu 327 Leu 328
Val 40 Glu 48
Tyr 104 None None
Tyr 104 None None
Asn 567 None None
Met 90 Pro 91
Glu 15644 Ile 15647
Asn 255 Val 256
Tyr 236 Met 243
Gly 199 Glu 204
Pro 177 Cys 182
Pro 177 Cys 182
Val 378 Phe 379
Tyr 280 None None
Gln 12 None None
Ser 128 Ser 129
Ser 45 None None
Leu 265 None None
Glu 210 None None
Pro 21 Gly 22
Ile 122 Thr 124
Ser 1096 Glu 1098
Val 173 His 179
Asn 263 Phe 270
Gln 33 None None
Leu 252 None None
Gln 373 None None
Glu 266 None None
Glu 945 None None
Glu 462 None None
Leu 265 None None
Ala 49 Thr 53
Thr 41 Ser 45
Arg 117 Pro 118
Ser 45 None None
Thr 422 Tyr 423
Leu 470 None None
Asn 103 Tyr 104
His 650 None None
Gln 1238 None None
Lys 28689 Glu 28690
Gly 480 Pro 482
Glu 153 Asn 156
Leu 13 None None
Leu 65 None None
Leu 252 Ile 254
Val 218 None None
Pro 158 Met 161
Asn 131 None None
Val 157 Arg 158
Gly 10

Ser 157 None None
Ile 255 None None
Gln 192 None None
Met 237 Asn 239
Leu 705 Thr 709
Thr 155 Arg 156
Thr 155 Arg 156
Gly 648 None None
Lys 1023 None None
Lys 909 None None
Arg 203 Leu 213
Glu 325 None None
Phe 453 None None
His 179 Glu 180
Gln 144 None None
Ile 255 None None
Ser 1023 None None
Thr 150 Arg 158
Gly 470 None None
Lys 1358 None None
Glu 1257 None None
Pro 142 Gln 144
Arg 151 None None
Ser 29 None None
Glu 330 None None
Met 105 Asn 107
Tyr 236 None None
Tyr 236 None None
Gly 130 None None
Gly 130 None None
Gln 12 None None
Lys 462 Asp 465
Gly 130 None None
Gly 16 Gly 17
Thr 211 Ser 215
Asn 107 Ser 108
Cys 97 Ile 100
Phe 453 None None
Ser 1270 None None
Gly 108 Phe 109
Ile 255 None None
Thr 1401 None None
Lys 588 None None
Glu 462 None None
Asp 63 Gln 108
Glu 40 None None
Glu 287 None None
Ser 215 Tyr 220
Pro 59 Leu 69
Ala 8 None None
Glu 1805 None None
Tyr 126 Lys 132
Pro 45 Cys 50
Lys 453 None None
Leu 252 Ile 254
Glu 25468 None None
Asp 1431 None None
Gln 524 None None
V

Lys 546 None None
Lys 562 None None
Gln 144 None None
Tyr 102 None None
Ser 109 None None
Gly 147 Arg 148
Ile 2940 None None
Lys 1409 None None
Arg 249 Thr 256
Gln 524 None None
Arg 26 Ala 27
Val 20681 Thr 20682
Pro 191 None None
Cys 141 Pro 142
Glu 286 Glu 287
Thr 150 Pro 151
Gly 68 None None
Gly 416 Val 421
Gly 266 Asn 268
Ser 307 None None
His 164 Ser 171
Leu 145 None None
Glu 210 None None
Gln 373 None None
Ile 123 None None
Ile 123 None None
Glu 204 Asn 210
Arg 7 None None
Glu 210 None None
Pro 191 None None
Val 218 None None
Lys 1227 None None
Cys 106 None None
Glu 154 Glu 155
Pro 177 His 178
Glu 204 Leu 206
Val 218 None None
Arg 592 None None
Gly 2760 None None
Phe 109 Arg 110
Pro 448 None None
Val 218 None None
Thr 150 None None
Val 218 None None
Lys 361 Asp 364
Pro 191 None None
Pro 59 None None
Gly 62 None None
Gln 373 None None
Glu 429 None None
Leu 5 Gln 10
Gln 373 None None
Gln 373 None None
Gln 373 None None
Ile 3093 None None
Tyr 31 Gln 33
Pro 45 Cys 50
Val 86 None None


In [108]:
df_tmp[~ix_nulls][:14]

Unnamed: 0,Gene name,Accession Number,HGNC ID,Primary site,Primary histology,Genome-wide screen,GENOMIC_MUTATION_ID,LEGACY_MUTATION_ID,MUTATION_ID,Mutation CDS,...,GRCh,Mutation genome position,SNP,Mutation somatic status,Pubmed_PMID,Age,HGVSP,HGVSC,HGVSG,cambio
0,GATA3,ENST00000379328.8,4172,breast,carcinoma,n,COSV60516010,COSM6904322,114423927,c.1075_1077del,...,38,10:8073763-8073765,n,Confirmed somatic variant,28481359.0,,ENSP00000368632.3:p.Lys359del,ENST00000379328.8:c.1075_1077del,10:g.8073763_8073765del,Lys359del
1,GATA3,ENST00000379328.8,4172,breast,carcinoma,n,COSV60516010,COSM6904322,114423927,c.1075_1077del,...,38,10:8073763-8073765,n,Reported in another cancer sample as somatic,30062102.0,,ENSP00000368632.3:p.Lys359del,ENST00000379328.8:c.1075_1077del,10:g.8073763_8073765del,Lys359del
2,GATA3,ENST00000379328.8,4172,breast,carcinoma,n,COSV60516010,COSM6904322,114423927,c.1075_1077del,...,38,10:8073763-8073765,n,Confirmed somatic variant,30205045.0,66.0,ENSP00000368632.3:p.Lys359del,ENST00000379328.8:c.1075_1077del,10:g.8073763_8073765del,Lys359del
3,TP73_ENST00000604479,ENST00000604479.5,12003,large_intestine,carcinoma,y,COSV60698986,COSM4735502,159429521,c.1181_1183del,...,38,1:3729433-3729435,n,Confirmed somatic variant,25344691.0,64.16,ENSP00000474322.1:p.Gln394del,ENST00000604479.5:c.1181_1183del,1:g.3729433_3729435del,Gln394del
4,GATA3,ENST00000379328.8,4172,breast,carcinoma,n,COSV60516010,COSM6904322,114423927,c.1075_1077del,...,38,10:8073763-8073765,n,Reported in another cancer sample as somatic,30181556.0,,ENSP00000368632.3:p.Lys359del,ENST00000379328.8:c.1075_1077del,10:g.8073763_8073765del,Lys359del
5,GATA3,ENST00000379328.8,4172,breast,carcinoma,n,COSV60516010,COSM6904322,114423927,c.1075_1077del,...,38,10:8073763-8073765,n,Reported in another cancer sample as somatic,30181556.0,,ENSP00000368632.3:p.Lys359del,ENST00000379328.8:c.1075_1077del,10:g.8073763_8073765del,Lys359del
6,CKAP2_ENST00000490903,ENST00000490903.5,1990,large_intestine,carcinoma,y,COSV51621004,COSM5111120,135489931,c.653_661del,...,38,13:52461623-52461631,n,Confirmed somatic variant,22810696.0,81.0,ENSP00000417830.1:p.Thr218_Gln221delinsLys,ENST00000490903.5:c.653_661del,13:g.52461623_52461631del,Thr218_Gln221delinsLys
7,CHMP3,ENST00000263856.8,29865,NS,malignant_melanoma,y,COSV55686973,COSM5862948,98776512,c.618_620del,...,38,2:86505864-86505866,n,Confirmed somatic variant,24265153.0,47.0,ENSP00000263856.4:p.Glu208del,ENST00000263856.8:c.618_620del,2:g.86505864_86505866del,Glu208del
8,GATA3,ENST00000379328.8,4172,breast,carcinoma,n,COSV60516010,COSM6904322,114423927,c.1075_1077del,...,38,10:8073763-8073765,n,Confirmed somatic variant,30205045.0,64.0,ENSP00000368632.3:p.Lys359del,ENST00000379328.8:c.1075_1077del,10:g.8073763_8073765del,Lys359del
9,CHMP3,ENST00000263856.8,29865,NS,malignant_melanoma,y,COSV55686973,COSM5862948,98776512,c.618_620del,...,38,2:86505864-86505866,n,Confirmed somatic variant,24265153.0,57.0,ENSP00000263856.4:p.Glu208del,ENST00000263856.8:c.618_620del,2:g.86505864_86505866del,Glu208del


In [95]:
df_tmp[~ix_nulls]["cambio"][:40]

0                         Lys359del
1                         Lys359del
2                         Lys359del
3                         Gln394del
4                         Lys359del
5                         Lys359del
6            Thr218_Gln221delinsLys
7                         Glu208del
8                         Lys359del
9                         Glu208del
10                          Ala2del
11                        Gln394del
12                        Glu208del
13                 Met357_Lys388del
14                        Lys150del
15                        Tyr346del
16                 Met357_Lys388del
17                 Met357_Lys388del
18                        Tyr346del
19                 Val267_Asn270del
20                        Lys359del
21                       Ile1200del
22                        Arg331del
23                 Thr421_Pro422del
24                 Met357_Lys388del
25                   Gly35_Glu38del
26                   Gly35_Glu38del
27                 His354_Gl

In [163]:
# list(df_tmp[~ix_nulls]["cambio"].str.findall('^([A-Z][a-z]{2})(\d+)_?([A-Z][a-z]{2})?(\d+)?'+conseq_regex+'(.*)$').str[0][0:100])
aux = df_notnulls[ix_right]["cambio"].str.findall('^([A-Z][a-z]{2})(\d+)_?([A-Z][a-z]{2})?(\d+)?del$').str[0][0:100]
for x in aux:
    print(x[4])

IndexError: tuple index out of range