In [65]:
import ast
import random
import matplotlib
from matplotlib import cm

In [66]:
def read_in_clusters(virus,subtype, mutation_type, exposure_cutoff, neighbor_cutoff):
    """
    Read in clusters of surface-exposed residues with fixations (or near fixations) that 
    were found by running saveClusters.py in pymol 

    The clusters file is looked up under results/clusters/ (relative to the current
    working directory) and is named
        {virus}_{subtype}_{mutation_type}_{exposure_cutoff}_{neighbor_cutoff}_clusters.txt
    where the `_{subtype}` part is only present when `subtype` is truthy.

    Only the first line of the file is used; it is parsed as a Python literal with
    ast.literal_eval and returned unchanged.

    Raises:
        FileNotFoundError: if the clusters file does not exist
        ValueError: if the first line of the clusters file is empty
    """

    # the subtype only appears in the file name for viruses that have one
    virus_label = f'{virus}_{subtype}' if subtype else f'{virus}'
    clusters_path = f'results/clusters/{virus_label}_{mutation_type}_{exposure_cutoff}_{neighbor_cutoff}_clusters.txt'

    # everything that is used lives on the first line, so there is no need to read the rest
    with open(clusters_path) as f:
        first_line = f.readline().strip()

    if not first_line:
        raise ValueError(f'clusters file {clusters_path} is empty')

    return ast.literal_eval(first_line)

In [67]:
# Known epitopes (or receptor-binding domains, RBDs) for each virus.
# Nesting of the dict: virus -> chain -> epitope name -> list of residue numbers.
#
# h3n2 source: https://www.pnas.org/doi/10.1073/pnas.0701396104
# 229e source: https://elifesciences.org/articles/51230
known_epitopes_by_virus = {
    'h3n2': {
        'A': {
            'A': [122, 124, 126, 131, 133, 135, 137, 142, 143, 144, 145, 146],
            'B': [155, 156, 157, 158, 159, 160, 163, 164, 186, 188, 189, 190, 192, 193, 196, 197],
            'C': [50, 53, 54, 275, 276, 278, 299, 307],
            'D': [121, 172, 173, 174, 201, 207, 213, 217, 226, 227, 242, 244, 248],
            'E': [57, 62, 63, 67, 75, 78, 81, 82, 83, 92, 94, 260, 262],
            # NOTE(review): 22 sits between 202 and 225 in an otherwise ascending list --
            # confirm against the source that this is the intended residue number
            'und': [2, 3, 5, 25, 33, 106, 202, 22, 225, 271],
        },
        # chain B is present but has no epitopes listed
        'B': {},
    },
    '229e': {
        'A': {
            # three contiguous stretches: 308-325, 352-359 and 404-408 (inclusive)
            'RBD': [*range(308, 326), *range(352, 360), *range(404, 409)],
        },
    },
}

In [99]:
def write_ngl_file_epitope_overlap(pdb_accession, virus, subtype, mutation_type, 
                            exposure_cutoff, neighbor_cutoff):
    """
    Write a ngl file to color structure by overlap between known epitopes (or RBD) and those predicted by clusters

    For every chain that has predicted cluster residues, one JavaScript snippet is
    written that registers a color scheme named `{chain}_epitope_overlap_scheme`
    through NGL.ColormakerRegistry.addScheme. Within a scheme, residues are colored by:
        overlap        (ff5000): in a cluster and in a known epitope
        only_predicted (ffd000): in a cluster but not in a known epitope
        only_known     (6a6a6a): in a known epitope but not in a cluster
    and every other atom gets f0f0f0.

    Parameters:
        pdb_accession: not used by this function; kept so existing calls keep working
        virus: key into known_epitopes_by_virus; also part of the input/output file names
        subtype: subtype label used in the file names and output folder, or a falsy
            value (e.g. None) when the virus has no subtype
        mutation_type, exposure_cutoff, neighbor_cutoff: passed to read_in_clusters to
            locate the clusters file and repeated in the output file name

    Raises:
        KeyError: if `virus` has no entry in known_epitopes_by_virus
    """

    cluster_dict = read_in_clusters(virus,subtype, mutation_type, exposure_cutoff, neighbor_cutoff)

    if virus not in known_epitopes_by_virus:
        raise KeyError(f"no known epitopes (or RBD) defined for virus '{virus}'; "
                       "add an entry to known_epitopes_by_virus")

    # all residues in the clusters, as dictionary where key is chain and value is set of residues
    # (each cluster member is indexed as [0] = chain, [1] = residue number as a string)
    predicted_by_chain = {}
    for cluster_members in cluster_dict.values():
        for cluster_member in cluster_members:
            chain = cluster_member[0]
            res_num = cluster_member[1]

            # residue ids that are not plain decimal numbers are skipped;
            # isdecimal() (unlike isnumeric()) only accepts strings int() can parse
            if res_num.isdecimal():
                predicted_by_chain.setdefault(chain, set()).add(int(res_num))

    # all residues of all known epitopes, pooled per chain
    known_by_chain = {
        chain: {res for residues in epitopes.values() for res in residues}
        for chain, epitopes in known_epitopes_by_virus[virus].items()
    }

    # find which predicted residues are in known epitopes, and which aren't
    overlap = {}
    for chain, predicted in predicted_by_chain.items():
        # a chain with clusters but without a known-epitope entry simply has nothing known
        known = known_by_chain.get(chain, set())
        # sorted so the generated file is stable between runs and easy to diff
        overlap[chain] = {
            'overlap': sorted(predicted & known),
            'only_predicted': sorted(predicted - known),
            'only_known': sorted(known - predicted),
        }

    #colors for overlap, only_predicted clusters, and only_known epitopes
    hex_colors = ['ffd000', 'ff5000', '6a6a6a']
    cmap = {'only_predicted':hex_colors[0], 'overlap': hex_colors[1], 'only_known': hex_colors[2]}

    #list of text to write file encoding NGL structure for html
    text_lines = []

    #make color map for each chain
    for chain, overlap_by_chain in overlap.items():
        scheme_id = f'{chain}_epitope_overlap_scheme'
        coloring_code_lines = [f"var {scheme_id} = NGL.ColormakerRegistry.addScheme(function (params) {{", 
                       "this.atomColor = function (atom) {", "if (atom.resno == 0) {return 0xf0f0f0}"]

        # a Python list of ints prints as a valid JavaScript array literal
        for overlap_kind, residues in overlap_by_chain.items(): 
            coloring_code_lines.append(f"else if ({residues}.includes(atom.resno)) {{return 0x{cmap[overlap_kind]}}}")

        coloring_code_lines.append("else {return 0xf0f0f0}")
        # closes atomColor, the addScheme callback, and the addScheme call
        coloring_code_lines.append("}})")
        coloring_code_lines.append("\n")

        text_lines += coloring_code_lines

    if subtype:
        ngl_filename = f'../../../atlas-of-viral-adaptation/{virus}_{subtype}/assets/{virus}_{subtype}_{mutation_type}_{exposure_cutoff}_{neighbor_cutoff}_overlap-knownepitopes_colorScheme.txt'
    else:
        ngl_filename = f'../../../atlas-of-viral-adaptation/{virus}/assets/{virus}_{mutation_type}_{exposure_cutoff}_{neighbor_cutoff}_overlap-knownepitopes_colorScheme.txt'
    with open(ngl_filename, 'w') as f:
        for line in text_lines:
            f.write(line)
            f.write('\n')
            

In [100]:
# H3 HA antigenic sites: A-D were identified by Webster and Laver using competition assays
# (https://www.sciencedirect.com/science/article/pii/0042682280903724),
# a fifth site, E, was identified by Skehel (https://www.pnas.org/doi/abs/10.1073/pnas.81.6.1779),
# and the 6th comes from Shih et al
write_ngl_file_epitope_overlap(
    pdb_accession='4fnk',
    virus='h3n2',
    subtype=None,
    mutation_type='nonsyn',
    exposure_cutoff=15,
    neighbor_cutoff=8,
)

In [101]:
# 229e: the RBD residues are taken from https://elifesciences.org/articles/51230
write_ngl_file_epitope_overlap(
    pdb_accession='6u7h',
    virus='229e',
    subtype=None,
    mutation_type='nonsyn',
    exposure_cutoff=15,
    neighbor_cutoff=8,
)

In [72]:
# NOTE(review): this call raised KeyError: 'oc43' when last run -- known_epitopes_by_virus
# has no 'oc43' entry; add one before re-running this cell
write_ngl_file_epitope_overlap(
    pdb_accession='6ohw',
    virus='oc43',
    subtype='a',
    mutation_type='nonsyn',
    exposure_cutoff=15,
    neighbor_cutoff=8,
)

KeyError: 'oc43'

In [None]:
# NOTE(review): no 'vic' entry is visible in known_epitopes_by_virus, so this call is
# expected to raise KeyError until one is added -- confirm before running
write_ngl_file_epitope_overlap(
    pdb_accession='4nrj',
    virus='vic',
    subtype=None,
    mutation_type='nonsyn',
    exposure_cutoff=15,
    neighbor_cutoff=8,
)

In [None]:
# NOTE(review): no 'h1n1pdm' entry is visible in known_epitopes_by_virus, so this call is
# expected to raise KeyError until one is added -- confirm before running
write_ngl_file_epitope_overlap(
    pdb_accession='4m4y',
    virus='h1n1pdm',
    subtype=None,
    mutation_type='nonsyn',
    exposure_cutoff=15,
    neighbor_cutoff=8,
)