#### Read in a text file listing the surface-exposed residues on a protein (as determined in PyMOL by findSurfaceResidues.py) and write an NGL file to color these residues on the structure

In [1]:
import ast 

In [2]:
def get_exposure_results(virus, subtype, exposure):
    """
    Read in the file listing all residues that have at least `exposure` square angstroms exposed to the solvent

    The file is read from `results/{virus}_{subtype}_surfaceresidues_{exposure}angstrom.txt`
    (the `_{subtype}` part is left out when `subtype` is falsy, e.g. None)

    Returns the list of unique, non-blank lines of that file, in the order they first appear.
    Lines are returned unchanged (including their trailing newline)

    Raises FileNotFoundError (from `open`) if the results file does not exist
    """

    #the subtype, when given, is an extra component of the file name
    prefix = f'{virus}_{subtype}' if subtype else virus
    with open(f'results/{prefix}_surfaceresidues_{exposure}angstrom.txt') as f:
        lines = f.readlines()

    #remove duplicates while keeping first-seen order: a plain set() would give a
    #different ordering from run to run, making anything built from it irreproducible.
    #whitespace-only lines carry no residue, so they are dropped as well
    return list(dict.fromkeys(line for line in lines if line.strip()))

In [73]:
def write_ngl_file(pdb_accession, chains, virus, subtype, surface_exposure):
    """
    Write an NGL file to color every residue in the given pdb structure according to its surface exposure

    The residue must have `surface_exposure` square angstroms or greater of surface exposed

    One color scheme (a call to NGL.ColormakerRegistry.addScheme) is written per entry of `chains`.
    Each scheme tests only the residue number (atom.resno): exposed residues get the surface color,
    everything else gets 0xe7e7e7
    NOTE(review): the original docstring said all subunits of a trimer are colored automatically;
    presumably that relies on how the viewer applies these schemes -- confirm

    pdb_accession: not used by this function (kept so existing calls keep working)
    chains: chain ids to write schemes for; residues on any other chain are ignored
    virus, subtype: select the input file and the output location (`subtype` may be None)
    surface_exposure: exposure threshold, as it appears in the input and output file names

    The output is written to
    ../../../atlas-of-viral-adaptation/{virus}[_{subtype}]/assets/{virus}[_{subtype}]_{surface_exposure}a_surface_colorScheme.txt
    """

    exposed_residues = get_exposure_results(virus, subtype, surface_exposure)
    #restructure exposed_residues to be a dictionary with key as chain and set of exposed residues as values
    exposed_residues_by_chain = {}
    for res in exposed_residues:
        #a blank line holds no residue and cannot be parsed
        if not res.strip():
            continue
        #each line is a python literal whose first two items are (chain, residue number)
        parsed = ast.literal_eval(res)
        res_chain, res_num = parsed[0], parsed[1]

        #limit to only chains listed in arguments
        if res_chain not in chains:
            continue
        #skip residue ids that are not plain numbers; previously such a line silently
        #re-used the number of the preceding residue (or failed on the very first line)
        if not res_num.isnumeric():
            continue
        exposed_residues_by_chain.setdefault(res_chain, set()).add(int(res_num))

    #what color to put on surface
    surface_color = "6C7593"

    #list of text to write file encoding NGL structure for html, one color map per chain
    text_lines = []
    for chain in chains:
        scheme_id = f'{chain}_surface_scheme'
        #sorted so the file is identical from run to run; a chain without any exposed
        #residue gets an empty list instead of raising KeyError.
        #the repr of a python list of ints is also a valid javascript array literal
        exposed_resnos = sorted(exposed_residues_by_chain.get(chain, ()))
        text_lines += [
            f"var {scheme_id} = NGL.ColormakerRegistry.addScheme(function (params) {{",
            "this.atomColor = function (atom) {",
            "if (atom.resno == 0) {return 0xe7e7e7}",
            f"else if ({exposed_resnos}.includes(atom.resno)) {{return 0x{surface_color}}}",
            "else {return 0xe7e7e7}",
            #closes atomColor, the scheme function and the addScheme( call
            "}})",
            "\n",
        ]

    #the subtype, when given, is part of both the directory and the file name
    name = f'{virus}_{subtype}' if subtype else virus
    ngl_filename = f'../../../atlas-of-viral-adaptation/{name}/assets/{name}_{surface_exposure}a_surface_colorScheme.txt'

    with open(ngl_filename, 'w') as f:
        f.writelines(f'{line}\n' for line in text_lines)

In [74]:
# Write one color-scheme file per structure.
# Arguments: PDB accession, chains to write schemes for, virus, subtype (None when
# there is none), surface exposure threshold in square angstroms.
write_ngl_file('4fnk', ['A', 'B'], 'h3n2', None, '15')

In [76]:
write_ngl_file('6u7h', ['A'], '229e', None, '15')

In [77]:
write_ngl_file('6ohw', ['A'], 'oc43', 'a', '15')

In [78]:
write_ngl_file('4nrj', ['A', 'B'], 'vic', None, '15')

In [75]:
write_ngl_file('4m4y', ['A', 'B'], 'h1n1pdm', None, '15')