In [1]:
import glob
import json
import ast
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from Bio import SeqIO

## Write PyMOL scripts to color egg-adaptive mutations on the structure

In [2]:
# Chains that are HA1 in each virus's PDB file (same three chain IDs for every virus here)
trimerization_chains = {
    virus_name: ['A', 'C', 'E']
    for virus_name in ('h3n2', 'h1n1pdm', 'vic', 'yam')
}

In [21]:
# Load the egg-adaptive sites and their colors.
# write_pml_file indexes the result as sites_and_colors[virus][gene] -> {residue: color}.
egg_adaptive_sites_by_virus = "Figure3/egg-adaptive-muts-threshold_3percent.json"

with open(egg_adaptive_sites_by_virus) as json_handle:
    sites_and_colors = json.loads(json_handle.read())

In [22]:
def _to_pdb_residue_number(residue, virus, pdb_accession):
    """
    Translate a residue number (string, tree coordinates) into the numbering of the PDB structure

    Returns the residue as a string. Residues that need no adjustment are returned untouched.
    """
    # the structure for Yam (4M40) has an insertion of one residue at 165 compared to the tree coordinates
    if virus == 'yam' and int(residue) >= 165:
        residue = str(int(residue) - 1)
    # Vic (4FQM) is shifted by two from 165 onwards
    if virus == 'vic' and int(residue) >= 165:
        residue = str(int(residue) - 2)
    # h1n1pdm 3ube has different numbering at the sites of interest
    if pdb_accession == '3ube':
        position = int(residue)
        if position in (222, 223, 187, 191):
            residue = str(position + 3)
        elif position == 127:
            residue = str(position + 4)
    return residue


def write_pml_file(virus, gene, pml_filename, pdb_accession, monomer, with_receptor,
                   sites=None, output_dir='Figure3/pymol_commands/'):
    """
    Write .pml file to color residues with egg adaptation mutations
    Color just one monomer, gray out the other monomers of the trimer and make their egg-mutation sites darker gray

    Parameters
    ----------
    virus, gene : keys used to look up the sites, i.e. sites[virus][gene] -> {residue: '#RRGGBB'}
    pml_filename : name of the script file, created inside output_dir
    pdb_accession : PDB accession passed to PyMOL's fetch
    monomer : True when the PDB entry is a monomer (like 4o5n) and the trimer has to be
              built with split_states; False when the entry already is a trimer (like 3m6s)
    with_receptor : True to show the receptor chain as black sticks (only for the accessions
                    that have a receptor chain listed below)
    sites : optional mapping with the same layout as the notebook-level sites_and_colors,
            which is used when this is None
    output_dir : directory the script is written to; created if it does not exist

    Returns None; the only result is the written file.
    """
    import os  # local import: the notebook's import cell does not provide os

    if sites is None:
        sites = sites_and_colors

    # show surface and make background white
    scene_setup = ["bg_color white", "show surface", "set surface_type, 0", "set transparency, 0.8",
                   "hide sticks", "remove solvent", "remove resname SO4", "set seq_view, 1", "hide cartoon"]

    if monomer:
        # monomeric entry: split the biological assembly into mono0001..mono0003
        text_lines = ["set assembly, 1", f"fetch {pdb_accession}",
                      f"split_states {pdb_accession}, prefix=mono"] + scene_setup
        text_lines += ["select mono0001", "color 0xF0F0F0, sele"]
        text_lines += ["set transparency, 0.6", "select mono0002", "color 0x808080, sele",
                       "select mono0003", "color 0x808080, sele"]
        receptor_selections = {'2YP4': "mono0001 and chain C"}
        focal_selection = "mono0001 and chain A"
        other_selections = ["mono0002 and chain A", "mono0003 and chain A"]
    else:
        # entry already is a trimer: chains A+B are highlighted, C-F are grayed out
        text_lines = ["set assembly, 1", f"fetch {pdb_accession}"] + scene_setup
        text_lines += ["select chain A+B", "color 0xF0F0F0, sele"]
        text_lines += ["set transparency, 0.6", "select chain C+D+E+F", "color 0x808080, sele"]
        receptor_selections = {'3ube': "chain M", '4M44': "chain J"}
        focal_selection = "chain A"
        other_selections = ["chain C", "chain E"]

    if with_receptor and pdb_accession in receptor_selections:
        text_lines += [f"select {receptor_selections[pdb_accession]}", "show sticks, sele", "color black, sele"]

    # get egg mutation sites, renumber them for the structure and
    # switch hex color format from #AAAAAA to 0xAAAAAA
    res_to_color = {}
    for residue, color in sites[virus][gene].items():
        pdb_residue = _to_pdb_residue_number(residue, virus, pdb_accession)
        res_to_color[pdb_residue] = '0x' + color.lstrip('#')

    for resi, res_color in res_to_color.items():
        # just color one monomer; res_color already carries the 0x prefix
        text_lines.append(f"select {focal_selection} and resi {resi}")
        text_lines.append("set transparency, 0, sele")
        text_lines.append(f"color {res_color}, sele")

        # for the other two monomers, mark the same site in darker gray
        for other_selection in other_selections:
            text_lines.append(f"select {other_selection} and resi {resi}")
            text_lines.append("color 0x808080, sele")
            text_lines.append("set transparency, 0, sele")

    if monomer:
        # remove the originally fetched object exactly once; only the split copies are kept
        text_lines.append(f"dele {pdb_accession}")

    if output_dir:
        os.makedirs(output_dir, exist_ok=True)
    pml_path = os.path.join(output_dir, pml_filename)
    with open(pml_path, 'w') as f:
        f.write('\n'.join(text_lines) + '\n')
        
    

In [23]:
# h3n2 HA1 on 4o5n (monomeric entry, so the trimer is rebuilt); receptor not shown
write_pml_file(
    virus='h3n2',
    gene='HA1',
    pml_filename='h3n2_eggadaptive_structure.pml',
    pdb_accession='4o5n',
    monomer=True,
    with_receptor=False,
)

In [None]:
# NOTE(review): this cell has no execution count; presumably the command is meant to be run inside PyMOL
# to load the script written by the preceding cell. The absolute path is machine-specific.
run /Users/katekistler/nextstrain/egg-adaptation-manuscript/figure-panels/Figure3/pymol_commands/h3n2_eggadaptive_structure.pml

In [24]:
# h3n2 HA1 on 2YP4, which has a human-like receptor (shown as sticks)
write_pml_file(
    virus='h3n2',
    gene='HA1',
    pml_filename='h3n2_eggadaptive_structure_receptor.pml',
    pdb_accession='2YP4',
    monomer=True,
    with_receptor=True,
)

In [None]:
# NOTE(review): this cell has no execution count; presumably the command is meant to be run inside PyMOL
# to load the script written by the preceding cell. The absolute path is machine-specific.
run /Users/katekistler/nextstrain/egg-adaptation-manuscript/figure-panels/Figure3/pymol_commands/h3n2_eggadaptive_structure_receptor.pml

In [17]:
# h1n1pdm HA1 on 3m6s (entry is already a trimer); receptor not shown
write_pml_file(
    virus='h1n1pdm',
    gene='HA1',
    pml_filename='h1n1pdm_eggadaptive_structure.pml',
    pdb_accession='3m6s',
    monomer=False,
    with_receptor=False,
)

In [None]:
# NOTE(review): this cell has no execution count; presumably the command is meant to be run inside PyMOL
# to load the script written by the preceding cell. The absolute path is machine-specific.
run /Users/katekistler/nextstrain/egg-adaptation-manuscript/figure-panels/Figure3/pymol_commands/h1n1pdm_eggadaptive_structure.pml

In [18]:
# h1n1pdm HA1 on 3ube, which has a human-like receptor (shown as sticks)
write_pml_file(
    virus='h1n1pdm',
    gene='HA1',
    pml_filename='h1n1pdm_eggadaptive_structure_receptor.pml',
    pdb_accession='3ube',
    monomer=False,
    with_receptor=True,
)

In [None]:
# NOTE(review): this cell has no execution count; presumably the command is meant to be run inside PyMOL
# to load the script written by the preceding cell. The absolute path is machine-specific.
run /Users/katekistler/nextstrain/egg-adaptation-manuscript/figure-panels/Figure3/pymol_commands/h1n1pdm_eggadaptive_structure_receptor.pml

In [19]:
# Vic HA1 on 4FQM (treated as an existing trimer); receptor not shown
write_pml_file(
    virus='vic',
    gene='HA1',
    pml_filename='vic_eggadaptive_structure.pml',
    pdb_accession='4FQM',
    monomer=False,
    with_receptor=False,
)

In [None]:
# NOTE(review): this cell has no execution count; presumably the command is meant to be run inside PyMOL
# to load the script written by the preceding cell. The absolute path is machine-specific.
run /Users/katekistler/nextstrain/egg-adaptation-manuscript/figure-panels/Figure3/pymol_commands/vic_eggadaptive_structure.pml

In [20]:
# Yam HA1 on 4M40 (treated as an existing trimer); receptor not shown
write_pml_file(
    virus='yam',
    gene='HA1',
    pml_filename='yam_eggadaptive_structure.pml',
    pdb_accession='4M40',
    monomer=False,
    with_receptor=False,
)

In [None]:
# NOTE(review): this cell has no execution count; presumably the command is meant to be run inside PyMOL
# to load the script written by the preceding cell. The absolute path is machine-specific.
run /Users/katekistler/nextstrain/egg-adaptation-manuscript/figure-panels/Figure3/pymol_commands/yam_eggadaptive_structure.pml