This notebook showcases the generation of mappings between protein components. 

It is intended to be used for computing relative binding free energies for protein mutations.

In [8]:
import pandas as pd
import numpy as np
from kartograf import KartografAtomMapper
from gufe import ProteinComponent
from gufe.tokenization import JSON_HANDLER

LICENSE: Could not open license file "oe_license.txt" in local directory
LICENSE: N.B. OE_LICENSE environment variable is not set
LICENSE: N.B. OE_DIR environment variable is not set
LICENSE: No product keys!
LICENSE: No product keys!
LICENSE: No product keys!
LICENSE: No product keys!
Error: 
  [31m×[0m could not find pixi.toml or pyproject.toml at directory /home/ijpulidos/
  [31m│[0m workdir/repos/pale/playground/protein-mutation

Error: 
  [31m×[0m could not find pixi.toml or pyproject.toml at directory /home/ijpulidos/
  [31m│[0m workdir/repos/pale/playground/protein-mutation



We aim to reproduce the results of the perses barnase–barstar paper, using its input files (https://github.com/choderalab/perses-barnase-barstar-paper/tree/main/input_files) and its results table (https://github.com/choderalab/perses-barnase-barstar-paper/blob/main/data/table_terminally_blocked.csv) as references.

## Generate mutated protein

The mutation is P61R (proline 61 → arginine) in chain A.

In [1]:
import os
from pdbfixer import PDBFixer

In [2]:
# Load the starting structure into PDBFixer so it can be mutated and repaired.
pdbfixer = PDBFixer(filename="./hair2_dimer/her2-for-mapping-p61r.pdb")

In [3]:
# Detect missing residues and missing atoms in the structure as loaded.
# Nothing is added to the structure in this cell; the same two detection
# calls are repeated in the next cell after the mutation has been applied.
pdbfixer.findMissingResidues()
pdbfixer.findMissingAtoms()

In [4]:
# Apply the P61R mutation: "PRO-61-ARG" replaces the proline at residue 61
# with arginine; the second argument selects chain "A".
pdbfixer.applyMutations(["PRO-61-ARG"], "A")
# Re-run detection so the bookkeeping reflects the mutated residue, then
# build the missing heavy atoms. The order of these calls matters.
pdbfixer.findMissingResidues()
pdbfixer.findMissingAtoms()
pdbfixer.addMissingAtoms()
# Add hydrogens; 7.0 is passed as the pH argument.
pdbfixer.addMissingHydrogens(7.0)

In [5]:
from openmm.app import PDBFile
# Grab the mutated topology and coordinates from PDBFixer and write them to
# the PDB file that the mapping step reads back in later.
# NOTE(review): writeFile is called without `keepIds`, so residue/chain IDs in
# the output may be renumbered by OpenMM -- confirm this is intended.
omm_top, omm_pos = pdbfixer.topology, pdbfixer.positions
with open("./hair2_dimer/mutated_dimer_P61R.pdb", "w") as out_file:
    PDBFile.writeFile(omm_top, omm_pos, file=out_file)

In [6]:
# Lookup tables for translating between three-letter and one-letter codes of
# the 20 standard amino acids.
aa_three_to_one_code = dict(
    [
        ("ALA", "A"), ("GLY", "G"), ("ILE", "I"), ("LEU", "L"),
        ("PRO", "P"), ("VAL", "V"), ("PHE", "F"), ("TRP", "W"),
        ("TYR", "Y"), ("ASP", "D"), ("GLU", "E"), ("ARG", "R"),
        ("HIS", "H"), ("LYS", "K"), ("SER", "S"), ("THR", "T"),
        ("CYS", "C"), ("MET", "M"), ("ASN", "N"), ("GLN", "Q"),
    ]
)
# Reverse lookup (one-letter -> three-letter); the codes are unique, so the
# inversion loses no entries.
aa_one_to_three_code = dict(
    zip(aa_three_to_one_code.values(), aa_three_to_one_code.keys())
)

In [9]:
# Generate the atom mapping between the original and the mutated protein.
# TODO: What happened to the `allow_bond_breaks=True/False` option?
atom_mapper = KartografAtomMapper(map_exact_ring_matches_only=False, atom_map_hydrogens=True)
mutation_string = "P61R"
# Read the original structure (the same file that was loaded into PDBFixer)
initial_comp = ProteinComponent.from_pdb_file("hair2_dimer/her2-for-mapping-p61r.pdb")
# Read the mutated structure written out after applying the mutation
final_comp = ProteinComponent.from_pdb_file(f"hair2_dimer/mutated_dimer_{mutation_string}.pdb")
# Take the first suggested mapping. Passing a default to `next` avoids an
# opaque StopIteration when the mapper yields nothing; fail with a clear
# message instead.
mapping = next(atom_mapper.suggest_mappings(initial_comp, final_comp), None)
if mapping is None:
    raise ValueError(
        f"KartografAtomMapper did not suggest any mapping for mutation {mutation_string}"
    )
# The mapping is serialized to JSON in the last cell of the notebook.

In [10]:
# Atom indices present only in the initial component (not mapped).
[*mapping.componentA_unique]

[]

In [11]:
# Atom indices present only in the final component (not mapped).
[*mapping.componentB_unique]

[898, 912, 913, 914, 915, 916, 917, 918, 919, 920]

In [12]:
# Pull both end-state components back out of the mapping object.
component_a, component_b = mapping.componentA, mapping.componentB

In [13]:
# Convert each component to an OpenMM topology so atoms and residues can be
# inspected by index.
initial_topology, final_topology = (
    component_a.to_openmm_topology(),
    component_b.to_openmm_topology(),
)

In [14]:
# Materialize the atom iterators so they can be indexed with mapping indices.
initial_atoms = [*initial_topology.atoms()]
final_atoms = [*final_topology.atoms()]

In [15]:
# Atoms of the initial topology that have no counterpart in the mapping.
[initial_atoms[atom_index] for atom_index in mapping.componentA_unique]

[]

In [16]:
# Atoms of the final topology that have no counterpart in the mapping.
[final_atoms[atom_index] for atom_index in mapping.componentB_unique]

[<Atom 898 (H) of chain 0 residue 60 (ARG)>,
 <Atom 912 (NE) of chain 0 residue 60 (ARG)>,
 <Atom 913 (HE) of chain 0 residue 60 (ARG)>,
 <Atom 914 (CZ) of chain 0 residue 60 (ARG)>,
 <Atom 915 (NH1) of chain 0 residue 60 (ARG)>,
 <Atom 916 (HH11) of chain 0 residue 60 (ARG)>,
 <Atom 917 (HH12) of chain 0 residue 60 (ARG)>,
 <Atom 918 (NH2) of chain 0 residue 60 (ARG)>,
 <Atom 919 (HH21) of chain 0 residue 60 (ARG)>,
 <Atom 920 (HH22) of chain 0 residue 60 (ARG)>]

In [17]:
# TODO: Visualize mapping by extracting the residues from the transformation
# Position 60 in the residue list is the mutated site (P61R); the unique-atom
# listing above reports the ARG atoms as belonging to residue 60.
mutated_res_index = 60
initial_res = [*initial_topology.residues()][mutated_res_index]
final_res = [*final_topology.residues()][mutated_res_index]

In [18]:
# Serialize the mapping to JSON under the mappings directory, creating the
# directory first if it does not exist yet.
mappings_dir = "./hair2_dimer/mappings"
os.makedirs(mappings_dir, exist_ok=True)
mapping_json_path = f"{mappings_dir}/dimer_{mutation_string}.json"
with open(mapping_json_path, "w") as out_file:
    mapping.to_json(out_file)

