In [37]:
from typing import List, Tuple, Dict, Union
from dataclasses import dataclass
import os

import pynmrstar

# Step 1: Create the data structures 
We create three dataclasses: Atom, Residue, and Restraint.

Atoms contain information about the residue they belong to, their own label, and their chemical shift.

Residues contain their own labels and indices as well as a dictionary containing all of their Atoms.

Restraints contain two Atoms, one an amide proton and the other an aromatic ring proton.

These dataclasses make the overall analysis simpler and cleaner, and have scaled well to the more complex analysis in the survey code.

In [38]:
@dataclass
class Atom:
    """
    Contain necessary information about an individual atom.

    Instance variables:
    res_index -- the index of the residue containing the atom
    res_label -- the label of the residue containing the atom
    atom_label -- the label of the atom
    cs_val -- the reported chemical shift of the atom; None (the default)
        when no chemical shift is available, e.g. for an Atom built from a
        restraint file
    """
    res_index: str
    res_label: str
    atom_label: str
    # in some cases there won't be a CS reported for the atom, so cs_val
    # may be omitted when constructing an Atom
    cs_val: Union[float, None] = None

@dataclass
class Residue:
    """
    Group the Atoms of one residue together with the residue's identity.

    Instance variables:
    res_index -- index of this residue within the entry
    res_label -- label of this residue within the entry
    atoms_dict -- the residue's Atoms, keyed by atom label
    """
    res_index: str
    res_label: str
    # maps atom label -> Atom
    atoms_dict: Dict[str, Atom]

@dataclass
class Restraint:
    """
    Pair the two Atoms of a restraint between an amide proton and an
    aromatic ring proton.

    Instance variables:
    atom_amide -- the amide Atom of the restraint
    atom_aroma -- the aromatic ring Atom of the restraint
    """
    atom_amide: Atom
    atom_aroma: Atom


# Step 2: Loading chemical shift data from BMRB
We load the BMRB entry from ReBoxitory and parse it using PyNMRSTAR (see the [PyNMRSTAR repository](https://github.com/uwbmrb/PyNMRSTAR)). For every residue in the entry we build a Residue instance, which in turn holds an Atom instance for each of its atoms that has a reported chemical shift.

In [39]:
def make_residues_dict(bmrb_id: str) -> Dict[str, Residue]:
    '''
    Create a dict of Residue instances containing Atoms and their
    reported chemical shifts from a BMRB entry.

    Only the first Atom_chem_shift loop of the entry is read. Residues are
    keyed by Comp_index_ID alone, so atoms that share a Comp_index_ID are
    grouped into the same Residue.

    Keyword arguments:
    bmrb_id -- the BMRB ID of the entry to be considered
    Returns:
    residues_dict -- dict of Residue instances (keyed by residue index)
        containing Atoms with reported chemical shifts; each Atom's cs_val
        is a float, or None if the reported value is not numeric
    '''
    filepath = os.path.join(
        '/reboxitory', '2021', '07', 'BMRB', 'macromolecules',
        f'bmr{bmrb_id}', f'bmr{bmrb_id}_3.str'
    )
    entry = pynmrstar.Entry.from_file(filepath)
    shifts_loop = entry.get_loops_by_category("Atom_chem_shift")[0]
    shifts_list = shifts_loop.get_tag(
        ['Comp_index_ID', 'Comp_ID', 'Atom_ID', 'Val']
    )

    # iterate through the BMRB entry once, creating each Atom and filing it
    # under the Residue it belongs to (created on first sight)
    residues_dict = {}
    for res_index, res_label, atom_label, cs_raw in shifts_list:
        # Atom.cs_val is declared as a float or None, so convert the raw tag
        # value; anything non-numeric (such as a '.' or '?' placeholder)
        # becomes None rather than being stored as a string
        try:
            cs_val = float(cs_raw)
        except (TypeError, ValueError):
            cs_val = None
        atom = Atom(res_index, res_label, atom_label, cs_val)

        res = residues_dict.get(res_index)
        if res is None:
            # the residue takes its label from the first atom seen for it
            res = Residue(res_index, res_label, {})
            residues_dict[res_index] = res
        res.atoms_dict[atom_label] = atom

    return residues_dict
        

## Example
Let's try this out on BMRB ID = 11017:

In [40]:
# parse the chemical shifts of BMRB entry 11017 into Residue instances
residues_dict = make_residues_dict(bmrb_id='11017')

Let's find a (non-aromatic) residue with an amide listed and an aromatic residue with aromatic ring protons:

In [41]:
res_amide = None
res_aroma = None

labels_aroma = ['TYR', 'PHE', 'HIS', 'TRP'] # list of the aromatic residue labels

# find an aromatic residue and a non-aromatic residue
for res in residues_dict.values():
    if res.res_label in labels_aroma:
        # checking for aromatic: we only need one, and it must have a
        # reported HD1 shift because that is the atom we look up below
        if res_aroma is None and 'HD1' in res.atoms_dict:
            res_aroma = res
            print(f'Aromatic residue found: {res.res_index} {res.res_label}')
    elif res_amide is None and 'H' in res.atoms_dict:
        # checking for non-aromatic with a reported amide; we only need one
        res_amide = res
        print(f'Non-aromatic residue found: {res.res_index} {res.res_label}')
    if res_amide is not None and res_aroma is not None:
        break

# fail with a readable message instead of an AttributeError on None below
assert res_amide is not None, 'no non-aromatic residue with a reported amide H was found'
assert res_aroma is not None, 'no aromatic residue with a reported HD1 was found'

# get the amide Atom and print its chemical shift:
atom_amide = res_amide.atoms_dict['H']
print(f'Amide H in {res_amide.res_index} {res_amide.res_label} had a reported chemical shift of {atom_amide.cs_val}')
# get the aromatic HD1 Atom and print its chemical shift
atom_aroma = res_aroma.atoms_dict['HD1']
print(f'Aromatic ring HD1 in {res_aroma.res_index} {res_aroma.res_label} had a reported chemical shift of {atom_aroma.cs_val}')

Non-aromatic residue found: 3 LYS
Aromatic residue found: 7 PHE
Amide H in 3 LYS had a reported chemical shift of 8.52
Aromatic ring HD1 in 7 PHE had a reported chemical shift of 7.47


# Step 3: Loading restraint data
The restraint file is loaded from the PDB section of ReBoxitory and also parsed using PyNMRSTAR. We search for restraints between amide protons and aromatic ring protons. For each of these, we construct a Restraint instance containing two Atom instances.

In [31]:
def make_restraints_dict(
    pdb_id: str
) -> Dict[str, Dict[str, Restraint]]:
    """
    Load the restraint file of a PDB entry and collect its amide-aromatic
    restraints.

    Keyword arguments:
    pdb_id -- the PDB ID of the entry whose restraint file is parsed
    Returns:
    restraints_dict -- nested dict of Restraints; the outer key is
        '<loop number>,<restraint ID>' and the inner key is the member ID
    """
    # tags read from every row of a distance-restraint loop
    tags_wanted = [
        "ID", "Member_ID", "Member_logic_code",
        "Comp_index_ID_1", "Comp_ID_1", "Atom_ID_1",
        "Comp_index_ID_2", "Comp_ID_2", "Atom_ID_2",
        "Distance_val", "Distance_lower_bound_val",
        "Distance_upper_bound_val"
    ]

    # locate and parse the restraint file in ReBoxitory
    mr_path = os.path.join(
        '/reboxitory', '2021', '07', 'PDB', 'data', 'structures', 'all',
        'nmr_restraints_v2', f"{pdb_id.lower()}_mr.str.gz"
    )
    entry = pynmrstar.Entry.from_file(mr_path)

    restraints_dict = {}
    # every 'distance constraint' (restraint) loop in the file is visited
    dist_loops = entry.get_loops_by_category('Gen_dist_constraint')
    for loop_num, dist_loop in enumerate(dist_loops):
        for row in dist_loop.get_tag(tags_wanted):
            restraint, restraint_id, member_id = make_restraint(row)
            if restraint is None:
                continue
            # restraint IDs restart in each loop, so prefix the loop number
            # to keep them distinct
            loop_key = f'{loop_num},{restraint_id}'
            # an ambiguous restraint has several members under a single ID
            restraints_dict.setdefault(loop_key, {})[member_id] = restraint
    return restraints_dict
                
def make_restraint(
    restraint_info: List
) -> Tuple[Union[Restraint, None], str, str]:
    '''
    Read one row of restraint data and build a Restraint if it is
    amide-aromatic.

    Keyword arguments:
    restraint_info -- list of tag values for one restraint row; positions
        0 and 1 hold the restraint ID and member ID, positions 3-5 the
        residue index, residue label and atom label of the first atom, and
        positions 6-8 the same for the second atom
    Returns (always as a 3-tuple):
    restraint -- Restraint object with an amide atom and an aromatic ring
        proton; None if the restraint is not amide-aromatic or if both
        atoms belong to the same residue
    restraint_id -- restraint ID as found in restraint file
    member_id -- member ID as found in restraint file
    '''
    # info for cataloguing restraints
    restraint_id = restraint_info[0]
    member_id = restraint_info[1]
    # the restraint file carries no chemical shifts, so both Atoms are
    # created with cs_val=None
    atom_1 = Atom(
        restraint_info[3], restraint_info[4], restraint_info[5], None
    )
    atom_2 = Atom(
        restraint_info[6], restraint_info[7], restraint_info[8], None
    )

    # one of the atoms must be an amide, and we don't consider the
    # restraint if the atoms are from the same residue
    bool_amide, atom_amide = check_amide(atom_1, atom_2)
    if not bool_amide or atom_1.res_index == atom_2.res_index:
        return None, restraint_id, member_id

    # whichever atom is not the amide must be an aromatic ring proton
    atom_other = atom_2 if atom_amide is atom_1 else atom_1
    bool_aroma, atom_aroma = check_aroma(atom_other)
    if not bool_aroma:
        return None, restraint_id, member_id

    return Restraint(atom_amide, atom_aroma), restraint_id, member_id
        
    
def check_amide(
    atom_1: Atom, atom_2: Atom
) -> Tuple[bool, Union[Atom, None]]:
    """
    Report whether atom_1 or atom_2 is an amide hydrogen (atom label 'H')
    and hand back that atom.

    Keyword arguments:
    atom_1 -- the first Atom instance built from the restraint in file
    atom_2 -- the second Atom instance built from the restraint in file
    Returns:
    bool_amide -- True if at least one atom is an amide, False otherwise
    atom_amide -- the amide atom, or None if neither atom is an amide; when
        both atoms are amides, atom_2 is the one returned
    """
    # atom_2 is tested first because it takes precedence when both match
    if atom_2.atom_label == 'H':
        return True, atom_2
    if atom_1.atom_label == 'H':
        return True, atom_1
    return False, None

def check_aroma(atom: Atom) -> Tuple[bool, Union[Atom, None]]:
    """
    Decide whether an atom is an aromatic ring proton.

    Keyword arguments:
    atom -- the Atom object to be checked
    Returns:
    bool_aroma -- True if the atom's label is listed as a ring proton for
        its residue label, False otherwise
    atom_aroma -- the Atom itself when bool_aroma is True, otherwise None
    """
    # accepted ring-proton (and pseudoatom) labels per aromatic residue
    ring_labels_by_res = {
        "PHE": ["HD1", "HD2", "QD", "HE1", "HE2", "QE", "HZ"],
        "TYR": ["HD1", "HD2", "QD", "HE1", "HE2", "QE", "HH"],
        "TRP": ["HD1", "HE1", "HE3", "QE", "HZ2", "HZ3", "QZ", "HH2"],
        "HIS": ["HD1", "HD2", "QD", "HE1", "HE2", "QE"]
    }
    # residues that are not aromatic have no accepted labels at all
    ring_labels = ring_labels_by_res.get(atom.res_label, ())
    if atom.atom_label in ring_labels:
        return True, atom
    return False, None



## Example (cont.)
Returning to our example, we can check 

https://api.bmrb.io/current/mappings/bmrb/pdb

to see that the PDB ID corresponding to BMRB ID 11017 is 2RN7.

In [32]:
# collect the amide-aromatic restraints of PDB entry 2RN7 and show them
restraints_dict = make_restraints_dict(pdb_id='2RN7')
print(restraints_dict)

{'0,3': {'1': Restraint(atom_amide=Atom(res_index='8', res_label='SER', atom_label='H', cs_val=None), atom_aroma=Atom(res_index='7', res_label='PHE', atom_label='QD', cs_val=None))}, '0,168': {'1': Restraint(atom_amide=Atom(res_index='28', res_label='SER', atom_label='H', cs_val=None), atom_aroma=Atom(res_index='26', res_label='TYR', atom_label='QD', cs_val=None))}}


We have found two amide-aromatic restraints, one between the 8 SER amide and the 7 PHE aromatic ring pseudoatom QD, and the other between the 28 SER amide and the 26 TYR pseudoatom QD.

Note that the chemical shifts are listed as None for all of the atoms. This is because the Atom instances in restraints_dict were created solely from the restraints file, which does not contain chemical shift assignments. All that remains is to correlate the atoms in the restraints_dict with the atoms in the residues_dict so that we can see their chemical shifts.

# Step 4: Correlating atoms from restraint file and BMRB/PDB

In [42]:
def find_correlate(
    atom_restraint: Atom, residues_dict: Dict[str, Residue]
) -> Atom:
    '''
    For an Atom instance generated by parsing a restraint file,
    find the corresponding Atom instance (with chemical shift)
    in the residues_dict generated by parsing the BMRB entry.

    The match is made on residue index and atom label only.

    Keyword Arguments:
    atom_restraint -- Atom instance generated from a restraint file
    residues_dict -- dict of Residues generated from the BMRB entry
    Returns:
    atom_correlate -- the Atom from residues_dict corresponding to
        atom_restraint; if residues_dict has no such residue or the residue
        has no atom with that label, atom_restraint itself is returned
        unchanged
    '''
    res_correlate = residues_dict.get(atom_restraint.res_index)
    if res_correlate is None:
        # no atom of this residue is present in residues_dict
        return atom_restraint
    # fall back to the restraint atom when this particular atom is absent
    # (e.g. a pseudoatom label that is not a key of atoms_dict)
    return res_correlate.atoms_dict.get(
        atom_restraint.atom_label, atom_restraint
    )

def correlate_restraints_w_residues(
    restraints_dict: Dict[str, Dict[str, Restraint]], 
    residues_dict: Dict[str, Residue]
) -> Dict[str, Dict[str, Restraint]]:
    '''
    Swap the amide Atom of every Restraint in restraints_dict for the
    corresponding Atom from residues_dict, which has a chemical shift.

    The Restraints are modified in place; the aromatic Atoms are left
    untouched.

    Keyword arguments:
    restraints_dict -- dict of Restraints generated from the restraints file
    residues_dict -- dict of Residues generated from the BMRB entry
    Returns:
    restraints_dict -- the same dict object that was passed in
    '''
    for members_dict in restraints_dict.values():
        for restraint in members_dict.values():
            amide_from_restraint = restraint.atom_amide
            restraint.atom_amide = find_correlate(
                amide_from_restraint, residues_dict
            )
    return restraints_dict
    

We can apply this to the example case and find the chemical shifts of the restrained 8 SER and 28 SER amides:

In [46]:
# attach the BMRB chemical shifts to the amide Atoms of the restraints
restraints_dict = correlate_restraints_w_residues(restraints_dict, residues_dict)

# one line per restraint member: aromatic atom, amide atom, amide shift
for members_dict in restraints_dict.values():
    for restraint in members_dict.values():
        atom_aroma, atom_amide = restraint.atom_aroma, restraint.atom_amide
        print(
            atom_aroma.res_index, atom_aroma.res_label, atom_aroma.atom_label,
            atom_amide.res_index, atom_amide.res_label, atom_amide.atom_label,
            atom_amide.cs_val
        )

7 PHE QD 8 SER H 8.52
26 TYR QD 28 SER H 7.55


# Final Thoughts
* Confederation between two databases (BMRB and PDB) allows us to investigate the relationship between information held in both (chemical shifts and amide-aromatic interactions/restraints)
* ReBoxitory provides fast and easy access to large databases relevant to protein NMR, aiding large scale surveys of the data therein

The code shown above can be found at [link](https://github.com/uwbmrb/rcs). It is based on other code in the repository that was used for an [article](https://doi.org/10.5194/mr-2021-53) currently in preprint. The original code is significantly more complex to take into account the distance from amides to aromatic ring centers as well as the various errors that one finds in a large survey of multiple databases.