# Step 1: Create the data structures we will use

In [1]:
from typing import List, Tuple, Dict, Union
from dataclasses import dataclass
import os

In [10]:
@dataclass
class Atom:
    """
    Hold the identifying labels and the chemical shift Z score of one atom.

    Instance variables:
    res_index -- index of the residue that contains the atom
    res_label -- label of the residue that contains the atom (e.g. 'PHE')
    atom_label -- label of the atom itself (e.g. 'H' or 'HZ')
    cs_sigma -- Z score of the atom, or None when no chemical shift was
        reported for it
    """
    res_index: str
    res_label: str
    atom_label: str
    # None stands in for atoms without a reported chemical shift
    cs_sigma: Union[float, None]

@dataclass
class Residue:
    """
    Hold a residue's labels together with the Atoms collected for it.

    Instance variables:
    res_index -- index of the residue within the entry
    res_label -- label of the residue within the entry (e.g. 'PHE')
    atoms_dict -- the residue's Atom instances, keyed by atom label; this
        is the part of the Residue that carries the actual data
    """
    res_index: str
    res_label: str
    # maps atom label -> Atom
    atoms_dict: Dict[str, Atom]

@dataclass
class Restraint:
    """
    Pair up the two Atoms of a restraint between an amide proton and an
    aromatic ring proton.

    Instance variables:
    atom_amide -- the amide Atom of the restraint
    atom_aroma -- the aromatic ring Atom of the restraint
    """
    atom_amide: Atom
    atom_aroma: Atom


# Step 2: Loading the data from Kumaran's output

In [11]:
# PDB and BMRB identifiers of the entry whose ring data file we want
pdb_id, bmrb_id = '1Z5F', '5787'

# name of the matching data file (the "k-file")
k_file_path = f"{pdb_id}_{bmrb_id}_ORIG_ORIG.dat"

In [12]:
def make_residues_from_file(filename: str) -> Dict[str, Residue]:
    """
    Read a k-file and collect every Residue it mentions into one dict.

    Each line of the file is split on commas and handed to add_residues,
    which inserts the amide-containing Residue and the aromatic Residues
    found on that line, or merges their atoms into Residues that are
    already present. A Residue that appears both as an amide and as an
    aromatic ring therefore ends up with both kinds of Atom in its
    atoms_dict.

    Keyword arguments:
    filename -- path to file containing data on chemical shifts
    Returns:
    residues_dict -- dict of amide-containing and aromatic Residues,
        keyed by residue index
    """
    residues_dict = {}
    with open(filename) as infile:
        for raw_line in infile:
            fields = raw_line.split(',')
            # build the Residues described by this line and fold them
            # into what has been collected so far
            residues_dict = add_residues(fields, residues_dict)
    return residues_dict
            
def add_residues(
    line: List[str], residues_dict: Dict[str, Residue]
) -> Dict[str, Residue]:
    """
    Merge the Residues described by one line of the data file into
    residues_dict.

    A Residue whose index is not yet in residues_dict is inserted as is;
    for one that is already present, the atoms from this line are added to
    the existing Residue's atoms_dict. residues_dict is modified in place.

    Keyword arguments:
    line -- a line from the data file, already split into fields
    residues_dict -- dict containing Residues for a PDB entry organized by
        residue index
    Returns:
    residues_dict -- the same dict that was passed in, after the update
    """
    res_amide, res_aroma_list = make_residues(line)
    # The amide Residue goes first, then the aromatic Residues in the order
    # they appear on the line; all are merged in the same way.
    for residue in [res_amide, *res_aroma_list]:
        key = residue.res_index
        if key in residues_dict:
            # seen before (as amide or as ring): just add the new atoms
            residues_dict[key].atoms_dict.update(residue.atoms_dict)
        else:
            # first appearance of this Residue in the k-file
            residues_dict[key] = residue
    return residues_dict
    
def make_residues(line: List[str]) -> Tuple[Residue, List[Residue]]:
    """
    Make one amide and multiple aromatic residues from line of ring data file.

    Ring slots for which make_res_aroma does not return a Residue (it
    returns a placeholder string for empty slots) are left out of the list.

    Keyword arguments:
    line -- a line from the data file, already split into fields
    Returns:
    res_amide -- Residue instance containing the amide proton
    res_aroma_list -- list of Residue instances containing aromatic ring
        protons; holds at most 5 entries
    """
    res_amide = make_res_amide(line)
    res_aroma_list = []
    for i in range(5): # A line of the k-file includes 5 nearest rings
        res_aroma = make_res_aroma(line, i)
        if isinstance(res_aroma, Residue):
            # keep the Residue that was just checked instead of building
            # the same ring a second time
            res_aroma_list.append(res_aroma)
    return res_amide, res_aroma_list

    
def make_res_amide(line: List[str]) -> Residue:
    """
    Build the Residue that holds the amide proton described by a line of
    the ring data file.

    Keyword arguments:
    line -- a line from the data file, already split into fields
    Returns:
    res_amide -- Residue whose atoms_dict contains only the amide Atom,
        stored under the label 'H'
    """
    # fields 2 and 3 identify the residue, field 5 is the amide's Z score
    res_index, res_label = line[2], line[3]
    amide = Atom(res_index, res_label, 'H', float(line[5]))

    # Only the amide goes in here; if the same residue also carries a ring,
    # the ring protons are merged in later.
    return Residue(res_index, res_label, {amide.atom_label: amide})

def make_res_aroma(line: List[str], i: int) -> Union[Residue, str]:
    """
    Build the aromatic Residue stored in the i-th ring slot of a line of
    Kumaran's ring data file.

    Keyword arguments:
    line -- a line from the data file, already split into fields
    i -- index of the ring in the line
    Returns:
    'Empty ring' -- if the slot holds no ring (its residue index is '.')
    res_aroma -- Residue instance holding the ring's protons otherwise
    """
    ring_width = 52 # number of fields each ring occupies in the file
    first_ring_offset = 8 # field at which the first ring starts
    start = first_ring_offset + ring_width * i
    ring_data = line[start:start + ring_width]

    res_index, res_label = ring_data[0], ring_data[1]
    # A . is written for the trailing slots when fewer than 5 rings were found
    if res_index == '.':
        return "Empty ring"

    atoms_dict = make_atoms_aroma(ring_data)
    return Residue(res_index, res_label, atoms_dict)


def make_atoms_aroma(ring_data: List[str]) -> Dict[str, Atom]:
    """
    Make a dict of aromatic ring protons from Kumaran's ring data file.

    Keyword arguments:
    ring_data -- a slice of a line from the data file corresponding to the
        particular ring; element 0 is the residue index, element 1 the
        residue label, and elements 20, 22, 24, 26 and 28 hold the Z
        scores of the ring protons ('.' where none was reported)
    Returns:
    atoms_dict -- dict of Atom instances for all aromatic ring protons of
        the ring, keyed by atom label
    Raises:
    KeyError -- if the residue label is not PHE, TYR, TRP or HIS
    """
    # For every supported residue type: position within ring_data -> label
    # of the proton whose Z score is stored at that position.
    atoms_file_dict = {
        "PHE": {20: 'HD1', 22: 'HD2', 24: 'HE1', 26: 'HE2', 28: 'HZ'},
        "TYR": {20: 'HD1', 22: 'HD2', 24: 'HE1', 26: 'HE2', 28: 'HH'},
        "TRP": {20: 'HE3', 22: 'HZ2', 24: 'HZ3', 26: 'HH2', 28: 'HE1'},
        "HIS": {20: 'HD2', 22: 'HE1', 24: 'HE2', 26: 'xx', 28: 'yy'},
    } # xx and yy are pseudoatoms to maintain the formatting in the k-file
    # NOTE(review): the HIS pseudoatoms 'xx' and 'yy' are stored in the
    # result like real protons -- confirm that downstream code expects this.
    res_index = ring_data[0]
    res_label = ring_data[1]
    atoms_dict = {}
    # Go through the 5 proton slots of this ring.
    for cs_sigma_index, atom_label in atoms_file_dict[res_label].items():
        raw_sigma = ring_data[cs_sigma_index]
        # a '.' means the atom didn't have a CS in BMRB
        cs_sigma = None if raw_sigma == '.' else float(raw_sigma)
        atoms_dict[atom_label] = Atom(
            res_index, res_label, atom_label, cs_sigma
        )
    return atoms_dict
    
        

## Example
Let's try this out on BMRB ID = 11017, PDB ID = 2RN7 (the file loaded in the cell below):

In [13]:
# Path of the k-file for PDB 2RN7 / BMRB 11017 inside the 'output' directory
k_filename = os.path.join('output', '2RN7_11017_ORIG_ORIG.dat')
# Parse it into a dict of Residues keyed by residue index
residues_dict = make_residues_from_file(k_filename)

Let's find a residue with only an amide listed and another with aromatic ring protons.

In [14]:
# Examples to display: a Residue holding a single Atom (only the amide) and
# a Residue holding more than one Atom (one with ring protons).
res_amide = None
res_aroma = None

for res_index, res in residues_dict.items():
    n_atoms = len(res.atoms_dict)
    if n_atoms == 1:
        res_amide = res
    elif n_atoms > 1:
        res_aroma = res
    # stop as soon as one example of each kind has been found
    if not (res_amide is None or res_aroma is None):
        break

print(res_amide)
print(res_aroma)

Residue(res_index='3', res_label='LYS', atoms_dict={'H': Atom(res_index='3', res_label='LYS', atom_label='H', cs_sigma=0.582)})
Residue(res_index='7', res_label='PHE', atoms_dict={'HD1': Atom(res_index='7', res_label='PHE', atom_label='HD1', cs_sigma=131.5), 'HD2': Atom(res_index='7', res_label='PHE', atom_label='HD2', cs_sigma=39.365), 'HE1': Atom(res_index='7', res_label='PHE', atom_label='HE1', cs_sigma=131.5), 'HE2': Atom(res_index='7', res_label='PHE', atom_label='HE2', cs_sigma=38.852), 'HZ': Atom(res_index='7', res_label='PHE', atom_label='HZ', cs_sigma=129.7), 'H': Atom(res_index='7', res_label='PHE', atom_label='H', cs_sigma=0.176)})


Above, you can see all of the information that is stored for each residue. Note that the aromatic residue, 7 PHE, also has an amide listed along with its chemical shift.

# Step 3: Loading restraint data
First, we can get the restraint file from reboxitory.

In [15]:
# Entry whose restraints we want to load
pdb_id = '2RN7'
bmrb_id = '11017'

# The restraint files are named after the lower-case PDB ID
filepath = os.path.join(
    '/reboxitory', '2021', '06', 'PDB', 'data', 'structures', 'all',
    'nmr_restraints_v2', pdb_id.lower() + '_mr.str.gz',
)

The file can be parsed using PyNMRSTAR (see [link](https://github.com/uwbmrb/PyNMRSTAR))

In [17]:
import pynmrstar
# Parse the restraint file at filepath into a PyNMRSTAR entry object
restraint_entry = pynmrstar.Entry.from_file(filepath)
# Bare expression: the notebook shows the type of the parsed object as the
# cell output
type(restraint_entry)

pynmrstar.entry.Entry

We will build a dict of all Restraints between aromatic ring protons and amide protons that we can find in the restraint file.

In [None]:
def make_restraints_dict(
    restraint_entry: pynmrstar.entry.Entry
) -> Dict[str, Dict[str, Restraint]]:
    """
    Create dictionary of all amide-aromatic restraints for a PDB entry.

    Every row of every 'Gen_dist_constraint' loop in the entry is passed to
    make_restraint; rows that do not describe a restraint between an amide
    proton and an aromatic ring proton are skipped.

    Keyword arguments:
    restraint_entry -- parsed PyNMRSTAR object representing info in restraint
        file
    Returns:
    restraints_dict -- dict of the amide-aromatic restraints of the entry,
        keyed first by restraint_id and then by member_id
    """
    restraints_dict = {}
    restraint_loops_list = restraint_entry.get_loops_by_category(
        'Gen_dist_constraint'
    ) # get a list of 'distance constraint' (restraint) loops w/in the file
    for restraint_loop in restraint_loops_list:
        # The order of the tags fixes the position of each value within
        # the rows handed to make_restraint.
        restraints_list = restraint_loop.get_tag(
            [
                "ID", "Member_ID", "Member_logic_code",
                "Comp_index_ID_1", "Comp_ID_1", "Atom_ID_1",
                "Comp_index_ID_2", "Comp_ID_2", "Atom_ID_2",
                "Distance_val", "Distance_lower_bound_val",
                "Distance_upper_bound_val"
            ]
        ) # for each loop, get a list of the relevant info for each restraint
        for restraint_info in restraints_list:
            result = make_restraint(restraint_info)
            if result is None: # not an amide-aromatic restraint
                continue
            restraint, restraint_id, member_id = result
            if restraint is None: # IDs came back, but no Restraint was built
                continue
            # NOTE(review): if several loops reuse the same restraint IDs,
            # entries from later loops overwrite earlier ones -- confirm
            # whether the loop number has to become part of the key.
            restraints_dict.setdefault(restraint_id, {})[member_id] = restraint
    return restraints_dict
            
def make_restraint(
    restraint_info: List[str]
) -> Union[Tuple[Restraint, str, str], None]:
    """
    Read row from restraint file and build restraint if it is amide-aromatic.

    A restraint counts as amide-aromatic when one of its atoms is an amide
    proton, the other is an aromatic ring proton, and the two atoms belong
    to different residues.

    Keyword arguments:
    restraint_info -- the values of one restraint row, in the order: ID,
        Member_ID, Member_logic_code, Comp_index_ID_1, Comp_ID_1,
        Atom_ID_1, Comp_index_ID_2, Comp_ID_2, Atom_ID_2, followed by the
        distance values (not used here)
    Returns:
    (restraint, restraint_id, member_id) -- if the restraint is
        amide-aromatic, where
        restraint -- Restraint object with an amide atom and an aromatic
            ring proton
        restraint_id -- restraint ID as found in restraint file
        member_id -- member ID as found in restraint file
    None -- if the restraint is not amide-aromatic
    """
    # info for cataloguing restraints
    restraint_id = restraint_info[0]
    member_id = restraint_info[1]
    # build both Atoms w/o a Z-score; the restraint file doesn't provide one
    atom_1 = Atom(
        restraint_info[3], restraint_info[4], restraint_info[5], None
    )
    atom_2 = Atom(
        restraint_info[6], restraint_info[7], restraint_info[8], None
    )

    # we don't consider the restraint if the atoms are from the same residue
    if atom_1.res_index == atom_2.res_index:
        return None

    # now check if one atom is an amide, and find out which is which
    bool_amide, atom_amide = check_amide(atom_1, atom_2)
    if not bool_amide:
        return None

    # the atom that is not the amide has to be an aromatic ring proton
    atom_other = atom_2 if atom_amide is atom_1 else atom_1
    bool_aroma, atom_aroma = check_aroma(atom_other)
    if not bool_aroma:
        return None

    return Restraint(atom_amide, atom_aroma), restraint_id, member_id
        
    
def check_amide(
    atom_1: Atom, atom_2: Atom
) -> Tuple[bool, Union[Atom, None]]:
    """
    Check if either of atom_1 or atom_2 is an amide hydrogen; if so, return
    that atom.

    An atom counts as an amide hydrogen when its atom_label is 'H'. If both
    atoms are amide hydrogens, atom_2 is the one returned.

    Keyword arguments:
    atom_1 -- the first Atom instance built from the restraint in file
    atom_2 -- the second Atom instance built from the restraint in file
    Returns:
    bool_amide -- True if one of the atoms is an amide, False otherwise
    atom_amide -- None if neither of the atoms is an amide, otherwise it is the
       atom that was found to be an amide
    """
    atom_amide = None
    bool_amide = False
    # Both atoms are always inspected, so a later amide replaces an earlier one.
    for atom in (atom_1, atom_2):
        if atom.atom_label == 'H':
            bool_amide = True
            atom_amide = atom
    return bool_amide, atom_amide

def check_aroma(atom: Atom) -> Tuple[bool, Union[Atom, None]]:
    """
    Tell whether an atom is an aromatic ring proton.

    The atom qualifies when its residue label is PHE, TYR, TRP or HIS and
    its atom label is one of the ring proton labels listed for that residue.

    Keyword arguments:
    atom -- the Atom object to be checked
    Returns:
    bool_aroma -- True if the atom is an aromatic ring proton, False otherwise
    atom_aroma -- the Atom object itself if it is an aromatic ring proton,
        None otherwise
    """
    # ring proton labels accepted for each aromatic residue type
    ring_protons_by_residue = {
        "PHE": ["HD1", "HD2", "QD", "HE1", "HE2", "QE", "HZ"],
        "TYR": ["HD1", "HD2", "QD", "HE1", "HE2", "QE", "HH"],
        "TRP": ["HD1", "HE1", "HE3", "QE", "HZ2", "HZ3", "QZ", "HH2"],
        "HIS": ["HD1", "HD2", "QD", "HE1", "HE2", "QE"]
    }
    # residues that are not aromatic have no accepted labels at all
    accepted_labels = ring_protons_by_residue.get(atom.res_label, [])
    if atom.atom_label in accepted_labels:
        return True, atom
    return False, None