In [60]:
# Force-field parameter files, both located in the LEaP `parm` directory.
_parm_dir = "/home/jok120/build/amber18/dat/leap/parm/"
amber_files = [f"{_parm_dir}frcmod.ff14SB", f"{_parm_dir}parm99.dat"]

# Residue library file; parsed further down for per-residue atom names and types.
atomnames_file = "/home/jok120/build/amber18/dat/leap/lib/all_amino94.lib"

print(amber_files)

['/home/jok120/build/amber18/dat/leap/parm/frcmod.ff14SB', '/home/jok120/build/amber18/dat/leap/parm/parm99.dat']


In [61]:
import re

In [71]:
import sidechainnet
from sidechainnet.structure.fastbuild import AA3to1
from sidechainnet.structure.HydrogenBuilder import HYDROGEN_NAMES

In [103]:
def get_atom_names_for_res(resname3, amber_atomname_file):
    """Extract an {atom_name: amber_atom_type} mapping for one residue.

    The whole library file is read and searched for the residue's atom table,
    i.e. the rows that directly follow the header line
    ``!entry.<RESNAME3>.unit.atoms table  str name  str type ...``. Each row
    begins with a quoted atom name and a quoted atom type; only those two
    fields are kept.

    Args:
        resname3: Residue code used in the library entry name (e.g. "TRP").
            It is upper-cased before the lookup.
        amber_atomname_file: Path to the library file to parse.

    Returns:
        A dict mapping atom name -> atom type, in the order the rows appear.
        If no table is found for the residue, a message is printed and an
        empty dict is returned.

    Raises:
        OSError: If the library file cannot be opened.
    """
    # re.escape keeps the residue code literal, so a metacharacter in it
    # (e.g. ".") cannot accidentally match a different residue's entry.
    header = (
        rf"!entry\.{re.escape(resname3.upper())}\.unit\.atoms table"
        "  str name  str type  int typex  int resx  int flags  int seq"
        "  int elmnt  dbl chg"
    )
    # Lookbehind on the header, then one or more rows of `"name" "type" ...`.
    # Atom types may contain "*" (e.g. "C*"), hence the explicit character class.
    pattern = rf"(?<={header})" + r'(\n "[\w\*]+" "[\w\*]+"[^\n]*$)+'

    with open(amber_atomname_file, "r") as f:
        atomname_str = f.read()

    match = re.search(pattern, atomname_str, re.MULTILINE)
    if match is None:
        print(f"Unable to find atom names for {resname3}.")
        return {}

    name_to_type = {}
    for row in match.group(0).strip().split("\n"):
        fields = row.split()
        # The first two whitespace-separated fields are the quoted name and type.
        name_to_type[fields[0].strip('"')] = fields[1].strip('"')
    return name_to_type

In [104]:
# Spot-check the parser on a single residue (TRP); the cell output below is the
# resulting atom name -> atom type mapping.
get_atom_names_for_res("TRP", atomnames_file)

{'N': 'N',
 'H': 'H',
 'CA': 'CT',
 'HA': 'H1',
 'CB': 'CT',
 'HB2': 'HC',
 'HB3': 'HC',
 'CG': 'C*',
 'CD1': 'CW',
 'HD1': 'H4',
 'NE1': 'NA',
 'HE1': 'H',
 'CE2': 'CN',
 'CZ2': 'CA',
 'HZ2': 'HA',
 'CH2': 'CA',
 'HH2': 'HA',
 'CZ3': 'CA',
 'HZ3': 'HA',
 'CE3': 'CA',
 'HE3': 'HA',
 'CD2': 'CB',
 'C': 'C',
 'O': 'O'}

In [105]:
def get_all_atom_names_dict():
    """Build the RES3 -> {atomname: atomtype} lookup for every key of AA3to1.

    "HIS" is looked up under the library entry "HID"; every other residue is
    looked up under its own three-letter code. The module-level
    `atomnames_file` is used as the library file for all lookups.
    """
    return {
        res3: get_atom_names_for_res("HID" if res3 == "HIS" else res3, atomnames_file)
        for res3 in AA3to1
    }

In [106]:
# Residue -> {atom name: atom type} lookup, built once and reused by later cells.
at_dict = get_all_atom_names_dict()

In [107]:
# Start from a copy so HYDROGEN_NAMES keeps its name lists, then replace each
# residue's hydrogen names with the atom types looked up in at_dict.
HYDROGEN_TYPES = HYDROGEN_NAMES.copy()
for resname in HYDROGEN_NAMES:
    atomnames = HYDROGEN_NAMES[resname]
    print(resname, atomnames)
    name_to_type = at_dict[resname]
    HYDROGEN_TYPES[resname] = [name_to_type[an] for an in atomnames]

ALA ['H', 'HA', 'HB1', 'HB2', 'HB3']
ARG ['H', 'HA', 'HB2', 'HB3', 'HD2', 'HD3', 'HG2', 'HG3', 'HE', 'HH11', 'HH12', 'HH21', 'HH22']
ASN ['H', 'HA', 'HB2', 'HB3', 'HD21', 'HD22']
ASP ['H', 'HA', 'HB2', 'HB3']
CYS ['H', 'HA', 'HB2', 'HB3', 'HG']
GLN ['H', 'HA', 'HB2', 'HB3', 'HG2', 'HG3', 'HE21', 'HE22']
GLU ['H', 'HA', 'HB2', 'HB3', 'HG2', 'HG3']
GLY ['H', 'HA2', 'HA3']
HIS ['H', 'HA', 'HB2', 'HB3', 'HE1', 'HD2', 'HD1']
ILE ['H', 'HA', 'HB', 'HD11', 'HD12', 'HD13', 'HG12', 'HG13', 'HG21', 'HG22', 'HG23']
LEU ['H', 'HA', 'HB2', 'HB3', 'HD11', 'HD12', 'HD13', 'HD21', 'HD22', 'HD23', 'HG']
LYS ['H', 'HA', 'HB2', 'HB3', 'HD2', 'HD3', 'HE2', 'HE3', 'HG2', 'HG3', 'HZ1', 'HZ2', 'HZ3']
MET ['H', 'HA', 'HB2', 'HB3', 'HE1', 'HE2', 'HE3', 'HG2', 'HG3']
PHE ['H', 'HA', 'HB2', 'HB3', 'HD1', 'HD2', 'HE1', 'HE2', 'HZ']
PRO ['HA', 'HB2', 'HB3', 'HD2', 'HD3', 'HG2', 'HG3']
SER ['H', 'HA', 'HB2', 'HB3', 'HG']
THR ['H', 'HA', 'HB', 'HG1', 'HG21', 'HG22', 'HG23']
TRP ['H', 'HA', 'HB2', 'HB3', 'HD1', 'HE1'

In [110]:
# Pretty-print the per-residue hydrogen atom types; each list is in the same
# order as the corresponding name list in HYDROGEN_NAMES.
import pprint
pprint.pprint(HYDROGEN_TYPES)

{'ALA': ['H', 'H1', 'HC', 'HC', 'HC'],
 'ARG': ['H',
         'H1',
         'HC',
         'HC',
         'H1',
         'H1',
         'HC',
         'HC',
         'H',
         'H',
         'H',
         'H',
         'H'],
 'ASN': ['H', 'H1', 'HC', 'HC', 'H', 'H'],
 'ASP': ['H', 'H1', 'HC', 'HC'],
 'CYS': ['H', 'H1', 'H1', 'H1', 'HS'],
 'GLN': ['H', 'H1', 'HC', 'HC', 'HC', 'HC', 'H', 'H'],
 'GLU': ['H', 'H1', 'HC', 'HC', 'HC', 'HC'],
 'GLY': ['H', 'H1', 'H1'],
 'HIS': ['H', 'H1', 'HC', 'HC', 'H5', 'H4', 'H'],
 'ILE': ['H', 'H1', 'HC', 'HC', 'HC', 'HC', 'HC', 'HC', 'HC', 'HC', 'HC'],
 'LEU': ['H', 'H1', 'HC', 'HC', 'HC', 'HC', 'HC', 'HC', 'HC', 'HC', 'HC'],
 'LYS': ['H',
         'H1',
         'HC',
         'HC',
         'HC',
         'HC',
         'HP',
         'HP',
         'HC',
         'HC',
         'H',
         'H',
         'H'],
 'MET': ['H', 'H1', 'HC', 'HC', 'H1', 'H1', 'H1', 'H1', 'H1'],
 'PHE': ['H', 'H1', 'HC', 'HC', 'HA', 'HA', 'HA', 'HA', 'HA'],
 'PRO': ['H

In [70]:
# Let's get this working first for TRP LEU ASP GLU
# Maps a three-letter residue code to a list of (atom, atom) name pairs:
# heavy-atom side-chain pairs first, then (heavy atom, hydrogen) pairs.
bonded_pairs = {
    "ASP": [("CA", "CB"), ("CB", "CG"), ("CG", "OD1"), ("CG", "OD2")] +
           [("CB", "HB2"), ("CB", "HB3")],

    "GLU": [("CA", "CB"), ("CB", "CG"), ("CG", "CD"), ("CD", "OE1"), ("CD", "OE2")] +
           [("CB", "HB2"), ("CB", "HB3"), ("CG", "HG2"), ("CG", "HG3")],

    "LEU": [("CA", "CB"), ("CB", "CG"), ("CG", "CD1"), ("CG", "CD2")] +
           [("CB", "HB2"), ("CB", "HB3"), ("CD1", "HD11"), ("CD1", "HD12"),
            ("CD1", "HD13"), ("CD2", "HD21"), ("CD2", "HD22"), ("CD2", "HD23"),
            ("CG", "HG")],

    # NOTE(review): the heavy-atom list follows the ring as a single chain and
    # omits the indole ring-closure pairs (CG-CD2, CD2-CE2) -- confirm whether
    # downstream code needs them.
    "TRP": [("CA", "CB"), ("CB", "CG"), ("CG", "CD1"), ("CD1", "NE1"),
            ("NE1", "CE2"), ("CE2", "CZ2"), ("CZ2", "CH2"), ("CH2", "CZ3"),
            ("CZ3", "CE3"), ("CE3", "CD2")] +
           # Each hydrogen is paired with the heavy atom it is attached to
           # (previously every one was paired with the placeholder "C").
           [("CB", "HB2"), ("CB", "HB3"), ("CD1", "HD1"), ("NE1", "HE1"),
            ("CE3", "HE3"), ("CH2", "HH2"), ("CZ2", "HZ2"), ("CZ3", "HZ3")],
}

In [None]:
C