In [1]:
from rdkit.Chem.rdmolfiles import MolFromPDBFile
from rdkit.Chem.rdchem import Mol
from rdkit.Chem import AllChem

import numpy as np
import rdkit.Chem as Chem
from rdkit.Chem import AddHs, AssignStereochemistry, HybridizationType, ChiralType, BondStereo, MolFromMol2File
from rdkit.Chem.AllChem import ComputeGasteigerCharges
import os
import sys

sys.path.append("../../")
from src.data.pocket_utils import get_atom_coordinates, find_pocket_atoms_RDKit
from src.data.utils import pdb_to_rdkit_mol, mol2_to_rdkit_mol, get_vdw_radius, add_charges_to_molecule, get_node_features, get_edge_features, extract_charges_from_mol2
from src.utils.constants import ES_THRESHOLD

In [2]:
def ionic_interaction(mol: Chem.Mol, atom_1: int, atom_2: int):
    """
    Determine if there is an electrostatic interaction between atom 1 and atom 2 based on whether the coulombic interaction
    between the two atoms is less than or equal to some threshold imported from src/utils/constants.py.

    Parameters:
    mol (Chem.Mol): The RDKit molecule. NOTE: We assume that the molecule has _TriposAtomCharges charges assigned to the atoms.
    atom_1 (int): The index of the first atom.
    atom_2 (int): The index of the second atom.
    
    Returns:
    tuple: (0, 0, 0, 0, 0) if there is an electrostatic interaction, None otherwise 

    Conversion Factor Calculation:
    The Coulombic interaction energy between two point charges q1 and q2 separated by a distance r in vacuum is given by:

        E = k * q1 * q2 / r

    Where:
    - E is the energy in Joules (J).
    - k is Coulomb's constant, 8.987 x 10^9 N m^2 C^-2.
    - q1 and q2 are the charges in Coulombs (C).
    - r is the distance in meters (m).

    To convert this energy into kJ/mol, the following conversions are applied:
    - 1 e (elementary charge) = 1.602 x 10^-19 C
    - Avogadro's number (N_A) = 6.022 x 10^23 mol^-1
    - 1 Angstrom (Å) = 10^-10 meters (m)

    Therefore, the conversion factor from (charge_1 * charge_2) / distance to kJ/mol is:

        Conversion factor = (k * N_A * (1.602 x 10^-19)^2) * 10^10

    Plugging in the values:

        Conversion factor = (8.987 x 10^9 * 6.022 x 10^23 * (1.602 x 10^-19)^2) * 10^10
                          ≈ 1388.93 kJ mol^-1 Å^-1
    """
    # Constants
    CONVERSION_FACTOR_KJ = 1388.93  # kJ mol^-1 Å^-1

    # Get atomic charges using Gasteiger charges or another suitable method
    charge_1 = float(mol.GetAtomWithIdx(atom_1).GetProp('_TriposAtomCharges'))
    charge_2 = float(mol.GetAtomWithIdx(atom_2).GetProp('_TriposAtomCharges'))
    
    # Get the distance between the two atoms
    conf = mol.GetConformer()
    pos_1 = conf.GetAtomPosition(atom_1)
    pos_2 = conf.GetAtomPosition(atom_2)
    distance = pos_1.Distance(pos_2)

    coulombic_interaction_kj = (charge_1 * charge_2 / distance) * CONVERSION_FACTOR_KJ

    # Check if the Coulombic interaction is less than or equal to the threshold
    if abs(coulombic_interaction_kj) >= ES_THRESHOLD:
        return (0, 0, 0, 0, 0, coulombic_interaction_kj)
    else:
        return None

In [3]:
def calculate_gasteiger_charges(mol):
    AllChem.ComputeGasteigerCharges(mol)
    for atom in mol.GetAtoms():
        atom.SetProp('_TriposAtomCharges', str(atom.GetDoubleProp('_GasteigerCharge')))
    return mol


In [4]:
# Define test cases
test_cases = [("CCCCCCCCCC=C", 0, 10), # expected output: None
              ("C=C", 0, 1), # expected output:  None
              ("CC(=O)O", 2, 3) # expected output: (0, 0, 0, 0, 0)
              ]

# Initialize RDKit molecules and conformers
molecules = []
for smiles, a1, a2 in test_cases:
    mol = Chem.MolFromSmiles(smiles)
    AllChem.EmbedMolecule(mol)
    mol = calculate_gasteiger_charges(mol)
    molecules.append((mol, a1, a2))

# Run test cases
for mol, atom_1, atom_2 in molecules:
    # Replace this with the actual function call
    result = ionic_interaction(mol, atom_1, atom_2)
    print(f"SMILES: {Chem.MolToSmiles(mol)}, Atoms: ({atom_1}, {atom_2}) -> Result: {result}")


SMILES: C=CCCCCCCCCC, Atoms: (0, 10) -> Result: None
SMILES: C=C, Atoms: (0, 1) -> Result: None
SMILES: CC(=O)O, Atoms: (2, 3) -> Result: (0, 0, 0, 0, 0, 74.26637744171447)


[22:35:18] Molecule does not have explicit Hs. Consider calling AddHs()
[22:35:18] Molecule does not have explicit Hs. Consider calling AddHs()
[22:35:18] Molecule does not have explicit Hs. Consider calling AddHs()


In [5]:
complex_mol = Chem.RemoveHs(mol2_to_rdkit_mol("../../test_data/pdb/1a0q/1a0q_complex_charged.mol2", sanitize = False), sanitize = False) #/Users/tsachmackey/Documents/Summer 2024/Research /Batista project/AffinityNet/test_data/pdb/1a0q
ligand_mol = Chem.RemoveHs(mol2_to_rdkit_mol("../../test_data/pdb/1a0q/1a0q_ligand.mol2", sanitize = False), sanitize = False) #/Users/tsachmackey/Documents/Summer 2024/Research /Batista project/AffinityNet/test_data/pdb/1a0q
num_atoms_in_protein = len(complex_mol.GetAtoms()) - len(ligand_mol.GetAtoms())
charges = extract_charges_from_mol2("../../test_data/pdb/1a0q/1a0q_complex_charged.mol2")

mol = complex_mol
AssignStereochemistry(mol, force=True, cleanIt=True)
protein_or_ligand_ids = [-1 if atom.GetIdx() < num_atoms_in_protein else 1 for atom in mol.GetAtoms()]

['1', 'N', '27.234', '12.955', '59.573', 'N', '1', 'ILE', '-0.215']
['2', 'HN1', '27.296', '12.868', '60.608', 'H', '1', 'ILE', '0.088']
['3', 'HN2', '28.165', '12.761', '59.152', 'H', '1', 'ILE', '0.106']
['4', 'HN3', '26.933', '13.918', '59.322', 'H', '1', 'ILE', '0.100']
['5', 'CA', '26.259', '11.993', '59.062', 'C', '1', 'ILE', '-0.061']
['6', 'HA', '26.699', '11.034', '59.337', 'H', '1', 'ILE', '0.057']
['7', 'C', '26.060', '12.005', '57.544', 'C', '1', 'ILE', '0.215']
['8', 'O', '25.651', '12.995', '56.933', 'O', '1', 'ILE', '-0.241']
['9', 'CB', '24.841', '12.193', '59.715', 'C', '1', 'ILE', '0.025']
['10', 'HB', '24.474', '13.179', '59.430', 'H', '1', 'ILE', '0.033']
['11', 'CG1', '24.902', '12.121', '61.236', 'C', '1', 'ILE', '-0.068']
['12', 'HG11', '25.432', '11.208', '61.506', 'H', '1', 'ILE', '0.051']
['13', 'HG12', '25.466', '12.984', '61.589', 'H', '1', 'ILE', '0.051']
['14', 'CG2', '23.911', '11.073', '59.220', 'C', '1', 'ILE', '-0.192']
['15', 'HG21', '23.831', '11.123

In [6]:
add_charges_to_molecule(mol, charges)
pocket_atom_indices = find_pocket_atoms_RDKit(mol, protein_or_ligand_ids, num_atoms_in_protein) # just use all of the atoms for now 
# pocket_atom_indices = [atom.GetIdx() for atom in mol.GetAtoms()]
edge_features, edge_indices = get_edge_features(mol, pocket_atom_indices, pairwise_function=ionic_interaction)

NameError: name 'METAL_OX_STATES' is not defined

In [7]:
atom = mol.GetAtomWithIdx(4094)

# Print details about the atom
print(f"Atom symbol: {atom.GetSymbol()}")
print(f"Atom atomic number: {atom.GetAtomicNum()}")
print(f"Atom partial charge: {atom.GetProp('_TriposAtomCharges')}")

Atom symbol: C
Atom atomic number: 6
Atom partial charge: 0.35499999999999998


In [8]:
def find_zinc_atom(mol: Chem.Mol):
    """
    Loop through the RDKit molecule and look for a zinc atom.

    Parameters:
    mol (Chem.Mol): The RDKit molecule.

    Returns:
    int: The index of the zinc atom, or None if no zinc atom is found.
    """
    for atom in mol.GetAtoms():
        if atom.GetAtomicNum() == 30:  # Zinc has an atomic number of 30
            return atom.GetIdx()
    return None

# Find the zinc atom
zinc_index = find_zinc_atom(mol)

In [10]:
atom = mol.GetAtomWithIdx(zinc_index)

# Print details about the atom
print(f"Atom symbol: {atom.GetSymbol()}")
print(f"Atom atomic number: {atom.GetAtomicNum()}")
print(f"Atom partial charge: {atom.GetProp('_TriposAtomCharges')}")

Atom symbol: Zn
Atom atomic number: 30
Atom partial charge: 2


In [11]:
atom = mol.GetAtomWithIdx(4094)

# Print details about the atom
print(f"Atom symbol: {atom.GetSymbol()}")
print(f"Atom atomic number: {atom.GetAtomicNum()}")
print(f"Atom partial charge: {atom.GetProp('_TriposAtomCharges')}")

Atom symbol: C
Atom atomic number: 6
Atom partial charge: 0.35499999999999998


In [12]:
atom = mol.GetAtomWithIdx(4095)

# Print details about the atom
print(f"Atom symbol: {atom.GetSymbol()}")
print(f"Atom atomic number: {atom.GetAtomicNum()}")
print(f"Atom partial charge: {atom.GetProp('_TriposAtomCharges')}")

Atom symbol: O
Atom atomic number: 8
Atom partial charge: -0.27000000000000002


In [13]:
# Extract the last column from edge_features
last_column = edge_features[:, -1]

# Sort edge_indices based on the last column
sorted_indices = np.argsort(last_column)
sorted_edge_indices = edge_indices[sorted_indices]
sorted_edge_features = edge_features[sorted_indices]

print("Sorted Edge Indices:", sorted_edge_indices[:100])

Sorted Edge Indices: [[4095 6305]
 [6305 4095]
 [6305 4096]
 [4096 6305]
 [3684 6304]
 [6304 3684]
 [6305 4134]
 [4134 6305]
 [6305 4011]
 [4011 6305]
 [6305 4091]
 [4091 6305]
 [4736 6304]
 [6304 4736]
 [3700 6304]
 [6304 3700]
 [3679 6304]
 [6304 3679]
 [6304 4739]
 [4739 6304]
 [3695 6304]
 [6304 3695]
 [4074 6305]
 [6305 4074]
 [3683 6304]
 [6304 3683]
 [6304 3630]
 [3630 6304]
 [4041 6305]
 [6305 4041]
 [4095 4094]
 [4094 4095]
 [4096 4094]
 [4094 4096]
 [6304 3623]
 [3623 6304]
 [4786 4788]
 [4788 4786]
 [3678 6304]
 [6304 3678]
 [3684 3682]
 [3682 3684]
 [6305 4090]
 [4090 6305]
 [4787 4786]
 [4786 4787]
 [4102 6305]
 [6305 4102]
 [3683 3682]
 [3682 3683]
 [6331 6324]
 [6324 6331]
 [6331 6326]
 [6326 6331]
 [6304 4748]
 [4748 6304]
 [6304 4733]
 [4733 6304]
 [6323 6328]
 [6328 6323]
 [6329 6323]
 [6323 6329]
 [4808 6304]
 [6304 4808]
 [6325 6331]
 [6331 6325]
 [3257 6304]
 [6304 3257]
 [4169 4168]
 [4168 4169]
 [1371 1370]
 [1370 1371]
 [3997 6305]
 [6305 3997]
 [6305 1446]
 [14

In [14]:
print("Sorted Edge Features:", sorted_edge_features)

Sorted Edge Features: [[   0.            0.            0.            0.            0.
  -262.46352172]
 [   0.            0.            0.            0.            0.
  -262.46352172]
 [   0.            0.            0.            0.            0.
  -253.48171353]
 ...
 [   0.            0.            0.            0.            0.
   292.41441035]
 [   0.            0.            0.            0.            0.
   334.94795218]
 [   0.            0.            0.            0.            0.
   334.94795218]]
