Skip to content

Commit

Permalink
Merge pull request #224 from hjkgrp/ligand_racs
Browse files Browse the repository at this point in the history
Add ligand RACs methods
  • Loading branch information
ralf-meyer committed Apr 22, 2024
2 parents db7532f + e0689e6 commit 7a938be
Show file tree
Hide file tree
Showing 8 changed files with 273 additions and 6 deletions.
10 changes: 10 additions & 0 deletions docs/source/Classes.rst
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,16 @@ mol3D Class
:undoc-members:
:show-inheritance:


Mol2D Class
--------------------------------

.. automodule:: molSimplify.Classes.mol3D
:members:
:undoc-members:
:show-inheritance:


monomer3D Class
--------------------------------

Expand Down
79 changes: 79 additions & 0 deletions molSimplify/Classes/mol2D.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,25 @@ def __repr__(self):

@classmethod
def from_smiles(cls, smiles):
"""Create a Mol2D object from a SMILES string.
Parameters
----------
smiles : str
SMILES representation of the molecule.
Returns
-------
Mol2D
Mol2D object of the molecule
Examples
--------
Create a furan molecule from SMILES:
>>> mol = Mol2D.from_smiles("o1cccc1")
>>> mol
Mol2D(O1C4H4)
"""
mol = cls()

# Load using openbabel OBMol
Expand Down Expand Up @@ -103,13 +122,45 @@ def from_mol_file(cls, filename):
return mol

def graph_hash(self):
"""Calculates the node attributed graph hash of the molecule.
Returns
-------
str
node attributed graph hash
Examples
--------
Create a furan molecule from SMILES:
>>> mol = Mol2D.from_smiles("o1cccc1")
and calculate its graph hash:
>>> mol.graph_hash()
'f6090276d7369c0c0a535113ec1d97cf'
"""
# This is necessary because networkx < 2.7 had a bug in the implementation
# of weisfeiler_lehman_graph_hash
# https://github.com/networkx/networkx/pull/4946#issuecomment-914623654
assert version.parse(nx.__version__) >= version.parse("2.7")
return nx.weisfeiler_lehman_graph_hash(self, node_attr="symbol")

def graph_hash_edge_attr(self):
"""Calculates the edge attributed graph hash of the molecule.
Returns
-------
str
edge attributed graph hash
Examples
--------
Create a furan molecule from SMILES:
>>> mol = Mol2D.from_smiles("o1cccc1")
and calculate its graph hash:
>>> mol.graph_hash_edge_attr()
'a9b6fbc7b5f53613546d5e91a7544ed6'
"""
# This is necessary because networkx < 2.7 had a bug in the implementation
# of weisfeiler_lehman_graph_hash
# https://github.com/networkx/networkx/pull/4946#issuecomment-914623654
Expand All @@ -124,6 +175,27 @@ def graph_hash_edge_attr(self):
return nx.weisfeiler_lehman_graph_hash(G, edge_attr="label")

def graph_determinant(self, return_string=True):
"""Calculates the molecular graph determinant.
Parameters
----------
return_string : bool, optional
Flag to return the determinant as a string. Default is True.
Returns
-------
str
edge attributed graph hash
Examples
--------
Create a furan molecule from SMILES:
>>> mol = Mol2D.from_smiles("o1cccc1")
and calculate its graph hash:
>>> mol.graph_determinant()
'-19404698740'
"""
atomic_masses = globalvars().amass()
weights = np.array(
[
Expand Down Expand Up @@ -164,6 +236,13 @@ def find_metal(self, transition_metals_only: bool = True) -> List[int]:
metal_list : list
List of indices of metal atoms in Mol2D.
Examples
--------
Build Vanadyl acetylacetonate from SMILES:
>>> mol = Mol2D.from_smiles("CC(=[O+]1)C=C(C)O[V-3]12(#[O+])OC(C)=CC(C)=[O+]2")
>>> mol.find_metal()
[7]
"""
globs = globalvars()

Expand Down
54 changes: 50 additions & 4 deletions molSimplify/Informatics/graph_racs.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import numpy as np
import networkx as nx
import operator
from typing import Callable, List, Optional
from typing import Callable, List, Optional, Tuple
from molSimplify.Classes.mol2D import Mol2D
from molSimplify.Classes.globalvars import amassdict, endict

Expand Down Expand Up @@ -137,15 +137,16 @@ def octahedral_racs(
metals = mol.find_metal()
if len(metals) != 1:
raise ValueError("Currently only supports mononuclear TMCs.")
connecting_atoms = list(subgraphs.neighbors(metals[0]))
metal = metals[0]
connecting_atoms = sorted(list(subgraphs.neighbors(metal)))
# Assert that we are removing 6 edges
if len(connecting_atoms) != 6:
raise ValueError(
"First metal in the graph does not have 6 neighbors "
"as expected for an octahedral complex."
)
# Then cut the graph by removing all connections to the first atom
subgraphs.remove_edges_from([(0, c) for c in connecting_atoms])
# Then cut the graph by removing all connections to the metal atom
subgraphs.remove_edges_from([(metal, c) for c in connecting_atoms])

if equatorial_connecting_atoms is None:
# Assume the first 4 connecting atoms belong to the equatorial ligands
Expand Down Expand Up @@ -240,3 +241,48 @@ def octahedral_racs(
)

return output


def ligand_racs(
mol: Mol2D,
depth: int = 3,
full_scope: bool = True,
property_fun: Callable[[Mol2D, int], np.ndarray] = racs_property_vector,
) -> np.ndarray:

# First cut the molecular graph into subgraphs for the metal and the ligands
subgraphs = mol.copy()
# First find all connecting atoms of the first metal:
metals = mol.find_metal()
if len(metals) != 1:
raise ValueError("Currently only supports mononuclear TMCs.")
metal = metals[0]
connecting_atoms = sorted(list(subgraphs.neighbors(metal)))

n_ligands = len(connecting_atoms)
n_props = len(property_fun(mol, list(mol.nodes.keys())[0]))
n_scopes = 4 if full_scope else 2
output = np.zeros((n_ligands, n_scopes, depth + 1, n_props))

# Then cut the graph by removing all connections to the metal atom
subgraphs.remove_edges_from([(metal, c) for c in connecting_atoms])
# Build list of tuples for all connecting atoms and corresponding ligand subgraphs
# TODO: I am sure there is a better way of doing this than looping over all subgraphs
ligand_graphs: List[Tuple[int, Mol2D]] = []
for c in connecting_atoms:
for gi in nx.connected_components(subgraphs):
if c in gi:
ligand_graphs.append((c, subgraphs.subgraph(gi)))
break

# Actual calculation of the RACs
for i, (c, g) in enumerate(ligand_graphs):
# starting with connecting atom centered product RACs
output[i, 0] = atom_centered_AC(g, c, depth=depth, operation=operator.mul, property_fun=property_fun)
output[i, 1] = atom_centered_AC(g, c, depth=depth, operation=operator.sub, property_fun=property_fun)
# Add full scope RACs if requested
if full_scope:
output[i, 2] = multi_centered_AC(g, depth=depth, operation=operator.mul, property_fun=property_fun)
output[i, 3] = multi_centered_AC(g, depth=depth, operation=operator.sub, property_fun=property_fun)

return output
70 changes: 68 additions & 2 deletions tests/informatics/test_graph_racs.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,12 @@
import json
import numpy as np
from molSimplify.Classes.mol2D import Mol2D
from molSimplify.Informatics.graph_racs import atom_centered_AC, multi_centered_AC, octahedral_racs
from molSimplify.Informatics.graph_racs import (
atom_centered_AC,
multi_centered_AC,
octahedral_racs,
ligand_racs,
)


@pytest.fixture
Expand Down Expand Up @@ -97,7 +102,7 @@ def test_octahedral_racs(
equatorial_connecting_atoms=eq_atoms,
)

# Dictionary encoded the order of the descriptors in the numpy array
# Dictionary encoding the order of the descriptors in the numpy array
start_scopes = {
0: ("f", "all"),
1: ("mc", "all"),
Expand Down Expand Up @@ -125,3 +130,64 @@ def test_octahedral_racs(
abs(descriptors[s, d, p] - ref_dict[f"{start}-{prop}-{d}-{scope}"])
< atol
)


@pytest.mark.parametrize(
"mol2_path, ref_path, n_ligs",
[
("fe_carbonyl_6.mol2", "lig_racs_fe_carbonyl_6.json", 6),
(
"mn_furan_water_ammonia_furan_water_ammonia.mol2",
"lig_racs_mn_furan_water_ammonia_furan_water_ammonia.json",
6,
),
(
"co_acac_en.mol2",
"lig_racs_co_acac_en.json",
4,
),
],
)
def test_ligand_racs(
resource_path_root, mol2_path, ref_path, n_ligs, atol=1e-4
):

mol = Mol2D.from_mol2_file(resource_path_root / "inputs" / "informatics" / mol2_path)

with open(resource_path_root / "refs" / "informatics" / ref_path, "r") as fin:
ref_dict = json.load(fin)
print(ref_dict)

depth = 3
descriptors = ligand_racs(
mol,
depth=depth,
full_scope=True
)

assert descriptors.shape == (n_ligs, 4, depth + 1, 5)

starts = {
0: "lc_P",
1: "lc_D",
2: "f_P",
3: "f_D",
}

properties = ["Z", "chi", "T", "I", "S"]
for lig in range(n_ligs):
for s, start in starts.items():
for d in range(depth + 1):
for p, prop in enumerate(properties):
print(
f"lig_{lig}",
start,
d,
prop,
descriptors[lig, s, d, p],
ref_dict[f"lig_{lig}-{start}-{prop}-{d}"],
)
assert (
abs(descriptors[lig, s, d, p] - ref_dict[f"lig_{lig}-{start}-{prop}-{d}"])
< atol
)
63 changes: 63 additions & 0 deletions tests/testresources/inputs/informatics/co_acac_en.mol2
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
@<TRIPOS>MOLECULE
*****
27 28 0 0 0
SMALL
GASTEIGER

@<TRIPOS>ATOM
1 CO 0.0000 0.0000 0.0000 Co.oh 1 UNL1 0.0000
2 O -0.6700 1.8950 0.0000 O.2 1 UNL1 0.0000
3 C -1.3926 2.3556 -0.8711 C.2 1 UNL1 0.0000
4 C -1.8421 3.7813 -0.7554 C.3 1 UNL1 0.0000
5 C -1.8229 1.5252 -2.0274 C.2 1 UNL1 0.0000
6 C -1.4656 0.2373 -2.2091 C.2 1 UNL1 0.0000
7 O -0.6700 -0.3937 -1.3214 O.2 1 UNL1 0.0000
8 C -1.9428 -0.5350 -3.4056 C.3 1 UNL1 0.0000
9 H -2.4146 3.9152 0.1864 H 1 UNL1 0.0000
10 H -2.4909 4.0728 -1.6081 H 1 UNL1 0.0000
11 H -0.9561 4.4502 -0.7405 H 1 UNL1 0.0000
12 H -2.4652 1.9970 -2.7660 H 1 UNL1 0.0000
13 H -2.5897 0.0769 -4.0704 H 1 UNL1 0.0000
14 H -2.5236 -1.4176 -3.0655 H 1 UNL1 0.0000
15 H -1.0665 -0.8830 -3.9914 H 1 UNL1 0.0000
16 N 2.1500 0.0000 -0.0000 N.4 1 UNL1 0.0000
17 N -0.1289 -0.8057 1.3955 N.4 1 UNL1 0.0000
18 C 1.1919 -1.0963 1.9575 C.3 1 UNL1 0.0000
19 C 2.1892 -0.0483 1.4632 C.3 1 UNL1 0.0000
20 H 3.2181 -0.2703 1.8221 H 1 UNL1 0.0000
21 H 1.8975 0.9356 1.8811 H 1 UNL1 0.0000
22 H 1.5356 -2.0933 1.6172 H 1 UNL1 0.0000
23 H 1.1501 -1.1205 3.0686 H 1 UNL1 0.0000
24 H 2.6782 -0.8198 -0.3764 H 1 UNL1 0.0000
25 H 2.6254 0.8658 -0.3340 H 1 UNL1 0.0000
26 H -0.5522 -0.0121 1.9282 H 1 UNL1 0.0000
27 H -0.7494 -1.6354 1.5137 H 1 UNL1 0.0000
@<TRIPOS>BOND
1 13 8 1
2 15 8 1
3 8 14 1
4 8 6 1
5 12 5 1
6 6 5 2
7 6 7 1
8 5 3 1
9 10 4 1
10 7 1 1
11 3 4 1
12 3 2 2
13 4 11 1
14 4 9 1
15 24 16 1
16 25 16 1
17 1 17 1
18 16 19 1
19 17 27 1
20 17 26 1
21 17 18 1
22 19 20 1
23 19 21 1
24 19 18 1
25 22 18 1
26 18 23 1
27 1 16 1
28 1 2 1
Loading

0 comments on commit 7a938be

Please sign in to comment.