Skip to content

Commit

Permalink
Merge pull request #207 from hjkgrp/mol2D
Browse files Browse the repository at this point in the history
Mol2D
  • Loading branch information
ralf-meyer committed Mar 21, 2024
2 parents c8018c1 + 8ded31e commit bc503a1
Show file tree
Hide file tree
Showing 15 changed files with 592 additions and 38 deletions.
1 change: 1 addition & 0 deletions devtools/conda-envs/mols.yml
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ dependencies:
- pandas
- scipy
- libffi
- networkx >= 2.7
- importlib_resources
- pip
- pip:
Expand Down
1 change: 1 addition & 0 deletions devtools/conda-envs/mols_minimal.yml
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ dependencies:
- pandas
- scipy
- libffi
- networkx >= 2.7
- importlib_resources
- pip
- pip:
Expand Down
150 changes: 150 additions & 0 deletions molSimplify/Classes/mol2D.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,150 @@
import networkx as nx
import numpy as np
from packaging import version
from molSimplify.Classes.globalvars import globalvars

try:
from openbabel import openbabel # version 3 style import
except ImportError:
import openbabel # fallback to version 2


class Mol2D(nx.Graph):

def __repr__(self):
atom_counts = {}
for _, sym in self.nodes.data(data="symbol", default="X"):
if sym not in atom_counts:
atom_counts[sym] = 1
else:
atom_counts[sym] += 1

symbols = globalvars().elementsbynum()
formula = ""
for sym in sorted(atom_counts.keys(),
key=lambda x: symbols.index(x), reverse=True):
formula += f"{sym}{atom_counts[sym]}"
return f"Mol2D({formula})"

@classmethod
def from_smiles(cls, smiles):
mol = cls()

# Load using openbabel OBMol
obConversion = openbabel.OBConversion()
OBMol = openbabel.OBMol()
obConversion.SetInFormat('smi')
obConversion.ReadString(OBMol, smiles)
OBMol.AddHydrogens()

symbols = globalvars().elementsbynum()
# Add atoms
for i, atom in enumerate(openbabel.OBMolAtomIter(OBMol)):
sym = symbols[atom.GetAtomicNum() - 1]
mol.add_node(i, symbol=sym)

# Add bonds
for bond in openbabel.OBMolBondIter(OBMol):
# Subtract 1 because of zero indexing vs. one indexing
mol.add_edge(bond.GetBeginAtomIdx() - 1, bond.GetEndAtomIdx() - 1)

return mol

@classmethod
def from_mol2_file(cls, filename):
mol = cls()

with open(filename, "r") as fin:
lines = fin.readlines()

# Read counts line:
sp = lines[2].split()
n_atoms = int(sp[0])
n_bonds = int(sp[1])

atom_start = lines.index("@<TRIPOS>ATOM\n") + 1
for i, line in enumerate(lines[atom_start:atom_start + n_atoms]):
sp = line.split()
sym = sp[5].split(".")[0]
mol.add_node(i, symbol=sym)

bond_start = lines.index("@<TRIPOS>BOND\n") + 1
for line in lines[bond_start:bond_start + n_bonds]:
sp = line.split()
# Subtract 1 because of zero indexing vs. one indexing
mol.add_edge(int(sp[1]) - 1, int(sp[2]) - 1)

return mol

@classmethod
def from_mol_file(cls, filename):
mol = cls()

with open(filename, "r") as fin:
lines = fin.readlines()

# Read counts line:
sp = lines[3].split()
n_atoms = int(sp[0])
n_bonds = int(sp[1])

# Add atoms (offset of 4 for the header lines):
for i, line in enumerate(lines[4:4 + n_atoms]):
sp = line.split()
mol.add_node(i, symbol=sp[3])

# Add bonds:
for line in lines[4 + n_atoms:4 + n_atoms + n_bonds]:
sp = line.split()
# Subtract 1 because of zero indexing vs. one indexing
mol.add_edge(int(sp[0]) - 1, int(sp[1]) - 1)

return mol

def graph_hash(self):
# This is necessary because networkx < 2.7 had a bug in the implementation
# of weisfeiler_lehman_graph_hash
# https://github.com/networkx/networkx/pull/4946#issuecomment-914623654
assert version.parse(nx.__version__) >= version.parse("2.7")
return nx.weisfeiler_lehman_graph_hash(self, node_attr="symbol")

def graph_hash_edge_attr(self):
# This is necessary because networkx < 2.7 had a bug in the implementation
# of weisfeiler_lehman_graph_hash
# https://github.com/networkx/networkx/pull/4946#issuecomment-914623654
assert version.parse(nx.__version__) >= version.parse("2.7")
# Copy orginal graph before adding edge attributes
G = self.copy()

for i, j in G.edges:
G.edges[i, j]["label"] = "".join(sorted([G.nodes[i]["symbol"],
G.nodes[j]["symbol"]]))

return nx.weisfeiler_lehman_graph_hash(G, edge_attr="label")

def graph_determinant(self, return_string=True):
atomic_masses = globalvars().amass()
weights = np.array(
[
atomic_masses[atom[1]][0]
for atom in self.nodes(data="symbol", default="X")
]
)
adjacency = nx.to_numpy_array(self)
mat = np.outer(weights, weights) * adjacency
np.fill_diagonal(mat, weights)
with np.errstate(over='raise'):
try:
det = np.linalg.det(mat)
except (np.linalg.LinAlgError, FloatingPointError):
(sign, det) = np.linalg.slogdet(mat)
if sign != 0:
det = sign*det
if return_string:
det = str(det)
if "e+" in det:
sp = det.split("e+")
det = sp[0][0:12] + "e+" + sp[1]
else:
det = det[0:12]
return det
2 changes: 1 addition & 1 deletion molSimplify/Classes/mol3D.py
Original file line number Diff line number Diff line change
Expand Up @@ -5247,7 +5247,7 @@ def get_mol_graph_det(self, oct=True, useBOMat=False):
with np.errstate(over='raise'):
try:
det = np.linalg.det(tmpgraph)
except np.linalg.LinAlgError:
except (np.linalg.LinAlgError, FloatingPointError):
(sign, det) = np.linalg.slogdet(tmpgraph)
if sign != 0:
det = sign*det
Expand Down
36 changes: 18 additions & 18 deletions molSimplify/Ligands/acac.mol
Original file line number Diff line number Diff line change
@@ -1,33 +1,33 @@

OpenBabel11081614413D
OpenBabel03142421563D

14 13 0 0 0 0 0 0 0 0999 V2000
3.1117 0.8158 -1.3940 O 0 5 0 0 0 0 0 0 0 0 0 0
2.5035 0.2178 -0.5193 C 0 0 0 0 0 0 0 0 0 0 0 0
1.0119 0.1135 -0.5927 C 0 0 0 0 0 0 0 0 0 0 0 0
3.2352 -0.4172 0.6354 C 0 0 0 0 0 0 0 0 0 0 0 0
4.7377 -0.3230 0.7389 C 0 0 0 0 0 0 0 0 0 0 0 0
5.3853 0.2682 -0.1114 O 0 0 0 0 0 0 0 0 0 0 0 0
5.4522 -0.9568 1.8929 C 0 0 0 0 0 0 0 0 0 0 0 0
0.6247 0.6297 -1.4972 H 0 0 0 0 0 0 0 0 0 0 0 0
0.5611 0.5821 0.3070 H 0 0 0 0 0 0 0 0 0 0 0 0
0.7146 -0.9552 -0.6360 H 0 0 0 0 0 0 0 0 0 0 0 0
4.7384 -1.4561 2.5816 H 0 0 0 0 0 0 0 0 0 0 0 0
6.0070 -0.1796 2.4590 H 0 0 0 0 0 0 0 0 0 0 0 0
6.1702 -1.7146 1.5151 H 0 0 0 0 0 0 0 0 0 0 0 0
2.7035 -0.9212 1.3622 H 0 0 0 0 0 0 0 0 0 0 0 0
3.1144 0.8372 -1.4020 O 0 0 0 0 0 0 0 0 0 0 0 0
2.5505 0.2219 -0.5095 C 0 0 0 0 0 0 0 0 0 0 0 0
1.0576 0.0909 -0.5564 C 0 0 0 0 0 0 0 0 0 0 0 0
3.3222 -0.3885 0.6057 C 0 0 0 0 0 0 0 0 0 0 0 0
4.6639 -0.3188 0.7249 C 0 0 0 0 0 0 0 0 0 0 0 0
5.4090 0.3313 -0.1923 O 0 5 0 0 0 0 0 0 0 0 0 0
5.3707 -0.9629 1.8831 C 0 0 0 0 0 0 0 0 0 0 0 0
0.5953 1.1001 -0.5394 H 0 0 0 0 0 0 0 0 0 0 0 0
0.6737 -0.4886 0.3095 H 0 0 0 0 0 0 0 0 0 0 0 0
0.7595 -0.4299 -1.4905 H 0 0 0 0 0 0 0 0 0 0 0 0
2.7584 -0.9218 1.3662 H 0 0 0 0 0 0 0 0 0 0 0 0
4.6658 -1.4697 2.5767 H 0 0 0 0 0 0 0 0 0 0 0 0
5.9265 -0.1881 2.4512 H 0 0 0 0 0 0 0 0 0 0 0 0
6.0904 -1.7166 1.5010 H 0 0 0 0 0 0 0 0 0 0 0 0
1 2 2 0 0 0 0
2 3 1 0 0 0 0
2 4 1 0 0 0 0
3 8 1 0 0 0 0
3 9 1 0 0 0 0
3 10 1 0 0 0 0
4 5 2 0 0 0 0
4 14 1 0 0 0 0
4 11 1 0 0 0 0
5 6 1 0 0 0 0
5 7 1 0 0 0 0
7 11 1 0 0 0 0
7 12 1 0 0 0 0
7 13 1 0 0 0 0
M CHG 1 5 -1
7 14 1 0 0 0 0
M CHG 1 6 -1
M END
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ dependencies = [
"numpy",
"scipy",
"pandas",
"networkx",
"networkx>=2.7",
"scikit-learn",
"keras",
"tensorflow",
Expand Down
121 changes: 121 additions & 0 deletions tests/test_Mol2D.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,121 @@
import pytest
from molSimplify.Classes.mol2D import Mol2D


def water_Mol2D():
mol = Mol2D()
mol.add_nodes_from(
[(0, {"symbol": "O"}), (1, {"symbol": "H"}), (2, {"symbol": "H"})]
)
mol.add_edges_from([(0, 1), (0, 2)])
return mol


def furan_Mol2D():
mol = Mol2D()
mol.add_nodes_from(
[
(0, {"symbol": "O"}),
(1, {"symbol": "C"}),
(2, {"symbol": "C"}),
(3, {"symbol": "C"}),
(4, {"symbol": "C"}),
(5, {"symbol": "H"}),
(6, {"symbol": "H"}),
(7, {"symbol": "H"}),
(8, {"symbol": "H"}),
]
)
mol.add_edges_from(
[(0, 1), (1, 2), (2, 3), (3, 4), (0, 4), (1, 5), (2, 6), (3, 7), (4, 8)]
)
return mol


def acac_Mol2D():
mol = Mol2D()
mol.add_nodes_from(
[
(0, {"symbol": "O"}),
(1, {"symbol": "C"}),
(2, {"symbol": "C"}),
(3, {"symbol": "C"}),
(4, {"symbol": "C"}),
(5, {"symbol": "O"}),
(6, {"symbol": "C"}),
(7, {"symbol": "H"}),
(8, {"symbol": "H"}),
(9, {"symbol": "H"}),
(10, {"symbol": "H"}),
(11, {"symbol": "H"}),
(12, {"symbol": "H"}),
(13, {"symbol": "H"}),
]
)
mol.add_edges_from(
[(0, 1), (1, 2), (1, 3), (2, 7), (2, 8), (2, 9), (3, 4),
(3, 10), (4, 5), (4, 6), (6, 11), (6, 12), (6, 13)]
)
return mol


@pytest.mark.parametrize(
"mol, ref",
[
(water_Mol2D(), "Mol2D(O1H2)"),
(furan_Mol2D(), "Mol2D(O1C4H4)"),
(acac_Mol2D(), "Mol2D(O2C5H7)"),
])
def test_Mol2D_repr(mol, ref):
assert mol.__repr__() == ref


@pytest.mark.parametrize(
"name, smiles, mol_ref",
[
("water", "O", water_Mol2D()),
("furan", "o1cccc1", furan_Mol2D()),
("acac", "[O-]-C(C)=CC(=O)C", acac_Mol2D()),
]
)
def test_Mol2D_constructors(resource_path_root, name, smiles, mol_ref):
# From mol file
mol = Mol2D.from_mol_file(resource_path_root / "inputs" / "io" / f"{name}.mol")

assert mol.nodes == mol_ref.nodes
assert mol.edges == mol_ref.edges

# From mol2 file
mol = Mol2D.from_mol2_file(resource_path_root / "inputs" / "io" / f"{name}.mol2")

assert mol.nodes == mol_ref.nodes
assert mol.edges == mol_ref.edges

# From smiles
mol = Mol2D.from_smiles(smiles)

assert mol.nodes == mol_ref.nodes
assert mol.edges == mol_ref.edges


@pytest.mark.parametrize(
"filename, node_hash_ref, edge_hash_ref, graph_det_ref",
[
("water.mol2", "2c96eb4288d63b2a9437ddd4172e67f3", "e5206dd18f7e829cf77fae1a48e7b0b9", "-507.9380086"),
("formaldehyde.mol2", "e4af80ed7395aef9001d1b170c5c0ec6", "df21357bb47fe3aa2e062c7e3a3b573e", "-42043.85450"),
("furan.mol2", "f6090276d7369c0c0a535113ec1d97cf", "a9b6fbc7b5f53613546d5e91a7544ed6", "-19404698740"),
("acac.mol2", "0dfc6836e022406f1be7b5627451d590", "d4879258a3a6c832b55b2e9d7387f96d", "-2.822252628e+16"),
("ADUYUV.mol2", "e134585068e342d12364850112ec7609", "9bc68a69bfeceabffe878084da72f542", "2.7503745443e+80"),
]
)
def test_Mol2D_graph_hash(resource_path_root, filename, node_hash_ref, edge_hash_ref, graph_det_ref):
# From mol file
mol = Mol2D.from_mol2_file(resource_path_root / "inputs" / "io" / filename)

node_hash = mol.graph_hash()
edge_hash = mol.graph_hash_edge_attr()
graph_det = mol.graph_determinant()

assert node_hash == node_hash_ref
assert edge_hash == edge_hash_ref
assert graph_det == graph_det_ref
Loading

0 comments on commit bc503a1

Please sign in to comment.