Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Classifying geometry based on RMSD #213

Merged
merged 13 commits into from
May 7, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 33 additions & 0 deletions molSimplify/Classes/globalvars.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
import subprocess
from typing import Dict, Tuple
from molSimplify.utils.metaclasses import Singleton
import numpy as np

# Dictionary containing atomic mass, atomic number, covalent radius, num valence electrons
# Data from http://www.webelements.com/ (last accessed May 13th 2015)
Expand Down Expand Up @@ -463,6 +464,28 @@
[[70.5, 70.5, 67.7, 67.7, 120, 120, 135.5, 135.5]]
}

#need each point to be distance 1 from the origin
deg_to_rad = 2*np.pi / 360
all_polyhedra = {
"linear": np.array([(1, 0, 0), (-1, 0, 0)]),
"bent": np.array([(1, 0, 0), (np.cos(120*deg_to_rad), np.sin(120*deg_to_rad), 0)]),
"trigonal planar": np.array([(1, 0, 0), (np.cos(120*deg_to_rad), np.sin(120*deg_to_rad), 0), (np.cos(240*deg_to_rad), np.sin(240*deg_to_rad), 0)]),
"T shape": np.array([(1, 0, 0), (np.cos(90*deg_to_rad), np.sin(90*deg_to_rad), 0), (np.cos(180*deg_to_rad), np.sin(180*deg_to_rad), 0)]),
"trigonal pyramidal": np.array([(1, -1, -1), (-1, 1, -1), (-1, -1, 1)])/np.sqrt(3),
"tetrahedral": np.array([(1, 1, 1), (1, -1, -1), (-1, 1, -1), (-1, -1, 1)])/np.sqrt(3),
"square planar": np.array([(1, 0, 0), (0, 1, 0), (-1, 0, 0), (0, -1, 0)]),
"seesaw": np.array([(1, 0, 0), (np.cos(120*deg_to_rad), np.sin(120*deg_to_rad), 0), (0, 0, 1), (0, 0, -1)]),
"trigonal bipyramidal": np.array([(0, 0, 1), (0, 0, -1), (1, 0, 0), (np.cos(120*deg_to_rad), np.sin(120*deg_to_rad), 0), (np.cos(240*deg_to_rad), np.sin(240*deg_to_rad), 0)]),
"square pyramidal": np.array([(1, 0, 0), (0, 1, 0), (-1, 0, 0), (0, -1, 0), (0, 0, 1)]),
"pentagonal planar": np.array([(1, 0, 0), (np.cos(72*deg_to_rad), np.sin(72*deg_to_rad), 0), (np.cos(144*deg_to_rad), np.sin(144*deg_to_rad), 0), (np.cos(216*deg_to_rad), np.sin(216*deg_to_rad), 0), (np.cos(288*deg_to_rad), np.sin(288*deg_to_rad), 0)]),
"octahedral": np.array([(1, 0, 0), (0, 1, 0), (-1, 0, 0), (0, -1, 0), (0, 0, 1), (0, 0, -1)]),
"pentagonal pyramidal": np.array([(1, 0, 0), (np.cos(72*deg_to_rad), np.sin(72*deg_to_rad), 0), (np.cos(144*deg_to_rad), np.sin(144*deg_to_rad), 0), (np.cos(216*deg_to_rad), np.sin(216*deg_to_rad), 0), (np.cos(288*deg_to_rad), np.sin(288*deg_to_rad), 0), (0, 0, 1)]),
"trigonal prismatic": np.array([(1, 0, 1), (np.cos(120*deg_to_rad), np.sin(120*deg_to_rad), 1), (np.cos(240*deg_to_rad), np.sin(240*deg_to_rad), 1), (1, 0, -1), (np.cos(120*deg_to_rad), np.sin(120*deg_to_rad), -1), (np.cos(240*deg_to_rad), np.sin(240*deg_to_rad), -1)])/np.sqrt(2),
"pentagonal bipyramidal": np.array([(1, 0, 0), (np.cos(72*deg_to_rad), np.sin(72*deg_to_rad), 0), (np.cos(144*deg_to_rad), np.sin(144*deg_to_rad), 0), (np.cos(216*deg_to_rad), np.sin(216*deg_to_rad), 0), (np.cos(288*deg_to_rad), np.sin(288*deg_to_rad), 0), (0, 0, 1), (0, 0, -1)]),
"square antiprismatic": np.array([(1, 0, 1), (0, 1, 1), (-1, 0, 1), (0, -1, 1), (np.cos(45*deg_to_rad), np.sin(45*deg_to_rad), -1), (np.cos(135*deg_to_rad), np.sin(135*deg_to_rad), -1), (np.cos(225*deg_to_rad), np.sin(225*deg_to_rad), -1), (np.cos(315*deg_to_rad), np.sin(315*deg_to_rad), -1)])/np.sqrt(2),
"tricapped trigonal prismatic": np.array([(1, 0, 1)/np.sqrt(2), (np.cos(120*deg_to_rad), np.sin(120*deg_to_rad), 1)/np.sqrt(2), (np.cos(240*deg_to_rad), np.sin(240*deg_to_rad), 1)/np.sqrt(2), (1, 0, -1)/np.sqrt(2), (np.cos(120*deg_to_rad), np.sin(120*deg_to_rad), -1)/np.sqrt(2), (np.cos(240*deg_to_rad), np.sin(240*deg_to_rad), -1)/np.sqrt(2), (np.cos(60*deg_to_rad), np.sin(60*deg_to_rad), 0), (np.cos(180*deg_to_rad), np.sin(180*deg_to_rad), 0), (np.cos(300*deg_to_rad), np.sin(300*deg_to_rad), 0)])
}

# Module for running bash commands
# @param cmd String containing command to be run
# @return bash output string
Expand Down Expand Up @@ -720,6 +743,16 @@ def get_all_angle_refs(self):
"""
return all_angle_refs

def get_all_polyhedra(self):
"""Get reference polyhedra dict.

Returns
-------
all_polyhedra : dict
Reference polyhedra for various geometries.
"""
return all_polyhedra

def add_custom_path(self, path):
"""Record custom path in ~/.molSimplify file

Expand Down
176 changes: 172 additions & 4 deletions molSimplify/Classes/mol3D.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
from openbabel import openbabel # version 3 style import
except ImportError:
import openbabel # fallback to version 2
from typing import List, Optional, Tuple
from typing import List, Optional, Tuple, Dict, Any
from scipy.spatial import ConvexHull
from molSimplify.utils.decorators import deprecated

Expand All @@ -25,7 +25,8 @@
from molSimplify.Scripts.geometry import (distance, connectivity_match,
vecangle, rotation_params,
rotate_around_axis)
from molSimplify.Scripts.rmsd import rigorous_rmsd
from molSimplify.Scripts.rmsd import rigorous_rmsd, kabsch_rmsd, kabsch_rotate
from itertools import permutations

try:
import PyQt5 # noqa: F401
Expand Down Expand Up @@ -4918,7 +4919,7 @@ def Structure_inspection(self, init_mol=None, catoms_arr=None, num_coord=5, dict
num_coord=num_coord, debug=debug)
return flag_oct, flag_list, dict_oct_info, flag_oct_loose, flag_list_loose

def get_fcs(self, strict_cutoff=False, catom_list=None):
def get_fcs(self, strict_cutoff=False, catom_list=None, max6=True):
"""
Get first coordination shell of a transition metal complex.

Expand All @@ -4928,6 +4929,8 @@ def get_fcs(self, strict_cutoff=False, catom_list=None):
strict bonding cutoff for fullerene and SACs
catom_list : list, optional
List of indices of coordinating atoms.
max6 : bool, optional
If True, will return catoms from oct_comp.

Returns
-------
Expand All @@ -4938,7 +4941,7 @@ def get_fcs(self, strict_cutoff=False, catom_list=None):
self.get_num_coord_metal(debug=False, strict_cutoff=strict_cutoff, catom_list=catom_list)
catoms = self.catoms
# print(catoms, [self.getAtom(x).symbol() for x in catoms])
if len(catoms) > 6:
if max6 and len(catoms) > 6:
_, catoms = self.oct_comp(debug=False)
fcs = [metalind] + catoms
return fcs
Expand Down Expand Up @@ -5683,6 +5686,171 @@ def get_geometry_type(self, dict_check=False, angle_ref=False,
}
return results

def get_geometry_type_distance(
self, max_dev=1e6, close_dev=1e-2,
flag_catoms=False, catoms_arr=None,
skip=False, transition_metals_only=False) -> Dict[str, Any]:
"""
Get the type of the geometry (available options in globalvars all_geometries).

uses hapticity truncated first coordination shell.
Does not require the input of num_coord.

Parameters
----------
max_dev : float, optional
Maximum RMSD allowed between a structure and an ideal geometry before it is classified as unknown. Default is 1e6.
close_dev : float, optional
Maximum difference in RMSD between two classifications allowed before they are compared by maximum single-atom deviation as well.
flag_catoms : bool, optional
Whether or not to return the catoms arr. Default as False.
catoms_arr : Nonetype, optional
Uses the catoms of the mol3D by default. User and overwrite this connection atom array by explicit input.
Default is Nonetype.
skip : list, optional
Geometry checks to skip. Default is False.
transition_metals_only : bool, optional
Flag for considering more than just transition metals as metals. Default is False.

Returns
-------
results : dictionary
Contains the classified geometry and the RMSD from an ideal structure.
Summary contains a list of the RMSD and the maximum single-atom deviation for all considered geometry types.

"""

first_shell, hapt = self.get_first_shell()
num_coord = first_shell.natoms - 1
all_geometries = globalvars().get_all_geometries()
all_polyhedra = globalvars().get_all_polyhedra()
summary = {}

if len(first_shell.graph): # Find num_coord based on metal_cn if graph is assigned
if len(first_shell.findMetal()) > 1:
raise ValueError('Multimetal complexes are not yet handled.')
elif len(first_shell.findMetal(transition_metals_only=transition_metals_only)) == 1:
# Use oct=False to ensure coordination number based on radius cutoffs only
num_coord = len(first_shell.getBondedAtomsSmart(first_shell.findMetal(transition_metals_only=transition_metals_only)[0], oct=False))
else:
raise ValueError('No metal centers exist in this complex.')
if catoms_arr is not None and len(catoms_arr) != num_coord:
raise ValueError("num_coord and the length of catoms_arr do not match.")

if num_coord not in list(all_geometries.keys()):
# should we indicate somehow that these are unknown due to a different coordination number?
results = {
"geometry": "unknown",
"rmsd": np.NAN,
"summary": {},
"hapticity": hapt,
"close_rmsds": False
}
return results

possible_geometries = all_geometries[num_coord]

# for each same-coordinated geometry, get the minimum RMSD and the maximum single-atom deviation in that pairing
for geotype in possible_geometries:
rmsd_calc, max_dist = self.dev_from_ideal_geometry(all_polyhedra[geotype])
summary.update({geotype: [rmsd_calc, max_dist]})

close_rmsds = False
current_rmsd, geometry = max_dev, "unknown"
for geotype in summary:
# if the RMSD for this structure is the lowest seen so far (within a threshold)
if summary[geotype][0] < (current_rmsd + close_dev):
# if the RMSDs are close, flag this in the summary and classify on second criterion
if np.abs(summary[geotype][0] - current_rmsd) < close_dev:
close_rmsds = True
if summary[geotype][1] < summary[geometry][1]:
# classify based on largest singular deviation
current_rmsd = summary[geotype][0]
geometry = geotype
else:
current_rmsd = summary[geotype][0]
geometry = geotype

results = {
"geometry": geometry,
"rmsd": current_rmsd,
"summary": summary,
"hapticity": hapt,
"close_rmsds": close_rmsds
}
return results

def dev_from_ideal_geometry(self, ideal_polyhedron: np.ndarray) -> Tuple[float, float]:
"""
Return the minimum RMSD between a geometry and an ideal polyhedron (with the same average bond distances).
Enumerates all possible indexing of the geometry. As such, only recommended for small systems.

Parameters
----------
ideal_polyhedron: np.array of 3-tuples of coordinates
Reference list of points for an ideal geometry

Returns
-------
rmsd: float
Minimum root mean square distance between the fed geometry and the ideal polyhedron
single_dev: float
Maximum distance between any paired points in the fed geometry and the ideal polyhedron.
"""

metal_idx = self.findMetal()
if len(metal_idx) == 0:
raise ValueError('No metal centers exist in this complex.')
elif len(metal_idx) != 1:
raise ValueError('Multimetal complexes are not yet handled.')
temp_mol = self.get_first_shell()[0]
fcs_indices = temp_mol.get_fcs(max6=False)
# remove metal index from first coordination shell
fcs_indices.remove(temp_mol.findMetal()[0])

if len(fcs_indices) != len(ideal_polyhedron):
raise ValueError('The coordination number differs between the two provided structures.')

# have to redo getting metal_idx with the new mol after running get_first_shell
# want to work with temp_mol since it has the edge and sandwich logic implemented to replace those with centroids
metal_atom = temp_mol.getAtoms()[temp_mol.findMetal()[0]]
fcs_atoms = [temp_mol.getAtoms()[i] for i in fcs_indices]
# construct a np array of the non-metal atoms in the FCS
distances = []
positions = np.zeros([len(fcs_indices), 3])
for idx, atom in enumerate(fcs_atoms):
distance = atom.distance(metal_atom)
distances.append(distance)
# shift so the metal is at (0, 0, 0)
positions[idx, :] = np.array(atom.coords()) - np.array(metal_atom.coords())

current_min = np.inf
orders = permutations(range(len(ideal_polyhedron)))
max_dist = 0

# if desired, make it so the ideal polyhedron has same average bond distance as the mol
# scaled_polyhedron = ideal_polyhedron * np.mean(np.array(distances))

# for all possible assignments, find RMSD between ideal and actual structure
ideal_positions = np.zeros([len(fcs_indices), 3])
for order in orders:
for i in range(len(order)):
# if you wanted to use the same average bond length for all, use the following
# ideal_positions[i, :] = scaled_polyhedron[order[i]]
# if you want to let each ligand scale its length independently, uncomment the following
ideal_positions[i, :] = ideal_polyhedron[order[i]] * distances[i]
rmsd_calc = kabsch_rmsd(ideal_positions, positions)
if rmsd_calc < current_min:
current_min = rmsd_calc
# calculate and store the maximum pairwise distance
rot_ideal = kabsch_rotate(ideal_positions, positions)
diff_matrix = rot_ideal - positions
pairwise_dists = np.sum(diff_matrix**2, axis=1)
max_dist = np.max(pairwise_dists)

# return minimum RMSD, maximum pairwise distance in that structure
return current_min, max_dist

def get_features(self, lac=True, force_generate=False, eq_sym=False,
use_dist=False, NumB=False, Gval=False, size_normalize=False,
alleq=False, strict_cutoff=False, catom_list=None, MRdiag_dict={}, depth=3):
Expand Down
12 changes: 6 additions & 6 deletions molSimplify/Data/tbp.dat
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
0.0 0.0 0.0
-1.7865930597 -0.0003546225 -0.0007425232
1.7865967124 -0.0003546332 -0.0007425118
2.86499999724e-07 1.382576768 -1.1498771629
2.92799999801e-07 0.2985354596 1.7728580026
4.09099999654e-07 -1.6828658296 -0.6318871017
0.00000 0.00000 0.00000
2.00000 0.00000 0.00000
-1.00000 1.73205 0.00000
-1.00000 -1.73205 0.00000
0.00000 0.00000 2.00000
0.00000 0.00000 -2.00000
6 changes: 3 additions & 3 deletions molSimplify/Data/tpl.dat
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
0.00000 0.00000 0.00000
-1.58487 0.39844 0.52472
0.77037 -1.41380 0.59470
0.78682 0.99734 -1.17123
2.00000 0.00000 0.00000
-1.00000 1.73205 0.00000
-1.00000 -1.73205 0.00000
66 changes: 66 additions & 0 deletions tests/test_mol3D.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import numpy as np
from molSimplify.Classes.mol3D import mol3D
from molSimplify.Classes.atom3D import atom3D
from molSimplify.Classes.globalvars import globalvars


def test_adding_and_deleting_atoms():
Expand Down Expand Up @@ -335,3 +336,68 @@ def test_mol3D_from_smiles_benzene():

np.testing.assert_allclose(mol.graph, ref_graph)
np.testing.assert_allclose(mol.bo_graph, ref_bo_graph)


@pytest.mark.parametrize(
"geo_type, key",
[
('linear', 'linear'),
('trigonal_planar', 'trigonal planar'),
('t_shape', 'T shape'),
('trigonal_pyramidal', 'trigonal pyramidal'),
('tetrahedral', 'tetrahedral'),
('square_planar', 'square planar'),
('seesaw', 'seesaw'),
('trigonal_bipyramidal', 'trigonal bipyramidal'),
('square_pyramidal', 'square pyramidal'),
('pentagonal_planar', 'pentagonal planar'),
('octahedral', 'octahedral'),
('pentagonal_pyramidal', 'pentagonal pyramidal'),
# ('trigonal_prismatic', 'trigonal prismatic'),
# ('pentagonal_bipyramidal', 'pentagonal bipyramidal'),
# ('square_antiprismatic', 'square antiprismatic'),
# ('tricapped_trigonal_prismatic', 'tricapped trigonal prismatic'),
]
)
def test_dev_from_ideal_geometry(resource_path_root, geo_type, key):
mol = mol3D()
mol.readfromxyz(resource_path_root / "inputs" / "geometry_type" / f"{geo_type}.xyz")

globs = globalvars()
polyhedra = globs.get_all_polyhedra()
rmsd, max_dev = mol.dev_from_ideal_geometry(polyhedra[key])

print(polyhedra[key])

assert rmsd < 1e-3
assert max_dev < 1e-3


@pytest.mark.parametrize(
"geo_type, ref",
[
('linear', 'linear'),
('trigonal_planar', 'trigonal planar'),
('t_shape', 'T shape'),
('trigonal_pyramidal', 'trigonal pyramidal'),
('tetrahedral', 'tetrahedral'),
('square_planar', 'square planar'),
('seesaw', 'seesaw'),
('trigonal_bipyramidal', 'trigonal bipyramidal'),
('square_pyramidal', 'square pyramidal'),
('pentagonal_planar', 'pentagonal planar'),
('octahedral', 'octahedral'),
('pentagonal_pyramidal', 'pentagonal pyramidal'),
('trigonal_prismatic', 'trigonal prismatic'),
# ('pentagonal_bipyramidal', 'pentagonal bipyramidal'),
# ('square_antiprismatic', 'square antiprismatic'),
# ('tricapped_trigonal_prismatic', 'tricapped trigonal prismatic'),
]
)
def test_geo_geometry_type_distance(resource_path_root, geo_type, ref):
mol = mol3D()
mol.readfromxyz(resource_path_root / "inputs" / "geometry_type" / f"{geo_type}.xyz")

result = mol.get_geometry_type_distance()
print(result)
assert result['geometry'] == ref
26 changes: 13 additions & 13 deletions tests/testresources/inputs/geometry_type/seesaw.xyz
Original file line number Diff line number Diff line change
@@ -1,15 +1,15 @@
13
07/15/2022 21:19, XYZ structure generated by mol3D Class, molSimplify
04/30/2024 13:23, XYZ structure generated by mol3D Class, molSimplify
Fe 0.000000 0.000000 0.000000
O -2.120000 -0.000421 -0.000881
H -2.730515 0.026086 0.779322
H -2.727124 -0.028395 -0.783794
O 2.120000 -0.000421 -0.000881
H 2.728823 -0.572514 -0.533990
H 2.728652 0.572801 0.531233
O 0.000000 1.629945 -1.355611
H 0.153088 1.607265 -2.334413
H -0.153473 2.588291 -1.155199
O 0.000000 -1.984703 -0.745222
H 0.126817 -2.825828 -0.236778
H -0.126096 -2.282875 -1.681767
O 2.120000 0.000000 0.000000
H 2.726198 0.027327 -0.782547
H 2.726184 -0.027327 0.782547
O -1.060000 1.835974 0.000000
H -1.410402 2.333646 -0.781116
H -1.315789 2.388255 0.781116
O 0.000000 0.000000 2.120000
H -0.293326 -0.726007 2.726198
H 0.293326 0.726007 2.726184
O 0.000000 -0.000000 -2.120000
H 0.222961 0.750609 -2.726198
H -0.222961 -0.750609 -2.726184
Loading
Loading