diff --git a/cctk/ensemble.py b/cctk/ensemble.py index eac9659..170e486 100644 --- a/cctk/ensemble.py +++ b/cctk/ensemble.py @@ -210,7 +210,7 @@ def join_ensembles(cls, ensembles, name=None): return new_ensemble - def align(self, align_to=1, atoms=None): + def align(self, align_to=1, atoms=None, return_rmsd=False): """ Aligns every geometry to the specified geometry based on the atoms in `atom_numbers`. If `atom_numbers` is `None`, then a full alignment is performed. @@ -218,9 +218,11 @@ def align(self, align_to=1, atoms=None): align_to (int): which geometry to align to (1-indexed) atoms (list): which atoms to align in each molecule (1-indexed; must be at least 3) alternatively, specify ``None`` for all atoms or "heavy" for all heavy atoms + return_rmsd (Bool): whether to return RMSD before and after rotation Returns: a new ``Ensemble()`` object with the objects aligned + (optional) before rmsd and after rmsd """ self._check_molecule_number(align_to) @@ -246,16 +248,23 @@ def align(self, align_to=1, atoms=None): molecule.translate_molecule(-centroid) template = self.molecules[align_to - 1].geometry[atoms] + before_rmsd = 0 + after_rmsd = 0 #### perform alignment using Kabsch algorithm new_ensemble = copy.deepcopy(self) for molecule in new_ensemble.molecules: + before_rmsd += compute_RMSD(template, molecule.geometry[atoms]) new_geometry = align_matrices(molecule.geometry[atoms], molecule.geometry, template) molecule.geometry = new_geometry + after_rmsd += compute_RMSD(template, molecule.geometry[atoms]) assert len(molecule.geometry) == len(molecule.atomic_numbers), "wrong number of geometry elements!" - return new_ensemble + if return_rmsd: + return new_ensemble, before_rmsd, after_rmsd + else: + return new_ensemble def eliminate_redundant(self, cutoff=0.5, heavy_only=True, atom_numbers=None): """ diff --git a/cctk/gaussian_file.py b/cctk/gaussian_file.py index 6078669..7069f1e 100644 --- a/cctk/gaussian_file.py +++ b/cctk/gaussian_file.py @@ -149,6 +149,8 @@ def write_file(self, filename, molecule=None, header=None, footer=None, **kwargs if header is None: header = self.header + + if footer is None: footer = self.footer self.write_molecule_to_file(filename, molecule, header, footer, **kwargs) diff --git a/cctk/helper_functions.py b/cctk/helper_functions.py index 6794d16..361c98c 100644 --- a/cctk/helper_functions.py +++ b/cctk/helper_functions.py @@ -11,15 +11,30 @@ from . import data # relative-import the *package* containing the templates """ -This code populates ELEMENT_DICTIONARY from a static datafile. +This code populates ELEMENT_DICTIONARY and ISOTOPE_DICTIONARY from a static datafile. """ ELEMENT_DICTIONARY = {} +ISOTOPE_DICTIONARY = {} isotope_file = pkg_resources.open_text(data, "isotopes.csv") +prev_number = 1 +current_dict = {} for line in isotope_file: symbol, number, mass, abundance = line.split(",") if symbol == "Symbol": continue + ELEMENT_DICTIONARY[number] = symbol + + if number == prev_number: + current_dict[float(mass)] = float(abundance.rstrip()) + else: + ISOTOPE_DICTIONARY[prev_number] = current_dict + current_dict = {} + current_dict[float(mass)] = float(abundance.rstrip()) + + prev_number = number + +ISOTOPE_DICTIONARY[prev_number] = current_dict ELEMENT_DICTIONARY["0"] = "Bq" INV_ELEMENT_DICTIONARY = {v: int(k) for k, v in ELEMENT_DICTIONARY.items()} @@ -261,3 +276,12 @@ def compute_RMSD(geometry1, geometry2): squared_difference = np.square(geometry1 - geometry2) temp = np.sum(squared_difference) / (3 * len(geometry1)) return np.sqrt(temp) + +def get_isotopic_distribution(z): + """ + For an element with number ``z``, returns two ``np.array`` objects containing that element's weights and relative abundances. + """ + z = str(z) + masses = list(ISOTOPE_DICTIONARY[z].keys()) + weights = list(ISOTOPE_DICTIONARY[z].values()) + return np.array(masses), np.array(weights) diff --git a/cctk/molecule.py b/cctk/molecule.py index 56f7c4b..1bd2db4 100644 --- a/cctk/molecule.py +++ b/cctk/molecule.py @@ -13,6 +13,7 @@ compute_dihedral_between, compute_unit_vector, get_covalent_radius, + get_isotopic_distribution, ) @@ -611,8 +612,68 @@ def rotate_molecule(self, axis, degrees): return self - def calculate_mass_spectrum(): - pass + def _recurse_through_formula(self, formula, masses, weights, cutoff=0.0000001, mass_precision=4, weight_precision=8): + """ + Recurses through a formula and generates m/z isotopic pattern using tail recursion. + + To prevent blowup of memory, fragments with very low abundance are ignored. Masses and weights are also rounded after every step. + To prevent error accumulation, internal precisions several orders of magnitude lower than the precision of interest should be employed. + The default values should work nicely for low-res MS applications. + + Args: + formula (np.array, dtype=np.int8): vector containing atoms left to incorporate + masses (np.array): list of mass fragments at current iteration + weights (np.array): relative weights at current iteration + cutoff (float): cutoff for similarity (masses within ``cutoff`` will be combined) + mass_precision (int): number of decimal places to store for mass + weight_precision (int): number of decimal places to store for weight + + Returns: + masses + weights + """ + if np.array_equal(formula, np.zeros(shape=92, dtype=np.int8)): + return masses[np.argsort(masses)], weights[np.argsort(masses)] + + current_e = np.nonzero(formula)[0][0] + e_masses, e_weights = get_isotopic_distribution(current_e) + + new_masses = np.zeros(shape=(len(masses)*len(e_masses))) + new_weights = np.zeros(shape=(len(masses)*len(e_masses))) + for i in range(len(masses)): + for j in range(len(e_masses)): + new_masses[i*len(e_masses)+j] = masses[i] + e_masses[j] + new_weights[i*len(e_masses)+j] = weights[i] * e_weights[j] + + newer_masses, indices = np.unique(np.round(new_masses, decimals=mass_precision), return_inverse=True) + newer_weights = np.zeros_like(newer_masses) + for k in range(len(newer_weights)): + newer_weights[k] = np.sum(new_weights[np.nonzero(indices == k)]) + newer_weights = np.round(newer_weights, decimals=weight_precision) + + formula[current_e] += -1 + above_cutoff = np.nonzero(newer_weights > cutoff) + return self._recurse_through_formula(formula, newer_masses[above_cutoff], newer_weights[above_cutoff], cutoff, mass_precision, weight_precision) + + def calculate_mass_spectrum(self, **kwargs): + """ + Generates list of m/z values based on formula string (e.g. "C10H12") + + Final weights rounded to one decimal point (because of low-res MS). + """ + form_vec = np.zeros(shape=92, dtype=np.int8) + for z in self.atomic_numbers: + form_vec[z - 1] += 1 + + masses, weights = self._recurse_through_formula(form_vec, [0], [1], **kwargs) + + new_masses, indices = np.unique(np.round(masses, decimals=1), return_inverse=True) + new_weights = np.zeros_like(new_masses) + for k in range(len(new_weights)): + new_weights[k] = np.sum(weights[np.nonzero(indices == k)]) + new_weights = new_weights / np.max(new_weights) + + return new_masses, new_weights def add_atom_at_centroid(self, symbol, atom_numbers, weighted=False): """ diff --git a/docs/img/t04_counterion_effects.png b/docs/img/t04_counterion_effects.png new file mode 100644 index 0000000..8eda37e Binary files /dev/null and b/docs/img/t04_counterion_effects.png differ diff --git a/docs/tutorial_03.rst b/docs/tutorial_03.rst index 7c9b4c8..f25c2f0 100644 --- a/docs/tutorial_03.rst +++ b/docs/tutorial_03.rst @@ -36,15 +36,15 @@ in practice most of low-energy conformational space can be sampled by selecting a few key rotatable bonds and letting Gaussian's ``opt`` keyword do the rest. For the purposes of this study, we chose to study four key dihedral angles: -1. Rotating around F2Ala #1's alpha carbon with respect to the amide (C3–C5–C8–O6) -1. Rotating around F2Ala #2's alpha carbon with respect to the amide (H12–C11–C14–O16) -1. Rotating the difluoromethyl group in F2Ala #1 (N1–C3–C5–H7) -1. Rotating the difluoromethyl group in F2Ala #2 (N9–C11–C13–H15) + - Rotating around F2Ala #1's alpha carbon with respect to the amide (C3–C5–C8–O6) + - Rotating around F2Ala #2's alpha carbon with respect to the amide (H12–C11–C14–O16) + - Rotating the difluoromethyl group in F2Ala #1 (N1–C3–C5–H7) + - Rotating the difluoromethyl group in F2Ala #2 (N9–C11–C13–H15) We also chose to sample: -1. *cis*/*trans* isomerism in F2Ala #1 (O27–C26–N1–H2) -1. *cis*/*trans* isomerism in F2Ala #2 (O8–C6–N9–H10) -1. Methyl ester conformation (O16–C14–O17–C18) + - *cis*/*trans* isomerism in F2Ala #1 (O27–C26–N1–H2) + - *cis*/*trans* isomerism in F2Ala #2 (O8–C6–N9–H10) + - Methyl ester conformation (O16–C14–O17–C18) Each of the dihedral angles in the first list was set to 0, 120, and 240 degrees, while each of the dihedral angles in the second list was set to 0 and 180 degrees (648 conformations in total). diff --git a/docs/tutorial_04.rst b/docs/tutorial_04.rst index 63eefbc..f38fe1b 100644 --- a/docs/tutorial_04.rst +++ b/docs/tutorial_04.rst @@ -9,13 +9,127 @@ Objectives This tutorial will teach: -- Things +- Rotation/translation of ``Molecule`` objects. +- Direct creation and editing of ``Molecule`` objects. Overview ======== -This tutorial will +Metal triflates are frequently employed as "M+" precursors in Lewis-acid-catalyzed transformations, since the weakly-binding triflate can easily be displaced by Lewis-basic ligands. +However, reported anion effects imply that "weakly-coordinating anions" like triflate may indeed have a non-negligible effect on catalysis. +In `one such study `_ by Evans and coworkers, +the counterion for a Cu(II)-tBuBox complex was found to have a dramatic effect on rate and small effects on enantioselectivity and diastereoselectivity, +indicating that one or more equivalents of the counterion are present in the transition state: -Creating a Bash Script -====================== +.. image:: /img/t04_counterion_effects.png +Understanding how anions are bound to cationic catalysts could improve mechanistic understanding and lead to the design of improved catalytic systems. +Computational modelling of weakly-bound ion pairs is difficult, however, because of the potential for numerous nearly degenerate binding modes. +Accordingly, many distinct arrangements must be sampled and evaluated: although such an approach would be tedious by hand, automation with *cctk* renders it facile. + +This tutorial will focus on evaluating the ground-state conformation of Evans's system for the hexafluoroantimonate and triflate anions. +We will make the simplifying assumption that only one anion will coordinate to the catalyst at a time, consistent with observation of singly-cationic metal–ligand complexes in solution. + +Creating Structures +=================== + +The structures of Cu(II)-tBuBox (S=1/2 dication), SbF\ :sub:`6` (S=0 anion), and OTf (S=0 anion) were first optimized separately at the +UB3LYP-D3BJ/6-31G(d)-SDD(Sb,Cu)/SMD(dichloromethane) level of theory. + +In order to efficiently manipulate the ion pair, all molecules were loaded into *cctk* and then centered:: + + def center_molecule(molecule): + """ + Moves a ``Molecule`` object's centroid to the origin + """ + atoms = np.arange(0, molecule.num_atoms()) + molecule.translate_molecule(-molecule.geometry[atoms].mean(axis=0)) + return molecule + + cation = center_molecule(GaussianFile.read_file("CuII-tBuBox-dication.out").get_molecule()) + anion1 = center_molecule(GaussianFile.read_file("SbF6_anion.out").get_molecule()) + anion2 = center_molecule(GaussianFile.read_file("OTf_anion.out").get_molecule()) + + anions = [anion1, anion2] + anion_names = ["SbF6", "OTf"] + +To determine the relative position of the two molecules, a random vector was sampled from a spherical distribution:: + + def spherical_random(radius=1): + """ + Generates a random point on a sphere of radius ``radius``. + """ + v = np.random.normal(size=3) + v = v/np.linalg.norm(v) + return v * radius + +The anion was then rotated randomly about all 3 Cartesian axes, and the atomic numbers and coordinates were concatenated to produce a new ``Molecule`` object. +The output structures were written to ``.gjf`` files:: + + for i in range(num_structures): + trans_v = spherical_random(radius=8) + for j in range(len(anions)): + + x = copy.deepcopy(anions[j]) + x.translate_molecule(trans_v) + x.rotate_molecule(np.array([1,0,0]), np.random.random()*360) + x.rotate_molecule(np.array([0,1,0]), np.random.random()*360) + x.rotate_molecule(np.array([0,0,1]), np.random.random()*360) + + atoms = np.hstack((cation.atomic_numbers.T, x.atomic_numbers.T)) + geoms = np.vstack((cation.geometry, x.geometry)) + + mx = Molecule(atomic_numbers=atoms, geometry=geoms, charge=1, multiplicity=2) + GaussianFile.write_molecule_to_file(f"CuII-tBuBox-{anion_names[j]}_c{i}.gjf", mx, "#p opt b3lyp/genecp empiricaldispersion=gd3bj scrf=(smd, solvent=dichloromethane)", footer) + + +The complete script (``generate_ion_pairs.py``) looks like this:: + + import copy + import numpy as np + from cctk import Molecule, GaussianFile + + num_structures = 25 + + footer = "" + with open('footer', 'r') as file: + footer = file.read() + + def spherical_random(radius=1): + """ + Generates a random point on a sphere of radius ``radius``. + """ + v = np.random.normal(size=3) + v = v/np.linalg.norm(v) + return v * radius + + def center_molecule(molecule): + """ + Moves a ``Molecule`` object's centroid to the origin + """ + atoms = np.arange(0, molecule.num_atoms()) + molecule.translate_molecule(-molecule.geometry[atoms].mean(axis=0)) + return molecule + + cation = center_molecule(GaussianFile.read_file("CuII-tBuBox-dication.out").get_molecule()) + anion1 = center_molecule(GaussianFile.read_file("SbF6_anion.out").get_molecule()) + anion2 = center_molecule(GaussianFile.read_file("OTf_anion.out").get_molecule()) + + anions = [anion1, anion2] + anion_names = ["SbF6", "OTf"] + + for i in range(num_structures): + trans_v = spherical_random(radius=8) + for j in range(len(anions)): + + x = copy.deepcopy(anions[j]) + x.translate_molecule(trans_v) + x.rotate_molecule(np.array([1,0,0]), np.random.random()*360) + x.rotate_molecule(np.array([0,1,0]), np.random.random()*360) + x.rotate_molecule(np.array([0,0,1]), np.random.random()*360) + + atoms = np.hstack((cation.atomic_numbers.T, x.atomic_numbers.T)) + geoms = np.vstack((cation.geometry, x.geometry)) + + mx = Molecule(atomic_numbers=atoms, geometry=geoms, charge=1, multiplicity=2) + GaussianFile.write_molecule_to_file(f"CuII-tBuBox-{anion_names[j]}_c{i}.gjf", mx, "#p opt b3lyp/genecp empiricaldispersion=gd3bj scrf=(smd, solvent=dichloromethane)", footer) diff --git a/docs/tutorial_06.rst b/docs/tutorial_06.rst new file mode 100644 index 0000000..63eefbc --- /dev/null +++ b/docs/tutorial_06.rst @@ -0,0 +1,21 @@ +.. _tutorial_04: + +================================ +Tutorial 04: Changing File Types +================================ + +Objectives +========== + +This tutorial will teach: + +- Things + +Overview +======== + +This tutorial will + +Creating a Bash Script +====================== + diff --git a/docs/tutorials.rst b/docs/tutorials.rst index 5c51edd..7b579f8 100644 --- a/docs/tutorials.rst +++ b/docs/tutorials.rst @@ -4,7 +4,8 @@ Tutorials ========== -The following tutorials have been provided to demonstrate how *cctk* can be used for real-world problems: +The following tutorials have been provided to demonstrate how *cctk* can be used for real-world problems. +Supplemental files and all scripts can be found on Github (``tutorials/``). .. toctree:: :maxdepth: 2 @@ -13,3 +14,4 @@ The following tutorials have been provided to demonstrate how *cctk* can be used tutorial_01 tutorial_02 tutorial_03 + tutorial_04 diff --git a/test/core_tests.py b/test/core_tests.py index 2836539..364dfcc 100644 --- a/test/core_tests.py +++ b/test/core_tests.py @@ -191,6 +191,12 @@ def test_add_atoms(self): self.assertEqual(mol.num_atoms(), 4) self.assertListEqual(list(mol.get_vector(4)), [2, 0, 0]) + def test_mass_spec(self): + mol = cctk.Molecule(np.array([12], dtype=np.int8), [[0, 0, 0]]) + masses, weights = mol.calculate_mass_spectrum() + self.assertListEqual(list(masses), [23.]) + self.assertListEqual(list(weights), [1.]) + class TestGroup(unittest.TestCase): def test_group_add(self): path = "static/acetaldehyde.out" diff --git a/tutorial/tutorial_04/footer b/tutorial/tutorial_04/footer new file mode 100644 index 0000000..bdb690c --- /dev/null +++ b/tutorial/tutorial_04/footer @@ -0,0 +1,9 @@ +-C -H -N -O -F -S 0 +6-31G(d) +**** +-Cu -Sb +SDD +**** + +-Cu -Sb +SDD diff --git a/tutorial/tutorial_04/generate_ion_pairs.py b/tutorial/tutorial_04/generate_ion_pairs.py new file mode 100644 index 0000000..14e8caf --- /dev/null +++ b/tutorial/tutorial_04/generate_ion_pairs.py @@ -0,0 +1,48 @@ +import copy +import numpy as np +from cctk import Molecule, GaussianFile + +num_structures = 25 + +footer = "" +with open('footer', 'r') as file: + footer = file.read() + +def spherical_random(radius=1): + """ + Generates a random point on a sphere of radius ``radius``. + """ + v = np.random.normal(size=3) + v = v/np.linalg.norm(v) + return v * radius + +def center_molecule(molecule): + """ + Moves a ``Molecule`` object's centroid to the origin + """ + atoms = np.arange(0, molecule.num_atoms()) + molecule.translate_molecule(-molecule.geometry[atoms].mean(axis=0)) + return molecule + +cation = center_molecule(GaussianFile.read_file("CuII-tBuBox-dication.out").get_molecule()) +anion1 = center_molecule(GaussianFile.read_file("SbF6_anion.out").get_molecule()) +anion2 = center_molecule(GaussianFile.read_file("OTf_anion.out").get_molecule()) + +anions = [anion1, anion2] +anion_names = ["SbF6", "OTf"] + +for i in range(num_structures): + trans_v = spherical_random(radius=8) + for j in range(len(anions)): + + x = copy.deepcopy(anions[j]) + x.translate_molecule(trans_v) + x.rotate_molecule(np.array([1,0,0]), np.random.random()*360) + x.rotate_molecule(np.array([0,1,0]), np.random.random()*360) + x.rotate_molecule(np.array([0,0,1]), np.random.random()*360) + + atoms = np.hstack((cation.atomic_numbers.T, x.atomic_numbers.T)) + geoms = np.vstack((cation.geometry, x.geometry)) + + mx = Molecule(atomic_numbers=atoms, geometry=geoms, charge=1, multiplicity=2) + GaussianFile.write_molecule_to_file(f"CuII-tBuBox-{anion_names[j]}_c{i}.gjf", mx, "#p opt b3lyp/genecp empiricaldispersion=gd3bj scrf=(smd, solvent=dichloromethane)", footer)