Skip to content

Commit

Permalink
added new tutorial content, pushed some fun changes
Browse files Browse the repository at this point in the history
  • Loading branch information
corinwagen committed Feb 26, 2020
1 parent 344288a commit 2ff9972
Show file tree
Hide file tree
Showing 12 changed files with 313 additions and 17 deletions.
13 changes: 11 additions & 2 deletions cctk/ensemble.py
Original file line number Diff line number Diff line change
Expand Up @@ -210,17 +210,19 @@ def join_ensembles(cls, ensembles, name=None):

return new_ensemble

def align(self, align_to=1, atoms=None):
def align(self, align_to=1, atoms=None, return_rmsd=False):
"""
Aligns every geometry to the specified geometry based on the atoms in `atom_numbers`. If `atom_numbers` is `None`, then a full alignment is performed.
Args:
align_to (int): which geometry to align to (1-indexed)
atoms (list): which atoms to align in each molecule (1-indexed; must be at least 3)
alternatively, specify ``None`` for all atoms or "heavy" for all heavy atoms
return_rmsd (Bool): whether to return RMSD before and after rotation
Returns:
a new ``Ensemble()`` object with the objects aligned
(optional) before rmsd and after rmsd
"""
self._check_molecule_number(align_to)

Expand All @@ -246,16 +248,23 @@ def align(self, align_to=1, atoms=None):
molecule.translate_molecule(-centroid)

template = self.molecules[align_to - 1].geometry[atoms]
before_rmsd = 0
after_rmsd = 0

#### perform alignment using Kabsch algorithm
new_ensemble = copy.deepcopy(self)
for molecule in new_ensemble.molecules:
before_rmsd += compute_RMSD(template, molecule.geometry[atoms])
new_geometry = align_matrices(molecule.geometry[atoms], molecule.geometry, template)
molecule.geometry = new_geometry
after_rmsd += compute_RMSD(template, molecule.geometry[atoms])

assert len(molecule.geometry) == len(molecule.atomic_numbers), "wrong number of geometry elements!"

return new_ensemble
if return_rmsd:
return new_ensemble, before_rmsd, after_rmsd
else:
return new_ensemble

def eliminate_redundant(self, cutoff=0.5, heavy_only=True, atom_numbers=None):
"""
Expand Down
2 changes: 2 additions & 0 deletions cctk/gaussian_file.py
Original file line number Diff line number Diff line change
Expand Up @@ -149,6 +149,8 @@ def write_file(self, filename, molecule=None, header=None, footer=None, **kwargs

if header is None:
header = self.header

if footer is None:
footer = self.footer

self.write_molecule_to_file(filename, molecule, header, footer, **kwargs)
Expand Down
26 changes: 25 additions & 1 deletion cctk/helper_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,15 +11,30 @@
from . import data # relative-import the *package* containing the templates

"""
This code populates ELEMENT_DICTIONARY from a static datafile.
This code populates ELEMENT_DICTIONARY and ISOTOPE_DICTIONARY from a static datafile.
"""
ELEMENT_DICTIONARY = {}
ISOTOPE_DICTIONARY = {}
isotope_file = pkg_resources.open_text(data, "isotopes.csv")
prev_number = 1
current_dict = {}
for line in isotope_file:
symbol, number, mass, abundance = line.split(",")
if symbol == "Symbol":
continue

ELEMENT_DICTIONARY[number] = symbol

if number == prev_number:
current_dict[float(mass)] = float(abundance.rstrip())
else:
ISOTOPE_DICTIONARY[prev_number] = current_dict
current_dict = {}
current_dict[float(mass)] = float(abundance.rstrip())

prev_number = number

ISOTOPE_DICTIONARY[prev_number] = current_dict
ELEMENT_DICTIONARY["0"] = "Bq"

INV_ELEMENT_DICTIONARY = {v: int(k) for k, v in ELEMENT_DICTIONARY.items()}
Expand Down Expand Up @@ -261,3 +276,12 @@ def compute_RMSD(geometry1, geometry2):
squared_difference = np.square(geometry1 - geometry2)
temp = np.sum(squared_difference) / (3 * len(geometry1))
return np.sqrt(temp)

def get_isotopic_distribution(z):
"""
For an element with number ``z``, returns two ``np.array`` objects containing that element's weights and relative abundances.
"""
z = str(z)
masses = list(ISOTOPE_DICTIONARY[z].keys())
weights = list(ISOTOPE_DICTIONARY[z].values())
return np.array(masses), np.array(weights)
65 changes: 63 additions & 2 deletions cctk/molecule.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
compute_dihedral_between,
compute_unit_vector,
get_covalent_radius,
get_isotopic_distribution,
)


Expand Down Expand Up @@ -611,8 +612,68 @@ def rotate_molecule(self, axis, degrees):

return self

def calculate_mass_spectrum():
pass
def _recurse_through_formula(self, formula, masses, weights, cutoff=0.0000001, mass_precision=4, weight_precision=8):
"""
Recurses through a formula and generates m/z isotopic pattern using tail recursion.
To prevent blowup of memory, fragments with very low abundance are ignored. Masses and weights are also rounded after every step.
To prevent error accumulation, internal precisions several orders of magnitude lower than the precision of interest should be employed.
The default values should work nicely for low-res MS applications.
Args:
formula (np.array, dtype=np.int8): vector containing atoms left to incorporate
masses (np.array): list of mass fragments at current iteration
weights (np.array): relative weights at current iteration
cutoff (float): cutoff for similarity (masses within ``cutoff`` will be combined)
mass_precision (int): number of decimal places to store for mass
weight_precision (int): number of decimal places to store for weight
Returns:
masses
weights
"""
if np.array_equal(formula, np.zeros(shape=92, dtype=np.int8)):
return masses[np.argsort(masses)], weights[np.argsort(masses)]

current_e = np.nonzero(formula)[0][0]
e_masses, e_weights = get_isotopic_distribution(current_e)

new_masses = np.zeros(shape=(len(masses)*len(e_masses)))
new_weights = np.zeros(shape=(len(masses)*len(e_masses)))
for i in range(len(masses)):
for j in range(len(e_masses)):
new_masses[i*len(e_masses)+j] = masses[i] + e_masses[j]
new_weights[i*len(e_masses)+j] = weights[i] * e_weights[j]

newer_masses, indices = np.unique(np.round(new_masses, decimals=mass_precision), return_inverse=True)
newer_weights = np.zeros_like(newer_masses)
for k in range(len(newer_weights)):
newer_weights[k] = np.sum(new_weights[np.nonzero(indices == k)])
newer_weights = np.round(newer_weights, decimals=weight_precision)

formula[current_e] += -1
above_cutoff = np.nonzero(newer_weights > cutoff)
return self._recurse_through_formula(formula, newer_masses[above_cutoff], newer_weights[above_cutoff], cutoff, mass_precision, weight_precision)

def calculate_mass_spectrum(self, **kwargs):
"""
Generates list of m/z values based on formula string (e.g. "C10H12")
Final weights rounded to one decimal point (because of low-res MS).
"""
form_vec = np.zeros(shape=92, dtype=np.int8)
for z in self.atomic_numbers:
form_vec[z - 1] += 1

masses, weights = self._recurse_through_formula(form_vec, [0], [1], **kwargs)

new_masses, indices = np.unique(np.round(masses, decimals=1), return_inverse=True)
new_weights = np.zeros_like(new_masses)
for k in range(len(new_weights)):
new_weights[k] = np.sum(weights[np.nonzero(indices == k)])
new_weights = new_weights / np.max(new_weights)

return new_masses, new_weights

def add_atom_at_centroid(self, symbol, atom_numbers, weighted=False):
"""
Expand Down
Binary file added docs/img/t04_counterion_effects.png
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
14 changes: 7 additions & 7 deletions docs/tutorial_03.rst
Original file line number Diff line number Diff line change
Expand Up @@ -36,15 +36,15 @@ in practice most of low-energy conformational space can be sampled
by selecting a few key rotatable bonds and letting Gaussian's ``opt`` keyword do the rest.

For the purposes of this study, we chose to study four key dihedral angles:
1. Rotating around F2Ala #1's alpha carbon with respect to the amide (C3–C5–C8–O6)
1. Rotating around F2Ala #2's alpha carbon with respect to the amide (H12–C11–C14–O16)
1. Rotating the difluoromethyl group in F2Ala #1 (N1–C3–C5–H7)
1. Rotating the difluoromethyl group in F2Ala #2 (N9–C11–C13–H15)
- Rotating around F2Ala #1's alpha carbon with respect to the amide (C3–C5–C8–O6)
- Rotating around F2Ala #2's alpha carbon with respect to the amide (H12–C11–C14–O16)
- Rotating the difluoromethyl group in F2Ala #1 (N1–C3–C5–H7)
- Rotating the difluoromethyl group in F2Ala #2 (N9–C11–C13–H15)

We also chose to sample:
1. *cis*/*trans* isomerism in F2Ala #1 (O27–C26–N1–H2)
1. *cis*/*trans* isomerism in F2Ala #2 (O8–C6–N9–H10)
1. Methyl ester conformation (O16–C14–O17–C18)
- *cis*/*trans* isomerism in F2Ala #1 (O27–C26–N1–H2)
- *cis*/*trans* isomerism in F2Ala #2 (O8–C6–N9–H10)
- Methyl ester conformation (O16–C14–O17–C18)

Each of the dihedral angles in the first list was set to 0, 120, and 240 degrees, while
each of the dihedral angles in the second list was set to 0 and 180 degrees (648 conformations in total).
Expand Down
122 changes: 118 additions & 4 deletions docs/tutorial_04.rst
Original file line number Diff line number Diff line change
Expand Up @@ -9,13 +9,127 @@ Objectives

This tutorial will teach:

- Things
- Rotation/translation of ``Molecule`` objects.
- Direct creation and editing of ``Molecule`` objects.

Overview
========

This tutorial will
Metal triflates are frequently employed as "M+" precursors in Lewis-acid-catalyzed transformations, since the weakly-binding triflate can easily be displaced by Lewis-basic ligands.
However, reported anion effects imply that "weakly-coordinating anions" like triflate may indeed have a non-negligible effect on catalysis.
In `one such study <http://evans.rc.fas.harvard.edu/pdf/evans245.pdf>`_ by Evans and coworkers,
the counterion for a Cu(II)-tBuBox complex was found to have a dramatic effect on rate and small effects on enantioselectivity and diastereoselectivity,
indicating that one or more equivalents of the counterion are present in the transition state:

Creating a Bash Script
======================
.. image:: /img/t04_counterion_effects.png

Understanding how anions are bound to cationic catalysts could improve mechanistic understanding and lead to the design of improved catalytic systems.
Computational modelling of weakly-bound ion pairs is difficult, however, because of the potential for numerous nearly degenerate binding modes.
Accordingly, many distinct arrangements must be sampled and evaluated: although such an approach would be tedious by hand, automation with *cctk* renders it facile.

This tutorial will focus on evaluating the ground-state conformation of Evans's system for the hexafluoroantimonate and triflate anions.
We will make the simplifying assumption that only one anion will coordinate to the catalyst at a time, consistent with observation of singly-cationic metal–ligand complexes in solution.

Creating Structures
===================

The structures of Cu(II)-tBuBox (S=1/2 dication), SbF\ :sub:`6` (S=0 anion), and OTf (S=0 anion) were first optimized separately at the
UB3LYP-D3BJ/6-31G(d)-SDD(Sb,Cu)/SMD(dichloromethane) level of theory.

In order to efficiently manipulate the ion pair, all molecules were loaded into *cctk* and then centered::

def center_molecule(molecule):
"""
Moves a ``Molecule`` object's centroid to the origin
"""
atoms = np.arange(0, molecule.num_atoms())
molecule.translate_molecule(-molecule.geometry[atoms].mean(axis=0))
return molecule

cation = center_molecule(GaussianFile.read_file("CuII-tBuBox-dication.out").get_molecule())
anion1 = center_molecule(GaussianFile.read_file("SbF6_anion.out").get_molecule())
anion2 = center_molecule(GaussianFile.read_file("OTf_anion.out").get_molecule())

anions = [anion1, anion2]
anion_names = ["SbF6", "OTf"]

To determine the relative position of the two molecules, a random vector was sampled from a spherical distribution::

def spherical_random(radius=1):
"""
Generates a random point on a sphere of radius ``radius``.
"""
v = np.random.normal(size=3)
v = v/np.linalg.norm(v)
return v * radius

The anion was then rotated randomly about all 3 Cartesian axes, and the atomic numbers and coordinates were concatenated to produce a new ``Molecule`` object.
The output structures were written to ``.gjf`` files::

for i in range(num_structures):
trans_v = spherical_random(radius=8)
for j in range(len(anions)):

x = copy.deepcopy(anions[j])
x.translate_molecule(trans_v)
x.rotate_molecule(np.array([1,0,0]), np.random.random()*360)
x.rotate_molecule(np.array([0,1,0]), np.random.random()*360)
x.rotate_molecule(np.array([0,0,1]), np.random.random()*360)

atoms = np.hstack((cation.atomic_numbers.T, x.atomic_numbers.T))
geoms = np.vstack((cation.geometry, x.geometry))

mx = Molecule(atomic_numbers=atoms, geometry=geoms, charge=1, multiplicity=2)
GaussianFile.write_molecule_to_file(f"CuII-tBuBox-{anion_names[j]}_c{i}.gjf", mx, "#p opt b3lyp/genecp empiricaldispersion=gd3bj scrf=(smd, solvent=dichloromethane)", footer)


The complete script (``generate_ion_pairs.py``) looks like this::
import copy
import numpy as np
from cctk import Molecule, GaussianFile

num_structures = 25

footer = ""
with open('footer', 'r') as file:
footer = file.read()

def spherical_random(radius=1):
"""
Generates a random point on a sphere of radius ``radius``.
"""
v = np.random.normal(size=3)
v = v/np.linalg.norm(v)
return v * radius

def center_molecule(molecule):
"""
Moves a ``Molecule`` object's centroid to the origin
"""
atoms = np.arange(0, molecule.num_atoms())
molecule.translate_molecule(-molecule.geometry[atoms].mean(axis=0))
return molecule

cation = center_molecule(GaussianFile.read_file("CuII-tBuBox-dication.out").get_molecule())
anion1 = center_molecule(GaussianFile.read_file("SbF6_anion.out").get_molecule())
anion2 = center_molecule(GaussianFile.read_file("OTf_anion.out").get_molecule())

anions = [anion1, anion2]
anion_names = ["SbF6", "OTf"]

for i in range(num_structures):
trans_v = spherical_random(radius=8)
for j in range(len(anions)):

x = copy.deepcopy(anions[j])
x.translate_molecule(trans_v)
x.rotate_molecule(np.array([1,0,0]), np.random.random()*360)
x.rotate_molecule(np.array([0,1,0]), np.random.random()*360)
x.rotate_molecule(np.array([0,0,1]), np.random.random()*360)

atoms = np.hstack((cation.atomic_numbers.T, x.atomic_numbers.T))
geoms = np.vstack((cation.geometry, x.geometry))

mx = Molecule(atomic_numbers=atoms, geometry=geoms, charge=1, multiplicity=2)
GaussianFile.write_molecule_to_file(f"CuII-tBuBox-{anion_names[j]}_c{i}.gjf", mx, "#p opt b3lyp/genecp empiricaldispersion=gd3bj scrf=(smd, solvent=dichloromethane)", footer)
21 changes: 21 additions & 0 deletions docs/tutorial_06.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
.. _tutorial_04:

================================
Tutorial 04: Changing File Types
================================

Objectives
==========

This tutorial will teach:

- Things

Overview
========

This tutorial will

Creating a Bash Script
======================

4 changes: 3 additions & 1 deletion docs/tutorials.rst
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,8 @@
Tutorials
==========

The following tutorials have been provided to demonstrate how *cctk* can be used for real-world problems:
The following tutorials have been provided to demonstrate how *cctk* can be used for real-world problems.
Supplemental files and all scripts can be found on Github (``tutorials/``).

.. toctree::
:maxdepth: 2
Expand All @@ -13,3 +14,4 @@ The following tutorials have been provided to demonstrate how *cctk* can be used
tutorial_01
tutorial_02
tutorial_03
tutorial_04
6 changes: 6 additions & 0 deletions test/core_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -191,6 +191,12 @@ def test_add_atoms(self):
self.assertEqual(mol.num_atoms(), 4)
self.assertListEqual(list(mol.get_vector(4)), [2, 0, 0])

def test_mass_spec(self):
mol = cctk.Molecule(np.array([12], dtype=np.int8), [[0, 0, 0]])
masses, weights = mol.calculate_mass_spectrum()
self.assertListEqual(list(masses), [23.])
self.assertListEqual(list(weights), [1.])

class TestGroup(unittest.TestCase):
def test_group_add(self):
path = "static/acetaldehyde.out"
Expand Down
9 changes: 9 additions & 0 deletions tutorial/tutorial_04/footer
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
-C -H -N -O -F -S 0
6-31G(d)
****
-Cu -Sb
SDD
****

-Cu -Sb
SDD

0 comments on commit 2ff9972

Please sign in to comment.