Skip to content

Commit

Permalink
fixing errors in the new mass spec code
Browse files Browse the repository at this point in the history
  • Loading branch information
corinwagen committed Mar 4, 2021
1 parent 75bf1e8 commit d0184a7
Show file tree
Hide file tree
Showing 6 changed files with 28 additions and 10 deletions.
1 change: 0 additions & 1 deletion cctk/gaussian_file.py
Original file line number Diff line number Diff line change
Expand Up @@ -390,7 +390,6 @@ def read_file(cls, filename, return_lines=False, extended_opt_info=False):
corrected_free_energy = get_corrected_free_energy(gibbs_vals[0], frequencies,
frequency_cutoff=100.0, temperature=temperature[0])
properties[-1]["quasiharmonic_gibbs_free_energy"] = float(f"{float(corrected_free_energy):.6f}") # yes this is dumb
print(properties[-1])
except:
pass

Expand Down
18 changes: 14 additions & 4 deletions cctk/helper_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
"""

import numpy as np
import math
import math, re
from scipy.spatial.distance import cdist
from io import BytesIO

Expand Down Expand Up @@ -621,7 +621,7 @@ def bytes_to_numpy(arr_bytes):
loaded_np = np.load(load_bytes, allow_pickle=True)
return loaded_np

def compute_mass_spectrum(formula_dict):
def compute_mass_spectrum(formula_dict, **kwargs):
"""
Computes the expected low-res mass spec ions for a given formula.
Expand All @@ -637,7 +637,7 @@ def compute_mass_spectrum(formula_dict):
if isinstance(z, str):
z = get_number(z)
assert isinstance(z, int), "atomic number must be integer"
form_vec[z - 1] += n
form_vec[z] += n

masses, weights = _recurse_through_formula(form_vec, [0], [1], **kwargs)

Expand All @@ -658,7 +658,7 @@ def _recurse_through_formula(formula, masses, weights, cutoff=0.0000001, mass_pr
The default values should work nicely for low-res MS applications.
Args:
formula (np.ndarray, dtype=np.int8): vector containing atoms left to incorporate
formula (np.ndarray, dtype=np.int8): vector containing atoms left to incorporate. first element should always be 0 as there is no element 0.
masses (np.ndarray): list of mass fragments at current iteration
weights (np.ndarray): relative weights at current iteration
cutoff (float): cutoff for similarity (masses within ``cutoff`` will be combined)
Expand Down Expand Up @@ -697,4 +697,14 @@ def _recurse_through_formula(formula, masses, weights, cutoff=0.0000001, mass_pr
above_cutoff = np.nonzero(newer_weights > cutoff)
return _recurse_through_formula(formula, newer_masses[above_cutoff], newer_weights[above_cutoff], cutoff, mass_precision, weight_precision)

def formula_dict_from_string(formula_string):
"""
Eugene challenged me to code golf, this isn't my fault.
Args:
formula_string (str): the formula as a string, e.g. C10H12N2O1. you need the "1" explicitly
Returns:
formula_dict (dict): e.g. {'C': 10, 'H': 12, 'N': 2, 'O': 1}
"""
return {t[0]: int(t[1]) for t in re.findall(r"([a-z]+)([0-9]+)", formula_string, re.I)}
2 changes: 1 addition & 1 deletion cctk/molecule.py
Original file line number Diff line number Diff line change
Expand Up @@ -742,7 +742,7 @@ def calculate_mass_spectrum(self, **kwargs):
"""
form_vec = np.zeros(shape=92, dtype=np.int8)
for z in self.atomic_numbers:
form_vec[z - 1] += 1
form_vec[z] += 1

masses, weights = _recurse_through_formula(form_vec, [0], [1], **kwargs)

Expand Down
6 changes: 3 additions & 3 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,14 +11,14 @@
packages=["cctk", "cctk.data", "cctk.groups"],
# include_package_data=True,
package_data={"cctk.data": ["*"], "cctk.groups": ["*"],},
version="v0.2.9",
version="v0.2.10",
license="Apache 2.O",
description="computational chemistry toolkit",
author="Corin Wagen and Eugene Kwan",
author_email="corin.wagen@gmail.com",
url="https://github.com/ekwan/cctk",
download_url="https://github.com/ekwan/cctk/archive/v0.2.9.tar.gz",
install_requires=["numpy", "networkx", "importlib_resources", "scipy", "pyahocorasick", "basis_set_exchange"],
download_url="https://github.com/ekwan/cctk/archive/v0.2.10.tar.gz",
install_requires=["numpy", "networkx", "importlib_resources", "scipy", "pyahocorasick", "basis_set_exchange", "pyyaml"],
long_description=long_description,
long_description_content_type='text/markdown',
classifiers=[
Expand Down
9 changes: 9 additions & 0 deletions test/test_helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,5 +55,14 @@ def test_avg_mw(self):
self.assertEqual(6, helper.get_z_from_mass(12.011))
self.assertEqual(11, helper.get_z_from_mass(22.9897))

def test_mw_splitting(self):
molecule = cctk.Molecule.new_from_name("ibuprofen")
masses, weights = molecule.calculate_mass_spectrum()
self.assertEqual(masses[0], 206.1)

fdict = helper.formula_dict_from_string("C10H12N2O1")
masses, weights = helper.compute_mass_spectrum(fdict)
self.assertEqual(masses[0], 176.1)

if __name__ == '__main__':
unittest.main()
2 changes: 1 addition & 1 deletion test/test_molecule.py
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,7 @@ def test_add_atoms(self):
self.assertListEqual(list(mol.get_vector(4)), [2, 0, 0])

def test_mass_spec(self):
mol = cctk.Molecule(np.array([12], dtype=np.int8), [[0, 0, 0]])
mol = cctk.Molecule(np.array([11], dtype=np.int8), [[0, 0, 0]])
masses, weights = mol.calculate_mass_spectrum()
self.assertListEqual(list(masses), [23.])
self.assertListEqual(list(weights), [1.])
Expand Down

0 comments on commit d0184a7

Please sign in to comment.