Skip to content

Commit

Permalink
restructure xyz to be 1:1 with ensemble
Browse files Browse the repository at this point in the history
  • Loading branch information
corinwagen committed Jun 24, 2021
1 parent df47f56 commit dba720f
Show file tree
Hide file tree
Showing 9 changed files with 83 additions and 78 deletions.
4 changes: 1 addition & 3 deletions cctk/gaussian_file.py
Original file line number Diff line number Diff line change
Expand Up @@ -540,9 +540,7 @@ def get_molecule(self, num=None, properties=False):
# some methods pass num=None, which overrides setting the default above
if num is None:
num = -1

if not isinstance(num, int):
raise TypeError("num must be int")
assert isinstance(num, int), "num must be int"

if properties:
return self.ensemble.molecule_list()[num], self.ensemble.properties_list()[num]
Expand Down
2 changes: 1 addition & 1 deletion cctk/optimize.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@ def run_xtb(molecule, nprocs=1, return_energy=False, opt=False):

output_mol, energy = None, None
if opt:
output_mol = cctk.XYZFile.read_file(f"{tmpdir}/xtbopt.xyz").molecule
output_mol = cctk.XYZFile.read_file(f"{tmpdir}/xtbopt.xyz").get_molecule()
energy_file = cctk.File.read_file(f"{tmpdir}/xtbopt.log")
fields = energy_file[1].split()
energy, gradient = float(fields[1]), float(fields[3])
Expand Down
118 changes: 62 additions & 56 deletions cctk/xyz_file.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import re
import re, itertools
import numpy as np

import cctk
Expand All @@ -10,41 +10,58 @@ class XYZFile(cctk.File):
Class representing plain ``.xyz`` files.
Attributes:
title (str): the title from the file
molecule (Molecule): `Molecule` instance
titles (list of str): the title or titles from the file
ensemble (Ensemble): `Ensemble` instance
"""

def __init__(self, molecule, title=None):
if molecule and isinstance(molecule, cctk.Molecule):
self.molecule = molecule
if title and (isinstance(title, str)):
self.title = title
def __init__(self, ensemble, titles):
assert isinstance(ensemble, cctk.Ensemble), "ensemble must be cctk.Ensemble"
self.ensemble = ensemble

assert isinstance(titles, list), "title must be list"
self.titles = titles

@classmethod
def read_file(cls, filename, charge=0, multiplicity=1):
def read_file(cls, filename, charge=0, multiplicity=1, conformational=False):
"""
Factory method to create new XYZFile instances.
Arguments:
filename (str): path to ``.xyz`` file
charge (int): charge of resultant molecule
multiplicity (int): multiplicity of resultant molecule
conformational (bool): whether or not it's a conformational ensemble
"""
lines = super().read_file(filename)
xyzfile = cls.file_from_lines(lines)

assert isinstance(charge, int), "charge must be integer"
assert isinstance(multiplicity, int), "multiplicity must be integer"
assert multiplicity > 0, "multiplicity must be a positive integer"

xyzfile.molecule.charge = charge
xyzfile.molecule.multiplicity = multiplicity
ensemble = cctk.Ensemble()
if conformational:
ensemble = cctk.ConformationalEnsemble()
titles = list()

return xyzfile
lines = super().read_file(filename)
current_lines = list()
for line in lines:
if re.search(r"^\s*\d+$", line):
if len(current_lines) > 0:
t, m = cls.mol_from_lines(current_lines, charge=charge, multiplicity=multiplicity)
ensemble.add_molecule(m)
titles.append(t)
current_lines = list()
current_lines.append(line)

# catch the last molecule
if len(current_lines) > 0:
t, m = cls.mol_from_lines(current_lines, charge=charge, multiplicity=multiplicity)
ensemble.add_molecule(m)
titles.append(t)

return XYZFile(ensemble, titles)

@classmethod
def file_from_lines(cls, lines):
def mol_from_lines(cls, lines, charge=0, multiplicity=1):
num_atoms = 0
try:
num_atoms = int(lines[0])
Expand Down Expand Up @@ -78,8 +95,8 @@ def file_from_lines(cls, lines):
raise ValueError(f"can't parse line {index+2}: {line}")

assert num_atoms == len(atomic_numbers), "wrong number of atoms!"
molecule = cctk.Molecule(atomic_numbers, geometry)
return XYZFile(molecule, title)
molecule = cctk.Molecule(atomic_numbers, geometry, charge=charge, multiplicity=multiplicity)
return title, molecule

@classmethod
def write_molecule_to_file(cls, filename, molecule, title="title", append=False):
Expand All @@ -106,54 +123,29 @@ def write_molecule_to_file(cls, filename, molecule, title="title", append=False)
else:
super().write_file(filename, text)

def write_file(self, filename):
def write_file(self, filename, idx=-1):
"""
Write an ``.xyz`` file, using object attributes.
Args:
idx (int): the index of the molecule to write
"""
self.write_molecule_to_file(filename, self.molecule, title=self.title)
assert isinstance(idx, int), "idx must be int"
self.write_molecule_to_file(filename, self.get_molecule(idx), title=self.titles[idx])

@classmethod
def read_trajectory(cls, filename):
def read_trajectory(cls, filename, **kwargs):
"""
Read an ``.xyz`` trajectory file, which is just a bunch of concatenated ``.xyz`` files.
Currently the files must be separated by nothing (no blank line, just one after the other) although this may be changed in future.
Args:
filename (str): path to file
Returns:
list of ``cctk.XYZFile`` objects in the order they appear in the file
Post refactoring, just an alias for ``XYZFile.read_file()``.
"""
files = []
lines = super().read_file(filename)

current_lines = list()
for line in lines:
if re.search(r"^\s*\d+$", line):
if len(current_lines) > 0:
files.append(cls.file_from_lines(current_lines))
current_lines = list()
current_lines.append(line)

return files
return cls.read_file(filename, **kwargs)

@classmethod
def read_ensemble(cls, filename, conformational=False):
def read_ensemble(cls, filename, **kwargs):
"""
Alias for read_trajectory.
Post refactoring, just an alias for ``XYZFile.read_file()``.
"""
files = cls.read_trajectory(filename)

ensemble = None
if conformational:
ensemble = cctk.ConformationalEnsemble()
else:
ensemble = cctk.Ensemble()

for f in files:
ensemble.add_molecule(f.molecule)

return ensemble
return cls.read_file(filename, **kwargs)

@classmethod
def write_ensemble_to_file(cls, filename, ensemble, title=None):
Expand All @@ -173,3 +165,17 @@ def write_ensemble_to_file(cls, filename, ensemble, title=None):
cls.write_molecule_to_file(filename, molecule, title=title, append=False)
else:
cls.write_molecule_to_file(filename, molecule, title=title, append=True)

def get_molecule(self, num=None):
"""
Returns a given molecule.
If ``num`` is specified, returns ``self.ensemble.molecule_list()[num]``
"""
# some methods pass num=None, which overrides setting the default above
if num is None:
num = -1
assert isinstance(num, int), "num must be int"
return self.ensemble.molecule_list()[num]


8 changes: 5 additions & 3 deletions docs/recipe_07.rst
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@ XYZ Files
- *cctk* assumes the first line is the number of atoms, the second line is the title,
and the third and subsequent lines are geometry specifications of the form
``atom_symbol x_position y_position z_position``.
- Multiple structures per ``xyz`` file are not currently supported.

::

Expand All @@ -25,8 +24,11 @@ XYZ Files
file = cctk.XYZFile.read_file(path)
# file contents
file.title == "peptide example"
molecule = file.molecule
file.titles[0] == "peptide example"
ensemble = file.ensemble

# convenient accessor
molecule = file.get_molecule()

# write xyz
assert isinstance(molecule2, Molecule)
Expand Down
17 changes: 8 additions & 9 deletions test/test_core.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,9 @@ class TestXYZ(unittest.TestCase):
def test_readfile(self):
path = "test/static/test_peptide.xyz"
file = cctk.XYZFile.read_file(path)
self.assertEqual(file.title, "peptide example")
self.assertEqual(file.titles[0], "peptide example")

mol = file.molecule
mol = file.get_molecule()
self.assertTrue(isinstance(mol, cctk.Molecule))
self.assertEqual(mol.num_atoms(), 31)
self.assertTrue(mol.check_for_conflicts())
Expand All @@ -31,19 +31,18 @@ def test_writefile(self):

def test_traj(self):
path = "test/static/methane_traj.xyz"
files = cctk.XYZFile.read_trajectory(path)
file = cctk.XYZFile.read_trajectory(path)

self.assertEqual(len(files), 250)
self.assertTrue(isinstance(files[0], cctk.XYZFile))
self.assertEqual(files[0].molecule.num_atoms(), 5)
self.assertEqual(len(file.ensemble), 251)
self.assertEqual(file.get_molecule().num_atoms(), 5)

def test_ense(self):
path = "test/static/methane_traj.xyz"
ense = cctk.XYZFile.read_ensemble(path)
self.assertEqual(len(ense), 250)
file = cctk.XYZFile.read_ensemble(path)
self.assertEqual(len(file.ensemble), 251)

new_path = "test/static/methane_traj_new.xyz"
cctk.XYZFile.write_ensemble_to_file(new_path, ense, title="sample title")
cctk.XYZFile.write_ensemble_to_file(new_path, file.ensemble, title="sample title")
os.remove(new_path)

class TestMAE(unittest.TestCase):
Expand Down
2 changes: 1 addition & 1 deletion test/test_molecule.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

class TestMolecule(unittest.TestCase):
def load_molecule(self, path="test/static/test_peptide.xyz"):
return cctk.XYZFile.read_file(path).molecule
return cctk.XYZFile.read_file(path).get_molecule()

def test_basic(self):
mol = self.load_molecule()
Expand Down
2 changes: 1 addition & 1 deletion test/test_opt.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

class TestMolecule(unittest.TestCase):
def load_molecule(self, path="test/static/test_peptide.xyz"):
return cctk.XYZFile.read_file(path).molecule
return cctk.XYZFile.read_file(path).get_molecule()

def test_basic(self):
mol = self.load_molecule()
Expand Down
6 changes: 3 additions & 3 deletions test/test_orca.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,13 +12,13 @@ def test_write(self):
new_path = "test/static/test_peptide_copy.inp"

file = cctk.XYZFile.read_file(read_path)
self.assertTrue(isinstance(file.molecule, cctk.Molecule))
self.assertTrue(isinstance(file.get_molecule(), cctk.Molecule))

header = "! aug-cc-pVTZ aug-cc-pVTZ/C DLPNO-CCSD(T) TightSCF TightPNO MiniPrint"
variables = {"maxcore": 4000}
blocks = {"pal": ["nproc 4"], "mdci": ["density none"]}

cctk.OrcaFile.write_molecule_to_file(new_path, file.molecule, header, variables, blocks)
cctk.OrcaFile.write_molecule_to_file(new_path, file.get_molecule(), header, variables, blocks)

with open(path) as old:
with open(new_path) as new:
Expand All @@ -30,7 +30,7 @@ def test_write(self):
os.remove(new_path)

ensemble = cctk.ConformationalEnsemble()
ensemble.add_molecule(file.molecule)
ensemble.add_molecule(file.get_molecule())

orca_file = cctk.OrcaFile(job_types=[cctk.OrcaJobType.SP], ensemble=ensemble, header=header, blocks=blocks, variables=variables)
orca_file.write_file(new_path)
Expand Down
2 changes: 1 addition & 1 deletion test/test_solvent_extraction.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

class TestSolventExtraction(unittest.TestCase):
def test_basic(self):
mol = cctk.XYZFile.read_file("test/static/acetone_water.xyz").molecule
mol = cctk.XYZFile.read_file("test/static/acetone_water.xyz").get_molecule()
mol.assign_connectivity()
self.assertFalse(mol.num_atoms() == 40)
new_mol = mol.limit_solvent_shell(num_solvents=10)
Expand Down

0 comments on commit dba720f

Please sign in to comment.