made the automatic solvent removal tool smarter

ekwan · Jul 30, 2020 · ba2c89e · ba2c89e
1 parent 49f89f9
commit ba2c89e
Show file tree

Hide file tree

Showing 4 changed files with 69 additions and 10 deletions.
diff --git a/cctk/molecule.py b/cctk/molecule.py
@@ -1464,14 +1464,15 @@ def get_components(self):
         fragments = nx.connected_components(self.bonds)
         return [list(f) for f in list(fragments)]
 
-    def limit_solvent_shell(self, num_atoms=0, num_solvents=10):
+    def limit_solvent_shell(self, solute=0, num_atoms=0, num_solvents=10):
         """
         Automatically detects solvent molecules and removes them until you have a set number of solvents or atoms.
 
-        This code assumes that the fragment of interest comes first in the file, which is almost always true but may not be true.
+        The "distance" between molecules is the minimum of the pairwise atomic distances, to emphasize inner-sphere interactions.
 
         Args:
-            num_atoms (int): remove atoms until there are this number (module size of a solvent molecule)
+            solute (int): which fragment is the solute, 0-indexed
+            num_atoms (int): remove atoms until there are this number (modulo the size of a solvent molecule)
             num_solvents (int): remove solvent molecules until there are this number
 
         Returns:
@@ -1481,16 +1482,20 @@ def limit_solvent_shell(self, num_atoms=0, num_solvents=10):
         assert isinstance(num_solvents, int)
 
         fragments = self.get_components()
+        solute_x = self.geometry[fragments[0]].view(np.ndarray)
 
-        #### not strictly a centroid since it's not mass-weighted.
-        centroids = np.zeros(shape=(len(fragments), 3))
         distances = np.zeros(shape=len(fragments))
         for i, f in enumerate(fragments):
-            centroids[i] = np.mean(self.geometry[f], axis=0)
-            if i > 0:
-                distances[i] = np.linalg.norm(centroids[i] - centroids[0])
+            if i == solute:
+                distances[i] = 0
+            else:
+                solvent_x = self.geometry[f].view(np.ndarray)
+                # cdist is absurdly fast
+                pairwise_distances = cdist(solvent_x, solute_x)
+                distances[i] = np.min(pairwise_distances)
 
         mol = copy.deepcopy(self)
+
         #### reverse order - farthest away comes first
         order = np.argsort(distances)[::-1]
 

diff --git a/setup.py b/setup.py
@@ -11,13 +11,13 @@
     packages=["cctk", "cctk.data", "cctk.groups"],
 #    include_package_data=True,
     package_data={"cctk.data": ["*"], "cctk.groups": ["*"],},
-    version="v0.2.5",
+    version="v0.2.6",
     license="Apache 2.O",
     description="computational chemistry toolkit",
     author="Corin Wagen and Eugene Kwan",
     author_email="corin.wagen@gmail.com",
     url="https://github.com/ekwan/cctk",
-    download_url="https://github.com/ekwan/cctk/archive/v0.2.5.tar.gz",
+    download_url="https://github.com/ekwan/cctk/archive/v0.2.6.tar.gz",
     install_requires=["numpy", "networkx", "importlib_resources", "scipy", "pyahocorasick", "basis_set_exchange"],
     long_description=long_description,
     long_description_content_type='text/markdown',

diff --git a/test/static/new_acetone_10waters.gjf b/test/static/new_acetone_10waters.gjf
@@ -0,0 +1,50 @@
+%nprocshared=4
+%mem=3GB
+#t b3lyp empiricaldispersion=gd3bj gen NMR pop=none int=finegrid nosymm
+
+title
+
+0 1
+ 6          0.59509999   -1.21239996    0.00460000
+ 1          1.58609998   -0.88580000    0.31959999
+ 1          0.66439998   -1.30900002   -1.07889998
+ 1          0.35540000   -2.21000004    0.37279999
+ 6         -0.45060000   -0.13630000    0.30280000
+ 8         -1.52620006   -0.50290000    0.69279999
+ 6         -0.33640000    1.26820004   -0.06850000
+ 1          0.58450001    1.76250005    0.24100000
+ 1         -0.38839999    1.29700005   -1.15690005
+ 1         -1.08399999    1.92879999    0.37059999
+ 8          0.15060000   -5.45139980    0.06700000
+ 1         -0.14160000   -6.40369987   -0.02030000
+ 1         -0.65240002   -4.85550022    0.07330000
+ 8          2.20810008    2.73950005   -2.01079988
+ 1          2.68409991    2.08890009   -1.41910005
+ 1          2.60700011    2.71029997   -2.92729998
+ 8         -2.43860006    3.79489994    1.44229996
+ 1         -3.19260001    3.28940010    1.86160004
+ 1         -2.60859990    3.89479995    0.46190000
+ 8          3.51699996    0.89609998   -0.47130001
+ 1          3.15730000    1.15160000    0.42620000
+ 1          3.44260001   -0.09400000   -0.59060001
+ 8         -4.29230022    0.36210001    0.34750000
+ 1         -3.39549994    0.02130000    0.62940001
+ 1         -4.50799990    1.19449997    0.85799998
+ 8          0.32330000    4.30919981    1.04149997
+ 1         -0.44540000    3.72160006    1.29400003
+ 1          1.14950001    3.99710011    1.51059997
+ 8         -1.63250005   -1.30719995   -2.66939998
+ 1         -0.81050003   -1.06210005   -3.18350005
+ 1         -2.23799992   -0.51349998   -2.61080003
+ 8          3.42820001   -1.82079995   -1.76800001
+ 1          4.26450014   -2.34960008   -1.91310000
+ 1          2.88179994   -2.25149989   -1.04980004
+ 8         -2.07999992   -2.76430011    2.42249990
+ 1         -1.75199997   -1.98179996    1.89320004
+ 1         -1.65160000   -2.75860000    3.32599998
+ 8          2.81480002    2.42569995    1.82219994
+ 1          3.49149990    3.15339994    1.93410003
+ 1          2.82340002    1.83550000    2.62940001
+
+@/n/jacobsen_lab/ekwan/solvent/basis/pcSseg-1.bas
+
diff --git a/test/test_indexing.py b/test/test_indexing.py
@@ -24,6 +24,7 @@ def test_indexing(self):
         self.assertEqual(new_a[1], 8)
 
         a_2d = [[1,1,1],[2,2,2],[3,3,3]]
+
         new_a_2d = cctk.OneIndexedArray(a_2d)
         self.assertEqual(new_a_2d[1,1], 1)
         self.assertEqual(new_a_2d[2,0], 2)
@@ -38,6 +39,9 @@ def test_indexing(self):
         v = new_a_2d[1]
         self.assertTrue(isinstance(v, np.ndarray))
 
+        new_a_2d[1] = np.asarray([4, 4, 4])
+        self.assertListEqual(list(new_a_2d[1]), [4, 4, 4])
+
     def test_smart_indexing(self):
         a1 = cctk.OneIndexedArray([1,2,3])
         a2 = cctk.OneIndexedArray([4,5,6])