Merge pull request #215 from chemprop/fix_extra_atom_bond_feature_bug

Fix bug with extra atom and bond feature scaling
chemprop · Oct 16, 2021 · 04ef57f
2 parents c46e511 + 314aff1
commit 04ef57f
Show file tree

Hide file tree

Showing 2 changed files with 14 additions and 16 deletions.
diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
@@ -27,6 +27,11 @@ jobs:
     - uses: conda-incubator/setup-miniconda@v2
       with:
         python-version: ${{ matrix.python-version }}
+        mamba-version: "*"
+        environment-file: environment.yml
+        channels: conda-forge, defaults
+        activate-environment: chemprop
+
     - name: Set temp directories on Windows
       shell: bash -l {0}
       if: matrix.os == 'windows-latest'
@@ -37,10 +42,7 @@ jobs:
     - name: Install dependencies
       shell: bash -l {0}
       run: |
-        conda install -c conda-forge rdkit
-        python -m pip install --upgrade pip
-        python -m pip install flake8 pytest parameterized
-        python -m pip install git+https://github.com/bp-kelley/descriptastorus
+        mamba install flake8 pytest parameterized
         python -m pip install -e .
     - name: Lint with flake8
       shell: bash -l {0}

diff --git a/chemprop/data/data.py b/chemprop/data/data.py
@@ -236,7 +236,6 @@ def __init__(self, data: List[MoleculeDatapoint]):
         :param data: A list of :class:`MoleculeDatapoint`\ s.
         """
         self._data = data
-        self._scaler = None
         self._batch_graph = None
         self._random = Random()
 
@@ -453,10 +452,7 @@ def normalize_features(self, scaler: StandardScaler = None, replace_nan_token: i
                 (self._data[0].features is None and not scale_bond_features and not scale_atom_descriptors):
             return None
 
-        if scaler is not None:
-            self._scaler = scaler
-
-        elif self._scaler is None:
+        if scaler is None:
             if scale_atom_descriptors and not self._data[0].atom_descriptors is None:
                 features = np.vstack([d.raw_atom_descriptors for d in self._data])
             elif scale_atom_descriptors and not self._data[0].atom_features is None:
@@ -465,23 +461,23 @@ def normalize_features(self, scaler: StandardScaler = None, replace_nan_token: i
                 features = np.vstack([d.raw_bond_features for d in self._data])
             else:
                 features = np.vstack([d.raw_features for d in self._data])
-            self._scaler = StandardScaler(replace_nan_token=replace_nan_token)
-            self._scaler.fit(features)
+            scaler = StandardScaler(replace_nan_token=replace_nan_token)
+            scaler.fit(features)
 
         if scale_atom_descriptors and not self._data[0].atom_descriptors is None:
             for d in self._data:
-                d.set_atom_descriptors(self._scaler.transform(d.raw_atom_descriptors))
+                d.set_atom_descriptors(scaler.transform(d.raw_atom_descriptors))
         elif scale_atom_descriptors and not self._data[0].atom_features is None:
             for d in self._data:
-                d.set_atom_features(self._scaler.transform(d.raw_atom_features))
+                d.set_atom_features(scaler.transform(d.raw_atom_features))
         elif scale_bond_features:
             for d in self._data:
-                d.set_bond_features(self._scaler.transform(d.raw_bond_features))
+                d.set_bond_features(scaler.transform(d.raw_bond_features))
         else:
             for d in self._data:
-                d.set_features(self._scaler.transform(d.raw_features.reshape(1, -1))[0])
+                d.set_features(scaler.transform(d.raw_features.reshape(1, -1))[0])
 
-        return self._scaler
+        return scaler
 
     def normalize_targets(self) -> StandardScaler:
         """