Merge pull request #221 from corochann/extract_total_num

add extract_total_num for API compatibility
chainer · Jul 11, 2018 · 834204c · 834204c
2 parents fb9b684 + 7c18ca2
commit 834204c
Show file tree

Hide file tree

Showing 2 changed files with 25 additions and 0 deletions.
diff --git a/chainer_chemistry/dataset/parsers/data_frame_parser.py b/chainer_chemistry/dataset/parsers/data_frame_parser.py
@@ -187,3 +187,20 @@ def parse(self, df, return_smiles=False, target_index=None,
         return {"dataset": dataset,
                 "smiles": smileses,
                 "is_successful": is_successful}
+
+    def extract_total_num(self, df):
+        """Extracts total number of data which can be parsed
+
+        We can use this method to determine the value fed to `target_index`
+        option of `parse` method. For example, if we want to extract input
+        feature from 10% of whole dataset, we need to know how many samples
+        are in a file. The returned value of this method may not be the same as
+        the final dataset size.
+
+        Args:
+            df (pandas.DataFrame): dataframe to be parsed.
+
+        Returns (int): total number of data which can be parsed.
+
+        """
+        return len(df)
diff --git a/tests/dataset_tests/parsers_tests/test_data_frame_parser.py b/tests/dataset_tests/parsers_tests/test_data_frame_parser.py
@@ -149,5 +149,13 @@ def test_data_frame_parser_return_is_successful(mols, label_a):
         check_features(dataset[i], expect, label_a[i])
 
 
+def test_data_frame_parser_extract_total_num(data_frame):
+    """test `extract_total_num` method."""
+    preprocessor = NFPPreprocessor()
+    parser = DataFrameParser(preprocessor)
+    num = parser.extract_total_num(data_frame)
+    assert num == 3
+
+
 if __name__ == '__main__':
     pytest.main([__file__, '-s', '-v'])