Skip to content

Commit

Permalink
Merge pull request #221 from corochann/extract_total_num
Browse files Browse the repository at this point in the history
add extract_total_num for API compatibility
  • Loading branch information
mottodora committed Jul 11, 2018
2 parents fb9b684 + 7c18ca2 commit 834204c
Show file tree
Hide file tree
Showing 2 changed files with 25 additions and 0 deletions.
17 changes: 17 additions & 0 deletions chainer_chemistry/dataset/parsers/data_frame_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -187,3 +187,20 @@ def parse(self, df, return_smiles=False, target_index=None,
return {"dataset": dataset,
"smiles": smileses,
"is_successful": is_successful}

def extract_total_num(self, df):
    """Extracts the total number of data which can be parsed.

    We can use this method to determine the value fed to the `target_index`
    option of the `parse` method. For example, if we want to extract input
    features from 10% of the whole dataset, we need to know how many
    samples are in a file. The returned value of this method may not be
    the same as the final dataset size.

    Args:
        df (pandas.DataFrame): dataframe to be parsed.

    Returns (int): total number of data which can be parsed.

    """
    # Only the length of `df` is taken; nothing is parsed here.
    return len(df)
8 changes: 8 additions & 0 deletions tests/dataset_tests/parsers_tests/test_data_frame_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -149,5 +149,13 @@ def test_data_frame_parser_return_is_successful(mols, label_a):
check_features(dataset[i], expect, label_a[i])


def test_data_frame_parser_extract_total_num(data_frame):
    """Test that `extract_total_num` reports the size of the data frame."""
    preprocessor = NFPPreprocessor()
    parser = DataFrameParser(preprocessor)
    num = parser.extract_total_num(data_frame)
    # NOTE(review): presumably the `data_frame` fixture (defined outside
    # this view) holds 3 entries -- confirm against the fixture.
    assert num == 3


if __name__ == '__main__':
    # Propagate pytest's exit status so that running this file directly
    # reports test failures to the caller instead of always exiting 0.
    raise SystemExit(pytest.main([__file__, '-s', '-v']))

0 comments on commit 834204c

Please sign in to comment.