Allow for Running With Empty Test Set and Fix cv-no-test (#284)
* Informative cv error

* Change cv split patterning for more even splits

* Bypass test set evaluation if test set is empty

* Allow for empty test set specified in split sizes

* Update split size errors to include the splits at issue

* Fill in missing docstrings
cjmcgill committed May 26, 2022
1 parent ff6eca4 commit 357cbd1
Showing 4 changed files with 83 additions and 51 deletions.
20 changes: 12 additions & 8 deletions chemprop/args.py
@@ -9,6 +9,7 @@

import torch
from tap import Tap # pip install typed-argument-parser (https://github.com/swansonk14/typed-argument-parser)
import numpy as np

import chemprop.data.utils
from chemprop.data import set_cache_mol, empty_cache
@@ -649,27 +650,30 @@ def process_args(self) -> None:
self.split_sizes = (1., 0., 0.)

else:
if sum(self.split_sizes) != 1.:
if not np.isclose(sum(self.split_sizes), 1):
raise ValueError(f'Provided split sizes of {self.split_sizes} do not sum to 1.')
if any([size < 0 for size in self.split_sizes]):
raise ValueError(f'Split sizes must be non-negative. Received split sizes: {self.split_sizes}')


if len(self.split_sizes) not in [2,3]:
raise ValueError(f'Three values should be provided for train/val/test split sizes. Instead received {len(self.split_sizes)} value(s).')

if self.separate_val_path is None and self.separate_test_path is None: # separate data paths are not provided
if len(self.split_sizes) != 3:
raise ValueError(f'Three values should be provided for train/val/test split sizes. Instead received {len(self.split_sizes)} value(s).')
if 0. in self.split_sizes:
raise ValueError(f'Provided split sizes must be nonzero if no separate data files are provided. Received split sizes of {self.split_sizes}.')
if self.split_sizes[0] == 0.:
raise ValueError(f'Provided split size for train split must be nonzero. Received split size {self.split_sizes[0]}')
if self.split_sizes[1] == 0.:
raise ValueError(f'Provided split size for validation split must be nonzero. Received split size {self.split_sizes[1]}')

elif self.separate_val_path is not None and self.separate_test_path is None: # separate val path only
if len(self.split_sizes) == 2: # allow input of just 2 values
self.split_sizes = (self.split_sizes[0], 0., self.split_sizes[1])
if self.split_sizes[0] == 0.:
raise ValueError('Provided split size for train split must be nonzero.')
if self.split_sizes[1] != 0.:
raise ValueError('Provided split size for validation split must be 0 because validation set is provided separately.')
if self.split_sizes[2] == 0.:
raise ValueError('Provided split size for test split must be nonzero.')
raise ValueError(f'Provided split size for validation split must be 0 because validation set is provided separately. Received split size {self.split_sizes[1]}')

elif self.separate_val_path is None and self.separate_test_path is not None: # separate test path only
if len(self.split_sizes) == 2: # allow input of just 2 values
@@ -679,12 +683,12 @@ def process_args(self) -> None:
if self.split_sizes[1] == 0.:
raise ValueError('Provided split size for validation split must be nonzero.')
if self.split_sizes[2] != 0.:
raise ValueError('Provided split size for test split must be 0 because test set is provided separately.')
raise ValueError(f'Provided split size for test split must be 0 because test set is provided separately. Received split size {self.split_sizes[2]}')


else: # both separate data paths are provided
if self.split_sizes != (1., 0., 0.):
raise ValueError(f'Separate data paths were provided for val and test splits. Split sizes should not also be provided.')
raise ValueError(f'Separate data paths were provided for val and test splits. Split sizes should not also be provided. Received split sizes: {self.split_sizes}')

# Test settings
if self.test:
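The change from an exact float comparison to `np.isclose` matters because common split fractions do not sum to exactly 1.0 in IEEE-754 arithmetic. A minimal sketch of the difference, using illustrative split sizes that are not taken from the commit:

```python
import numpy as np

# A common train/val/test split whose floating-point sum is not exactly 1.0
split_sizes = (0.7, 0.2, 0.1)

print(sum(split_sizes))                     # 0.9999999999999999
print(sum(split_sizes) != 1.)               # True  -> the old exact check would raise
print(not np.isclose(sum(split_sizes), 1))  # False -> the tolerance-based check passes
```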
8 changes: 6 additions & 2 deletions chemprop/data/data.py
@@ -398,7 +398,9 @@ def targets(self) -> List[List[Optional[float]]]:

def gt_targets(self) -> List[np.ndarray]:
"""
Returns indications of whether the targets associated with each molecule are greater-than inequalities.
:return: A list of lists of booleans indicating whether the targets in those positions are greater-than inequality targets.
"""
if not hasattr(self._data[0], 'gt_targets'):
return None
@@ -407,7 +409,9 @@ def gt_targets(self) -> List[np.ndarray]:

def lt_targets(self) -> List[np.ndarray]:
"""
Returns indications of whether the targets associated with each molecule are less-than inequalities.
:return: A list of lists of booleans indicating whether the targets in those positions are less-than inequality targets.
"""
if not hasattr(self._data[0], 'lt_targets'):
return None
8 changes: 5 additions & 3 deletions chemprop/data/utils.py
@@ -515,7 +515,9 @@ def split_data(data: MoleculeDataset,
validation, and test splits of the data.
"""
if not (len(sizes) == 3 and np.isclose(sum(sizes), 1)):
raise ValueError(f"Invalid train/val/test splits! got: {sizes}")
raise ValueError(f"Split sizes do not sum to 1. Received train/val/test splits: {sizes}")
if any([size < 0 for size in sizes]):
raise ValueError(f"Split sizes must be non-negative. Received train/val/test splits: {sizes}")

random = Random(seed)

@@ -539,11 +541,11 @@

elif split_type in {'cv', 'cv-no-test'}:
if num_folds <= 1 or num_folds > len(data):
raise ValueError('Number of folds for cross-validation must be between 2 and len(data), inclusive.')
raise ValueError(f'Number of folds for cross-validation must be between 2 and the number of valid datapoints ({len(data)}), inclusive.')

random = Random(0)

indices = np.repeat(np.arange(num_folds), 1 + len(data) // num_folds)[:len(data)]
indices = np.tile(np.arange(num_folds), 1 + len(data) // num_folds)[:len(data)]
random.shuffle(indices)
test_index = seed % num_folds
val_index = (seed + 1) % num_folds
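A small illustrative comparison of the two fold-assignment patterns (the fold count and dataset size here are made up to show the failure mode): `np.repeat` groups all indices for a fold together, so the trailing folds can end up undersized or even empty, while `np.tile` cycles through the folds so fold sizes differ by at most one.

```python
import numpy as np

num_folds, n_data = 5, 8  # hypothetical values chosen for illustration

# Old pattern: fold labels are grouped up front, and fold 4 never appears
old = np.repeat(np.arange(num_folds), 1 + n_data // num_folds)[:n_data]
print(old, np.bincount(old, minlength=num_folds))  # [0 0 1 1 2 2 3 3] [2 2 2 2 0]

# New pattern: fold labels cycle, so every fold is populated and sizes are even
new = np.tile(np.arange(num_folds), 1 + n_data // num_folds)[:n_data]
print(new, np.bincount(new, minlength=num_folds))  # [0 1 2 3 4 0 1 2] [2 2 2 1 1]
```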
98 changes: 60 additions & 38 deletions chemprop/train/run_training.py
@@ -146,6 +146,20 @@ def run_training(args: TrainArgs,
debug(f'Total size = {len(data):,} | '
f'train size = {len(train_data):,} | val size = {len(val_data):,} | test size = {len(test_data):,}')

if len(val_data) == 0:
raise ValueError('The validation data split is empty. During normal chemprop training (non-sklearn functions), \
a validation set is required to conduct early stopping according to the selected evaluation metric. This \
may have occurred because validation data provided with `--separate_val_path` was empty or contained only invalid molecules.')

if len(test_data) == 0:
debug('The test data split is empty. This may be either because splitting with no test set was selected, \
such as with `cv-no-test`, or because test data provided with `--separate_test_path` was empty or contained only invalid molecules. \
Performance on the test set will not be evaluated and metric scores will return `nan` for each task.')
empty_test_set = True
else:
empty_test_set = False


# Initialize scaler and scale training targets by subtracting mean and dividing standard deviation (regression only)
if args.dataset_type == 'regression':
debug('Fitting scaler')
@@ -306,13 +320,51 @@
info(f'Model {model_idx} best validation {args.metric} = {best_score:.6f} on epoch {best_epoch}')
model = load_checkpoint(os.path.join(save_dir, MODEL_FILE_NAME), device=args.device, logger=logger)

test_preds = predict(
model=model,
data_loader=test_data_loader,
scaler=scaler
)
test_scores = evaluate_predictions(
preds=test_preds,
if empty_test_set:
info(f'Model {model_idx} provided with no test set, no metric evaluation will be performed.')
else:
test_preds = predict(
model=model,
data_loader=test_data_loader,
scaler=scaler
)
test_scores = evaluate_predictions(
preds=test_preds,
targets=test_targets,
num_tasks=args.num_tasks,
metrics=args.metrics,
dataset_type=args.dataset_type,
gt_targets=test_data.gt_targets(),
lt_targets=test_data.lt_targets(),
logger=logger
)

if len(test_preds) != 0:
sum_test_preds += np.array(test_preds)

# Average test score
for metric, scores in test_scores.items():
avg_test_score = np.nanmean(scores)
info(f'Model {model_idx} test {metric} = {avg_test_score:.6f}')
writer.add_scalar(f'test_{metric}', avg_test_score, 0)

if args.show_individual_scores and args.dataset_type != 'spectra':
# Individual test scores
for task_name, test_score in zip(args.task_names, scores):
info(f'Model {model_idx} test {task_name} {metric} = {test_score:.6f}')
writer.add_scalar(f'test_{task_name}_{metric}', test_score, n_iter)
writer.close()

# Evaluate ensemble on test set
if empty_test_set:
ensemble_scores = {
metric: [np.nan for task in args.task_names] for metric in args.metrics
}
else:
avg_test_preds = (sum_test_preds / args.ensemble_size).tolist()

ensemble_scores = evaluate_predictions(
preds=avg_test_preds,
targets=test_targets,
num_tasks=args.num_tasks,
metrics=args.metrics,
@@ -322,36 +374,6 @@
logger=logger
)

if len(test_preds) != 0:
sum_test_preds += np.array(test_preds)

# Average test score
for metric, scores in test_scores.items():
avg_test_score = np.nanmean(scores)
info(f'Model {model_idx} test {metric} = {avg_test_score:.6f}')
writer.add_scalar(f'test_{metric}', avg_test_score, 0)

if args.show_individual_scores and args.dataset_type != 'spectra':
# Individual test scores
for task_name, test_score in zip(args.task_names, scores):
info(f'Model {model_idx} test {task_name} {metric} = {test_score:.6f}')
writer.add_scalar(f'test_{task_name}_{metric}', test_score, n_iter)
writer.close()

# Evaluate ensemble on test set
avg_test_preds = (sum_test_preds / args.ensemble_size).tolist()

ensemble_scores = evaluate_predictions(
preds=avg_test_preds,
targets=test_targets,
num_tasks=args.num_tasks,
metrics=args.metrics,
dataset_type=args.dataset_type,
gt_targets=test_data.gt_targets(),
lt_targets=test_data.lt_targets(),
logger=logger
)

for metric, scores in ensemble_scores.items():
# Average ensemble score
avg_ensemble_test_score = np.nanmean(scores)
@@ -367,7 +389,7 @@
json.dump(ensemble_scores, f, indent=4, sort_keys=True)

# Optionally save test preds
if args.save_preds:
if args.save_preds and not empty_test_set:
test_preds_dataframe = pd.DataFrame(data={'smiles': test_data.smiles()})

for i, task_name in enumerate(args.task_names):
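When the test split is empty, the ensemble scores are filled with `nan` for every task and metric, so downstream averaging reports `nan` rather than raising. A minimal sketch of that behavior; the metric and task names here are hypothetical, not taken from the commit:

```python
import numpy as np

metrics = ['rmse']                  # hypothetical metric list
task_names = ['task_a', 'task_b']   # hypothetical task names

# Placeholder scores for an empty test set, mirroring the pattern in the diff
ensemble_scores = {metric: [np.nan for _ in task_names] for metric in metrics}

# np.nanmean over an all-NaN slice returns nan (with a RuntimeWarning),
# so the reported average ensemble test score is nan instead of an error
print(np.nanmean(ensemble_scores['rmse']))  # nan
```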
