Option to allow multiple metrics
swansonk14 committed Aug 24, 2020
1 parent 76e9b68 commit 46b9f64
Showing 7 changed files with 177 additions and 166 deletions.
30 changes: 24 additions & 6 deletions chemprop/args.py
@@ -11,6 +11,9 @@
from chemprop.features import get_available_features_generators


Metric = Literal['auc', 'prc-auc', 'rmse', 'mae', 'mse', 'r2', 'accuracy', 'cross_entropy']


def get_checkpoint_paths(checkpoint_path: Optional[str] = None,
checkpoint_paths: Optional[List[str]] = None,
checkpoint_dir: Optional[str] = None,
@@ -170,8 +173,13 @@ class TrainArgs(CommonArgs):
"""
pytorch_seed: int = 0
"""Seed for PyTorch randomness (e.g., random initial weights)."""
metric: Literal['auc', 'prc-auc', 'rmse', 'mae', 'mse', 'r2', 'accuracy', 'cross_entropy'] = None
"""Metric to use during evaluation. Defaults to "auc" for classification and "rmse" for regression."""
metric: Metric = None
"""
Metric to use during evaluation. It is also used with the validation set for early stopping.
Defaults to "auc" for classification and "rmse" for regression.
"""
extra_metrics: List[Metric] = []
"""Additional metrics to use to evaluate the model. Not used for early stopping."""
save_dir: str = None
"""Directory where model checkpoints will be saved."""
save_smiles_splits: bool = False
@@ -252,6 +260,11 @@ def __init__(self, *args, **kwargs) -> None:
self._features_size = None
self._train_data_size = None

@property
def metrics(self) -> List[str]:
"""The list of metrics used for evaluation. Only the first is used for early stopping."""
return [self.metric] + self.extra_metrics

@property
def minimize_score(self) -> bool:
"""Whether the model should try to minimize the score metric or maximize it."""
@@ -334,10 +347,15 @@ def process_args(self) -> None:
else:
self.metric = 'rmse'

if not ((self.dataset_type == 'classification' and self.metric in ['auc', 'prc-auc', 'accuracy']) or
(self.dataset_type == 'regression' and self.metric in ['rmse', 'mae', 'mse', 'r2']) or
(self.dataset_type == 'multiclass' and self.metric in ['cross_entropy', 'accuracy'])):
raise ValueError(f'Metric "{self.metric}" invalid for dataset type "{self.dataset_type}".')
if self.metric in self.extra_metrics:
raise ValueError(f'Metric {self.metric} is both the metric and in extra_metrics. '
f'Please only include it once.')

for metric in self.metrics:
if not ((self.dataset_type == 'classification' and metric in ['auc', 'prc-auc', 'accuracy']) or
(self.dataset_type == 'regression' and metric in ['rmse', 'mae', 'mse', 'r2']) or
(self.dataset_type == 'multiclass' and metric in ['cross_entropy', 'accuracy'])):
raise ValueError(f'Metric "{metric}" invalid for dataset type "{self.dataset_type}".')

# Validate class balance
if self.class_balance and self.dataset_type != 'classification':
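
For reference, a minimal standalone sketch (not taken from this commit) of how the main metric and the extra metrics are combined and validated, mirroring the metrics property and the duplicate check added to TrainArgs above; on the command line, Tap should expose these attributes as --metric and --extra_metrics, though that flag derivation is an assumption here. The helper below is hypothetical and exists only to illustrate the logic.

from typing import List

def combined_metrics(metric: str, extra_metrics: List[str]) -> List[str]:
    """Main metric first (used for early stopping), then the extra evaluation metrics."""
    if metric in extra_metrics:
        # Same duplicate check as TrainArgs.process_args above.
        raise ValueError(f'Metric {metric} is both the metric and in extra_metrics. '
                         f'Please only include it once.')
    return [metric] + extra_metrics

print(combined_metrics('auc', ['prc-auc', 'accuracy']))  # ['auc', 'prc-auc', 'accuracy']
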
1 change: 0 additions & 1 deletion chemprop/constants.py
@@ -1,7 +1,6 @@
# Logger names
TRAIN_LOGGER_NAME = 'train'
HYPEROPT_LOGGER_NAME = 'hyperparameter-optimization'
SKLEARN_TRAIN_LOGGER_NAME = 'sklearn-train'

# Save file names
MODEL_FILE_NAME = 'model.pt'
79 changes: 18 additions & 61 deletions chemprop/sklearn_train.py
@@ -2,19 +2,17 @@
import os
import pickle
from pprint import pformat
from typing import Callable, List, Tuple, Union
from typing import Dict, List, Union

import numpy as np
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
from sklearn.svm import SVC, SVR
from tqdm import trange, tqdm

from chemprop.args import SklearnTrainArgs
from chemprop.constants import SKLEARN_TRAIN_LOGGER_NAME
from chemprop.data import get_data, get_task_names, MoleculeDataset, split_data
from chemprop.features import get_features_generator
from chemprop.train import evaluate_predictions
from chemprop.utils import create_logger, get_metric_func, makedirs, timeit
from chemprop.train import cross_validate, evaluate_predictions


def predict(model: Union[RandomForestRegressor, RandomForestClassifier, SVR, SVC],
@@ -60,7 +58,7 @@ def predict(model: Union[RandomForestRegressor, RandomForestClassifier, SVR, SVC
def single_task_sklearn(model: Union[RandomForestRegressor, RandomForestClassifier, SVR, SVC],
train_data: MoleculeDataset,
test_data: MoleculeDataset,
metric_func: Callable,
metrics: List[str],
args: SklearnTrainArgs,
logger: Logger = None) -> List[float]:
"""
@@ -71,11 +69,11 @@ def single_task_sklearn(model: Union[RandomForestRegressor, RandomForestClassifi
:param model: The scikit-learn model to train.
:param train_data: The training data.
:param test_data: The test data.
:param metric_func: Metric function which takes in a list of targets and a list of predictions.
:param metrics: A list of names of metric functions.
:param args: A :class:`~chemprop.args.SklearnTrainArgs` object containing arguments for
training the scikit-learn model.
:param logger: A logger to record output.
:return: A list of scores on the tasks.
:return: A dictionary mapping each metric in :code:`metrics` to a list of values for each task.
"""
scores = []
num_tasks = train_data.num_tasks()
@@ -102,7 +100,7 @@ def single_task_sklearn(model: Union[RandomForestRegressor, RandomForestClassifi
preds=test_preds,
targets=test_targets,
num_tasks=1,
metric_func=metric_func,
metrics=metrics,
dataset_type=args.dataset_type,
logger=logger
)
@@ -114,9 +112,9 @@ def single_task_sklearn(model: Union[RandomForestRegressor, RandomForestClassifi
def multi_task_sklearn(model: Union[RandomForestRegressor, RandomForestClassifier, SVR, SVC],
train_data: MoleculeDataset,
test_data: MoleculeDataset,
metric_func: Callable,
metrics: List[str],
args: SklearnTrainArgs,
logger: Logger = None) -> List[float]:
logger: Logger = None) -> Dict[str, List[float]]:
"""
Trains a multi-task scikit-learn model, meaning one model is trained simultaneously on all tasks.
@@ -125,11 +123,11 @@ def multi_task_sklearn(model: Union[RandomForestRegressor, RandomForestClassifie
:param model: The scikit-learn model to train.
:param train_data: The training data.
:param test_data: The test data.
:param metric_func: Metric function which takes in a list of targets and a list of predictions.
:param metrics: A list of names of metric functions.
:param args: A :class:`~chemprop.args.SklearnTrainArgs` object containing arguments for
training the scikit-learn model.
:param logger: A logger to record output.
:return: A list of scores on the tasks.
:return: A dictionary mapping each metric in :code:`metrics` to a list of values for each task.
"""
num_tasks = train_data.num_tasks()

@@ -155,22 +153,22 @@ def multi_task_sklearn(model: Union[RandomForestRegressor, RandomForestClassifie
preds=test_preds,
targets=test_data.targets(),
num_tasks=num_tasks,
metric_func=metric_func,
metrics=metrics,
dataset_type=args.dataset_type,
logger=logger
)

return scores


def run_sklearn(args: SklearnTrainArgs, logger: Logger = None) -> List[float]:
def run_sklearn(args: SklearnTrainArgs, logger: Logger = None) -> Dict[str, List[float]]:
"""
Loads data, trains a scikit-learn model, and returns test scores for the model checkpoint with the highest validation score.
:param args: A :class:`~chemprop.args.SklearnTrainArgs` object containing arguments for
loading data and training the scikit-learn model.
:param logger: A logger to record output.
:return: A list of model scores for each task.
:return: A dictionary mapping each metric in :code:`metrics` to a list of values for each task.
"""
if logger is not None:
debug, info = logger.debug, logger.info
@@ -179,8 +177,6 @@ def run_sklearn(args: SklearnTrainArgs, logger: Logger = None) -> List[float]:

debug(pformat(vars(args)))

metric_func = get_metric_func(args.metric)

debug('Loading data')
data = get_data(path=args.data_path, smiles_column=args.smiles_column, target_columns=args.target_columns)
args.task_names = get_task_names(
@@ -239,7 +235,7 @@ def run_sklearn(args: SklearnTrainArgs, logger: Logger = None) -> List[float]:
model=model,
train_data=train_data,
test_data=test_data,
metric_func=metric_func,
metrics=args.metrics,
args=args,
logger=logger
)
@@ -248,59 +244,20 @@ def run_sklearn(args: SklearnTrainArgs, logger: Logger = None) -> List[float]:
model=model,
train_data=train_data,
test_data=test_data,
metric_func=metric_func,
metrics=args.metrics,
args=args,
logger=logger
)

info(f'Test {args.metric} = {np.nanmean(scores)}')
for metric in args.metrics:
info(f'Test {metric} = {np.nanmean(scores[metric])}')

return scores


@timeit(logger_name=SKLEARN_TRAIN_LOGGER_NAME)
def cross_validate_sklearn(args: SklearnTrainArgs) -> Tuple[float, float]:
"""
Runs k-fold cross-validation for a scikit-learn model.
For each of k splits (folds) of the data, trains and tests a model on that split
and aggregates the performance across folds.
:param args: A :class:`~chemprop.args.SklearnTrainArgs` object containing arguments for
loading data and training the scikit-learn model.
:return: A tuple containing the mean and standard deviation performance across folds.
"""
logger = create_logger(name=SKLEARN_TRAIN_LOGGER_NAME, save_dir=args.save_dir, quiet=args.quiet)
info = logger.info if logger is not None else print
init_seed = args.seed
save_dir = args.save_dir

# Run training on different random seeds for each fold
all_scores = []
for fold_num in range(args.num_folds):
info(f'Fold {fold_num}')
args.seed = init_seed + fold_num
args.save_dir = os.path.join(save_dir, f'fold_{fold_num}')
makedirs(args.save_dir)
model_scores = run_sklearn(args, logger)
all_scores.append(model_scores)
all_scores = np.array(all_scores)

# Report scores for each fold
for fold_num, scores in enumerate(all_scores):
info(f'Seed {init_seed + fold_num} ==> test {args.metric} = {np.nanmean(scores):.6f}')

# Report scores across folds
avg_scores = np.nanmean(all_scores, axis=1) # average score for each model across tasks
mean_score, std_score = np.nanmean(avg_scores), np.nanstd(avg_scores)
info(f'Overall test {args.metric} = {mean_score:.6f} +/- {std_score:.6f}')

return mean_score, std_score


def sklearn_train() -> None:
"""Parses scikit-learn training arguments and trains a scikit-learn model.
This is the entry point for the command line command :code:`sklearn_train`.
"""
cross_validate_sklearn(args=SklearnTrainArgs().parse_args())
cross_validate(args=SklearnTrainArgs().parse_args(), train_func=run_sklearn)
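
For reference, a minimal sketch (not from the commit) of consuming the per-metric score dictionary that run_sklearn and evaluate_predictions now return. The shape follows the docstrings above, mapping each metric name to one score per task; the numbers below are hypothetical.

import numpy as np

# Hypothetical scores in the documented shape: {metric: [score per task]}.
scores = {
    'rmse': [0.85, 0.92],
    'mae': [0.61, 0.70],
}

# Same reporting pattern as the end of run_sklearn: average over tasks per metric.
for metric, task_scores in scores.items():
    print(f'Test {metric} = {np.nanmean(task_scores):.6f}')
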
77 changes: 50 additions & 27 deletions chemprop/train/cross_validate.py
@@ -1,26 +1,31 @@
from collections import defaultdict
import csv
from logging import Logger
import os
from typing import Tuple
from typing import Callable, Dict, List, Tuple, Union

import numpy as np

from .run_training import run_training
from chemprop.args import TrainArgs
from chemprop.args import SklearnTrainArgs, TrainArgs
from chemprop.constants import TEST_SCORES_FILE_NAME, TRAIN_LOGGER_NAME
from chemprop.data import get_task_names
from chemprop.utils import create_logger, makedirs, timeit


@timeit(logger_name=TRAIN_LOGGER_NAME)
def cross_validate(args: TrainArgs) -> Tuple[float, float]:
def cross_validate(args: Union[TrainArgs, SklearnTrainArgs],
train_func: Callable[[Union[TrainArgs, SklearnTrainArgs], Logger], Dict[str, List[float]]]
) -> Tuple[float, float]:
"""
Runs k-fold cross-validation for a Chemprop model.
Runs k-fold cross-validation.
For each of k splits (folds) of the data, trains and tests a model on that split
and aggregates the performance across folds.
:param args: A :class:`~chemprop.args.TrainArgs` object containing arguments for
loading data and training the Chemprop model.
:param train_func: Function which runs training.
:return: A tuple containing the mean and standard deviation performance across folds.
"""
logger = create_logger(name=TRAIN_LOGGER_NAME, save_dir=args.save_dir, quiet=args.quiet)
@@ -37,47 +42,65 @@ def cross_validate(args: TrainArgs) -> Tuple[float, float]:
)

# Run training on different random seeds for each fold
all_scores = []
all_scores = defaultdict(list)
for fold_num in range(args.num_folds):
info(f'Fold {fold_num}')
args.seed = init_seed + fold_num
args.save_dir = os.path.join(save_dir, f'fold_{fold_num}')
makedirs(args.save_dir)
model_scores = run_training(args, logger)
all_scores.append(model_scores)
all_scores = np.array(all_scores)
model_scores = train_func(args, logger)
for metric, scores in model_scores.items():
all_scores[metric].append(scores)
all_scores = dict(all_scores)

# Convert scores to numpy arrays
for metric, scores in all_scores.items():
all_scores[metric] = np.array(scores)

# Report results
info(f'{args.num_folds}-fold cross validation')

# Report scores for each fold
for fold_num, scores in enumerate(all_scores):
info(f'\tSeed {init_seed + fold_num} ==> test {args.metric} = {np.nanmean(scores):.6f}')
for fold_num in range(args.num_folds):
for metric, scores in all_scores.items():
info(f'\tSeed {init_seed + fold_num} ==> test {metric} = {np.nanmean(scores[fold_num]):.6f}')

if args.show_individual_scores:
for task_name, score in zip(args.task_names, scores):
info(f'\t\tSeed {init_seed + fold_num} ==> test {task_name} {args.metric} = {score:.6f}')
if args.show_individual_scores:
for task_name, score in zip(args.task_names, scores[fold_num]):
info(f'\t\tSeed {init_seed + fold_num} ==> test {task_name} {metric} = {score:.6f}')

# Report scores across models
avg_scores = np.nanmean(all_scores, axis=1) # average score for each model across tasks
mean_score, std_score = np.nanmean(avg_scores), np.nanstd(avg_scores)
info(f'Overall test {args.metric} = {mean_score:.6f} +/- {std_score:.6f}')
# Report scores across folds
for metric, scores in all_scores.items():
avg_scores = np.nanmean(scores, axis=1) # average score for each model across tasks
mean_score, std_score = np.nanmean(avg_scores), np.nanstd(avg_scores)
info(f'Overall test {metric} = {mean_score:.6f} +/- {std_score:.6f}')

if args.show_individual_scores:
for task_num, task_name in enumerate(args.task_names):
info(f'\tOverall test {task_name} {args.metric} = '
f'{np.nanmean(all_scores[:, task_num]):.6f} +/- {np.nanstd(all_scores[:, task_num]):.6f}')
if args.show_individual_scores:
for task_num, task_name in enumerate(args.task_names):
info(f'\tOverall test {task_name} {metric} = '
f'{np.nanmean(scores[:, task_num]):.6f} +/- {np.nanstd(scores[:, task_num]):.6f}')

# Save scores
with open(os.path.join(save_dir, TEST_SCORES_FILE_NAME), 'w') as f:
writer = csv.writer(f)
writer.writerow(['Task', f'Mean {args.metric}', f'Standard deviation {args.metric}'] +
[f'Fold {i} {args.metric}' for i in range(args.num_folds)])

header = ['Task']
for metric in args.metrics:
header += [f'Mean {metric}', f'Standard deviation {metric}'] + \
[f'Fold {i} {metric}' for i in range(args.num_folds)]
writer.writerow(header)

for task_num, task_name in enumerate(args.task_names):
task_scores = all_scores[:, task_num]
mean, std = np.nanmean(task_scores), np.nanstd(task_scores)
writer.writerow([task_name, mean, std] + task_scores.tolist())
row = []
for metric, scores in all_scores.items():
task_scores = scores[:, task_num]
mean, std = np.nanmean(task_scores), np.nanstd(task_scores)
row += [task_name, mean, std] + task_scores.tolist()
writer.writerow(row)

# Determine mean and std score of main metric
avg_scores = np.nanmean(all_scores[args.metric], axis=1)
mean_score, std_score = np.nanmean(avg_scores), np.nanstd(avg_scores)

return mean_score, std_score

@@ -87,4 +110,4 @@ def chemprop_train() -> None:
This is the entry point for the command line command :code:`chemprop_train`.
"""
cross_validate(args=TrainArgs().parse_args())
cross_validate(args=TrainArgs().parse_args(), train_func=run_training)
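
For reference, a standalone sketch (not from the commit) of the aggregation pattern used in cross_validate above: per-fold dictionaries of per-task scores are collected into a defaultdict, stacked into arrays of shape (num_folds, num_tasks), averaged over tasks, and then summarized across folds. The fold results below are hypothetical.

from collections import defaultdict
import numpy as np

# Hypothetical per-fold outputs of train_func: {metric: [score per task]}.
fold_results = [
    {'auc': [0.81, 0.78], 'prc-auc': [0.55, 0.49]},
    {'auc': [0.84, 0.80], 'prc-auc': [0.58, 0.51]},
]

all_scores = defaultdict(list)
for model_scores in fold_results:
    for metric, scores in model_scores.items():
        all_scores[metric].append(scores)

for metric, scores in all_scores.items():
    scores = np.array(scores)                # shape: (num_folds, num_tasks)
    avg_scores = np.nanmean(scores, axis=1)  # average over tasks for each fold
    print(f'Overall test {metric} = {np.nanmean(avg_scores):.6f} +/- {np.nanstd(avg_scores):.6f}')
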
