Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

New settings object #1424

Closed
wants to merge 6 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
10 changes: 4 additions & 6 deletions asreview/entry_points/simulate.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,6 @@
from asreview.review.simulate import ReviewSimulate
from asreview.settings import ASReviewSettings
from asreview.types import type_n_queries
from asreview.utils import get_random_state


def _get_dataset_path_from_args(args_dataset):
Expand Down Expand Up @@ -155,23 +154,22 @@ def execute(self, argv): # noqa
settings.from_file(args.config_file)

# Initialize models.
random_state = get_random_state(args.seed)
classifier_model = get_classifier(
settings.model, random_state=random_state, **settings.model_param
settings.model, random_seed=args.seed, **settings.model_param
)
query_model = get_query_model(
settings.query_strategy,
random_state=random_state,
random_seed=args.seed,
**settings.query_param,
)
balance_model = get_balance_model(
settings.balance_strategy,
random_state=random_state,
random_seed=args.seed,
**settings.balance_param,
)
feature_model = get_feature_model(
settings.feature_extraction,
random_state=random_state,
random_seed=args.seed,
**settings.feature_param,
)

Expand Down
16 changes: 13 additions & 3 deletions asreview/models/balance/double.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@

from asreview.models.balance.base import BaseBalance
from asreview.models.balance.simple import SimpleBalance
from asreview.utils import get_random_state
from asreview.utils import SeededRandomState


class DoubleBalance(BaseBalance):
Expand Down Expand Up @@ -52,14 +52,24 @@ class DoubleBalance(BaseBalance):
name = "double"
label = "Dynamic resampling (Double)"

def __init__(self, a=2.155, alpha=0.94, b=0.789, beta=1.0, random_state=None):
def __init__(self, a=2.155, alpha=0.94, b=0.789, beta=1.0, random_seed=None):
super(DoubleBalance, self).__init__()
self.a = a
self.alpha = alpha
self.b = b
self.beta = beta
self.fallback_model = SimpleBalance()
self._random_state = get_random_state(random_state)
self._random_state = SeededRandomState(random_seed)

@property
def _settings(self):
return {
"a": self.a,
"alpha": self.alpha,
"b": self.b,
"beta": self.beta,
"random_seed": self._random_state.random_seed,
}

def sample(self, X, y, train_idx):
"""Resample the training data.
Expand Down
4 changes: 4 additions & 0 deletions asreview/models/balance/simple.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,3 +43,7 @@ def sample(self, X, y, train_idx):
Classification of training samples.
"""
return X[train_idx], y[train_idx]

@property
def _settings(self):
return {}
21 changes: 17 additions & 4 deletions asreview/models/balance/triple.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
from asreview.models.balance.double import _zero_weight
from asreview.models.balance.double import fill_training
from asreview.models.balance.double import random_round
from asreview.utils import get_random_state
from asreview.utils import SeededRandomState


class TripleBalance(BaseBalance):
Expand Down Expand Up @@ -75,7 +75,7 @@ def __init__(
c=0.835,
gamma=2.0,
shuffle=True,
random_state=None,
random_seed=None,
):
"""Initialize the triple balance strategy."""

Expand All @@ -88,9 +88,22 @@ def __init__(
self.gamma = gamma
self.shuffle = shuffle
self.fallback_model = DoubleBalance(
a=a, alpha=alpha, b=b, beta=beta, random_state=random_state
a=a, alpha=alpha, b=b, beta=beta, random_seed=random_seed
)
self._random_state = get_random_state(random_state)
self._random_state = SeededRandomState(random_seed)

@property
def _settings(self):
return {
"a": self.a,
"alpha": self.alpha,
"b": self.b,
"beta": self.beta,
"c": self.c,
"gamma": self.gamma,
"shuffle": self.shuffle,
"random_seed": self._random_state.random_seed,
}

def sample(self, X, y, train_idx, shared):
"""Resample the training data.
Expand Down
10 changes: 7 additions & 3 deletions asreview/models/balance/undersample.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
import numpy as np

from asreview.models.balance.base import BaseBalance
from asreview.utils import get_random_state
from asreview.utils import SeededRandomState


class UndersampleBalance(BaseBalance):
Expand All @@ -36,11 +36,15 @@ class UndersampleBalance(BaseBalance):
name = "undersample"
label = "Undersampling"

def __init__(self, ratio=1.0, random_state=None):
def __init__(self, ratio=1.0, random_seed=None):
"""Initialize the undersampling balance strategy."""
super(UndersampleBalance, self).__init__()
self.ratio = ratio
self._random_state = get_random_state(random_state)
self._random_state = SeededRandomState(random_seed)

@property
def _settings(self):
return {"ratio": self.ratio, "random_seed": self._random_state.random_seed}

def sample(self, X, y, train_idx):
"""Resample the training data.
Expand Down
4 changes: 2 additions & 2 deletions asreview/models/balance/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ def get_balance_class(name):
return _model_class_from_entry_point(name, entry_name="asreview.models.balance")


def get_balance_model(name, *args, random_state=None, **kwargs):
def get_balance_model(name, *args, random_seed=None, **kwargs):
"""Get an instance of a balance model from a string.

Arguments
Expand All @@ -67,6 +67,6 @@ def get_balance_model(name, *args, random_state=None, **kwargs):
"""
balance_class = get_balance_class(name)
try:
return balance_class(*args, random_state=random_state, **kwargs)
return balance_class(*args, random_seed=random_seed, **kwargs)
except TypeError:
return balance_class(*args, **kwargs)
21 changes: 21 additions & 0 deletions asreview/models/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@

import inspect
from abc import ABC
from abc import abstractproperty

import numpy as np

Expand All @@ -31,6 +32,26 @@ class BaseModel(ABC):

name = "base"

@abstractproperty
def _settings(self):
"""Dictionary containing the settings of the model.

This dictionary should contain all the settings necessary to initialize the
model, so that Model(**model._settings) == model."""
return {}

@property
def settings(self):
"""Get the settings of the model object.

Returns
-------
dict
Returns the settings consisting of the name plus
any settings in self._settings.
"""
return {"name": self.name, **self._settings}

@property
def default_param(self):
"""Get the default parameters of the model.
Expand Down
19 changes: 15 additions & 4 deletions asreview/models/classifiers/logistic.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@

from asreview.models.classifiers.base import BaseTrainClassifier
from asreview.models.classifiers.utils import _set_class_weight
from asreview.utils import SeededRandomState


class LogisticClassifier(BaseTrainClassifier):
Expand All @@ -33,30 +34,40 @@ class LogisticClassifier(BaseTrainClassifier):
Parameter inverse to the regularization strength of the model.
class_weight: float
Class weight of the inclusions.
random_state: int, RandomState
Random state for the model.
random_seed: int, SeededRandomState
Integer used to seed random processes.
n_jobs: int
Number of CPU cores used.
"""

name = "logistic"
label = "Logistic regression"

def __init__(self, C=1.0, class_weight=1.0, random_state=None, n_jobs=1):
def __init__(self, C=1.0, class_weight=1.0, random_seed=None, n_jobs=1):
super(LogisticClassifier, self).__init__()
self.C = C
self.class_weight = class_weight
self.n_jobs = n_jobs
self._random_state = SeededRandomState(random_seed)

self._model = LogisticRegression(
solver="liblinear",
C=C,
class_weight=_set_class_weight(class_weight),
n_jobs=n_jobs,
random_state=random_state,
random_state=self._random_state,
)
logging.debug(self._model)

@property
def _settings(self):
return {
"C": self.C,
"class_weight": self.class_weight,
"n_jobs": self.n_jobs,
"random_seed": self._random_state.random_seed,
}

def full_hyper_space(self):
from hyperopt import hp

Expand Down
17 changes: 17 additions & 0 deletions asreview/models/classifiers/lstm_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,23 @@ def __init__(
self._model = None
self.sequence_length = None

@property
def _settings(self):
return {
"embedding_matrix": self.embedding_matrix,
"backwards": self.backwards,
"dropout": self.dropout,
"optimizer": self.optimizer,
"lstm_out_width": self.lstm_out_width,
"learn_rate": self.learn_rate,
"dense_width": self.dense_width,
"verbose": self.verbose,
"batch_size": self.batch_size,
"epochs": self.epochs,
"shuffle": self.shuffle,
"class_weight": self.class_weight,
}

def fit(self, X, y):
# check if tensorflow is available
_check_tensorflow()
Expand Down
17 changes: 17 additions & 0 deletions asreview/models/classifiers/lstm_pool.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,23 @@ def __init__(
self._model = None
self.sequence_length = None

@property
def _settings(self):
return {
"embedding_matrix": self.embedding_matrix,
"backwards": self.backwards,
"dropout": self.dropout,
"optimizer": self.optimizer,
"lstm_out_width": self.lstm_out_width,
"lstm_pool_size": self.lstm_pool_size,
"learn_rate": self.learn_rate,
"verbose": self.verbose,
"batch_size": self.batch_size,
"epochs": self.epochs,
"shuffle": self.shuffle,
"class_weight": self.class_weight[1],
}

def fit(self, X, y):
# check if tensorflow is available
_check_tensorflow()
Expand Down
4 changes: 4 additions & 0 deletions asreview/models/classifiers/nb.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,10 @@ def __init__(self, alpha=3.822):
self._model = MultinomialNB(alpha=alpha)
logging.debug(self._model)

@property
def _settings(self):
return {"alpha": self.alpha}

def full_hyper_space(self):
from hyperopt import hp

Expand Down
14 changes: 14 additions & 0 deletions asreview/models/classifiers/nn_2_layer.py
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,20 @@ def __init__(
self._model = None
self.input_dim = None

@property
def _settings(self):
return {
"dense_width": self.dense_width,
"optimizer": self.optimizer,
"learn_rate": self.learn_rate,
"regularization": self.regularization,
"verbose": self.verbose,
"epochs": self.epochs,
"batch_size": self.batch_size,
"shuffle": self.shuffle,
"class_weight": self.class_weight,
}

def fit(self, X, y):
# check if tensorflow is available
_check_tensorflow()
Expand Down
19 changes: 15 additions & 4 deletions asreview/models/classifiers/rf.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@

from asreview.models.classifiers.base import BaseTrainClassifier
from asreview.models.classifiers.utils import _set_class_weight
from asreview.utils import SeededRandomState


class RandomForestClassifier(BaseTrainClassifier):
Expand All @@ -33,7 +34,8 @@ class RandomForestClassifier(BaseTrainClassifier):
Number of features in the model.
class_weight: float, default=1.0
Class weight of the inclusions.
random_state : int or RandomState, default=None
random_seed : int or SeededRandomState, default=None
Integer used to seed random processes.
Controls both the randomness of the bootstrapping of the samples used
when building trees and the sampling of the features to consider when
looking for the best split at each node.
Expand All @@ -43,21 +45,30 @@ class RandomForestClassifier(BaseTrainClassifier):
label = "Random forest"

def __init__(
self, n_estimators=100, max_features=10, class_weight=1.0, random_state=None
self, n_estimators=100, max_features=10, class_weight=1.0, random_seed=None
):
super(RandomForestClassifier, self).__init__()
self.n_estimators = int(n_estimators)
self.max_features = int(max_features)
self.class_weight = class_weight
self._random_state = random_state
self._random_state = SeededRandomState(random_seed)

self._model = SKRandomForestClassifier(
n_estimators=self.n_estimators,
max_features=self.max_features,
class_weight=_set_class_weight(class_weight),
random_state=random_state,
random_state=self._random_state,
)

@property
def _settings(self):
return {
"n_estimators": self.n_estimators,
"max_features": self.max_features,
"class_weight": self.class_weight,
"random_seed": self._random_state.random_seed,
}

def full_hyper_space(self):
from hyperopt import hp

Expand Down