Skip to content

Commit

Permalink
fixes #500
Browse files Browse the repository at this point in the history
  • Loading branch information
amaiya committed Jun 14, 2023
1 parent 39dd6ea commit 5eeeb7a
Show file tree
Hide file tree
Showing 4 changed files with 40 additions and 19 deletions.
4 changes: 2 additions & 2 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,13 @@ Most recent releases are shown at the top. Each release shows:
- **Changed**: Additional parameters, changes to inputs or outputs, etc
- **Fixed**: Bug fixes that don't change documented behaviour

## 0.37.2 (TBD)
## 0.37.2 (2023-06-14)

### new:
- N/A

### changed
- N/A
- `text.models`, `vision.models`, and `tabular.models` now all automatically set metrics to use `binary_accuracy` for multilabel problems

### fixed:
- fix `validate` to support multilabel classification problems (#498)
Expand Down
16 changes: 11 additions & 5 deletions ktrain/tabular/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ def _tabular_model(
train_data,
multilabel=None,
is_regression=False,
metrics=["accuracy"],
metrics=None,
hidden_layers=[1000, 500],
hidden_dropouts=[0.0, 0.5],
bn=False,
Expand All @@ -40,7 +40,8 @@ def _tabular_model(
If false, binary/multiclass model will be returned.
If None, multilabel will be inferred from data.
is_regression(bool): If True, will build a regression model, else classification model.
metrics(list): list of metrics to use
metrics(list): List of metrics to use. If None: 'accuracy' is used for binary/multiclass,
'binary_accuracy' is used for multilabel classification, and 'mae' is used for regression.
hidden_layers(list): number of units in each hidden layer of NN
hidden_dropouts(list): Dropout values after each hidden layer of NN
bn(bool): If True, BatchNormalization will be used before each fully-connected layer in NN
Expand Down Expand Up @@ -73,8 +74,12 @@ def _tabular_model(
loss_func = "mse"
activation = "linear"
else: # classification
if metrics is None:
# set metrics
if multilabel and metrics is None:
metrics = ["binary_accuracy"]
elif metrics is None:
metrics = ["accuracy"]

# set number of classes and multilabel flag
num_classes = U.nclasses_from_data(train_data)

Expand Down Expand Up @@ -140,7 +145,7 @@ def tabular_classifier(
name,
train_data,
multilabel=None,
metrics=["accuracy"],
metrics=None,
hidden_layers=[1000, 500],
hidden_dropouts=[0.0, 0.5],
bn=False,
Expand All @@ -156,7 +161,8 @@ def tabular_classifier(
multilabel (bool): If True, multilabel model will be returned.
If false, binary/multiclass model will be returned.
If None, multilabel will be inferred from data.
metrics(list): list of metrics to use
metrics(list): List of metrics to use. If None: 'accuracy' is used for binary/multiclass,
'binary_accuracy' is used for multilabel classification, and 'mae' is used for regression.
hidden_layers(list): number of units in each hidden layer of NN
hidden_dropouts(list): Dropout values after each hidden layer of NN
bn(bool): If True, BatchNormalization will be used before each fully-connected layer in NN
Expand Down
18 changes: 12 additions & 6 deletions ktrain/text/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ def _text_model(
preproc=None,
multilabel=None,
classification=True,
metrics=["accuracy"],
metrics=None,
verbose=1,
):
"""
Expand All @@ -82,7 +82,8 @@ def _text_model(
If None, multilabel will be inferred from data.
classification(bool): If True, will build a text classification model.
Otherwise, a text regression model will be returned.
metrics(list): list of metrics to use
metrics(list): List of metrics to use. If None: 'accuracy' is used for binary/multiclass classification,
'binary_accuracy' is used for multilabel classification, and 'mae' is used for regression.
verbose (boolean): verbosity of output
Return:
model (Model): A Keras Model instance
Expand Down Expand Up @@ -135,8 +136,6 @@ def _text_model(
maxlen = U.shape_from_data(train_data)[1]
U.vprint("maxlen is %s" % (maxlen), verbose=verbose)
else: # classification
if metrics is None:
metrics = ["accuracy"]
# set number of classes and multilabel flag
num_classes = U.nclasses_from_data(train_data)

Expand All @@ -151,6 +150,12 @@ def _text_model(
name = FASTTEXT
U.vprint("Is Multi-Label? %s" % (multilabel), verbose=verbose)

# set metrics
if multilabel and metrics is None:
metrics = ["binary_accuracy"]
elif metrics is None:
metrics = ["accuracy"]

# set loss and activations
loss_func = "categorical_crossentropy"
activation = "softmax"
Expand Down Expand Up @@ -548,7 +553,7 @@ def _build_bigru(


def text_classifier(
name, train_data, preproc=None, multilabel=None, metrics=["accuracy"], verbose=1
name, train_data, preproc=None, multilabel=None, metrics=None, verbose=1
):
"""
```
Expand All @@ -570,7 +575,8 @@ def text_classifier(
multilabel (bool): If True, multilabel model will be returned.
If false, binary/multiclass model will be returned.
If None, multilabel will be inferred from data.
metrics(list): metrics to use
metrics(list): List of metrics to use. If None: 'accuracy' is used for binary/multiclass classification,
'binary_accuracy' is used for multilabel classification, and 'mae' is used for regression.
verbose (boolean): verbosity of output
Return:
model (Model): A Keras Model instance
Expand Down
21 changes: 15 additions & 6 deletions ktrain/vision/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -118,7 +118,7 @@ def image_classifier(
train_data,
val_data=None,
freeze_layers=None,
metrics=["accuracy"],
metrics=None,
optimizer_name=U.DEFAULT_OPT,
multilabel=None,
pt_fc=[],
Expand All @@ -140,9 +140,11 @@ def image_classifier(
If None, then all layers except new Dense layers
will be frozen/untrainable.
metrics (list): metrics to use
metrics(list): List of metrics to use. If None: 'accuracy' is used for binary/multiclass,
'binary_accuracy' is used for multilabel classification, and 'mae' is used for regression
optimizer_name(str|obj): name of Keras optimizer (e.g., 'adam', 'sgd') or instance of keras Optimizer
multilabel(bool): If True, model will be built to support
multilabel classificaiton (labels are not mutually exclusive).
multilabel classification (labels are not mutually exclusive).
If False, binary/multiclassification model will be returned.
If None, multilabel status will be inferred from data.
pt_fc (list of ints): number of hidden units in extra Dense layers
Expand Down Expand Up @@ -234,7 +236,7 @@ def image_model(
train_data,
val_data=None,
freeze_layers=None,
metrics=["accuracy"],
metrics=None,
optimizer_name=U.DEFAULT_OPT,
multilabel=None,
pt_fc=[],
Expand All @@ -255,7 +257,8 @@ def image_model(
freeze_layers (int): number of beginning layers to make untrainable
If None, then all layers except new Dense layers
will be frozen/untrainable.
metrics (list): metrics to use
metrics(list): List of metrics to use. If None: 'accuracy' is used for binary/multiclass,
'binary_accuracy' is used for multilabel classification, and 'mae' is used for regression
optimizer_name(str|obj): name of Keras optimizer (e.g., 'adam', 'sgd') or instance of Keras optimizer
multilabel(bool): If True, model will be built to support
multilabel classification (labels are not mutually exclusive).
Expand Down Expand Up @@ -328,6 +331,14 @@ def image_model(
if not multilabel and len(train_data[0][-1].shape) == 1:
is_regression = True

# set metrics
if is_regression and metrics is None:
metrics = ["mae"]
elif multilabel and metrics is None:
metrics = ["binary_accuracy"]
elif metrics is None:
metrics = ["accuracy"]

# set loss and activations
loss_func = "categorical_crossentropy"
activation = "softmax"
Expand All @@ -337,8 +348,6 @@ def image_model(
elif is_regression:
loss_func = "mse"
activation = None
if metrics == ["accuracy"]:
metrics = ["mae"]

U.vprint("Is Multi-Label? %s" % (multilabel), verbose=verbose)
U.vprint("Is Regression? %s" % (is_regression), verbose=verbose)
Expand Down

0 comments on commit 5eeeb7a

Please sign in to comment.