feat: add new default metrics
Optimox authored and eduardocarvp committed Dec 14, 2020
1 parent a1041cc commit 0fe5b72
Showing 5 changed files with 65 additions and 8 deletions.
25 changes: 23 additions & 2 deletions README.md
@@ -70,7 +70,21 @@ clf.fit(
preds = clf.predict(X_test)
```

-### Custom early_stopping_metrics
+### Default eval_metric

+A few classical evaluation metrics are implemented (see the section below for custom ones):
+- binary classification metrics: 'auc', 'accuracy', 'balanced_accuracy', 'logloss'
+- multiclass classification: 'accuracy', 'balanced_accuracy', 'logloss'
+- regression: 'mse', 'mae', 'rmse', 'rmsle'


+Important note: 'rmsle' automatically clips negative predictions to 0, because the model can predict negative values.
+To match the reported scores, apply `np.clip(clf.predict(X_predict), a_min=0, a_max=None)` to your predictions.
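
For instance, a minimal sketch of training with these metrics and reproducing the 'rmsle' clipping (data names are placeholders; the `eval_metric` argument mirrors the notebook changes in this commit):

```python
import numpy as np
from pytorch_tabnet.tab_model import TabNetRegressor

clf = TabNetRegressor()
clf.fit(
    X_train=X_train, y_train=y_train,  # placeholders for your own data
    eval_set=[(X_valid, y_valid)],
    eval_metric=['rmsle', 'mae', 'rmse', 'mse'],
)
# 'rmsle' clips negative predictions before scoring, so clip here to match:
preds = np.clip(clf.predict(X_valid), a_min=0, a_max=None)
```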


+### Custom evaluation metrics

+It's easy to create a metric that matches your specific needs. Here is an example for the Gini score (note that you need to specify whether the metric should be maximized or not):

```python
from pytorch_tabnet.metrics import Metric
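from sklearn.metrics import roc_auc_score

# The remainder of this example is collapsed in this diff; below is a hedged
# sketch consistent with the Metric API added by this commit (the exact
# upstream example may differ).
class Gini(Metric):
    def __init__(self):
        self._name = "gini"     # name reported in the training logs
        self._maximize = True   # a larger Gini score is better

    def __call__(self, y_true, y_score):
        auc = roc_auc_score(y_true, y_score[:, 1])
        return max(2 * auc - 1, 0.0)

# The custom metric can then presumably be passed directly to fit, e.g.
# clf.fit(..., eval_metric=[Gini]).
```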
@@ -137,6 +151,9 @@ A self supervised loss greater than 1 means that your model is reconstructing wo

A complete example can be found within the notebook `pretraining_example.ipynb`.

+/!\ : the current implementation tries to reconstruct the original inputs, but Batch Normalization applies a batch-dependent transformation that cannot be recovered from a single row, which makes the reconstruction harder. Lowering the `batch_size` might make the pretraining easier.
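
A minimal sketch of such a lower-batch-size pretraining run (data names are placeholders; `TabNetPretrainer` and `pretraining_ratio` are documented below and demonstrated in `pretraining_example.ipynb`):

```python
from pytorch_tabnet.pretraining import TabNetPretrainer

unsupervised_model = TabNetPretrainer()
unsupervised_model.fit(
    X_train=X_train,        # placeholder for your unlabeled training data
    eval_set=[X_valid],
    pretraining_ratio=0.8,  # fraction of input features masked per sample
    batch_size=256,         # smaller batches may ease BN-based reconstruction
)
```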


# Useful links

- [explanatory video](https://youtu.be/ysBaZO8YmX8)
@@ -275,10 +292,12 @@ A complete example can be found within the notebook `pretraining_example.ipynb`.
- `patience` : int (default = 15)

Number of consecutive epochs without improvement before performing early stopping.

+If patience is set to 0, no early stopping will be performed (see the sketch after this parameter list).

Note that if patience is enabled, the weights from the best epoch will automatically be loaded at the end of `fit`.

-- weights : int or dict (default=0)
+- `weights` : int or dict (default=0)

/!\ Only for TabNetClassifier
Sampling parameter
@@ -313,5 +332,7 @@ A complete example can be found within the notebook `pretraining_example.ipynb`.
List of custom callbacks

- `pretraining_ratio` : float

+/!\ TabNetPretrainer only: percentage of input features to mask during pretraining.

Should be between 0 and 1. The larger the ratio, the harder the reconstruction task.
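
As referenced in the `patience` entry above, a sketch combining these fit-time parameters (illustrative values; data names are placeholders):

```python
from pytorch_tabnet.tab_model import TabNetClassifier

clf = TabNetClassifier()
clf.fit(
    X_train=X_train, y_train=y_train,
    eval_set=[(X_valid, y_valid)],
    patience=0,  # 0 disables early stopping entirely
    weights=1,   # inverse class-occurrence sampling (TabNetClassifier only)
)
```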
9 changes: 5 additions & 4 deletions multi_regression_example.ipynb
@@ -193,7 +193,7 @@
"metadata": {},
"outputs": [],
"source": [
"max_epochs = 10 if not os.getenv(\"CI\", False) else 2"
"max_epochs = 1000 if not os.getenv(\"CI\", False) else 2"
]
},
{
@@ -208,6 +208,7 @@
" X_train=X_train, y_train=y_train,\n",
" eval_set=[(X_train, y_train), (X_valid, y_valid)],\n",
" eval_name=['train', 'valid'],\n",
" eval_metric=['rmsle', 'mae', 'rmse', 'mse'],\n",
" max_epochs=max_epochs,\n",
" patience=50,\n",
" batch_size=1024, virtual_batch_size=128,\n",
@@ -297,9 +298,9 @@
],
"metadata": {
"kernelspec": {
"display_name": ".shap",
"display_name": "Python 3",
"language": "python",
"name": ".shap"
"name": "python3"
},
"language_info": {
"codemirror_mode": {
@@ -311,7 +312,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.8"
"version": "3.7.6"
},
"toc": {
"base_numbering": 1,
34 changes: 34 additions & 0 deletions pytorch_tabnet/metrics.py
@@ -8,6 +8,7 @@
accuracy_score,
log_loss,
balanced_accuracy_score,
+    mean_squared_log_error,
)
import torch

@@ -347,6 +348,39 @@ def __call__(self, y_true, y_score):
return mean_squared_error(y_true, y_score)


+class RMSLE(Metric):
+    """
+    Root mean squared logarithmic error regression loss.
+
+    Computed as the square root of scikit-learn's `mean_squared_log_error`:
+    https://scikit-learn.org/stable/modules/generated/sklearn.metrics.mean_squared_log_error.html
+
+    Note: to avoid errors, negative predictions are clipped to 0, which means
+    you should clip negative predictions manually after calling `predict`.
+    """
+
+    def __init__(self):
+        self._name = "rmsle"
+        self._maximize = False
+
+    def __call__(self, y_true, y_score):
+        """
+        Compute RMSLE of predictions.
+
+        Parameters
+        ----------
+        y_true : np.ndarray
+            Target matrix or vector
+        y_score : np.ndarray
+            Score matrix or vector
+
+        Returns
+        -------
+        float
+            RMSLE of predictions vs targets.
+        """
+        y_score = np.clip(y_score, a_min=0, a_max=None)
+        return np.sqrt(mean_squared_log_error(y_true, y_score))
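# Note (assumption): `Metric` subclasses appear to be looked up by `_name`,
# so this metric should be selectable via `eval_metric=["rmsle"]` in `fit`,
# as exercised by the notebook changes below.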


class UnsupervisedMetric(Metric):
"""
Unsupervised metric
2 changes: 1 addition & 1 deletion pytorch_tabnet/tab_network.py
@@ -807,7 +807,7 @@ def __init__(self, input_dim, cat_dims, cat_idxs, cat_emb_dim):

# check that all embeddings are provided
if len(self.cat_emb_dims) != len(cat_dims):
msg = """ cat_emb_dim and cat_dims must be lists of same length, got {len(self.cat_emb_dims)}
msg = f"""cat_emb_dim and cat_dims must be lists of same length, got {len(self.cat_emb_dims)}
and {len(cat_dims)}"""
raise ValueError(msg)
self.post_embed_dim = int(
Expand Down
3 changes: 2 additions & 1 deletion regression_example.ipynb
@@ -191,6 +191,7 @@
" X_train=X_train, y_train=y_train,\n",
" eval_set=[(X_train, y_train), (X_valid, y_valid)],\n",
" eval_name=['train', 'valid'],\n",
" eval_metric=['rmsle', 'mae', 'rmse', 'mse'],\n",
" max_epochs=max_epochs,\n",
" patience=50,\n",
" batch_size=1024, virtual_batch_size=128,\n",
@@ -351,7 +352,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.9"
"version": "3.7.6"
},
"toc": {
"base_numbering": 1,
Expand Down
