feat: add new default metrics
Optimox authored and eduardocarvp committed Dec 14, 2020
1 parent a1041cc commit 0fe5b72
Showing 5 changed files with 65 additions and 8 deletions.
25 changes: 23 additions & 2 deletions README.md
@@ -70,7 +70,21 @@ clf.fit(
preds = clf.predict(X_test)
```

-### Custom early_stopping_metrics
+### Default eval_metric

+A few classical evaluation metrics are implemented (see the section below for custom ones):
+- binary classification metrics: 'auc', 'accuracy', 'balanced_accuracy', 'logloss'
+- multiclass classification: 'accuracy', 'balanced_accuracy', 'logloss'
+- regression: 'mse', 'mae', 'rmse', 'rmsle'


+Important note: 'rmsle' automatically clips negative predictions to 0, because the model can predict negative values.
+To match the reported scores, apply `np.clip(clf.predict(X_predict), a_min=0, a_max=None)` to your predictions.
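
For instance, a minimal sketch of training with these metrics and reproducing the 'rmsle' clipping (data names are placeholders; the `eval_metric` argument mirrors the notebook changes in this commit):

```python
import numpy as np
from pytorch_tabnet.tab_model import TabNetRegressor

clf = TabNetRegressor()
clf.fit(
    X_train=X_train, y_train=y_train,  # placeholders for your own data
    eval_set=[(X_valid, y_valid)],
    eval_metric=['rmsle', 'mae', 'rmse', 'mse'],
)
# 'rmsle' clips negative predictions before scoring, so clip here to match:
preds = np.clip(clf.predict(X_valid), a_min=0, a_max=None)
```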


+### Custom evaluation metrics

+It's easy to create a metric that matches your specific needs. Here is an example for the Gini score (note that you need to specify whether the metric should be maximized or not):

```python
from pytorch_tabnet.metrics import Metric
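from sklearn.metrics import roc_auc_score

# The remainder of this example is collapsed in this diff; below is a hedged
# sketch consistent with the Metric API added by this commit (the exact
# upstream example may differ).
class Gini(Metric):
    def __init__(self):
        self._name = "gini"     # name reported in the training logs
        self._maximize = True   # a larger Gini score is better

    def __call__(self, y_true, y_score):
        auc = roc_auc_score(y_true, y_score[:, 1])
        return max(2 * auc - 1, 0.0)

# The custom metric can then presumably be passed directly to fit, e.g.
# clf.fit(..., eval_metric=[Gini]).
```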
@@ -137,6 +151,9 @@ A self supervised loss greater than 1 means that your model is reconstructing wo

A complete example can be found within the notebook `pretraining_example.ipynb`.

+/!\ : the current implementation tries to reconstruct the original inputs, but Batch Normalization applies a batch-dependent transformation that cannot be recovered from a single row, which makes the reconstruction harder. Lowering the `batch_size` might make the pretraining easier.
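
A minimal sketch of such a lower-batch-size pretraining run (data names are placeholders; `TabNetPretrainer` and `pretraining_ratio` are documented below and demonstrated in `pretraining_example.ipynb`):

```python
from pytorch_tabnet.pretraining import TabNetPretrainer

unsupervised_model = TabNetPretrainer()
unsupervised_model.fit(
    X_train=X_train,        # placeholder for your unlabeled training data
    eval_set=[X_valid],
    pretraining_ratio=0.8,  # fraction of input features masked per sample
    batch_size=256,         # smaller batches may ease BN-based reconstruction
)
```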


# Useful links

- [explanatory video](https://youtu.be/ysBaZO8YmX8)
@@ -275,10 +292,12 @@ A complete example can be found within the notebook `pretraining_example.ipynb`.
- `patience` : int (default = 15)

Number of consecutive epochs without improvement before performing early stopping.

+If patience is set to 0, no early stopping will be performed (see the sketch after this parameter list).

Note that if patience is enabled, the weights from the best epoch will automatically be loaded at the end of `fit`.

-- weights : int or dict (default=0)
+- `weights` : int or dict (default=0)

/!\ Only for TabNetClassifier
Sampling parameter
@@ -313,5 +332,7 @@ A complete example can be found within the notebook `pretraining_example.ipynb`.
List of custom callbacks

- `pretraining_ratio` : float

+/!\ TabNetPretrainer only: percentage of input features to mask during pretraining.

Should be between 0 and 1. The larger the ratio, the harder the reconstruction task.
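
As referenced in the `patience` entry above, a sketch combining these fit-time parameters (illustrative values; data names are placeholders):

```python
from pytorch_tabnet.tab_model import TabNetClassifier

clf = TabNetClassifier()
clf.fit(
    X_train=X_train, y_train=y_train,
    eval_set=[(X_valid, y_valid)],
    patience=0,  # 0 disables early stopping entirely
    weights=1,   # inverse class-occurrence sampling (TabNetClassifier only)
)
```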
9 changes: 5 additions & 4 deletions multi_regression_example.ipynb
@@ -193,7 +193,7 @@
"metadata": {},
"outputs": [],
"source": [
"max_epochs = 10 if not os.getenv(\"CI\", False) else 2"
"max_epochs = 1000 if not os.getenv(\"CI\", False) else 2"
]
},
{
@@ -208,6 +208,7 @@
" X_train=X_train, y_train=y_train,\n",
" eval_set=[(X_train, y_train), (X_valid, y_valid)],\n",
" eval_name=['train', 'valid'],\n",
" eval_metric=['rmsle', 'mae', 'rmse', 'mse'],\n",
" max_epochs=max_epochs,\n",
" patience=50,\n",
" batch_size=1024, virtual_batch_size=128,\n",
@@ -297,9 +298,9 @@
],
"metadata": {
"kernelspec": {
"display_name": ".shap",
"display_name": "Python 3",
"language": "python",
"name": ".shap"
"name": "python3"
},
"language_info": {
"codemirror_mode": {
@@ -311,7 +312,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.8"
"version": "3.7.6"
},
"toc": {
"base_numbering": 1,
34 changes: 34 additions & 0 deletions pytorch_tabnet/metrics.py
@@ -8,6 +8,7 @@
accuracy_score,
log_loss,
balanced_accuracy_score,
+    mean_squared_log_error,
)
import torch

@@ -347,6 +348,39 @@ def __call__(self, y_true, y_score):
return mean_squared_error(y_true, y_score)


+class RMSLE(Metric):
+    """
+    Root mean squared logarithmic error regression loss.
+
+    Computed as the square root of scikit-learn's `mean_squared_log_error`:
+    https://scikit-learn.org/stable/modules/generated/sklearn.metrics.mean_squared_log_error.html
+
+    Note: to avoid errors, negative predictions are clipped to 0, which means
+    you should clip negative predictions manually after calling `predict`.
+    """
+
+    def __init__(self):
+        self._name = "rmsle"
+        self._maximize = False
+
+    def __call__(self, y_true, y_score):
+        """
+        Compute RMSLE of predictions.
+
+        Parameters
+        ----------
+        y_true : np.ndarray
+            Target matrix or vector
+        y_score : np.ndarray
+            Score matrix or vector
+
+        Returns
+        -------
+        float
+            RMSLE of predictions vs targets.
+        """
+        y_score = np.clip(y_score, a_min=0, a_max=None)
+        return np.sqrt(mean_squared_log_error(y_true, y_score))
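# Note (assumption): `Metric` subclasses appear to be looked up by `_name`,
# so this metric should be selectable via `eval_metric=["rmsle"]` in `fit`,
# as exercised by the notebook changes below.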


class UnsupervisedMetric(Metric):
"""
Unsupervised metric
2 changes: 1 addition & 1 deletion pytorch_tabnet/tab_network.py
@@ -807,7 +807,7 @@ def __init__(self, input_dim, cat_dims, cat_idxs, cat_emb_dim):

# check that all embeddings are provided
if len(self.cat_emb_dims) != len(cat_dims):
msg = """ cat_emb_dim and cat_dims must be lists of same length, got {len(self.cat_emb_dims)}
msg = f"""cat_emb_dim and cat_dims must be lists of same length, got {len(self.cat_emb_dims)}
and {len(cat_dims)}"""
raise ValueError(msg)
self.post_embed_dim = int(
Expand Down
3 changes: 2 additions & 1 deletion regression_example.ipynb
@@ -191,6 +191,7 @@
" X_train=X_train, y_train=y_train,\n",
" eval_set=[(X_train, y_train), (X_valid, y_valid)],\n",
" eval_name=['train', 'valid'],\n",
" eval_metric=['rmsle', 'mae', 'rmse', 'mse'],\n",
" max_epochs=max_epochs,\n",
" patience=50,\n",
" batch_size=1024, virtual_batch_size=128,\n",
@@ -351,7 +352,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.9"
"version": "3.7.6"
},
"toc": {
"base_numbering": 1,
Expand Down
