From 02f394f039024a16eec6ce548e2cb9961b65c18b Mon Sep 17 00:00:00 2001 From: Optimox Date: Sun, 23 Jul 2023 15:11:42 +0200 Subject: [PATCH] chore: release v4.1.0 --- CHANGELOG.md | 38 +- docs/_modules/index.html | 1 + .../pytorch_tabnet/abstract_model.html | 109 ++- .../pytorch_tabnet/augmentations.html | 5 +- docs/_modules/pytorch_tabnet/callbacks.html | 19 +- docs/_modules/pytorch_tabnet/metrics.html | 59 +- .../pytorch_tabnet/multiclass_utils.html | 19 +- docs/_modules/pytorch_tabnet/multitask.html | 49 +- docs/_modules/pytorch_tabnet/pretraining.html | 50 +- .../pytorch_tabnet/pretraining_utils.html | 73 +- docs/_modules/pytorch_tabnet/sparsemax.html | 11 +- docs/_modules/pytorch_tabnet/tab_model.html | 27 +- docs/_modules/pytorch_tabnet/tab_network.html | 196 +++--- docs/_modules/pytorch_tabnet/utils.html | 250 ++++++- docs/_modules/torch/optim/adam.html | 626 ++++++++++++++++-- docs/_sources/generated_docs/README.md.txt | 18 +- docs/generated_docs/README.html | 23 +- docs/generated_docs/pytorch_tabnet.html | 291 ++++++-- docs/genindex.html | 65 +- docs/index.html | 2 + docs/py-modindex.html | 1 + docs/search.html | 1 + docs/searchindex.js | 2 +- pyproject.toml | 2 +- 24 files changed, 1550 insertions(+), 387 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 4bce2dba..977b206a 100755 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,4 +1,40 @@ +# [4.1.0](https://github.com/dreamquark-ai/tabnet/compare/v3.1.1...v4.1.0) (2023-07-23) + + +### Bug Fixes + +* 424 allow any np.intX as training target ([63a8dba](https://github.com/dreamquark-ai/tabnet/commit/63a8dba99e4853b9be5d3e6c14909a30685c7532)) +* compute unsupervised loss using numpy ([49bd61b](https://github.com/dreamquark-ai/tabnet/commit/49bd61be4e8faa98ef3b46b4f0115379407e8475)) +* custom loss using inplace operations ([423f7c4](https://github.com/dreamquark-ai/tabnet/commit/423f7c43647f8be53f28c9c6061031b7a2644d20)) +* disable ansi ([60ec6bf](https://github.com/dreamquark-ai/tabnet/commit/60ec6bf7b27795da44e608d6848573bd0fd4ecd5)) +* feature importance not dependent from dataloader ([5b19091](https://github.com/dreamquark-ai/tabnet/commit/5b190916515793114ffa1a9ac4f3869222a14c11)) +* README patience to 10 ([fd2c73a](https://github.com/dreamquark-ai/tabnet/commit/fd2c73a4300a745f540a2a789716ec4cabe90a7c)) +* replace std 0 by the mean or 1 if mean is 0 ([ddf02da](https://github.com/dreamquark-ai/tabnet/commit/ddf02dab9bdc41c6d7736f0be509950e907909a4)) +* try to disable parallel install ([c4963ad](https://github.com/dreamquark-ai/tabnet/commit/c4963ad61e479997c912db816736d073106bcc20)) +* typo in pandas error ([5ac5583](https://github.com/dreamquark-ai/tabnet/commit/5ac55834b32693abc4b22028a74475ee0440c2a5)) +* update gpg key in docker file gpu ([709fcb1](https://github.com/dreamquark-ai/tabnet/commit/709fcb1ab31f8ac232594877a0d2b3922a02360b)) +* upgrade the ressource size ([fc59ea6](https://github.com/dreamquark-ai/tabnet/commit/fc59ea61139228440d2063ead9db42f656d84ff7)) +* use numpy std with bessel correction and test ([3adaf4c](https://github.com/dreamquark-ai/tabnet/commit/3adaf4c0858f5d9af8f0f2a2fdaa92360d12cb87)) + + +### Features + +* add augmentations inside the fit method ([6d0485f](https://github.com/dreamquark-ai/tabnet/commit/6d0485f58bd1028cffd195d9e27eb97915b9cb2c)) +* add warm_start matching scikit-learn ([d725101](https://github.com/dreamquark-ai/tabnet/commit/d725101a559c6be49a6f8e20c3e68b18b8eb7b01)) +* added conda install option 
([ca14b76](https://github.com/dreamquark-ai/tabnet/commit/ca14b76fc771459745c49723733ff88ef1126d30)), closes [#346](https://github.com/dreamquark-ai/tabnet/issues/346) +* disable tests in docker file gpu to save CI time ([233f74e](https://github.com/dreamquark-ai/tabnet/commit/233f74e41648dad62899ceba7481d58ecfbd87b7)) +* enable feature grouping for attention mechanism ([bcae5f4](https://github.com/dreamquark-ai/tabnet/commit/bcae5f43b89fb2c53a0fe8be7c218a7b91afac96)) +* enable torch 2.0 by relaxing poetry ([bbd7a4e](https://github.com/dreamquark-ai/tabnet/commit/bbd7a4e96d5503ad23048ce39997462ed1a2eca0)) +* pretraining matches paper ([5adb804](https://github.com/dreamquark-ai/tabnet/commit/5adb80482c8242dde7b7942529db94fa9ccbfe48)) +* raise error in case cat_dims and cat_idxs are incoherent ([8c3b795](https://github.com/dreamquark-ai/tabnet/commit/8c3b7951642f62e7449bb95875b5265d4b89148e)) +* update python ([dea62b4](https://github.com/dreamquark-ai/tabnet/commit/dea62b410e3f4cc729f1c1933018d7d8db24d016)) + + + +## [3.1.1](https://github.com/dreamquark-ai/tabnet/compare/v3.1.0...v3.1.1) (2021-02-02) + + ### Bug Fixes * add preds_mapper to pretraining ([76f2c85](https://github.com/dreamquark-ai/tabnet/commit/76f2c852f59c6ed2c5dc5f0766cb99310bae5f2c)) @@ -166,11 +202,11 @@ ### Bug Fixes -* **regression:** fix scheduler ([01e46b7](https://github.com/dreamquark-ai/tabnet/commit/01e46b7b53aa5cb880cca5d1492ef67788c0075e)) * fixing Dockerfile for poetry 1.0 ([6c5cdec](https://github.com/dreamquark-ai/tabnet/commit/6c5cdeca8f3c5a58e2f557f2d8bb5127d3d7f691)) * importance indexing fixed ([a8382c3](https://github.com/dreamquark-ai/tabnet/commit/a8382c31099d59e03c432479b2798abc90f55a58)) * local explain all batches ([91461fb](https://github.com/dreamquark-ai/tabnet/commit/91461fbcd4b8c806e920936e0154258b2dc02373)) * regression gpu integration an typos ([269b4c5](https://github.com/dreamquark-ai/tabnet/commit/269b4c59fcb12d1c24fea7b9e15c7b63aa9939e0)) +* **regression:** fix scheduler ([01e46b7](https://github.com/dreamquark-ai/tabnet/commit/01e46b7b53aa5cb880cca5d1492ef67788c0075e)) * resolve timer issue and warnings ([ecd2cd9](https://github.com/dreamquark-ai/tabnet/commit/ecd2cd9c39c1f977868888d6b3abd719a7ee21f4)) diff --git a/docs/_modules/index.html b/docs/_modules/index.html index 611a62cf..b9fe26cf 100644 --- a/docs/_modules/index.html +++ b/docs/_modules/index.html @@ -87,6 +87,7 @@
  • README
  • TabNet : Attentive Interpretable Tabular Learning
  • Installation
  • + What is new ?
  • Contributing
  • What problems does pytorch-tabnet handle?
  • How to use it?
  • diff --git a/docs/_modules/pytorch_tabnet/abstract_model.html b/docs/_modules/pytorch_tabnet/abstract_model.html index a50f2d69..9b66381b 100644 --- a/docs/_modules/pytorch_tabnet/abstract_model.html +++ b/docs/_modules/pytorch_tabnet/abstract_model.html @@ -87,6 +87,7 @@
  • README
  • TabNet : Attentive Interpretable Tabular Learning
  • Installation
  • + What is new ?
  • Contributing
  • What problems does pytorch-tabnet handle?
  • How to use it?
  • @@ -168,6 +169,7 @@

    Source code for pytorch_tabnet.abstract_model

    from abc import abstractmethod from pytorch_tabnet import tab_network from pytorch_tabnet.utils import ( + SparsePredictDataset, PredictDataset, create_explain_matrix, validate_eval_set, @@ -175,7 +177,9 @@

    Source code for pytorch_tabnet.abstract_model

    define_device, ComplexEncoder, check_input, - check_warm_start + check_warm_start, + create_group_matrix, + check_embedding_parameters ) from pytorch_tabnet.callbacks import ( CallbackContainer, @@ -194,11 +198,12 @@

    Source code for pytorch_tabnet.abstract_model

    import zipfile import warnings import copy +import scipy
    [docs]@dataclass class TabModel(BaseEstimator): - """ Class for TabNet model.""" + """ Class for TabNet model.""" n_d: int = 8 n_a: int = 8 @@ -225,10 +230,13 @@

    Source code for pytorch_tabnet.abstract_model

    device_name: str = "auto" n_shared_decoder: int = 1 n_indep_decoder: int = 1 + grouped_features: List[List[int]] = field(default_factory=list) def __post_init__(self): + # These are default values needed for saving model self.batch_size = 1024 self.virtual_batch_size = 128 + torch.manual_seed(self.seed) # Defining device self.device = torch.device(define_device(self.device_name)) @@ -239,8 +247,13 @@

    Source code for pytorch_tabnet.abstract_model

    self.optimizer_fn = copy.deepcopy(self.optimizer_fn) self.scheduler_fn = copy.deepcopy(self.scheduler_fn) + updated_params = check_embedding_parameters(self.cat_dims, + self.cat_idxs, + self.cat_emb_dim) + self.cat_dims, self.cat_idxs, self.cat_emb_dim = updated_params + def __update__(self, **kwargs): - """ + """ Updates parameters. If does not already exists, creates it. Otherwise overwrite with warnings. @@ -256,6 +269,7 @@

    Source code for pytorch_tabnet.abstract_model

    "n_independent", "n_shared", "n_steps", + "grouped_features", ] for var_name, value in kwargs.items(): if var_name in update_list: @@ -288,8 +302,9 @@

    Source code for pytorch_tabnet.abstract_model

    from_unsupervised=None, warm_start=False, augmentations=None, + compute_importance=True ): - """Train a neural network stored in self.network + """Train a neural network stored in self.network Using train_dataloader for training data and valid_dataloader for validation. @@ -333,6 +348,8 @@

    Source code for pytorch_tabnet.abstract_model

    Use a previously self supervised model as starting weights warm_start: bool If True, current model parameters are used to start training + compute_importance : bool + Whether to compute feature importance """ # update model name @@ -346,6 +363,7 @@

    Source code for pytorch_tabnet.abstract_model

    self._stop_training = False self.pin_memory = pin_memory and (self.device.type != "cpu") self.augmentations = augmentations + self.compute_importance = compute_importance if self.augmentations is not None: # This ensure reproducibility @@ -417,16 +435,17 @@

    Source code for pytorch_tabnet.abstract_model

    self._callback_container.on_train_end() self.network.eval() - # compute feature importance once the best model is defined - self.feature_importances_ = self._compute_feature_importances(X_train)
    + if self.compute_importance: + # compute feature importance once the best model is defined + self.feature_importances_ = self._compute_feature_importances(X_train)
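The hunk above makes the post-training feature-importance pass optional. A minimal sketch of how the new compute_importance flag might be used from the public API (the random data and the max_epochs/batch_size values are illustrative assumptions, not part of this patch):

    import numpy as np
    from pytorch_tabnet.tab_model import TabNetClassifier

    X_train = np.random.rand(256, 10).astype(np.float32)
    y_train = np.random.randint(0, 2, 256)

    clf = TabNetClassifier()
    # compute_importance=False skips _compute_feature_importances after training,
    # so clf.feature_importances_ is only populated when the flag is left at True
    clf.fit(X_train, y_train, max_epochs=5, batch_size=128, compute_importance=False)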
    [docs] def predict(self, X): - """ + """ Make predictions on a batch (valid) Parameters ---------- - X : a :tensor: `torch.Tensor` + X : a :tensor: `torch.Tensor` or matrix: `scipy.sparse.csr_matrix` Input data Returns @@ -435,11 +454,19 @@

    Source code for pytorch_tabnet.abstract_model

    Predictions of the regression problem """ self.network.eval() - dataloader = DataLoader( - PredictDataset(X), - batch_size=self.batch_size, - shuffle=False, - ) + + if scipy.sparse.issparse(X): + dataloader = DataLoader( + SparsePredictDataset(X), + batch_size=self.batch_size, + shuffle=False, + ) + else: + dataloader = DataLoader( + PredictDataset(X), + batch_size=self.batch_size, + shuffle=False, + ) results = [] for batch_nb, data in enumerate(dataloader): @@ -451,12 +478,12 @@

    Source code for pytorch_tabnet.abstract_model

    return self.predict_func(res)
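With SparsePredictDataset wired in above, predict (and explain below) accept a scipy.sparse.csr_matrix directly. A hedged sketch of the intended usage, with random placeholder data:

    import numpy as np
    import scipy.sparse
    from pytorch_tabnet.tab_model import TabNetClassifier

    X_train = np.random.rand(256, 10).astype(np.float32)
    y_train = np.random.randint(0, 2, 256)

    clf = TabNetClassifier()
    clf.fit(X_train, y_train, max_epochs=3, batch_size=128)

    # csr input is detected with scipy.sparse.issparse and routed to SparsePredictDataset
    X_sparse = scipy.sparse.csr_matrix(X_train)
    preds = clf.predict(X_sparse)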
    [docs] def explain(self, X, normalize=False): - """ + """ Return local explanation Parameters ---------- - X : tensor: `torch.Tensor` + X : tensor: `torch.Tensor` or matrix: `scipy.sparse.csr_matrix` Input data normalize : bool (default False) Wheter to normalize so that sum of features are equal to 1 @@ -470,11 +497,18 @@

    Source code for pytorch_tabnet.abstract_model

    """ self.network.eval() - dataloader = DataLoader( - PredictDataset(X), - batch_size=self.batch_size, - shuffle=False, - ) + if scipy.sparse.issparse(X): + dataloader = DataLoader( + SparsePredictDataset(X), + batch_size=self.batch_size, + shuffle=False, + ) + else: + dataloader = DataLoader( + PredictDataset(X), + batch_size=self.batch_size, + shuffle=False, + ) res_explain = [] @@ -486,7 +520,6 @@

    Source code for pytorch_tabnet.abstract_model

    masks[key] = csc_matrix.dot( value.cpu().detach().numpy(), self.reducing_matrix ) - original_feat_explain = csc_matrix.dot(M_explain.cpu().detach().numpy(), self.reducing_matrix) res_explain.append(original_feat_explain) @@ -523,7 +556,7 @@

    Source code for pytorch_tabnet.abstract_model

    setattr(self, attr_name, attr_value)
    [docs] def save_model(self, path): - """Saving TabNet model in two distinct files. + """Saving TabNet model in two distinct files. Parameters ---------- @@ -566,7 +599,7 @@

    Source code for pytorch_tabnet.abstract_model

    return f"{path}.zip"
    [docs] def load_model(self, filepath): - """Load TabNet model. + """Load TabNet model. Parameters ---------- @@ -602,7 +635,7 @@

    Source code for pytorch_tabnet.abstract_model

    return
    def _train_epoch(self, train_loader): - """ + """ Trains one epoch of the network in self.network Parameters @@ -625,7 +658,7 @@

    Source code for pytorch_tabnet.abstract_model

    return def _train_batch(self, X, y): - """ + """ Trains one batch of data Parameters @@ -670,7 +703,7 @@

    Source code for pytorch_tabnet.abstract_model

    return batch_logs def _predict_epoch(self, name, loader): - """ + """ Predict an epoch and update metrics. Parameters @@ -700,7 +733,7 @@

    Source code for pytorch_tabnet.abstract_model

    return def _predict_batch(self, X): - """ + """ Predict one batch of data. Parameters @@ -726,8 +759,11 @@

    Source code for pytorch_tabnet.abstract_model

    return scores def _set_network(self): - """Setup the network and explain matrix.""" + """Setup the network and explain matrix.""" torch.manual_seed(self.seed) + + self.group_matrix = create_group_matrix(self.grouped_features, self.input_dim) + self.network = tab_network.TabNet( self.input_dim, self.output_dim, @@ -744,6 +780,7 @@

    Source code for pytorch_tabnet.abstract_model

    virtual_batch_size=self.virtual_batch_size, momentum=self.momentum, mask_type=self.mask_type, + group_attention_matrix=self.group_matrix.to(self.device), ).to(self.device) self.reducing_matrix = create_explain_matrix( @@ -754,7 +791,7 @@

    Source code for pytorch_tabnet.abstract_model

    ) def _set_metrics(self, metrics, eval_names): - """Set attributes relative to the metrics. + """Set attributes relative to the metrics. Parameters ---------- @@ -786,7 +823,7 @@

    Source code for pytorch_tabnet.abstract_model

    ) def _set_callbacks(self, custom_callbacks): - """Setup the callbacks functions. + """Setup the callbacks functions. Parameters ---------- @@ -829,13 +866,13 @@

    Source code for pytorch_tabnet.abstract_model

    self._callback_container.set_trainer(self) def _set_optimizer(self): - """Setup optimizer.""" + """Setup optimizer.""" self._optimizer = self.optimizer_fn( self.network.parameters(), **self.optimizer_params ) def _construct_loaders(self, X_train, y_train, eval_set): - """Generate dataloaders for train and eval set. + """Generate dataloaders for train and eval set. Parameters ---------- @@ -873,7 +910,7 @@

    Source code for pytorch_tabnet.abstract_model

    return train_dataloader, valid_dataloaders def _compute_feature_importances(self, X): - """Compute global feature importance. + """Compute global feature importance. Parameters ---------- @@ -891,7 +928,7 @@

    Source code for pytorch_tabnet.abstract_model

    [docs] @abstractmethod def update_fit_params(self, X_train, y_train, eval_set, weights): - """ + """ Set attributes relative to fit function. Parameters @@ -912,7 +949,7 @@

    Source code for pytorch_tabnet.abstract_model

    [docs] @abstractmethod def compute_loss(self, y_score, y_true): - """ + """ Compute the loss. Parameters @@ -933,7 +970,7 @@

    Source code for pytorch_tabnet.abstract_model

    [docs] @abstractmethod def prepare_target(self, y): - """ + """ Prepare target before training. Parameters diff --git a/docs/_modules/pytorch_tabnet/augmentations.html b/docs/_modules/pytorch_tabnet/augmentations.html index 48bc1f47..ca878030 100644 --- a/docs/_modules/pytorch_tabnet/augmentations.html +++ b/docs/_modules/pytorch_tabnet/augmentations.html @@ -87,6 +87,7 @@
  • README
  • TabNet : Attentive Interpretable Tabular Learning
  • Installation
  • + What is new ?
  • Contributing
  • What problems does pytorch-tabnet handle?
  • How to use it?
  • @@ -165,7 +166,7 @@

    Source code for pytorch_tabnet.augmentations

    [docs]class RegressionSMOTE():
    -    """
    +    """
         Apply SMOTE
     
         This will average a percentage p of the elements in the batch with other elements.
    @@ -208,7 +209,7 @@ 

    Source code for pytorch_tabnet.augmentations

    [docs]class ClassificationSMOTE():
    -    """
    +    """
         Apply SMOTE for classification tasks.
     
         This will average a percentage p of the elements in the batch with other elements.
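These SMOTE-style classes pair with the new augmentations argument of fit (see the abstract_model changes above); ClassificationSMOTE is the classification counterpart. A small sketch, assuming p is the mixing percentage mentioned in the docstring and using random placeholder data:

    import numpy as np
    from pytorch_tabnet.tab_model import TabNetRegressor
    from pytorch_tabnet.augmentations import RegressionSMOTE

    X_train = np.random.rand(256, 10).astype(np.float32)
    y_train = np.random.rand(256, 1).astype(np.float32)  # TabNetRegressor expects 2D targets

    aug = RegressionSMOTE(p=0.2)  # p assumed to be the share of rows that get mixed
    reg = TabNetRegressor()
    reg.fit(X_train, y_train, max_epochs=3, batch_size=128, augmentations=aug)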
    diff --git a/docs/_modules/pytorch_tabnet/callbacks.html b/docs/_modules/pytorch_tabnet/callbacks.html
    index b4168aaf..2aadf0a0 100644
    --- a/docs/_modules/pytorch_tabnet/callbacks.html
    +++ b/docs/_modules/pytorch_tabnet/callbacks.html
    @@ -87,6 +87,7 @@
     
  • README
  • TabNet : Attentive Interpretable Tabular Learning
  • Installation
  • + What is new ?
  • Contributing
  • What problems does pytorch-tabnet handle?
  • How to use it?
  • @@ -169,7 +170,7 @@

    Source code for pytorch_tabnet.callbacks

     
     
     
    [docs]class Callback: - """ + """ Abstract base class used to build new callbacks. """ @@ -203,7 +204,7 @@

    Source code for pytorch_tabnet.callbacks

     
     
    [docs]@dataclass class CallbackContainer: - """ + """ Container holding a list of callbacks. """ @@ -255,7 +256,7 @@

    Source code for pytorch_tabnet.callbacks

     
     
    [docs]@dataclass class EarlyStopping(Callback): - """EarlyStopping callback to exit the training loop if early_stopping_metric + """EarlyStopping callback to exit the training loop if early_stopping_metric does not improve by a certain amount for a certain number of epochs. @@ -319,14 +320,14 @@

    Source code for pytorch_tabnet.callbacks

                 msg = f"\nEarly stopping occurred at epoch {self.stopped_epoch}"
                 msg += (
                     f" with best_epoch = {self.best_epoch} and "
    -                + f"best_{self.early_stopping_metric} = {round(self.best_loss, 5)}"
    +                + f"best_{self.early_stopping_metric} = {round(self.best_loss, 5)}"
                 )
                 print(msg)
             else:
                 msg = (
                     f"Stop training because you reached max_epochs = {self.trainer.max_epochs}"
                     + f" with best_epoch = {self.best_epoch} and "
    -                + f"best_{self.early_stopping_metric} = {round(self.best_loss, 5)}"
    +                + f"best_{self.early_stopping_metric} = {round(self.best_loss, 5)}"
                 )
                 print(msg)
             wrn_msg = "Best weights from best epoch are automatically used!"
    @@ -335,7 +336,7 @@ 

    Source code for pytorch_tabnet.callbacks

     
     
    [docs]@dataclass class History(Callback): - """Callback that records events into a `History` object. + """Callback that records events into a `History` object. This callback is automatically applied to every SuperModule. @@ -378,9 +379,9 @@

    Source code for pytorch_tabnet.callbacks

             msg = f"epoch {epoch:<3}"
             for metric_name, metric_value in self.epoch_metrics.items():
                 if metric_name != "lr":
    -                msg += f"| {metric_name:<3}: {np.round(metric_value, 5):<8}"
    +                msg += f"| {metric_name:<3}: {np.round(metric_value, 5):<8}"
             self.total_time = int(time.time() - self.start_time)
    -        msg += f"|  {str(datetime.timedelta(seconds=self.total_time)) + 's':<6}"
    +        msg += f"|  {str(datetime.timedelta(seconds=self.total_time)) + 's':<6}"
             print(msg)
    [docs] def on_batch_end(self, batch, logs=None): @@ -402,7 +403,7 @@

    Source code for pytorch_tabnet.callbacks

     
     
    [docs]@dataclass class LRSchedulerCallback(Callback): - """Wrapper for most torch scheduler functions. + """Wrapper for most torch scheduler functions. Parameters --------- diff --git a/docs/_modules/pytorch_tabnet/metrics.html b/docs/_modules/pytorch_tabnet/metrics.html index 267c5532..bff5910b 100644 --- a/docs/_modules/pytorch_tabnet/metrics.html +++ b/docs/_modules/pytorch_tabnet/metrics.html @@ -87,6 +87,7 @@
  • README
  • TabNet : Attentive Interpretable Tabular Learning
  • Installation
  • + What is new ?
  • Contributing
  • What problems does pytorch-tabnet handle?
  • How to use it?
  • @@ -175,7 +176,7 @@

    Source code for pytorch_tabnet.metrics

     
     
     
    [docs]def UnsupervisedLoss(y_pred, embedded_x, obf_vars, eps=1e-9): - """ + """ Implements unsupervised loss function. This differs from orginal paper as it's scaled to be batch size independent and number of features reconstructed independent (by taking the mean) @@ -235,7 +236,7 @@

    Source code for pytorch_tabnet.metrics

     
     
    [docs]@dataclass class UnsupMetricContainer: - """Container holding a list of metrics. + """Container holding a list of metrics. Parameters ---------- @@ -257,7 +258,7 @@

    Source code for pytorch_tabnet.metrics

             self.names = [self.prefix + name for name in self.metric_names]
     
         def __call__(self, y_pred, embedded_x, obf_vars):
    -        """Compute all metrics and store into a dict.
    +        """Compute all metrics and store into a dict.
     
             Parameters
             ----------
    @@ -281,7 +282,7 @@ 

    Source code for pytorch_tabnet.metrics

     
     
    [docs]@dataclass class MetricContainer: - """Container holding a list of metrics. + """Container holding a list of metrics. Parameters ---------- @@ -300,7 +301,7 @@

    Source code for pytorch_tabnet.metrics

             self.names = [self.prefix + name for name in self.metric_names]
     
         def __call__(self, y_true, y_pred):
    -        """Compute all metrics and store into a dict.
    +        """Compute all metrics and store into a dict.
     
             Parameters
             ----------
    @@ -333,7 +334,7 @@ 

    Source code for pytorch_tabnet.metrics

     
     
    [docs] @classmethod def get_metrics_by_names(cls, names): - """Get list of metric classes. + """Get list of metric classes. Parameters ---------- @@ -362,7 +363,7 @@

    Source code for pytorch_tabnet.metrics

     
     
     
    [docs]class AUC(Metric): - """ + """ AUC. """ @@ -371,7 +372,7 @@

    Source code for pytorch_tabnet.metrics

             self._maximize = True
     
         def __call__(self, y_true, y_score):
    -        """
    +        """
             Compute AUC of predictions.
     
             Parameters
    @@ -390,7 +391,7 @@ 

    Source code for pytorch_tabnet.metrics

     
     
     
    [docs]class Accuracy(Metric): - """ + """ Accuracy. """ @@ -399,7 +400,7 @@

    Source code for pytorch_tabnet.metrics

             self._maximize = True
     
         def __call__(self, y_true, y_score):
    -        """
    +        """
             Compute Accuracy of predictions.
     
             Parameters
    @@ -419,7 +420,7 @@ 

    Source code for pytorch_tabnet.metrics

     
     
     
    [docs]class BalancedAccuracy(Metric): - """ + """ Balanced Accuracy. """ @@ -428,7 +429,7 @@

    Source code for pytorch_tabnet.metrics

             self._maximize = True
     
         def __call__(self, y_true, y_score):
    -        """
    +        """
             Compute Accuracy of predictions.
     
             Parameters
    @@ -448,7 +449,7 @@ 

    Source code for pytorch_tabnet.metrics

     
     
     
    [docs]class LogLoss(Metric): - """ + """ LogLoss. """ @@ -457,7 +458,7 @@

    Source code for pytorch_tabnet.metrics

             self._maximize = False
     
         def __call__(self, y_true, y_score):
    -        """
    +        """
             Compute LogLoss of predictions.
     
             Parameters
    @@ -476,7 +477,7 @@ 

    Source code for pytorch_tabnet.metrics

     
     
     
    [docs]class MAE(Metric): - """ + """ Mean Absolute Error. """ @@ -485,7 +486,7 @@

    Source code for pytorch_tabnet.metrics

             self._maximize = False
     
         def __call__(self, y_true, y_score):
    -        """
    +        """
             Compute MAE (Mean Absolute Error) of predictions.
     
             Parameters
    @@ -504,7 +505,7 @@ 

    Source code for pytorch_tabnet.metrics

     
     
     
    [docs]class MSE(Metric): - """ + """ Mean Squared Error. """ @@ -513,7 +514,7 @@

    Source code for pytorch_tabnet.metrics

             self._maximize = False
     
         def __call__(self, y_true, y_score):
    -        """
    +        """
             Compute MSE (Mean Squared Error) of predictions.
     
             Parameters
    @@ -532,8 +533,8 @@ 

    Source code for pytorch_tabnet.metrics

     
     
     
    [docs]class RMSLE(Metric): - """ - Mean squared logarithmic error regression loss. + """ + Root Mean squared logarithmic error regression loss. Scikit-implementation: https://scikit-learn.org/stable/modules/generated/sklearn.metrics.mean_squared_log_error.html Note: In order to avoid error, negative predictions are clipped to 0. @@ -545,7 +546,7 @@

    Source code for pytorch_tabnet.metrics

             self._maximize = False
     
         def __call__(self, y_true, y_score):
    -        """
    +        """
             Compute RMSLE of predictions.
     
             Parameters
    @@ -561,11 +562,11 @@ 

    Source code for pytorch_tabnet.metrics

                 RMSLE of predictions vs targets.
             """
             y_score = np.clip(y_score, a_min=0, a_max=None)
    -        return mean_squared_log_error(y_true, y_score)
    + return np.sqrt(mean_squared_log_error(y_true, y_score))
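The RMSLE fix above now takes the square root, matching the metric's name. A quick numerical check of the new behaviour against scikit-learn (values are arbitrary):

    import numpy as np
    from sklearn.metrics import mean_squared_log_error

    y_true = np.array([3.0, 5.0, 2.5, 7.0])
    y_score = np.array([2.5, 5.0, 4.0, 8.0])

    # negative predictions are clipped to 0 before the log, as in the metric above
    y_score = np.clip(y_score, a_min=0, a_max=None)
    rmsle = np.sqrt(mean_squared_log_error(y_true, y_score))  # previously the sqrt was missing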
    [docs]class UnsupervisedMetric(Metric): - """ + """ Unsupervised metric """ @@ -574,7 +575,7 @@

    Source code for pytorch_tabnet.metrics

             self._maximize = False
     
         def __call__(self, y_pred, embedded_x, obf_vars):
    -        """
    +        """
             Compute MSE (Mean Squared Error) of predictions.
     
             Parameters
    @@ -597,7 +598,7 @@ 

    Source code for pytorch_tabnet.metrics

     
     
     
    [docs]class UnsupervisedNumpyMetric(Metric): - """ + """ Unsupervised metric """ @@ -606,7 +607,7 @@

    Source code for pytorch_tabnet.metrics

             self._maximize = False
     
         def __call__(self, y_pred, embedded_x, obf_vars):
    -        """
    +        """
             Compute MSE (Mean Squared Error) of predictions.
     
             Parameters
    @@ -632,7 +633,7 @@ 

    Source code for pytorch_tabnet.metrics

     
     
     
    [docs]class RMSE(Metric): - """ + """ Root Mean Squared Error. """ @@ -641,7 +642,7 @@

    Source code for pytorch_tabnet.metrics

             self._maximize = False
     
         def __call__(self, y_true, y_score):
    -        """
    +        """
             Compute RMSE (Root Mean Squared Error) of predictions.
     
             Parameters
    @@ -660,7 +661,7 @@ 

    Source code for pytorch_tabnet.metrics

     
     
     
    [docs]def check_metrics(metrics): - """Check if custom metrics are provided. + """Check if custom metrics are provided. Parameters ---------- diff --git a/docs/_modules/pytorch_tabnet/multiclass_utils.html b/docs/_modules/pytorch_tabnet/multiclass_utils.html index 0a68885b..a3619838 100644 --- a/docs/_modules/pytorch_tabnet/multiclass_utils.html +++ b/docs/_modules/pytorch_tabnet/multiclass_utils.html @@ -87,6 +87,7 @@
  • README
  • TabNet : Attentive Interpretable Tabular Learning
  • Installation
  • + What is new ?
  • Contributing
  • What problems does pytorch-tabnet handle?
  • How to use it?
  • @@ -181,7 +182,7 @@

    Source code for pytorch_tabnet.multiclass_utils

    < def _assert_all_finite(X, allow_nan=False): - """Like assert_all_finite, but only for ndarray.""" + """Like assert_all_finite, but only for ndarray.""" X = np.asanyarray(X) # First try an O(n) time, O(1) space solution for the common case that @@ -208,7 +209,7 @@

    Source code for pytorch_tabnet.multiclass_utils

    <
    [docs]def assert_all_finite(X, allow_nan=False): - """Throw a ValueError if X contains NaN or infinity. + """Throw a ValueError if X contains NaN or infinity. Parameters ---------- @@ -226,7 +227,7 @@

    Source code for pytorch_tabnet.multiclass_utils

    < def _unique_indicator(y): - """ + """ Not implemented """ raise IndexError( @@ -244,7 +245,7 @@

    Source code for pytorch_tabnet.multiclass_utils

    <
    [docs]def unique_labels(*ys): - """Extract an ordered array of unique labels + """Extract an ordered array of unique labels We don't allow: - mix of multilabel and multiclass (single label) targets @@ -306,7 +307,7 @@

    Source code for pytorch_tabnet.multiclass_utils

    <
    [docs]def is_multilabel(y): - """Check if ``y`` is in a multilabel format. + """Check if ``y`` is in a multilabel format. Parameters ---------- @@ -358,7 +359,7 @@

    Source code for pytorch_tabnet.multiclass_utils

    <
    [docs]def check_classification_targets(y): - """Ensure that target y is of a non-regression type. + """Ensure that target y is of a non-regression type. Only the following target types (as defined in type_of_target) are allowed: 'binary', 'multiclass', 'multiclass-multioutput', @@ -380,7 +381,7 @@

    Source code for pytorch_tabnet.multiclass_utils

    <
    [docs]def type_of_target(y): - """Determine the type of data indicated by the target. + """Determine the type of data indicated by the target. Note that this type is the most specific type that can be inferred. For example: @@ -515,7 +516,7 @@

    Source code for pytorch_tabnet.multiclass_utils

    <
    [docs]def infer_output_dim(y_train): - """ + """ Infer output_dim from targets Parameters @@ -551,7 +552,7 @@

    Source code for pytorch_tabnet.multiclass_utils

    <
    [docs]def infer_multitask_output(y_train): - """ + """ Infer output_dim from targets This is for multiple tasks. diff --git a/docs/_modules/pytorch_tabnet/multitask.html b/docs/_modules/pytorch_tabnet/multitask.html index e16ee2e6..31801db9 100644 --- a/docs/_modules/pytorch_tabnet/multitask.html +++ b/docs/_modules/pytorch_tabnet/multitask.html @@ -87,6 +87,7 @@
  • README
  • TabNet : Attentive Interpretable Tabular Learning
  • Installation
  • + What is new ?
  • Contributing
  • What problems does pytorch-tabnet handle?
  • How to use it?
  • @@ -162,10 +163,11 @@

    Source code for pytorch_tabnet.multitask

     import torch
     import numpy as np
     from scipy.special import softmax
    -from pytorch_tabnet.utils import PredictDataset, filter_weights
    +from pytorch_tabnet.utils import SparsePredictDataset, PredictDataset, filter_weights
     from pytorch_tabnet.abstract_model import TabModel
     from pytorch_tabnet.multiclass_utils import infer_multitask_output, check_output_dim
     from torch.utils.data import DataLoader
    +import scipy
     
     
     
    [docs]class TabNetMultiTaskClassifier(TabModel): @@ -183,7 +185,7 @@

    Source code for pytorch_tabnet.multitask

             return y_mapped
    [docs] def compute_loss(self, y_pred, y_true): - """ + """ Computes the loss according to network output and targets Parameters @@ -243,12 +245,12 @@

    Source code for pytorch_tabnet.multitask

             filter_weights(self.updated_weights)
    [docs] def predict(self, X): - """ + """ Make predictions on a batch (valid) Parameters ---------- - X : a :tensor: `torch.Tensor` + X : a :tensor: `torch.Tensor` or matrix: `scipy.sparse.csr_matrix` Input data Returns @@ -257,11 +259,19 @@

    Source code for pytorch_tabnet.multitask

                 Predictions of the most probable class
             """
             self.network.eval()
    -        dataloader = DataLoader(
    -            PredictDataset(X),
    -            batch_size=self.batch_size,
    -            shuffle=False,
    -        )
    +
    +        if scipy.sparse.issparse(X):
    +            dataloader = DataLoader(
    +                SparsePredictDataset(X),
    +                batch_size=self.batch_size,
    +                shuffle=False,
    +            )
    +        else:
    +            dataloader = DataLoader(
    +                PredictDataset(X),
    +                batch_size=self.batch_size,
    +                shuffle=False,
    +            )
     
             results = {}
             for data in dataloader:
    @@ -288,12 +298,12 @@ 

    Source code for pytorch_tabnet.multitask

             return results
    [docs] def predict_proba(self, X): - """ + """ Make predictions for classification on a batch (valid) Parameters ---------- - X : a :tensor: `torch.Tensor` + X : a :tensor: `torch.Tensor` or matrix: `scipy.sparse.csr_matrix` Input data Returns @@ -303,11 +313,18 @@

    Source code for pytorch_tabnet.multitask

             """
             self.network.eval()
     
    -        dataloader = DataLoader(
    -            PredictDataset(X),
    -            batch_size=self.batch_size,
    -            shuffle=False,
    -        )
    +        if scipy.sparse.issparse(X):
    +            dataloader = DataLoader(
    +                SparsePredictDataset(X),
    +                batch_size=self.batch_size,
    +                shuffle=False,
    +            )
    +        else:
    +            dataloader = DataLoader(
    +                PredictDataset(X),
    +                batch_size=self.batch_size,
    +                shuffle=False,
    +            )
     
             results = {}
             for data in dataloader:
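For reference, the multitask classifier touched above takes one target column per task, and its predict/predict_proba now accept sparse csr input as well. A hedged sketch with random placeholder data:

    import numpy as np
    from pytorch_tabnet.multitask import TabNetMultiTaskClassifier

    X_train = np.random.rand(256, 8).astype(np.float32)
    y_train = np.stack(
        [np.random.randint(0, 2, 256), np.random.randint(0, 3, 256)], axis=1
    )  # shape (n_samples, n_tasks)

    clf = TabNetMultiTaskClassifier()
    clf.fit(X_train, y_train, max_epochs=3, batch_size=128)
    per_task_preds = clf.predict(X_train)  # one prediction array per task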
    diff --git a/docs/_modules/pytorch_tabnet/pretraining.html b/docs/_modules/pytorch_tabnet/pretraining.html
    index a4f95e74..cdaef850 100644
    --- a/docs/_modules/pytorch_tabnet/pretraining.html
    +++ b/docs/_modules/pytorch_tabnet/pretraining.html
    @@ -87,6 +87,7 @@
     
  • README
  • TabNet : Attentive Interpretable Tabular Learning
  • Installation
  • + What is new ?
  • Contributing
  • What problems does pytorch-tabnet handle?
  • How to use it?
  • @@ -166,8 +167,10 @@

    Source code for pytorch_tabnet.pretraining

     from pytorch_tabnet.utils import (
         create_explain_matrix,
         filter_weights,
    +    SparsePredictDataset,
         PredictDataset,
    -    check_input
    +    check_input,
    +    create_group_matrix,
     )
     from torch.nn.utils import clip_grad_norm_
     from pytorch_tabnet.pretraining_utils import (
    @@ -180,6 +183,7 @@ 

    Source code for pytorch_tabnet.pretraining

         UnsupervisedLoss,
     )
     from pytorch_tabnet.abstract_model import TabModel
    +import scipy
     
     
     
    [docs]class TabNetPretrainer(TabModel): @@ -221,7 +225,7 @@

    Source code for pytorch_tabnet.pretraining

             pin_memory=True,
             warm_start=False
         ):
    -        """Train a neural network stored in self.network
    +        """Train a neural network stored in self.network
             Using train_dataloader for training data and
             valid_dataloader for validation.
     
    @@ -329,10 +333,13 @@ 

    Source code for pytorch_tabnet.pretraining

             self.network.eval()
    def _set_network(self): - """Setup the network and explain matrix.""" + """Setup the network and explain matrix.""" if not hasattr(self, 'pretraining_ratio'): self.pretraining_ratio = 0.5 torch.manual_seed(self.seed) + + self.group_matrix = create_group_matrix(self.grouped_features, self.input_dim) + self.network = tab_network.TabNetPretraining( self.input_dim, pretraining_ratio=self.pretraining_ratio, @@ -345,10 +352,13 @@

    Source code for pytorch_tabnet.pretraining

                 cat_emb_dim=self.cat_emb_dim,
                 n_independent=self.n_independent,
                 n_shared=self.n_shared,
    +            n_shared_decoder=self.n_shared_decoder,
    +            n_indep_decoder=self.n_indep_decoder,
                 epsilon=self.epsilon,
                 virtual_batch_size=self.virtual_batch_size,
                 momentum=self.momentum,
                 mask_type=self.mask_type,
    +            group_attention_matrix=self.group_matrix.to(self.device),
             ).to(self.device)
     
             self.reducing_matrix = create_explain_matrix(
    @@ -363,7 +373,7 @@ 

    Source code for pytorch_tabnet.pretraining

             self.network.pretraining_ratio = self.pretraining_ratio
     
         def _set_metrics(self, eval_names):
    -        """Set attributes relative to the metrics.
    +        """Set attributes relative to the metrics.
     
             Parameters
             ----------
    @@ -395,7 +405,7 @@ 

    Source code for pytorch_tabnet.pretraining

             )
     
         def _construct_loaders(self, X_train, eval_set):
    -        """Generate dataloaders for unsupervised train and eval set.
    +        """Generate dataloaders for unsupervised train and eval set.
     
             Parameters
             ----------
    @@ -424,7 +434,7 @@ 

    Source code for pytorch_tabnet.pretraining

             return train_dataloader, valid_dataloaders
     
         def _train_epoch(self, train_loader):
    -        """
    +        """
             Trains one epoch of the network in self.network
     
             Parameters
    @@ -447,7 +457,7 @@ 

    Source code for pytorch_tabnet.pretraining

             return
     
         def _train_batch(self, X):
    -        """
    +        """
             Trains one batch of data
     
             Parameters
    @@ -483,7 +493,7 @@ 

    Source code for pytorch_tabnet.pretraining

             return batch_logs
     
         def _predict_epoch(self, name, loader):
    -        """
    +        """
             Predict an epoch and update metrics.
     
             Parameters
    @@ -516,7 +526,7 @@ 

    Source code for pytorch_tabnet.pretraining

             return
     
         def _predict_batch(self, X):
    -        """
    +        """
             Predict one batch of data.
     
             Parameters
    @@ -539,12 +549,12 @@ 

    Source code for pytorch_tabnet.pretraining

             return output, embedded_x, obf_vars
    [docs] def predict(self, X): - """ + """ Make predictions on a batch (valid) Parameters ---------- - X : a :tensor: `torch.Tensor` + X : a :tensor: `torch.Tensor` or matrix: `scipy.sparse.csr_matrix` Input data Returns @@ -553,11 +563,19 @@

    Source code for pytorch_tabnet.pretraining

                 Predictions of the regression problem
             """
             self.network.eval()
    -        dataloader = DataLoader(
    -            PredictDataset(X),
    -            batch_size=self.batch_size,
    -            shuffle=False,
    -        )
    +
    +        if scipy.sparse.issparse(X):
    +            dataloader = DataLoader(
    +                SparsePredictDataset(X),
    +                batch_size=self.batch_size,
    +                shuffle=False,
    +            )
    +        else:
    +            dataloader = DataLoader(
    +                PredictDataset(X),
    +                batch_size=self.batch_size,
    +                shuffle=False,
    +            )
     
             results = []
             embedded_res = []
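The pretraining changes above forward n_shared_decoder/n_indep_decoder to the decoder and accept sparse input at prediction time. A minimal pretraining-then-finetuning sketch, assuming the usual pretraining_ratio fit argument and random placeholder data:

    import numpy as np
    from pytorch_tabnet.pretraining import TabNetPretrainer
    from pytorch_tabnet.tab_model import TabNetClassifier

    X_train = np.random.rand(512, 10).astype(np.float32)
    y_train = np.random.randint(0, 2, 512)

    pretrainer = TabNetPretrainer(n_shared_decoder=1, n_indep_decoder=1)
    pretrainer.fit(X_train, pretraining_ratio=0.8, max_epochs=3, batch_size=128)

    clf = TabNetClassifier()
    clf.fit(X_train, y_train, from_unsupervised=pretrainer, max_epochs=3, batch_size=128)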
    diff --git a/docs/_modules/pytorch_tabnet/pretraining_utils.html b/docs/_modules/pytorch_tabnet/pretraining_utils.html
    index 1234f03b..bab368db 100644
    --- a/docs/_modules/pytorch_tabnet/pretraining_utils.html
    +++ b/docs/_modules/pytorch_tabnet/pretraining_utils.html
    @@ -87,6 +87,7 @@
     
  • README
  • TabNet : Attentive Interpretable Tabular Learning
  • Installation
  • + What is new ?
  • Contributing
  • What problems does pytorch-tabnet handle?
  • How to use it?
  • @@ -162,22 +163,24 @@

    Source code for pytorch_tabnet.pretraining_utils

    from torch.utils.data import DataLoader from pytorch_tabnet.utils import ( create_sampler, + SparsePredictDataset, PredictDataset, check_input ) +import scipy
    [docs]def create_dataloaders( X_train, eval_set, weights, batch_size, num_workers, drop_last, pin_memory ): - """ + """ Create dataloaders with or without subsampling depending on weights and balanced. Parameters ---------- - X_train : np.ndarray + X_train : np.ndarray or scipy.sparse.csr_matrix Training data - eval_set : list of np.array + eval_set : list of np.array (for Xs and ys) or scipy.sparse.csr_matrix (for Xs) List of eval sets weights : either 0, 1, dict or iterable if 0 (default) : no weights will be applied @@ -204,35 +207,59 @@

    Source code for pytorch_tabnet.pretraining_utils

    """ need_shuffle, sampler = create_sampler(weights, X_train) - train_dataloader = DataLoader( - PredictDataset(X_train), - batch_size=batch_size, - sampler=sampler, - shuffle=need_shuffle, - num_workers=num_workers, - drop_last=drop_last, - pin_memory=pin_memory, - ) + if scipy.sparse.issparse(X_train): + train_dataloader = DataLoader( + SparsePredictDataset(X_train), + batch_size=batch_size, + sampler=sampler, + shuffle=need_shuffle, + num_workers=num_workers, + drop_last=drop_last, + pin_memory=pin_memory, + ) + else: + train_dataloader = DataLoader( + PredictDataset(X_train), + batch_size=batch_size, + sampler=sampler, + shuffle=need_shuffle, + num_workers=num_workers, + drop_last=drop_last, + pin_memory=pin_memory, + ) valid_dataloaders = [] for X in eval_set: - valid_dataloaders.append( - DataLoader( - PredictDataset(X), - batch_size=batch_size, - sampler=sampler, - shuffle=need_shuffle, - num_workers=num_workers, - drop_last=drop_last, - pin_memory=pin_memory, + if scipy.sparse.issparse(X): + valid_dataloaders.append( + DataLoader( + SparsePredictDataset(X), + batch_size=batch_size, + sampler=sampler, + shuffle=need_shuffle, + num_workers=num_workers, + drop_last=drop_last, + pin_memory=pin_memory, + ) + ) + else: + valid_dataloaders.append( + DataLoader( + PredictDataset(X), + batch_size=batch_size, + sampler=sampler, + shuffle=need_shuffle, + num_workers=num_workers, + drop_last=drop_last, + pin_memory=pin_memory, + ) ) - ) return train_dataloader, valid_dataloaders
    [docs]def validate_eval_set(eval_set, eval_name, X_train): - """Check if the shapes of eval_set are compatible with X_train. + """Check if the shapes of eval_set are compatible with X_train. Parameters ---------- diff --git a/docs/_modules/pytorch_tabnet/sparsemax.html b/docs/_modules/pytorch_tabnet/sparsemax.html index 8f03e2ea..0ebb565a 100644 --- a/docs/_modules/pytorch_tabnet/sparsemax.html +++ b/docs/_modules/pytorch_tabnet/sparsemax.html @@ -87,6 +87,7 @@
  • README
  • TabNet : Attentive Interpretable Tabular Learning
  • Installation
  • + What is new ?
  • Contributing
  • What problems does pytorch-tabnet handle?
  • How to use it?
  • @@ -183,7 +184,7 @@

    Source code for pytorch_tabnet.sparsemax

     
     
     
    [docs]class SparsemaxFunction(Function): - """ + """ An implementation of sparsemax (Martins & Astudillo, 2016). See :cite:`DBLP:journals/corr/MartinsA16` for detailed description. By Ben Peters and Vlad Niculae @@ -191,7 +192,7 @@

    Source code for pytorch_tabnet.sparsemax

     
     
    [docs] @staticmethod def forward(ctx, input, dim=-1): - """sparsemax: normalizing sparse transform (a la softmax) + """sparsemax: normalizing sparse transform (a la softmax) Parameters ---------- @@ -229,7 +230,7 @@

    Source code for pytorch_tabnet.sparsemax

     
         @staticmethod
         def _threshold_and_support(input, dim=-1):
    -        """Sparsemax building block: compute the threshold
    +        """Sparsemax building block: compute the threshold
     
             Parameters
             ----------
    @@ -271,7 +272,7 @@ 

    Source code for pytorch_tabnet.sparsemax

     
     
     
    [docs]class Entmax15Function(Function): - """ + """ An implementation of exact Entmax with alpha=1.5 (B. Peters, V. Niculae, A. Martins). See :cite:`https://arxiv.org/abs/1905.05702 for detailed description. Source: https://github.com/deep-spin/entmax @@ -322,7 +323,7 @@

    Source code for pytorch_tabnet.sparsemax

     
     
     
    [docs]class Entmoid15(Function): - """ A highly optimized equivalent of lambda x: Entmax15([x, 0]) """ + """ A highly optimized equivalent of lambda x: Entmax15([x, 0]) """
    [docs] @staticmethod def forward(ctx, input): diff --git a/docs/_modules/pytorch_tabnet/tab_model.html b/docs/_modules/pytorch_tabnet/tab_model.html index d8bf5379..bf3a0510 100644 --- a/docs/_modules/pytorch_tabnet/tab_model.html +++ b/docs/_modules/pytorch_tabnet/tab_model.html @@ -87,6 +87,7 @@
  • README
  • TabNet : Attentive Interpretable Tabular Learning
  • Installation
  • + What is new ?
  • Contributing
  • What problems does pytorch-tabnet handle?
  • How to use it?
  • @@ -162,10 +163,11 @@

    Source code for pytorch_tabnet.tab_model

     import torch
     import numpy as np
     from scipy.special import softmax
    -from pytorch_tabnet.utils import PredictDataset, filter_weights
    +from pytorch_tabnet.utils import SparsePredictDataset, PredictDataset, filter_weights
     from pytorch_tabnet.abstract_model import TabModel
     from pytorch_tabnet.multiclass_utils import infer_output_dim, check_output_dim
     from torch.utils.data import DataLoader
    +import scipy
     
     
     
    [docs]class TabNetClassifier(TabModel): @@ -176,7 +178,7 @@

    Source code for pytorch_tabnet.tab_model

             self._default_metric = 'accuracy'
     
     
    [docs] def weight_updater(self, weights): - """ + """ Updates weights dictionary according to target_mapper. Parameters @@ -235,12 +237,12 @@

    Source code for pytorch_tabnet.tab_model

             return np.vectorize(self.preds_mapper.get)(outputs.astype(str))
    [docs] def predict_proba(self, X): - """ + """ Make predictions for classification on a batch (valid) Parameters ---------- - X : a :tensor: `torch.Tensor` + X : a :tensor: `torch.Tensor` or matrix: `scipy.sparse.csr_matrix` Input data Returns @@ -250,11 +252,18 @@

    Source code for pytorch_tabnet.tab_model

             """
             self.network.eval()
     
    -        dataloader = DataLoader(
    -            PredictDataset(X),
    -            batch_size=self.batch_size,
    -            shuffle=False,
    -        )
    +        if scipy.sparse.issparse(X):
    +            dataloader = DataLoader(
    +                SparsePredictDataset(X),
    +                batch_size=self.batch_size,
    +                shuffle=False,
    +            )
    +        else:
    +            dataloader = DataLoader(
    +                PredictDataset(X),
    +                batch_size=self.batch_size,
    +                shuffle=False,
    +            )
     
             results = []
             for batch_nb, data in enumerate(dataloader):
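The classifier above also inherits the new grouped_features parameter from TabModel, which _set_network turns into a group attention matrix (see the abstract_model changes). A hedged usage sketch with random placeholder data; the grouping itself is arbitrary:

    import numpy as np
    from pytorch_tabnet.tab_model import TabNetClassifier

    X_train = np.random.rand(256, 6).astype(np.float32)
    y_train = np.random.randint(0, 2, 256)

    # features 0-2 (e.g. one-hot columns of the same variable) share attention,
    # features 3-4 form a second group, feature 5 stays on its own
    clf = TabNetClassifier(grouped_features=[[0, 1, 2], [3, 4]])
    clf.fit(X_train, y_train, max_epochs=3, batch_size=128)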
    diff --git a/docs/_modules/pytorch_tabnet/tab_network.html b/docs/_modules/pytorch_tabnet/tab_network.html
    index c8ba7358..7070c4d2 100644
    --- a/docs/_modules/pytorch_tabnet/tab_network.html
    +++ b/docs/_modules/pytorch_tabnet/tab_network.html
    @@ -87,6 +87,7 @@
     
  • README
  • TabNet : Attentive Interpretable Tabular Learning
  • Installation
  • + What is new ?
  • Contributing
  • What problems does pytorch-tabnet handle?
  • How to use it?
  • @@ -180,7 +181,7 @@

    Source code for pytorch_tabnet.tab_network

     
     
     
    [docs]class GBN(torch.nn.Module): - """ + """ Ghost Batch Normalization https://arxiv.org/abs/1705.08741 """ @@ -214,8 +215,9 @@

    Source code for pytorch_tabnet.tab_network

             virtual_batch_size=128,
             momentum=0.02,
             mask_type="sparsemax",
    +        group_attention_matrix=None,
         ):
    -        """
    +        """
             Defines main part of the TabNet network without the embedding layers.
     
             Parameters
    @@ -245,6 +247,8 @@ 

    Source code for pytorch_tabnet.tab_network

                 Float value between 0 and 1 which will be used for momentum in all batch norm
             mask_type : str
                 Either "sparsemax" or "entmax" : this is the masking function to use
    +        group_attention_matrix : torch matrix
    +            Matrix of size (n_groups, input_dim), m_ij = importance within group i of feature j
             """
             super(TabNetEncoder, self).__init__()
             self.input_dim = input_dim
    @@ -260,6 +264,14 @@ 

    Source code for pytorch_tabnet.tab_network

             self.virtual_batch_size = virtual_batch_size
             self.mask_type = mask_type
             self.initial_bn = BatchNorm1d(self.input_dim, momentum=0.01)
    +        self.group_attention_matrix = group_attention_matrix
    +
    +        if self.group_attention_matrix is None:
    +            # no groups
    +            self.group_attention_matrix = torch.eye(self.input_dim)
    +            self.attention_dim = self.input_dim
    +        else:
    +            self.attention_dim = self.group_attention_matrix.shape[0]
     
             if self.n_shared > 0:
                 shared_feat_transform = torch.nn.ModuleList()
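As the hunk above shows, the encoder falls back to an identity matrix (one group per feature) when no group matrix is supplied, and otherwise the attention dimension becomes the number of groups. For reference, a small sketch of building a group matrix with the helper imported in abstract_model; only the (n_groups, input_dim) shape is documented above, so the exact row contents are an assumption:

    import torch
    from pytorch_tabnet.utils import create_group_matrix

    # 5 input features: features 0-1 form one group, 2-3 another,
    # feature 4 is expected to end up in its own singleton group
    group_matrix = create_group_matrix([[0, 1], [2, 3]], 5)
    print(group_matrix.shape)  # expected (3, 5): one row per group

    # with no grouping at all, the encoder would use torch.eye(5) instead
    identity_fallback = torch.eye(5)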
    @@ -299,7 +311,8 @@ 

    Source code for pytorch_tabnet.tab_network

                 )
                 attention = AttentiveTransformer(
                     n_a,
    -                self.input_dim,
    +                self.attention_dim,
    +                group_matrix=group_attention_matrix,
                     virtual_batch_size=self.virtual_batch_size,
                     momentum=momentum,
                     mask_type=self.mask_type,
    @@ -310,12 +323,12 @@ 

    Source code for pytorch_tabnet.tab_network

     
    [docs] def forward(self, x, prior=None): x = self.initial_bn(x) + bs = x.shape[0] # batch size if prior is None: - prior = torch.ones(x.shape).to(x.device) + prior = torch.ones((bs, self.attention_dim)).to(x.device) M_loss = 0 att = self.initial_splitter(x)[:, self.n_d :] - steps_output = [] for step in range(self.n_steps): M = self.att_transformers[step](prior, att) @@ -325,7 +338,8 @@

    Source code for pytorch_tabnet.tab_network

                 # update prior
                 prior = torch.mul(self.gamma - M, prior)
                 # output
    -            masked_x = torch.mul(M, x)
    +            M_feature_level = torch.matmul(M, self.group_attention_matrix)
    +            masked_x = torch.mul(M_feature_level, x)
                 out = self.feat_transformers[step](masked_x)
                 d = ReLU()(out[:, : self.n_d])
                 steps_output.append(d)
    @@ -337,24 +351,25 @@ 

    Source code for pytorch_tabnet.tab_network

     
     
    [docs] def forward_masks(self, x): x = self.initial_bn(x) - - prior = torch.ones(x.shape).to(x.device) + bs = x.shape[0] # batch size + prior = torch.ones((bs, self.attention_dim)).to(x.device) M_explain = torch.zeros(x.shape).to(x.device) att = self.initial_splitter(x)[:, self.n_d :] masks = {} for step in range(self.n_steps): M = self.att_transformers[step](prior, att) - masks[step] = M + M_feature_level = torch.matmul(M, self.group_attention_matrix) + masks[step] = M_feature_level # update prior prior = torch.mul(self.gamma - M, prior) # output - masked_x = torch.mul(M, x) + masked_x = torch.mul(M_feature_level, x) out = self.feat_transformers[step](masked_x) d = ReLU()(out[:, : self.n_d]) # explain step_importance = torch.sum(d, dim=1) - M_explain += torch.mul(M, step_importance.unsqueeze(dim=1)) + M_explain += torch.mul(M_feature_level, step_importance.unsqueeze(dim=1)) # update attention att = out[:, self.n_d :] @@ -372,7 +387,7 @@

    Source code for pytorch_tabnet.tab_network

             virtual_batch_size=128,
             momentum=0.02,
         ):
    -        """
    +        """
             Defines main part of the TabNet network without the embedding layers.
     
             Parameters
    @@ -410,11 +425,7 @@ 

    Source code for pytorch_tabnet.tab_network

             if self.n_shared > 0:
                 shared_feat_transform = torch.nn.ModuleList()
                 for i in range(self.n_shared):
    -                if i == 0:
    -                    shared_feat_transform.append(Linear(n_d, 2 * n_d, bias=False))
    -                else:
    -                    shared_feat_transform.append(Linear(n_d, 2 * n_d, bias=False))
    -
    +                shared_feat_transform.append(Linear(n_d, 2 * n_d, bias=False))
             else:
                 shared_feat_transform = None
     
    @@ -461,6 +472,7 @@ 

    Source code for pytorch_tabnet.tab_network

             mask_type="sparsemax",
             n_shared_decoder=1,
             n_indep_decoder=1,
    +        group_attention_matrix=None,
         ):
             super(TabNetPretraining, self).__init__()
     
    @@ -487,10 +499,15 @@ 

    Source code for pytorch_tabnet.tab_network

                 raise ValueError("n_shared and n_independent can't be both zero.")
     
             self.virtual_batch_size = virtual_batch_size
    -        self.embedder = EmbeddingGenerator(input_dim, cat_dims, cat_idxs, cat_emb_dim)
    +        self.embedder = EmbeddingGenerator(input_dim,
    +                                           cat_dims,
    +                                           cat_idxs,
    +                                           cat_emb_dim,
    +                                           group_attention_matrix)
             self.post_embed_dim = self.embedder.post_embed_dim
     
    -        self.masker = RandomObfuscator(self.pretraining_ratio)
    +        self.masker = RandomObfuscator(self.pretraining_ratio,
    +                                       group_matrix=self.embedder.embedding_group_matrix)
             self.encoder = TabNetEncoder(
                 input_dim=self.post_embed_dim,
                 output_dim=self.post_embed_dim,
    @@ -504,6 +521,7 @@ 

    Source code for pytorch_tabnet.tab_network

                 virtual_batch_size=virtual_batch_size,
                 momentum=momentum,
                 mask_type=mask_type,
    +            group_attention_matrix=self.embedder.embedding_group_matrix,
             )
             self.decoder = TabNetDecoder(
                 self.post_embed_dim,
    @@ -516,7 +534,7 @@ 

    Source code for pytorch_tabnet.tab_network

             )
     
     
    [docs] def forward(self, x): - """ + """ Returns: res, embedded_x, obf_vars res : output of reconstruction embedded_x : embedded input @@ -524,12 +542,12 @@

    Source code for pytorch_tabnet.tab_network

             """
             embedded_x = self.embedder(x)
             if self.training:
    -            masked_x, obf_vars = self.masker(embedded_x)
    -            # set prior of encoder with obf_mask
    -            prior = 1 - obf_vars
    +            masked_x, obfuscated_groups, obfuscated_vars = self.masker(embedded_x)
    +            # set prior of encoder with obfuscated groups
    +            prior = 1 - obfuscated_groups
                 steps_out, _ = self.encoder(masked_x, prior=prior)
                 res = self.decoder(steps_out)
    -            return res, embedded_x, obf_vars
    +            return res, embedded_x, obfuscated_vars
             else:
                 steps_out, _ = self.encoder(embedded_x)
                 res = self.decoder(steps_out)
    @@ -555,8 +573,9 @@ 

    Source code for pytorch_tabnet.tab_network

             virtual_batch_size=128,
             momentum=0.02,
             mask_type="sparsemax",
    +        group_attention_matrix=None,
         ):
    -        """
    +        """
             Defines main part of the TabNet network without the embedding layers.
     
             Parameters
    @@ -586,6 +605,8 @@ 

    Source code for pytorch_tabnet.tab_network

                 Float value between 0 and 1 which will be used for momentum in all batch norm
             mask_type : str
                 Either "sparsemax" or "entmax" : this is the masking function to use
    +        group_attention_matrix : torch matrix
    +            Matrix of size (n_groups, input_dim), m_ij = importance within group i of feature j
             """
             super(TabNetNoEmbeddings, self).__init__()
             self.input_dim = input_dim
    @@ -615,6 +636,7 @@ 

    Source code for pytorch_tabnet.tab_network

                 virtual_batch_size=virtual_batch_size,
                 momentum=momentum,
                 mask_type=mask_type,
    +            group_attention_matrix=group_attention_matrix
             )
     
             if self.is_multi_task:
    @@ -663,8 +685,9 @@ 

    Source code for pytorch_tabnet.tab_network

             virtual_batch_size=128,
             momentum=0.02,
             mask_type="sparsemax",
    +        group_attention_matrix=[],
         ):
    -        """
    +        """
             Defines TabNet network
     
             Parameters
    @@ -702,6 +725,8 @@ 

    Source code for pytorch_tabnet.tab_network

                 Float value between 0 and 1 which will be used for momentum in all batch norm
             mask_type : str
                 Either "sparsemax" or "entmax" : this is the masking function to use
    +        group_attention_matrix : torch matrix
    +            Matrix of size (n_groups, input_dim), m_ij = importance within group i of feature j
             """
             super(TabNet, self).__init__()
             self.cat_idxs = cat_idxs or []
    @@ -725,8 +750,13 @@ 

    Source code for pytorch_tabnet.tab_network

                 raise ValueError("n_shared and n_independent can't be both zero.")
     
             self.virtual_batch_size = virtual_batch_size
    -        self.embedder = EmbeddingGenerator(input_dim, cat_dims, cat_idxs, cat_emb_dim)
    +        self.embedder = EmbeddingGenerator(input_dim,
    +                                           cat_dims,
    +                                           cat_idxs,
    +                                           cat_emb_dim,
    +                                           group_attention_matrix)
             self.post_embed_dim = self.embedder.post_embed_dim
    +
             self.tabnet = TabNetNoEmbeddings(
                 self.post_embed_dim,
                 output_dim,
    @@ -740,6 +770,7 @@ 

    Source code for pytorch_tabnet.tab_network

                 virtual_batch_size,
                 momentum,
                 mask_type,
    +            self.embedder.embedding_group_matrix
             )
     
     
    [docs] def forward(self, x): @@ -755,20 +786,21 @@

    Source code for pytorch_tabnet.tab_network

         def __init__(
             self,
             input_dim,
    -        output_dim,
    +        group_dim,
    +        group_matrix,
             virtual_batch_size=128,
             momentum=0.02,
             mask_type="sparsemax",
         ):
    -        """
    +        """
             Initialize an attention transformer.
     
             Parameters
             ----------
             input_dim : int
                 Input size
    -        output_dim : int
    -            Output_size
    +        group_dim : int
    +            Number of groups for features
             virtual_batch_size : int
                 Batch size for Ghost Batch Normalization
             momentum : float
    @@ -777,10 +809,10 @@ 

    Source code for pytorch_tabnet.tab_network

                 Either "sparsemax" or "entmax" : this is the masking function to use
             """
             super(AttentiveTransformer, self).__init__()
    -        self.fc = Linear(input_dim, output_dim, bias=False)
    -        initialize_non_glu(self.fc, input_dim, output_dim)
    +        self.fc = Linear(input_dim, group_dim, bias=False)
    +        initialize_non_glu(self.fc, input_dim, group_dim)
             self.bn = GBN(
    -            output_dim, virtual_batch_size=virtual_batch_size, momentum=momentum
    +            group_dim, virtual_batch_size=virtual_batch_size, momentum=momentum
             )
     
             if mask_type == "sparsemax":
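With grouped attention, the transformer above emits one score per group (`group_dim`) instead of one per post-embedding feature. Below is a hedged sketch of how such group scores can be projected back to feature space through a group matrix; it is an illustration consistent with the group-matrix semantics in this patch, not the encoder's exact code, and all shapes and values are assumptions.

```python
import torch

bs, group_dim, input_dim = 2, 3, 5
group_attention_matrix = torch.tensor([[0.5, 0.5, 0.0, 0.0, 0.0],
                                       [0.0, 0.0, 1.0, 0.0, 0.0],
                                       [0.0, 0.0, 0.0, 0.5, 0.5]])
# stand-in for the sparsemax/entmax output of the attentive transformer:
group_mask = torch.softmax(torch.randn(bs, group_dim), dim=-1)
# project the group-level scores back onto individual features
feature_mask = torch.matmul(group_mask, group_attention_matrix)  # (bs, input_dim)
```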
    @@ -813,7 +845,7 @@ 

    Source code for pytorch_tabnet.tab_network

             momentum=0.02,
         ):
             super(FeatTransformer, self).__init__()
    -        """
    +        """
             Initialize a feature transformer.
     
             Parameters
    @@ -870,7 +902,7 @@ 

    Source code for pytorch_tabnet.tab_network

     
     
     
    [docs]class GLU_Block(torch.nn.Module): - """ + """ Independent GLU block, specific to each step """ @@ -937,12 +969,12 @@

    Source code for pytorch_tabnet.tab_network

     
     
     
    [docs]class EmbeddingGenerator(torch.nn.Module): - """ + """ Classical embeddings generator """ - def __init__(self, input_dim, cat_dims, cat_idxs, cat_emb_dim): - """This is an embedding module for an entire set of features + def __init__(self, input_dim, cat_dims, cat_idxs, cat_emb_dims, group_matrix): + """This is an embedding module for an entire set of features Parameters ---------- @@ -953,56 +985,54 @@

    Source code for pytorch_tabnet.tab_network

                 If the list is empty, no embeddings will be done
             cat_idxs : list of int
                 Positional index for each categorical feature in inputs
    -        cat_emb_dim : int or list of int
    +        cat_emb_dim : list of int
                 Embedding dimension for each categorical feature
                 If int, the same embedding dimension will be used for all categorical features
    +        group_matrix : torch matrix
    +            Original group matrix before embeddings
             """
             super(EmbeddingGenerator, self).__init__()
    +
             if cat_dims == [] and cat_idxs == []:
                 self.skip_embedding = True
                 self.post_embed_dim = input_dim
    +            self.embedding_group_matrix = group_matrix.to(group_matrix.device)
                 return
    -        elif (cat_dims == []) ^ (cat_idxs == []):
    -            if cat_dims == []:
    -                msg = "If cat_idxs is non-empty, cat_dims must be defined as a list of same length."
    -            else:
    -                msg = "If cat_dims is non-empty, cat_idxs must be defined as a list of same length."
    -            raise ValueError(msg)
    -        elif len(cat_dims) != len(cat_idxs):
    -            msg = "The lists cat_dims and cat_idxs must have the same length."
    -            raise ValueError(msg)
    -
    -        self.skip_embedding = False
    -        if isinstance(cat_emb_dim, int):
    -            self.cat_emb_dims = [cat_emb_dim] * len(cat_idxs)
             else:
    -            self.cat_emb_dims = cat_emb_dim
    -
    -        # check that all embeddings are provided
    -        if len(self.cat_emb_dims) != len(cat_dims):
    -            msg = f"""cat_emb_dim and cat_dims must be lists of same length, got {len(self.cat_emb_dims)}
    -                      and {len(cat_dims)}"""
    -            raise ValueError(msg)
    -        self.post_embed_dim = int(
    -            input_dim + np.sum(self.cat_emb_dims) - len(self.cat_emb_dims)
    -        )
    +            self.skip_embedding = False
     
    -        self.embeddings = torch.nn.ModuleList()
    +        self.post_embed_dim = int(input_dim + np.sum(cat_emb_dims) - len(cat_emb_dims))
     
    -        # Sort dims by cat_idx
    -        sorted_idxs = np.argsort(cat_idxs)
    -        cat_dims = [cat_dims[i] for i in sorted_idxs]
    -        self.cat_emb_dims = [self.cat_emb_dims[i] for i in sorted_idxs]
    +        self.embeddings = torch.nn.ModuleList()
     
    -        for cat_dim, emb_dim in zip(cat_dims, self.cat_emb_dims):
    +        for cat_dim, emb_dim in zip(cat_dims, cat_emb_dims):
                 self.embeddings.append(torch.nn.Embedding(cat_dim, emb_dim))
     
             # record continuous indices
             self.continuous_idx = torch.ones(input_dim, dtype=torch.bool)
             self.continuous_idx[cat_idxs] = 0
     
    +        # update group matrix
    +        n_groups = group_matrix.shape[0]
    +        self.embedding_group_matrix = torch.empty((n_groups, self.post_embed_dim),
    +                                                  device=group_matrix.device)
    +        for group_idx in range(n_groups):
    +            post_emb_idx = 0
    +            cat_feat_counter = 0
    +            for init_feat_idx in range(input_dim):
    +                if self.continuous_idx[init_feat_idx] == 1:
    +                    # this means that no embedding is applied to this column
    +                    self.embedding_group_matrix[group_idx, post_emb_idx] = group_matrix[group_idx, init_feat_idx]  # noqa
    +                    post_emb_idx += 1
    +                else:
    +                    # this is a categorical feature which creates multiple embeddings
    +                    n_embeddings = cat_emb_dims[cat_feat_counter]
    +                    self.embedding_group_matrix[group_idx, post_emb_idx:post_emb_idx+n_embeddings] = group_matrix[group_idx, init_feat_idx] / n_embeddings  # noqa
    +                    post_emb_idx += n_embeddings
    +                    cat_feat_counter += 1
    +
     
    [docs] def forward(self, x): - """ + """ Apply embeddings to inputs Inputs should be (batch_size, input_dim) Outputs will be of size (batch_size, self.post_embed_dim) @@ -1028,34 +1058,42 @@

    Source code for pytorch_tabnet.tab_network

     
     
     
[docs]class RandomObfuscator(torch.nn.Module): - """ - Create and applies obfuscation masks + """ + Creates and applies obfuscation masks. + The obfuscation is done at the group level to match attention. """ - def __init__(self, pretraining_ratio): - """ + def __init__(self, pretraining_ratio, group_matrix): + """ This creates random obfuscation for self-supervised pretraining Parameters ---------- pretraining_ratio : float Ratio of features to randomly discard for reconstruction + """ super(RandomObfuscator, self).__init__() self.pretraining_ratio = pretraining_ratio + # group matrix is set to boolean here to pass all possible information + self.group_matrix = (group_matrix > 0) + 0. + self.num_groups = group_matrix.shape[0]
    [docs] def forward(self, x): - """ + """ Generate random obfuscation mask. Returns ------- masked input and obfuscated variables. """ - obfuscated_vars = torch.bernoulli( - self.pretraining_ratio * torch.ones(x.shape) - ).to(x.device) + bs = x.shape[0] + + obfuscated_groups = torch.bernoulli( + self.pretraining_ratio * torch.ones((bs, self.num_groups), device=x.device) + ) + obfuscated_vars = torch.matmul(obfuscated_groups, self.group_matrix) masked_input = torch.mul(1 - obfuscated_vars, x) - return masked_input, obfuscated_vars
    + return masked_input, obfuscated_groups, obfuscated_vars
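A self-contained sketch of the group-level masking performed in the forward pass above, with toy shapes assumed purely for illustration (this is not the library's API):

```python
import torch

bs, num_groups, post_embed_dim = 4, 2, 4
pretraining_ratio = 0.5
# boolean group membership: group 0 covers columns 0-2, group 1 covers column 3
group_matrix = torch.tensor([[1., 1., 1., 0.],
                             [0., 0., 0., 1.]])
x = torch.randn(bs, post_embed_dim)

obfuscated_groups = torch.bernoulli(
    pretraining_ratio * torch.ones((bs, num_groups))
)                                                                # one draw per (sample, group)
obfuscated_vars = torch.matmul(obfuscated_groups, group_matrix)  # back to column space
masked_input = (1 - obfuscated_vars) * x
# the encoder then receives prior = 1 - obfuscated_groups, so masked groups
# cannot be re-selected by attention during pretraining
```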
    diff --git a/docs/_modules/pytorch_tabnet/utils.html b/docs/_modules/pytorch_tabnet/utils.html index 17205ca8..7f50a923 100644 --- a/docs/_modules/pytorch_tabnet/utils.html +++ b/docs/_modules/pytorch_tabnet/utils.html @@ -87,6 +87,7 @@
  • README
  • TabNet : Attentive Interpretable Tabular Learning
  • Installation
  • +
  • What is new ?
  • Contributing
  • What problems does pytorch-tabnet handle?
  • How to use it?
  • @@ -171,7 +172,7 @@

    Source code for pytorch_tabnet.utils

     
     
     
    [docs]class TorchDataset(Dataset): - """ + """ Format for numpy array Parameters @@ -194,8 +195,33 @@

    Source code for pytorch_tabnet.utils

             return x, y
    +
    [docs]class SparseTorchDataset(Dataset): + """ + Format for csr_matrix + + Parameters + ---------- + X : CSR matrix + The input matrix + y : 2D array + The one-hot encoded target + """ + + def __init__(self, x, y): + self.x = x + self.y = y + + def __len__(self): + return self.x.shape[0] + + def __getitem__(self, index): + x = torch.from_numpy(self.x[index].toarray()[0]).float() + y = self.y[index] + return x, y
    + +
    [docs]class PredictDataset(Dataset): - """ + """ Format for numpy array Parameters @@ -215,8 +241,29 @@

    Source code for pytorch_tabnet.utils

             return x
    +
    [docs]class SparsePredictDataset(Dataset): + """ + Format for csr_matrix + + Parameters + ---------- + X : CSR matrix + The input matrix + """ + + def __init__(self, x): + self.x = x + + def __len__(self): + return self.x.shape[0] + + def __getitem__(self, index): + x = torch.from_numpy(self.x[index].toarray()[0]).float() + return x
    + +
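Rough usage sketch for the sparse datasets added above: each `__getitem__` densifies a single CSR row, so the full design matrix never has to be materialized as a dense array. The data below is synthetic and only for illustration.

```python
import numpy as np
import scipy.sparse
from torch.utils.data import DataLoader
from pytorch_tabnet.utils import SparseTorchDataset

X = scipy.sparse.random(1000, 50, density=0.05, format="csr", dtype=np.float32)
y = np.random.randint(0, 2, size=1000)

loader = DataLoader(SparseTorchDataset(X, y), batch_size=32, shuffle=True)
xb, yb = next(iter(loader))
print(xb.shape, yb.shape)  # torch.Size([32, 50]) torch.Size([32])
```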
    [docs]def create_sampler(weights, y_train): - """ + """ This creates a sampler from the given weights Parameters @@ -267,7 +314,7 @@

    Source code for pytorch_tabnet.utils

     
    [docs]def create_dataloaders( X_train, y_train, eval_set, weights, batch_size, num_workers, drop_last, pin_memory ): - """ + """ Create dataloaders with or without subsampling depending on weights and balanced. Parameters @@ -303,33 +350,55 @@

    Source code for pytorch_tabnet.utils

         """
         need_shuffle, sampler = create_sampler(weights, y_train)
     
    -    train_dataloader = DataLoader(
    -        TorchDataset(X_train.astype(np.float32), y_train),
    -        batch_size=batch_size,
    -        sampler=sampler,
    -        shuffle=need_shuffle,
    -        num_workers=num_workers,
    -        drop_last=drop_last,
    -        pin_memory=pin_memory,
    -    )
    +    if scipy.sparse.issparse(X_train):
    +        train_dataloader = DataLoader(
    +            SparseTorchDataset(X_train.astype(np.float32), y_train),
    +            batch_size=batch_size,
    +            sampler=sampler,
    +            shuffle=need_shuffle,
    +            num_workers=num_workers,
    +            drop_last=drop_last,
    +            pin_memory=pin_memory,
    +        )
    +    else:
    +        train_dataloader = DataLoader(
    +            TorchDataset(X_train.astype(np.float32), y_train),
    +            batch_size=batch_size,
    +            sampler=sampler,
    +            shuffle=need_shuffle,
    +            num_workers=num_workers,
    +            drop_last=drop_last,
    +            pin_memory=pin_memory,
    +        )
     
         valid_dataloaders = []
         for X, y in eval_set:
    -        valid_dataloaders.append(
    -            DataLoader(
    -                TorchDataset(X.astype(np.float32), y),
    -                batch_size=batch_size,
    -                shuffle=False,
    -                num_workers=num_workers,
    -                pin_memory=pin_memory,
    +        if scipy.sparse.issparse(X):
    +            valid_dataloaders.append(
    +                DataLoader(
    +                    SparseTorchDataset(X.astype(np.float32), y),
    +                    batch_size=batch_size,
    +                    shuffle=False,
    +                    num_workers=num_workers,
    +                    pin_memory=pin_memory,
    +                )
    +            )
    +        else:
    +            valid_dataloaders.append(
    +                DataLoader(
    +                    TorchDataset(X.astype(np.float32), y),
    +                    batch_size=batch_size,
    +                    shuffle=False,
    +                    num_workers=num_workers,
    +                    pin_memory=pin_memory,
    +                )
                 )
    -        )
     
         return train_dataloader, valid_dataloaders
    [docs]def create_explain_matrix(input_dim, cat_emb_dim, cat_idxs, post_embed_dim): - """ + """ This is a computational trick. In order to rapidly sum importances from same embeddings to the initial index. @@ -377,8 +446,95 @@

    Source code for pytorch_tabnet.utils

         return scipy.sparse.csc_matrix(reducing_matrix)
    +
[docs]def create_group_matrix(list_groups, input_dim): + """ + Create the group matrix corresponding to the given list_groups + + Parameters + ---------- + - list_groups : list of list of int + Each element is a list representing features in the same group. + One feature should appear in at most one group. + Features that are not assigned to a group will each be in their own single-feature group. + - input_dim : number of features in the initial dataset + + Returns + ------- + - group_matrix : torch matrix + A matrix of size (n_groups, input_dim) + where m_ij represents the importance of feature j in group i + The rows must sum to 1 as each group is equally important a priori. + + """ + check_list_groups(list_groups, input_dim) + + if len(list_groups) == 0: + group_matrix = torch.eye(input_dim) + return group_matrix + else: + n_groups = input_dim - int(np.sum([len(gp) - 1 for gp in list_groups])) + group_matrix = torch.zeros((n_groups, input_dim)) + + remaining_features = [feat_idx for feat_idx in range(input_dim)] + + current_group_idx = 0 + for group in list_groups: + group_size = len(group) + for elem_idx in group: + # add importance of element in group matrix for the corresponding group + group_matrix[current_group_idx, elem_idx] = 1 / group_size + # remove features from list of features + remaining_features.remove(elem_idx) + # move to next group + current_group_idx += 1 + # features not mentioned in list_groups get assigned their own singleton group + for remaining_feat_idx in remaining_features: + group_matrix[current_group_idx, remaining_feat_idx] = 1 + current_group_idx += 1 + return group_matrix
    + + +
[docs]def check_list_groups(list_groups, input_dim): + """ + Check that list_groups: + - is a list of lists + - does not contain the same feature in two different groups + - does not contain unknown features (>= input_dim) + - does not contain empty groups + Parameters + ---------- + - list_groups : list of list of int + Each element is a list representing features in the same group. + One feature should appear in at most one group. + Features that are not assigned to a group will each be in their own single-feature group. + - input_dim : number of features in the initial dataset + """ + assert isinstance(list_groups, list), "list_groups must be a list of lists." + + if len(list_groups) == 0: + return + else: + for group_pos, group in enumerate(list_groups): + msg = f"Groups must be given as a list of lists, but found {group} in position {group_pos}." # noqa + assert isinstance(group, list), msg + assert len(group) > 0, "Empty groups are forbidden, please remove empty groups []" + + n_elements_in_groups = np.sum([len(group) for group in list_groups]) + flat_list = [] + for group in list_groups: + flat_list.extend(group) + unique_elements = np.unique(flat_list) + n_unique_elements_in_groups = len(unique_elements) + msg = "One feature can only appear in one group, please check your grouped_features." + assert n_unique_elements_in_groups == n_elements_in_groups, msg + + highest_feat = np.max(unique_elements) + assert highest_feat < input_dim, f"Number of features is {input_dim} but one group contains {highest_feat}." # noqa + return
    + +
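A worked example (my own values) of the matrix `create_group_matrix` builds: declared groups share their weight equally, and every remaining feature gets its own singleton group, so each row sums to 1.

```python
import torch
from pytorch_tabnet.utils import create_group_matrix

m = create_group_matrix([[0, 1]], input_dim=5)
expected = torch.tensor([[0.5, 0.5, 0.0, 0.0, 0.0],   # declared group: 1 / group_size each
                         [0.0, 0.0, 1.0, 0.0, 0.0],   # remaining features become
                         [0.0, 0.0, 0.0, 1.0, 0.0],   # singleton groups
                         [0.0, 0.0, 0.0, 0.0, 1.0]])
assert torch.equal(m, expected) and torch.allclose(m.sum(dim=1), torch.ones(4))
```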
    [docs]def filter_weights(weights): - """ + """ This function makes sure that weights are in correct format for regression and multitask TabNet @@ -402,7 +558,7 @@

    Source code for pytorch_tabnet.utils

     
     
     
    [docs]def validate_eval_set(eval_set, eval_name, X_train, y_train): - """Check if the shapes of eval_set are compatible with (X_train, y_train). + """Check if the shapes of eval_set are compatible with (X_train, y_train). Parameters ---------- @@ -469,7 +625,7 @@

    Source code for pytorch_tabnet.utils

     
     
     
    [docs]def define_device(device_name): - """ + """ Define the device to use during training and inference. If auto it will detect automatically whether to use cuda or cpu @@ -496,25 +652,25 @@

    Source code for pytorch_tabnet.utils

     
     
    [docs]class ComplexEncoder(json.JSONEncoder):
    [docs] def default(self, obj): - if isinstance(obj, np.int64): - return int(obj) + if isinstance(obj, (np.generic, np.ndarray)): + return obj.tolist() # Let the base class default method raise the TypeError return json.JSONEncoder.default(self, obj)
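A quick sketch of what the broadened `ComplexEncoder.default` above enables: any numpy scalar or array appearing in saved parameters now serializes cleanly (the parameter values here are assumptions for illustration):

```python
import json
import numpy as np
from pytorch_tabnet.utils import ComplexEncoder

params = {"seed": np.int32(0), "cat_emb_dim": np.array([2, 3])}
print(json.dumps(params, cls=ComplexEncoder))  # {"seed": 0, "cat_emb_dim": [2, 3]}
```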
    [docs]def check_input(X): - """ + """ Raise a clear error if X is a pandas dataframe and check array according to scikit rules """ if isinstance(X, (pd.DataFrame, pd.Series)): err_message = "Pandas DataFrame are not supported: apply X.values when calling fit" raise TypeError(err_message) - check_array(X)
    + check_array(X, accept_sparse=True)
    [docs]def check_warm_start(warm_start, from_unsupervised): - """ + """ Gives a warning about ambiguous usage of the two parameters. """ if warm_start and from_unsupervised is not None: @@ -522,6 +678,40 @@

    Source code for pytorch_tabnet.utils

             warn_msg = "warm_start will be ignored, training will start from unsupervised weights"
             warnings.warn(warn_msg)
         return
    + + +
    [docs]def check_embedding_parameters(cat_dims, cat_idxs, cat_emb_dim): + """ + Check parameters related to embeddings and rearrange them in a unique manner. + """ + if (cat_dims == []) ^ (cat_idxs == []): + if cat_dims == []: + msg = "If cat_idxs is non-empty, cat_dims must be defined as a list of same length." + else: + msg = "If cat_dims is non-empty, cat_idxs must be defined as a list of same length." + raise ValueError(msg) + elif len(cat_dims) != len(cat_idxs): + msg = "The lists cat_dims and cat_idxs must have the same length." + raise ValueError(msg) + + if isinstance(cat_emb_dim, int): + cat_emb_dims = [cat_emb_dim] * len(cat_idxs) + else: + cat_emb_dims = cat_emb_dim + + # check that all embeddings are provided + if len(cat_emb_dims) != len(cat_dims): + msg = f"""cat_emb_dim and cat_dims must be lists of same length, got {len(cat_emb_dims)} + and {len(cat_dims)}""" + raise ValueError(msg) + + # Rearrange to get reproducible seeds with different ordering + if len(cat_idxs) > 0: + sorted_idxs = np.argsort(cat_idxs) + cat_dims = [cat_dims[i] for i in sorted_idxs] + cat_emb_dims = [cat_emb_dims[i] for i in sorted_idxs] + + return cat_dims, cat_idxs, cat_emb_dims
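An illustration (with made-up values) of the normalization `check_embedding_parameters` performs: an integer `cat_emb_dim` is broadcast to every categorical feature, and `cat_dims`/`cat_emb_dims` are reordered to follow increasing `cat_idxs` so runs stay reproducible regardless of input ordering.

```python
from pytorch_tabnet.utils import check_embedding_parameters

cat_dims, cat_idxs, cat_emb_dims = check_embedding_parameters(
    cat_dims=[10, 4], cat_idxs=[7, 2], cat_emb_dim=3
)
# cat_dims and cat_emb_dims now follow increasing cat_idxs order:
print(cat_dims, cat_idxs, cat_emb_dims)  # [4, 10] [7, 2] [3, 3]
```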
    diff --git a/docs/_modules/torch/optim/adam.html b/docs/_modules/torch/optim/adam.html index 7871f583..2360413c 100644 --- a/docs/_modules/torch/optim/adam.html +++ b/docs/_modules/torch/optim/adam.html @@ -87,6 +87,7 @@
  • README
  • TabNet : Attentive Interpretable Tabular Learning
  • Installation
  • +
  • What is new ?
  • Contributing
  • What problems does pytorch-tabnet handle?
  • How to use it?
  • @@ -159,17 +160,96 @@

    Source code for torch.optim.adam

    -import math
    -import torch
    -from .optimizer import Optimizer
    +from collections import defaultdict
    +import math
    +from typing import cast, List, Optional, Dict, Tuple
     
    +import torch
    +from torch import Tensor
    +from .optimizer import Optimizer, _use_grad_for_differentiable
    +
    +__all__ = ['Adam', 'adam']
    +
    +
+# TODO(crcrpar): Move this to somewhere (e.g. torch/optim/_utils?) else when adding another fused optimizer.
    +# NOTE(crcrpar): Almost the same as `_MultiDeviceReplicator` defined in
    +# torch/cuda/amp/grad_scaler.py except for the key being str only for torch script.
    +class _MultiDeviceReplicator:
    +    main_tensor: Tensor
    +    _per_device_tensors: Dict[str, Tensor]
    +
    +    def __init__(self, main_tensor: Tensor) -> None:
    +        self.main_tensor = main_tensor
    +        self._per_device_tensors = {str(main_tensor.device): main_tensor}
    +
    +    def get(self, device: str):
    +        if device in self._per_device_tensors:
    +            return self._per_device_tensors[device]
    +        tensor = self.main_tensor.to(device=device, non_blocking=True, copy=True)
    +        self._per_device_tensors[device] = tensor
    +        return tensor
    +
    +
    +# todo(crcrpar): Move this to another place when adding another fused optimizer.
    +def _get_fp16AMP_params(
    +    *,
    +    optimizer: Optimizer,
    +    grad_scaler: Optional[torch.cuda.amp.GradScaler] = None,
    +    device: torch.device,
    +) -> Optional[_MultiDeviceReplicator]:
    +    if grad_scaler is None:
    +        return None
    +    found_inf_dict = grad_scaler._check_inf_per_device(optimizer)
    +    # Combines found_inf tensors from all devices. As in GradScaler.update(),
    +    # tensors are combined on the scale's device, which is an arbitrary but
    +    # reasonable choice that avoids new context creation.
    +    found_infs = [f.to(device, non_blocking=True) for f in found_inf_dict.values()]
    +    assert len(found_infs) > 0, "No inf checks were recorded in _check_inf_per_device."
    +    with torch.no_grad():
    +        found_inf_combined = cast(torch.Tensor, sum(found_infs))
    +    return _MultiDeviceReplicator(found_inf_combined)
     
     class Adam(Optimizer):
    -    r"""Implements Adam algorithm.
    -
    -    It has been proposed in `Adam: A Method for Stochastic Optimization`_.
    -
    -    Arguments:
    +    r"""Implements Adam algorithm.
    +
    +    .. math::
    +       \begin{aligned}
    +            &\rule{110mm}{0.4pt}                                                                 \\
    +            &\textbf{input}      : \gamma \text{ (lr)}, \beta_1, \beta_2
    +                \text{ (betas)},\theta_0 \text{ (params)},f(\theta) \text{ (objective)}          \\
    +            &\hspace{13mm}      \lambda \text{ (weight decay)},  \: \textit{amsgrad},
    +                \:\textit{maximize}                                                              \\
    +            &\textbf{initialize} :  m_0 \leftarrow 0 \text{ ( first moment)},
    +                v_0\leftarrow 0 \text{ (second moment)},\: \widehat{v_0}^{max}\leftarrow 0\\[-1.ex]
    +            &\rule{110mm}{0.4pt}                                                                 \\
    +            &\textbf{for} \: t=1 \: \textbf{to} \: \ldots \: \textbf{do}                         \\
    +
    +            &\hspace{5mm}\textbf{if} \: \textit{maximize}:                                       \\
    +            &\hspace{10mm}g_t           \leftarrow   -\nabla_{\theta} f_t (\theta_{t-1})         \\
    +            &\hspace{5mm}\textbf{else}                                                           \\
    +            &\hspace{10mm}g_t           \leftarrow   \nabla_{\theta} f_t (\theta_{t-1})          \\
    +            &\hspace{5mm}\textbf{if} \: \lambda \neq 0                                           \\
    +            &\hspace{10mm} g_t \leftarrow g_t + \lambda  \theta_{t-1}                            \\
    +            &\hspace{5mm}m_t           \leftarrow   \beta_1 m_{t-1} + (1 - \beta_1) g_t          \\
    +            &\hspace{5mm}v_t           \leftarrow   \beta_2 v_{t-1} + (1-\beta_2) g^2_t          \\
    +            &\hspace{5mm}\widehat{m_t} \leftarrow   m_t/\big(1-\beta_1^t \big)                   \\
    +            &\hspace{5mm}\widehat{v_t} \leftarrow   v_t/\big(1-\beta_2^t \big)                   \\
    +            &\hspace{5mm}\textbf{if} \: amsgrad                                                  \\
    +            &\hspace{10mm}\widehat{v_t}^{max} \leftarrow \mathrm{max}(\widehat{v_t}^{max},
    +                \widehat{v_t})                                                                   \\
    +            &\hspace{10mm}\theta_t \leftarrow \theta_{t-1} - \gamma \widehat{m_t}/
    +                \big(\sqrt{\widehat{v_t}^{max}} + \epsilon \big)                                 \\
    +            &\hspace{5mm}\textbf{else}                                                           \\
    +            &\hspace{10mm}\theta_t \leftarrow \theta_{t-1} - \gamma \widehat{m_t}/
    +                \big(\sqrt{\widehat{v_t}} + \epsilon \big)                                       \\
    +            &\rule{110mm}{0.4pt}                                                          \\[-1.ex]
    +            &\bf{return} \:  \theta_t                                                     \\[-1.ex]
    +            &\rule{110mm}{0.4pt}                                                          \\[-1.ex]
    +       \end{aligned}
    +
    +    For further details regarding the algorithm we refer to `Adam: A Method for Stochastic Optimization`_.
    +
    +    Args:
             params (iterable): iterable of parameters to optimize or dicts defining
                 parameter groups
             lr (float, optional): learning rate (default: 1e-3)
    @@ -178,9 +258,19 @@ 

    Source code for torch.optim.adam

             eps (float, optional): term added to the denominator to improve
                 numerical stability (default: 1e-8)
             weight_decay (float, optional): weight decay (L2 penalty) (default: 0)
    -        amsgrad (boolean, optional): whether to use the AMSGrad variant of this
    +        amsgrad (bool, optional): whether to use the AMSGrad variant of this
                 algorithm from the paper `On the Convergence of Adam and Beyond`_
                 (default: False)
    +        foreach (bool, optional): whether foreach implementation of optimizer
    +            is used (default: None)
    +        maximize (bool, optional): maximize the params based on the objective, instead of
    +            minimizing (default: False)
    +        capturable (bool, optional): whether this instance is safe to capture in a CUDA graph.
    +            Passing True can impair ungraphed performance, so if you don't intend to
    +            graph capture this instance, leave it False (default: False)
    +        fused (bool, optional): whether fused implementation of optimizer is used.
    +            Currently, `torch.float64`, `torch.float32`, `torch.float16`, and `torch.bfloat16`
    +            are supported. (default: False)
     
         .. _Adam\: A Method for Stochastic Optimization:
             https://arxiv.org/abs/1412.6980
    @@ -189,7 +279,9 @@ 

    Source code for torch.optim.adam

         """
     
         def __init__(self, params, lr=1e-3, betas=(0.9, 0.999), eps=1e-8,
    -                 weight_decay=0, amsgrad=False):
    +                 weight_decay=0, amsgrad=False, *, foreach: Optional[bool] = None,
    +                 maximize: bool = False, capturable: bool = False,
    +                 differentiable: bool = False, fused: bool = False):
             if not 0.0 <= lr:
                 raise ValueError("Invalid learning rate: {}".format(lr))
             if not 0.0 <= eps:
    @@ -198,76 +290,482 @@ 

    Source code for torch.optim.adam

                 raise ValueError("Invalid beta parameter at index 0: {}".format(betas[0]))
             if not 0.0 <= betas[1] < 1.0:
                 raise ValueError("Invalid beta parameter at index 1: {}".format(betas[1]))
    +        if not 0.0 <= weight_decay:
    +            raise ValueError("Invalid weight_decay value: {}".format(weight_decay))
             defaults = dict(lr=lr, betas=betas, eps=eps,
    -                        weight_decay=weight_decay, amsgrad=amsgrad)
    +                        weight_decay=weight_decay, amsgrad=amsgrad,
    +                        maximize=maximize, foreach=foreach, capturable=capturable,
    +                        differentiable=differentiable, fused=fused)
             super(Adam, self).__init__(params, defaults)
     
    +        if fused:
    +            if differentiable:
    +                raise RuntimeError("`fused` cannot be `differentiable`")
    +            self._step_supports_amp_scaling = True
    +            # TODO(crcrpar): [low prec params & their higher prec copy]
+            # Support AMP with FP16/BF16 model params which would need
    +            # higher prec copy of params to do update math in higher prec to
    +            # alleviate the loss of information.
    +            if not all(
    +                p.is_cuda and torch.is_floating_point(p)
    +                for pg in self.param_groups for p in pg['params']
    +            ):
    +                raise RuntimeError("FusedAdam requires all the params to be CUDA, floating point")
    +
         def __setstate__(self, state):
    -        super(Adam, self).__setstate__(state)
    +        super().__setstate__(state)
             for group in self.param_groups:
                 group.setdefault('amsgrad', False)
    -
    -    def step(self, closure=None):
    -        """Performs a single optimization step.
    -
    -        Arguments:
    -            closure (callable, optional): A closure that reevaluates the model
    +            group.setdefault('maximize', False)
    +            group.setdefault('foreach', None)
    +            group.setdefault('capturable', False)
    +            group.setdefault('differentiable', False)
    +            group.setdefault('fused', False)
    +        state_values = list(self.state.values())
    +        step_is_tensor = (len(state_values) != 0) and torch.is_tensor(state_values[0]['step'])
    +        if not step_is_tensor:
    +            for s in state_values:
    +                s['step'] = torch.tensor(float(s['step']))
    +
    +    @_use_grad_for_differentiable
    +    def step(self, closure=None, *, grad_scaler=None):
    +        """Performs a single optimization step.
    +
    +        Args:
    +            closure (Callable, optional): A closure that reevaluates the model
                     and returns the loss.
    +            grad_scaler (:class:`torch.cuda.amp.GradScaler`, optional): A GradScaler which is
    +                supplied from ``grad_scaler.step(optimizer)``.
             """
    +        self._cuda_graph_capture_health_check()
    +
             loss = None
             if closure is not None:
    -            loss = closure()
    +            with torch.enable_grad():
    +                loss = closure()
     
             for group in self.param_groups:
    +            params_with_grad = []
    +            grads = []
    +            exp_avgs = []
    +            exp_avg_sqs = []
    +            max_exp_avg_sqs = []
    +            state_steps = []
    +            beta1, beta2 = group['betas']
    +
    +            grad_scale = None
    +            found_inf = None
    +            if group['fused'] and grad_scaler is not None:
    +                grad_scale = grad_scaler._get_scale_async()
    +                device = grad_scale.device
    +                grad_scale = _MultiDeviceReplicator(grad_scale)
    +                found_inf = _get_fp16AMP_params(optimizer=self, grad_scaler=grad_scaler, device=device)
    +
                 for p in group['params']:
    -                if p.grad is None:
    -                    continue
    -                grad = p.grad.data
    -                if grad.is_sparse:
    -                    raise RuntimeError('Adam does not support sparse gradients, please consider SparseAdam instead')
    -                amsgrad = group['amsgrad']
    -
    -                state = self.state[p]
    -
    -                # State initialization
    -                if len(state) == 0:
    -                    state['step'] = 0
    -                    # Exponential moving average of gradient values
    -                    state['exp_avg'] = torch.zeros_like(p.data, memory_format=torch.preserve_format)
    -                    # Exponential moving average of squared gradient values
    -                    state['exp_avg_sq'] = torch.zeros_like(p.data, memory_format=torch.preserve_format)
    -                    if amsgrad:
    -                        # Maintains max of all exp. moving avg. of sq. grad. values
    -                        state['max_exp_avg_sq'] = torch.zeros_like(p.data, memory_format=torch.preserve_format)
    -
    -                exp_avg, exp_avg_sq = state['exp_avg'], state['exp_avg_sq']
    -                if amsgrad:
    -                    max_exp_avg_sq = state['max_exp_avg_sq']
    -                beta1, beta2 = group['betas']
    -
    -                state['step'] += 1
    -                bias_correction1 = 1 - beta1 ** state['step']
    -                bias_correction2 = 1 - beta2 ** state['step']
    -
    -                if group['weight_decay'] != 0:
    -                    grad.add_(group['weight_decay'], p.data)
    -
    -                # Decay the first and second moment running average coefficient
    -                exp_avg.mul_(beta1).add_(1 - beta1, grad)
    -                exp_avg_sq.mul_(beta2).addcmul_(1 - beta2, grad, grad)
    -                if amsgrad:
    -                    # Maintains the maximum of all 2nd moment running avg. till now
    -                    torch.max(max_exp_avg_sq, exp_avg_sq, out=max_exp_avg_sq)
    -                    # Use the max. for normalizing running avg. of gradient
    -                    denom = (max_exp_avg_sq.sqrt() / math.sqrt(bias_correction2)).add_(group['eps'])
    -                else:
    -                    denom = (exp_avg_sq.sqrt() / math.sqrt(bias_correction2)).add_(group['eps'])
    +                if p.grad is not None:
    +                    params_with_grad.append(p)
    +                    if p.grad.is_sparse:
    +                        raise RuntimeError('Adam does not support sparse gradients, please consider SparseAdam instead')
    +                    grads.append(p.grad)
    +
    +                    state = self.state[p]
    +                    # Lazy state initialization
    +                    if len(state) == 0:
    +                        state['step'] = (
    +                            torch.zeros((1,), dtype=torch.float, device=p.device)
    +                            if self.defaults['capturable'] or self.defaults['fused']
    +                            else torch.tensor(0.)
    +                        )
    +                        # Exponential moving average of gradient values
    +                        state['exp_avg'] = torch.zeros_like(p, memory_format=torch.preserve_format)
    +                        # Exponential moving average of squared gradient values
    +                        state['exp_avg_sq'] = torch.zeros_like(p, memory_format=torch.preserve_format)
    +                        if group['amsgrad']:
    +                            # Maintains max of all exp. moving avg. of sq. grad. values
    +                            state['max_exp_avg_sq'] = torch.zeros_like(p, memory_format=torch.preserve_format)
    +
    +                    exp_avgs.append(state['exp_avg'])
    +                    exp_avg_sqs.append(state['exp_avg_sq'])
    +
    +                    if group['amsgrad']:
    +                        max_exp_avg_sqs.append(state['max_exp_avg_sq'])
    +                    if group['differentiable'] and state['step'].requires_grad:
    +                        raise RuntimeError('`requires_grad` is not supported for `step` in differentiable mode')
    +                    state_steps.append(state['step'])
    +
    +            adam(params_with_grad,
    +                 grads,
    +                 exp_avgs,
    +                 exp_avg_sqs,
    +                 max_exp_avg_sqs,
    +                 state_steps,
    +                 amsgrad=group['amsgrad'],
    +                 beta1=beta1,
    +                 beta2=beta2,
    +                 lr=group['lr'],
    +                 weight_decay=group['weight_decay'],
    +                 eps=group['eps'],
    +                 maximize=group['maximize'],
    +                 foreach=group['foreach'],
    +                 capturable=group['capturable'],
    +                 differentiable=group['differentiable'],
    +                 fused=group['fused'],
    +                 grad_scale=grad_scale,
    +                 found_inf=found_inf)
     
    -                step_size = group['lr'] / bias_correction1
    +        return loss
     
    -                p.data.addcdiv_(-step_size, exp_avg, denom)
     
    -        return loss
    +def adam(params: List[Tensor],
    +         grads: List[Tensor],
    +         exp_avgs: List[Tensor],
    +         exp_avg_sqs: List[Tensor],
    +         max_exp_avg_sqs: List[Tensor],
    +         state_steps: List[Tensor],
    +         # kwonly args with defaults are not supported by functions compiled with torchscript issue #70627
    +         # setting this as kwarg for now as functional API is compiled by torch/distributed/optim
    +         foreach: Optional[bool] = None,
    +         capturable: bool = False,
    +         differentiable: bool = False,
    +         fused: bool = False,
    +         grad_scale: Optional[_MultiDeviceReplicator] = None,
    +         found_inf: Optional[_MultiDeviceReplicator] = None,
    +         *,
    +         amsgrad: bool,
    +         beta1: float,
    +         beta2: float,
    +         lr: float,
    +         weight_decay: float,
    +         eps: float,
    +         maximize: bool):
    +    r"""Functional API that performs Adam algorithm computation.
    +    See :class:`~torch.optim.Adam` for details.
    +    """
    +
    +    if not all(isinstance(t, torch.Tensor) for t in state_steps):
    +        raise RuntimeError("API has changed, `state_steps` argument must contain a list of singleton tensors")
    +
    +    if foreach is None:
    +        # Placeholder for more complex foreach logic to be added when value is not set
    +        foreach = False
    +
    +    if foreach and torch.jit.is_scripting():
    +        raise RuntimeError('torch.jit.script not supported with foreach optimizers')
    +
    +    if foreach and not torch.jit.is_scripting():
    +        func = _multi_tensor_adam
    +    elif fused and not torch.jit.is_scripting():
    +        func = _fused_adam
    +    else:
    +        func = _single_tensor_adam
    +
    +    func(params,
    +         grads,
    +         exp_avgs,
    +         exp_avg_sqs,
    +         max_exp_avg_sqs,
    +         state_steps,
    +         amsgrad=amsgrad,
    +         beta1=beta1,
    +         beta2=beta2,
    +         lr=lr,
    +         weight_decay=weight_decay,
    +         eps=eps,
    +         maximize=maximize,
    +         capturable=capturable,
    +         differentiable=differentiable,
    +         grad_scale=grad_scale,
    +         found_inf=found_inf)
    +
    +
    +def _single_tensor_adam(params: List[Tensor],
    +                        grads: List[Tensor],
    +                        exp_avgs: List[Tensor],
    +                        exp_avg_sqs: List[Tensor],
    +                        max_exp_avg_sqs: List[Tensor],
    +                        state_steps: List[Tensor],
    +                        grad_scale: Optional[_MultiDeviceReplicator],
    +                        found_inf: Optional[_MultiDeviceReplicator],
    +                        *,
    +                        amsgrad: bool,
    +                        beta1: float,
    +                        beta2: float,
    +                        lr: float,
    +                        weight_decay: float,
    +                        eps: float,
    +                        maximize: bool,
    +                        capturable: bool,
    +                        differentiable: bool):
    +
    +    assert grad_scale is None and found_inf is None
    +
    +    for i, param in enumerate(params):
    +
    +        grad = grads[i] if not maximize else -grads[i]
    +        exp_avg = exp_avgs[i]
    +        exp_avg_sq = exp_avg_sqs[i]
    +        step_t = state_steps[i]
    +
    +        if capturable:
    +            assert param.is_cuda and step_t.is_cuda, "If capturable=True, params and state_steps must be CUDA tensors."
    +
    +        # update step
    +        step_t += 1
    +
    +        if weight_decay != 0:
    +            grad = grad.add(param, alpha=weight_decay)
    +
    +        if torch.is_complex(param):
    +            grad = torch.view_as_real(grad)
    +            exp_avg = torch.view_as_real(exp_avg)
    +            exp_avg_sq = torch.view_as_real(exp_avg_sq)
    +            param = torch.view_as_real(param)
    +
    +        # Decay the first and second moment running average coefficient
    +        exp_avg.mul_(beta1).add_(grad, alpha=1 - beta1)
    +        exp_avg_sq.mul_(beta2).addcmul_(grad, grad.conj(), value=1 - beta2)
    +
    +        if capturable or differentiable:
    +            step = step_t
    +
    +            # 1 - beta1 ** step can't be captured in a CUDA graph, even if step is a CUDA tensor
    +            # (incurs "RuntimeError: CUDA error: operation not permitted when stream is capturing")
    +            bias_correction1 = 1 - torch.pow(beta1, step)
    +            bias_correction2 = 1 - torch.pow(beta2, step)
    +
    +            step_size = lr / bias_correction1
    +            step_size_neg = step_size.neg()
    +
    +            bias_correction2_sqrt = bias_correction2.sqrt()
    +
    +            if amsgrad:
    +                # Maintains the maximum of all 2nd moment running avg. till now
    +                if differentiable:
    +                    max_exp_avg_sqs_i = max_exp_avg_sqs[i].clone()
    +                else:
    +                    max_exp_avg_sqs_i = max_exp_avg_sqs[i]
    +                max_exp_avg_sqs[i].copy_(torch.maximum(max_exp_avg_sqs_i, exp_avg_sq))
    +                # Uses the max. for normalizing running avg. of gradient
    +                # Folds in (admittedly ugly) 1-elem step_size math here to avoid extra param-set-sized read+write
    +                # (can't fold it into addcdiv_ below because addcdiv_ requires value is a Number, not a Tensor)
    +                denom = (max_exp_avg_sqs[i].sqrt() / (bias_correction2_sqrt * step_size_neg)).add_(eps / step_size_neg)
    +            else:
    +                denom = (exp_avg_sq.sqrt() / (bias_correction2_sqrt * step_size_neg)).add_(eps / step_size_neg)
    +
    +            param.addcdiv_(exp_avg, denom)
    +        else:
    +            step = step_t.item()
    +
    +            bias_correction1 = 1 - beta1 ** step
    +            bias_correction2 = 1 - beta2 ** step
    +
    +            step_size = lr / bias_correction1
    +
    +            bias_correction2_sqrt = math.sqrt(bias_correction2)
    +
    +            if amsgrad:
    +                # Maintains the maximum of all 2nd moment running avg. till now
    +                torch.maximum(max_exp_avg_sqs[i], exp_avg_sq, out=max_exp_avg_sqs[i])
    +                # Use the max. for normalizing running avg. of gradient
    +                denom = (max_exp_avg_sqs[i].sqrt() / bias_correction2_sqrt).add_(eps)
    +            else:
    +                denom = (exp_avg_sq.sqrt() / bias_correction2_sqrt).add_(eps)
    +
    +            param.addcdiv_(exp_avg, denom, value=-step_size)
    +
    +
    +def _multi_tensor_adam(params: List[Tensor],
    +                       grads: List[Tensor],
    +                       exp_avgs: List[Tensor],
    +                       exp_avg_sqs: List[Tensor],
    +                       max_exp_avg_sqs: List[Tensor],
    +                       state_steps: List[Tensor],
    +                       grad_scale: Optional[_MultiDeviceReplicator],
    +                       found_inf: Optional[_MultiDeviceReplicator],
    +                       *,
    +                       amsgrad: bool,
    +                       beta1: float,
    +                       beta2: float,
    +                       lr: float,
    +                       weight_decay: float,
    +                       eps: float,
    +                       maximize: bool,
    +                       capturable: bool,
    +                       differentiable: bool):
    +    if len(params) == 0:
    +        return
    +
    +    if capturable:
    +        assert all(p.is_cuda and step.is_cuda for p, step in zip(params, state_steps)), \
    +            "If capturable=True, params and state_steps must be CUDA tensors."
    +
    +    assert grad_scale is None and found_inf is None
    +
    +    if maximize:
    +        grads = torch._foreach_neg(tuple(grads))  # type: ignore[assignment]
    +
    +    assert not differentiable, "_foreach ops don't support autograd"
    +    # Handle complex parameters
    +    grads = [torch.view_as_real(x) if torch.is_complex(x) else x for x in grads]
    +    exp_avgs = [torch.view_as_real(x) if torch.is_complex(x) else x for x in exp_avgs]
    +    exp_avg_sqs = [torch.view_as_real(x) if torch.is_complex(x) else x for x in exp_avg_sqs]
    +    params_ = [torch.view_as_real(x) if torch.is_complex(x) else x for x in params]
    +
    +    # update steps
    +    torch._foreach_add_(state_steps, 1)
    +
    +    if weight_decay != 0:
    +        torch._foreach_add_(grads, params, alpha=weight_decay)
    +
    +    # Decay the first and second moment running average coefficient
    +    torch._foreach_mul_(exp_avgs, beta1)
    +    torch._foreach_add_(exp_avgs, grads, alpha=1 - beta1)
    +
    +    torch._foreach_mul_(exp_avg_sqs, beta2)
    +    torch._foreach_addcmul_(exp_avg_sqs, grads, grads, 1 - beta2)
    +
    +    if capturable:
    +        # TODO: use foreach_pow if/when foreach_pow is added
    +        bias_correction1 = [torch.pow(beta1, step) for step in state_steps]
    +        bias_correction2 = [torch.pow(beta2, step) for step in state_steps]
    +        # foreach_sub doesn't allow a scalar as the first arg
    +        torch._foreach_sub_(bias_correction1, 1)
    +        torch._foreach_sub_(bias_correction2, 1)
    +        torch._foreach_neg_(bias_correction1)
    +        torch._foreach_neg_(bias_correction2)
    +
    +        # foreach_div doesn't allow a scalar as the first arg
    +        step_size = torch._foreach_div(bias_correction1, lr)
    +        torch._foreach_reciprocal_(step_size)
    +        torch._foreach_neg_(step_size)
    +
    +        bias_correction2_sqrt = torch._foreach_sqrt(bias_correction2)
    +
    +        if amsgrad:
    +            # Maintains the maximum of all 2nd moment running avg. till now
    +            torch._foreach_maximum_(max_exp_avg_sqs, exp_avg_sqs)  # type: ignore[assignment]
    +
    +            # Use the max. for normalizing running avg. of gradient
    +            max_exp_avg_sq_sqrt = torch._foreach_sqrt(max_exp_avg_sqs)
    +            # Folds in (admittedly ugly) 1-elem step_size math here to avoid extra param-set-sized read+write
    +            # (can't fold it into addcdiv_ below because addcdiv_ requires value is a Number, not a Tensor)
    +            torch._foreach_div_(max_exp_avg_sq_sqrt, torch._foreach_mul(bias_correction2_sqrt, step_size))
    +            eps_over_step_size = torch._foreach_div(step_size, eps)
    +            torch._foreach_reciprocal_(eps_over_step_size)
    +            denom = torch._foreach_add(max_exp_avg_sq_sqrt, eps_over_step_size)
    +        else:
    +            exp_avg_sq_sqrt = torch._foreach_sqrt(exp_avg_sqs)
    +            torch._foreach_div_(exp_avg_sq_sqrt, torch._foreach_mul(bias_correction2_sqrt, step_size))
    +            eps_over_step_size = torch._foreach_div(step_size, eps)
    +            torch._foreach_reciprocal_(eps_over_step_size)
    +            denom = torch._foreach_add(exp_avg_sq_sqrt, eps_over_step_size)
    +
    +        torch._foreach_addcdiv_(params_, exp_avgs, denom)
    +    else:
    +        bias_correction1 = [1 - beta1 ** step.item() for step in state_steps]
    +        bias_correction2 = [1 - beta2 ** step.item() for step in state_steps]
    +
    +        step_size = [(lr / bc) * -1 for bc in bias_correction1]
    +
    +        bias_correction2_sqrt = [math.sqrt(bc) for bc in bias_correction2]
    +
    +        if amsgrad:
    +            # Maintains the maximum of all 2nd moment running avg. till now
    +            torch._foreach_maximum_(max_exp_avg_sqs, exp_avg_sqs)
    +
    +            # Use the max. for normalizing running avg. of gradient
    +            max_exp_avg_sq_sqrt = torch._foreach_sqrt(max_exp_avg_sqs)
    +            torch._foreach_div_(max_exp_avg_sq_sqrt, bias_correction2_sqrt)
    +            denom = torch._foreach_add(max_exp_avg_sq_sqrt, eps)
    +        else:
    +            exp_avg_sq_sqrt = torch._foreach_sqrt(exp_avg_sqs)
    +            torch._foreach_div_(exp_avg_sq_sqrt, bias_correction2_sqrt)
    +            denom = torch._foreach_add(exp_avg_sq_sqrt, eps)
    +
    +        torch._foreach_addcdiv_(params_, exp_avgs, denom, step_size)
    +
    +
    +# TODO(crcrpar): Move this to another place when adding another fused optimizer.
    +# TODO(crcrpar): Make this generic when there's more fused optimizers.
    +# TODO(crcrpar): Think of rewriting this in C++.
    +@torch.no_grad()
    +def _group_params_by_device_and_dtype(
    +    params: List[Tensor],
    +    grads: List[Tensor],
    +    exp_avgs: List[Tensor],
    +    exp_avg_sqs: List[Tensor],
    +    max_exp_avg_sqs: List[Tensor],
    +    state_steps: List[Tensor],
    +) -> Dict[Tuple[str, torch.dtype], List[List[Tensor]]]:
    +    per_device_and_dtype_tensors = defaultdict(lambda: [[] for _ in range(6)])
    +    for i, (p, step) in enumerate(zip(params, state_steps)):
    +        key = (str(p.device), p.dtype)
    +        per_device_and_dtype_tensors[key][0].append(p)
    +        per_device_and_dtype_tensors[key][1].append(grads[i])
    +        per_device_and_dtype_tensors[key][2].append(exp_avgs[i])
    +        per_device_and_dtype_tensors[key][3].append(exp_avg_sqs[i])
    +        if max_exp_avg_sqs:
    +            per_device_and_dtype_tensors[key][4].append(max_exp_avg_sqs[i])
    +        per_device_and_dtype_tensors[key][5].append(step)
    +    return per_device_and_dtype_tensors
    +
    +
    +def _fused_adam(
    +    params: List[Tensor],
    +    grads: List[Tensor],
    +    exp_avgs: List[Tensor],
    +    exp_avg_sqs: List[Tensor],
    +    max_exp_avg_sqs: List[Tensor],
    +    state_steps: List[Tensor],
    +    grad_scale: Optional[_MultiDeviceReplicator],
    +    found_inf: Optional[_MultiDeviceReplicator],
    +    *,
    +    amsgrad: bool,
    +    beta1: float,
    +    beta2: float,
    +    lr: float,
    +    weight_decay: float,
    +    eps: float,
    +    maximize: bool,
    +    capturable: bool,  # Needed for consistency.
    +    differentiable: bool,
    +) -> None:
    +    grouped_tensors = _group_params_by_device_and_dtype(params, grads, exp_avgs, exp_avg_sqs, max_exp_avg_sqs, state_steps)
    +    for (device, dtype) in grouped_tensors:
    +        (
    +            device_params,
    +            device_grads,
    +            device_exp_avgs,
    +            device_exp_avg_sqs,
    +            device_max_exp_avg_sqs,
    +            device_state_steps,
    +        ) = grouped_tensors[(device, dtype)]
    +        if grad_scale is not None and found_inf is not None:
    +            device_grad_scale = grad_scale.get(device)
    +            device_found_inf = found_inf.get(device)
    +        else:
    +            device_grad_scale = None
    +            device_found_inf = None
    +        torch._foreach_add_(device_state_steps, 1)
    +        torch._fused_adam_(
    +            device_params,
    +            device_grads,
    +            device_exp_avgs,
    +            device_exp_avg_sqs,
    +            device_max_exp_avg_sqs,
    +            device_state_steps,
    +            amsgrad=amsgrad,
    +            lr=lr,
    +            beta1=beta1,
    +            beta2=beta2,
    +            weight_decay=weight_decay,
    +            eps=eps,
    +            maximize=maximize,
    +            grad_scale=device_grad_scale,
    +            found_inf=device_found_inf,
    +        )
    +        if device_found_inf is not None:
    +            torch._foreach_sub_(device_state_steps, [device_found_inf] * len(device_state_steps))
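A generic usage sketch of `torch.optim.Adam`, whose source is vendored above and which is TabNet's default `optimizer_fn`; the model and values are illustrative only:

```python
import torch

model = torch.nn.Linear(10, 1)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3, betas=(0.9, 0.999), weight_decay=0)
x, y = torch.randn(32, 10), torch.randn(32, 1)
for _ in range(5):
    optimizer.zero_grad()
    loss = torch.nn.functional.mse_loss(model(x), y)
    loss.backward()
    optimizer.step()  # one Adam update per iteration
```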
     
diff --git a/docs/_sources/generated_docs/README.md.txt b/docs/_sources/generated_docs/README.md.txt index 0f0954bb..07e79d39 100644 --- a/docs/_sources/generated_docs/README.md.txt +++ b/docs/_sources/generated_docs/README.md.txt @@ -2,7 +2,7 @@ # TabNet : Attentive Interpretable Tabular Learning -This is a pyTorch implementation of Tabnet (Arik, S. O., & Pfister, T. (2019). TabNet: Attentive Interpretable Tabular Learning. arXiv preprint arXiv:1908.07442.) https://arxiv.org/pdf/1908.07442.pdf. +This is a pyTorch implementation of Tabnet (Arik, S. O., & Pfister, T. (2019). TabNet: Attentive Interpretable Tabular Learning. arXiv preprint arXiv:1908.07442.) https://arxiv.org/pdf/1908.07442.pdf. Please note that some different choices have been made over time to improve the library, which can differ from the original paper. [![CircleCI](https://circleci.com/gh/dreamquark-ai/tabnet.svg?style=svg)](https://circleci.com/gh/dreamquark-ai/tabnet) @@ -68,6 +68,10 @@ If you want to use it locally within a docker container: - `make notebook` inside the same terminal. You can then follow the link to a jupyter notebook with tabnet installed. +# What is new ? + +- from version **> 4.0** attention is now embedding aware. This aims to maintain a good attention mechanism even with a large number of embeddings. It is also now possible to specify attention groups (using `grouped_features`). Attention is now done at the group level rather than at the feature level. This is especially useful if a dataset has a lot of columns coming from one single source of data (example: a text column transformed using TF-IDF). + # Contributing When contributing to the TabNet repository, please make sure to first discuss the change you wish to make via a new or already existing issue. @@ -316,6 +320,12 @@ loaded_clf.load_model(saved_filepath) - `mask_type: str` (default='sparsemax') Either "sparsemax" or "entmax" : this is the masking function to use for selecting features. +- `grouped_features: list of list of ints` (default=None) + This allows the model to share its attention across features inside the same group. + This can be especially useful when your preprocessing generates correlated or dependent features, for instance if you use a TF-IDF or a PCA on a text column. + Note that feature importance will be exactly the same between features within the same group. + Please also note that embeddings generated for a categorical variable are always inside the same group. + - `n_shared_decoder` : int (default=1) Number of shared GLU blocks in decoder, this is only useful for `TabNetPretrainer`. @@ -326,7 +336,7 @@ loaded_clf.load_model(saved_filepath) ## Fit parameters -- `X_train` : np.array +- `X_train` : np.array or scipy.sparse.csr_matrix Training features @@ -401,3 +411,7 @@ loaded_clf.load_model(saved_filepath) - `warm_start` : bool (default=False) In order to match scikit-learn API, this is set to False. It allows fitting the same model twice and starting from a warm start. + +- `compute_importance` : bool (default=True) + + Whether to compute feature importance diff --git a/docs/generated_docs/README.html b/docs/generated_docs/README.html index f6deca28..8e019dd5 100644 --- a/docs/generated_docs/README.html +++ b/docs/generated_docs/README.html @@ -98,6 +98,7 @@ +
  • What is new ?
  • Contributing
  • What problems does pytorch-tabnet handle?
  • How to use it? @@ -1507,6 +1612,11 @@

    pytorch_tabnet packagegamma: float = 1.3
    +
    +
    +grouped_features: List[List[int]] = None
    +
    +
    input_dim: int = None
    @@ -1605,7 +1715,7 @@

    pytorch_tabnet package

    Make predictions on a batch (valid)

    Parameters
    -

    X (a :tensor: torch.Tensor) – Input data

    +

    X (a :tensor: torch.Tensor or matrix: scipy.sparse.csr_matrix) – Input data

    Returns

    predictions – Predictions of the regression problem

    @@ -1694,7 +1804,7 @@

    pytorch_tabnet package

    pytorch_tabnet.pretraining module

    -class pytorch_tabnet.pretraining.TabNetPretrainer(n_d: int = 8, n_a: int = 8, n_steps: int = 3, gamma: float = 1.3, cat_idxs: List[int] = <factory>, cat_dims: List[int] = <factory>, cat_emb_dim: int = 1, n_independent: int = 2, n_shared: int = 2, epsilon: float = 1e-15, momentum: float = 0.02, lambda_sparse: float = 0.001, seed: int = 0, clip_value: int = 1, verbose: int = 1, optimizer_fn: Any = <class 'torch.optim.adam.Adam'>, optimizer_params: Dict = <factory>, scheduler_fn: Any = None, scheduler_params: Dict = <factory>, mask_type: str = 'sparsemax', input_dim: int = None, output_dim: int = None, device_name: str = 'auto', n_shared_decoder: int = 1, n_indep_decoder: int = 1)[source]
    +class pytorch_tabnet.pretraining.TabNetPretrainer(n_d: int = 8, n_a: int = 8, n_steps: int = 3, gamma: float = 1.3, cat_idxs: List[int] = <factory>, cat_dims: List[int] = <factory>, cat_emb_dim: int = 1, n_independent: int = 2, n_shared: int = 2, epsilon: float = 1e-15, momentum: float = 0.02, lambda_sparse: float = 0.001, seed: int = 0, clip_value: int = 1, verbose: int = 1, optimizer_fn: Any = <class 'torch.optim.adam.Adam'>, optimizer_params: Dict = <factory>, scheduler_fn: Any = None, scheduler_params: Dict = <factory>, mask_type: str = 'sparsemax', input_dim: int = None, output_dim: int = None, device_name: str = 'auto', n_shared_decoder: int = 1, n_indep_decoder: int = 1, grouped_features: List[List[int]] = <factory>)[source]

    Bases: pytorch_tabnet.abstract_model.TabModel

    @@ -1758,6 +1868,11 @@

    pytorch_tabnet package

    +
    +
    +grouped_features = None
    +
    +
    optimizer_params = None
    @@ -1769,7 +1884,7 @@

    pytorch_tabnet package

    Make predictions on a batch (valid)

    Parameters
    -

    X (a :tensor: torch.Tensor) – Input data

    +

    X (a :tensor: torch.Tensor or matrix: scipy.sparse.csr_matrix) – Input data

    Returns

    predictions – Predictions of the regression problem

    @@ -1868,6 +1983,33 @@

    pytorch_tabnet package

    +
    +
    +class pytorch_tabnet.utils.SparsePredictDataset(x)[source]
    +

    Bases: torch.utils.data.dataset.Dataset

    +

    Format for csr_matrix

    +
    +
    Parameters
    +

    X (CSR matrix) – The input matrix

    +
    +
    +
    + +
    +
    +class pytorch_tabnet.utils.SparseTorchDataset(x, y)[source]
    +

    Bases: torch.utils.data.dataset.Dataset

    +

    Format for csr_matrix

    +
    +
    Parameters
    +
      +
    • X (CSR matrix) – The input matrix

    • +
    • y (2D array) – The one-hot encoded target

    • +
    +
    +
    +
    +
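For the two sparse dataset wrappers documented above, here is a brief, hedged sketch of how they can be used directly; in normal use the model's `fit`/`predict` methods build them internally, and the data plus the assumption that each item is served as a dense row tensor are illustrative.

```python
import numpy as np
from scipy.sparse import csr_matrix
from torch.utils.data import DataLoader
from pytorch_tabnet.utils import SparsePredictDataset

# A CSR matrix with 32 rows and 10 features (built from dense random data for brevity).
X = csr_matrix(np.random.rand(32, 10).astype(np.float32))

# Wrap the CSR matrix so a standard torch DataLoader can iterate over it.
loader = DataLoader(SparsePredictDataset(X), batch_size=8, shuffle=False)
for batch in loader:
    print(batch.shape)  # each batch is expected to be a dense (8, 10) tensor
```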
    class pytorch_tabnet.utils.TorchDataset(x, y)[source]
    @@ -1883,6 +2025,12 @@

    pytorch_tabnet package

    +
    +
    +pytorch_tabnet.utils.check_embedding_parameters(cat_dims, cat_idxs, cat_emb_dim)[source]
    +

    Check parameters related to embeddings and rearrange them in a unique manner.

    +
    +
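The docstring above only promises that the three embedding parameters are checked and rearranged consistently, so the hedged sketch below simply prints the result instead of assuming its exact layout; the concrete values are made up.

```python
from pytorch_tabnet.utils import check_embedding_parameters

cat_idxs = [4, 1]    # indices of the categorical columns (deliberately unsorted)
cat_dims = [10, 3]   # number of modalities per categorical column, same order as cat_idxs
cat_emb_dim = 2      # a single embedding size, or one size per categorical column

# Expected to return the rearranged parameters (and to raise if cat_dims and
# cat_idxs are incoherent, e.g. of different lengths).
print(check_embedding_parameters(cat_dims, cat_idxs, cat_emb_dim))
```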
    pytorch_tabnet.utils.check_input(X)[source]
    @@ -1890,6 +2038,30 @@

    pytorch_tabnet package

    +
    +
    +pytorch_tabnet.utils.check_list_groups(list_groups, input_dim)[source]
    +
    +
    Check that list groups:
      +
• is a list of lists

    • +
• does not contain the same feature twice in different groups

    • +
    • does not contain unknown features (>= input_dim)

    • +
    • does not contain empty groups

    • +
    +
    +
    +
    +
    Parameters
    +
      +
• list_groups (-) – Each element is a list representing features in the same group. +One feature should appear in at most one group. +Features that don’t get assigned a group will be in their own group of one feature.

    • +
    • input_dim (-) –

    • +
    +
    +
    +
    +
    pytorch_tabnet.utils.check_warm_start(warm_start, from_unsupervised)[source]
    @@ -1957,6 +2129,30 @@

    pytorch_tabnet package

    +
    +
    +pytorch_tabnet.utils.create_group_matrix(list_groups, input_dim)[source]
    +

    Create the group matrix corresponding to the given list_groups

    +
    +
    Parameters
    +
      +
• list_groups (-) – Each element is a list representing features in the same group. +One feature should appear in at most one group. +Features that don’t get assigned a group will be in their own group of one feature.

    • +
    • input_dim (-) –

    • +
    +
    +
    Returns
    +

- group_matrix – A matrix of size (n_groups, input_dim) +where m_ij represents the importance of feature j in group i +The rows must sum to 1 as each group is equally important a priori.

    +
    +
    Return type
    +

    torch matrix

    +
    +
    +
    +
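As a concrete illustration of the contract described above (one row per group, rows summing to 1), the call below uses a made-up grouping of five features; the printed shape and row sums follow from that contract.

```python
from pytorch_tabnet.utils import create_group_matrix

# 5 features; features 0-2 form one explicit group, so features 3 and 4 each get
# a singleton group, giving 3 groups in total.
group_matrix = create_group_matrix([[0, 1, 2]], input_dim=5)

print(group_matrix.shape)       # expected: torch.Size([3, 5])
print(group_matrix.sum(dim=1))  # expected: tensor([1., 1., 1.]) - each row sums to 1
```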
    pytorch_tabnet.utils.create_sampler(weights, y_train)[source]
    @@ -2043,7 +2239,7 @@

    pytorch_tabnet package

    pytorch_tabnet.multitask module

    -class pytorch_tabnet.multitask.TabNetMultiTaskClassifier(n_d: int = 8, n_a: int = 8, n_steps: int = 3, gamma: float = 1.3, cat_idxs: List[int] = <factory>, cat_dims: List[int] = <factory>, cat_emb_dim: int = 1, n_independent: int = 2, n_shared: int = 2, epsilon: float = 1e-15, momentum: float = 0.02, lambda_sparse: float = 0.001, seed: int = 0, clip_value: int = 1, verbose: int = 1, optimizer_fn: Any = <class 'torch.optim.adam.Adam'>, optimizer_params: Dict = <factory>, scheduler_fn: Any = None, scheduler_params: Dict = <factory>, mask_type: str = 'sparsemax', input_dim: int = None, output_dim: int = None, device_name: str = 'auto', n_shared_decoder: int = 1, n_indep_decoder: int = 1)[source]
    +class pytorch_tabnet.multitask.TabNetMultiTaskClassifier(n_d: int = 8, n_a: int = 8, n_steps: int = 3, gamma: float = 1.3, cat_idxs: List[int] = <factory>, cat_dims: List[int] = <factory>, cat_emb_dim: int = 1, n_independent: int = 2, n_shared: int = 2, epsilon: float = 1e-15, momentum: float = 0.02, lambda_sparse: float = 0.001, seed: int = 0, clip_value: int = 1, verbose: int = 1, optimizer_fn: Any = <class 'torch.optim.adam.Adam'>, optimizer_params: Dict = <factory>, scheduler_fn: Any = None, scheduler_params: Dict = <factory>, mask_type: str = 'sparsemax', input_dim: int = None, output_dim: int = None, device_name: str = 'auto', n_shared_decoder: int = 1, n_indep_decoder: int = 1, grouped_features: List[List[int]] = <factory>)[source]

    Bases: pytorch_tabnet.abstract_model.TabModel

    @@ -2075,6 +2271,11 @@

    pytorch_tabnet package

    +
    +
    +grouped_features = None
    +
    +
    optimizer_params = None
    @@ -2086,7 +2287,7 @@

    pytorch_tabnet package

    Make predictions on a batch (valid)

    Parameters
    -

    X (a :tensor: torch.Tensor) – Input data

    +

    X (a :tensor: torch.Tensor or matrix: scipy.sparse.csr_matrix) – Input data

    Returns

    results – Predictions of the most probable class

    @@ -2103,7 +2304,7 @@

    pytorch_tabnet package

    Make predictions for classification on a batch (valid)

    Parameters
    -

    X (a :tensor: torch.Tensor) – Input data

    +

    X (a :tensor: torch.Tensor or matrix: scipy.sparse.csr_matrix) – Input data

    Returns

    res

    diff --git a/docs/genindex.html b/docs/genindex.html index 3c9e444d..f4d16cdd 100644 --- a/docs/genindex.html +++ b/docs/genindex.html @@ -88,6 +88,7 @@
  • README
  • TabNet : Attentive Interpretable Tabular Learning
  • Installation
  • +
  • What is new ?
  • Contributing
  • What problems does pytorch-tabnet handle?
  • How to use it?
  • @@ -257,11 +258,15 @@

    C

  • check_classification_targets() (in module pytorch_tabnet.multiclass_utils) +
  • +
  • check_embedding_parameters() (in module pytorch_tabnet.utils)
  • check_input() (in module pytorch_tabnet.utils)
  • create_explain_matrix() (in module pytorch_tabnet.utils) +
  • +
  • create_group_matrix() (in module pytorch_tabnet.utils)
  • create_sampler() (in module pytorch_tabnet.utils)
  • @@ -418,8 +425,6 @@

    G

  • GBN (class in pytorch_tabnet.tab_network)
  • - - +

    H

    @@ -724,14 +743,14 @@

    S

  • seed (pytorch_tabnet.abstract_model.TabModel attribute)
  • - - + +
  • What is new ?
  • Contributing
  • What problems does pytorch-tabnet handle?
  • How to use it?
      diff --git a/docs/py-modindex.html b/docs/py-modindex.html index 6dee3c89..89af9bee 100644 --- a/docs/py-modindex.html +++ b/docs/py-modindex.html @@ -90,6 +90,7 @@
    • README
    • TabNet : Attentive Interpretable Tabular Learning
    • Installation
    • +
    • What is new ?
    • Contributing
    • What problems does pytorch-tabnet handle?
    • How to use it?
    • diff --git a/docs/search.html b/docs/search.html index 54ba30a5..c88bd8e6 100644 --- a/docs/search.html +++ b/docs/search.html @@ -89,6 +89,7 @@
    • README
    • TabNet : Attentive Interpretable Tabular Learning
    • Installation
    • +
    • What is new ?
    • Contributing
    • What problems does pytorch-tabnet handle?
    • How to use it?
    • diff --git a/docs/searchindex.js b/docs/searchindex.js index b069376c..c2d3e89d 100644 --- a/docs/searchindex.js +++ b/docs/searchindex.js @@ -1 +1 @@ -Search.setIndex({docnames:["generated_docs/README","generated_docs/pytorch_tabnet","index"],envversion:{"sphinx.domains.c":1,"sphinx.domains.changeset":1,"sphinx.domains.citation":1,"sphinx.domains.cpp":1,"sphinx.domains.index":1,"sphinx.domains.javascript":1,"sphinx.domains.math":2,"sphinx.domains.python":1,"sphinx.domains.rst":1,"sphinx.domains.std":1,"sphinx.ext.viewcode":1,sphinx:56},filenames:["generated_docs/README.md","generated_docs/pytorch_tabnet.rst","index.rst"],objects:{"pytorch_tabnet.abstract_model":{TabModel:[1,1,1,""]},"pytorch_tabnet.abstract_model.TabModel":{cat_dims:[1,2,1,""],cat_emb_dim:[1,2,1,""],cat_idxs:[1,2,1,""],clip_value:[1,2,1,""],compute_loss:[1,3,1,""],device_name:[1,2,1,""],epsilon:[1,2,1,""],explain:[1,3,1,""],fit:[1,3,1,""],gamma:[1,2,1,""],input_dim:[1,2,1,""],lambda_sparse:[1,2,1,""],load_class_attrs:[1,3,1,""],load_model:[1,3,1,""],load_weights_from_unsupervised:[1,3,1,""],mask_type:[1,2,1,""],momentum:[1,2,1,""],n_a:[1,2,1,""],n_d:[1,2,1,""],n_indep_decoder:[1,2,1,""],n_independent:[1,2,1,""],n_shared:[1,2,1,""],n_shared_decoder:[1,2,1,""],n_steps:[1,2,1,""],optimizer_fn:[1,2,1,""],optimizer_params:[1,2,1,""],output_dim:[1,2,1,""],predict:[1,3,1,""],prepare_target:[1,3,1,""],save_model:[1,3,1,""],scheduler_fn:[1,2,1,""],scheduler_params:[1,2,1,""],seed:[1,2,1,""],update_fit_params:[1,3,1,""],verbose:[1,2,1,""]},"pytorch_tabnet.augmentations":{ClassificationSMOTE:[1,1,1,""],RegressionSMOTE:[1,1,1,""]},"pytorch_tabnet.callbacks":{Callback:[1,1,1,""],CallbackContainer:[1,1,1,""],EarlyStopping:[1,1,1,""],History:[1,1,1,""],LRSchedulerCallback:[1,1,1,""]},"pytorch_tabnet.callbacks.Callback":{on_batch_begin:[1,3,1,""],on_batch_end:[1,3,1,""],on_epoch_begin:[1,3,1,""],on_epoch_end:[1,3,1,""],on_train_begin:[1,3,1,""],on_train_end:[1,3,1,""],set_params:[1,3,1,""],set_trainer:[1,3,1,""]},"pytorch_tabnet.callbacks.CallbackContainer":{append:[1,3,1,""],callbacks:[1,2,1,""],on_batch_begin:[1,3,1,""],on_batch_end:[1,3,1,""],on_epoch_begin:[1,3,1,""],on_epoch_end:[1,3,1,""],on_train_begin:[1,3,1,""],on_train_end:[1,3,1,""],set_params:[1,3,1,""],set_trainer:[1,3,1,""]},"pytorch_tabnet.callbacks.EarlyStopping":{early_stopping_metric:[1,2,1,""],is_maximize:[1,2,1,""],on_epoch_end:[1,3,1,""],on_train_end:[1,3,1,""],patience:[1,2,1,""],tol:[1,2,1,""]},"pytorch_tabnet.callbacks.History":{on_batch_end:[1,3,1,""],on_epoch_begin:[1,3,1,""],on_epoch_end:[1,3,1,""],on_train_begin:[1,3,1,""],trainer:[1,2,1,""],verbose:[1,2,1,""]},"pytorch_tabnet.callbacks.LRSchedulerCallback":{early_stopping_metric:[1,2,1,""],is_batch_level:[1,2,1,""],on_batch_end:[1,3,1,""],on_epoch_end:[1,3,1,""],optimizer:[1,2,1,""],scheduler_fn:[1,2,1,""],scheduler_params:[1,2,1,""]},"pytorch_tabnet.metrics":{AUC:[1,1,1,""],Accuracy:[1,1,1,""],BalancedAccuracy:[1,1,1,""],LogLoss:[1,1,1,""],MAE:[1,1,1,""],MSE:[1,1,1,""],Metric:[1,1,1,""],MetricContainer:[1,1,1,""],RMSE:[1,1,1,""],RMSLE:[1,1,1,""],UnsupMetricContainer:[1,1,1,""],UnsupervisedLoss:[1,4,1,""],UnsupervisedLossNumpy:[1,4,1,""],UnsupervisedMetric:[1,1,1,""],UnsupervisedNumpyMetric:[1,1,1,""],check_metrics:[1,4,1,""]},"pytorch_tabnet.metrics.Metric":{get_metrics_by_names:[1,3,1,""]},"pytorch_tabnet.metrics.MetricContainer":{metric_names:[1,2,1,""],prefix:[1,2,1,""]},"pytorch_tabnet.metrics.UnsupMetricContainer":{metric_names:[1,2,1,""],prefix:[1,2,1,""]},"pytorch_tabnet.multiclass_utils":{assert
_all_finite:[1,4,1,""],check_classification_targets:[1,4,1,""],check_output_dim:[1,4,1,""],check_unique_type:[1,4,1,""],infer_multitask_output:[1,4,1,""],infer_output_dim:[1,4,1,""],is_multilabel:[1,4,1,""],type_of_target:[1,4,1,""],unique_labels:[1,4,1,""]},"pytorch_tabnet.multitask":{TabNetMultiTaskClassifier:[1,1,1,""]},"pytorch_tabnet.multitask.TabNetMultiTaskClassifier":{cat_dims:[1,2,1,""],cat_idxs:[1,2,1,""],compute_loss:[1,3,1,""],optimizer_params:[1,2,1,""],predict:[1,3,1,""],predict_proba:[1,3,1,""],prepare_target:[1,3,1,""],scheduler_params:[1,2,1,""],stack_batches:[1,3,1,""],update_fit_params:[1,3,1,""]},"pytorch_tabnet.pretraining":{TabNetPretrainer:[1,1,1,""]},"pytorch_tabnet.pretraining.TabNetPretrainer":{cat_dims:[1,2,1,""],cat_idxs:[1,2,1,""],compute_loss:[1,3,1,""],fit:[1,3,1,""],optimizer_params:[1,2,1,""],predict:[1,3,1,""],prepare_target:[1,3,1,""],scheduler_params:[1,2,1,""],stack_batches:[1,3,1,""],update_fit_params:[1,3,1,""]},"pytorch_tabnet.pretraining_utils":{create_dataloaders:[1,4,1,""],validate_eval_set:[1,4,1,""]},"pytorch_tabnet.sparsemax":{Entmax15:[1,1,1,""],Entmax15Function:[1,1,1,""],Entmoid15:[1,1,1,""],Sparsemax:[1,1,1,""],SparsemaxFunction:[1,1,1,""],entmax15:[1,4,1,""],entmoid15:[1,4,1,""],sparsemax:[1,4,1,""]},"pytorch_tabnet.sparsemax.Entmax15":{forward:[1,3,1,""]},"pytorch_tabnet.sparsemax.Entmax15Function":{backward:[1,3,1,""],forward:[1,3,1,""]},"pytorch_tabnet.sparsemax.Entmoid15":{backward:[1,3,1,""],forward:[1,3,1,""]},"pytorch_tabnet.sparsemax.Sparsemax":{forward:[1,3,1,""]},"pytorch_tabnet.sparsemax.SparsemaxFunction":{backward:[1,3,1,""],forward:[1,3,1,""]},"pytorch_tabnet.tab_model":{TabNetClassifier:[1,1,1,""],TabNetRegressor:[1,1,1,""]},"pytorch_tabnet.tab_model.TabNetClassifier":{cat_dims:[1,2,1,""],cat_idxs:[1,2,1,""],compute_loss:[1,3,1,""],optimizer_params:[1,2,1,""],predict_func:[1,3,1,""],predict_proba:[1,3,1,""],prepare_target:[1,3,1,""],scheduler_params:[1,2,1,""],stack_batches:[1,3,1,""],update_fit_params:[1,3,1,""],weight_updater:[1,3,1,""]},"pytorch_tabnet.tab_model.TabNetRegressor":{cat_dims:[1,2,1,""],cat_idxs:[1,2,1,""],compute_loss:[1,3,1,""],optimizer_params:[1,2,1,""],predict_func:[1,3,1,""],prepare_target:[1,3,1,""],scheduler_params:[1,2,1,""],stack_batches:[1,3,1,""],update_fit_params:[1,3,1,""]},"pytorch_tabnet.tab_network":{AttentiveTransformer:[1,1,1,""],EmbeddingGenerator:[1,1,1,""],FeatTransformer:[1,1,1,""],GBN:[1,1,1,""],GLU_Block:[1,1,1,""],GLU_Layer:[1,1,1,""],RandomObfuscator:[1,1,1,""],TabNet:[1,1,1,""],TabNetDecoder:[1,1,1,""],TabNetEncoder:[1,1,1,""],TabNetNoEmbeddings:[1,1,1,""],TabNetPretraining:[1,1,1,""],initialize_glu:[1,4,1,""],initialize_non_glu:[1,4,1,""]},"pytorch_tabnet.tab_network.AttentiveTransformer":{forward:[1,3,1,""]},"pytorch_tabnet.tab_network.EmbeddingGenerator":{forward:[1,3,1,""]},"pytorch_tabnet.tab_network.FeatTransformer":{forward:[1,3,1,""]},"pytorch_tabnet.tab_network.GBN":{forward:[1,3,1,""]},"pytorch_tabnet.tab_network.GLU_Block":{forward:[1,3,1,""]},"pytorch_tabnet.tab_network.GLU_Layer":{forward:[1,3,1,""]},"pytorch_tabnet.tab_network.RandomObfuscator":{forward:[1,3,1,""]},"pytorch_tabnet.tab_network.TabNet":{forward:[1,3,1,""],forward_masks:[1,3,1,""]},"pytorch_tabnet.tab_network.TabNetDecoder":{forward:[1,3,1,""]},"pytorch_tabnet.tab_network.TabNetEncoder":{forward:[1,3,1,""],forward_masks:[1,3,1,""]},"pytorch_tabnet.tab_network.TabNetNoEmbeddings":{forward:[1,3,1,""],forward_masks:[1,3,1,""]},"pytorch_tabnet.tab_network.TabNetPretraining":{forward:[1,3,1,""],forward_masks:[
1,3,1,""]},"pytorch_tabnet.utils":{ComplexEncoder:[1,1,1,""],PredictDataset:[1,1,1,""],TorchDataset:[1,1,1,""],check_input:[1,4,1,""],check_warm_start:[1,4,1,""],create_dataloaders:[1,4,1,""],create_explain_matrix:[1,4,1,""],create_sampler:[1,4,1,""],define_device:[1,4,1,""],filter_weights:[1,4,1,""],validate_eval_set:[1,4,1,""]},"pytorch_tabnet.utils.ComplexEncoder":{"default":[1,3,1,""]},pytorch_tabnet:{abstract_model:[1,0,0,"-"],augmentations:[1,0,0,"-"],callbacks:[1,0,0,"-"],metrics:[1,0,0,"-"],multiclass_utils:[1,0,0,"-"],multitask:[1,0,0,"-"],pretraining:[1,0,0,"-"],pretraining_utils:[1,0,0,"-"],sparsemax:[1,0,0,"-"],tab_model:[1,0,0,"-"],tab_network:[1,0,0,"-"],utils:[1,0,0,"-"]}},objnames:{"0":["py","module","Python module"],"1":["py","class","Python class"],"2":["py","attribute","Python attribute"],"3":["py","method","Python method"],"4":["py","function","Python function"]},objtypes:{"0":"py:module","1":"py:class","2":"py:attribute","3":"py:method","4":"py:function"},terms:{"1st":0,"abstract":1,"boolean":1,"case":1,"class":0,"default":[1,2],"float":[0,1],"function":0,"import":[0,1],"int":[0,1],"new":[0,1],"return":[0,1],"static":1,"throw":1,"true":[0,1],"try":[0,1],"while":1,Added:0,For:1,One:1,The:[0,1],Use:1,Useful:2,Using:1,__call__:0,__init__:0,_contextmethodmixin:1,_maxim:0,_name:0,a_max:0,a_min:0,about:1,abov:1,abs:1,absolut:1,abstract_model:2,accept:1,accord:[0,1],accuraci:[0,1],adam:[0,1],added:0,after:1,afterward:1,alia:1,all:[0,1],allow:[0,1],allow_nan:1,along:1,alpha:1,alreadi:0,also:1,although:1,ambigu:1,amount:1,ani:[0,1],anyth:1,api:0,append:1,appli:[0,1],arbitrari:1,architectur:0,argument:1,arik:0,arrai:[0,1],arxiv:[0,1],assert_all_finit:1,assign:0,astudillo:1,attent:[1,2],attentivetransform:1,attribut:1,auc:[0,1],augment:2,auto:[0,1],autograd:1,autom:[0,1],automat:[0,1],avail:0,averag:1,avoid:1,backward:1,balanc:[0,1],balancedaccuraci:1,base:1,baseestim:1,basic:0,batch:[0,1],batch_out:[],batch_siz:[0,1],becaus:[0,1],been:0,befor:[0,1],bellow:0,below:0,ben:1,best:0,beta:1,better:0,between:[0,1],bigger:0,binari:[0,1],blob:0,block:[0,1],bool:[0,1],both:1,build:[0,1],built:1,call:1,callabl:1,callback:[0,2],callbackcontain:1,can:[0,1],capac:0,care:1,cat:[],cat_dim:[0,1],cat_emb_dim:[0,1],cat_idx:[0,1],categor:[0,1],censu:[],certain:1,chang:[0,1],check:1,check_circular:1,check_classification_target:1,check_input:1,check_metr:1,check_nan:[],check_output_dim:1,check_unique_typ:1,check_warm_start:1,choic:0,cite:1,class_attr:1,classic:[0,1],classif:[0,1],classificationsmot:[0,1],classmethod:1,clear:1,clf:0,clip:[0,1],clip_valu:[0,1],clone:0,close:0,cls:1,code:2,coeffici:0,column:1,com:[0,1],commit:0,compat:[0,1],complet:0,complexencod:1,comput:1,compute_loss:1,conda:0,consecut:[0,1],contain:[0,1],content:2,context:1,continu:1,contribut:2,convert:1,corr:1,correct:1,correl:0,correspond:1,could:[0,1],counter:1,cpu:1,creat:[0,1],create_dataload:1,create_explain_matrix:1,create_sampl:1,cross:0,ctx:1,cuda:1,current:[0,1],custom:[1,2],data:[1,2],datafram:1,dataload:[0,1],dataset:1,dblp:1,decai:0,decis:0,decod:0,deduc:0,deep:1,deeprecomodel:1,def:[0,1],defin:[0,1],define_devic:1,degener:1,depend:[0,1],descript:1,detail:1,detect:[0,1],determin:1,develop:0,devic:1,device_nam:[0,1],dict:[0,1],dictionari:1,dictionnari:[0,1],did:1,differ:1,differenti:1,difficulti:0,dim:1,dimens:1,discret:1,discuss:0,disk:0,distinct:1,distribut:1,divid:0,divis:1,docker:0,doe:[1,2],doing:0,don:1,dreamquark:0,dreamquarktabnet:0,drop:[0,1],drop_last:[0,1],dure:[0,1],each:[0,1],earli:[0,1],early_stopping_metr:
1,earlystop:1,easi:2,easier:0,easili:0,either:[0,1],element:1,els:1,emb:[],embded:[],embed:[0,1],embedded_x:1,embeddinggener:1,enabl:0,encod:1,end:0,ensur:1,ensure_ascii:1,entmax15:1,entmax15funct:1,entmax:[0,1],entmoid15:1,entropi:0,epoch:[0,1],eps:1,epsilon:[0,1],equal:1,equival:1,error:1,eval:[0,1,2],eval_metr:1,eval_nam:[0,1],eval_set:[0,1],evalu:[1,2],event:1,everi:[0,1],exact:1,exampl:[0,1],except:1,exist:0,exit:1,expert:1,explain:1,explan:1,explanatori:0,explicit:1,extra:0,extract:1,factori:1,fals:[0,1],feattransform:1,featu:[],featur:[0,1],few:0,file:1,filepath:1,filter_weight:1,first:[0,1],fit:[1,2],fly:2,follow:[0,1],forest:[],forg:0,format:1,former:1,formula:1,forward:1,forward_mask:1,found:0,frequenc:1,from:[0,1],from_unsupervis:[0,1],further:0,gamma:[0,1],gate:0,gbn:1,gener:1,get:[0,1],get_metrics_by_nam:1,ghost:[0,1],gini:0,git:0,github:[0,1],give:[0,1],given:[0,1],glu:[0,1],glu_block:1,glu_lay:1,good:0,gpu:1,grad_output:1,gradient:[0,1],greater:0,handl:2,happen:1,harder:0,has:[0,1],have:[0,1],help:0,here:0,highli:1,histori:1,hold:1,hook:1,hot:1,how:[1,2],html:1,http:[0,1],idx:[],ignor:1,imeplement:[],implement:[0,1],improv:[0,1],includ:0,incomplet:1,indent:1,independ:[0,1],index:[1,2],indic:[0,1],infer:1,infer_multitask_output:1,infer_output_dim:1,infin:1,initi:[0,1],initialize_glu:1,initialize_non_glu:1,input:[0,1],input_dim:1,insid:0,instal:2,instanc:1,instead:1,integ:[0,1],interpret:2,invers:[0,1],ipynb:0,is_batch_level:1,is_maxim:1,is_multilabel:1,issu:0,iter:1,its:0,join:0,journal:1,json:1,jsonencod:1,jupyt:0,kaggl:0,keep:1,kei:[0,1],labda:[],lambda:1,lambda_spars:[0,1],larg:0,last:[0,1],later:0,latter:1,layer:0,learn:[1,2],least:[0,1],left:[0,1],length:[0,1],let:1,like:1,line:0,linear:0,link:2,list:[0,1],list_embedded_x:1,list_obfusc:1,list_output:1,list_y_scor:1,list_y_tru:1,load:[1,2],load_class_attr:1,load_model:[0,1],load_weights_from_unsupervis:1,loaded_clf:0,local:[0,1],log:1,logarithm:1,logloss:[0,1],longtensor:1,loop:1,loss:[0,1],loss_fn:[0,1],lower:0,lr_schedul:[0,1],lrschedulercallback:1,m_explain:1,mae:[0,1],main:1,make:[0,1],mandatori:0,mani:1,manual:1,map:1,martin:1,martinsa16:1,mask:[0,1],mask_typ:[0,1],match:0,matric:1,matrix:1,max:0,max_epoch:[0,1],maxim:[0,1],maximum:[0,1],mean:[0,1],mean_squared_log_error:1,memori:1,mention:0,method:1,metric:2,metric_nam:1,metriccontain:1,might:[0,1],mini:0,minimum:1,mix:1,moa:0,modal:0,model:[1,2],model_nam:0,modul:2,moment:1,momentum:[0,1],monitor:1,more:[0,1],most:1,mse:[0,1],multi:0,multiclass:[0,1],multiclass_util:2,multilabel:1,multioutput:1,multipl:1,multitask:[0,2],must:[0,1],n_a:[0,1],n_d:[0,1],n_glu:1,n_glu_independ:1,n_indep_decod:[0,1],n_independ:[0,1],n_sampl:1,n_share:[0,1],n_shared_decod:[0,1],n_step:[0,1],n_unique_label:1,name:[0,1],nan:1,ndarrai:1,need:[0,1],needs_input_grad:1,neg:[0,1],network:1,neural:1,nicula:1,non:1,none:[0,1],normal:[0,1],note:[0,1],notebook:0,now:0,num:[],num_work:[0,1],number:[0,1],numpi:1,obf_var:1,obfusc:1,obj:1,object:1,occur:[],occurr:0,on_batch_begin:1,on_batch_end:1,on_epoch_begin:1,on_epoch_end:1,on_train_begin:1,on_train_end:1,one:[0,1],onecyclelr:1,ones:0,onli:1,oper:1,optim:[0,1],optimizer_fn:[0,1],optimizer_param:[0,1],optimo:[],option:0,order:[0,1],org:[0,1],orgin:1,origin:[0,1],other:1,otherwis:1,our:0,out:1,output:1,output_dim:1,over:1,overfit:0,overridden:1,overwritten:0,own:[0,1],packag:2,page:2,panda:1,paper:[0,1],param:1,paramet:[1,2],pass:1,path:1,patienc:[0,1],pdf:0,per:[0,1],percentag:[0,1],perform:[0,1],peter:1,pfister:0,pin:1,pin_memori:1,pip:0,pipelin:0,p
lace:0,pleas:0,plot:0,poetri:0,point:1,posit:1,possibl:0,post:1,post_embed_dim:1,pre:2,pred:0,predict:[0,1],predict_func:1,predict_proba:1,predictdataset:1,prefix:1,prepar:1,prepare_target:1,preprint:0,present:0,pretrain:[0,2],pretraining_exampl:0,pretraining_ratio:[0,1],pretraining_util:2,previous:1,print:1,prior:1,probabl:1,problem:[1,2],process:1,processed_feat:1,product:[0,1],propos:0,provid:1,pytorch:[1,2],pytorch_tabnet:0,qualifi:1,question:0,rais:1,random:[0,1],randomobfusc:1,rang:0,rapidli:1,rate:0,readi:0,readm:2,realli:0,recip:1,recommend:0,reconstruct:[0,1],record:1,reduc:[0,1],reducing_matrix:1,regist:1,regress:[0,1],regressionsmot:[0,1],rel:1,repositori:0,repres:1,reproduc:0,res:1,reset:1,result:1,retriev:[0,1],reus:0,reusag:0,risk:0,rmse:[0,1],rmsle:[0,1],roc_auc_scor:0,root:1,row:1,rule:[0,1],run:1,same:[0,1],sampl:[0,1],sampler:1,save:[1,2],save_model:[0,1],saved_filepath:0,saving_path:[],saving_path_nam:0,scale:1,schedul:[0,1],scheduler_fn:[0,1],scheduler_param:[0,1],scikit:[0,1],score:[0,1],search:2,section:[],see:[0,1],seed:[0,1],select:0,self:[0,1],semi:2,separ:1,sequenc:1,serializ:1,set:[0,1],set_param:1,set_train:1,shape:1,share:0,shared_lay:1,should:[0,1],show:1,silent:1,simpl:0,sinc:[0,1],singl:[0,1],size:[0,1],skipkei:1,sklearn:[0,1],slack:0,small:1,smaller:1,smote:1,softmax:1,solut:0,sort:1,sort_kei:1,sourc:[1,2],spars:1,sparsemax:[0,2],sparsemaxfunct:1,sparser:0,sparsiti:0,specif:[0,1],specifii:0,spin:1,squar:1,stabl:1,stack_batch:1,stai:1,start:[0,1],state:0,step:[0,1],step_siz:0,steplr:0,steps_output:1,stop:[0,1],store:1,str:[0,1],string:[0,1],subclass:1,subprocess:1,subsampl:1,sum:1,supermodul:1,supervis:[1,2],support:1,sure:[0,1],tab_model:[0,2],tab_network:2,tabmodel:1,tabnet:[1,2],tabnet_model_test_1:0,tabnetclassifi:[0,1],tabnetdecod:1,tabnetencod:1,tabnetmultitaskclassifi:[0,1],tabnetnoembed:1,tabnetpretrain:[0,1],tabnetregressor:[0,1],tabular:2,take:1,talk:0,target:[0,1],target_mapp:1,target_typ:1,task:[0,1],tasks_dim:1,tasks_label:1,templat:0,tensor:1,term:0,termin:[0,1],than:[0,1],thei:0,them:1,thi:[0,1],tol:1,torch:[0,1],torchdataset:1,train:[1,2],train_dataload:1,train_label:1,trainer:1,trainng:0,transform:[0,1],trick:1,tupl:[0,1],twice:0,two:1,type:[0,1],type_of_target:1,typeerror:1,typic:0,unchang:1,uniqu:[0,1],unique_label:1,unit:0,unknown:1,unsupervis:1,unsupervised_model:[0,1],unsupervisedloss:1,unsupervisedlossnumpi:1,unsupervisedmetr:1,unsupervisednumpymetr:1,unsupmetriccontain:1,untouch:0,updat:1,update_fit_param:1,usag:1,use:[1,2],used:[0,1],useful:0,user:1,using:0,usual:0,util:[0,2],val_metr:1,valid:[0,1],valid_dataload:1,validate_eval_set:1,valu:[0,1],valueerror:1,variabl:1,vector:1,verbos:[0,1],via:0,video:0,virtual:[],virtual_batch_s:[0,1],vlad:1,wait:1,wan:0,want:0,warm:0,warm_start:[0,1],warn:1,weight:[0,1],weight_updat:1,well:1,were:1,what:2,when:[0,1],where:1,wheter:1,whether:[0,1],which:1,width:0,wihtout:[],wish:0,within:[0,1],without:[0,1],work:1,worker:[0,1],wors:0,wrapper:1,wrong:1,www:[],x_predict:0,x_test:0,x_train:[0,1],x_valid:0,y_pred:1,y_score:[0,1],y_train:[0,1],y_true:[0,1],y_valid:0,you:[0,1],your:0,youtu:[],ysbazo8ymx8:[],zerodivisionerror:1,zip:1},titles:["README","pytorch_tabnet package","Welcome to pytorch_tabnet\u2019s 
documentation!"],titleterms:{"class":1,"default":0,"function":1,Useful:0,abstract_model:1,attent:0,augment:[0,1],callback:1,code:0,contribut:0,cpu:0,custom:0,data:0,doc:[],document:2,doe:0,early_stopping_metr:[],easi:0,eval_metr:0,evalu:0,fit:0,fly:0,gpu:0,handl:0,how:0,indic:2,instal:0,interpret:0,label:1,learn:0,link:0,load:0,metric:[0,1],model:0,modul:1,multi:1,multiclass_util:1,multitask:1,onli:0,packag:1,paramet:0,pre:0,pretrain:1,pretraining_util:1,problem:0,pytorch:0,pytorch_tabnet:[1,2],readm:0,save:0,script:[],semi:0,sourc:0,sparsemax:1,supervis:0,tab_model:1,tab_network:1,tabl:2,tabnet:0,tabular:0,train:0,use:0,util:1,welcom:2,what:0}}) \ No newline at end of file +Search.setIndex({docnames:["generated_docs/README","generated_docs/pytorch_tabnet","index"],envversion:{"sphinx.domains.c":1,"sphinx.domains.changeset":1,"sphinx.domains.citation":1,"sphinx.domains.cpp":1,"sphinx.domains.index":1,"sphinx.domains.javascript":1,"sphinx.domains.math":2,"sphinx.domains.python":1,"sphinx.domains.rst":1,"sphinx.domains.std":1,"sphinx.ext.viewcode":1,sphinx:56},filenames:["generated_docs/README.md","generated_docs/pytorch_tabnet.rst","index.rst"],objects:{"pytorch_tabnet.abstract_model":{TabModel:[1,1,1,""]},"pytorch_tabnet.abstract_model.TabModel":{cat_dims:[1,2,1,""],cat_emb_dim:[1,2,1,""],cat_idxs:[1,2,1,""],clip_value:[1,2,1,""],compute_loss:[1,3,1,""],device_name:[1,2,1,""],epsilon:[1,2,1,""],explain:[1,3,1,""],fit:[1,3,1,""],gamma:[1,2,1,""],grouped_features:[1,2,1,""],input_dim:[1,2,1,""],lambda_sparse:[1,2,1,""],load_class_attrs:[1,3,1,""],load_model:[1,3,1,""],load_weights_from_unsupervised:[1,3,1,""],mask_type:[1,2,1,""],momentum:[1,2,1,""],n_a:[1,2,1,""],n_d:[1,2,1,""],n_indep_decoder:[1,2,1,""],n_independent:[1,2,1,""],n_shared:[1,2,1,""],n_shared_decoder:[1,2,1,""],n_steps:[1,2,1,""],optimizer_fn:[1,2,1,""],optimizer_params:[1,2,1,""],output_dim:[1,2,1,""],predict:[1,3,1,""],prepare_target:[1,3,1,""],save_model:[1,3,1,""],scheduler_fn:[1,2,1,""],scheduler_params:[1,2,1,""],seed:[1,2,1,""],update_fit_params:[1,3,1,""],verbose:[1,2,1,""]},"pytorch_tabnet.augmentations":{ClassificationSMOTE:[1,1,1,""],RegressionSMOTE:[1,1,1,""]},"pytorch_tabnet.callbacks":{Callback:[1,1,1,""],CallbackContainer:[1,1,1,""],EarlyStopping:[1,1,1,""],History:[1,1,1,""],LRSchedulerCallback:[1,1,1,""]},"pytorch_tabnet.callbacks.Callback":{on_batch_begin:[1,3,1,""],on_batch_end:[1,3,1,""],on_epoch_begin:[1,3,1,""],on_epoch_end:[1,3,1,""],on_train_begin:[1,3,1,""],on_train_end:[1,3,1,""],set_params:[1,3,1,""],set_trainer:[1,3,1,""]},"pytorch_tabnet.callbacks.CallbackContainer":{append:[1,3,1,""],callbacks:[1,2,1,""],on_batch_begin:[1,3,1,""],on_batch_end:[1,3,1,""],on_epoch_begin:[1,3,1,""],on_epoch_end:[1,3,1,""],on_train_begin:[1,3,1,""],on_train_end:[1,3,1,""],set_params:[1,3,1,""],set_trainer:[1,3,1,""]},"pytorch_tabnet.callbacks.EarlyStopping":{early_stopping_metric:[1,2,1,""],is_maximize:[1,2,1,""],on_epoch_end:[1,3,1,""],on_train_end:[1,3,1,""],patience:[1,2,1,""],tol:[1,2,1,""]},"pytorch_tabnet.callbacks.History":{on_batch_end:[1,3,1,""],on_epoch_begin:[1,3,1,""],on_epoch_end:[1,3,1,""],on_train_begin:[1,3,1,""],trainer:[1,2,1,""],verbose:[1,2,1,""]},"pytorch_tabnet.callbacks.LRSchedulerCallback":{early_stopping_metric:[1,2,1,""],is_batch_level:[1,2,1,""],on_batch_end:[1,3,1,""],on_epoch_end:[1,3,1,""],optimizer:[1,2,1,""],scheduler_fn:[1,2,1,""],scheduler_params:[1,2,1,""]},"pytorch_tabnet.metrics":{AUC:[1,1,1,""],Accuracy:[1,1,1,""],BalancedAccuracy:[1,1,1,""],LogLoss:[1,1,1,""],MAE:[1,1,1,""],MSE:[
1,1,1,""],Metric:[1,1,1,""],MetricContainer:[1,1,1,""],RMSE:[1,1,1,""],RMSLE:[1,1,1,""],UnsupMetricContainer:[1,1,1,""],UnsupervisedLoss:[1,4,1,""],UnsupervisedLossNumpy:[1,4,1,""],UnsupervisedMetric:[1,1,1,""],UnsupervisedNumpyMetric:[1,1,1,""],check_metrics:[1,4,1,""]},"pytorch_tabnet.metrics.Metric":{get_metrics_by_names:[1,3,1,""]},"pytorch_tabnet.metrics.MetricContainer":{metric_names:[1,2,1,""],prefix:[1,2,1,""]},"pytorch_tabnet.metrics.UnsupMetricContainer":{metric_names:[1,2,1,""],prefix:[1,2,1,""]},"pytorch_tabnet.multiclass_utils":{assert_all_finite:[1,4,1,""],check_classification_targets:[1,4,1,""],check_output_dim:[1,4,1,""],check_unique_type:[1,4,1,""],infer_multitask_output:[1,4,1,""],infer_output_dim:[1,4,1,""],is_multilabel:[1,4,1,""],type_of_target:[1,4,1,""],unique_labels:[1,4,1,""]},"pytorch_tabnet.multitask":{TabNetMultiTaskClassifier:[1,1,1,""]},"pytorch_tabnet.multitask.TabNetMultiTaskClassifier":{cat_dims:[1,2,1,""],cat_idxs:[1,2,1,""],compute_loss:[1,3,1,""],grouped_features:[1,2,1,""],optimizer_params:[1,2,1,""],predict:[1,3,1,""],predict_proba:[1,3,1,""],prepare_target:[1,3,1,""],scheduler_params:[1,2,1,""],stack_batches:[1,3,1,""],update_fit_params:[1,3,1,""]},"pytorch_tabnet.pretraining":{TabNetPretrainer:[1,1,1,""]},"pytorch_tabnet.pretraining.TabNetPretrainer":{cat_dims:[1,2,1,""],cat_idxs:[1,2,1,""],compute_loss:[1,3,1,""],fit:[1,3,1,""],grouped_features:[1,2,1,""],optimizer_params:[1,2,1,""],predict:[1,3,1,""],prepare_target:[1,3,1,""],scheduler_params:[1,2,1,""],stack_batches:[1,3,1,""],update_fit_params:[1,3,1,""]},"pytorch_tabnet.pretraining_utils":{create_dataloaders:[1,4,1,""],validate_eval_set:[1,4,1,""]},"pytorch_tabnet.sparsemax":{Entmax15:[1,1,1,""],Entmax15Function:[1,1,1,""],Entmoid15:[1,1,1,""],Sparsemax:[1,1,1,""],SparsemaxFunction:[1,1,1,""],entmax15:[1,4,1,""],entmoid15:[1,4,1,""],sparsemax:[1,4,1,""]},"pytorch_tabnet.sparsemax.Entmax15":{forward:[1,3,1,""],training:[1,2,1,""]},"pytorch_tabnet.sparsemax.Entmax15Function":{backward:[1,3,1,""],forward:[1,3,1,""]},"pytorch_tabnet.sparsemax.Entmoid15":{backward:[1,3,1,""],forward:[1,3,1,""]},"pytorch_tabnet.sparsemax.Sparsemax":{forward:[1,3,1,""],training:[1,2,1,""]},"pytorch_tabnet.sparsemax.SparsemaxFunction":{backward:[1,3,1,""],forward:[1,3,1,""]},"pytorch_tabnet.tab_model":{TabNetClassifier:[1,1,1,""],TabNetRegressor:[1,1,1,""]},"pytorch_tabnet.tab_model.TabNetClassifier":{cat_dims:[1,2,1,""],cat_idxs:[1,2,1,""],compute_loss:[1,3,1,""],grouped_features:[1,2,1,""],optimizer_params:[1,2,1,""],predict_func:[1,3,1,""],predict_proba:[1,3,1,""],prepare_target:[1,3,1,""],scheduler_params:[1,2,1,""],stack_batches:[1,3,1,""],update_fit_params:[1,3,1,""],weight_updater:[1,3,1,""]},"pytorch_tabnet.tab_model.TabNetRegressor":{cat_dims:[1,2,1,""],cat_idxs:[1,2,1,""],compute_loss:[1,3,1,""],grouped_features:[1,2,1,""],optimizer_params:[1,2,1,""],predict_func:[1,3,1,""],prepare_target:[1,3,1,""],scheduler_params:[1,2,1,""],stack_batches:[1,3,1,""],update_fit_params:[1,3,1,""]},"pytorch_tabnet.tab_network":{AttentiveTransformer:[1,1,1,""],EmbeddingGenerator:[1,1,1,""],FeatTransformer:[1,1,1,""],GBN:[1,1,1,""],GLU_Block:[1,1,1,""],GLU_Layer:[1,1,1,""],RandomObfuscator:[1,1,1,""],TabNet:[1,1,1,""],TabNetDecoder:[1,1,1,""],TabNetEncoder:[1,1,1,""],TabNetNoEmbeddings:[1,1,1,""],TabNetPretraining:[1,1,1,""],initialize_glu:[1,4,1,""],initialize_non_glu:[1,4,1,""]},"pytorch_tabnet.tab_network.AttentiveTransformer":{forward:[1,3,1,""],training:[1,2,1,""]},"pytorch_tabnet.tab_network.EmbeddingGenerator":{forward:[1,3
,1,""],training:[1,2,1,""]},"pytorch_tabnet.tab_network.FeatTransformer":{forward:[1,3,1,""],training:[1,2,1,""]},"pytorch_tabnet.tab_network.GBN":{forward:[1,3,1,""],training:[1,2,1,""]},"pytorch_tabnet.tab_network.GLU_Block":{forward:[1,3,1,""],training:[1,2,1,""]},"pytorch_tabnet.tab_network.GLU_Layer":{forward:[1,3,1,""],training:[1,2,1,""]},"pytorch_tabnet.tab_network.RandomObfuscator":{forward:[1,3,1,""],training:[1,2,1,""]},"pytorch_tabnet.tab_network.TabNet":{forward:[1,3,1,""],forward_masks:[1,3,1,""],training:[1,2,1,""]},"pytorch_tabnet.tab_network.TabNetDecoder":{forward:[1,3,1,""],training:[1,2,1,""]},"pytorch_tabnet.tab_network.TabNetEncoder":{forward:[1,3,1,""],forward_masks:[1,3,1,""],training:[1,2,1,""]},"pytorch_tabnet.tab_network.TabNetNoEmbeddings":{forward:[1,3,1,""],forward_masks:[1,3,1,""],training:[1,2,1,""]},"pytorch_tabnet.tab_network.TabNetPretraining":{forward:[1,3,1,""],forward_masks:[1,3,1,""],training:[1,2,1,""]},"pytorch_tabnet.utils":{ComplexEncoder:[1,1,1,""],PredictDataset:[1,1,1,""],SparsePredictDataset:[1,1,1,""],SparseTorchDataset:[1,1,1,""],TorchDataset:[1,1,1,""],check_embedding_parameters:[1,4,1,""],check_input:[1,4,1,""],check_list_groups:[1,4,1,""],check_warm_start:[1,4,1,""],create_dataloaders:[1,4,1,""],create_explain_matrix:[1,4,1,""],create_group_matrix:[1,4,1,""],create_sampler:[1,4,1,""],define_device:[1,4,1,""],filter_weights:[1,4,1,""],validate_eval_set:[1,4,1,""]},"pytorch_tabnet.utils.ComplexEncoder":{"default":[1,3,1,""]},pytorch_tabnet:{abstract_model:[1,0,0,"-"],augmentations:[1,0,0,"-"],callbacks:[1,0,0,"-"],metrics:[1,0,0,"-"],multiclass_utils:[1,0,0,"-"],multitask:[1,0,0,"-"],pretraining:[1,0,0,"-"],pretraining_utils:[1,0,0,"-"],sparsemax:[1,0,0,"-"],tab_model:[1,0,0,"-"],tab_network:[1,0,0,"-"],utils:[1,0,0,"-"]}},objnames:{"0":["py","module","Python module"],"1":["py","class","Python class"],"2":["py","attribute","Python attribute"],"3":["py","method","Python method"],"4":["py","function","Python 
function"]},objtypes:{"0":"py:module","1":"py:class","2":"py:attribute","3":"py:method","4":"py:function"},terms:{"1st":0,"abstract":1,"boolean":1,"case":1,"class":0,"default":[1,2],"float":[0,1],"function":0,"import":[0,1],"int":[0,1],"new":[1,2],"return":[0,1],"static":1,"throw":1,"true":[0,1],"try":[0,1],"while":1,Added:0,For:1,One:1,The:[0,1],Use:1,Useful:2,Using:1,__call__:0,__init__:0,_contextmethodmixin:1,_maxim:0,_name:0,a_max:0,a_min:0,about:1,abov:1,abs:1,absolut:1,abstract_model:2,accept:1,accord:[0,1],accross:0,accuraci:[0,1],adam:[0,1],added:0,after:1,afterward:1,aim:0,alia:1,all:[0,1],allow:[0,1],allow_nan:1,along:1,alpha:1,alreadi:0,also:[0,1],although:1,alwai:0,ambigu:1,amount:1,ani:[0,1],anyth:1,api:0,appear:1,append:1,appli:[0,1],arbitrari:1,architectur:0,arg:1,argument:1,arik:0,arrai:[0,1],arxiv:[0,1],assert_all_finit:1,assign:[0,1],astudillo:1,attent:[1,2],attentivetransform:1,attribut:1,auc:[0,1],augment:2,auto:[0,1],autograd:1,autom:[0,1],automat:[0,1],avail:0,averag:1,avoid:1,awar:0,backward:1,balanc:[0,1],balancedaccuraci:1,base:1,baseestim:1,basic:0,batch:[0,1],batch_out:[],batch_siz:[0,1],becaus:[0,1],been:0,befor:[0,1],bellow:0,below:0,ben:1,best:0,beta:1,better:0,between:[0,1],bigger:0,binari:[0,1],blob:0,block:[0,1],bool:[0,1],both:1,build:[0,1],built:1,call:1,callabl:1,callback:[0,2],callbackcontain:1,can:[0,1],capac:0,care:1,cat:[],cat_dim:[0,1],cat_emb_dim:[0,1],cat_idx:[0,1],categor:[0,1],censu:[],certain:1,chang:[0,1],check:1,check_circular:1,check_classification_target:1,check_embedding_paramet:1,check_input:1,check_list_group:1,check_metr:1,check_nan:[],check_output_dim:1,check_unique_typ:1,check_warm_start:1,choic:0,cite:1,class_attr:1,classic:[0,1],classif:[0,1],classificationsmot:[0,1],classmethod:1,clear:1,clf:0,clip:[0,1],clip_valu:[0,1],clone:0,close:0,cls:1,code:2,coeffici:0,column:[0,1],com:[0,1],come:0,commit:0,compat:[0,1],complet:0,complexencod:1,comput:[0,1],compute_import:[0,1],compute_loss:1,conda:0,consecut:[0,1],contain:[0,1],content:2,context:1,continu:1,contribut:2,convert:1,corr:1,correct:1,correl:0,correspond:1,could:[0,1],counter:1,cpu:1,creat:[0,1],create_dataload:1,create_explain_matrix:1,create_group_matrix:1,create_sampl:1,cross:0,csr:[0,1],csr_matrix:1,ctx:1,cuda:1,current:[0,1],custom:[1,2],data:[1,2],datafram:1,dataload:[0,1],dataset:[0,1],dblp:1,decai:0,decis:0,decod:0,deduc:0,deep:1,deeprecomodel:1,def:[0,1],defin:[0,1],define_devic:1,degener:1,depend:[0,1],descript:1,detail:1,detect:[0,1],determin:1,develop:0,devic:1,device_nam:[0,1],dict:[0,1],dictionari:1,dictionnari:[0,1],did:[],differ:[0,1],differenti:1,difficulti:0,dim:1,dimens:1,directli:1,discret:1,discuss:0,disk:0,distinct:1,distribut:1,divid:0,divis:1,docker:0,doe:[1,2],doing:0,don:1,done:[0,1],dreamquark:0,dreamquarktabnet:0,drop:[0,1],drop_last:[0,1],dure:[0,1],each:[0,1],earli:[0,1],early_stopping_metr:1,earlystop:1,easi:2,easier:0,easili:0,either:[0,1],element:1,els:1,emb:[],embded:[],embed:[0,1],embedded_x:1,embeddinggener:1,empti:1,enabl:0,encod:1,end:0,enforc:1,ensur:1,ensure_ascii:1,entmax15:1,entmax15funct:1,entmax:[0,1],entmoid15:1,entropi:0,epoch:[0,1],eps:1,epsilon:[0,1],equal:1,equival:1,error:1,especi:0,eval:[0,1,2],eval_metr:1,eval_nam:[0,1],eval_set:[0,1],evalu:[1,2],even:0,event:1,everi:[0,1],exact:1,exactli:0,exampl:[0,1],except:1,exempl:0,exist:0,exit:1,expert:1,explain:1,explan:1,explanatori:0,explicit:1,extra:0,extract:1,factori:1,fals:[0,1],feattransform:1,featu:[],featur:[0,1],few:0,file:1,filepath:1,filter_weight:1,first:[0,1],fit:[1,2],fly:2
,follow:[0,1],forest:[],forg:0,format:1,former:1,formula:1,forward:1,forward_mask:1,found:0,frequenc:1,from:[0,1],from_unsupervis:[0,1],further:0,gamma:[0,1],gate:0,gbn:1,gener:[0,1],get:[0,1],get_metrics_by_nam:1,ghost:[0,1],gini:0,git:0,github:[0,1],give:[0,1],given:[0,1],glu:[0,1],glu_block:1,glu_lay:1,good:0,gpu:1,grad:1,grad_output:1,gradient:[0,1],greater:0,group:[0,1],group_attention_matrix:1,group_dim:1,group_matrix:1,grouped_featur:[0,1],handl:2,happen:1,harder:0,has:[0,1],have:[0,1],help:0,here:0,highli:1,histori:1,hold:1,hook:1,hot:1,how:[1,2],html:1,http:[0,1],idf:0,idx:[],ignor:1,imeplement:[],implement:[0,1],improv:[0,1],includ:0,incomplet:1,indent:1,independ:[0,1],index:[1,2],indic:[0,1],infer:1,infer_multitask_output:1,infer_output_dim:1,infin:1,initi:[0,1],initialize_glu:1,initialize_non_glu:1,input:[0,1],input_dim:1,insid:0,instal:2,instanc:1,instead:1,integ:[0,1],intend:1,interpret:2,invers:[0,1],ipynb:0,is_batch_level:1,is_maxim:1,is_multilabel:1,issu:0,iter:1,its:0,join:0,journal:1,json:1,jsonencod:1,jupyt:0,just:1,jvp:1,kaggl:0,keep:1,kei:[0,1],kwarg:1,labda:[],lambda:1,lambda_spars:[0,1],larg:0,last:[0,1],later:0,latter:1,layer:0,learn:[1,2],least:[0,1],left:[0,1],length:[0,1],let:1,level:[0,1],librari:0,like:[0,1],line:0,linear:0,link:2,list:[0,1],list_embedded_x:1,list_group:1,list_obfusc:1,list_output:1,list_y_scor:1,list_y_tru:1,load:[1,2],load_class_attr:1,load_model:[0,1],load_weights_from_unsupervis:1,loaded_clf:0,local:[0,1],log:1,logarithm:1,logloss:[0,1],longtensor:1,loop:1,loss:[0,1],loss_fn:[0,1],lot:0,lower:0,lr_schedul:[0,1],lrschedulercallback:1,m_explain:1,m_ij:1,made:0,mae:[0,1],main:1,maintain:0,make:[0,1],mandatori:0,mani:1,manner:1,manual:1,map:1,martin:1,martinsa16:1,mask:[0,1],mask_typ:[0,1],match:[0,1],matric:1,matrix:[0,1],max:0,max_epoch:[0,1],maxim:[0,1],maximum:[0,1],mean:[0,1],mean_squared_log_error:1,mechan:0,memori:1,mention:0,method:1,metric:2,metric_nam:1,metriccontain:1,might:[0,1],mini:0,minimum:1,mix:1,moa:0,modal:0,mode:1,model:[1,2],model_nam:0,modul:2,moment:1,momentum:[0,1],monitor:1,more:[0,1],most:1,mse:[0,1],multi:0,multiclass:[0,1],multiclass_util:2,multilabel:1,multioutput:1,multipl:1,multitask:[0,2],must:[0,1],n_a:[0,1],n_d:[0,1],n_glu:1,n_glu_independ:1,n_group:1,n_indep_decod:[0,1],n_independ:[0,1],n_sampl:1,n_share:[0,1],n_shared_decod:[0,1],n_step:[0,1],n_unique_label:1,name:[0,1],nan:1,ndarrai:1,need:[0,1],needs_input_grad:1,neg:[0,1],network:1,neural:1,nicula:1,non:1,none:[0,1],normal:[0,1],note:[0,1],notebook:0,now:0,num:[],num_work:[0,1],number:[0,1],numpi:1,obf_var:1,obfusc:1,obj:1,object:1,occur:[],occurr:0,on_batch_begin:1,on_batch_end:1,on_epoch_begin:1,on_epoch_end:1,on_train_begin:1,on_train_end:1,one:[0,1],onecyclelr:1,ones:0,onli:1,oper:1,optim:[0,1],optimizer_fn:[0,1],optimizer_param:[0,1],optimo:[],option:0,order:[0,1],org:[0,1],orgin:[0,1],origin:[0,1],other:1,otherwis:1,our:0,out:1,output:1,output_dim:1,over:1,overfit:0,overridden:1,overtim:0,overwritten:0,own:[0,1],packag:2,page:2,panda:1,paper:[0,1],param:1,paramet:[1,2],pass:1,path:1,patienc:[0,1],pca:0,pdf:0,per:[0,1],percentag:[0,1],perform:[0,1],peter:1,pfister:0,pin:1,pin_memori:1,pip:0,pipelin:0,place:0,pleas:0,plot:0,poetri:0,point:1,posit:1,possibl:0,post:1,post_embed_dim:1,pre:2,pred:0,predict:[0,1],predict_func:1,predict_proba:1,predictdataset:1,prefix:1,prepar:1,prepare_target:1,preprint:0,preprocess:0,present:0,pretrain:[0,2],pretraining_exampl:0,pretraining_ratio:[0,1],pretraining_util:2,previous:1,print:1,prior:1,priori:1,probabl:1,proble
m:[1,2],process:1,processed_feat:1,product:[0,1],propos:0,provid:1,pytorch:[1,2],pytorch_tabnet:0,qualifi:1,question:0,rais:1,random:[0,1],randomobfusc:1,rang:0,rapidli:1,rate:0,readi:0,readm:2,realli:0,rearrang:1,recip:1,recommend:0,reconstruct:[0,1],record:1,reduc:[0,1],reducing_matrix:1,regist:1,regress:[0,1],regressionsmot:[0,1],rel:1,relat:1,repositori:0,repres:1,reproduc:0,requir:1,res:1,reset:1,result:1,retriev:[0,1],reus:0,reusag:0,risk:0,rmse:[0,1],rmsle:[0,1],roc_auc_scor:0,root:1,row:1,rule:[0,1],run:1,same:[0,1],sampl:[0,1],sampler:1,save:[1,2],save_for_backward:1,save_for_forward:1,save_model:[0,1],saved_filepath:0,saving_path:[],saving_path_nam:0,scale:1,schedul:[0,1],scheduler_fn:[0,1],scheduler_param:[0,1],scikit:[0,1],scipi:[0,1],score:[0,1],search:2,section:[],see:[0,1],seed:[0,1],select:0,self:[0,1],semi:2,separ:1,sequenc:1,serializ:1,set:[0,1],set_param:1,set_train:1,shape:1,share:0,shared_lay:1,should:[0,1],show:1,silent:1,simpl:0,sinc:[0,1],singl:[0,1],size:[0,1],skipkei:1,sklearn:[0,1],slack:0,small:1,smaller:1,smote:1,softmax:1,solut:0,some:[0,1],sort:1,sort_kei:1,sourc:[1,2],spars:[0,1],sparsemax:[0,2],sparsemaxfunct:1,sparsepredictdataset:1,sparser:0,sparsetorchdataset:1,sparsiti:0,specif:[0,1],specifi:0,specifii:0,spin:1,squar:1,stabl:1,stack_batch:1,stai:1,start:[0,1],state:0,step:[0,1],step_siz:0,steplr:0,steps_output:1,stop:[0,1],store:1,str:[0,1],string:[0,1],subclass:1,subprocess:1,subsampl:1,sum:1,supermodul:1,supervis:[1,2],support:1,sure:[0,1],tab_model:[0,2],tab_network:2,tabmodel:1,tabnet:[1,2],tabnet_model_test_1:0,tabnetclassifi:[0,1],tabnetdecod:1,tabnetencod:1,tabnetmultitaskclassifi:[0,1],tabnetnoembed:1,tabnetpretrain:[0,1],tabnetregressor:[0,1],tabular:2,take:1,talk:0,target:[0,1],target_mapp:1,target_typ:1,task:[0,1],tasks_dim:1,tasks_label:1,templat:0,tensor:1,term:0,termin:[0,1],text:0,than:[0,1],thei:[0,1],them:1,thi:[0,1],though:1,tol:1,torch:[0,1],torchdataset:1,train:[1,2],train_dataload:1,train_label:1,trainer:1,trainng:0,transform:[0,1],trick:1,tupl:[0,1],twice:[0,1],two:1,type:[0,1],type_of_target:1,typeerror:1,typic:0,unchang:1,uniqu:[0,1],unique_label:1,unit:0,unknown:1,unsupervis:1,unsupervised_model:[0,1],unsupervisedloss:1,unsupervisedlossnumpi:1,unsupervisedmetr:1,unsupervisednumpymetr:1,unsupmetriccontain:1,untouch:0,updat:1,update_fit_param:1,usag:1,use:[1,2],used:[0,1],useful:0,user:1,using:0,usual:0,util:[0,2],val_metr:1,valid:[0,1],valid_dataload:1,validate_eval_set:1,valu:[0,1],valueerror:1,variabl:[0,1],vector:1,verbos:[0,1],version:0,via:0,video:0,virtual:[],virtual_batch_s:[0,1],vjp:1,vlad:1,wait:1,wan:0,want:0,warm:0,warm_start:[0,1],warn:1,weight:[0,1],weight_updat:1,well:1,were:1,what:2,when:[0,1],where:1,wheter:1,whether:[0,1],which:[0,1],width:0,wihtout:[],wish:0,within:[0,1],without:[0,1],work:1,worker:[0,1],wors:0,wrapper:1,wrong:1,www:[],x_predict:0,x_test:0,x_train:[0,1],x_valid:0,y_pred:1,y_score:[0,1],y_train:[0,1],y_true:[0,1],y_valid:0,you:[0,1],your:0,youtu:[],ysbazo8ymx8:[],zerodivisionerror:1,zip:1},titles:["README","pytorch_tabnet package","Welcome to pytorch_tabnet\u2019s 
documentation!"],titleterms:{"class":1,"default":0,"function":1,"new":0,Useful:0,abstract_model:1,attent:0,augment:[0,1],callback:1,code:0,contribut:0,cpu:0,custom:0,data:0,doc:[],document:2,doe:0,early_stopping_metr:[],easi:0,eval_metr:0,evalu:0,fit:0,fly:0,gpu:0,handl:0,how:0,indic:2,instal:0,interpret:0,label:1,learn:0,link:0,load:0,metric:[0,1],model:0,modul:1,multi:1,multiclass_util:1,multitask:1,onli:0,packag:1,paramet:0,pre:0,pretrain:1,pretraining_util:1,problem:0,pytorch:0,pytorch_tabnet:[1,2],readm:0,save:0,script:[],semi:0,sourc:0,sparsemax:1,supervis:0,tab_model:1,tab_network:1,tabl:2,tabnet:0,tabular:0,train:0,use:0,util:1,welcom:2,what:0}}) \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml index 537adf0b..09d4f156 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "pytorch_tabnet" -version = "4.0" +version = "4.1.0" description = "PyTorch implementation of TabNet" homepage = "https://github.com/dreamquark-ai/tabnet" repository = "https://github.com/dreamquark-ai/tabnet"