In [1]:
import numpy as np
import pandas as pd
import utils

In [3]:
from pytorch_tabular import TabularModel
from pytorch_tabular.models import (
    CategoryEmbeddingModelConfig, 
    FTTransformerConfig, 
    TabNetModelConfig, 
    GatedAdditiveTreeEnsembleConfig, 
    TabTransformerConfig, 
    AutoIntConfig
)
from pytorch_tabular.config import DataConfig, OptimizerConfig, TrainerConfig, ExperimentConfig
from pytorch_tabular.models.common.heads import LinearHeadConfig

In [7]:
target = ['y_1','y_2']
num_cols = [col for col in utils.df_train.columns if col.startswith('X_')]
cat_cols = []
dat_cols = [('date','D'),('timestamp','H')]
encode_date_columns = True 
validation_split = .2



In [8]:
data_config = DataConfig(
    target=target, #target should always be a list.
    continuous_cols=num_cols,
    categorical_cols=cat_cols,
    date_columns=dat_cols,
    encode_date_columns=encode_date_columns,
    validation_split=validation_split
)

trainer_config = TrainerConfig(
#     auto_lr_find=True, # Runs the LRFinder to automatically derive a learning rate
    batch_size=256,
    max_epochs=50,
    early_stopping="valid_loss", # Monitor valid_loss for early stopping
    early_stopping_mode = "min", # Set the mode as min because for val_loss, lower is better
    early_stopping_patience=5, # No. of epochs of degradation training will wait before terminating
    checkpoints="valid_loss", # Save best checkpoint monitoring val_loss
    load_best=True, # After training, load the best checkpoint
)

optimizer_config = OptimizerConfig()

head_config = LinearHeadConfig(
    layers="", # No additional layer in head, just a mapping layer to output_dim
    dropout=0.1,
    initialization="kaiming"
).__dict__ # Convert to dict to pass to the model config (OmegaConf doesn't accept objects)

In [9]:
model_config = FTTransformerConfig(
    task="regression",
    learning_rate = 1e-3,
    head = "LinearHead", #Linear Head
    head_config = head_config, # Linear Head Config
)

tabular_model = TabularModel(
    data_config=data_config,
    model_config=model_config,
    optimizer_config=optimizer_config,
    trainer_config=trainer_config,
)
tabular_model.fit(train=utils.df_train)
tabular_model.evaluate(utils.df_test)

  "Ignoring the deprecated arguments, `out_ff_layers`, `out_ff_activation`, `out_ff_dropoout`, and `out_ff_initialization` as head_config is passed."
2023-04-04 10:08:10,172 - {pytorch_tabular.tabular_model:102} - INFO - Experiment Tracking is turned off
Global seed set to 42
2023-04-04 10:08:10,236 - {pytorch_tabular.tabular_model:465} - INFO - Preparing the DataLoaders
2023-04-04 10:08:17,730 - {pytorch_tabular.tabular_datamodule:286} - INFO - Setting up the datamodule for regression task
2023-04-04 10:13:51,164 - {pytorch_tabular.tabular_model:508} - INFO - Preparing the Model: FTTransformerModel
2023-04-04 10:13:52,111 - {pytorch_tabular.tabular_model:264} - INFO - Preparing the Trainer
Auto select gpus: [0, 1, 2, 3]
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
2023-04-04 10:14:12,621 - {pytorch_tabular.tabular_model:566} - INFO - Training Started


RuntimeError: Lightning can't create new processes if CUDA is already initialized. Did you manually call `torch.cuda.*` functions, have moved the model to the device, or allocated memory on the GPU any other way? Please remove any such calls, or change the selected strategy. You will have to restart the Python kernel.