In [1]:
import os
os.environ['CUDA_LAUNCH_BLOCKING'] = '1'

!pip install pytorch-tabnet
!pip install optuna

import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import TensorDataset, DataLoader

import optuna
from google.colab import output

from sklearn.model_selection import KFold
from sklearn.metrics import mean_squared_error
from pytorch_tabnet.tab_model import  TabNetRegressor

output.clear()

In [2]:
class Objective(object):
    """Optuna objective: score one sampled hyperparameter set by K-fold cross-validation.

    Parameters
    ----------
    model_name : class
        A model *class* (not an instance). It must provide a
        ``define_trial_parameters(trial, params)`` classmethod, be constructible
        from the dict that classmethod returns, and its instances must offer
        ``fit(X_train, y_train, X_val, y_val)`` and ``predict(X)``.
    X, y : pandas objects
        Features and targets; both are sliced positionally with ``.iloc``.
    params : dict
        Must contain ``'nfold'`` (number of CV folds) and ``'squared_metrics'``
        (``True`` -> MSE, ``False`` -> RMSE). The whole dict is also handed to
        ``define_trial_parameters`` as the search-space description.
    """

    def __init__(self, model_name, X, y, params):
        self.model_name = model_name

        # Keep the training data so every trial is evaluated on the same samples
        self.X = X
        self.y = y
        self.params = params

    def __call__(self, trial):
        """Return the mean validation error over all folds for one Optuna trial."""
        # Let the model class translate the search space into concrete values
        trial_params = self.model_name.define_trial_parameters(trial, self.params)
        print(trial_params)

        n_folds = self.params['nfold']
        want_squared = self.params['squared_metrics']

        score = 0
        # Cross validate the chosen hyperparameters (contiguous, unshuffled folds)
        kf = KFold(n_folds, shuffle = False)
        for train, test in kf.split(self.X):
            X_train, y_train = self.X.iloc[train, :], self.y.iloc[train]
            X_val, y_val = self.X.iloc[test, :], self.y.iloc[test]

            # A fresh model per fold so no state leaks between folds
            model = self.model_name(trial_params)
            model.fit(X_train, y_train, X_val, y_val)

            # The `squared` keyword of mean_squared_error was deprecated and then
            # removed from scikit-learn, so ask for the per-output MSE and take
            # the square root here instead of relying on that keyword.
            per_output_mse = np.asarray(
                mean_squared_error(y_val, model.predict(X_val), multioutput = 'raw_values'),
                dtype = float,
            )
            if want_squared:
                score += per_output_mse.mean()
            else:
                score += np.sqrt(per_output_mse).mean()

        score /= n_folds

        return float(score)


def main(X, y, model_name, params, n_trials = 100):
    """Run an Optuna search for ``model_name`` on ``(X, y)`` and return the study.

    Parameters
    ----------
    X, y : training features and targets, forwarded unchanged to ``Objective``.
    model_name : model class handed to ``Objective``.
    params : dict describing the search space and the cross-validation settings.
    n_trials : number of trials to run (default 100).

    Returns
    -------
    The finished ``optuna`` study. No optimisation direction is passed, so
    Optuna's default applies.
    """
    print("Start hyperparameter optimization")

    # Fixed sampler seed so repeated runs propose the same sequence of trials
    study = optuna.create_study(sampler = optuna.samplers.TPESampler(seed = 777))
    objective = Objective(model_name, X, y, params)
    study.optimize(objective, n_trials = n_trials, n_jobs = 1, show_progress_bar = True)

    print("Best parameters:", study.best_trial.params)
    return study

In [3]:
class TabNet():
    """Adapter around ``TabNetRegressor`` that accepts pandas inputs.

    The class also knows how to turn a search-space dict into concrete
    hyperparameters for one Optuna trial (see ``define_trial_parameters``).
    """

    # (parameter name, trial.suggest_* method, log-scale flag). The order is
    # significant: it fixes both the sampling order and the key order of the
    # dict returned by ``define_trial_parameters``.
    _RANGE_PARAMS = (
        ('n_d', 'suggest_int', False),
        ('n_steps', 'suggest_int', False),
        ('gamma', 'suggest_float', False),
        ('cat_emb_dim', 'suggest_int', False),
        ('n_independent', 'suggest_int', False),
        ('n_shared', 'suggest_int', False),
        ('momentum', 'suggest_float', True),
    )
    # Parameters whose list value is a set of choices rather than [low, high]
    _CATEGORICAL_PARAMS = ('mask_type',)
    # Keys that configure the cross-validation loop, not the model
    _CV_ONLY_KEYS = ('nfold', 'squared_metrics')

    def __init__(self, params):
        """Build the regressor from ``params`` (keyword arguments for ``TabNetRegressor``).

        Output is silenced and the device is pinned to the CPU; ``params`` must
        therefore not contain ``verbose`` or ``device_name``.
        """
        self.model = TabNetRegressor(**params, verbose = False, device_name = 'cpu')

    def fit(self, X, y, X_val=None, y_val=None, max_epochs=500, patience=20):
        """Train the model and return the recorded training-loss history.

        Parameters
        ----------
        X, y : pandas objects; ``y`` is reshaped to a single column.
        X_val, y_val : optional validation data. When ``X_val`` is a DataFrame
            both are converted like ``X``/``y``; other types are passed on
            as given. When omitted, no evaluation set is passed to the model.
        max_epochs, patience : forwarded to the underlying ``fit``.

        Raises
        ------
        ValueError
            If only one of ``X_val`` / ``y_val`` is supplied.
        """
        if (X_val is None) != (y_val is None):
            raise ValueError("X_val and y_val must be provided together")

        X = X.to_numpy()
        y = y.to_numpy().reshape(-1, 1)

        fit_kwargs = {'max_epochs': max_epochs, 'patience': patience}
        if X_val is not None:
            if isinstance(X_val, pd.DataFrame):
                X_val, y_val = X_val.to_numpy(), y_val.to_numpy().reshape(-1, 1)
            # Only register an evaluation set when there is one; passing
            # (None, None) would fail inside the underlying fit.
            fit_kwargs['eval_set'] = [(X_val, y_val)]
            fit_kwargs['eval_name'] = ['eval']

        self.model.fit(X, y, **fit_kwargs)
        history = self.model.history
        return history['loss']

    def predict(self, X):
        """Return the model's predictions for the DataFrame ``X``."""
        X = X.to_numpy()

        return self.model.predict(X)

    @classmethod
    def define_trial_parameters(cls, trial, params):
        """Resolve the search space ``params`` into concrete values for one trial.

        A list value under a known tunable name is a search range
        (``[low, high]``, or the list of choices for ``mask_type``) and is
        sampled from ``trial``. Every other entry is copied through unchanged,
        including list values under other names, so list-valued fixed settings
        are not lost. The cross-validation keys ``'nfold'`` and
        ``'squared_metrics'`` are never returned.
        """
        tunable_names = {name for name, _, _ in cls._RANGE_PARAMS}
        tunable_names.update(cls._CATEGORICAL_PARAMS)

        # Fixed settings first, in the caller's order
        params_out = {}
        for key, val in params.items():
            if key in cls._CV_ONLY_KEYS:
                continue
            if isinstance(val, list) and key in tunable_names:
                continue  # sampled below
            params_out[key] = val

        for name, suggest, log in cls._RANGE_PARAMS:
            if isinstance(params.get(name), list):
                low, high = params[name][0], params[name][1]
                params_out[name] = getattr(trial, suggest)(name, low, high, log = log)

        for name in cls._CATEGORICAL_PARAMS:
            if isinstance(params.get(name), list):
                params_out[name] = trial.suggest_categorical(name, params[name])

        return params_out

In [4]:
# Synthetic regression problem: 745 samples x 50 integer features drawn from
# 0..10 (upper bound 11 is exclusive) and a uniform target scaled to [0, 175).
# The legacy global seed is set first so both draws are reproducible.
np.random.seed(7)
X = np.random.randint(low = 0, high = 11, size = (745, 50))
y = 175 * np.random.rand(745)

In [5]:
# The cross-validation code slices with .iloc, so wrap both arrays in DataFrames.
X, y = pd.DataFrame(X), pd.DataFrame(y)

# Cell output: whether PyTorch can see a CUDA device in this runtime.
torch.cuda.is_available()

False

In [6]:
# Search space and cross-validation settings for the TabNet study.
# List values are search ranges ([low, high]) or, for mask_type, the set of
# choices; scalar values are passed through as fixed settings.
TabNet_params = dict(
    n_d = [2, 10],
    n_steps = [1, 25],
    gamma = [1., 2.],
    n_independent = [1, 20],
    n_shared = [1, 20],
    momentum = [1e-3, 0.4],
    mask_type = ['sparsemax', 'entmax'],
    nfold = 5,                # number of cross-validation folds
    squared_metrics = False,  # False -> score each fold with RMSE rather than MSE
)

model_name = TabNet

TabNet_res = main(X, y, model_name, TabNet_params, n_trials = 50)

[32m[I 2022-04-20 08:22:10,365][0m A new study created in memory with name: no-name-6f990b5b-d2c7-48c6-90f3-b5f28aeb4cd7[0m


Start hyperparameter optimization


  self._init_valid()


  0%|          | 0/50 [00:00<?, ?it/s]

{'n_d': 3, 'n_steps': 8, 'gamma': 1.062036414714562, 'n_independent': 10, 'n_shared': 17, 'momentum': 0.2582866324854284, 'mask_type': 'entmax'}

Early stopping occurred at epoch 161 with best_epoch = 141 and best_eval_mse = 2518.04832
Best weights from best epoch are automatically used!

Early stopping occurred at epoch 191 with best_epoch = 171 and best_eval_mse = 2529.04553
Best weights from best epoch are automatically used!

Early stopping occurred at epoch 189 with best_epoch = 169 and best_eval_mse = 2322.93703
Best weights from best epoch are automatically used!

Early stopping occurred at epoch 161 with best_epoch = 141 and best_eval_mse = 2456.25985
Best weights from best epoch are automatically used!

Early stopping occurred at epoch 184 with best_epoch = 164 and best_eval_mse = 2441.63592
Best weights from best epoch are automatically used!
[32m[I 2022-04-20 08:29:05,876][0m Trial 0 finished with value: 49.52804260462696 and parameters: {'n_d': 3, 'n_steps': 8, 'gamma': 1

RuntimeError: ignored

In [None]:
d
%debug

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
      )
      (specifics): GLU_Block(
        (glu_layers): ModuleList(
          (0): GLU_Layer(
            (fc): Linear(in_features=10, out_features=20, bias=False)
            (bn): GBN(
              (bn): BatchNorm1d(20, eps=1e-05, momentum=0.0060940631018202435, affine=True, track_running_stats=True)
            )
          )
          (1): GLU_Layer(
            (fc): Linear(in_features=10, out_features=20, bias=False)
            (bn): GBN(
              (bn): BatchNorm1d(20, eps=1e-05, momentum=0.0060940631018202435, affine=True, track_running_stats=True)
            )
          )
          (2): GLU_Layer(
            (fc): Linear(in_features=10, out_features=20, bias=False)
            (bn): GBN(
              (bn): BatchNorm1d(20, eps=1e-05, momentum=0.0060940631018202435, affine=True, track_running_stats=True)
            )
          )
          (3): GLU_Layer(
            (fc): Linear(in_features=10, out_fea