## TabNet with extra features

In [1]:
import os
import gc
import random
import pickle

import numpy as np
import pandas as pd
from tqdm.auto import tqdm

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.optim.lr_scheduler import ReduceLROnPlateau

from sklearn.preprocessing import MaxAbsScaler, Normalizer, RobustScaler, StandardScaler,MinMaxScaler
from sklearn.model_selection import KFold, StratifiedKFold      # St for class
from sklearn.decomposition import PCA
from sklearn.metrics import mean_squared_error

from pytorch_tabnet.tab_model import TabNetRegressor
from pytorch_tabnet.metrics import Metric

In [2]:
# Fix the GPU enumeration order and restrict which devices this process sees.
os.environ.update({
    "CUDA_DEVICE_ORDER": "PCI_BUS_ID",
    "CUDA_VISIBLE_DEVICES": "1, 2, 3",
})

# Notebook-wide settings.
# 'epochs' / 'patience' are forwarded to TabNet's fit(), 'learning_rate' /
# 'weight_decay' to the optimizer, 'seed' to every seeding call and 'fold' to
# KFold.  'n_worker' and 'threshold' are not referenced by the visible cells.
CONFIG = dict(
    n_worker=16,
    # Tabnet model
    epochs=1000,
    patience=100,
    learning_rate=2e-3,
    weight_decay=1e-5,
    threshold=0.5,
    seed=42,
    fold=5,
)

# seed setting function
def seed_everything(seed:int):
    """Seed every random number generator this notebook relies on.

    Seeds Python's ``random`` module, NumPy's legacy global generator and
    PyTorch (CPU and all CUDA devices), exports ``PYTHONHASHSEED`` and puts
    cuDNN into its deterministic configuration.

    Args:
        seed: Integer seed applied to all generators.
    """
    random.seed(seed)
    # Only affects interpreters started after this point (e.g. subprocesses);
    # the hash seed of the running interpreter is already fixed.
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    # Several GPUs are made visible to this process, so seed all of them rather
    # than only the current device.  The call is a no-op without CUDA.
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    # The autotuner may pick a different convolution algorithm from run to run,
    # which defeats the deterministic flag above, so it has to stay off.
    torch.backends.cudnn.benchmark = False

# Seed all RNGs before anything stochastic is created.
seed_everything(CONFIG['seed'])

# Keyword arguments shared by every TabNetRegressor built in this notebook.
tabnet_params = {
    "n_d": 64,    # author's note: values from 8 to 64
    "n_a": 128,   # author's note: n_d == n_a is usually good (they differ here)
    "n_steps": 3,
    "gamma": 1.3,
    "lambda_sparse": 0,
    "n_independent": 2,
    "n_shared": 1,
    "optimizer_fn": optim.Adam,
    "optimizer_params": {
        "lr": CONFIG['learning_rate'],
        "weight_decay": CONFIG['weight_decay'],
    },
    "mask_type": "entmax",
    "scheduler_params": {
        "mode": "min",
        "patience": 10,
        "min_lr": 1e-5,
        "factor": 0.9,
    },
    "scheduler_fn": ReduceLROnPlateau,
    "seed": CONFIG["seed"],
    "verbose": 15,   # the captured training log shows one line every 15 epochs
}

In [3]:
# ============= Norm 
def norm_transform(datatype, data, scaler_name='z-score', scaler=None):
    """Scale ``data`` with a scikit-learn scaler.

    Args:
        datatype: ``"test"`` applies an already fitted ``scaler``; any other
            value fits a new scaler on ``data``.
        data: Array-like handed to the scaler.
        scaler_name: Which scaler to create in fit mode. One of ``'z-score'``,
            ``'minmax'``, ``'maxabs'``, ``'robust'`` or ``'norm'``. Ignored in
            ``"test"`` mode.
        scaler: Fitted scaler to reuse; required in ``"test"`` mode.

    Returns:
        ``(scaled_data, fitted_scaler)`` in fit mode, ``scaled_data`` alone in
        ``"test"`` mode.

    Raises:
        KeyError: ``scaler_name`` is not one of the supported names (fit mode).
        ValueError: ``datatype`` is ``"test"`` but no ``scaler`` was supplied.
    """
    # Statistics are learned from the training data only.
    if datatype != "test":
        # Map names to classes so that only the requested scaler is built.
        scaler_classes = {
            'z-score': StandardScaler,
            'minmax': MinMaxScaler,
            'maxabs': MaxAbsScaler,
            'robust': RobustScaler,
            'norm': Normalizer,
        }
        scaler = scaler_classes[scaler_name]()
        scaled_train = scaler.fit_transform(data)
        return scaled_train, scaler

    if scaler is None:
        # Fail with a clear message instead of an AttributeError on None.
        raise ValueError(
            "norm_transform('test', ...) requires the scaler fitted on the training data"
        )
    scaled_test = scaler.transform(data)
    return scaled_test

# ============= pca 
def pca_transform(datatype, data, n_comp=300, pca=None):
    """Project ``data`` onto principal components.

    Args:
        datatype: ``"test"`` applies an already fitted ``pca``; any other value
            fits a new ``PCA`` on ``data``.
        data: Array-like handed to the PCA object.
        n_comp: Number of components to keep when fitting. Ignored in
            ``"test"`` mode.
        pca: Fitted PCA object to reuse; required in ``"test"`` mode.

    Returns:
        ``(projected_data, fitted_pca)`` in fit mode, ``projected_data`` alone
        in ``"test"`` mode.

    Raises:
        ValueError: ``datatype`` is ``"test"`` but no ``pca`` was supplied.
    """
    if datatype != "test":
        # Seeded from CONFIG so repeated fits give the same components.
        pca = PCA(n_components=n_comp, random_state=CONFIG["seed"])
        pca_train = pca.fit_transform(data)
        print(f"with {n_comp} components, pca variance ratio : {sum(pca.explained_variance_ratio_)}")
        return pca_train, pca

    if pca is None:
        # Fail with a clear message instead of an AttributeError on None.
        raise ValueError(
            "pca_transform('test', ...) requires the PCA fitted on the training data"
        )
    pca_test = pca.transform(data)
    return pca_test


def lg_nrmse(gt, preds):
    """Weighted sum of per-target normalised RMSE over the first 14 columns.

    For each of the first 14 columns the RMSE between ``gt`` and ``preds`` is
    divided by the mean absolute ground-truth value of that column. The first
    8 columns (Y_01 ~ Y_08) are weighted by 1.2 (20% extra weight), the
    remaining 6 by 1.0.

    RMSE is computed with NumPy rather than
    ``mean_squared_error(..., squared=False)`` because recent scikit-learn
    releases no longer accept the ``squared`` argument.

    Args:
        gt: 2-D array-like of ground-truth values, at least 14 columns.
        preds: 2-D array-like of predictions with the same number of rows and
            at least 14 columns.

    Returns:
        The weighted NRMSE sum as a NumPy float. A column whose mean absolute
        ground truth is 0 yields ``inf``/``nan``, as plain division would.

    Raises:
        IndexError: either input is not 2-D or has fewer than 14 columns.
        ValueError: the inputs have a different number of rows.
    """
    n_scored = 14   # number of target columns that enter the score
    n_heavy = 8     # leading columns that receive the 1.2 weight

    gt = np.asarray(gt)
    preds = np.asarray(preds)
    if gt.ndim != 2 or preds.ndim != 2:
        raise IndexError("lg_nrmse expects 2-D arrays of shape (n_samples, n_targets)")
    if gt.shape[1] < n_scored or preds.shape[1] < n_scored:
        raise IndexError(f"lg_nrmse needs at least {n_scored} target columns")
    if gt.shape[0] != preds.shape[0]:
        raise ValueError("gt and preds must have the same number of rows")

    gt = gt[:, :n_scored]
    preds = preds[:, :n_scored]

    # Per-column RMSE, then normalise by the per-column mean |ground truth|.
    rmse = np.sqrt(np.mean((gt - preds) ** 2, axis=0))
    all_nrmse = rmse / np.mean(np.abs(gt), axis=0)

    score = 1.2 * np.sum(all_nrmse[:n_heavy]) + 1.0 * np.sum(all_nrmse[n_heavy:n_scored])
    return score

    
class NRMSE(Metric):
    """pytorch-tabnet metric wrapper around ``lg_nrmse``.

    Reported under the name ``NormRMSE`` (it appears as ``val_0_NormRMSE`` in
    the training log).
    """

    def __init__(self):
        # The score is an error measure, so it must not be maximised.
        self._maximize = False
        self._name = "NormRMSE"

    def __call__(self, y_true, y_score):
        """Return the weighted NRMSE of ``y_score`` against ``y_true``."""
        return lg_nrmse(y_true, y_score)


In [4]:
# ---- Paths -----------------------------------------------------------------
input_dir = '../../../dataset/'
TRAIN_DATA_PATH = input_dir+'train_features.csv'
TEST_DATA_PATH = input_dir+'test_features.csv'
SAMPLE_SUB_PATH = input_dir+'sample_submission.csv'

MODEL_DIR_NAME = "./Tabnet_models"
SCALER_PATH = os.path.join(MODEL_DIR_NAME, "x_scaler.pkl")

# exist_ok makes the cell safe to re-run without a separate existence check.
os.makedirs(MODEL_DIR_NAME, exist_ok=True)

n_targets = 14

# ---- Data ------------------------------------------------------------------
df_train = pd.read_csv(TRAIN_DATA_PATH)
df_test = pd.read_csv(TEST_DATA_PATH)

df_train = df_train.drop(labels=['X_10', 'X_11'], axis=1)
df_test = df_test.drop(labels=['X_10', 'X_11', 'seat_kurtosis', 'area_kurtosis'], axis=1)

# Column 0 is skipped in both frames; the last n_targets train columns are the
# regression targets.
X_features, y_features = df_train.iloc[:, 1:-n_targets].values, df_train.iloc[:, -n_targets:].values
X_test_features = df_test.iloc[:, 1:].values

# The two frames drop different column lists, so verify the feature counts
# agree before spending time on training.
assert X_features.shape[1] == X_test_features.shape[1], (
    f"train has {X_features.shape[1]} features but test has {X_test_features.shape[1]}"
)

# Optional input scaling (disabled):
# X_norm_features, scaler = norm_transform("train", X_features, "z-score")
# with open(SCALER_PATH, "wb") as fw:
#     pickle.dump(scaler, fw)
# X_test_norm_features = norm_transform("test", X_test_features, "z-score", scaler)

# ---- K-fold training -------------------------------------------------------
kf = KFold(n_splits=CONFIG['fold'], random_state=CONFIG['seed'], shuffle=True)
avg_loss, avg_nrmse = 0, 0

LOG_PATH = os.path.join(MODEL_DIR_NAME, "log.txt")

with open(LOG_PATH, "w") as fw:
    # Terminate the header so the first fold banner starts on its own line.
    fw.write("Tabnet model ==\n")

for fold, (train_idx, test_idx) in enumerate(kf.split(X_features)):
    with open(LOG_PATH, "a") as fa:
        fa.write(f": ========= FOLD {fold+1} ========= :\n")

    model = TabNetRegressor(**tabnet_params)
    # The last entry of eval_metric (NRMSE) is the one used for early stopping.
    model.fit(X_train=X_features[train_idx], y_train=y_features[train_idx],
                eval_set=[(X_features[test_idx],y_features[test_idx])],
                loss_fn=nn.MSELoss(),
                max_epochs=CONFIG['epochs'], patience=CONFIG['patience'], 
                eval_metric=['rmse', 'mse', NRMSE])
    # save_model appends ".zip"; the inference cell loads tabnet_fold{n}.zip
    # from this same directory.
    model_name = f'tabnet_fold{fold+1}'
    model_path = os.path.join(MODEL_DIR_NAME, model_name)
    model.save_model(model_path)

    # Record the best early-stopping score of this fold and keep a running
    # average across folds.
    fold_nrmse = model.best_cost
    avg_nrmse += fold_nrmse / CONFIG['fold']
    with open(LOG_PATH, "a") as fa:
        fa.write(f"best epoch {model.best_epoch} | val NormRMSE {fold_nrmse:.5f}\n")

with open(LOG_PATH, "a") as fa:
    fa.write(f": ========= AVERAGE val NormRMSE {avg_nrmse:.5f} ========= :\n")

Device used : cuda
epoch 0  | loss: 382.77832| val_0_rmse: 350.20084| val_0_mse: 122640.62865| val_0_NormRMSE: 858.85796|  0:00:01s
epoch 15 | loss: 1.57123 | val_0_rmse: 2.3921  | val_0_mse: 5.72217 | val_0_NormRMSE: 4.04192 |  0:00:21s
epoch 30 | loss: 1.42931 | val_0_rmse: 1.21813 | val_0_mse: 1.48385 | val_0_NormRMSE: 2.03212 |  0:00:41s
epoch 45 | loss: 1.15811 | val_0_rmse: 1.24842 | val_0_mse: 1.55856 | val_0_NormRMSE: 2.02706 |  0:01:02s
epoch 60 | loss: 0.94341 | val_0_rmse: 1.30655 | val_0_mse: 1.70706 | val_0_NormRMSE: 2.06667 |  0:01:23s
epoch 75 | loss: 0.8046  | val_0_rmse: 1.32967 | val_0_mse: 1.76803 | val_0_NormRMSE: 2.07642 |  0:01:44s
epoch 90 | loss: 0.70645 | val_0_rmse: 1.3353  | val_0_mse: 1.78302 | val_0_NormRMSE: 2.07545 |  0:02:04s
epoch 105| loss: 0.6424  | val_0_rmse: 1.347   | val_0_mse: 1.8144  | val_0_NormRMSE: 2.07877 |  0:02:25s
epoch 120| loss: 0.59632 | val_0_rmse: 1.35713 | val_0_mse: 1.8418  | val_0_NormRMSE: 2.09307 |  0:02:45s

Early stopping occu

In [5]:
# Average the test-set predictions of the checkpoints saved for each fold.
n_folds = CONFIG['fold']
preds_reg = np.zeros((len(df_test), n_targets))

for fold_no in range(1, n_folds + 1):
    model_path = os.path.join(MODEL_DIR_NAME, f"tabnet_fold{fold_no}.zip")

    # Restore the fold's trained network from its archive.
    infer_model = TabNetRegressor(**tabnet_params)
    infer_model.load_model(model_path)

    # Accumulate in place so the running sum keeps preds_reg's float64 dtype.
    preds_reg += infer_model.predict(X_test_features)

preds_reg /= n_folds


Device used : cuda
Device used : cuda
Device used : cuda
Device used : cuda
Device used : cuda
Device used : cuda
Device used : cuda
Device used : cuda
Device used : cuda
Device used : cuda


In [6]:
# Fill the sample submission with the fold-averaged predictions and save it.
SAMPLE_SUB_PATH = input_dir+'sample_submission.csv'
submission_path = './tabnet_ext_submit.csv'

submit = pd.read_csv(SAMPLE_SUB_PATH)
# Column 0 is kept as read from the sample file; every other column is
# overwritten with the predictions.
submit.iloc[:, 1:] = preds_reg
submit.to_csv(submission_path, index=False)

: 