In [1]:
import sys
import pandas as pd
import numpy as np
pd.options.display.max_columns = None
from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_auc_score
from sklearn.metrics import accuracy_score
from sklearn.metrics import mean_absolute_error
import time

sys.path.append('../')

### Binary

In [2]:
train = pd.read_parquet('/www/dslib/spark_sota_modeling/dataset/home-credit-default-risk/train.parquet')
target_col = 'target'
index_col = 'sk_id_curr'
train, test = train_test_split(train, test_size=0.2, random_state=42, stratify=train[target_col])
X_train = train.drop(columns=[target_col, index_col])
y_train = train[target_col]
X_test = test.drop(columns=[target_col, index_col])
y_test = test[target_col]
with open('/www/dslib/spark_sota_modeling/dataset/home-credit-default-risk/categorical_features.txt', 'r') as f:
    categorical_features = [line.strip() for line in f.readlines()]
len(categorical_features)

16

In [None]:
from models.estimators.tabnet_estimator import TabNetBinary

model = TabNetBinary(
    d_model = 8,
    n_steps = 5,
    decision_dim = 512,
    n_shared = 4,
    n_independent = 4,
    glu_dropout = 0.2,
    dropout_emb = 0.6,
    glu_norm = 'batch',
    gamma = 1.5,
    lambda_sparse = 1e-4,
    batch_size = 1024,
    epochs = 200,
    learning_rate = 0.01,
    early_stopping_patience = 10,
    weight_decay = 1e-5,
    reducelronplateau_patience = 5,
    reducelronplateau_factor = 0.7,
    verbose = True,
    random_state = 42,
)
model.fit(X_train, y_train, eval_set=(X_test, y_test), eval_metric='roc_auc', mode='max', cat_features=categorical_features)
y_pred_proba = model.predict_proba(X_test, cat_features=categorical_features)
roc_auc_score(y_test, y_pred_proba[:,1])

In [None]:
from models.estimators.tabnet_estimator import TabNetBinary

for n_shared in [1, 2, 3, 4, 5, 6, 7, 8, 9]:
    start_time = time.time()
    model = TabNetBinary(
        d_model = 8,
        n_steps = 5,
        decision_dim = 64,
        n_shared = n_shared,
        n_independent = n_shared,
        glu_dropout = 0.3,
        dropout_emb = 0.3,
        glu_norm = 'batch',
        gamma = 1.5,
        lambda_sparse = 1e-5,
        batch_size = 1024,
        epochs = 200,
        learning_rate = 0.01,
        early_stopping_patience = 10,
        weight_decay = 1e-5,
        reducelronplateau_patience = 5,
        reducelronplateau_factor = 0.7,
        verbose = False,
        random_state = 42,
    )

    model.fit(X_train, y_train, eval_set=(X_test, y_test), eval_metric='roc_auc', mode='max', 
              cat_features=categorical_features)
    y_pred_proba = model.predict_proba(X_test, cat_features=categorical_features)
    formatted = time.strftime("%H:%M:%S", time.gmtime(time.time()-start_time))
    print(
        f"n_shared: {n_shared}, "
        f"ROC AUC: {roc_auc_score(y_test, y_pred_proba[:,1]):.4f}, "
        f"Time: {formatted}"
    )

### Multiclass

In [2]:
train = pd.read_parquet('/www/dslib/spark_sota_modeling/dataset/forest-cover-type/train.parquet')
target_col = 'cover_type'
train, test = train_test_split(train, test_size=0.2, random_state=42, stratify=train[target_col])
X_train = train.drop(columns=[target_col])
y_train = train[target_col]
X_test = test.drop(columns=[target_col])
y_test = test[target_col]
with open('/www/dslib/spark_sota_modeling/dataset/forest-cover-type/categorical_features.txt', 'r') as f:
    categorical_features = [line.strip() for line in f.readlines()]
len(categorical_features)

44

In [None]:
n_classes=train[target_col].nunique(),
d_model = 8,
n_steps = 6,
decision_dim = 128,
n_shared = 2,
n_independent = 2,
glu_dropout = 0.1,
dropout_emb = 0.1,
glu_norm = 'batch',
gamma = 1.5,
lambda_sparse = 1e-3,
batch_size = 1024,
epochs = 200,
learning_rate = 0.025,
early_stopping_patience = 15,
weight_decay = 1e-5,
reducelronplateau_patience = 5,
reducelronplateau_factor = 0.5,
verbose = True,
random_state = 42,
momentum=0.6
# 0.9256215416125229

n_classes=train[target_col].nunique(),
glu_dropout = 0.05,
dropout_emb = 0.05,
lambda_sparse = 1e-3,
momentum=0.1,
# 0.942256224021755

lambda_sparse = 1e-4,
momentum=0.1,
# 0.9434954347133895

lambda_sparse = 1e-5,
momentum=0.1,
# 0.9424799703966334

lambda_sparse = 1e-4,
learning_rate = 0.01,
momentum=0.1,
# 0.9477035876870649

momentum=0.6,
# 

In [None]:
from models.estimators.tabnet_estimator import TabNetMulticlass

model = TabNetMulticlass(
    n_classes=train[target_col].nunique(),
    d_model = 8,
    n_steps = 6, # 5
    decision_dim = 128, # 64 + 64
    n_shared = 2, # 2
    n_independent = 2, # 2
    glu_dropout = 0.05,
    dropout_emb = 0.05,
    glu_norm = 'batch',
    gamma = 1.5, # 1.5
    lambda_sparse = 0.0001, # 0.0001
    batch_size = 1024, # 512
    momentum = 0.6, # 0.7
    epochs = 200, 
    learning_rate = 0.01, # 0.02
    early_stopping_patience = 15,
    weight_decay = 1e-5,
    reducelronplateau_patience = 5,
    reducelronplateau_factor = 0.5,
    verbose = True,
    random_state = 42,
)

model.fit(X_train, y_train, eval_set=(X_test, y_test), cat_features=categorical_features)

y_pred = model.predict(X_test, cat_features=categorical_features)
y_pred_proba = model.predict_proba(X_test, cat_features=categorical_features)
accuracy_score(y_test, y_pred)

Начинаем обучение на cuda...


Training:   0%|          | 0/454 [00:00<?, ?it/s]

Validation:   0%|          | 0/114 [00:00<?, ?it/s]

Epoch 1/200, Train loss: 0.5335, Train accuracy: 0.7726, Val loss: 0.4006, Val accuracy: 0.8303
---> Сохранена лучшая модель (Эпоха 1) с Val accuracy: 0.8303


Training:   0%|          | 0/454 [00:00<?, ?it/s]

Validation:   0%|          | 0/114 [00:00<?, ?it/s]

Epoch 2/200, Train loss: 0.3862, Train accuracy: 0.8366, Val loss: 0.3610, Val accuracy: 0.8457
---> Сохранена лучшая модель (Эпоха 2) с Val accuracy: 0.8457


Training:   0%|          | 0/454 [00:00<?, ?it/s]

Validation:   0%|          | 0/114 [00:00<?, ?it/s]

Epoch 3/200, Train loss: 0.3514, Train accuracy: 0.8526, Val loss: 0.3152, Val accuracy: 0.8688
---> Сохранена лучшая модель (Эпоха 3) с Val accuracy: 0.8688


Training:   0%|          | 0/454 [00:00<?, ?it/s]

Validation:   0%|          | 0/114 [00:00<?, ?it/s]

Epoch 4/200, Train loss: 0.3321, Train accuracy: 0.8610, Val loss: 0.2959, Val accuracy: 0.8769
---> Сохранена лучшая модель (Эпоха 4) с Val accuracy: 0.8769


Training:   0%|          | 0/454 [00:00<?, ?it/s]

Validation:   0%|          | 0/114 [00:00<?, ?it/s]

Epoch 5/200, Train loss: 0.3204, Train accuracy: 0.8667, Val loss: 0.2901, Val accuracy: 0.8806
---> Сохранена лучшая модель (Эпоха 5) с Val accuracy: 0.8806


Training:   0%|          | 0/454 [00:00<?, ?it/s]

Validation:   0%|          | 0/114 [00:00<?, ?it/s]

Epoch 6/200, Train loss: 0.3145, Train accuracy: 0.8695, Val loss: 0.2659, Val accuracy: 0.8919
---> Сохранена лучшая модель (Эпоха 6) с Val accuracy: 0.8919


Training:   0%|          | 0/454 [00:00<?, ?it/s]

Validation:   0%|          | 0/114 [00:00<?, ?it/s]

Epoch 7/200, Train loss: 0.3061, Train accuracy: 0.8730, Val loss: 0.2871, Val accuracy: 0.8806
Нет улучшения 1/15 эпох.


Training:   0%|          | 0/454 [00:00<?, ?it/s]

Validation:   0%|          | 0/114 [00:00<?, ?it/s]

Epoch 8/200, Train loss: 0.2698, Train accuracy: 0.8880, Val loss: 0.2191, Val accuracy: 0.9103
---> Сохранена лучшая модель (Эпоха 8) с Val accuracy: 0.9103


Training:   0%|          | 0/454 [00:00<?, ?it/s]

Validation:   0%|          | 0/114 [00:00<?, ?it/s]

Epoch 9/200, Train loss: 0.2650, Train accuracy: 0.8901, Val loss: 0.2096, Val accuracy: 0.9159
---> Сохранена лучшая модель (Эпоха 9) с Val accuracy: 0.9159


Training:   0%|          | 0/454 [00:00<?, ?it/s]

Validation:   0%|          | 0/114 [00:00<?, ?it/s]

Epoch 10/200, Train loss: 0.2610, Train accuracy: 0.8917, Val loss: 0.2212, Val accuracy: 0.9088
Нет улучшения 1/15 эпох.


Training:   0%|          | 0/454 [00:00<?, ?it/s]

Validation:   0%|          | 0/114 [00:00<?, ?it/s]

Epoch 11/200, Train loss: 0.2589, Train accuracy: 0.8925, Val loss: 0.2075, Val accuracy: 0.9160
---> Сохранена лучшая модель (Эпоха 11) с Val accuracy: 0.9160


Training:   0%|          | 0/454 [00:00<?, ?it/s]

Validation:   0%|          | 0/114 [00:00<?, ?it/s]

Epoch 12/200, Train loss: 0.2558, Train accuracy: 0.8940, Val loss: 0.2070, Val accuracy: 0.9157
Нет улучшения 1/15 эпох.


Training:   0%|          | 0/454 [00:00<?, ?it/s]

Validation:   0%|          | 0/114 [00:00<?, ?it/s]

Epoch 13/200, Train loss: 0.2531, Train accuracy: 0.8950, Val loss: 0.2076, Val accuracy: 0.9145
Нет улучшения 2/15 эпох.


Training:   0%|          | 0/454 [00:00<?, ?it/s]

Validation:   0%|          | 0/114 [00:00<?, ?it/s]

Epoch 14/200, Train loss: 0.2321, Train accuracy: 0.9039, Val loss: 0.1813, Val accuracy: 0.9271
---> Сохранена лучшая модель (Эпоха 14) с Val accuracy: 0.9271


Training:   0%|          | 0/454 [00:00<?, ?it/s]

Validation:   0%|          | 0/114 [00:00<?, ?it/s]

Epoch 15/200, Train loss: 0.2281, Train accuracy: 0.9059, Val loss: 0.1776, Val accuracy: 0.9289
---> Сохранена лучшая модель (Эпоха 15) с Val accuracy: 0.9289


Training:   0%|          | 0/454 [00:00<?, ?it/s]

Validation:   0%|          | 0/114 [00:00<?, ?it/s]

Epoch 16/200, Train loss: 0.2259, Train accuracy: 0.9061, Val loss: 0.1771, Val accuracy: 0.9275
Нет улучшения 1/15 эпох.


Training:   0%|          | 0/454 [00:00<?, ?it/s]

Validation:   0%|          | 0/114 [00:00<?, ?it/s]

Epoch 17/200, Train loss: 0.2238, Train accuracy: 0.9075, Val loss: 0.1722, Val accuracy: 0.9303
---> Сохранена лучшая модель (Эпоха 17) с Val accuracy: 0.9303


Training:   0%|          | 0/454 [00:00<?, ?it/s]

Validation:   0%|          | 0/114 [00:00<?, ?it/s]

Epoch 18/200, Train loss: 0.2225, Train accuracy: 0.9081, Val loss: 0.1755, Val accuracy: 0.9285
Нет улучшения 1/15 эпох.


Training:   0%|          | 0/454 [00:00<?, ?it/s]

Validation:   0%|          | 0/114 [00:00<?, ?it/s]

Epoch 19/200, Train loss: 0.2212, Train accuracy: 0.9088, Val loss: 0.1750, Val accuracy: 0.9290
Нет улучшения 2/15 эпох.


Training:   0%|          | 0/454 [00:00<?, ?it/s]

Validation:   0%|          | 0/114 [00:00<?, ?it/s]

Epoch 20/200, Train loss: 0.2102, Train accuracy: 0.9132, Val loss: 0.1581, Val accuracy: 0.9369
---> Сохранена лучшая модель (Эпоха 20) с Val accuracy: 0.9369


Training:   0%|          | 0/454 [00:00<?, ?it/s]

Validation:   0%|          | 0/114 [00:00<?, ?it/s]

Epoch 21/200, Train loss: 0.2069, Train accuracy: 0.9143, Val loss: 0.1556, Val accuracy: 0.9379
---> Сохранена лучшая модель (Эпоха 21) с Val accuracy: 0.9379


Training:   0%|          | 0/454 [00:00<?, ?it/s]

Validation:   0%|          | 0/114 [00:00<?, ?it/s]

Epoch 22/200, Train loss: 0.2059, Train accuracy: 0.9148, Val loss: 0.1570, Val accuracy: 0.9365
Нет улучшения 1/15 эпох.


Training:   0%|          | 0/454 [00:00<?, ?it/s]

Validation:   0%|          | 0/114 [00:00<?, ?it/s]

Epoch 23/200, Train loss: 0.2047, Train accuracy: 0.9149, Val loss: 0.1517, Val accuracy: 0.9395
---> Сохранена лучшая модель (Эпоха 23) с Val accuracy: 0.9395


Training:   0%|          | 0/454 [00:00<?, ?it/s]

Validation:   0%|          | 0/114 [00:00<?, ?it/s]

Epoch 24/200, Train loss: 0.2031, Train accuracy: 0.9157, Val loss: 0.1503, Val accuracy: 0.9397
---> Сохранена лучшая модель (Эпоха 24) с Val accuracy: 0.9397


Training:   0%|          | 0/454 [00:00<?, ?it/s]

Validation:   0%|          | 0/114 [00:00<?, ?it/s]

Epoch 25/200, Train loss: 0.2032, Train accuracy: 0.9160, Val loss: 0.1555, Val accuracy: 0.9367
Нет улучшения 1/15 эпох.


Training:   0%|          | 0/454 [00:00<?, ?it/s]

Validation:   0%|          | 0/114 [00:00<?, ?it/s]

Epoch 26/200, Train loss: 0.1966, Train accuracy: 0.9188, Val loss: 0.1450, Val accuracy: 0.9422
---> Сохранена лучшая модель (Эпоха 26) с Val accuracy: 0.9422


Training:   0%|          | 0/454 [00:00<?, ?it/s]

Validation:   0%|          | 0/114 [00:00<?, ?it/s]

Epoch 27/200, Train loss: 0.1942, Train accuracy: 0.9199, Val loss: 0.1446, Val accuracy: 0.9418
Нет улучшения 1/15 эпох.


Training:   0%|          | 0/454 [00:00<?, ?it/s]

Validation:   0%|          | 0/114 [00:00<?, ?it/s]

Epoch 28/200, Train loss: 0.1931, Train accuracy: 0.9198, Val loss: 0.1424, Val accuracy: 0.9423
---> Сохранена лучшая модель (Эпоха 28) с Val accuracy: 0.9423


Training:   0%|          | 0/454 [00:00<?, ?it/s]

Validation:   0%|          | 0/114 [00:00<?, ?it/s]

Epoch 29/200, Train loss: 0.1933, Train accuracy: 0.9196, Val loss: 0.1412, Val accuracy: 0.9436
---> Сохранена лучшая модель (Эпоха 29) с Val accuracy: 0.9436


Training:   0%|          | 0/454 [00:00<?, ?it/s]

Validation:   0%|          | 0/114 [00:00<?, ?it/s]

Epoch 30/200, Train loss: 0.1922, Train accuracy: 0.9204, Val loss: 0.1422, Val accuracy: 0.9427
Нет улучшения 1/15 эпох.


Training:   0%|          | 0/454 [00:00<?, ?it/s]

Validation:   0%|          | 0/114 [00:00<?, ?it/s]

Epoch 31/200, Train loss: 0.1909, Train accuracy: 0.9209, Val loss: 0.1409, Val accuracy: 0.9439
---> Сохранена лучшая модель (Эпоха 31) с Val accuracy: 0.9439


Training:   0%|          | 0/454 [00:00<?, ?it/s]

Validation:   0%|          | 0/114 [00:00<?, ?it/s]

Epoch 32/200, Train loss: 0.1886, Train accuracy: 0.9218, Val loss: 0.1378, Val accuracy: 0.9451
---> Сохранена лучшая модель (Эпоха 32) с Val accuracy: 0.9451


Training:   0%|          | 0/454 [00:00<?, ?it/s]

Validation:   0%|          | 0/114 [00:00<?, ?it/s]

Epoch 33/200, Train loss: 0.1879, Train accuracy: 0.9219, Val loss: 0.1370, Val accuracy: 0.9456
---> Сохранена лучшая модель (Эпоха 33) с Val accuracy: 0.9456


Training:   0%|          | 0/454 [00:00<?, ?it/s]

Validation:   0%|          | 0/114 [00:00<?, ?it/s]

Epoch 34/200, Train loss: 0.1864, Train accuracy: 0.9227, Val loss: 0.1369, Val accuracy: 0.9458
---> Сохранена лучшая модель (Эпоха 34) с Val accuracy: 0.9458


Training:   0%|          | 0/454 [00:00<?, ?it/s]

Validation:   0%|          | 0/114 [00:00<?, ?it/s]

Epoch 35/200, Train loss: 0.1867, Train accuracy: 0.9228, Val loss: 0.1360, Val accuracy: 0.9462
---> Сохранена лучшая модель (Эпоха 35) с Val accuracy: 0.9462


Training:   0%|          | 0/454 [00:00<?, ?it/s]

Validation:   0%|          | 0/114 [00:00<?, ?it/s]

Epoch 36/200, Train loss: 0.1852, Train accuracy: 0.9226, Val loss: 0.1364, Val accuracy: 0.9450
Нет улучшения 1/15 эпох.


Training:   0%|          | 0/454 [00:00<?, ?it/s]

Validation:   0%|          | 0/114 [00:00<?, ?it/s]

Epoch 37/200, Train loss: 0.1861, Train accuracy: 0.9231, Val loss: 0.1359, Val accuracy: 0.9460
Нет улучшения 2/15 эпох.


Training:   0%|          | 0/454 [00:00<?, ?it/s]

Validation:   0%|          | 0/114 [00:00<?, ?it/s]

Epoch 38/200, Train loss: 0.1840, Train accuracy: 0.9238, Val loss: 0.1342, Val accuracy: 0.9463
---> Сохранена лучшая модель (Эпоха 38) с Val accuracy: 0.9463


Training:   0%|          | 0/454 [00:00<?, ?it/s]

Validation:   0%|          | 0/114 [00:00<?, ?it/s]

Epoch 39/200, Train loss: 0.1841, Train accuracy: 0.9240, Val loss: 0.1344, Val accuracy: 0.9467
---> Сохранена лучшая модель (Эпоха 39) с Val accuracy: 0.9467


Training:   0%|          | 0/454 [00:00<?, ?it/s]

Validation:   0%|          | 0/114 [00:00<?, ?it/s]

Epoch 40/200, Train loss: 0.1837, Train accuracy: 0.9238, Val loss: 0.1341, Val accuracy: 0.9463
Нет улучшения 1/15 эпох.


Training:   0%|          | 0/454 [00:00<?, ?it/s]

Validation:   0%|          | 0/114 [00:00<?, ?it/s]

Epoch 41/200, Train loss: 0.1841, Train accuracy: 0.9239, Val loss: 0.1345, Val accuracy: 0.9466
Нет улучшения 2/15 эпох.


Training:   0%|          | 0/454 [00:00<?, ?it/s]

Validation:   0%|          | 0/114 [00:00<?, ?it/s]

Epoch 42/200, Train loss: 0.1823, Train accuracy: 0.9246, Val loss: 0.1341, Val accuracy: 0.9462
Нет улучшения 3/15 эпох.


Training:   0%|          | 0/454 [00:00<?, ?it/s]

Validation:   0%|          | 0/114 [00:00<?, ?it/s]

Epoch 43/200, Train loss: 0.1823, Train accuracy: 0.9247, Val loss: 0.1332, Val accuracy: 0.9464
Нет улучшения 4/15 эпох.


Training:   0%|          | 0/454 [00:00<?, ?it/s]

Validation:   0%|          | 0/114 [00:00<?, ?it/s]

Epoch 44/200, Train loss: 0.1815, Train accuracy: 0.9249, Val loss: 0.1327, Val accuracy: 0.9470
---> Сохранена лучшая модель (Эпоха 44) с Val accuracy: 0.9470


Training:   0%|          | 0/454 [00:00<?, ?it/s]

Validation:   0%|          | 0/114 [00:00<?, ?it/s]

Epoch 45/200, Train loss: 0.1812, Train accuracy: 0.9251, Val loss: 0.1329, Val accuracy: 0.9469
Нет улучшения 1/15 эпох.


Training:   0%|          | 0/454 [00:00<?, ?it/s]

Validation:   0%|          | 0/114 [00:00<?, ?it/s]

Epoch 46/200, Train loss: 0.1810, Train accuracy: 0.9247, Val loss: 0.1324, Val accuracy: 0.9473
---> Сохранена лучшая модель (Эпоха 46) с Val accuracy: 0.9473


Training:   0%|          | 0/454 [00:00<?, ?it/s]

Validation:   0%|          | 0/114 [00:00<?, ?it/s]

Epoch 47/200, Train loss: 0.1805, Train accuracy: 0.9252, Val loss: 0.1323, Val accuracy: 0.9471
Нет улучшения 1/15 эпох.


Training:   0%|          | 0/454 [00:00<?, ?it/s]

Validation:   0%|          | 0/114 [00:00<?, ?it/s]

Epoch 48/200, Train loss: 0.1815, Train accuracy: 0.9252, Val loss: 0.1322, Val accuracy: 0.9474
---> Сохранена лучшая модель (Эпоха 48) с Val accuracy: 0.9474


Training:   0%|          | 0/454 [00:00<?, ?it/s]

Validation:   0%|          | 0/114 [00:00<?, ?it/s]

Epoch 49/200, Train loss: 0.1815, Train accuracy: 0.9249, Val loss: 0.1323, Val accuracy: 0.9469
Нет улучшения 1/15 эпох.


Training:   0%|          | 0/454 [00:00<?, ?it/s]

Validation:   0%|          | 0/114 [00:00<?, ?it/s]

Epoch 50/200, Train loss: 0.1809, Train accuracy: 0.9253, Val loss: 0.1327, Val accuracy: 0.9468
Нет улучшения 2/15 эпох.


Training:   0%|          | 0/454 [00:00<?, ?it/s]

Validation:   0%|          | 0/114 [00:00<?, ?it/s]

Epoch 51/200, Train loss: 0.1801, Train accuracy: 0.9256, Val loss: 0.1325, Val accuracy: 0.9470
Нет улучшения 3/15 эпох.


Training:   0%|          | 0/454 [00:00<?, ?it/s]

Validation:   0%|          | 0/114 [00:00<?, ?it/s]

Epoch 52/200, Train loss: 0.1804, Train accuracy: 0.9255, Val loss: 0.1327, Val accuracy: 0.9467
Нет улучшения 4/15 эпох.


Training:   0%|          | 0/454 [00:00<?, ?it/s]

Validation:   0%|          | 0/114 [00:00<?, ?it/s]

Epoch 53/200, Train loss: 0.1807, Train accuracy: 0.9252, Val loss: 0.1318, Val accuracy: 0.9475
---> Сохранена лучшая модель (Эпоха 53) с Val accuracy: 0.9475


Training:   0%|          | 0/454 [00:00<?, ?it/s]

Validation:   0%|          | 0/114 [00:00<?, ?it/s]

Epoch 54/200, Train loss: 0.1805, Train accuracy: 0.9255, Val loss: 0.1319, Val accuracy: 0.9470
Нет улучшения 1/15 эпох.


Training:   0%|          | 0/454 [00:00<?, ?it/s]

### Regression

In [2]:
train = pd.read_parquet('/www/dslib/spark_sota_modeling/dataset/allstate-claims-severity/train.parquet')
target_col = 'loss'
index_col = 'id'
train, test = train_test_split(train, test_size=0.2, random_state=42)
X_train = train.drop(columns=[target_col, index_col])
y_train = train[target_col]
X_test = test.drop(columns=[target_col, index_col])
y_test = test[target_col]
with open('/www/dslib/spark_sota_modeling/dataset/forest-cover-type/categorical_features.txt', 'r') as f:
    categorical_features = [line.strip() for line in f.readlines()]
len(categorical_features)

44

In [4]:
from models.estimators.tabnet_estimator import TabNetRegressor
model = TabNetRegressor(
    verbose=True,
    dropout=0.4,
    early_stopping_patience=10,
    batch_size=8192,
    virtual_batch_size=128,
    learning_rate=0.005,
    epochs=1000,
    dynamic_emb_size=True,
    n_glu_layers=4,
    n_steps=8,
)

model.fit(X_train, y_train, eval_set=(X_test, y_test), eval_metric='mae', mode='min', cat_features=categorical_features)
y_pred = model.predict(X_test, cat_features=categorical_features)
mean_absolute_error(y_test, y_pred)

Начинаем обучение на cuda...


Training:   0%|          | 0/19 [00:00<?, ?it/s]

Validation:   0%|          | 0/5 [00:00<?, ?it/s]

Epoch 1/1000, Train loss: 17736628.4211, Train mae: 3037.8191, Val loss: 17156994.6000, Val mae: 3020.7126
Сохраняем лучшую модель с метрикой mae: 3020.7126


Training:   0%|          | 0/19 [00:00<?, ?it/s]

Validation:   0%|          | 0/5 [00:00<?, ?it/s]

Epoch 2/1000, Train loss: 17544206.2632, Train mae: 3026.8928, Val loss: 16990062.2000, Val mae: 3009.5676
Сохраняем лучшую модель с метрикой mae: 3009.5676


Training:   0%|          | 0/19 [00:00<?, ?it/s]

Validation:   0%|          | 0/5 [00:00<?, ?it/s]

Epoch 3/1000, Train loss: 17218003.7368, Train mae: 3000.8059, Val loss: 16474540.0000, Val mae: 2970.7705
Сохраняем лучшую модель с метрикой mae: 2970.7705


Training:   0%|          | 0/19 [00:00<?, ?it/s]

Validation:   0%|          | 0/5 [00:00<?, ?it/s]

Epoch 4/1000, Train loss: 16567930.1053, Train mae: 2953.0950, Val loss: 15702457.2000, Val mae: 2910.0957
Сохраняем лучшую модель с метрикой mae: 2910.0957


Training:   0%|          | 0/19 [00:00<?, ?it/s]

Validation:   0%|          | 0/5 [00:00<?, ?it/s]

Epoch 5/1000, Train loss: 15612058.9474, Train mae: 2875.8118, Val loss: 14551942.6000, Val mae: 2803.4485
Сохраняем лучшую модель с метрикой mae: 2803.4485


Training:   0%|          | 0/19 [00:00<?, ?it/s]

Validation:   0%|          | 0/5 [00:00<?, ?it/s]

Epoch 6/1000, Train loss: 14361104.6842, Train mae: 2760.3044, Val loss: 12900314.6000, Val mae: 2634.5330
Сохраняем лучшую модель с метрикой mae: 2634.5330


Training:   0%|          | 0/19 [00:00<?, ?it/s]

Validation:   0%|          | 0/5 [00:00<?, ?it/s]

Epoch 7/1000, Train loss: 12796134.6842, Train mae: 2600.8000, Val loss: 11028994.8000, Val mae: 2433.8059
Сохраняем лучшую модель с метрикой mae: 2433.8059


Training:   0%|          | 0/19 [00:00<?, ?it/s]

Validation:   0%|          | 0/5 [00:00<?, ?it/s]

Epoch 8/1000, Train loss: 10982551.8947, Train mae: 2391.1379, Val loss: 9236196.0000, Val mae: 2207.5151
Сохраняем лучшую модель с метрикой mae: 2207.5151


Training:   0%|          | 0/19 [00:00<?, ?it/s]

Validation:   0%|          | 0/5 [00:00<?, ?it/s]

Epoch 9/1000, Train loss: 9043137.1316, Train mae: 2135.0728, Val loss: 7941694.2000, Val mae: 1998.9813
Сохраняем лучшую модель с метрикой mae: 1998.9813


Training:   0%|          | 0/19 [00:00<?, ?it/s]

Validation:   0%|          | 0/5 [00:00<?, ?it/s]

Epoch 10/1000, Train loss: 7133563.0789, Train mae: 1848.3549, Val loss: 5193469.7000, Val mae: 1565.9465
Сохраняем лучшую модель с метрикой mae: 1565.9465


Training:   0%|          | 0/19 [00:00<?, ?it/s]

Validation:   0%|          | 0/5 [00:00<?, ?it/s]

Epoch 11/1000, Train loss: 5480894.4474, Train mae: 1544.3003, Val loss: 4284051.6000, Val mae: 1321.5331
Сохраняем лучшую модель с метрикой mae: 1321.5331


Training:   0%|          | 0/19 [00:00<?, ?it/s]

Validation:   0%|          | 0/5 [00:00<?, ?it/s]

Epoch 12/1000, Train loss: 4314708.3289, Train mae: 1283.0709, Val loss: 4102255.5000, Val mae: 1233.5286
Сохраняем лучшую модель с метрикой mae: 1233.5286


Training:   0%|          | 0/19 [00:00<?, ?it/s]

Validation:   0%|          | 0/5 [00:00<?, ?it/s]

Epoch 13/1000, Train loss: 3848562.0526, Train mae: 1230.1996, Val loss: 3807088.5500, Val mae: 1233.6938
Нет улучшения в течение 1 эпох


Training:   0%|          | 0/19 [00:00<?, ?it/s]

Validation:   0%|          | 0/5 [00:00<?, ?it/s]

Epoch 14/1000, Train loss: 3751835.5789, Train mae: 1232.9912, Val loss: 3740425.8000, Val mae: 1191.3141
Сохраняем лучшую модель с метрикой mae: 1191.3141


Training:   0%|          | 0/19 [00:00<?, ?it/s]

Validation:   0%|          | 0/5 [00:00<?, ?it/s]

Epoch 15/1000, Train loss: 3688754.0395, Train mae: 1218.2343, Val loss: 3658540.9500, Val mae: 1209.9343
Нет улучшения в течение 1 эпох


Training:   0%|          | 0/19 [00:00<?, ?it/s]

Validation:   0%|          | 0/5 [00:00<?, ?it/s]

Epoch 16/1000, Train loss: 3660616.7763, Train mae: 1217.2234, Val loss: 3698995.8000, Val mae: 1233.9890
Нет улучшения в течение 2 эпох


Training:   0%|          | 0/19 [00:00<?, ?it/s]

Validation:   0%|          | 0/5 [00:00<?, ?it/s]

Epoch 17/1000, Train loss: 3590811.9605, Train mae: 1212.9056, Val loss: 3615913.4000, Val mae: 1200.4834
Нет улучшения в течение 3 эпох


Training:   0%|          | 0/19 [00:00<?, ?it/s]

Validation:   0%|          | 0/5 [00:00<?, ?it/s]

Epoch 18/1000, Train loss: 3575402.8816, Train mae: 1209.4240, Val loss: 3896725.0000, Val mae: 1190.2937
Сохраняем лучшую модель с метрикой mae: 1190.2937


Training:   0%|          | 0/19 [00:00<?, ?it/s]

Validation:   0%|          | 0/5 [00:00<?, ?it/s]

Epoch 19/1000, Train loss: 3595939.7105, Train mae: 1203.6512, Val loss: 3795234.7000, Val mae: 1276.5647
Нет улучшения в течение 1 эпох


Training:   0%|          | 0/19 [00:00<?, ?it/s]

Validation:   0%|          | 0/5 [00:00<?, ?it/s]

Epoch 20/1000, Train loss: 3475995.2632, Train mae: 1199.1082, Val loss: 4011197.5500, Val mae: 1294.5388
Нет улучшения в течение 2 эпох


Training:   0%|          | 0/19 [00:00<?, ?it/s]

Validation:   0%|          | 0/5 [00:00<?, ?it/s]

Epoch 21/1000, Train loss: 3405961.5132, Train mae: 1195.4229, Val loss: 3632093.4000, Val mae: 1210.5027
Нет улучшения в течение 3 эпох


Training:   0%|          | 0/19 [00:00<?, ?it/s]

Validation:   0%|          | 0/5 [00:00<?, ?it/s]

Epoch 22/1000, Train loss: 3404366.5526, Train mae: 1194.0480, Val loss: 3644471.2000, Val mae: 1191.4274
Нет улучшения в течение 4 эпох


Training:   0%|          | 0/19 [00:00<?, ?it/s]

Validation:   0%|          | 0/5 [00:00<?, ?it/s]

Epoch 23/1000, Train loss: 3342978.5263, Train mae: 1189.0792, Val loss: 3651136.1000, Val mae: 1228.3481
Нет улучшения в течение 5 эпох


Training:   0%|          | 0/19 [00:00<?, ?it/s]

Validation:   0%|          | 0/5 [00:00<?, ?it/s]

Epoch 24/1000, Train loss: 3363555.9474, Train mae: 1181.4467, Val loss: 3650705.5000, Val mae: 1200.8220
Нет улучшения в течение 6 эпох


Training:   0%|          | 0/19 [00:00<?, ?it/s]

Validation:   0%|          | 0/5 [00:00<?, ?it/s]

Epoch 25/1000, Train loss: 3279125.6974, Train mae: 1177.8409, Val loss: 3828234.4500, Val mae: 1182.5989
Сохраняем лучшую модель с метрикой mae: 1182.5989


Training:   0%|          | 0/19 [00:00<?, ?it/s]

Validation:   0%|          | 0/5 [00:00<?, ?it/s]

Epoch 26/1000, Train loss: 3252203.1579, Train mae: 1180.2190, Val loss: 3673739.1500, Val mae: 1228.7843
Нет улучшения в течение 1 эпох


Training:   0%|          | 0/19 [00:00<?, ?it/s]

Validation:   0%|          | 0/5 [00:00<?, ?it/s]

Epoch 27/1000, Train loss: 3208322.9079, Train mae: 1173.7385, Val loss: 3830107.0000, Val mae: 1184.7953
Нет улучшения в течение 2 эпох


Training:   0%|          | 0/19 [00:00<?, ?it/s]

Validation:   0%|          | 0/5 [00:00<?, ?it/s]

Epoch 28/1000, Train loss: 3185257.0789, Train mae: 1173.2975, Val loss: 3760264.4500, Val mae: 1278.0347
Нет улучшения в течение 3 эпох


Training:   0%|          | 0/19 [00:00<?, ?it/s]

Validation:   0%|          | 0/5 [00:00<?, ?it/s]

Epoch 29/1000, Train loss: 3153728.3947, Train mae: 1168.4672, Val loss: 3663151.9000, Val mae: 1218.9254
Нет улучшения в течение 4 эпох


Training:   0%|          | 0/19 [00:00<?, ?it/s]

Validation:   0%|          | 0/5 [00:00<?, ?it/s]

Epoch 30/1000, Train loss: 3132350.2763, Train mae: 1162.8906, Val loss: 3757586.5500, Val mae: 1182.5039
Сохраняем лучшую модель с метрикой mae: 1182.5039


Training:   0%|          | 0/19 [00:00<?, ?it/s]

Validation:   0%|          | 0/5 [00:00<?, ?it/s]

Epoch 31/1000, Train loss: 3103909.1184, Train mae: 1163.1898, Val loss: 4201436.7000, Val mae: 1304.0480
Нет улучшения в течение 1 эпох


Training:   0%|          | 0/19 [00:00<?, ?it/s]

Validation:   0%|          | 0/5 [00:00<?, ?it/s]

Epoch 32/1000, Train loss: 3078361.6447, Train mae: 1158.8154, Val loss: 3701126.5500, Val mae: 1182.6556
Нет улучшения в течение 2 эпох


Training:   0%|          | 0/19 [00:00<?, ?it/s]

Validation:   0%|          | 0/5 [00:00<?, ?it/s]

Epoch 33/1000, Train loss: 3040562.9342, Train mae: 1157.6251, Val loss: 3682963.4000, Val mae: 1220.7441
Нет улучшения в течение 3 эпох


Training:   0%|          | 0/19 [00:00<?, ?it/s]

Validation:   0%|          | 0/5 [00:00<?, ?it/s]

Epoch 34/1000, Train loss: 3046074.6974, Train mae: 1155.4351, Val loss: 3708806.7000, Val mae: 1210.7677
Нет улучшения в течение 4 эпох


Training:   0%|          | 0/19 [00:00<?, ?it/s]

Validation:   0%|          | 0/5 [00:00<?, ?it/s]

Epoch 35/1000, Train loss: 3018947.2105, Train mae: 1154.3625, Val loss: 3940862.8000, Val mae: 1190.4852
Нет улучшения в течение 5 эпох


Training:   0%|          | 0/19 [00:00<?, ?it/s]

Validation:   0%|          | 0/5 [00:00<?, ?it/s]

Epoch 36/1000, Train loss: 3005163.4868, Train mae: 1153.9227, Val loss: 3722117.6000, Val mae: 1213.1774
Нет улучшения в течение 6 эпох


Training:   0%|          | 0/19 [00:00<?, ?it/s]

Validation:   0%|          | 0/5 [00:00<?, ?it/s]

Epoch 37/1000, Train loss: 2951427.9474, Train mae: 1147.1807, Val loss: 3767357.7000, Val mae: 1175.2067
Сохраняем лучшую модель с метрикой mae: 1175.2067


Training:   0%|          | 0/19 [00:00<?, ?it/s]

Validation:   0%|          | 0/5 [00:00<?, ?it/s]

Epoch 38/1000, Train loss: 2948691.2763, Train mae: 1144.3381, Val loss: 3715668.7500, Val mae: 1178.2507
Нет улучшения в течение 1 эпох


Training:   0%|          | 0/19 [00:00<?, ?it/s]

Validation:   0%|          | 0/5 [00:00<?, ?it/s]

Epoch 39/1000, Train loss: 2930374.3289, Train mae: 1143.0785, Val loss: 3823451.4500, Val mae: 1200.8057
Нет улучшения в течение 2 эпох


Training:   0%|          | 0/19 [00:00<?, ?it/s]

Validation:   0%|          | 0/5 [00:00<?, ?it/s]

Epoch 40/1000, Train loss: 2943262.6842, Train mae: 1143.3665, Val loss: 3717933.8500, Val mae: 1188.0441
Нет улучшения в течение 3 эпох


Training:   0%|          | 0/19 [00:00<?, ?it/s]

Validation:   0%|          | 0/5 [00:00<?, ?it/s]

Epoch 41/1000, Train loss: 2942030.8553, Train mae: 1139.8966, Val loss: 3917823.5000, Val mae: 1186.9147
Нет улучшения в течение 4 эпох


Training:   0%|          | 0/19 [00:00<?, ?it/s]

Validation:   0%|          | 0/5 [00:00<?, ?it/s]

Epoch 42/1000, Train loss: 2901955.3947, Train mae: 1135.4130, Val loss: 3744474.0000, Val mae: 1201.1935
Нет улучшения в течение 5 эпох


Training:   0%|          | 0/19 [00:00<?, ?it/s]

Validation:   0%|          | 0/5 [00:00<?, ?it/s]

Epoch 43/1000, Train loss: 2878934.5921, Train mae: 1139.4635, Val loss: 3700197.1000, Val mae: 1206.5312
Нет улучшения в течение 6 эпох


Training:   0%|          | 0/19 [00:00<?, ?it/s]

Validation:   0%|          | 0/5 [00:00<?, ?it/s]

Epoch 44/1000, Train loss: 2886656.4737, Train mae: 1136.7701, Val loss: 3795499.5000, Val mae: 1185.3755
Нет улучшения в течение 7 эпох


Training:   0%|          | 0/19 [00:00<?, ?it/s]

Validation:   0%|          | 0/5 [00:00<?, ?it/s]

Epoch 45/1000, Train loss: 2868273.9737, Train mae: 1137.8246, Val loss: 3776895.7500, Val mae: 1203.0428
Нет улучшения в течение 8 эпох


Training:   0%|          | 0/19 [00:00<?, ?it/s]

Validation:   0%|          | 0/5 [00:00<?, ?it/s]

Epoch 46/1000, Train loss: 2867790.1842, Train mae: 1135.5657, Val loss: 3751529.3000, Val mae: 1201.7115
Нет улучшения в течение 9 эпох


Training:   0%|          | 0/19 [00:00<?, ?it/s]

Validation:   0%|          | 0/5 [00:00<?, ?it/s]

Epoch 47/1000, Train loss: 2834983.6711, Train mae: 1133.0852, Val loss: 3755053.0000, Val mae: 1187.2515
Останавливаем обучение из-за отсутствия улучшений в течение 10 эпох
Загружена лучшая модель


Predicting:   0%|          | 0/5 [00:00<?, ?it/s]

1175.2066374185233

In [None]:
1185.3089724793513
1182.7640692897073
1175.2066374185233