In [1]:
import sys
import pandas as pd
import numpy as np
pd.options.display.max_columns = None
from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_auc_score
from sklearn.metrics import accuracy_score
from sklearn.metrics import mean_absolute_error
import time

sys.path.append('../')

### Binary

In [2]:
train = pd.read_parquet('/www/dslib/spark_sota_modeling/dataset/home-credit-default-risk/train.parquet')
target_col = 'target'
index_col = 'sk_id_curr'
train, test = train_test_split(train, test_size=0.2, random_state=42, stratify=train[target_col])
X_train = train.drop(columns=[target_col, index_col])
y_train = train[target_col]
X_test = test.drop(columns=[target_col, index_col])
y_test = test[target_col]
with open('/www/dslib/spark_sota_modeling/dataset/home-credit-default-risk/categorical_features.txt', 'r') as f:
    categorical_features = [line.strip() for line in f.readlines()]
len(categorical_features)

16

In [None]:
from models.estimators.tabnet_estimator import TabNetBinary

model = TabNetBinary(
    d_model = 8,
    n_steps = 5,
    decision_dim = 512,
    n_shared = 4,
    n_independent = 4,
    glu_dropout = 0.2,
    dropout_emb = 0.6,
    glu_norm = 'batch',
    gamma = 1.5,
    lambda_sparse = 1e-4,
    batch_size = 1024,
    epochs = 200,
    learning_rate = 0.01,
    early_stopping_patience = 10,
    weight_decay = 1e-5,
    reducelronplateau_patience = 5,
    reducelronplateau_factor = 0.7,
    verbose = True,
    random_state = 42,
)
model.fit(X_train, y_train, eval_set=(X_test, y_test), eval_metric='roc_auc', mode='max', cat_features=categorical_features)
y_pred_proba = model.predict_proba(X_test, cat_features=categorical_features)
roc_auc_score(y_test, y_pred_proba[:,1])

In [None]:
from models.estimators.tabnet_estimator import TabNetBinary

for n_shared in [1, 2, 3, 4, 5, 6, 7, 8, 9]:
    start_time = time.time()
    model = TabNetBinary(
        d_model = 8,
        n_steps = 5,
        decision_dim = 64,
        n_shared = n_shared,
        n_independent = n_shared,
        glu_dropout = 0.3,
        dropout_emb = 0.3,
        glu_norm = 'batch',
        gamma = 1.5,
        lambda_sparse = 1e-5,
        batch_size = 1024,
        epochs = 200,
        learning_rate = 0.01,
        early_stopping_patience = 10,
        weight_decay = 1e-5,
        reducelronplateau_patience = 5,
        reducelronplateau_factor = 0.7,
        verbose = False,
        random_state = 42,
    )

    model.fit(X_train, y_train, eval_set=(X_test, y_test), eval_metric='roc_auc', mode='max', 
              cat_features=categorical_features)
    y_pred_proba = model.predict_proba(X_test, cat_features=categorical_features)
    formatted = time.strftime("%H:%M:%S", time.gmtime(time.time()-start_time))
    print(
        f"n_shared: {n_shared}, "
        f"ROC AUC: {roc_auc_score(y_test, y_pred_proba[:,1]):.4f}, "
        f"Time: {formatted}"
    )

### Multiclass

In [2]:
train = pd.read_parquet('/www/dslib/spark_sota_modeling/dataset/forest-cover-type/train.parquet')
target_col = 'cover_type'
train, test = train_test_split(train, test_size=0.2, random_state=42, stratify=train[target_col])
X_train = train.drop(columns=[target_col])
y_train = train[target_col]
X_test = test.drop(columns=[target_col])
y_test = test[target_col]
with open('/www/dslib/spark_sota_modeling/dataset/forest-cover-type/categorical_features.txt', 'r') as f:
    categorical_features = [line.strip() for line in f.readlines()]
len(categorical_features)

44

In [None]:
n_classes=train[target_col].nunique(),
d_model = 8,
n_steps = 6,
decision_dim = 128,
n_shared = 2,
n_independent = 2,
glu_dropout = 0.1,
dropout_emb = 0.1,
glu_norm = 'batch',
gamma = 1.5,
lambda_sparse = 1e-3,
batch_size = 1024,
epochs = 200,
learning_rate = 0.025,
early_stopping_patience = 15,
weight_decay = 1e-5,
reducelronplateau_patience = 5,
reducelronplateau_factor = 0.5,
verbose = True,
random_state = 42,
momentum=0.6
# 0.9256215416125229

n_classes=train[target_col].nunique(),
glu_dropout = 0.05,
dropout_emb = 0.05,
lambda_sparse = 1e-3,
momentum=0.1,
# 0.942256224021755

lambda_sparse = 1e-4,
momentum=0.1,
# 0.9434954347133895

lambda_sparse = 1e-5,
momentum=0.1,
# 0.9424799703966334

lambda_sparse = 1e-4,
learning_rate = 0.01,
momentum=0.1,
# 0.9477035876870649

momentum=0.6,
batch_size = 1024,
# 0.9477035876870649

momentum=0.1,
batch_size = 2048, # 16384
virtual_batch_size=512, # 512 m=0.01
# 0.946765574038536

momentum=0.1,
batch_size = 2048, # 16384
virtual_batch_size=512, # 512 m=0.1
# 0.9467053346299149

momentum=0.7,
batch_size = 2048, # 16384
virtual_batch_size=512, # 512 m=0.1

In [None]:
from models.estimators.tabnet_estimator import TabNetMulticlass

model = TabNetMulticlass(
    n_classes=train[target_col].nunique(),
    d_model = 8,
    n_steps = 6, # 5
    decision_dim = 128, # 64 + 64
    n_shared = 2, # 2
    n_independent = 2, # 2
    glu_dropout = 0.05,
    dropout_emb = 0.05,
    gamma = 1.5, # 1.5
    lambda_sparse = 0.0001, # 0.0001
    batch_size = 2048, # 16384
    virtual_batch_size=512, # 512
    momentum = 0.7, # 0.7
    epochs = 200, 
    learning_rate = 0.01, # 0.02
    early_stopping_patience = 15,
    weight_decay = 1e-5,
    reducelronplateau_patience = 5,
    reducelronplateau_factor = 0.5,
    verbose = True,
    random_state = 42,
)

model.fit(X_train, y_train, eval_set=(X_test, y_test), cat_features=categorical_features)

y_pred = model.predict(X_test, cat_features=categorical_features)
y_pred_proba = model.predict_proba(X_test, cat_features=categorical_features)
accuracy_score(y_test, y_pred)

Начинаем обучение на cuda...


Training:   0%|          | 0/227 [00:00<?, ?it/s]

Validation:   0%|          | 0/57 [00:00<?, ?it/s]

Epoch 1/200, Train loss: 0.5643, Train accuracy: 0.7601, Val loss: 0.4188, Val accuracy: 0.8213
---> Сохранена лучшая модель (Эпоха 1) с Val accuracy: 0.8213


Training:   0%|          | 0/227 [00:00<?, ?it/s]

Validation:   0%|          | 0/57 [00:00<?, ?it/s]

Epoch 2/200, Train loss: 0.3979, Train accuracy: 0.8319, Val loss: 0.3255, Val accuracy: 0.8639
---> Сохранена лучшая модель (Эпоха 2) с Val accuracy: 0.8639


Training:   0%|          | 0/227 [00:00<?, ?it/s]

Validation:   0%|          | 0/57 [00:00<?, ?it/s]

Epoch 3/200, Train loss: 0.3532, Train accuracy: 0.8516, Val loss: 0.2996, Val accuracy: 0.8756
---> Сохранена лучшая модель (Эпоха 3) с Val accuracy: 0.8756


Training:   0%|          | 0/227 [00:00<?, ?it/s]

Validation:   0%|          | 0/57 [00:00<?, ?it/s]

Epoch 4/200, Train loss: 0.3325, Train accuracy: 0.8613, Val loss: 0.2825, Val accuracy: 0.8826
---> Сохранена лучшая модель (Эпоха 4) с Val accuracy: 0.8826


Training:   0%|          | 0/227 [00:00<?, ?it/s]

Validation:   0%|          | 0/57 [00:00<?, ?it/s]

Epoch 5/200, Train loss: 0.3173, Train accuracy: 0.8675, Val loss: 0.2663, Val accuracy: 0.8909
---> Сохранена лучшая модель (Эпоха 5) с Val accuracy: 0.8909


Training:   0%|          | 0/227 [00:00<?, ?it/s]

Validation:   0%|          | 0/57 [00:00<?, ?it/s]

Epoch 6/200, Train loss: 0.3097, Train accuracy: 0.8710, Val loss: 0.2571, Val accuracy: 0.8924
---> Сохранена лучшая модель (Эпоха 6) с Val accuracy: 0.8924


Training:   0%|          | 0/227 [00:00<?, ?it/s]

Validation:   0%|          | 0/57 [00:00<?, ?it/s]

Epoch 7/200, Train loss: 0.3003, Train accuracy: 0.8751, Val loss: 0.2692, Val accuracy: 0.8877
Нет улучшения 1/15 эпох.


Training:   0%|          | 0/227 [00:00<?, ?it/s]

Validation:   0%|          | 0/57 [00:00<?, ?it/s]

Epoch 8/200, Train loss: 0.2685, Train accuracy: 0.8879, Val loss: 0.2151, Val accuracy: 0.9115
---> Сохранена лучшая модель (Эпоха 8) с Val accuracy: 0.9115


Training:   0%|          | 0/227 [00:00<?, ?it/s]

Validation:   0%|          | 0/57 [00:00<?, ?it/s]

Epoch 9/200, Train loss: 0.2633, Train accuracy: 0.8905, Val loss: 0.2094, Val accuracy: 0.9154
---> Сохранена лучшая модель (Эпоха 9) с Val accuracy: 0.9154


Training:   0%|          | 0/227 [00:00<?, ?it/s]

Validation:   0%|          | 0/57 [00:00<?, ?it/s]

Epoch 10/200, Train loss: 0.2588, Train accuracy: 0.8924, Val loss: 0.2082, Val accuracy: 0.9151
Нет улучшения 1/15 эпох.


Training:   0%|          | 0/227 [00:00<?, ?it/s]

Validation:   0%|          | 0/57 [00:00<?, ?it/s]

Epoch 11/200, Train loss: 0.2583, Train accuracy: 0.8932, Val loss: 0.2234, Val accuracy: 0.9087
Нет улучшения 2/15 эпох.


Training:   0%|          | 0/227 [00:00<?, ?it/s]

Validation:   0%|          | 0/57 [00:00<?, ?it/s]

Epoch 12/200, Train loss: 0.2560, Train accuracy: 0.8941, Val loss: 0.2040, Val accuracy: 0.9163
---> Сохранена лучшая модель (Эпоха 12) с Val accuracy: 0.9163


Training:   0%|          | 0/227 [00:00<?, ?it/s]

Validation:   0%|          | 0/57 [00:00<?, ?it/s]

Epoch 13/200, Train loss: 0.2531, Train accuracy: 0.8953, Val loss: 0.1962, Val accuracy: 0.9205
---> Сохранена лучшая модель (Эпоха 13) с Val accuracy: 0.9205


Training:   0%|          | 0/227 [00:00<?, ?it/s]

Validation:   0%|          | 0/57 [00:00<?, ?it/s]

Epoch 14/200, Train loss: 0.2356, Train accuracy: 0.9023, Val loss: 0.1784, Val accuracy: 0.9277
---> Сохранена лучшая модель (Эпоха 14) с Val accuracy: 0.9277


Training:   0%|          | 0/227 [00:00<?, ?it/s]

Validation:   0%|          | 0/57 [00:00<?, ?it/s]

Epoch 15/200, Train loss: 0.2295, Train accuracy: 0.9049, Val loss: 0.1746, Val accuracy: 0.9305
---> Сохранена лучшая модель (Эпоха 15) с Val accuracy: 0.9305


Training:   0%|          | 0/227 [00:00<?, ?it/s]

Validation:   0%|          | 0/57 [00:00<?, ?it/s]

Epoch 16/200, Train loss: 0.2281, Train accuracy: 0.9054, Val loss: 0.1729, Val accuracy: 0.9301
Нет улучшения 1/15 эпох.


Training:   0%|          | 0/227 [00:00<?, ?it/s]

Validation:   0%|          | 0/57 [00:00<?, ?it/s]

Epoch 17/200, Train loss: 0.2272, Train accuracy: 0.9059, Val loss: 0.1727, Val accuracy: 0.9300
Нет улучшения 2/15 эпох.


Training:   0%|          | 0/227 [00:00<?, ?it/s]

Validation:   0%|          | 0/57 [00:00<?, ?it/s]

Epoch 18/200, Train loss: 0.2258, Train accuracy: 0.9065, Val loss: 0.1708, Val accuracy: 0.9314
---> Сохранена лучшая модель (Эпоха 18) с Val accuracy: 0.9314


Training:   0%|          | 0/227 [00:00<?, ?it/s]

Validation:   0%|          | 0/57 [00:00<?, ?it/s]

Epoch 19/200, Train loss: 0.2248, Train accuracy: 0.9068, Val loss: 0.1703, Val accuracy: 0.9313
Нет улучшения 1/15 эпох.


Training:   0%|          | 0/227 [00:00<?, ?it/s]

Validation:   0%|          | 0/57 [00:00<?, ?it/s]

Epoch 20/200, Train loss: 0.2133, Train accuracy: 0.9111, Val loss: 0.1595, Val accuracy: 0.9358
---> Сохранена лучшая модель (Эпоха 20) с Val accuracy: 0.9358


Training:   0%|          | 0/227 [00:00<?, ?it/s]

Validation:   0%|          | 0/57 [00:00<?, ?it/s]

Epoch 21/200, Train loss: 0.2119, Train accuracy: 0.9121, Val loss: 0.1564, Val accuracy: 0.9372
---> Сохранена лучшая модель (Эпоха 21) с Val accuracy: 0.9372


Training:   0%|          | 0/227 [00:00<?, ?it/s]

Validation:   0%|          | 0/57 [00:00<?, ?it/s]

Epoch 22/200, Train loss: 0.2108, Train accuracy: 0.9122, Val loss: 0.1560, Val accuracy: 0.9376
---> Сохранена лучшая модель (Эпоха 22) с Val accuracy: 0.9376


Training:   0%|          | 0/227 [00:00<?, ?it/s]

Validation:   0%|          | 0/57 [00:00<?, ?it/s]

Epoch 23/200, Train loss: 0.2099, Train accuracy: 0.9130, Val loss: 0.1549, Val accuracy: 0.9378
---> Сохранена лучшая модель (Эпоха 23) с Val accuracy: 0.9378


Training:   0%|          | 0/227 [00:00<?, ?it/s]

Validation:   0%|          | 0/57 [00:00<?, ?it/s]

Epoch 24/200, Train loss: 0.2079, Train accuracy: 0.9135, Val loss: 0.1549, Val accuracy: 0.9375
Нет улучшения 1/15 эпох.


Training:   0%|          | 0/227 [00:00<?, ?it/s]

Validation:   0%|          | 0/57 [00:00<?, ?it/s]

Epoch 25/200, Train loss: 0.2073, Train accuracy: 0.9138, Val loss: 0.1541, Val accuracy: 0.9380
---> Сохранена лучшая модель (Эпоха 25) с Val accuracy: 0.9380


Training:   0%|          | 0/227 [00:00<?, ?it/s]

Validation:   0%|          | 0/57 [00:00<?, ?it/s]

Epoch 26/200, Train loss: 0.2028, Train accuracy: 0.9157, Val loss: 0.1472, Val accuracy: 0.9406
---> Сохранена лучшая модель (Эпоха 26) с Val accuracy: 0.9406


Training:   0%|          | 0/227 [00:00<?, ?it/s]

Validation:   0%|          | 0/57 [00:00<?, ?it/s]

Epoch 27/200, Train loss: 0.1988, Train accuracy: 0.9173, Val loss: 0.1461, Val accuracy: 0.9417
---> Сохранена лучшая модель (Эпоха 27) с Val accuracy: 0.9417


Training:   0%|          | 0/227 [00:00<?, ?it/s]

Validation:   0%|          | 0/57 [00:00<?, ?it/s]

Epoch 28/200, Train loss: 0.2000, Train accuracy: 0.9168, Val loss: 0.1455, Val accuracy: 0.9418
---> Сохранена лучшая модель (Эпоха 28) с Val accuracy: 0.9418


Training:   0%|          | 0/227 [00:00<?, ?it/s]

Validation:   0%|          | 0/57 [00:00<?, ?it/s]

Epoch 29/200, Train loss: 0.1999, Train accuracy: 0.9168, Val loss: 0.1453, Val accuracy: 0.9424
---> Сохранена лучшая модель (Эпоха 29) с Val accuracy: 0.9424


Training:   0%|          | 0/227 [00:00<?, ?it/s]

Validation:   0%|          | 0/57 [00:00<?, ?it/s]

Epoch 30/200, Train loss: 0.1980, Train accuracy: 0.9179, Val loss: 0.1441, Val accuracy: 0.9422
Нет улучшения 1/15 эпох.


Training:   0%|          | 0/227 [00:00<?, ?it/s]

Validation:   0%|          | 0/57 [00:00<?, ?it/s]

Epoch 31/200, Train loss: 0.1976, Train accuracy: 0.9178, Val loss: 0.1431, Val accuracy: 0.9428
---> Сохранена лучшая модель (Эпоха 31) с Val accuracy: 0.9428


Training:   0%|          | 0/227 [00:00<?, ?it/s]

Validation:   0%|          | 0/57 [00:00<?, ?it/s]

Epoch 32/200, Train loss: 0.1946, Train accuracy: 0.9190, Val loss: 0.1415, Val accuracy: 0.9437
---> Сохранена лучшая модель (Эпоха 32) с Val accuracy: 0.9437


Training:   0%|          | 0/227 [00:00<?, ?it/s]

Validation:   0%|          | 0/57 [00:00<?, ?it/s]

Epoch 33/200, Train loss: 0.1940, Train accuracy: 0.9197, Val loss: 0.1408, Val accuracy: 0.9441
---> Сохранена лучшая модель (Эпоха 33) с Val accuracy: 0.9441


Training:   0%|          | 0/227 [00:00<?, ?it/s]

Validation:   0%|          | 0/57 [00:00<?, ?it/s]

Epoch 34/200, Train loss: 0.1939, Train accuracy: 0.9196, Val loss: 0.1396, Val accuracy: 0.9449
---> Сохранена лучшая модель (Эпоха 34) с Val accuracy: 0.9449


Training:   0%|          | 0/227 [00:00<?, ?it/s]

Validation:   0%|          | 0/57 [00:00<?, ?it/s]

Epoch 35/200, Train loss: 0.1934, Train accuracy: 0.9197, Val loss: 0.1396, Val accuracy: 0.9450
---> Сохранена лучшая модель (Эпоха 35) с Val accuracy: 0.9450


Training:   0%|          | 0/227 [00:00<?, ?it/s]

Validation:   0%|          | 0/57 [00:00<?, ?it/s]

Epoch 36/200, Train loss: 0.1917, Train accuracy: 0.9204, Val loss: 0.1399, Val accuracy: 0.9439
Нет улучшения 1/15 эпох.


Training:   0%|          | 0/227 [00:00<?, ?it/s]

Validation:   0%|          | 0/57 [00:00<?, ?it/s]

Epoch 37/200, Train loss: 0.1926, Train accuracy: 0.9202, Val loss: 0.1392, Val accuracy: 0.9452
---> Сохранена лучшая модель (Эпоха 37) с Val accuracy: 0.9452


Training:   0%|          | 0/227 [00:00<?, ?it/s]

Validation:   0%|          | 0/57 [00:00<?, ?it/s]

Epoch 38/200, Train loss: 0.1906, Train accuracy: 0.9208, Val loss: 0.1383, Val accuracy: 0.9455
---> Сохранена лучшая модель (Эпоха 38) с Val accuracy: 0.9455


Training:   0%|          | 0/227 [00:00<?, ?it/s]

Validation:   0%|          | 0/57 [00:00<?, ?it/s]

Epoch 39/200, Train loss: 0.1901, Train accuracy: 0.9210, Val loss: 0.1375, Val accuracy: 0.9455
Нет улучшения 1/15 эпох.


Training:   0%|          | 0/227 [00:00<?, ?it/s]

Validation:   0%|          | 0/57 [00:00<?, ?it/s]

Epoch 40/200, Train loss: 0.1907, Train accuracy: 0.9207, Val loss: 0.1373, Val accuracy: 0.9458
---> Сохранена лучшая модель (Эпоха 40) с Val accuracy: 0.9458


Training:   0%|          | 0/227 [00:00<?, ?it/s]

Validation:   0%|          | 0/57 [00:00<?, ?it/s]

Epoch 41/200, Train loss: 0.1894, Train accuracy: 0.9215, Val loss: 0.1376, Val accuracy: 0.9453
Нет улучшения 1/15 эпох.


Training:   0%|          | 0/227 [00:00<?, ?it/s]

Validation:   0%|          | 0/57 [00:00<?, ?it/s]

Epoch 42/200, Train loss: 0.1897, Train accuracy: 0.9208, Val loss: 0.1382, Val accuracy: 0.9451
Нет улучшения 2/15 эпох.


Training:   0%|          | 0/227 [00:00<?, ?it/s]

Validation:   0%|          | 0/57 [00:00<?, ?it/s]

Epoch 43/200, Train loss: 0.1890, Train accuracy: 0.9214, Val loss: 0.1381, Val accuracy: 0.9450
Нет улучшения 3/15 эпох.


Training:   0%|          | 0/227 [00:00<?, ?it/s]

Validation:   0%|          | 0/57 [00:00<?, ?it/s]

Epoch 44/200, Train loss: 0.1888, Train accuracy: 0.9215, Val loss: 0.1366, Val accuracy: 0.9458
---> Сохранена лучшая модель (Эпоха 44) с Val accuracy: 0.9458


Training:   0%|          | 0/227 [00:00<?, ?it/s]

Validation:   0%|          | 0/57 [00:00<?, ?it/s]

Epoch 45/200, Train loss: 0.1881, Train accuracy: 0.9221, Val loss: 0.1363, Val accuracy: 0.9459
---> Сохранена лучшая модель (Эпоха 45) с Val accuracy: 0.9459


Training:   0%|          | 0/227 [00:00<?, ?it/s]

Validation:   0%|          | 0/57 [00:00<?, ?it/s]

Epoch 46/200, Train loss: 0.1881, Train accuracy: 0.9221, Val loss: 0.1366, Val accuracy: 0.9459
Нет улучшения 1/15 эпох.


Training:   0%|          | 0/227 [00:00<?, ?it/s]

Validation:   0%|          | 0/57 [00:00<?, ?it/s]

Epoch 47/200, Train loss: 0.1879, Train accuracy: 0.9221, Val loss: 0.1358, Val accuracy: 0.9462
---> Сохранена лучшая модель (Эпоха 47) с Val accuracy: 0.9462


Training:   0%|          | 0/227 [00:00<?, ?it/s]

Validation:   0%|          | 0/57 [00:00<?, ?it/s]

Epoch 48/200, Train loss: 0.1882, Train accuracy: 0.9218, Val loss: 0.1363, Val accuracy: 0.9461
Нет улучшения 1/15 эпох.


Training:   0%|          | 0/227 [00:00<?, ?it/s]

Validation:   0%|          | 0/57 [00:00<?, ?it/s]

Epoch 49/200, Train loss: 0.1877, Train accuracy: 0.9220, Val loss: 0.1360, Val accuracy: 0.9462
Нет улучшения 2/15 эпох.


Training:   0%|          | 0/227 [00:00<?, ?it/s]

Validation:   0%|          | 0/57 [00:00<?, ?it/s]

Epoch 50/200, Train loss: 0.1879, Train accuracy: 0.9219, Val loss: 0.1361, Val accuracy: 0.9463
---> Сохранена лучшая модель (Эпоха 50) с Val accuracy: 0.9463


Training:   0%|          | 0/227 [00:00<?, ?it/s]

Validation:   0%|          | 0/57 [00:00<?, ?it/s]

Epoch 51/200, Train loss: 0.1877, Train accuracy: 0.9217, Val loss: 0.1363, Val accuracy: 0.9460
Нет улучшения 1/15 эпох.


Training:   0%|          | 0/227 [00:00<?, ?it/s]

Validation:   0%|          | 0/57 [00:00<?, ?it/s]

Epoch 52/200, Train loss: 0.1886, Train accuracy: 0.9222, Val loss: 0.1356, Val accuracy: 0.9460
Нет улучшения 2/15 эпох.


Training:   0%|          | 0/227 [00:00<?, ?it/s]

Validation:   0%|          | 0/57 [00:00<?, ?it/s]

Epoch 53/200, Train loss: 0.1879, Train accuracy: 0.9221, Val loss: 0.1359, Val accuracy: 0.9462
Нет улучшения 3/15 эпох.


Training:   0%|          | 0/227 [00:00<?, ?it/s]

Validation:   0%|          | 0/57 [00:00<?, ?it/s]

Epoch 54/200, Train loss: 0.1870, Train accuracy: 0.9226, Val loss: 0.1355, Val accuracy: 0.9462
Нет улучшения 4/15 эпох.


Training:   0%|          | 0/227 [00:00<?, ?it/s]

Validation:   0%|          | 0/57 [00:00<?, ?it/s]

Epoch 55/200, Train loss: 0.1877, Train accuracy: 0.9223, Val loss: 0.1358, Val accuracy: 0.9463
Нет улучшения 5/15 эпох.


Training:   0%|          | 0/227 [00:00<?, ?it/s]

Validation:   0%|          | 0/57 [00:00<?, ?it/s]

Epoch 56/200, Train loss: 0.1865, Train accuracy: 0.9226, Val loss: 0.1357, Val accuracy: 0.9463
Нет улучшения 6/15 эпох.


Training:   0%|          | 0/227 [00:00<?, ?it/s]

Validation:   0%|          | 0/57 [00:00<?, ?it/s]

Epoch 57/200, Train loss: 0.1868, Train accuracy: 0.9226, Val loss: 0.1357, Val accuracy: 0.9461
Нет улучшения 7/15 эпох.


Training:   0%|          | 0/227 [00:00<?, ?it/s]

Validation:   0%|          | 0/57 [00:00<?, ?it/s]

Epoch 58/200, Train loss: 0.1870, Train accuracy: 0.9219, Val loss: 0.1354, Val accuracy: 0.9464
---> Сохранена лучшая модель (Эпоха 58) с Val accuracy: 0.9464


Training:   0%|          | 0/227 [00:00<?, ?it/s]

Validation:   0%|          | 0/57 [00:00<?, ?it/s]

Epoch 59/200, Train loss: 0.1883, Train accuracy: 0.9220, Val loss: 0.1350, Val accuracy: 0.9466
---> Сохранена лучшая модель (Эпоха 59) с Val accuracy: 0.9466


Training:   0%|          | 0/227 [00:00<?, ?it/s]

Validation:   0%|          | 0/57 [00:00<?, ?it/s]

Epoch 60/200, Train loss: 0.1867, Train accuracy: 0.9230, Val loss: 0.1355, Val accuracy: 0.9466
Нет улучшения 1/15 эпох.


Training:   0%|          | 0/227 [00:00<?, ?it/s]

Validation:   0%|          | 0/57 [00:00<?, ?it/s]

Epoch 61/200, Train loss: 0.1871, Train accuracy: 0.9223, Val loss: 0.1356, Val accuracy: 0.9465
Нет улучшения 2/15 эпох.


Training:   0%|          | 0/227 [00:00<?, ?it/s]

Validation:   0%|          | 0/57 [00:00<?, ?it/s]

Epoch 62/200, Train loss: 0.1870, Train accuracy: 0.9224, Val loss: 0.1353, Val accuracy: 0.9464
Нет улучшения 3/15 эпох.


Training:   0%|          | 0/227 [00:00<?, ?it/s]

Validation:   0%|          | 0/57 [00:00<?, ?it/s]

Epoch 63/200, Train loss: 0.1873, Train accuracy: 0.9221, Val loss: 0.1355, Val accuracy: 0.9462
Нет улучшения 4/15 эпох.


Training:   0%|          | 0/227 [00:00<?, ?it/s]

Validation:   0%|          | 0/57 [00:00<?, ?it/s]

Epoch 64/200, Train loss: 0.1870, Train accuracy: 0.9227, Val loss: 0.1349, Val accuracy: 0.9464
Нет улучшения 5/15 эпох.


Training:   0%|          | 0/227 [00:00<?, ?it/s]

Validation:   0%|          | 0/57 [00:00<?, ?it/s]

Epoch 65/200, Train loss: 0.1864, Train accuracy: 0.9229, Val loss: 0.1351, Val accuracy: 0.9466
Нет улучшения 6/15 эпох.


Training:   0%|          | 0/227 [00:00<?, ?it/s]

Validation:   0%|          | 0/57 [00:00<?, ?it/s]

Epoch 66/200, Train loss: 0.1859, Train accuracy: 0.9225, Val loss: 0.1355, Val accuracy: 0.9465
Нет улучшения 7/15 эпох.


Training:   0%|          | 0/227 [00:00<?, ?it/s]

Validation:   0%|          | 0/57 [00:00<?, ?it/s]

Epoch 67/200, Train loss: 0.1858, Train accuracy: 0.9228, Val loss: 0.1356, Val accuracy: 0.9460
Нет улучшения 8/15 эпох.


Training:   0%|          | 0/227 [00:00<?, ?it/s]

Validation:   0%|          | 0/57 [00:00<?, ?it/s]

Epoch 68/200, Train loss: 0.1874, Train accuracy: 0.9219, Val loss: 0.1351, Val accuracy: 0.9466
Нет улучшения 9/15 эпох.


Training:   0%|          | 0/227 [00:00<?, ?it/s]

Validation:   0%|          | 0/57 [00:00<?, ?it/s]

Epoch 69/200, Train loss: 0.1864, Train accuracy: 0.9223, Val loss: 0.1352, Val accuracy: 0.9463
Нет улучшения 10/15 эпох.


Training:   0%|          | 0/227 [00:00<?, ?it/s]

Validation:   0%|          | 0/57 [00:00<?, ?it/s]

Epoch 70/200, Train loss: 0.1870, Train accuracy: 0.9223, Val loss: 0.1353, Val accuracy: 0.9464
Нет улучшения 11/15 эпох.


Training:   0%|          | 0/227 [00:00<?, ?it/s]

Validation:   0%|          | 0/57 [00:00<?, ?it/s]

Epoch 71/200, Train loss: 0.1879, Train accuracy: 0.9219, Val loss: 0.1353, Val accuracy: 0.9467
---> Сохранена лучшая модель (Эпоха 71) с Val accuracy: 0.9467


Training:   0%|          | 0/227 [00:00<?, ?it/s]

Validation:   0%|          | 0/57 [00:00<?, ?it/s]

Epoch 72/200, Train loss: 0.1865, Train accuracy: 0.9227, Val loss: 0.1351, Val accuracy: 0.9466
Нет улучшения 1/15 эпох.


Training:   0%|          | 0/227 [00:00<?, ?it/s]

Validation:   0%|          | 0/57 [00:00<?, ?it/s]

Epoch 73/200, Train loss: 0.1878, Train accuracy: 0.9219, Val loss: 0.1351, Val accuracy: 0.9463
Нет улучшения 2/15 эпох.


Training:   0%|          | 0/227 [00:00<?, ?it/s]

Validation:   0%|          | 0/57 [00:00<?, ?it/s]

Epoch 74/200, Train loss: 0.1859, Train accuracy: 0.9228, Val loss: 0.1352, Val accuracy: 0.9464
Нет улучшения 3/15 эпох.


Training:   0%|          | 0/227 [00:00<?, ?it/s]

### Regression

In [2]:
train = pd.read_parquet('/www/dslib/spark_sota_modeling/dataset/allstate-claims-severity/train.parquet')
target_col = 'loss'
index_col = 'id'
train, test = train_test_split(train, test_size=0.2, random_state=42)
X_train = train.drop(columns=[target_col, index_col])
y_train = train[target_col]
X_test = test.drop(columns=[target_col, index_col])
y_test = test[target_col]
with open('/www/dslib/spark_sota_modeling/dataset/forest-cover-type/categorical_features.txt', 'r') as f:
    categorical_features = [line.strip() for line in f.readlines()]
len(categorical_features)

44

In [4]:
from models.estimators.tabnet_estimator import TabNetRegressor
model = TabNetRegressor(
    verbose=True,
    dropout=0.4,
    early_stopping_patience=10,
    batch_size=8192,
    virtual_batch_size=128,
    learning_rate=0.005,
    epochs=1000,
    dynamic_emb_size=True,
    n_glu_layers=4,
    n_steps=8,
)

model.fit(X_train, y_train, eval_set=(X_test, y_test), eval_metric='mae', mode='min', cat_features=categorical_features)
y_pred = model.predict(X_test, cat_features=categorical_features)
mean_absolute_error(y_test, y_pred)

Начинаем обучение на cuda...


Training:   0%|          | 0/19 [00:00<?, ?it/s]

Validation:   0%|          | 0/5 [00:00<?, ?it/s]

Epoch 1/1000, Train loss: 17736628.4211, Train mae: 3037.8191, Val loss: 17156994.6000, Val mae: 3020.7126
Сохраняем лучшую модель с метрикой mae: 3020.7126


Training:   0%|          | 0/19 [00:00<?, ?it/s]

Validation:   0%|          | 0/5 [00:00<?, ?it/s]

Epoch 2/1000, Train loss: 17544206.2632, Train mae: 3026.8928, Val loss: 16990062.2000, Val mae: 3009.5676
Сохраняем лучшую модель с метрикой mae: 3009.5676


Training:   0%|          | 0/19 [00:00<?, ?it/s]

Validation:   0%|          | 0/5 [00:00<?, ?it/s]

Epoch 3/1000, Train loss: 17218003.7368, Train mae: 3000.8059, Val loss: 16474540.0000, Val mae: 2970.7705
Сохраняем лучшую модель с метрикой mae: 2970.7705


Training:   0%|          | 0/19 [00:00<?, ?it/s]

Validation:   0%|          | 0/5 [00:00<?, ?it/s]

Epoch 4/1000, Train loss: 16567930.1053, Train mae: 2953.0950, Val loss: 15702457.2000, Val mae: 2910.0957
Сохраняем лучшую модель с метрикой mae: 2910.0957


Training:   0%|          | 0/19 [00:00<?, ?it/s]

Validation:   0%|          | 0/5 [00:00<?, ?it/s]

Epoch 5/1000, Train loss: 15612058.9474, Train mae: 2875.8118, Val loss: 14551942.6000, Val mae: 2803.4485
Сохраняем лучшую модель с метрикой mae: 2803.4485


Training:   0%|          | 0/19 [00:00<?, ?it/s]

Validation:   0%|          | 0/5 [00:00<?, ?it/s]

Epoch 6/1000, Train loss: 14361104.6842, Train mae: 2760.3044, Val loss: 12900314.6000, Val mae: 2634.5330
Сохраняем лучшую модель с метрикой mae: 2634.5330


Training:   0%|          | 0/19 [00:00<?, ?it/s]

Validation:   0%|          | 0/5 [00:00<?, ?it/s]

Epoch 7/1000, Train loss: 12796134.6842, Train mae: 2600.8000, Val loss: 11028994.8000, Val mae: 2433.8059
Сохраняем лучшую модель с метрикой mae: 2433.8059


Training:   0%|          | 0/19 [00:00<?, ?it/s]

Validation:   0%|          | 0/5 [00:00<?, ?it/s]

Epoch 8/1000, Train loss: 10982551.8947, Train mae: 2391.1379, Val loss: 9236196.0000, Val mae: 2207.5151
Сохраняем лучшую модель с метрикой mae: 2207.5151


Training:   0%|          | 0/19 [00:00<?, ?it/s]

Validation:   0%|          | 0/5 [00:00<?, ?it/s]

Epoch 9/1000, Train loss: 9043137.1316, Train mae: 2135.0728, Val loss: 7941694.2000, Val mae: 1998.9813
Сохраняем лучшую модель с метрикой mae: 1998.9813


Training:   0%|          | 0/19 [00:00<?, ?it/s]

Validation:   0%|          | 0/5 [00:00<?, ?it/s]

Epoch 10/1000, Train loss: 7133563.0789, Train mae: 1848.3549, Val loss: 5193469.7000, Val mae: 1565.9465
Сохраняем лучшую модель с метрикой mae: 1565.9465


Training:   0%|          | 0/19 [00:00<?, ?it/s]

Validation:   0%|          | 0/5 [00:00<?, ?it/s]

Epoch 11/1000, Train loss: 5480894.4474, Train mae: 1544.3003, Val loss: 4284051.6000, Val mae: 1321.5331
Сохраняем лучшую модель с метрикой mae: 1321.5331


Training:   0%|          | 0/19 [00:00<?, ?it/s]

Validation:   0%|          | 0/5 [00:00<?, ?it/s]

Epoch 12/1000, Train loss: 4314708.3289, Train mae: 1283.0709, Val loss: 4102255.5000, Val mae: 1233.5286
Сохраняем лучшую модель с метрикой mae: 1233.5286


Training:   0%|          | 0/19 [00:00<?, ?it/s]

Validation:   0%|          | 0/5 [00:00<?, ?it/s]

Epoch 13/1000, Train loss: 3848562.0526, Train mae: 1230.1996, Val loss: 3807088.5500, Val mae: 1233.6938
Нет улучшения в течение 1 эпох


Training:   0%|          | 0/19 [00:00<?, ?it/s]

Validation:   0%|          | 0/5 [00:00<?, ?it/s]

Epoch 14/1000, Train loss: 3751835.5789, Train mae: 1232.9912, Val loss: 3740425.8000, Val mae: 1191.3141
Сохраняем лучшую модель с метрикой mae: 1191.3141


Training:   0%|          | 0/19 [00:00<?, ?it/s]

Validation:   0%|          | 0/5 [00:00<?, ?it/s]

Epoch 15/1000, Train loss: 3688754.0395, Train mae: 1218.2343, Val loss: 3658540.9500, Val mae: 1209.9343
Нет улучшения в течение 1 эпох


Training:   0%|          | 0/19 [00:00<?, ?it/s]

Validation:   0%|          | 0/5 [00:00<?, ?it/s]

Epoch 16/1000, Train loss: 3660616.7763, Train mae: 1217.2234, Val loss: 3698995.8000, Val mae: 1233.9890
Нет улучшения в течение 2 эпох


Training:   0%|          | 0/19 [00:00<?, ?it/s]

Validation:   0%|          | 0/5 [00:00<?, ?it/s]

Epoch 17/1000, Train loss: 3590811.9605, Train mae: 1212.9056, Val loss: 3615913.4000, Val mae: 1200.4834
Нет улучшения в течение 3 эпох


Training:   0%|          | 0/19 [00:00<?, ?it/s]

Validation:   0%|          | 0/5 [00:00<?, ?it/s]

Epoch 18/1000, Train loss: 3575402.8816, Train mae: 1209.4240, Val loss: 3896725.0000, Val mae: 1190.2937
Сохраняем лучшую модель с метрикой mae: 1190.2937


Training:   0%|          | 0/19 [00:00<?, ?it/s]

Validation:   0%|          | 0/5 [00:00<?, ?it/s]

Epoch 19/1000, Train loss: 3595939.7105, Train mae: 1203.6512, Val loss: 3795234.7000, Val mae: 1276.5647
Нет улучшения в течение 1 эпох


Training:   0%|          | 0/19 [00:00<?, ?it/s]

Validation:   0%|          | 0/5 [00:00<?, ?it/s]

Epoch 20/1000, Train loss: 3475995.2632, Train mae: 1199.1082, Val loss: 4011197.5500, Val mae: 1294.5388
Нет улучшения в течение 2 эпох


Training:   0%|          | 0/19 [00:00<?, ?it/s]

Validation:   0%|          | 0/5 [00:00<?, ?it/s]

Epoch 21/1000, Train loss: 3405961.5132, Train mae: 1195.4229, Val loss: 3632093.4000, Val mae: 1210.5027
Нет улучшения в течение 3 эпох


Training:   0%|          | 0/19 [00:00<?, ?it/s]

Validation:   0%|          | 0/5 [00:00<?, ?it/s]

Epoch 22/1000, Train loss: 3404366.5526, Train mae: 1194.0480, Val loss: 3644471.2000, Val mae: 1191.4274
Нет улучшения в течение 4 эпох


Training:   0%|          | 0/19 [00:00<?, ?it/s]

Validation:   0%|          | 0/5 [00:00<?, ?it/s]

Epoch 23/1000, Train loss: 3342978.5263, Train mae: 1189.0792, Val loss: 3651136.1000, Val mae: 1228.3481
Нет улучшения в течение 5 эпох


Training:   0%|          | 0/19 [00:00<?, ?it/s]

Validation:   0%|          | 0/5 [00:00<?, ?it/s]

Epoch 24/1000, Train loss: 3363555.9474, Train mae: 1181.4467, Val loss: 3650705.5000, Val mae: 1200.8220
Нет улучшения в течение 6 эпох


Training:   0%|          | 0/19 [00:00<?, ?it/s]

Validation:   0%|          | 0/5 [00:00<?, ?it/s]

Epoch 25/1000, Train loss: 3279125.6974, Train mae: 1177.8409, Val loss: 3828234.4500, Val mae: 1182.5989
Сохраняем лучшую модель с метрикой mae: 1182.5989


Training:   0%|          | 0/19 [00:00<?, ?it/s]

Validation:   0%|          | 0/5 [00:00<?, ?it/s]

Epoch 26/1000, Train loss: 3252203.1579, Train mae: 1180.2190, Val loss: 3673739.1500, Val mae: 1228.7843
Нет улучшения в течение 1 эпох


Training:   0%|          | 0/19 [00:00<?, ?it/s]

Validation:   0%|          | 0/5 [00:00<?, ?it/s]

Epoch 27/1000, Train loss: 3208322.9079, Train mae: 1173.7385, Val loss: 3830107.0000, Val mae: 1184.7953
Нет улучшения в течение 2 эпох


Training:   0%|          | 0/19 [00:00<?, ?it/s]

Validation:   0%|          | 0/5 [00:00<?, ?it/s]

Epoch 28/1000, Train loss: 3185257.0789, Train mae: 1173.2975, Val loss: 3760264.4500, Val mae: 1278.0347
Нет улучшения в течение 3 эпох


Training:   0%|          | 0/19 [00:00<?, ?it/s]

Validation:   0%|          | 0/5 [00:00<?, ?it/s]

Epoch 29/1000, Train loss: 3153728.3947, Train mae: 1168.4672, Val loss: 3663151.9000, Val mae: 1218.9254
Нет улучшения в течение 4 эпох


Training:   0%|          | 0/19 [00:00<?, ?it/s]

Validation:   0%|          | 0/5 [00:00<?, ?it/s]

Epoch 30/1000, Train loss: 3132350.2763, Train mae: 1162.8906, Val loss: 3757586.5500, Val mae: 1182.5039
Сохраняем лучшую модель с метрикой mae: 1182.5039


Training:   0%|          | 0/19 [00:00<?, ?it/s]

Validation:   0%|          | 0/5 [00:00<?, ?it/s]

Epoch 31/1000, Train loss: 3103909.1184, Train mae: 1163.1898, Val loss: 4201436.7000, Val mae: 1304.0480
Нет улучшения в течение 1 эпох


Training:   0%|          | 0/19 [00:00<?, ?it/s]

Validation:   0%|          | 0/5 [00:00<?, ?it/s]

Epoch 32/1000, Train loss: 3078361.6447, Train mae: 1158.8154, Val loss: 3701126.5500, Val mae: 1182.6556
Нет улучшения в течение 2 эпох


Training:   0%|          | 0/19 [00:00<?, ?it/s]

Validation:   0%|          | 0/5 [00:00<?, ?it/s]

Epoch 33/1000, Train loss: 3040562.9342, Train mae: 1157.6251, Val loss: 3682963.4000, Val mae: 1220.7441
Нет улучшения в течение 3 эпох


Training:   0%|          | 0/19 [00:00<?, ?it/s]

Validation:   0%|          | 0/5 [00:00<?, ?it/s]

Epoch 34/1000, Train loss: 3046074.6974, Train mae: 1155.4351, Val loss: 3708806.7000, Val mae: 1210.7677
Нет улучшения в течение 4 эпох


Training:   0%|          | 0/19 [00:00<?, ?it/s]

Validation:   0%|          | 0/5 [00:00<?, ?it/s]

Epoch 35/1000, Train loss: 3018947.2105, Train mae: 1154.3625, Val loss: 3940862.8000, Val mae: 1190.4852
Нет улучшения в течение 5 эпох


Training:   0%|          | 0/19 [00:00<?, ?it/s]

Validation:   0%|          | 0/5 [00:00<?, ?it/s]

Epoch 36/1000, Train loss: 3005163.4868, Train mae: 1153.9227, Val loss: 3722117.6000, Val mae: 1213.1774
Нет улучшения в течение 6 эпох


Training:   0%|          | 0/19 [00:00<?, ?it/s]

Validation:   0%|          | 0/5 [00:00<?, ?it/s]

Epoch 37/1000, Train loss: 2951427.9474, Train mae: 1147.1807, Val loss: 3767357.7000, Val mae: 1175.2067
Сохраняем лучшую модель с метрикой mae: 1175.2067


Training:   0%|          | 0/19 [00:00<?, ?it/s]

Validation:   0%|          | 0/5 [00:00<?, ?it/s]

Epoch 38/1000, Train loss: 2948691.2763, Train mae: 1144.3381, Val loss: 3715668.7500, Val mae: 1178.2507
Нет улучшения в течение 1 эпох


Training:   0%|          | 0/19 [00:00<?, ?it/s]

Validation:   0%|          | 0/5 [00:00<?, ?it/s]

Epoch 39/1000, Train loss: 2930374.3289, Train mae: 1143.0785, Val loss: 3823451.4500, Val mae: 1200.8057
Нет улучшения в течение 2 эпох


Training:   0%|          | 0/19 [00:00<?, ?it/s]

Validation:   0%|          | 0/5 [00:00<?, ?it/s]

Epoch 40/1000, Train loss: 2943262.6842, Train mae: 1143.3665, Val loss: 3717933.8500, Val mae: 1188.0441
Нет улучшения в течение 3 эпох


Training:   0%|          | 0/19 [00:00<?, ?it/s]

Validation:   0%|          | 0/5 [00:00<?, ?it/s]

Epoch 41/1000, Train loss: 2942030.8553, Train mae: 1139.8966, Val loss: 3917823.5000, Val mae: 1186.9147
Нет улучшения в течение 4 эпох


Training:   0%|          | 0/19 [00:00<?, ?it/s]

Validation:   0%|          | 0/5 [00:00<?, ?it/s]

Epoch 42/1000, Train loss: 2901955.3947, Train mae: 1135.4130, Val loss: 3744474.0000, Val mae: 1201.1935
Нет улучшения в течение 5 эпох


Training:   0%|          | 0/19 [00:00<?, ?it/s]

Validation:   0%|          | 0/5 [00:00<?, ?it/s]

Epoch 43/1000, Train loss: 2878934.5921, Train mae: 1139.4635, Val loss: 3700197.1000, Val mae: 1206.5312
Нет улучшения в течение 6 эпох


Training:   0%|          | 0/19 [00:00<?, ?it/s]

Validation:   0%|          | 0/5 [00:00<?, ?it/s]

Epoch 44/1000, Train loss: 2886656.4737, Train mae: 1136.7701, Val loss: 3795499.5000, Val mae: 1185.3755
Нет улучшения в течение 7 эпох


Training:   0%|          | 0/19 [00:00<?, ?it/s]

Validation:   0%|          | 0/5 [00:00<?, ?it/s]

Epoch 45/1000, Train loss: 2868273.9737, Train mae: 1137.8246, Val loss: 3776895.7500, Val mae: 1203.0428
Нет улучшения в течение 8 эпох


Training:   0%|          | 0/19 [00:00<?, ?it/s]

Validation:   0%|          | 0/5 [00:00<?, ?it/s]

Epoch 46/1000, Train loss: 2867790.1842, Train mae: 1135.5657, Val loss: 3751529.3000, Val mae: 1201.7115
Нет улучшения в течение 9 эпох


Training:   0%|          | 0/19 [00:00<?, ?it/s]

Validation:   0%|          | 0/5 [00:00<?, ?it/s]

Epoch 47/1000, Train loss: 2834983.6711, Train mae: 1133.0852, Val loss: 3755053.0000, Val mae: 1187.2515
Останавливаем обучение из-за отсутствия улучшений в течение 10 эпох
Загружена лучшая модель


Predicting:   0%|          | 0/5 [00:00<?, ?it/s]

1175.2066374185233

In [None]:
1185.3089724793513
1182.7640692897073
1175.2066374185233