In [1]:
import numpy as np
import pandas as pd
import gc
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import train_test_split

# LightAutoML tabular preset and task definition
from lightautoml.automl.presets.tabular_presets import TabularAutoML
from lightautoml.tasks import Task

In [3]:
# Directory holding the competition CSV files. Defined once so both reads use
# exactly the same location (the second path previously contained a stray
# doubled backslash). Adjust this single constant when running elsewhere.
DATA_DIR = r'C:\Users\satis\OneDrive\Desktop\Data Science\DataSet\playground-series-s4e7'

# Both files are indexed by their 'id' column.
train = pd.read_csv(DATA_DIR + r'\train.csv', index_col='id')
test = pd.read_csv(DATA_DIR + r'\test.csv', index_col='id')

In [4]:
# Hold out 20% of the training frame for validation. The split is shuffled,
# seeded for repeatability, and stratified on the 'Response' column.
X_train, X_val = train_test_split(
    train,
    test_size=0.2,
    random_state=42,
    shuffle=True,
    stratify=train['Response'],
)

In [7]:
# Binary task definition handed to the preset below.
task = Task('binary')

# TabularAutoML preset restricted to the single 'denselight' algorithm.
# NOTE(review): the recorded fit log shows the run taking far longer than the
# 300-second budget configured here — confirm the intended timeout.
automl = TabularAutoML(
    task=task,
    timeout=300,  # seconds; an earlier setting was 600 * 3600
    cpu_limit=2,
    gpu_ids='0',
    general_params={'use_algos': [['denselight']]},
    nn_params={
        'n_epochs': 10,
        'bs': 1024,
        'num_workers': 0,
        'path_to_save': None,
        'freeze_defaults': True,
        'cont_embedder': 'plr',
        'cat_embedder': 'weighted',
        'act_fun': 'SiLU',
        'hidden_size': [512, 128],  # an earlier setting was 32
        'stop_by_metric': True,
        'embedding_size': 32,
        'verbose_bar': True,
        'snap_params': {
            'k': 2,
            'early_stopping': True,
            'patience': 1,
            'swa': True,
        },
        'opt_params': {
            'lr': 0.0003,
            'weight_decay': 0,
        },
    },
    nn_pipeline_params={'use_qnt': True, 'use_te': False},
    reader_params={
        'n_jobs': 1,
        'cv': 5,
        'random_state': 42,
        'advanced_roles': True,
    },
)



In [9]:
# Fit the preset on the training split, passing the hold-out split as
# validation data. 'Response' is the target; no columns are dropped explicitly.
# NOTE(review): because valid_data is supplied, the returned predictions are
# presumably for X_val rather than true out-of-fold predictions — confirm.
out_of_fold_predictions = automl.fit_predict(
    X_train,
    valid_data=X_val,
    roles=dict(target='Response', drop=[]),
    verbose=4,
)

[22:48:20] Stdout logging level is DEBUG.
[22:48:20] Task: binary

[22:48:20] Start automl preset with listed constraints:
[22:48:20] - time: 300.00 seconds
[22:48:20] - CPU: 2 cores
[22:48:20] - memory: 16 GB

[22:48:20] [1mTrain data shape: (9203838, 11)[0m

[22:48:29] Feats was rejected during automatic roles guess: []
[22:48:29] Layer [1m1[0m train process start. Time left 290.94 secs
[22:48:51] number of text features: 0 
[22:48:51] number of categorical features: 8 
[22:48:51] number of continuous features: 2 


train (loss=0.2491): 100%|██████████| 8988/8988 [18:21<00:00,  8.16it/s]  
val: 100%|██████████| 2248/2248 [01:54<00:00, 19.55it/s]


[23:09:12] Epoch: 0, train loss: 0.24909962713718414, val loss: 0.24440772831439972, val metric: 0.890978893707482


train (loss=0.242869): 100%|██████████| 8988/8988 [13:51<00:00, 10.81it/s]
val: 100%|██████████| 2248/2248 [01:56<00:00, 19.32it/s]


[23:25:02] Epoch: 1, train loss: 0.24286898970603943, val loss: 0.2428414225578308, val metric: 0.8920264361969843


train (loss=0.241009): 100%|██████████| 8988/8988 [13:40<00:00, 10.96it/s]
val: 100%|██████████| 2248/2248 [18:12<00:00,  2.06it/s]   


[23:56:57] Epoch: 2, train loss: 0.24100887775421143, val loss: 0.242805615067482, val metric: 0.8920922775296694


train (loss=0.239401): 100%|██████████| 8988/8988 [19:22<00:00,  7.73it/s]
val: 100%|██████████| 2248/2248 [02:47<00:00, 13.44it/s]


[00:19:09] Epoch: 3, train loss: 0.2394014447927475, val loss: 0.24285581707954407, val metric: 0.8920530368858552


train (loss=0.237905): 100%|██████████| 8988/8988 [18:59<00:00,  7.89it/s]
val: 100%|██████████| 2248/2248 [02:58<00:00, 12.57it/s]


[00:41:10] Epoch: 4, train loss: 0.23790539801120758, val loss: 0.24323755502700806, val metric: 0.8917466926020413


val: 100%|██████████| 2248/2248 [02:36<00:00, 14.38it/s]


[00:43:49] Early stopping: val loss: 0.2425648421049118, val metric: 0.8923657195890699
[00:43:49] [1mLvl_0_Pipe_0_Mod_0_TorchNN_denselight_0[0m fitting and predicting completed
[00:43:49] Time left -6628.87 secs

[00:43:49] Time limit exceeded. Last level models will be blended and unused pipelines will be pruned.

[00:43:49] [1mLayer 1 training completed.[0m

[00:43:50] [1mAutoml preset training completed in 6929.49 seconds[0m

[00:43:50] Model description:
Final prediction for new objects (level 0) = 
	 1.00000 * (1 averaged models Lvl_0_Pipe_0_Mod_0_TorchNN_denselight_0) 



In [10]:
# Run a full garbage collection pass after training (generation 2 is the
# default); the cell displays the value returned by the call.
gc.collect(generation=2)

0

In [16]:
# Raw prediction values returned by fit_predict.
# NOTE(review): assumed to be a NumPy array — confirm.
data = out_of_fold_predictions.data

# Wrap the values in a DataFrame so they can be written out with pandas.
df = pd.DataFrame(data=data)

# Write the frame to the working directory without the row index.
df.to_csv(
    path_or_buf='oof_DenceLight_LightAutoML_dsLightAutoMLlgb.csv',
    index=False,
)

In [19]:
# Score the test frame with the fitted preset, then keep only the first
# column of the returned prediction values.
pred_proba = automl.predict(test)
pred_proba = pred_proba.data[:, 0]

test: 100%|██████████| 7491/7491 [05:18<00:00, 23.54it/s]


In [25]:
# Persist the test-set predictions in the working directory.
# pred_proba is a plain array slice when the notebook is run top to bottom, so
# it is wrapped in a DataFrame here; the wrap is harmless if pred_proba has
# already been converted to a DataFrame. This keeps the cell independent of
# the order in which neighbouring cells were executed.
pd.DataFrame(pred_proba).to_csv(
    'pred_proba_DenceLight_LightAutoML_dsLightAutoMLlgb.csv',
    index=False,
)

In [24]:
# Keep pred_proba as a DataFrame, as other cells may rely on that.
pred_proba = pd.DataFrame(pred_proba)

# Build the submission frame: ids come from the test index (test is read with
# index_col='id') and the prediction column is named after the target.
# NOTE(review): column names 'id' / 'Response' are assumed to match the
# competition's sample_submission.csv — confirm.
submission = pd.DataFrame({
    'id': test.index,
    'Response': pred_proba.iloc[:, 0].to_numpy(),
})

# Write to a separate file. Writing to sample_submission.csv would overwrite
# the dataset's template with id-less predictions.
submission.to_csv(
    r'C:\Users\satis\OneDrive\Desktop\Data Science\DataSet\playground-series-s4e7\submission.csv',
    index=False,
)

In [28]:
# Load the sample-submission file from the dataset directory.
ss = pd.read_csv(
    filepath_or_buffer=r'C:\Users\satis\OneDrive\Desktop\Data Science\DataSet\playground-series-s4e7\sample_submission.csv',
)

In [None]:
# Score the test frame and keep the first column of the prediction values.
# NOTE(review): the same statement already appears in an earlier cell;
# re-running it repeats the whole inference pass — consider removing.
pred_proba = automl.predict(test)
pred_proba = pred_proba.data[:, 0]

test: 100%|██████████| 7491/7491 [05:18<00:00, 23.54it/s]


In [None]:
# Score the test frame and keep the first column of the prediction values.
# NOTE(review): the same statement already appears in an earlier cell;
# re-running it repeats the whole inference pass — consider removing.
pred_proba = automl.predict(test)
pred_proba = pred_proba.data[:, 0]

test: 100%|██████████| 7491/7491 [05:18<00:00, 23.54it/s]


In [None]:
# Score the test frame and keep the first column of the prediction values.
# NOTE(review): the same statement already appears in an earlier cell;
# re-running it repeats the whole inference pass — consider removing.
pred_proba = automl.predict(test)
pred_proba = pred_proba.data[:, 0]

test: 100%|██████████| 7491/7491 [05:18<00:00, 23.54it/s]


In [None]:
# Score the test frame and keep the first column of the prediction values.
# NOTE(review): the same statement already appears in an earlier cell;
# re-running it repeats the whole inference pass — consider removing.
pred_proba = automl.predict(test)
pred_proba = pred_proba.data[:, 0]

test: 100%|██████████| 7491/7491 [05:18<00:00, 23.54it/s]
