# Example of parameters

https://github.com/sberbank-ai-lab/LightAutoML/blob/master/lightautoml/automl/presets/tabular_config.yml

# TabularAutoML init for regression task

Import libraries

In [None]:
import os
import logging

import pandas as pd

from lightautoml.automl.presets.tabular_presets import TabularAutoML, TabularUtilizedAutoML
from lightautoml.tasks import Task
import multiprocessing as mp

from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split

Prepare dataset

In [None]:
# Load the preprocessed feature matrix from the pickled checkpoint.
matrix = pd.read_pickle("checkpoint_final.pkl")

# Store float32 columns as float16 so the frame takes less RAM.
floatcols = [name for name in matrix.columns if matrix[name].dtype == "float32"]
matrix[floatcols] = matrix[floatcols].astype("float16")

# Clip the monthly item count to the [0, 20] range.
matrix["item_cnt_month"] = matrix["item_cnt_month"].clip(0, 20)

# Earlier months are excluded because their features are distorted (e.g. wrong item age).
keep_from_month = 12
# Month held out as the validation set.
test_month = 33
# Features removed to reduce overfitting.
dropcols = ["shop_id", "item_id", "new_item"]

# Training rows: keep_from_month <= date_block_num < test_month.
train = matrix.loc[
    (matrix.date_block_num >= keep_from_month) & (matrix.date_block_num < test_month)
].drop(columns=dropcols)
# Validation rows: exactly the held-out month.
valid = matrix.loc[matrix.date_block_num == test_month].drop(columns=dropcols)

# The full matrix is no longer needed once the two splits exist.
del matrix

Set params

In [None]:
# Shared settings for the AutoML runs in this notebook.
N_THREADS = mp.cpu_count()  # CPU count reported by multiprocessing
RANDOM_STATE = 42
N_FOLDS = 5
TIMEOUT = 2 * 60 * 60  # 7200; presumably seconds (two hours) -- confirm against TabularAutoML docs
TARGET_NAME = "item_cnt_month"


# Regression task using MSE as both the loss and the metric.
task = Task("reg", metric="mse", loss="mse")
# Only the target column role is declared explicitly.
roles = dict(target=TARGET_NAME)

Init TabularAutoML instance

In [None]:
# `use_algos` holds two inner lists here: three algorithms in the first,
# a tuned LightGBM alone in the second.
automl = TabularAutoML(
    task=task,
    timeout=TIMEOUT,
    memory_limit=30,
    cpu_limit=N_THREADS,
    general_params=dict(
        use_algos=[
            ["linear_l2", "lgb_tuned", "cb_tuned"],
            ["lgb_tuned"],
        ],
    ),
    reader_params=dict(
        cv=N_FOLDS,
        random_state=RANDOM_STATE,
        n_jobs=N_THREADS,
    ),
    timing_params=dict(mode=0),
)


# Fit on the training frame and keep the returned predictions as `oof_pred`.
oof_pred = automl.fit_predict(train, roles=roles)

# Set custom split (TimeSeriesIterator)

An example can be found here:
https://github.com/sberbank-ai-lab/LightAutoML/blob/master/tests/demo12.py

In [None]:
from lightautoml.validation.np_iterators import TimeSeriesIterator



# A custom CV iterator works only for one-level models, so `use_algos`
# contains a single inner list here.
automl = TabularAutoML(task=task,
                       timeout=TIMEOUT,
                       memory_limit=30,
                       cpu_limit=N_THREADS,
                       general_params={'use_algos':
                                          [['linear_l2', 'lgb_tuned', 'cb_tuned']
                                           ]},
                       reader_params={'cv': N_FOLDS,
                                      'random_state': RANDOM_STATE,
                                      'n_jobs': N_THREADS},
                       timing_params={'mode': 0}
                       )


# Thresholds (date_block_num values) by which the time-series split is made.
dates = [27, 28, 29, 30, 31, 32]
# The iterator must be built from the time column, not the target: the
# thresholds above are month numbers, while `item_cnt_month` is clipped to
# [0, 20], so splitting on it would put every row into the same fold.
tss = TimeSeriesIterator(train.date_block_num, date_splits=dates)
oof_pred = automl.fit_predict(train, cv_iter=tss, roles=roles)

Prediction and RMSE calculation

In [None]:
# Predict on the held-out month. `valid` is the frame selected earlier with
# date_block_num == test_month; the previously referenced `test_data` is never
# defined in this notebook.
test_pred = automl.predict(valid)

# RMSE computed as sqrt(MSE). This avoids the `squared=False` argument, which
# is deprecated in scikit-learn 1.4 and removed in 1.6.
rmse = mean_squared_error(valid.item_cnt_month, test_pred.data) ** 0.5

# Using GPU on LAMA - ?

In [1]:
def split_by_dates(datetime_col, splitter):
    """Assign every value of ``datetime_col`` to a fold delimited by thresholds.

    The thresholds are sorted first, so ``splitter`` may be given in any order.
    A value's fold index is the number of thresholds strictly smaller than it:
    values not exceeding the smallest threshold get fold ``0`` and values above
    the largest threshold get fold ``len(splitter)``.

    Args:
        datetime_col: Column with values that can be interpreted
          as time/ordinal values (ex: np.datetime64).
        splitter: List of thresholds, comparable with the values
          of ``datetime_col``.

    Returns:
        folds: Array with the fold index of each element of ``datetime_col``.

    """
    # Imported locally because this file has no module-level numpy import.
    import numpy as np

    splitter = np.sort(splitter)
    # Default side='left': a value equal to a threshold falls into the fold
    # that ends at that threshold.
    folds = np.searchsorted(splitter, datetime_col)

    return folds