In [None]:
import os
from itertools import combinations
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import warnings
warnings.filterwarnings('ignore')
import optuna

from sklearn.model_selection import KFold
from sklearn.metrics import mean_absolute_error
import lightgbm as lgbm
import xgboost as xgb
import seaborn as sns

optuna.logging.set_verbosity(optuna.logging.WARNING)
import warnings
warnings.filterwarnings('ignore')

from lightgbm import *
pd.set_option("display.max_columns", None)
import time
import numba
import gc

In [None]:
# Load the competition training set from the Kaggle input directory.
df = pd.read_csv('/kaggle/input/optiver-trading-at-the-close/train.csv')

In [None]:
df.head()

In [None]:
# Missing values per column before cleaning.
df.isna().sum()

In [None]:
# Drop rows that lack either the label (`target`) or `wap`.
df.dropna(subset=['target', 'wap'], inplace=True)

In [None]:
# Missing values per column after cleaning.
df.isna().sum()

In [None]:
# False: the last 25% of rows are held out and the model is scored on them
#        (see the train_test_split cell and the MAE cell further down).
# True:  the split is skipped, so the final model is fitted on every row.
IS_ONLINE = False

In [None]:
def reduce_mem_usage(df, cols=None, use_float16=True):  # shrink the dataset so it fits into RAM
    """Downcast numeric columns of ``df`` in place to the narrowest dtype that fits.

    Signed-integer columns become the smallest of int8/int16/int32 whose range
    contains the column's min and max (bounds inclusive). Floating-point columns
    become float16 when their range fits and ``use_float16`` is true, otherwise
    float32. Every other dtype (object/string, bool, unsigned, datetime,
    categorical, pandas extension dtypes) is left untouched.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to shrink; its columns are reassigned in place.
    cols : iterable of column labels, optional
        Columns to process. ``None`` (default) means every column.
    use_float16 : bool, default True
        Allow float16 as a target dtype. float16 carries only about three
        significant decimal digits, so pass ``False`` when that precision loss
        matters.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object, for call chaining.
    """
    start_mem = df.memory_usage().sum() / 1024**2
    print(f'Memory usage of dataframe is {start_mem:.2f} MB')

    # `cols == None` compares element-wise when cols is an Index/ndarray and then
    # raises inside `if`; identity is the correct test for "not supplied".
    if cols is None:
        cols = df.columns

    int_candidates = (np.int8, np.int16, np.int32)
    for col in cols:
        col_type = df[col].dtype
        # Only plain NumPy signed-int and float columns are downcast.
        kind = col_type.kind if isinstance(col_type, np.dtype) else None
        if kind not in ('i', 'f'):
            continue

        c_min = df[col].min()
        c_max = df[col].max()
        if kind == 'i':
            for candidate in int_candidates:
                limits = np.iinfo(candidate)
                # Inclusive bounds: a value equal to the dtype limit still fits.
                if c_min >= limits.min and c_max <= limits.max:
                    df[col] = df[col].astype(candidate)
                    break
        else:
            half = np.finfo(np.float16)
            # NaN min/max (empty or all-NaN column) fails both comparisons and
            # therefore falls through to float32.
            if use_float16 and c_min >= half.min and c_max <= half.max:
                df[col] = df[col].astype(np.float16)
            else:
                df[col] = df[col].astype(np.float32)

    end_mem = df.memory_usage().sum() / 1024**2
    print(f'Memory usage after optimization is: {end_mem:.2f} MB')
    # Guard the percentage against a zero-byte frame.
    decrease = 100 * (start_mem - end_mem) / start_mem if start_mem else 0.0
    print(f'Decreased by {decrease:.2f}%')

    return df

In [None]:
# Names of the pairwise "imb1" features, each of the form '<c1>+<c2>_imb1'.
# feature_engineering computes (c1 - c2) / (c1 + c2) for every name found in
# best_imb1_feats[:-105] whose two parts are both base columns, and it also
# feeds the first 10 entries into its rolling means and the first 5 entries
# into its rolling std and momentum features.
# NOTE(review): the order looks like a descending feature-importance ranking
# pasted from the selection cells further down — confirm before reordering,
# because the slices above depend on position.
best_imb1_feats = ['wap+near_price_imb1', 'bid_price+reference_price_imb1', 'bid_price+near_price_imb1', 'ask_price+near_price_imb1', 'ask_price+reference_price_imb1', 'imbalance_buy_sell_flag+seconds_in_bucket_imb1', 'near_price+reference_price_imb1', 'wap+bid_price_imb1', 'wap+reference_price_imb1', 'imbalance_size_matched_size_div+near_price_imb1', 'wap+ask_price_imb1', 'ask_price+bid_price_imb1', 'imbalance_size_matched_size_div+seconds_in_bucket_imb1', 'bid_price_ask_price_diff+reference_price_wap_diff_imb1', 'imbalance_size_matched_size_div+bid_price_ask_price_diff_imb1', 'imbalance_size_matched_size_div+far_price_imb1', 'imbalance_size_matched_size_div+imbalance_buy_sell_flag_imb1', 'bid_price+seconds_in_bucket_imb1', 'far_price+imbalance_buy_sell_flag_imb1', 'wap+imbalance_buy_sell_flag_imb1', 'ask_price+imbalance_buy_sell_flag_imb1', 'matched_size+imbalance_size_imb1', 'bid_price_ask_price_diff+far_price_imb1', 'imbalance_size_matched_size_div+near_price_far_price_diff_imb1', 'bid_price+imbalance_buy_sell_flag_imb1', 'reference_price+imbalance_buy_sell_flag_imb1', 'reference_price+seconds_in_bucket_imb1', 'imbalance_size_matched_size_div+reference_price_wap_diff_imb1', 'bid_price_ask_price_diff+imbalance_buy_sell_flag_imb1', 'near_price+seconds_in_bucket_imb1', 'near_price+imbalance_buy_sell_flag_imb1', 'wap+seconds_in_bucket_imb1', 'far_price+seconds_in_bucket_imb1', 'reference_price_wap_diff+reference_price_imb1', 'imbalance_size_signed+matched_size_imb1', 'reference_price_wap_diff+wap_imb1', 'matched_size_bid_ask_size_div+imbalance_size_matched_size_div_imb1', 'ask_price+seconds_in_bucket_imb1', 'imbalance_size_matched_size_div+reference_price_imb1', 'bid_size_ask_size_div+bid_price_ask_price_diff_imb1', 'bid_size+seconds_in_bucket_imb1', 'ask_size+matched_size_imb1', 'ask_price+far_price_imb1', 'reference_price_wap_diff+bid_price_imb1', 'bid_price+far_price_imb1', 'target_volatility+bid_size_ask_size_div_imb1', 
'matched_size_bid_ask_size_div+seconds_in_bucket_imb1', 'matched_size_bid_ask_size_div+bid_price_ask_price_diff_imb1', 'imbalance_size_matched_size_div+bid_price_imb1', 'imbalance_size_matched_size_div+wap_imb1', 'bid_size_ask_size_div+reference_price_wap_diff_imb1', 'ask_size+seconds_in_bucket_imb1', 'matched_size_bid_ask_size_div+far_price_imb1', 'imbalance_size_matched_size_div+ask_price_imb1', 'matched_size_bid_ask_size_div+near_price_imb1', 'matched_size_bid_ask_size_div+imbalance_buy_sell_flag_imb1', 'reference_price_wap_diff+ask_price_imb1', 'far_price+reference_price_imb1', 'wap+far_price_imb1', 'matched_size_bid_ask_size_div+bid_size_ask_size_div_imb1', 'matched_size_bid_ask_size_div+imbalance_size_signed_ask_size_div_imb1', 'imbalance_size_signed_ask_size_div+imbalance_size_matched_size_div_imb1', 'near_price_far_price_diff+imbalance_buy_sell_flag_imb1', 'reference_price_wap_diff+imbalance_buy_sell_flag_imb1', 'bid_price_ask_price_diff+bid_price_imb1', 'bid_price_ask_price_diff+ask_price_imb1', 'imbalance_size_signed_ask_size_div+near_price_imb1', 'bid_price_ask_price_diff+reference_price_imb1', 'near_price_far_price_diff+reference_price_imb1', 'imbalance_size_signed_ask_size_div+near_price_far_price_diff_imb1', 'bid_size_ask_size_div+reference_price_imb1', 'bid_size_ask_size_div+seconds_in_bucket_imb1', 'ask_size+bid_size_imb1', 'near_price+far_price_imb1', 'near_price_far_price_diff+bid_price_imb1', 'bid_price_ask_price_diff+near_price_far_price_diff_imb1', 'matched_size_bid_ask_size_div+bid_size_imb1', 'bid_size+matched_size_imb1', 'imbalance_size_signed+seconds_in_bucket_imb1', 'target_volatility+reference_price_wap_diff_imb1', 'imbalance_size_signed_bid_size_div+imbalance_size_matched_size_div_imb1', 'target_volatility+matched_size_bid_ask_size_div_imb1', 'target_volatility+imbalance_size_signed_ask_size_div_imb1', 'imbalance_size_signed_bid_size_div+near_price_imb1', 'imbalance_size_signed_ask_size_div+imbalance_buy_sell_flag_imb1', 
'bid_size_ask_size_div+imbalance_buy_sell_flag_imb1', 'imbalance_size_signed_ask_size_div+far_price_imb1', 'imbalance_size_signed_ask_size_div+ask_size_imb1', 'near_price_far_price_diff+far_price_imb1', 'matched_size_bid_ask_size_div+ask_size_imb1', 'target_volatility+near_price_far_price_diff_imb1', 'bid_size_ask_size_div+ask_size_imb1', 'imbalance_size_signed_bid_size_div+imbalance_buy_sell_flag_imb1', 'bid_price_ask_price_diff+near_price_imb1', 'imbalance_size_signed_ask_size_div+seconds_in_bucket_imb1', 'matched_size_bid_ask_size_div+bid_price_imb1', 'near_price_far_price_diff+near_price_imb1', 'bid_size_ask_size_div+ask_price_imb1', 'bid_price_ask_price_diff+wap_imb1', 'target_volatility+bid_size_imb1', 'target_volatility+imbalance_size_signed_bid_size_div_imb1', 'imbalance_size_signed_ask_size_div+bid_price_imb1', 'imbalance_size_signed_ask_size_div+ask_price_imb1', 'imbalance_size_signed_bid_size_div+bid_size_imb1', 'bid_size+imbalance_size_imb1', 'bid_size_ask_size_div+bid_price_imb1', 'imbalance_size_signed+ask_size_imb1', 'target_volatility+bid_price_ask_price_diff_imb1', 'near_price_far_price_diff+ask_price_imb1', 'reference_price_wap_diff+near_price_imb1', 'imbalance_size_signed_ask_size_div+reference_price_imb1', 'imbalance_size_signed_bid_size_div+ask_size_imb1', 'bid_size_ask_size_div+wap_imb1', 'matched_size_bid_ask_size_div+imbalance_size_signed_imb1', 'imbalance_size_signed_bid_size_div+near_price_far_price_diff_imb1', 'reference_price_wap_diff+far_price_imb1', 'matched_size_bid_ask_size_div+wap_imb1', 'matched_size_bid_ask_size_div+reference_price_wap_diff_imb1', 'matched_size+seconds_in_bucket_imb1', 'imbalance_size_signed_ask_size_div+wap_imb1', 'imbalance_size_signed_ask_size_div+imbalance_size_signed_bid_size_div_imb1', 'imbalance_size_signed_ask_size_div+bid_price_ask_price_diff_imb1', 'near_price_far_price_diff+wap_imb1', 'matched_size_bid_ask_size_div+ask_price_imb1', 'imbalance_size_signed_bid_size_div+seconds_in_bucket_imb1', 
'bid_size_ask_size_div+imbalance_size_matched_size_div_imb1', 'bid_size_ask_size_div+imbalance_size_signed_ask_size_div_imb1', 'bid_size_ask_size_div+imbalance_size_signed_bid_size_div_imb1', 'imbalance_size_signed+imbalance_buy_sell_flag_imb1', 'imbalance_size_signed_bid_size_div+bid_price_ask_price_diff_imb1', 'target_volatility+ask_size_imb1', 'matched_size_bid_ask_size_div+imbalance_size_signed_bid_size_div_imb1', 'matched_size_bid_ask_size_div+near_price_far_price_diff_imb1', 'matched_size_bid_ask_size_div+reference_price_imb1', 'near_price_far_price_diff+reference_price_wap_diff_imb1', 'bid_size_ask_size_div+far_price_imb1', 'imbalance_size_signed_bid_size_div+far_price_imb1', 'imbalance_size_signed_bid_size_div+wap_imb1', 'imbalance_size_signed_ask_size_div+bid_size_imb1', 'imbalance_size+seconds_in_bucket_imb1', 'imbalance_size_signed_bid_size_div+bid_price_imb1', 'imbalance_size_signed_bid_size_div+ask_price_imb1', 'bid_size_ask_size_div+near_price_imb1', 'bid_size_ask_size_div+near_price_far_price_diff_imb1', 'imbalance_size_signed_ask_size_div+reference_price_wap_diff_imb1', 'imbalance_size_signed+bid_size_imb1', 'bid_size+far_price_imb1', 'ask_size+imbalance_size_imb1', 'imbalance_size_matched_size_div+bid_size_imb1', 'bid_size+imbalance_buy_sell_flag_imb1', 'imbalance_size_signed_ask_size_div+imbalance_size_signed_imb1', 'matched_size_bid_ask_size_div+imbalance_size_imb1', 'ask_size+bid_price_imb1', 'ask_size+reference_price_imb1', 'imbalance_size_signed_ask_size_div+imbalance_size_imb1', 'imbalance_buy_sell_flag+imbalance_size_imb1', 'imbalance_size_signed_bid_size_div+imbalance_size_imb1', 'imbalance_size_signed_bid_size_div+reference_price_wap_diff_imb1', 'reference_price_wap_diff+seconds_in_bucket_imb1', 'target_volatility+imbalance_size_imb1', 'imbalance_size_signed_bid_size_div+reference_price_imb1', 'imbalance_size_signed_ask_size_div+matched_size_imb1', 'bid_size+reference_price_imb1', 'ask_size+near_price_imb1', 'ask_size+ask_price_imb1', 
'ask_size+imbalance_buy_sell_flag_imb1', 'imbalance_size_signed_bid_size_div+imbalance_size_signed_imb1', 'wap+bid_size_imb1', 'imbalance_size_signed_bid_size_div+matched_size_imb1', 'imbalance_size_matched_size_div+ask_size_imb1', 'bid_size_ask_size_div+imbalance_size_imb1', 'ask_size+far_price_imb1', 'imbalance_size_signed+ask_price_imb1', 'ask_price+bid_size_imb1', 'bid_size_ask_size_div+bid_size_imb1', 'bid_size_ask_size_div+imbalance_size_signed_imb1', 'target_volatility+imbalance_size_signed_imb1', 'near_price_far_price_diff+seconds_in_bucket_imb1']
# Base columns used by feature_engineering for each family of time-series
# features (rolling mean, rolling std, lag, momentum).
#
# These used to be built with list(set([...])). String hashing is randomised
# per Python process, so the resulting order -- and with it the order of the
# generated feature columns -- changed from one kernel restart to the next.
# They are now plain, de-duplicated literals in first-seen order, which keeps
# the same members and makes the column order reproducible.
best_movingav_feats = [
    'imbalance_size_signed',
    'bid_price_ask_price_diff',
    'matched_size_bid_ask_size_div',
    'imbalance_size_matched_size_div',
    'ask_price',
    'bid_price',
]
best_std_feats = [
    'bid_price_ask_price_diff',
    'matched_size_bid_ask_size_div',
    'matched_size',
    'bid_size',
    'ask_size',
    'bid_size_ask_size_div',
    'ask_price',
    'imbalance_size_matched_size_div',
    'imbalance_size',
    'imbalance_size_signed_bid_size_div',
    'reference_price_wap_diff',
    'imbalance_size_signed_ask_size_div',
    'near_price',
    'imbalance_size_signed',
    'bid_price',
    'near_price_far_price_diff',
    'far_price',
    'reference_price',
    'imbalance_buy_sell_flag',
    'wap',
]
best_lagged_feats = [
    'matched_size',
    'imbalance_size_signed',
    'far_price',
    'imbalance_size_matched_size_div',
    'near_price',
    'near_price_far_price_diff',
    'imbalance_size',
    'imbalance_size_signed_bid_size_div',
    'ask_price',
    'bid_price_ask_price_diff',
    'bid_price',
    'reference_price_wap_diff',
    'imbalance_size_signed_ask_size_div',
    'reference_price',
    'wap',
    'bid_size',
    'ask_size',
    'matched_size_bid_ask_size_div',
    'bid_size_ask_size_div',
    'imbalance_buy_sell_flag',
]
best_momentum_feats = [
    'matched_size',
    'imbalance_size_signed',
    'far_price',
    'imbalance_size_matched_size_div',
    'near_price',
    'near_price_far_price_diff',
    'imbalance_size',
    'imbalance_size_signed_bid_size_div',
    'ask_price',
    'bid_price_ask_price_diff',
    'bid_price',
    'reference_price_wap_diff',
    'imbalance_size_signed_ask_size_div',
    'reference_price',
    'wap',
    'bid_size',
    'ask_size',
    'matched_size_bid_ask_size_div',
    'bid_size_ask_size_div',
    'imbalance_buy_sell_flag',
]

def feature_cols(df):
    """Return ``df`` without the identifier/bookkeeping columns.

    The columns 'row_id', 'time_id', 'date_id', 'date_stock_id', 'stock_id'
    and 'currently_scored' are removed when present; all remaining columns are
    kept in their original order. The input frame itself is not modified.
    """
    non_features = ('row_id', 'time_id', 'date_id', 'date_stock_id', 'stock_id', 'currently_scored')
    kept = list(filter(lambda name: name not in non_features, df.columns))
    return df[kept]

def feature_engineering(df, training=False, last_df=None):
    """Build the model's feature matrix from raw rows.

    Parameters
    ----------
    df : pandas.DataFrame
        Raw rows; must contain ``stock_id`` and the raw price/size columns used
        below. With ``training=True`` it must also contain ``time_id`` and the
        new columns are added to this frame in place. Otherwise it is the batch
        to score and is left unmodified (a concatenated copy is used).
    training : bool, default False
        True for the full training frame: per-``time_id`` normalisation, timing
        prints and memory down-casting are enabled. False for scoring.
    last_df : list of pandas.DataFrame, optional
        Earlier raw batches, oldest first. They are prepended to ``df`` so lag,
        rolling and momentum features can look back. Only used when
        ``training`` is False; ``None`` means no history.

    Returns
    -------
    pandas.DataFrame
        Feature columns only (``feature_cols`` removes the identifier columns).
        When scoring, only the rows belonging to ``df`` are returned.
    """
    size = df.shape[0]
    if not training:
        # The default last_df=None used to fail on `None + [df]`.
        history = list(last_df) if last_df is not None else []
        df = pd.concat(history + [df], ignore_index=True)
    # Position of the first row of the current batch (0 when training).
    # Using an explicit start also handles an empty batch, where
    # `df.iloc[-0:]` would have returned the entire history.
    first_current = len(df) - size

    # Simple differences and ratios of the raw columns.
    df['reference_price_wap_diff'] = df['reference_price'] - df['wap']
    df['near_price_far_price_diff'] = df['near_price'] - df['far_price']
    df['bid_price_ask_price_diff'] = df['bid_price'] - df['ask_price']
    df['imbalance_size_signed'] = df['imbalance_size'] * df['imbalance_buy_sell_flag']
    df['imbalance_size_matched_size_div'] = df['imbalance_size_signed'] / df['matched_size']
    df['imbalance_size_signed_bid_size_div'] = df['imbalance_size_signed'] / df['bid_size']
    df['imbalance_size_signed_ask_size_div'] = df['imbalance_size_signed'] / df['ask_size']
    df['bid_size_ask_size_div'] = df['bid_size'] / df['ask_size']
    df['matched_size_bid_ask_size_div'] = df['matched_size'] / (df['bid_size'] + df['ask_size'])

    # Candidate inputs for the pairwise features; captured before the *_divsum
    # columns exist, so those are never paired.
    base_feat = [c for c in df.columns if c not in ['row_id', 'time_id', 'date_id', 'stock_id']]

    # Each value as a share of the cross-sectional total.
    e = ['matched_size', 'imbalance_buy_sell_flag', 'imbalance_size_signed', 'imbalance_size']
    if training:
        a = df[['time_id'] + e].groupby('time_id').transform('sum')
        for i in e:
            df[i + '_divsum'] = df[i] / a[i]
        del a
        gc.collect()
    else:
        # Normalise by the current batch only. Summing over history + batch (as
        # before) produced values on a different scale from the per-time_id
        # shares seen in training.
        # Assumes one scoring batch corresponds to one time_id — TODO confirm.
        totals = df[e].iloc[first_current:].sum()
        for i in e:
            df[i + '_divsum'] = df[i] / totals[i]

    start_time = time.time()
    # Normalised difference (c1 - c2) / (c1 + c2) for the selected ordered pairs.
    # The slice is taken once and stored as a set so the membership test inside
    # the double loop is O(1); the loop order (and so the column order) is kept.
    selected_imb1 = set(best_imb1_feats[:-105])
    for c1 in base_feat:
        for c2 in base_feat:
            name = c1 + '+' + c2 + '_imb1'
            if name in selected_imb1:
                df[name] = (df[c1] - df[c2]) / (df[c1] + df[c2])
    if training:
        print(time.time() - start_time)

    start_time = time.time()
    # imb2 metric for wap, ask_price and bid_price:
    # (max - mid) / (mid - min) of the three values in each row.
    p = ['wap', 'ask_price', 'bid_price']
    max_ = df[p].max(axis=1)
    min_ = df[p].min(axis=1)
    mid_ = df[p].sum(axis=1) - min_ - max_
    df['wap_ask_bid_imb2'] = (max_ - mid_) / (mid_ - min_)
    del max_, min_, mid_
    gc.collect()
    if training:
        print(time.time() - start_time)
        # NOTE(review): down-casting happens only in training mode, so the
        # lag/rolling features below are computed from down-cast values in
        # training but from full-precision values when scoring — confirm this
        # mismatch is acceptable.
        df = reduce_mem_usage(df)

    start_time = time.time()
    # Previous row's value per stock.
    q = best_lagged_feats
    a = df[['stock_id'] + q].groupby(['stock_id'], as_index=False).shift(1)
    for c1 in q:
        df[c1 + '+lagged'] = a[c1]
    del a
    gc.collect()
    if training:
        print(time.time() - start_time)

    start_time = time.time()
    # Rolling means per stock, plus differences between long and short windows.
    q = best_movingav_feats + best_imb1_feats[:10]
    for window in [3, 5, 7, 10]:
        # sort_index() puts the grouped result back in row order before the
        # index-aligned assignment below.
        a = df[['stock_id'] + q].groupby(['stock_id'], as_index=False).rolling(window).mean(engine='numba').sort_index()
        for i in q:
            df[i + '+movingav' + str(window)] = a[i]
        del a
        gc.collect()
    for window_l in [3, 5, 7, 10]:
        for window_s in [3, 5, 7, 10]:
            if window_s < window_l:
                for i in q:
                    df[i + '+movingav_diff' + str(window_l) + str(window_s)] = \
                    df[i + '+movingav' + str(window_l)] - df[i + '+movingav' + str(window_s)]
    if training:
        print(time.time() - start_time)
        df = reduce_mem_usage(df)

    start_time = time.time()
    # Rolling standard deviations per stock.
    q = best_std_feats + best_imb1_feats[:5]
    for window in [5, 10]:
        a = df[['stock_id'] + q].groupby(['stock_id'], as_index=False).rolling(window).std(engine='numba').sort_index()
        for i in q:
            df[i + '+std' + str(window)] = a[i]
        del a
        gc.collect()
    if training:
        print(time.time() - start_time)

    start_time = time.time()
    # Change versus the value 1 and 2 rows earlier for the same stock.
    q = best_momentum_feats + best_imb1_feats[:5]
    for length in [1, 2]:
        a = df[['stock_id'] + q].groupby(['stock_id'], as_index=False).shift(length)
        for i in q:
            df[i + '+momentum' + str(length)] = df[i] - a[i]
        del a
        gc.collect()
    if training:
        print(time.time() - start_time)

    df = feature_cols(df)
    if training:
        df = reduce_mem_usage(df)
    else:
        # Keep only the rows of the batch being scored.
        df = df.iloc[first_current:]
    return df

In [None]:
# Build the training feature matrix; `target` is removed first so it cannot
# end up among the features, and is kept separately as the label vector.
x_train = feature_engineering(df.drop(columns='target'), training=True)
y_train = df['target'].values

In [None]:
x_train.head(197)

In [None]:
from sklearn.model_selection import train_test_split
# Offline mode only: hold out the last 25% of rows (shuffle=False keeps row order).
# NOTE: this rebinds x_train / y_train, so running the cell a second time
# splits the already-reduced training set again.
if not IS_ONLINE:
    x_train, x_test, y_train, y_test = train_test_split(x_train, y_train, shuffle=False, test_size=0.25)

In [None]:


def objective(trial):
    """Optuna objective: hold-out MAE of a LightGBM model for a sampled learning rate.

    Only ``learning_rate`` is tuned (log-uniform in [0.04, 0.06]); every other
    hyper-parameter is fixed. The function reads the module-level ``x_train``,
    ``y_train``, ``x_test`` and ``y_test``, so the hold-out split must exist
    before a study is run.

    Parameters
    ----------
    trial : optuna.trial.Trial
        Trial used to sample the learning rate.

    Returns
    -------
    float
        Mean absolute error on the hold-out set (lower is better).
    """
    # Each key appears once: the original literal repeated 'device' and
    # 'random_state', where the later duplicate silently overrides the earlier.
    params = {
        'max_depth': 9,
        'n_estimators': 804,
        'subsample': 0.5,
        'reg_alpha': 0.07781911066465229,
        'reg_lambda': 0.0030475099211469707,
        'colsample_bytree': 0.9,
        'objective': 'mae',
        'metric': 'mae',
        # suggest_loguniform is deprecated; suggest_float(log=True) is the
        # documented replacement and samples from the same distribution.
        'learning_rate': trial.suggest_float('learning_rate', 0.04, 0.06, log=True),
        'device': 'gpu',
        'n_jobs': -1,
        'random_state': 123,
    }

    # Fit the model
    optuna_model = LGBMRegressor(**params)
    optuna_model.fit(x_train, y_train)

    # Make predictions
    y_pred = optuna_model.predict(x_test)

    # Evaluate predictions
    return mean_absolute_error(y_test, y_pred)


In [None]:
# study = optuna.create_study(direction='minimize')
# study.optimize(objective, n_trials=7)
# trial = study.best_trial

In [None]:
# study.best_params

In [None]:
%%time
# Best hyper-parameters found so far (same fixed values as in `objective`,
# with a concrete learning rate).
params = {
 'max_depth': 9,
 'n_estimators': 804,
 'learning_rate': 0.056299296237544046,
 'subsample': 0.5,
 'reg_alpha': 0.07781911066465229,
 'reg_lambda': 0.0030475099211469707,
 'colsample_bytree': 0.9,
 'objective': 'mae',
 'device': 'gpu',
 'random_state': 123
}

# NOTE(review): this rebinds `lgbm`, which the import cell defines as the
# `lightgbm` module alias; from here on `lgbm` is the fitted regressor.
# Later cells rely on that name, so a rename has to be done across all of them.
lgbm = LGBMRegressor(**params)
lgbm.fit(x_train, y_train)

In [None]:
if not IS_ONLINE:
    # Hold-out MAE of the final model (only available when the split was made).
    print(mean_absolute_error(lgbm.predict(x_test), y_test))

In [None]:
# Pair each importance value with its column name, then rank the features
# from most to least important.
feature_imp = pd.DataFrame(sorted(zip(lgbm.feature_importances_,x_train.columns)), columns=['Value','Feature'])
data = feature_imp.sort_values(by="Value", ascending=False)

In [None]:
warnings.simplefilter(action='ignore', category=FutureWarning)

# Bar chart of the 60 highest-ranked features.
# NOTE(review): the title says "avg over folds", but `data` holds the
# importances of the single model fitted above.


plt.figure(figsize=(20, 10))
sns.barplot(x="Value", y="Feature", data=data.iloc[:60])
plt.title('LightGBM Features (avg over folds)')
plt.tight_layout()
plt.show()


In [None]:
# Show every row when the full importance table is displayed below.
pd.set_option("display.max_rows", None)

In [None]:
data

In [None]:
# Bucket every feature with non-zero importance by the family its name
# indicates. The printed lists appear to be what gets pasted into the
# best_*_feats constants near the top — presumably by hand; confirm.
best_imb1_featsn = []
best_movingav_featsn = []
best_lagged_featsn = []
best_std_featsn = []
best_momentum_featsn = []
for i in data.values:
    a = i[0]  # importance value ('Value' column)
    feat = i[1]  # feature name ('Feature' column)
    if a != 0:
        # The substring tests ignore the last character of the name
        # (the final digit of the window/length suffix).
        if ('std' in feat[:-1]):
            best_std_featsn.append(feat)
        if ('movingav' in feat[:-1]):
            best_movingav_featsn.append(feat)
        if (feat.endswith('imb1')):
            best_imb1_featsn.append(feat)
        if (feat.endswith('lagged')):
            best_lagged_featsn.append(feat)
        if ('momentum' in feat[:-1]):
            best_momentum_featsn.append(feat)

In [None]:
# Base column of each selected feature: the text before the first '+'.
# NOTE(review): for a feature derived from an imb1 pair, e.g.
# 'wap+near_price_imb1+std5', this yields just 'wap' rather than the pair
# name — confirm that is intended. The same applies to the three cells below.
# A base column also appears once per window/length it was selected with.
print([i.split('+')[0] for i in best_std_featsn])

In [None]:
print([i.split('+')[0] for i in best_movingav_featsn])

In [None]:
print([i.split('+')[0] for i in best_lagged_featsn])

In [None]:
print([i.split('+')[0] for i in best_momentum_featsn])

In [None]:
print(best_imb1_featsn)

In [None]:
# Create the competition environment and the iterator that yields the
# batches consumed by the prediction loop below.
import optiver2023
env = optiver2023.make_env()
iter_test = env.iter_test()

In [None]:
# Score the test batches one at a time. The raw frames of the 10 most recent
# batches are kept as history so that feature_engineering can compute its
# lag, rolling-window and momentum features for the current batch.
counter = 0
last_df = []
for (test, revealed_targets, sample_prediction) in iter_test:
    test_df = feature_engineering(test, training=False, last_df=last_df)
    sample_prediction['target'] = lgbm.predict(test_df)
    env.predict(sample_prediction)

    # Append the raw batch and retain only the newest 10.
    last_df = (last_df + [test])[-10:]

    counter += 1
    if not IS_ONLINE:
        # Reminder that the model was fitted without the hold-out rows.
        print('WARNING NOT ONLINE')
    print(counter)

In [None]:
# Raw rows of the last batch yielded by the prediction loop.
test

In [None]:
# Predictions submitted for that last batch.
sample_prediction