# Submission Pipeline

In [None]:
import sys
sys.path.insert(1, "../input/gresearch-src")

In [None]:
# Imports
import gresearch_crypto
import pandas as pd
import numpy as np
import lightgbm as lgb
from datetime import datetime 

# Custom
from features import engineer_all_features 

In [None]:
# OHLCV Features
def fast_ohlcv_features(df, out_features):
    """Write the raw OHLCV values and three derived ratios into slots 0-9.

    Args:
        df: A record exposing 'Count', 'Open', 'High', 'Low', 'Close',
            'Volume' and 'VWAP' by label (e.g. one row of a DataFrame).
        out_features: Indexable, mutable buffer with at least 10 slots.
            It is modified in place.

    Returns:
        The same ``out_features`` object, with positions 0-9 overwritten.
    """
    open_price = df['Open']
    high_price = df['High']
    low_price = df['Low']
    close_price = df['Close']

    values = (
        df['Count'],
        open_price,
        high_price,
        low_price,
        close_price,
        df['Volume'],
        df['VWAP'],
        (close_price - open_price) / open_price,  # simple open-to-close return
        np.log(close_price / open_price),         # log open-to-close return
        high_price / low_price,                   # high/low ratio
    )

    for slot, value in enumerate(values):
        out_features[slot] = value

    return out_features

# Relative change features
def fast_relative_features(feature_array, columns, period, out_features):
    """Append log-ratio features to the first free slots of ``out_features``.

    For every column index ``j`` in ``columns`` this computes
    ``log(feature_array[0, j] / feature_array[-period, j])`` and stores it
    in the lowest-index slot of ``out_features`` that is still NaN.

    Args:
        feature_array: 2-D history indexed as ``[row, column]``.
        columns: Iterable of column indices into ``feature_array``.
        period: Positive offset; row ``-period`` is used as the denominator.
        out_features: 1-D buffer in which NaN marks an unfilled slot.
            It is modified in place.

    Returns:
        The same ``out_features`` object.

    Raises:
        ValueError: If ``out_features`` has no NaN slot left to fill.
    """
    # NOTE(review): the numerator is row 0 of ``feature_array``, not the
    # value of the step currently being built -- confirm this matches how
    # the training features were computed.
    for j in columns:
        value = np.log(feature_array[0, j] / feature_array[-period, j])

        # Look for free slots in the buffer that was passed in, rather than
        # in a module-level variable that merely happens to alias it.
        free_slots = np.flatnonzero(np.isnan(out_features))
        if free_slots.size == 0:
            raise ValueError("out_features has no empty (NaN) slot left")

        # Caveat: if ``value`` is itself NaN the slot still looks free, so
        # the next write lands on the same index.
        out_features[free_slots[0]] = value

    return out_features

def fast_lagged_features(feature_array, columns, period, out_features):
    """Append lagged values to the first free slots of ``out_features``.

    For every column index ``j`` in ``columns`` this reads
    ``feature_array[-period, j]`` and stores it in the lowest-index slot of
    ``out_features`` that is still NaN.

    Args:
        feature_array: 2-D history indexed as ``[row, column]``.
        columns: Iterable of column indices into ``feature_array``.
        period: Positive offset; row ``-period`` is the lagged row read.
        out_features: 1-D buffer in which NaN marks an unfilled slot.
            It is modified in place.

    Returns:
        The same ``out_features`` object.

    Raises:
        ValueError: If ``out_features`` has no NaN slot left to fill.
    """
    for j in columns:
        value = feature_array[-period, j]

        # Look for free slots in the buffer that was passed in, rather than
        # in a module-level variable that merely happens to alias it.
        free_slots = np.flatnonzero(np.isnan(out_features))
        if free_slots.size == 0:
            raise ValueError("out_features has no empty (NaN) slot left")

        # Caveat: if ``value`` is itself NaN the slot still looks free, so
        # the next write lands on the same index.
        out_features[free_slots[0]] = value

    return out_features

In [None]:
# Create the competition environment and get its test iterator; the
# prediction loop in this notebook unpacks each item it yields as
# (test_df, prediction_df) and hands predictions back via env.predict().
env = gresearch_crypto.make_env()
iter_test = env.iter_test()

In [None]:
# Model 
# Load a LightGBM booster from the saved model file; it supplies both the
# feature-name order and the predictions used later in this notebook.
bst = lgb.Booster(model_file='../input/gresearch-src/lgbmBaseline.txt')
    

In [None]:
# Rolling feature history, all NaN until real rows are appended:
# axis 0 = asset id (14), axis 1 = stored time steps (66),
# axis 2 = feature vector (74).
features = np.full((14, 66, 74), np.nan)

In [None]:
# Column groups and periods, mirroring engineer_all_features.

# Base columns for which relative (log-change) features are computed.
relative_cols = [
    'Count',
    'Open',
    'High',
    'Low',
    'Close',
    'Volume',
    'VWAP',
]

# Columns for which lagged copies are appended.
lagged_cols = [
    'direct_return',
    'log_return',
    'high_low_ratio',
    'log_change_Count_1min',
    'log_change_Open_1min',
    'log_change_High_1min',
    'log_change_Low_1min',
    'log_change_Close_1min',
    'log_change_Volume_1min',
    'log_change_VWAP_1min',
]

# Offsets passed as `period` to the relative and lagged helpers.
relative_periods = [1, 60]
lagged_periods = [1, 2, 3, 4, 5]

# Feature order as stored in the loaded model.
feature_names = bst.feature_name()

# Asset ids 0..13, also used as the first index into `features`.
assets = list(range(14))

In [None]:
# Column positions of the base features inside the model's feature vector.
# They depend only on constants, so they are looked up once here instead of
# once per asset and per period inside the loop.
relative_columns = np.where(np.isin(feature_names, relative_cols))[0]
lagged_columns = np.where(np.isin(feature_names, lagged_cols))[0]

for test_df, prediction_df in iter_test:

    # One (1, n_features) row per asset, collected in `assets` order so the
    # stacked result lines up with the first axis of `features`.
    new_features = []

    for asset_id in assets:

        # Asset missing from this batch: carry its most recent stored row
        # forward and make no prediction for it.
        if asset_id not in test_df.Asset_ID.values:
            new_features.append(features[asset_id, -1].reshape(1, -1))
            continue

        asset = test_df[test_df.Asset_ID == asset_id].iloc[0]
        row_id = asset['row_id']
        asset_features = features[asset_id]

        # NaN marks a slot as not yet filled; the helpers below write into
        # the first NaN slot they find.
        current_features = np.full(features.shape[2], np.nan)
        current_features = fast_ohlcv_features(asset, current_features)

        # Relative change features
        for period in relative_periods:
            current_features = fast_relative_features(
                asset_features,
                columns=relative_columns,
                period=period,
                out_features=current_features)

        # Lagged features
        for period in lagged_periods:
            current_features = fast_lagged_features(
                asset_features,
                lagged_columns,
                period=period,
                out_features=current_features)

        # Predict on a single-row 2-D input and take the only result.
        current_features = current_features.reshape(1, -1)
        pred = bst.predict(current_features)[0]
        prediction_df.loc[prediction_df.row_id == row_id, 'Target'] = pred

        # Append data
        new_features.append(current_features)

    new_features = np.array(new_features)

    # Drop the oldest stored step and append the new one, so the history
    # keeps a fixed length along axis 1.
    features = np.concatenate((features[:, 1:, :], new_features), axis=1)

    # Submit
    env.predict(prediction_df)
print('Done')