# Submission Pipeline

In [2]:
import sys
sys.path.insert(1, "../input/gresearch-src")

In [3]:
# Imports
import gresearch_crypto
import pandas as pd
import numpy as np
import lightgbm as lgb

# Custom
from features import engineer_all_features 
from models import BaseCryptoLearner

In [4]:
# Time boundaries used throughout the pipeline.
# Training history is restricted to the half-open window [min_date, max_date).
min_date = pd.Timestamp('2021-03-13 00:00:00')  # lower bound, keeps loading manageable
max_date = pd.Timestamp('2021-06-13 00:00:00')  # upper bound: public LB "exclusion" cut-off

# Longest look-back the feature engineering is expected to need.
max_time_lag = pd.Timedelta('01:00:00')

In [5]:
# Load the raw competition files: per-asset metadata and the minute-level history.
data_path = "../input/g-research-crypto-forecasting/"
train_csv = data_path + 'train.csv'
asset_details_csv = data_path + 'asset_details.csv'

asset_info = pd.read_csv(asset_details_csv)
data = pd.read_csv(train_csv)

# Epoch seconds -> pandas datetimes.
data['timestamp'] = pd.to_datetime(data['timestamp'], unit='s')

# Attach each asset's weight to its rows (left join keeps every training row).
asset_weights = asset_info[['Asset_ID', 'Weight']]
data = data.merge(asset_weights, on='Asset_ID', how='left')

# Keep only [min_date, max_date): drops the public LB section.
# -> Also remove this filter for the real submission!
in_window = (data.timestamp >= min_date) & (data.timestamp < max_date)
data = data[in_window]

In [6]:
# Restore the pre-trained LightGBM booster from its text dump.
model_path = '../input/gresearch-src/lgbmBaseline.txt'
bst = lgb.Booster(model_file=model_path)

# The model's own feature names define which columns are fed to predict().
feature_cols = bst.feature_name()


In [6]:
# Create the competition API environment and obtain the iterator that the
# prediction loop below consumes; each item it yields is unpacked there as
# a (test_df, _) pair.
# NOTE(review): Kaggle time-series API modules usually permit make_env() only
# once per kernel session - presumably re-running this cell requires a kernel
# restart; confirm.
env = gresearch_crypto.make_env()
iter_test = env.iter_test()

In [7]:
# Inference loop.
# For every batch served by the competition API:
#   1. convert its timestamps and tag its rows with the current round id,
#   2. add it to the running history `data`,
#   3. rebuild the features asset by asset on that history,
#   4. keep only this round's rows, predict with the booster and submit.
pred_id = 0

# Loop-invariant: the set of assets does not change between rounds.
asset_ids = asset_info.Asset_ID.unique()

for test_df, _ in iter_test:

    # To Timestamp
    test_df['timestamp'] = pd.to_datetime(test_df.timestamp, unit='s')

    # Tag this round's rows so they can be singled out after feature engineering
    test_df['pred_id'] = pred_id

    # Append test_df to existing data for feature engineering.
    # pd.concat replaces DataFrame.append, which was deprecated in pandas 1.4
    # and removed in pandas 2.0; the defaults (keep index, no sorting) match.
    data = pd.concat([data, test_df])

    # NOTE(review): `max_time_lag` is defined but the history is never trimmed
    # to it, so every round re-engineers features over the complete (and
    # growing) `data`. If engineer_all_features really needs no more than
    # `max_time_lag` of look-back, restricting each asset's frame to
    # `timestamp >= <batch time> - max_time_lag` would cut the per-round cost
    # drastically - confirm against the feature code before enabling.
    # NOTE(review): if test_df lacks the 'Weight' column that was merged into
    # `data`, the appended rows carry NaN there - verify no feature needs it.

    # Create Features
    prediction_data = []

    for asset_id in asset_ids:

        asset = data[data.Asset_ID == asset_id]

        features, _ = engineer_all_features(asset)

        asset_full = asset.merge(features,
                                 on=['timestamp', 'Asset_ID'])

        prediction_data.append(asset_full)

    prediction_data = pd.concat(prediction_data)

    # Only retain the actual rows needed for prediction.
    # .copy() makes the selection an independent frame, so the column
    # assignments below do not write into a slice (SettingWithCopyWarning).
    prediction_data = prediction_data[prediction_data.pred_id == pred_id].copy()

    # Predict
    prediction_data['Target'] = bst.predict(prediction_data[feature_cols])

    # The history rows have no row_id, so concatenation upcasts the column to
    # float (NaN filler); restore the integer ids before submitting.
    prediction_data['row_id'] = prediction_data['row_id'].astype('int64')
    prediction_df = prediction_data[['Target', 'row_id']]

    # Submit
    env.predict(prediction_df)
    print(f'Round: {pred_id}, Time: {prediction_data.timestamp.unique()}')
    pred_id += 1

print('MONEY MONEY MONEY')