In [None]:
# Reload edited project modules automatically before each cell executes,
# so changes under src/ are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
import sys
# Put the parent directory on the module search path
# (presumably so the `src.*` imports in the next cell resolve — confirm).
sys.path.append("../")

In [None]:
import numpy as np
import pandas as pd
import seaborn as sns
import os
from datetime import datetime
import matplotlib.pyplot as plt
from tqdm import tqdm
from scipy import stats
import shap
import lightgbm as lgb
import pickle
from datetime import datetime

from src.utils import load_fold, load_gresearch_raw
from src.evaluation import corr_score
from src.settings import *
from src.features import engineer_all_features, fast_engineer_all_features
from src.models import CryptoDART

# Default size (width, height) for every matplotlib figure in this notebook.
plt.rcParams["figure.figsize"] = (16,9)

# New submission routine

In [None]:
# Pre-processed development split (pickled pandas objects) plus the raw
# example submission file that ships with the competition data.
# NOTE: read_pickle executes arbitrary code on load; only use trusted files.
train = pd.read_pickle('../data/gresearch/processed/dev/train.pkl')
test = pd.read_pickle('../data/gresearch/processed/dev/test.pkl')
submission_df = pd.read_csv('../data/gresearch/raw/example_sample_submission.csv')

In [None]:
# Build the full historical feature matrix for every asset, one asset at a time.
# NOTE: `feature_array` is re-initialised with NaNs in a later cell, and the
# loop variable `features` (the last asset's feature frame) is reused further
# down to derive `rolling_features` from its column names.
feature_array = []
for asset_id in assets:
    
    # Rows belonging to this asset only.
    asset = train[train.Asset_ID == asset_id]
    # Keep everything except the first two columns of the engineered frame
    # (presumably non-feature identifier columns — TODO confirm).
    features = engineer_all_features(asset,
                                     relative_cols,
                                     relative_periods,
                                     lagged_cols,
                                     lagged_periods,
                                     rolling_cols,
                                     rolling_periods).iloc[:,2:]
    feature_array.append(features.values)
# Stack the per-asset matrices; this is a regular 3-D array only if every
# asset produced the same number of rows — TODO confirm.
feature_array = np.array(feature_array)

#


In [None]:
# Sanity check: the leading dimension should equal len(assets),
# since one matrix was appended per asset.
feature_array.shape

In [None]:
# Build the per-asset model wrapper and load its saved models from disk.
model_save_dir = '../models/fold_10_submission/'
base_name = 'submission'

cryptoDart = CryptoDART(
    assets=assets,
    weights=weights,
    names=names,
    params=dart_base_params,
)
cryptoDart.load_models(model_save_dir, base_name)

In [None]:
# Feature names as reported by the first loaded model; their count fixes the
# width of the rolling feature buffer created below.
# NOTE(review): assumes every per-asset model uses the same features in the
# same order — confirm.
feature_names = np.array(cryptoDart.models[0].feature_name())

In [None]:
# Column names of the last engineered feature frame that start with 'rolling'.
# Iterating the `features` frame (left over from the feature-building loop)
# yields its column labels.
rolling_features = [col for col in features if col.startswith('rolling')]

In [None]:
# Empty initialization period: an all-NaN buffer with one
# (max_lookback_minutes x n_features) window per asset.
feature_array = np.full(
    (len(assets), int(max_lookback_minutes), len(feature_names)),
    np.nan,
)

In [None]:
# Number of distinct (non-null) timestamps; the replay loop below runs one
# iteration per unique timestamp.
train.timestamp.nunique()

In [None]:
# Submission-style replay: walk through the data one timestamp at a time, roll
# every asset's feature window forward and score the newest row with that
# asset's model. `feature_array` is rebuilt at the end of every step with one
# window per asset, in `assets` order.
for i, ts in enumerate(train.timestamp.unique()):

    # Rows of the sample submission for this step; only needed once predictions
    # are written back (see the commented-out lines below).
    prediction_df = submission_df[submission_df.group_num == i]

    test_df = train[train.timestamp == ts]
    # Asset ids observed at this timestamp; computed once per step instead of
    # once per asset.
    present_asset_ids = test_df.Asset_ID.values

    new_feature_array = []

    start_time = datetime.now()
    for asset_pos, asset_id in enumerate(assets):

        # feature_array is stacked in `assets` order (new_feature_array below is
        # appended in that same order), so fetch the window by position rather
        # than by the asset id itself.
        asset_features = feature_array[asset_pos]

        if asset_id not in present_asset_ids:
            # No observation for this asset: slide ITS OWN window forward by one
            # step and repeat the most recent row. The `-1:` slice keeps that
            # row two-dimensional so it can be joined along axis 0.
            asset_features = np.concatenate(
                [asset_features[1:], asset_features[-1:]], axis=0
            )
            new_feature_array.append(asset_features)
            continue

        asset = test_df[test_df.Asset_ID == asset_id].iloc[0]

        # TODO: resolve the submission row once predictions are written back.
        #row_id = prediction_df[prediction_df.row_id == asset_id]['row_id'].iloc[0]

        # Update this asset's feature window with the new observation.
        asset_features = fast_engineer_all_features(asset,
                                                    asset_features,
                                                    feature_names,
                                                    relative_cols,
                                                    relative_periods,
                                                    lagged_cols,
                                                    lagged_periods,
                                                    rolling_features)

        # Predict from the newest feature row only (reshaped to a single sample).
        pred = cryptoDart.predict_asset(asset_id, asset_features[-1].reshape(1,-1))[0]
        #prediction_df.loc[prediction_df.row_id == row_id, 'Target'] = pred

        # Carry the updated window over to the next timestamp.
        new_feature_array.append(asset_features)


    feature_array = np.array(new_feature_array)

    # Wall-clock time of the per-asset loop for this timestamp.
    elapsed = (datetime.now() - start_time).total_seconds()
    #print(f'Elapsed seconds {elapsed}')