In [1]:
import numpy as np
import pandas as pd
import pandas_ta as ta
import os

from os.path import join
from os import getcwd
from pathlib import Path
from sys import path

full_path = getcwd()

functions_path = join( Path(full_path).parents[0].parents[0] )
path.append( functions_path  )

import functions.features_func as fe

#from tensortrader.Features import feature_func as fe

import yaml
CONF = yaml.safe_load(Path('config.yml').read_text())

pd.set_option('display.max_columns', None)

In [2]:
CONF['Feature_Engineering'][1]

{'ta': [{'kind': 'sma', 'length': 15},
  {'kind': 'sma', 'length': 120},
  {'kind': 'vwma', 'length': 60},
  {'kind': 'vwma', 'length': 15},
  {'kind': 'bbands', 'length': 15},
  {'kind': 'bbands', 'length': 120},
  {'kind': 'macd', 'fast': 15, 'slow': 120},
  {'kind': 'rsi', 'length': 15},
  {'kind': 'atr', 'length': 15},
  {'kind': 'pdist'}],
 'include_lags': True,
 'lags': [1, 3, 5, 15, 30],
 'ref_variable_lags': ['Close'],
 'drop_lags': False,
 'Volume_Features': True,
 'Volume_Col': 'Volume',
 'Volume_Windows': [5, 30],
 'EntryPrice_PrevClose': False,
 'lags_diff': [1, 15, 30, 60],
 'binary_lags': False,
 'Return_Features': True,
 'return_lags': [1, 5, 15, 60, 240],
 'return_lags_variable': 'Close',
 'Momentum_Features': True,
 'Time_Features': True,
 'Time_Fourier_Features': False,
 'use_prob_features': True,
 'probability_features': ['entry_type', 'risk_type'],
 'Prob_Features_Windows': [2, 6],
 'Apply_Standard_Scaler': True,
 'Standard_Scaler_Cols': ['Volume', 'Number of Trades

# 0. Parameters

In [3]:
storage_folder = '/media/john/Data/Tensor_Invest_Fund/data/Cryptos/TBM/'

storage_folder = '/mnt/Data/Tensor_Invest_Fund/data/Cryptos/TBM/'

# Feature Configuration ID
feature_id = 1

# Strategy
strategy = '1-1_vb_15m' # '2-1_vb_15m' or 2-2_vb_15m

SYMBOLS = ['ADAUSDT',
 'BNBBTC',
 'BNBUSDT',
 'BTCUSDT',
 'DOGEUSDT',
 'EOSUSDT',
 'ETCUSDT',
 'ETHUSDT',
 'IOTAUSDT',
 'LTCUSDT',
 'MKRUSDT',
 'TRXUSDT',
 'XLMUSDT',
 'XMRBTC']

SYMBOLS = ['ADAUSDT',
 'BNBBTC',
 'BNBUSDT',
 'ETHUSDT']

# 1. Data Load

In [4]:
input_folder_db = storage_folder

dfs = []

for ticker in SYMBOLS:

    data_path = f'{ticker}/Tripe_Barrier_Method_{ticker}_ptsl_{strategy}.parquet'

    dfs.append(pd.read_parquet(os.path.join(input_folder_db, data_path)))

data = pd.concat(dfs, ignore_index= True)

In [5]:
print(data['Date'].max(), data['Date'].min())
print(data['Ticker'].unique())

2022-03-25 14:59:00 2021-09-01 01:02:00
['ADAUSDT' 'BNBBTC' 'BNBUSDT' 'ETHUSDT']


In [6]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1050253 entries, 0 to 1050252
Data columns (total 19 columns):
 #   Column                        Non-Null Count    Dtype         
---  ------                        --------------    -----         
 0   Open Time                     1050253 non-null  int64         
 1   Open                          1050253 non-null  float64       
 2   High                          1050253 non-null  float64       
 3   Low                           1050253 non-null  float64       
 4   Close                         1050253 non-null  float64       
 5   Volume                        1050253 non-null  float64       
 6   Clos Time                     1050253 non-null  int64         
 7   Quote Asset Volume            1050253 non-null  float64       
 8   Number of Trades              1050253 non-null  int32         
 9   Taker Buy Base Asset Volume   1050253 non-null  float64       
 10  Taker Buy Quote Asset Volume  1050253 non-null  float64       
 11

# 2. Calculate Features

In [7]:
# Features Configuration
features_conf = CONF['Feature_Engineering'][feature_id]

# (1) Calculate Technical Indicators
ta_config = features_conf['ta']
data = fe.calculate_technical_indicators(data, features_conf, SYMBOLS)

# (2) Calculate Lag Features
if features_conf['include_lags']:
    data = fe.calculate_lag_features(data, features_conf, SYMBOLS)

# (3) Calculate Return Features
if features_conf['Return_Features']:
    date_col = 'Date'
    data = fe.calculate_returns_per_ticker(data, features_conf, SYMBOLS, date_col)

# (4) Momemtum Features
if features_conf['Return_Features'] and features_conf['Momentum_Features']:
    data = fe.calculate_momemtum_features(data, features_conf, SYMBOLS)

# (5) Time Features
if features_conf['Time_Features']:
  
    time_levels =  ['month', 'day', 'hour', 'minute']
    timestamp_col = 'Date'
    data = fe.build_time_columns(data, timestamp_col, time_levels)

    if features_conf['Time_Fourier_Features']:                    
        data = fe.build_fourier_time_features(data, time_levels = ['month', 'day', 'hour', 'minute'], max_levels = [12, 30, 24, 60], drop_columns = True)

# (6) Volume Features
if features_conf['Volume_Features']:
    group_level = ['Ticker']
    data = fe.calculate_volume_features(data, group_level, features_conf)

# (7) Apply Standard Scaler
if features_conf['Apply_Standard_Scaler']:

    for col in features_conf['Standard_Scaler_Cols']:
        data.loc[:,f'{col}_standard'] = data.groupby('Ticker')[col].transform(lambda x: fe.apply_standard_scaler(x))


Calculating Technical Indicators for ticker ADAUSDT
Calculating Technical Indicators for ticker BNBBTC
Calculating Technical Indicators for ticker BNBUSDT
Calculating Technical Indicators for ticker ETHUSDT
Calculating lags for ticker ADAUSDT
Calculating lags for ticker BNBBTC
Calculating lags for ticker BNBUSDT
Calculating lags for ticker ETHUSDT
Calculating returns for ticker ADAUSDT
Calculating returns for ticker BNBBTC
Calculating returns for ticker BNBUSDT
Calculating returns for ticker ETHUSDT
Calculating momemtum for ticker ADAUSDT
Calculating momemtum for ticker BNBBTC
Calculating momemtum for ticker BNBUSDT
Calculating momemtum for ticker ETHUSDT


In [8]:
if features_conf['Apply_Standard_Scaler']:
    
    cols_to_add = []
    for lag_variable in features_conf['ref_variable_lags']:
        for lag in features_conf['lags']:
            cols_to_add.append(f'{lag_variable}_lag_{lag}')


    cols = features_conf['Standard_Scaler_Cols'] + cols_to_add
    #for col in cols:
    #    data.loc[:,f'{col}_standard'] = data.groupby('Ticker')[col].transform(lambda x: fe.apply_standard_scaler(x))

In [9]:
for col in cols:
    data.loc[:,f'{col}_standard'] = data.groupby('Ticker')[col].transform(lambda x: fe.apply_standard_scaler(x))

In [10]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1079664 entries, 0 to 1079663
Data columns (total 71 columns):
 #   Column                        Non-Null Count    Dtype         
---  ------                        --------------    -----         
 0   Date                          1079664 non-null  datetime64[ns]
 1   Open Time                     1079664 non-null  int64         
 2   Open                          1079664 non-null  float64       
 3   High                          1079664 non-null  float64       
 4   Low                           1079664 non-null  float64       
 5   Close                         1079664 non-null  float64       
 6   Volume                        1079664 non-null  float64       
 7   Clos Time                     1079664 non-null  int64         
 8   Quote Asset Volume            1079664 non-null  float64       
 9   Number of Trades              1079664 non-null  int32         
 10  Taker Buy Base Asset Volume   1079664 non-null  float64       
 11

# 3. Metalabels

In [11]:
windows = [5,30]
group_level = ['Ticker']

for window in windows:

    data[f'SMA_{window}'] = data.groupby(group_level)['Close'].transform(lambda x: x.rolling(window = window, closed = 'left').mean())

data = fe.strategy_crossing_sma(data, sma_w = windows)

In [12]:
data['metalabel'] = fe.get_metalabels(y_model1 = data['sma_cross_over'] , y_true = data['label'])

In [13]:
data['metalabel'].value_counts()

0.0    1072106
1.0       7558
Name: metalabel, dtype: int64

In [14]:
storage_folder

'/media/john/Data/Tensor_Invest_Fund/data/Cryptos/TBM/'

In [15]:
output_folder_db ='/media/john/Data/Tensor_Invest_Fund/data/Cryptos/Features_Eng'

sub_experiment_type = 'conf_{}_Tickers_{}_Stategy_{}'.format(feature_id, data['Ticker'].nunique(), strategy)

output_location = os.path.join(output_folder_db, 
                            ('Feature_Engineering_{}.parquet'
                            .format( sub_experiment_type )))

print(output_location)


/media/john/Data/Tensor_Invest_Fund/data/Cryptos/Features_Eng/Feature_Engineering_conf_1_Tickers_4_Stategy_1-1_vb_15m.parquet


In [16]:
data.to_parquet(output_location , engine = 'fastparquet', compression = 'gzip')