
by: Hamid Reza Mazandarani @ Part AI Research Center

In this notebook, we use the tsfresh library to extract features from a Bitcoin price time series.

In [3]:
import numpy as np
import pandas as pd

# import your dataset
# (the loading step is not shown here: `df` must already exist and expose a
# `date` column before this cell runs)

# Keep only rows dated 2021-01-01 or later and renumber them from 0.
df = df.loc[df.date >= '2021-01-01'].reset_index(drop=True)

df


Unnamed: 0,date,open,high,low,close,volume,tic
0,2021-01-01,28951.7,29627.1,28712.4,29359.9,100902.0,BTC-USD
1,2021-01-02,29359.7,33233.5,29008.0,32193.3,240865.0,BTC-USD
2,2021-01-03,32192.9,34755.9,32029.6,32958.9,155210.0,BTC-USD
3,2021-01-04,33016.6,33587.5,28204.5,32022.6,255269.0,BTC-USD
4,2021-01-05,32015.4,34414.7,30010.5,33991.5,202128.0,BTC-USD
...,...,...,...,...,...,...,...
246,2021-09-04,49999.3,50534.6,49461.4,49918.4,48142.0,BTC-USD
247,2021-09-05,49918.7,51878.2,49492.4,51768.6,57301.0,BTC-USD
248,2021-09-06,51769.4,52726.2,51002.8,52672.1,69456.0,BTC-USD
249,2021-09-07,52674.2,52885.3,43072.4,46779.6,187558.0,BTC-USD


In [4]:
from tsfresh.utilities.dataframe_functions import roll_time_series

# Roll the series per `tic`, ordered by `date`. As the output below shows, each
# resulting `id` is a (tic, date) pair holding the rows up to that date.
# The original `tic` column is dropped once the `id` column exists.
df_rolled = (
    roll_time_series(df, column_id="tic", column_sort="date")
    .drop(columns=["tic"])
)

df_rolled


Rolling: 100%|██████████| 10/10 [00:00<00:00, 28.27it/s]


Unnamed: 0,date,open,high,low,close,volume,id
0,2021-01-01,28951.7,29627.1,28712.4,29359.9,100902.0,"(BTC-USD, 2021-01-01)"
1,2021-01-01,28951.7,29627.1,28712.4,29359.9,100902.0,"(BTC-USD, 2021-01-02)"
2,2021-01-02,29359.7,33233.5,29008.0,32193.3,240865.0,"(BTC-USD, 2021-01-02)"
3,2021-01-01,28951.7,29627.1,28712.4,29359.9,100902.0,"(BTC-USD, 2021-01-03)"
4,2021-01-02,29359.7,33233.5,29008.0,32193.3,240865.0,"(BTC-USD, 2021-01-03)"
...,...,...,...,...,...,...,...
31621,2021-09-04,49999.3,50534.6,49461.4,49918.4,48142.0,"(BTC-USD, 2021-09-08)"
31622,2021-09-05,49918.7,51878.2,49492.4,51768.6,57301.0,"(BTC-USD, 2021-09-08)"
31623,2021-09-06,51769.4,52726.2,51002.8,52672.1,69456.0,"(BTC-USD, 2021-09-08)"
31624,2021-09-07,52674.2,52885.3,43072.4,46779.6,187558.0,"(BTC-USD, 2021-09-08)"


In [5]:
from tsfresh import extract_features
from tsfresh.feature_extraction import ComprehensiveFCParameters

# Compute one feature row per rolled window (`id`), with rows ordered by `date`.
# The feature settings are spelled out explicitly; ComprehensiveFCParameters is
# what tsfresh falls back to when no settings are supplied.
extracted_features = extract_features(
    df_rolled,
    column_id="id",
    column_sort="date",
    default_fc_parameters=ComprehensiveFCParameters(),
)


Feature Extraction: 100%|██████████| 10/10 [00:49<00:00,  4.93s/it]


In [6]:
from tsfresh.utilities.dataframe_functions import impute

# Clean up non-finite feature values so the matrix can be used for selection.
# Per the log output below, columns without any finite value are filled with zeros.
# NOTE(review): tsfresh documents `impute` as also modifying its argument in
# place — confirm before relying on the pre-imputation frame elsewhere.
extracted_features = impute(extracted_features)


 'high__query_similarity_count__query_None__threshold_0.0'
 'low__query_similarity_count__query_None__threshold_0.0'
 'close__query_similarity_count__query_None__threshold_0.0'
 'volume__query_similarity_count__query_None__threshold_0.0'] did not have any finite values. Filling with zeros.


In [7]:
from tsfresh import select_features

# Label definition: mean close over the next FUTURE_WINDOW rows divided by the
# mean close over the latest PAST_WINDOW rows (current row included).
FUTURE_WINDOW = 4
PAST_WINDOW = 3
# False-discovery-rate level passed to select_features.
FDR_LEVEL = 0.00001


def trend_type(horizon):
    """Return the trailing rolling mean of ``df['close']`` over ``horizon`` rows.

    The first ``horizon - 1`` rows have no complete window; their NaN means
    are replaced by 0.
    """
    return df['close'].rolling(horizon).mean().fillna(0)


target = (
    # shift(-FUTURE_WINDOW) moves the trailing mean ending FUTURE_WINDOW rows
    # ahead onto the current row, i.e. it becomes the mean of the *next* rows.
    (trend_type(FUTURE_WINDOW).shift(-FUTURE_WINDOW) / trend_type(PAST_WINDOW))
    # Warm-up rows have a 0 denominator, so the ratio is +/-inf there.
    .replace([np.inf, -np.inf], np.nan)
    # bfill()/ffill() replace the deprecated fillna(method=...) spelling.
    # NOTE: the last FUTURE_WINDOW rows have no future data; ffill makes them
    # inherit the last computable ratio, so those labels are not real targets.
    .bfill()
    .ffill()
)

# The target is re-labelled positionally with the feature index, so both must
# have the same number of rows (and the same row order).
if len(target) != len(extracted_features):
    raise ValueError(
        f"target has {len(target)} rows but extracted_features has "
        f"{len(extracted_features)}; re-run the cells above in order"
    )
target.index = extracted_features.index

small_feature_set = select_features(extracted_features, target, fdr_level=FDR_LEVEL)

In [8]:
# Display the feature columns kept by select_features.
small_feature_set

Unnamed: 0,Unnamed: 1,"close__change_quantiles__f_agg_""mean""__isabs_True__qh_0.8__ql_0.4","open__change_quantiles__f_agg_""mean""__isabs_True__qh_0.8__ql_0.4","close__change_quantiles__f_agg_""mean""__isabs_True__qh_0.8__ql_0.2","open__change_quantiles__f_agg_""mean""__isabs_True__qh_0.8__ql_0.0","open__change_quantiles__f_agg_""mean""__isabs_True__qh_0.4__ql_0.0","open__change_quantiles__f_agg_""mean""__isabs_True__qh_0.8__ql_0.2","close__change_quantiles__f_agg_""mean""__isabs_True__qh_0.8__ql_0.0","volume__change_quantiles__f_agg_""mean""__isabs_False__qh_1.0__ql_0.6","close__change_quantiles__f_agg_""var""__isabs_False__qh_0.8__ql_0.0","close__change_quantiles__f_agg_""var""__isabs_False__qh_0.8__ql_0.4",...,"low__change_quantiles__f_agg_""var""__isabs_True__qh_0.4__ql_0.0",close__quantile__q_0.4,high__quantile__q_0.1,high__quantile__q_0.4,close__quantile__q_0.6,low__quantile__q_0.4,high__benford_correlation,open__quantile__q_0.4,"close__change_quantiles__f_agg_""mean""__isabs_True__qh_0.4__ql_0.2","high__change_quantiles__f_agg_""var""__isabs_False__qh_0.4__ql_0.0"
BTC-USD,2021-01-01,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000e+00,0.000000e+00,...,0.000000e+00,29359.90,29627.10,29627.10,29359.90,28712.40,0.295657,28951.70,0.000000,0.000000e+00
BTC-USD,2021-01-02,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000e+00,0.000000e+00,...,0.000000e+00,30493.26,29987.74,31069.66,31059.94,28830.64,0.271055,29114.90,0.000000,0.000000e+00
BTC-USD,2021-01-03,0.000000,0.000000,0.000000,408.000000,0.000000,0.000000,2833.400000,0.000000,0.000000e+00,0.000000e+00,...,0.000000e+00,31626.62,30348.38,32512.22,32346.42,28948.88,0.198691,29278.10,0.000000,0.000000e+00
BTC-USD,2021-01-04,0.000000,0.000000,0.000000,1620.600000,408.000000,2833.200000,2833.400000,0.000000,0.000000e+00,0.000000e+00,...,0.000000e+00,32056.74,30709.02,33304.30,32159.16,28771.52,0.159271,29926.34,0.000000,0.000000e+00
BTC-USD,2021-01-05,765.600000,0.000000,850.950000,1620.600000,408.000000,2833.200000,1511.766667,0.000000,2.375878e+06,0.000000e+00,...,0.000000e+00,32125.02,31069.66,33445.90,32499.54,28889.76,0.136829,30953.12,0.000000,0.000000e+00
BTC-USD,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
BTC-USD,2021-09-04,1317.990244,1323.545122,1449.475758,1396.841053,1260.823596,1457.187786,1396.539474,2011.681818,3.473867e+06,3.061808e+06,...,1.282318e+06,39171.32,34010.68,40467.16,47885.42,37191.92,-0.098986,39081.42,1242.512903,2.102720e+06
BTC-USD,2021-09-05,1324.402410,1308.569880,1452.488722,1389.949738,1260.823596,1446.759091,1398.914660,2011.681818,3.471144e+06,3.059966e+06,...,1.282318e+06,39174.24,34019.96,40485.32,47947.18,37193.04,-0.099859,39138.74,1242.512903,2.102720e+06
BTC-USD,2021-09-06,1319.391667,1322.404819,1448.391791,1392.349479,1257.692222,1449.796241,1396.334375,411.411765,3.456116e+06,3.030129e+06,...,1.255865e+06,39178.02,34029.24,40500.10,47979.82,37227.96,-0.100718,39169.06,1292.160606,2.091312e+06
BTC-USD,2021-09-07,1373.192941,1317.433333,1481.311111,1389.823316,1257.692222,1445.729104,1419.630570,2011.681818,3.625792e+06,3.420928e+06,...,1.255865e+06,39182.66,34038.52,40511.50,47958.06,37296.68,-0.101565,39172.38,1292.160606,2.091312e+06


In [9]:
# List the names of the selected feature columns.
small_feature_set.columns

Index(['close__change_quantiles__f_agg_"mean"__isabs_True__qh_0.8__ql_0.4',
       'open__change_quantiles__f_agg_"mean"__isabs_True__qh_0.8__ql_0.4',
       'close__change_quantiles__f_agg_"mean"__isabs_True__qh_0.8__ql_0.2',
       'open__change_quantiles__f_agg_"mean"__isabs_True__qh_0.8__ql_0.0',
       'open__change_quantiles__f_agg_"mean"__isabs_True__qh_0.4__ql_0.0',
       'open__change_quantiles__f_agg_"mean"__isabs_True__qh_0.8__ql_0.2',
       'close__change_quantiles__f_agg_"mean"__isabs_True__qh_0.8__ql_0.0',
       'volume__change_quantiles__f_agg_"mean"__isabs_False__qh_1.0__ql_0.6',
       'close__change_quantiles__f_agg_"var"__isabs_False__qh_0.8__ql_0.0',
       'close__change_quantiles__f_agg_"var"__isabs_False__qh_0.8__ql_0.4',
       'open__change_quantiles__f_agg_"var"__isabs_False__qh_0.8__ql_0.4',
       'open__change_quantiles__f_agg_"var"__isabs_False__qh_0.8__ql_0.2',
       'high__change_quantiles__f_agg_"var"__isabs_True__qh_0.4__ql_0.0',
       'open__chan