In [1]:

import yfinance as yf, pandas as pd
from ta import add_all_ta_features
import pandas as pd
import numpy as np
from pandas import DataFrame
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from ta.utils import dropna
import requests


In [12]:
def calculate_correlation(df, target_column, correlation_method='pearson'):
    res = []
    for column in df.columns:
        try:
            correlation  = df[target_column].corr(df[column], method=correlation_method)
        except:
            print(column)
            raise
        #print(f'Correlation between {target_column} and {column} = {correlation}, method={correlation_method}')
        res.append(
            {
                #'target_column': target_column,
                'column': column,
                'correlation': correlation
            }
        )
    sorted_corr = sorted(res, key=lambda d: d['correlation'], reverse=True )
    return sorted_corr

def get_cols_with_correlation_gte(df, target_column, correlation_method='pearson', thresh=0.1):
    return [el['column'] for el in calculate_correlation(df, target_column, correlation_method) if abs(el['correlation']) >= thresh]

def print_correlation(df, target_column, correlation_method='pearson', thresh=0.1):
    for el in calculate_correlation(df, target_column, correlation_method):
        print(el['column'], el['correlation'] )

def parse_time_of_tradingview(df, column_name='time'):
    df[column_name] = pd.to_datetime(df[column_name], format='%Y-%m-%dT%H:%M:%S', errors='coerce', utc=True)
    df[column_name] = df[column_name].dt.date
    df = df.set_index([column_name])
    return df

In [3]:


class DatasetMaker:

    def __init__(self, ticker='BTC-USD'):
        self.tick = yf.Ticker(ticker)
        self.df = self.tick.history(period="max")
        self.df = add_all_ta_features(self.df, open="Open", high="High", low="Low", close="Close", volume="Volume")
        self.df.index.name = 'timestamp'
        self.df.index = self.df.index.tz_localize(None)

    def get_selected_rows(self, rows):
        return rows or cols_corelated_01_with_log_rets


    def prepare(self, selected_rows):
        self.append_rows()
        self.remove_nan()
        self.drop_columns(selected_rows)

        self.df = self.df.dropna(how='any')
        return self.df

    def save(self, name):
        path = './datasets/prepared'
        self.df.to_csv(f'{path}/{name}', index_label='timestamp')


    def append_rows(self):
        self.append_log_returns()
        self.append_greed_fear()
        #self.append_btc_dominance()


    def remove_nan(self):
        self.df['fear_greed'].fillna((self.df['fear_greed'].mean()))
        self.df['diff_fear_greed'].fillna((self.df['diff_fear_greed'].mean()), inplace=True)
        self.df['fear_greed'] = self.df['fear_greed'].fillna((self.df['fear_greed'].mean())) + self.df['log_close']
        self.df['diff_fear_greed'] = self.df['diff_fear_greed'].fillna((self.df['diff_fear_greed'].mean())) + self.df['log_returns']


    def append_log_returns(self):
        self.df['log_close'] = np.log(self.df['Close'])
        self.df['log_returns'] = self.df['log_close'].diff()


    def append_greed_fear(self):
        data = requests.get('https://api.alternative.me/fng/?limit=0')
        res = data.json()
        fear_greed = pd.DataFrame(res['data'])
        fear_greed = fear_greed[['timestamp', 'value']]
        fear_greed['timestamp']  = pd.to_datetime(fear_greed['timestamp'],unit='s')
        fear_greed = fear_greed.set_index(['timestamp'])
        fear_greed['value'] = fear_greed['value'].astype(float)

        fear_greed.rename(columns = {'value':'fear_greed'}, inplace = True)
        fear_greed['diff_fear_greed'] = fear_greed['fear_greed'].diff()
        self.df = pd.merge(self.df, fear_greed, right_index=True, left_index=True, how='left')

    def append_btc_dominance(self):
        pass

    def drop_columns(self, selected_rows):
        self.df = self.df[selected_rows]


cols_corelated_01_with_log_rets =  [
    'log_close',
    'Close',


    'log_returns',
    'diff_fear_greed',
    'volatility_kcp',
    'volume_em',
    'volume_vpt',
    'momentum_stoch_rsi',
    'volatility_bbp',
    'momentum_stoch',
    'momentum_wr',
    'volatility_dcp',
    'momentum_rsi',
    'momentum_uo',
    'trend_cci',
    'volatility_kchi',
    'momentum_roc',
    'volatility_bbhi',
    'volume_fi',
    'trend_adx_pos',
    'volume_cmf',
    'momentum_ppo_hist',
    'momentum_stoch_rsi_k',
    'trend_vortex_ind_pos',
    'volume_mfi',
    'momentum_stoch_signal',
    'trend_vortex_ind_diff',
    'trend_psar_up_indicator',
    'trend_macd_diff',
    'trend_stc',
    'volume_sma_em',
    'trend_aroon_ind',
    'momentum_tsi',
    'trend_aroon_down',
    'trend_vortex_ind_neg',
    'trend_psar_down_indicator',
    'trend_adx_neg',
    'volatility_bbli',
    #'volatility_kcli'
]


In [4]:

dm = DatasetMaker('BCH-USD')
dm.prepare(cols_corelated_01_with_log_rets)

  dip[idx] = 100 * (self._dip[idx] / value)
  din[idx] = 100 * (self._din[idx] / value)


Unnamed: 0_level_0,log_close,Close,log_returns,diff_fear_greed,volatility_kcp,volume_em,volume_vpt,momentum_stoch_rsi,volatility_bbp,momentum_stoch,...,trend_macd_diff,trend_stc,volume_sma_em,trend_aroon_ind,momentum_tsi,trend_aroon_down,trend_vortex_ind_neg,trend_psar_down_indicator,trend_adx_neg,volatility_bbli
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2018-01-19,7.478520,1769.619995,0.007299,0.000112,-0.149539,48.723667,1.054174e+07,0.025920,0.043720,23.892781,...,-129.162791,0.292969,-2666.250373,-88.0,-7.278785,92.0,1.146744,0.0,28.889118,0.0
2018-01-20,7.616835,2032.119995,0.138315,0.131128,0.264113,4780.600857,1.710247e+08,0.341293,0.252157,39.242402,...,-104.588403,0.146484,-2542.512534,-28.0,-7.653928,88.0,1.130895,0.0,26.974696,0.0
2018-01-21,7.482479,1776.640015,-0.134356,-0.141542,-0.022618,-2759.282776,7.189503e+07,0.116843,0.123595,25.727497,...,-99.690545,0.073242,-3152.814761,-28.0,-9.120186,84.0,1.195030,0.0,26.322426,0.0
2018-01-22,7.391366,1621.920044,-0.091113,-0.098300,-0.108161,-9079.705329,-1.530204e+08,0.000000,0.083283,16.150101,...,-100.724826,0.036621,-2843.381129,-48.0,-11.008334,100.0,1.196083,0.0,28.871581,0.0
2018-01-23,7.391082,1621.459961,-0.000284,-0.007471,0.043653,-2295.137133,-5.950009e+07,0.000000,0.130643,16.121621,...,-95.348959,0.018311,-2886.041028,-52.0,-12.554688,100.0,1.156523,0.0,28.159084,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2023-06-02,4.743436,114.828041,0.012813,-2.987187,0.742420,4.117265,9.949467e+05,0.756958,0.492242,65.505909,...,0.232543,96.415747,-0.290208,-64.0,-8.672552,68.0,1.025498,0.0,23.578831,0.0
2023-06-03,4.744270,114.923843,0.000834,1.000834,0.765119,1.133908,9.823917e+05,0.781152,0.511224,66.918642,...,0.285041,98.207874,-0.312147,-32.0,-7.959495,64.0,1.054859,0.0,23.052470,0.0
2023-06-04,4.751249,115.728775,0.006980,-0.993020,0.856593,2.923252,5.238000e+05,0.990139,0.648563,78.788428,...,0.365772,99.103937,-0.027947,-32.0,-6.795276,60.0,1.004394,0.0,21.491838,0.0
2023-06-05,4.689479,108.796463,-0.061770,8.938230,-0.550603,-31.213940,-5.479560e+06,0.000000,-0.226773,11.344383,...,-0.034639,88.918142,-2.035917,-76.0,-10.002396,100.0,0.983623,0.0,32.694960,1.0


In [6]:
dm.save('bch/ta_corr_01_fd.csv')

In [8]:
df = dm.df
df

Unnamed: 0_level_0,log_close,Close,log_returns,diff_fear_greed,volatility_kcp,volume_em,volume_vpt,momentum_stoch_rsi,volatility_bbp,momentum_stoch,...,trend_macd_diff,trend_stc,volume_sma_em,trend_aroon_ind,momentum_tsi,trend_aroon_down,trend_vortex_ind_neg,trend_psar_down_indicator,trend_adx_neg,volatility_bbli
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2018-01-19,7.478520,1769.619995,0.007299,0.000112,-0.149539,48.723667,1.054174e+07,0.025920,0.043720,23.892781,...,-129.162791,0.292969,-2666.250373,-88.0,-7.278785,92.0,1.146744,0.0,28.889118,0.0
2018-01-20,7.616835,2032.119995,0.138315,0.131128,0.264113,4780.600857,1.710247e+08,0.341293,0.252157,39.242402,...,-104.588403,0.146484,-2542.512534,-28.0,-7.653928,88.0,1.130895,0.0,26.974696,0.0
2018-01-21,7.482479,1776.640015,-0.134356,-0.141542,-0.022618,-2759.282776,7.189503e+07,0.116843,0.123595,25.727497,...,-99.690545,0.073242,-3152.814761,-28.0,-9.120186,84.0,1.195030,0.0,26.322426,0.0
2018-01-22,7.391366,1621.920044,-0.091113,-0.098300,-0.108161,-9079.705329,-1.530204e+08,0.000000,0.083283,16.150101,...,-100.724826,0.036621,-2843.381129,-48.0,-11.008334,100.0,1.196083,0.0,28.871581,0.0
2018-01-23,7.391082,1621.459961,-0.000284,-0.007471,0.043653,-2295.137133,-5.950009e+07,0.000000,0.130643,16.121621,...,-95.348959,0.018311,-2886.041028,-52.0,-12.554688,100.0,1.156523,0.0,28.159084,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2023-06-02,4.743436,114.828041,0.012813,-2.987187,0.742420,4.117265,9.949467e+05,0.756958,0.492242,65.505909,...,0.232543,96.415747,-0.290208,-64.0,-8.672552,68.0,1.025498,0.0,23.578831,0.0
2023-06-03,4.744270,114.923843,0.000834,1.000834,0.765119,1.133908,9.823917e+05,0.781152,0.511224,66.918642,...,0.285041,98.207874,-0.312147,-32.0,-7.959495,64.0,1.054859,0.0,23.052470,0.0
2023-06-04,4.751249,115.728775,0.006980,-0.993020,0.856593,2.923252,5.238000e+05,0.990139,0.648563,78.788428,...,0.365772,99.103937,-0.027947,-32.0,-6.795276,60.0,1.004394,0.0,21.491838,0.0
2023-06-05,4.689479,108.796463,-0.061770,8.938230,-0.550603,-31.213940,-5.479560e+06,0.000000,-0.226773,11.344383,...,-0.034639,88.918142,-2.035917,-76.0,-10.002396,100.0,0.983623,0.0,32.694960,1.0


In [10]:
print(df.columns)

Index(['log_close', 'Close', 'log_returns', 'diff_fear_greed',
       'volatility_kcp', 'volume_em', 'volume_vpt', 'momentum_stoch_rsi',
       'volatility_bbp', 'momentum_stoch', 'momentum_wr', 'volatility_dcp',
       'momentum_rsi', 'momentum_uo', 'trend_cci', 'volatility_kchi',
       'momentum_roc', 'volatility_bbhi', 'volume_fi', 'trend_adx_pos',
       'volume_cmf', 'momentum_ppo_hist', 'momentum_stoch_rsi_k',
       'trend_vortex_ind_pos', 'volume_mfi', 'momentum_stoch_signal',
       'trend_vortex_ind_diff', 'trend_psar_up_indicator', 'trend_macd_diff',
       'trend_stc', 'volume_sma_em', 'trend_aroon_ind', 'momentum_tsi',
       'trend_aroon_down', 'trend_vortex_ind_neg', 'trend_psar_down_indicator',
       'trend_adx_neg', 'volatility_bbli'],
      dtype='object')


In [13]:
print_correlation(df, 'log_returns')

log_returns 0.9999999999999999
volatility_kcp 0.5146730623796546
volume_vpt 0.4490476317399207
momentum_stoch_rsi 0.44248812673530974
volatility_bbp 0.41398503741025694
momentum_stoch 0.4122296942832495
momentum_wr 0.4122296942832495
volatility_dcp 0.36446752247330544
momentum_uo 0.36304512492529406
volume_em 0.34639801486594257
momentum_rsi 0.34198731947812
volatility_bbhi 0.3280696500473764
trend_cci 0.3228743676629783
volatility_kchi 0.32194309241055774
momentum_roc 0.2902611311519988
volume_fi 0.27682267434633606
trend_adx_pos 0.2472891221883884
momentum_stoch_rsi_k 0.20802755848954557
momentum_ppo_hist 0.20797865881688982
volume_cmf 0.20293995836514656
momentum_stoch_signal 0.18655360671744056
trend_vortex_ind_diff 0.16214686338225068
trend_macd_diff 0.16087529423946334
volume_mfi 0.15921672257587466
trend_vortex_ind_pos 0.15341743176971556
trend_psar_up_indicator 0.1455433469354609
trend_stc 0.11415847336722763
trend_aroon_ind 0.09353385020773035
momentum_tsi 0.07240536834896764


In [11]:
for col in df.columns:
    print(col)

log_close
Close
log_returns
diff_fear_greed
volatility_kcp
volume_em
volume_vpt
momentum_stoch_rsi
volatility_bbp
momentum_stoch
momentum_wr
volatility_dcp
momentum_rsi
momentum_uo
trend_cci
volatility_kchi
momentum_roc
volatility_bbhi
volume_fi
trend_adx_pos
volume_cmf
momentum_ppo_hist
momentum_stoch_rsi_k
trend_vortex_ind_pos
volume_mfi
momentum_stoch_signal
trend_vortex_ind_diff
trend_psar_up_indicator
trend_macd_diff
trend_stc
volume_sma_em
trend_aroon_ind
momentum_tsi
trend_aroon_down
trend_vortex_ind_neg
trend_psar_down_indicator
trend_adx_neg
volatility_bbli


In [None]:
['log_close', 'Close', 'log_returns', 'diff_fear_greed', 'volatility_kcp', 'volume_em', 'volume_vpt', 'momentum_stoch_rsi', 'volatility_bbp', 'momentum_stoch', 'momentum_wr', 'volatility_dcp', 'momentum_rsi', 'momentum_uo', 'trend_cci', 'volatility_kchi', 'momentum_roc', 'volatility_bbhi', 'volume_fi', 'trend_adx_pos', 'volume_cmf', 'momentum_ppo_hist', 'momentum_stoch_rsi_k', 'trend_vortex_ind_pos', 'volume_mfi', 'momentum_stoch_signal', 'trend_vortex_ind_diff', 'trend_psar_up_indicator', 'trend_macd_diff', 'trend_stc', 'volume_sma_em', 'trend_aroon_ind', 'momentum_tsi', 'trend_aroon_down', 'trend_vortex_ind_neg', 'trend_psar_down_indicator', 'trend_adx_neg', 'volatility_bbli']

In [65]:
df['log_returns'].cumsum()


timestamp
2014-11-27    0.003523
2014-11-28    0.021689
2014-11-29    0.019147
2014-11-30    0.025931
2014-12-01    0.029095
                ...   
2023-05-29    4.321755
2023-05-30    4.320185
2023-05-31    4.302607
2023-06-01    4.287814
2023-06-02    4.297684
Name: log_returns, Length: 3110, dtype: float64

In [None]:
def get_scaled_dataframe(df, exclude_cols, target_col, ntest=21):
    df = df.drop(exclude_cols, axis=1)
    train = df.iloc[:-ntest]
    test = df.iloc[-ntest:]

    scaler = StandardScaler()
    train_scaled = scaler.fit_transform(train)
    test_scaled = scaler.transform(test)

    y = train[target_col].to_numpy()
    scaler.fit_transform(y.reshape(-1, 1))
    scaled_df = pd.DataFrame(np.concatenate((train_scaled, test_scaled)), columns=df.columns, index=df.index)
    return scaled_df, scaler

get_scaled_dataframe(df, [], 'log_returns')


In [29]:
exclude_cols = []
target_col = 'log_returns'
ntest=21

In [30]:
train = df.iloc[:-ntest]
test = df.iloc[-ntest:]

In [31]:
scaler_orig = StandardScaler()
train_scaled = scaler_orig.fit_transform(train)
test_scaled = scaler_orig.transform(test)

In [39]:
from models.transformer.my_functrions import make_dataset

Xtrain, Ytrain, Xtest, Ytest, XVal, YVal, scaler = make_dataset(df, target_col='log_returns', exclude_cols=[], timestep=10, ntest=ntest, horizon=21)

X.shape (3080, 10, 38) Y.shape (3080, 21)


In [40]:
Ytest

array([[-0.24977587, -0.0989844 ,  0.69665939,  0.06983557,  0.91348648,
        -0.1550632 , -0.11824151, -0.01813808, -1.11005927,  0.50640595,
         0.25911439, -0.17970416,  0.57899408, -0.6000471 , -0.44637812,
        -0.7450495 , -0.06984866, -0.07133242, -0.6310154 , -0.22666163,
        -0.05671485],
       [-0.0989844 ,  0.69665939,  0.06983557,  0.91348648, -0.1550632 ,
        -0.11824151, -0.01813808, -1.11005927,  0.50640595,  0.25911439,
        -0.17970416,  0.57899408, -0.6000471 , -0.44637812, -0.7450495 ,
        -0.06984866, -0.07133242, -0.6310154 , -0.22666163, -0.05671485,
         0.10642935],
       [ 0.69665939,  0.06983557,  0.91348648, -0.1550632 , -0.11824151,
        -0.01813808, -1.11005927,  0.50640595,  0.25911439, -0.17970416,
         0.57899408, -0.6000471 , -0.44637812, -0.7450495 , -0.06984866,
        -0.07133242, -0.6310154 , -0.22666163, -0.05671485,  0.10642935,
         0.21698017],
       [ 0.06983557,  0.91348648, -0.1550632 , -0.11824151

In [47]:
Ytest.T[0]

array([-0.24977587, -0.0989844 ,  0.69665939,  0.06983557,  0.91348648,
       -0.1550632 , -0.11824151, -0.01813808, -1.11005927,  0.50640595,
        0.25911439, -0.17970416,  0.57899408, -0.6000471 , -0.44637812,
       -0.7450495 , -0.06984866, -0.07133242, -0.6310154 , -0.22666163,
       -0.05671485])

In [50]:
kek  = scaler.inverse_transform(Ytest.T[-1].reshape(-1, 1))

In [51]:
kek.cumsum()

array([-0.00078046,  0.00467655,  0.01436024,  0.00860529,  0.02191124,
        0.00101492,  0.00317119,  0.01203679, -0.00191058,  0.00172532,
        0.01557428, -0.01769612, -0.01234159, -0.00320226,  0.00236107,
        0.04667057,  0.03449946,  0.03292916,  0.01535137,  0.00055879,
        0.01042884])

In [67]:
np.random.seed(0)

s = pd.Series(np.random.random(10))

print(s.values)

# [ 0.5488135   0.71518937  0.60276338  0.54488318  0.4236548   0.64589411
#   0.43758721  0.891773    0.96366276  0.38344152]

# t = np.log(s).diff()
# t.iat[0] = np.log(s.iat[0])
# res = np.exp(t.cumsum())
#
# print(res.values)

# [ 0.5488135   0.71518937  0.60276338  0.54488318  0.4236548   0.64589411
#   0.43758721  0.891773    0.96366276  0.38344152]

[0.5488135  0.71518937 0.60276338 0.54488318 0.4236548  0.64589411
 0.43758721 0.891773   0.96366276 0.38344152]


In [68]:
t = np.log(s).diff()

In [69]:
t

0         NaN
1    0.264789
2   -0.171023
3   -0.100953
4   -0.251652
5    0.421717
6   -0.389360
7    0.711936
8    0.077530
9   -0.921554
dtype: float64

In [72]:
t.iat[0] = np.log(s.iat[0])
t

0   -0.599997
1    0.264789
2   -0.171023
3   -0.100953
4   -0.251652
5    0.421717
6   -0.389360
7    0.711936
8    0.077530
9   -0.921554
dtype: float64