In [2]:

import yfinance as yf, pandas as pd
from ta import add_all_ta_features
import pandas as pd
import numpy as np
from pandas import DataFrame
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from ta.utils import dropna
import requests


In [3]:
def calculate_correlation(df, target_column, correlation_method='pearson'):
    """Correlate every column of ``df`` with ``target_column``.

    Args:
        df: DataFrame whose columns are compared against the target column.
        target_column: Label of the reference column in ``df``.
        correlation_method: Forwarded to ``Series.corr`` as ``method``.

    Returns:
        A list of ``{'column': label, 'correlation': value}`` dicts ordered
        from the highest correlation to the lowest. Entries whose correlation
        is NaN (for example constant columns) are placed last, in their
        original column order.

    Raises:
        Exception: Whatever ``Series.corr`` raises; the label of the column
            being processed is printed before the error is re-raised.
    """
    res = []
    for column in df.columns:
        try:
            correlation = df[target_column].corr(df[column], method=correlation_method)
        except Exception:
            # Surface which column broke the computation, then propagate.
            print(column)
            raise
        res.append({'column': column, 'correlation': correlation})

    def _rank(entry):
        # NaN compares False against everything, which silently corrupts a
        # plain sort; push NaN entries into their own trailing bucket instead.
        value = entry['correlation']
        if pd.isna(value):
            return (1, 0.0)
        return (0, -value)

    # Ascending sort on the negated value == descending by correlation, and
    # ties keep their original column order (the sort is stable).
    return sorted(res, key=_rank)

def get_cols_with_correlation_gte(df, target_column, correlation_method='pearson', thresh=0.1):
    """List the columns whose absolute correlation with the target reaches ``thresh``.

    Args:
        df: DataFrame to analyse.
        target_column: Label of the reference column.
        correlation_method: Forwarded to ``calculate_correlation``.
        thresh: Minimum absolute correlation a column needs to be kept.

    Returns:
        Column labels in the order produced by ``calculate_correlation``.
    """
    selected = []
    for entry in calculate_correlation(df, target_column, correlation_method):
        strength = abs(entry['correlation'])
        if strength >= thresh:
            selected.append(entry['column'])
    return selected

def parse_time_of_tradingview(df, column_name='time'):
    """Parse a timestamp column into dates and use it as the index.

    The caller's frame is left untouched; all changes happen on a copy.

    Args:
        df: DataFrame containing the timestamp strings.
        column_name: Label of the column holding ``%Y-%m-%dT%H:%M:%S`` strings.

    Returns:
        A new DataFrame indexed by ``column_name``, whose index values are
        ``datetime.date`` objects. Strings that do not match the format are
        coerced to ``NaT`` rather than raising.
    """
    parsed = pd.to_datetime(df[column_name], format='%Y-%m-%dT%H:%M:%S', errors='coerce', utc=True)
    # Work on a copy so the raw strings in the caller's frame are not replaced.
    result = df.copy()
    result[column_name] = parsed.dt.date
    return result.set_index([column_name])

In [4]:


class DatasetMaker:
    """Builds a feature dataset for a single yfinance ticker.

    The frame in ``self.df`` starts as the ticker's price history enriched
    with every indicator produced by ``ta.add_all_ta_features``. ``prepare``
    then appends log returns and the fear & greed index, fills gaps, keeps
    only the requested columns and drops incomplete rows.
    """

    def __init__(self, ticker='BTC-USD'):
        """Download the full price history of ``ticker`` and attach TA features.

        Args:
            ticker: Symbol passed to ``yfinance.Ticker``.
        """
        self.tick = yf.Ticker(ticker)
        self.df = self.tick.history(period="max")
        self.df = add_all_ta_features(self.df, open="Open", high="High", low="Low", close="Close", volume="Volume")
        self.df.index.name = 'timestamp'
        # Strip the timezone so the index can be merged with the tz-naive
        # timestamps produced in append_greed_fear.
        self.df.index = self.df.index.tz_localize(None)

    def get_selected_rows(self, rows):
        """Return ``rows`` when truthy, otherwise the module-level default column list."""
        return rows or cols_corelated_01_with_log_rets

    def prepare(self, selected_rows=None):
        """Run the whole pipeline and return the resulting frame.

        Args:
            selected_rows: Column labels to keep. ``None`` selects the default
                list returned by ``get_selected_rows``.

        Returns:
            ``self.df`` restricted to the selected columns, with every row
            containing a NaN removed.
        """
        if selected_rows is None:
            selected_rows = self.get_selected_rows(None)
        self.append_rows()
        self.remove_nan()
        self.drop_columns(selected_rows)

        self.df = self.df.dropna(how='any')
        return self.df

    def save(self, name, path='./datasets/prepared'):
        """Write ``self.df`` to ``{path}/{name}`` as CSV.

        Args:
            name: File name, optionally with sub-directories (e.g. ``'ada/x.csv'``).
            path: Base output directory.
        """
        import os  # local import keeps this block self-contained

        target = f'{path}/{name}'
        parent = os.path.dirname(target)
        if parent:
            # to_csv does not create missing directories on its own.
            os.makedirs(parent, exist_ok=True)
        self.df.to_csv(target, index_label='timestamp')

    def append_rows(self):
        """Append all derived columns (log returns, fear & greed) to ``self.df``."""
        self.append_log_returns()
        self.append_greed_fear()
        #self.append_btc_dominance()

    def remove_nan(self):
        """Mean-fill the fear & greed columns and combine them with the log series.

        After this call ``fear_greed`` holds the mean-filled index plus
        ``log_close`` and ``diff_fear_greed`` holds the mean-filled diff plus
        ``log_returns``.
        NOTE(review): the additions are kept exactly as the original code
        computed them; confirm that mixing the index with log prices is
        intended and not a leftover experiment.
        """
        fear_greed = self.df['fear_greed']
        diff_fear_greed = self.df['diff_fear_greed']
        # Plain reassignment instead of discarded / chained in-place fillna
        # calls, which are no-ops or unreliable on column selections.
        self.df['fear_greed'] = fear_greed.fillna(fear_greed.mean()) + self.df['log_close']
        self.df['diff_fear_greed'] = diff_fear_greed.fillna(diff_fear_greed.mean()) + self.df['log_returns']

    def append_log_returns(self):
        """Add ``log_close`` (log of Close) and ``log_returns`` (its first difference)."""
        self.df['log_close'] = np.log(self.df['Close'])
        self.df['log_returns'] = self.df['log_close'].diff()

    def append_greed_fear(self):
        """Fetch the fear & greed index and left-join it onto ``self.df`` by timestamp.

        Adds two columns: ``fear_greed`` (index value as float) and
        ``diff_fear_greed`` (change versus the previous observation in time).

        Raises:
            requests.HTTPError: If the API answers with an error status.
        """
        response = requests.get('https://api.alternative.me/fng/?limit=0', timeout=30)
        response.raise_for_status()
        payload = response.json()

        fear_greed = (
            pd.DataFrame(payload['data'])[['timestamp', 'value']]
            .assign(
                # Epoch seconds may arrive as strings; convert to numbers
                # before interpreting them with unit='s'.
                timestamp=lambda frame: pd.to_datetime(pd.to_numeric(frame['timestamp']), unit='s'),
                value=lambda frame: frame['value'].astype(float),
            )
            .rename(columns={'value': 'fear_greed'})
            .set_index('timestamp')
            # Chronological order is required so that diff() measures the
            # change from the previous observation rather than from a later one.
            .sort_index()
        )
        fear_greed['diff_fear_greed'] = fear_greed['fear_greed'].diff()
        self.df = pd.merge(self.df, fear_greed, right_index=True, left_index=True, how='left')

    def append_btc_dominance(self):
        """Placeholder; not implemented."""
        pass

    def drop_columns(self, selected_rows):
        """Keep only the columns listed in ``selected_rows`` (despite the name, nothing else is dropped explicitly)."""
        self.df = self.df[selected_rows]


# Default feature selection used by DatasetMaker.get_selected_rows.
# The order is significant: it becomes the column order of the prepared frame.
cols_corelated_01_with_log_rets = [
    # price series and target
    "log_close", "Close",
    "log_returns", "diff_fear_greed",
    # indicator columns
    "volatility_kcp", "volume_em", "volume_vpt",
    "momentum_stoch_rsi", "volatility_bbp", "momentum_stoch",
    "momentum_wr", "volatility_dcp", "momentum_rsi",
    "momentum_uo", "trend_cci", "volatility_kchi",
    "momentum_roc", "volatility_bbhi", "volume_fi",
    "trend_adx_pos", "volume_cmf", "momentum_ppo_hist",
    "momentum_stoch_rsi_k", "trend_vortex_ind_pos", "volume_mfi",
    "momentum_stoch_signal", "trend_vortex_ind_diff", "trend_psar_up_indicator",
    "trend_macd_diff", "trend_stc", "volume_sma_em",
    "trend_aroon_ind", "momentum_tsi", "trend_aroon_down",
    "trend_vortex_ind_neg", "trend_psar_down_indicator", "trend_adx_neg",
    "volatility_bbli",
    # deliberately left out: 'volatility_kcli'
]


In [None]:

# Build the ADA-USD dataset, keeping only the columns listed in
# cols_corelated_01_with_log_rets.
# NOTE(review): DatasetMaker downloads price history and prepare() calls an
# HTTP API, so this cell needs network access and is slow to re-run.
dm = DatasetMaker('ADA-USD')
dm.prepare(cols_corelated_01_with_log_rets)

In [None]:
# Persist the prepared frame as CSV under the DatasetMaker output directory.
dm.save('ada/ta_corr_01_fd.csv')

In [None]:
# Alias the prepared frame for the cells below; the bare name displays it.
df = dm.df
df

In [None]:
# Running sum of the log returns over the whole prepared frame.
df['log_returns'].cumsum()


In [None]:
def get_scaled_dataframe(df, exclude_cols, target_col, ntest=21):
    """Standard-scale a frame using statistics of the training rows only.

    The last ``ntest`` rows form the test split; every other row is training
    data. Scaling parameters are learned from the training rows and applied
    to both splits.

    Args:
        df: Source DataFrame.
        exclude_cols: Column labels dropped before scaling.
        target_col: Column whose training values the returned scaler is
            finally fitted on.
        ntest: Number of trailing rows held out as the test split. ``0``
            keeps every row in the training split.

    Returns:
        ``(scaled_df, scaler)``. ``scaled_df`` has the columns and index of
        ``df`` (minus ``exclude_cols``). ``scaler`` has been re-fitted on the
        training values of ``target_col`` alone, so it transforms and
        inverse-transforms single-column target arrays.

    Raises:
        ValueError: If ``ntest`` is negative.
    """
    if ntest < 0:
        raise ValueError('ntest must be non-negative')

    df = df.drop(exclude_cols, axis=1)
    # Use an explicit split position: ``iloc[:-ntest]`` with ntest == 0 would
    # yield an EMPTY training split instead of the full frame.
    split = max(len(df) - ntest, 0)
    train = df.iloc[:split]
    test = df.iloc[split:]

    scaler = StandardScaler()
    train_scaled = scaler.fit_transform(train)
    if len(test):
        test_scaled = scaler.transform(test)
    else:
        test_scaled = np.empty((0, train_scaled.shape[1]))

    # Re-fit the same scaler on the target column only; the transformed
    # values are not needed here, just the fitted parameters.
    scaler.fit(train[target_col].to_numpy().reshape(-1, 1))
    scaled_df = pd.DataFrame(np.concatenate((train_scaled, test_scaled)), columns=df.columns, index=df.index)
    return scaled_df, scaler

# Try the helper on the prepared frame; the returned (scaled_df, scaler)
# tuple is only displayed, not stored.
get_scaled_dataframe(df, [], 'log_returns')


In [None]:
# Parameters for the manual walk-through of the split/scale steps below.
exclude_cols = []
target_col = 'log_returns'
ntest=21

In [None]:
# Hold out the last ntest rows as the test split.
train = df.iloc[:-ntest]
test = df.iloc[-ntest:]

In [None]:
# Fit the scaler on the training rows only, then apply it to the test rows.
scaler_orig = StandardScaler()
train_scaled = scaler_orig.fit_transform(train)
test_scaled = scaler_orig.transform(test)

In [None]:
# make_dataset comes from a project module that is not part of this notebook.
# NOTE(review): presumably it builds windows of `timestep` inputs and
# `horizon` targets and returns a scaler for the target -- confirm in its source.
from models.transformer.my_functrions import make_dataset

Xtrain, Ytrain, Xtest, Ytest, XVal, YVal, scaler = make_dataset(df, target_col='log_returns', exclude_cols=[], timestep=10, ntest=ntest, horizon=21)

In [None]:
# Display the test targets.
Ytest

In [None]:
# First column of Ytest (row 0 of its transpose).
Ytest.T[0]

In [None]:
# Undo the scaling on the last column of Ytest; reshape(-1, 1) turns it into
# a single-column 2-D array before it is handed to inverse_transform.
kek  = scaler.inverse_transform(Ytest.T[-1].reshape(-1, 1))

In [None]:
# Cumulative sum of the un-scaled values.
kek.cumsum()

In [None]:
# Toy example: a seeded random series used to check that
# log -> diff -> cumsum -> exp reproduces the original values
# (see the commented-out steps and expected outputs below).
np.random.seed(0)

s = pd.Series(np.random.random(10))

print(s.values)

# [ 0.5488135   0.71518937  0.60276338  0.54488318  0.4236548   0.64589411
#   0.43758721  0.891773    0.96366276  0.38344152]

# t = np.log(s).diff()
# t.iat[0] = np.log(s.iat[0])
# res = np.exp(t.cumsum())
#
# print(res.values)

# [ 0.5488135   0.71518937  0.60276338  0.54488318  0.4236548   0.64589411
#   0.43758721  0.891773    0.96366276  0.38344152]

In [None]:
# Log returns of the toy series; diff() leaves NaN in the first position.
t = np.log(s).diff()

In [None]:
# Inspect the log returns.
t

In [None]:
# Seed the first entry with log(s[0]) so that a cumulative sum of t
# yields log(s) for every position.
t.iat[0] = np.log(s.iat[0])
t

In [None]:
def get_scaled_dataframe(df, exclude_cols, target_col, ntest=21):
    """Standard-scale a frame using statistics of the training rows only.

    The last ``ntest`` rows form the test split; every other row is training
    data. Scaling parameters are learned from the training rows and applied
    to both splits.

    Args:
        df: Source DataFrame.
        exclude_cols: Column labels dropped before scaling.
        target_col: Column whose training values the returned scaler is
            finally fitted on.
        ntest: Number of trailing rows held out as the test split. ``0``
            keeps every row in the training split.

    Returns:
        ``(scaled_df, scaler)``. ``scaled_df`` has the columns and index of
        ``df`` (minus ``exclude_cols``). ``scaler`` has been re-fitted on the
        training values of ``target_col`` alone, so it transforms and
        inverse-transforms single-column target arrays.

    Raises:
        ValueError: If ``ntest`` is negative.
    """
    if ntest < 0:
        raise ValueError('ntest must be non-negative')

    df = df.drop(exclude_cols, axis=1)
    # Use an explicit split position: ``iloc[:-ntest]`` with ntest == 0 would
    # yield an EMPTY training split instead of the full frame.
    split = max(len(df) - ntest, 0)
    train = df.iloc[:split]
    test = df.iloc[split:]

    scaler = StandardScaler()
    train_scaled = scaler.fit_transform(train)
    if len(test):
        test_scaled = scaler.transform(test)
    else:
        test_scaled = np.empty((0, train_scaled.shape[1]))

    # Re-fit the same scaler on the target column only; the transformed
    # values are not needed here, just the fitted parameters.
    scaler.fit(train[target_col].to_numpy().reshape(-1, 1))
    scaled_df = pd.DataFrame(np.concatenate((train_scaled, test_scaled)), columns=df.columns, index=df.index)
    return scaled_df, scaler

# Try the helper on the prepared frame; the returned (scaled_df, scaler)
# tuple is only displayed, not stored.
get_scaled_dataframe(df, [], 'log_returns')


In [29]:
# Parameters for the manual walk-through of the split/scale steps below.
exclude_cols = []
target_col = 'log_returns'
ntest=21

In [30]:
# Hold out the last ntest rows as the test split.
train = df.iloc[:-ntest]
test = df.iloc[-ntest:]

In [31]:
# Fit the scaler on the training rows only, then apply it to the test rows.
scaler_orig = StandardScaler()
train_scaled = scaler_orig.fit_transform(train)
test_scaled = scaler_orig.transform(test)

In [39]:
# make_dataset comes from a project module that is not part of this notebook.
# NOTE(review): presumably it builds windows of `timestep` inputs and
# `horizon` targets and returns a scaler for the target -- confirm in its source.
from models.transformer.my_functrions import make_dataset

Xtrain, Ytrain, Xtest, Ytest, XVal, YVal, scaler = make_dataset(df, target_col='log_returns', exclude_cols=[], timestep=10, ntest=ntest, horizon=21)

X.shape (3080, 10, 38) Y.shape (3080, 21)


In [40]:
# Display the test targets.
Ytest

array([[-0.24977587, -0.0989844 ,  0.69665939,  0.06983557,  0.91348648,
        -0.1550632 , -0.11824151, -0.01813808, -1.11005927,  0.50640595,
         0.25911439, -0.17970416,  0.57899408, -0.6000471 , -0.44637812,
        -0.7450495 , -0.06984866, -0.07133242, -0.6310154 , -0.22666163,
        -0.05671485],
       [-0.0989844 ,  0.69665939,  0.06983557,  0.91348648, -0.1550632 ,
        -0.11824151, -0.01813808, -1.11005927,  0.50640595,  0.25911439,
        -0.17970416,  0.57899408, -0.6000471 , -0.44637812, -0.7450495 ,
        -0.06984866, -0.07133242, -0.6310154 , -0.22666163, -0.05671485,
         0.10642935],
       [ 0.69665939,  0.06983557,  0.91348648, -0.1550632 , -0.11824151,
        -0.01813808, -1.11005927,  0.50640595,  0.25911439, -0.17970416,
         0.57899408, -0.6000471 , -0.44637812, -0.7450495 , -0.06984866,
        -0.07133242, -0.6310154 , -0.22666163, -0.05671485,  0.10642935,
         0.21698017],
       [ 0.06983557,  0.91348648, -0.1550632 , -0.11824151

In [47]:
# First column of Ytest (row 0 of its transpose).
Ytest.T[0]

array([-0.24977587, -0.0989844 ,  0.69665939,  0.06983557,  0.91348648,
       -0.1550632 , -0.11824151, -0.01813808, -1.11005927,  0.50640595,
        0.25911439, -0.17970416,  0.57899408, -0.6000471 , -0.44637812,
       -0.7450495 , -0.06984866, -0.07133242, -0.6310154 , -0.22666163,
       -0.05671485])

In [50]:
# Undo the scaling on the last column of Ytest; reshape(-1, 1) turns it into
# a single-column 2-D array before it is handed to inverse_transform.
kek  = scaler.inverse_transform(Ytest.T[-1].reshape(-1, 1))

In [51]:
# Cumulative sum of the un-scaled values.
kek.cumsum()

array([-0.00078046,  0.00467655,  0.01436024,  0.00860529,  0.02191124,
        0.00101492,  0.00317119,  0.01203679, -0.00191058,  0.00172532,
        0.01557428, -0.01769612, -0.01234159, -0.00320226,  0.00236107,
        0.04667057,  0.03449946,  0.03292916,  0.01535137,  0.00055879,
        0.01042884])

In [67]:
# Toy example: a seeded random series used to check that
# log -> diff -> cumsum -> exp reproduces the original values
# (see the commented-out steps and expected outputs below).
np.random.seed(0)

s = pd.Series(np.random.random(10))

print(s.values)

# [ 0.5488135   0.71518937  0.60276338  0.54488318  0.4236548   0.64589411
#   0.43758721  0.891773    0.96366276  0.38344152]

# t = np.log(s).diff()
# t.iat[0] = np.log(s.iat[0])
# res = np.exp(t.cumsum())
#
# print(res.values)

# [ 0.5488135   0.71518937  0.60276338  0.54488318  0.4236548   0.64589411
#   0.43758721  0.891773    0.96366276  0.38344152]

[0.5488135  0.71518937 0.60276338 0.54488318 0.4236548  0.64589411
 0.43758721 0.891773   0.96366276 0.38344152]


In [68]:
# Log returns of the toy series; diff() leaves NaN in the first position.
t = np.log(s).diff()

In [69]:
# Inspect the log returns.
t

0         NaN
1    0.264789
2   -0.171023
3   -0.100953
4   -0.251652
5    0.421717
6   -0.389360
7    0.711936
8    0.077530
9   -0.921554
dtype: float64

In [72]:
# Seed the first entry with log(s[0]) so that a cumulative sum of t
# yields log(s) for every position.
t.iat[0] = np.log(s.iat[0])
t

0   -0.599997
1    0.264789
2   -0.171023
3   -0.100953
4   -0.251652
5    0.421717
6   -0.389360
7    0.711936
8    0.077530
9   -0.921554
dtype: float64