In [1]:
import pandas as pd
import numpy as np
import talib as ta
from pycoingecko import CoinGeckoAPI
from datetime import datetime as dt, timedelta as td
from sklearn.preprocessing import StandardScaler as ss, MinMaxScaler as mms

In [2]:
config = {
    'id': 'bitcoin',
    'vs_curr': 'usd',
    'range': 24, # max 24
    'save_csv_path': 'dataset3/crypto_ta_btc.csv'
}

In [3]:
now = dt.strptime(dt.now().strftime("%Y-%m-%d %H:00:00"),"%Y-%m-%d %H:00:00")
timestamp_list = [{'from': dt.timestamp(now - td(days=(x+1)*90)), 'to': dt.timestamp(now - td(days=x*90))} for x in reversed(range(config['range']))]

print(timestamp_list)

[{'from': 1443290400.0, 'to': 1451070000.0}, {'from': 1451070000.0, 'to': 1458846000.0}, {'from': 1458846000.0, 'to': 1466618400.0}, {'from': 1466618400.0, 'to': 1474394400.0}, {'from': 1474394400.0, 'to': 1482174000.0}, {'from': 1482174000.0, 'to': 1489950000.0}, {'from': 1489950000.0, 'to': 1497722400.0}, {'from': 1497722400.0, 'to': 1505498400.0}, {'from': 1505498400.0, 'to': 1513278000.0}, {'from': 1513278000.0, 'to': 1521054000.0}, {'from': 1521054000.0, 'to': 1528826400.0}, {'from': 1528826400.0, 'to': 1536602400.0}, {'from': 1536602400.0, 'to': 1544382000.0}, {'from': 1544382000.0, 'to': 1552158000.0}, {'from': 1552158000.0, 'to': 1559930400.0}, {'from': 1559930400.0, 'to': 1567706400.0}, {'from': 1567706400.0, 'to': 1575486000.0}, {'from': 1575486000.0, 'to': 1583262000.0}, {'from': 1583262000.0, 'to': 1591034400.0}, {'from': 1591034400.0, 'to': 1598810400.0}, {'from': 1598810400.0, 'to': 1606590000.0}, {'from': 1606590000.0, 'to': 1614366000.0}, {'from': 1614366000.0, 'to': 16

In [4]:
cg = CoinGeckoAPI()
temp_closes = []
temp_volumes = []

for timestamp in timestamp_list:
    mk_chart = cg.get_coin_market_chart_range_by_id(id=config['id'], vs_currency=config['vs_curr'], from_timestamp=timestamp['from'], to_timestamp=timestamp['to'])
    for i in range(len(mk_chart['prices'])):
        temp_closes.append(mk_chart['prices'][i][1])
        temp_volumes.append(mk_chart['total_volumes'][i][1])

closes = np.array(temp_closes)
volumes = np.array(temp_volumes)

print(closes)
print(volumes)

[  232.8347       239.2765       237.2262     ... 48494.59325882
 48742.85846749 48714.4185389 ]
[7.57765266e+07 2.45683532e+08 1.15160471e+08 ... 3.52250915e+10
 3.50337362e+10 3.37745393e+10]


In [5]:
sma_7 = ta.ROC(ta.SMA(closes, 7), timeperiod=1)
sma_25 = ta.ROC(ta.SMA(closes, 25), timeperiod=1)
sma_99 = ta.ROC(ta.SMA(closes, 99), timeperiod=1)
sma_200 = ta.ROC(ta.SMA(closes, 200), timeperiod=1)
ema_9 = ta.ROC(ta.EMA(closes, 9), timeperiod=1)
ema_26 = ta.ROC(ta.EMA(closes, 26), timeperiod=1)
sma_vol = ta.ROC(ta.SMA(volumes, 20), timeperiod=1)
bbands_up, bbands_mid, bbands_low = ta.BBANDS(closes, timeperiod=20, nbdevup=2, nbdevdn=2, matype=0)
bbands_up = ta.ROC(bbands_up, timeperiod=1)
bbands_mid = ta.ROC(bbands_mid, timeperiod=1)
bbands_low = ta.ROC(bbands_low, timeperiod=1)
rsi = ta.RSI(closes, 14)
cmo = ta.CMO(closes, timeperiod=14)
macd, macdsignal, macdhist = ta.MACD(closes, fastperiod=12, slowperiod=26, signalperiod=9)
ppo = ta.PPO(closes, fastperiod=12, slowperiod=26, matype=0)
roc = ta.ROC(closes, timeperiod=1)
rocr = ta.ROCR(closes, timeperiod=1)
linearreg = ta.ROC(ta.LINEARREG(closes, timeperiod=14), timeperiod=1)
tsf = ta.ROC(ta.TSF(closes, timeperiod=14), timeperiod=1)

In [6]:
class_data = []

for n in ta.ROC(closes, timeperiod=24):
    if n < -3:
        class_data.append('V_LOW')
    elif -3 <= n <= 0:
        class_data.append('LOW')
    elif 0 < n <= 3:
        class_data.append('HIGH')
    else:
        class_data.append('V_HIGH')

In [7]:
dt = pd.DataFrame({
    'sma_7': sma_7[201:-1],
    'sma_25': sma_25[201:-1],
    'sma_99': sma_99[201:-1],
    'sma_200': sma_200[201:-1],
    'ema_8': ema_8[201:-1],
    'ema_20': ema_20[201:-1],
    'sma_vol': sma_vol[201:-1],
    'bbands_up': bbands_up[201:-1],
    'bbands_mid': bbands_mid[201:-1],
    'bbands_low': bbands_low[201:-1],
    'rsi': rsi[201:-1],
    'cmo': cmo[201:-1],
    'macd': macd[201:-1],
    'macdsignal': macdsignal[201:-1],
    'macdhist': macdhist[201:-1],
    'ppo': ppo[201:-1],
    'roc': roc[201:-1],
    'rocr': rocr[201:-1],
    'linearreg': linearreg[201:-1],
    'tsf': tsf[201:-1],
    'trend': np.array(class_data[202::])
})

dt.describe()
print(dt)

          sma_7    sma_25    sma_99   sma_200     ema_8    ema_20    sma_vol  \
0      0.389069  0.180146 -0.065214  0.251872  0.382682  0.219033  -3.532641   
1      0.498987  0.138913 -0.058568  0.255225  0.347571  0.219784  -7.599545   
2      0.231342  0.094671 -0.051021  0.252297  0.117309  0.132675  -8.452679   
3      0.198266  0.129483 -0.046158  0.250075  0.116104  0.130672 -12.107501   
4      0.340448  0.195801 -0.029580  0.260357  0.499679  0.295022 -11.063468   
...         ...       ...       ...       ...       ...       ...        ...   
22700 -0.155741 -0.137875 -0.024924  0.012674 -0.104886 -0.113915  -0.394903   
22701 -0.205752 -0.125204 -0.019734  0.015449  0.050839 -0.046804  -0.509280   
22702 -0.174705 -0.112871 -0.025720  0.016703  0.013208 -0.053574  -0.610728   
22703  0.082261 -0.004702 -0.001615  0.027847  0.322070  0.084389  -0.567466   
22704  0.415341  0.002867 -0.009235  0.029517  0.364738  0.125434  -0.689503   

       bbands_up  bbands_mid  bbands_lo

In [8]:
dt.to_csv(config['save_csv_path'], index=False)