In [180]:
import os
import warnings

warnings.filterwarnings('ignore')
from pathlib import Path
import pandas as pd
from tqdm import tqdm
import exchange_calendars as xcals
import requests
import json

In [17]:
always_open_calendars = xcals.get_calendar('24/7')

## Ticker 가져오기

In [77]:
ticker_url = 'http://127.0.0.1:8000/upbit/ticker/'
response = requests.get('http://127.0.0.1:8000/upbit/ticker/')
ticker_df = pd.DataFrame(json.loads(response.text))

In [84]:
ticker_df = ticker_df[ticker_df['market'].str.contains('KRW')].reset_index().drop('index', axis=1)

In [85]:
equities = ticker_df.reset_index().rename(columns={'index': 'sid', 'market': 'ticker', 'korean_name': 'name'}).loc[:, ['sid', 'ticker', 'name']]

## Pickle 삭제

In [178]:
paths = Path('data').glob('*.pkl')

In [181]:
for path in paths:
    os.remove(path)

## Price Data 가져오기

In [86]:
paths = Path('data').glob('*.csv')

In [87]:
paths = list(map(lambda x: x, paths))

In [88]:
price = (pd.read_csv(paths[0], parse_dates=['candle_date_time_utc', 'candle_date_time_kst'])
         .drop('Unnamed: 0', axis=1)
         .rename(
    columns={'candle_date_time_kst': 'date', 'opening_price': 'open', 'high_price': 'high', 'low_price': 'low',
             'trade_price': 'close', 'candle_acc_trade_volume': 'volume'})
         .set_index(['date'])
         ).loc[:, ['open', 'high', 'low', 'close', 'volume']]

In [89]:
price = price.resample('1T').last()

In [90]:
ohlc = price[['high', 'low', 'open', 'close']].fillna(method='ffill')

In [91]:
v = price[['volume']].fillna(0)

In [92]:
price = pd.concat([ohlc, v], axis=1)

In [141]:
min_time = price.index.min().strftime("%Y-%m-%d")
max_time = price.index.max().strftime("%Y-%m-%d")

In [145]:
min_time.strftime("%Y-%m-%d")

'2020-12-31'

In [146]:
time_range = always_open_calendars.minutes_for_sessions_in_range(min_time.strftime("%Y-%m-%d"), max_time.strftime("%Y-%m-%d"))

In [147]:
time_range

DatetimeIndex(['2020-12-31 00:00:00+00:00', '2020-12-31 00:01:00+00:00',
               '2020-12-31 00:02:00+00:00', '2020-12-31 00:03:00+00:00',
               '2020-12-31 00:04:00+00:00', '2020-12-31 00:05:00+00:00',
               '2020-12-31 00:06:00+00:00', '2020-12-31 00:07:00+00:00',
               '2020-12-31 00:08:00+00:00', '2020-12-31 00:09:00+00:00',
               ...
               '2022-03-17 23:50:00+00:00', '2022-03-17 23:51:00+00:00',
               '2022-03-17 23:52:00+00:00', '2022-03-17 23:53:00+00:00',
               '2022-03-17 23:54:00+00:00', '2022-03-17 23:55:00+00:00',
               '2022-03-17 23:56:00+00:00', '2022-03-17 23:57:00+00:00',
               '2022-03-17 23:58:00+00:00', '2022-03-17 23:59:00+00:00'],
              dtype='datetime64[ns, UTC]', length=636480, freq=None)

In [93]:
price = price.tz_localize('UTC')

## Data Preprocessing

In [154]:
def preprocessing_data(ticker):
    price = (pd.read_csv(f"data/{ticker}.csv", parse_dates=['candle_date_time_utc', 'candle_date_time_kst'])
             .rename(columns={
                    'candle_date_time_kst': 'date',
                    'opening_price': 'open',
                    'high_price': 'high',
                    'low_price': 'low',
                    'trade_price': 'close',
                    'candle_acc_trade_volume': 'volume'
                })
             .set_index(['date'])
             ).loc[:, ['open', 'high', 'low', 'close', 'volume']]
    price = price.tz_localize('UTC')
    # Get time range
    min_time = price.index.min().strftime("%Y-%m-%d")
    max_time = price.index.max().strftime("%Y-%m-%d")

    time_range = always_open_calendars.minutes_for_sessions_in_range(min_time, max_time)

    price = price.reindex(index=time_range)
    # price = price.resample('1T').last()
    ohlc = price[['high', 'low', 'open', 'close']].fillna(method='ffill')
    v = price[['volume']].fillna(0)
    return pd.concat([ohlc, v], axis=1).dropna()


## Data Save

In [155]:
DATA_STORE = 'upbit.h5'

In [156]:
for sid, ticker, name in tqdm(equities.values):
    try:
        price = preprocessing_data(ticker)
        with pd.HDFStore(DATA_STORE) as store:
            store.put(key=f"upbit/{sid}", value=price)
    except Exception as e:
        print(e)

 18%|█▊        | 19/107 [00:18<01:40,  1.14s/it]

[Errno 2] No such file or directory: 'data/KRW-CELO.csv'


100%|██████████| 107/107 [01:50<00:00,  1.03s/it]


In [157]:
def create_split_table():
    with pd.HDFStore(DATA_STORE) as store:
        store.put('upbit/splits', pd.DataFrame(columns=['sid', 'effective_date', 'ratio'], data=[[1, pd.to_datetime('2020-12-31'), 1.0]]), format='t')

In [158]:
create_split_table()

In [159]:
with pd.HDFStore(DATA_STORE) as store:
    store.put(key='upbit/equities', value=equities)

## Show result

In [160]:
with pd.HDFStore(DATA_STORE) as store:
    print(store.keys())

['/upbit/0', '/upbit/1', '/upbit/10', '/upbit/100', '/upbit/101', '/upbit/102', '/upbit/103', '/upbit/104', '/upbit/105', '/upbit/106', '/upbit/11', '/upbit/12', '/upbit/13', '/upbit/14', '/upbit/15', '/upbit/16', '/upbit/17', '/upbit/18', '/upbit/2', '/upbit/20', '/upbit/21', '/upbit/22', '/upbit/23', '/upbit/24', '/upbit/25', '/upbit/26', '/upbit/27', '/upbit/28', '/upbit/29', '/upbit/3', '/upbit/30', '/upbit/31', '/upbit/32', '/upbit/33', '/upbit/34', '/upbit/35', '/upbit/36', '/upbit/37', '/upbit/38', '/upbit/39', '/upbit/4', '/upbit/40', '/upbit/41', '/upbit/42', '/upbit/43', '/upbit/44', '/upbit/45', '/upbit/46', '/upbit/47', '/upbit/48', '/upbit/49', '/upbit/5', '/upbit/50', '/upbit/51', '/upbit/52', '/upbit/53', '/upbit/54', '/upbit/55', '/upbit/56', '/upbit/57', '/upbit/58', '/upbit/59', '/upbit/6', '/upbit/60', '/upbit/61', '/upbit/62', '/upbit/63', '/upbit/64', '/upbit/65', '/upbit/66', '/upbit/67', '/upbit/68', '/upbit/69', '/upbit/7', '/upbit/70', '/upbit/71', '/upbit/72',

In [164]:
with pd.HDFStore(DATA_STORE) as store:
    test = store['upbit/15']

In [165]:
test.tail()

Unnamed: 0,high,low,open,close,volume
2022-03-19 23:55:00+00:00,50849000.0,50723000.0,50833000.0,50770000.0,0.0
2022-03-19 23:56:00+00:00,50849000.0,50723000.0,50833000.0,50770000.0,0.0
2022-03-19 23:57:00+00:00,50849000.0,50723000.0,50833000.0,50770000.0,0.0
2022-03-19 23:58:00+00:00,50849000.0,50723000.0,50833000.0,50770000.0,0.0
2022-03-19 23:59:00+00:00,50849000.0,50723000.0,50833000.0,50770000.0,0.0


In [166]:
test

Unnamed: 0,high,low,open,close,volume
2021-01-01 05:40:00+00:00,32097000.0,32096000.0,32097000.0,32097000.0,0.027209
2021-01-01 05:41:00+00:00,32097000.0,32080000.0,32097000.0,32095000.0,0.147304
2021-01-01 05:42:00+00:00,32098000.0,32059000.0,32095000.0,32098000.0,0.428093
2021-01-01 05:43:00+00:00,32098000.0,32094000.0,32098000.0,32098000.0,0.239791
2021-01-01 05:44:00+00:00,32071000.0,32049000.0,32071000.0,32049000.0,2.696126
...,...,...,...,...,...
2022-03-19 23:55:00+00:00,50849000.0,50723000.0,50833000.0,50770000.0,0.000000
2022-03-19 23:56:00+00:00,50849000.0,50723000.0,50833000.0,50770000.0,0.000000
2022-03-19 23:57:00+00:00,50849000.0,50723000.0,50833000.0,50770000.0,0.000000
2022-03-19 23:58:00+00:00,50849000.0,50723000.0,50833000.0,50770000.0,0.000000


In [169]:
equities[equities['sid'] == 15]

Unnamed: 0,sid,ticker,name
15,15,KRW-BTC,비트코인
