이 문서에 있는 내용
- 1. Binance 데이터 API(ccxt)로 시세 데이터 받아오기
- 2. 예전에 만들었던 업비트 API 함수로 데이터 받아오기

In [61]:
import ccxt
import pandas as pd
from datetime import datetime
# pip install finance-datareader 패키지 이름 주의
import FinanceDataReader as fdr

In [62]:
def scrape_ohlcv(exchange, max_retries, symbol, timeframe, since, limit):
    """Download OHLCV candles by paging backwards in time.

    Starting from ``exchange.milliseconds()``, each iteration requests a
    window of ``limit`` candles that ends at the oldest candle collected so
    far, and prepends the page to the result.

    Args:
        exchange: ccxt-style exchange object providing ``milliseconds``,
            ``parse_timeframe``, ``parse8601`` and ``iso8601`` (and
            ``fetch_ohlcv`` via ``retry_fetch_ohlcv``).
        max_retries: Forwarded to ``retry_fetch_ohlcv``.
        symbol: Market symbol, e.g. ``'XRP/USDT'``.
        timeframe: Timeframe string, e.g. ``'1m'``.
        since: Checkpoint to stop at, as a millisecond timestamp or a
            string accepted by ``exchange.parse8601``. ``None`` means
            "no checkpoint": paging continues until the exchange stops
            returning older candles.
        limit: Number of candles requested per page.

    Returns:
        list: Candle rows ordered oldest to newest. Each row is whatever
        ``fetch_ohlcv`` returns; index 0 of a row is used as its timestamp.
        The list may contain candles older than ``since`` because the last
        page is kept whole.
    """
    earliest_timestamp = exchange.milliseconds()
    # parse_timeframe yields the timeframe length in seconds; convert to ms.
    timeframe_duration_in_ms = exchange.parse_timeframe(timeframe) * 1000
    window_ms = limit * timeframe_duration_in_ms
    if isinstance(since, str):
        since = exchange.parse8601(since)

    all_ohlcv = []
    while True:
        fetch_since = earliest_timestamp - window_ms
        ohlcv = retry_fetch_ohlcv(exchange, max_retries, symbol, timeframe, fetch_since, limit)
        # An empty page (or None from a failed fetch) means there is nothing
        # more to collect; indexing it would raise.
        if not ohlcv:
            break
        # if we have reached the beginning of history
        if ohlcv[0][0] >= earliest_timestamp:
            break
        earliest_timestamp = ohlcv[0][0]
        all_ohlcv = ohlcv + all_ohlcv
        if len(all_ohlcv) % 100000 == 0:
            print(len(all_ohlcv), symbol, 'candles in total from', exchange.iso8601(all_ohlcv[0][0]), 'to', exchange.iso8601(all_ohlcv[-1][0]))
        # if we have reached the checkpoint
        if since is not None and fetch_since < since:
            break
    return all_ohlcv

def retry_fetch_ohlcv(exchange, max_retries, symbol, timeframe, since, limit):
    """Call ``exchange.fetch_ohlcv`` and retry when it raises.

    Args:
        exchange: Object exposing ``fetch_ohlcv(symbol, timeframe, since, limit)``.
        max_retries: Number of additional attempts allowed after the first
            failure. Values below zero are treated as zero.
        symbol: Market symbol forwarded to ``fetch_ohlcv``.
        timeframe: Timeframe string forwarded to ``fetch_ohlcv``.
        since: Start timestamp forwarded to ``fetch_ohlcv``.
        limit: Candle count forwarded to ``fetch_ohlcv``.

    Returns:
        Whatever ``fetch_ohlcv`` returns on the first successful attempt.

    Raises:
        Exception: The error from the final attempt, re-raised unchanged
            once the retry budget is exhausted.
    """
    num_retries = 0
    while True:
        try:
            return exchange.fetch_ohlcv(symbol, timeframe, since, limit)
        except Exception:
            num_retries += 1
            # Out of budget: surface the last error instead of silently
            # returning None to the caller.
            if num_retries > max_retries:
                raise

In [63]:
# Download XRP/USDT 1-minute candles from Binance, paging back towards
# 2017-01-01 in pages of 1000 candles with max_retries=3.
binance = ccxt.binance()
all_ohlcv = scrape_ohlcv(binance, 3, 'XRP/USDT', '1m', '2017-01-01 00:00:00Z', 1000)

100000 XRP/USDT candles in total from 2021-09-18T23:00:00.000Z to 2021-11-27T08:19:00.000Z
200000 XRP/USDT candles in total from 2021-07-11T15:10:00.000Z to 2021-11-27T08:19:00.000Z
300000 XRP/USDT candles in total from 2021-05-03T04:30:00.000Z to 2021-11-27T08:19:00.000Z
400000 XRP/USDT candles in total from 2021-02-22T17:50:00.000Z to 2021-11-27T08:19:00.000Z
500000 XRP/USDT candles in total from 2020-12-15T07:10:00.000Z to 2021-11-27T08:19:00.000Z
600000 XRP/USDT candles in total from 2020-10-06T20:30:00.000Z to 2021-11-27T08:19:00.000Z
700000 XRP/USDT candles in total from 2020-07-29T09:50:00.000Z to 2021-11-27T08:19:00.000Z
800000 XRP/USDT candles in total from 2020-05-21T00:50:00.000Z to 2021-11-27T08:19:00.000Z
900000 XRP/USDT candles in total from 2020-03-12T14:10:00.000Z to 2021-11-27T08:19:00.000Z
1000000 XRP/USDT candles in total from 2020-01-03T03:30:00.000Z to 2021-11-27T08:19:00.000Z
1100000 XRP/USDT candles in total from 2019-10-25T16:50:00.000Z to 2021-11-27T08:19:00.00

In [64]:
# Put the raw candle rows into a frame and turn the millisecond timestamps
# into 'YYYY-MM-DD HH:MM:SS' strings.
# NOTE(review): datetime.fromtimestamp uses the machine's local timezone.
dat = pd.DataFrame(
    data=all_ohlcv,
    columns=['time', 'open', 'high', 'low', 'close', 'volume'],
)
dat['time'] = dat['time'].map(
    lambda ms: datetime.fromtimestamp(ms / 1000).strftime('%Y-%m-%d %H:%M:%S')
)
# dat.to_csv('XRP-USDT_1min.csv', index=False)

환율데이터 불러와서 저장

In [65]:
# pip install finance-datareader 패키지 이름 주의

import FinanceDataReader as fdr


# Load the USD/KRW series starting from 2018 and keep only the date and the
# closing rate.
ex_rate = fdr.DataReader('USD/KRW','2018')
ex_rate = ex_rate.reset_index()[['Date','Close']]
ex_rate['Date'] = ex_rate['Date'].astype('str')

# A datetime index makes the calendar preprocessing below straightforward.
ex_rate = ex_rate.set_index(['Date'])
ex_rate.index = pd.DatetimeIndex(ex_rate.index)

# Reindex onto every calendar day so dates missing from the source
# (weekends, holidays) get a row.
idx = pd.date_range(ex_rate.index[0],ex_rate.index[-1])
ex_rate = ex_rate.reindex(idx)

# Forward-fill: the missing days take the last available close
# (e.g. Saturday and Sunday take Friday's value).
ex_rate = ex_rate.ffill()
ex_rate = ex_rate.reset_index().rename(columns={"index": "Date"})

# Shift the dates forward by one day so that today's close is joined with
# tomorrow's price data. pd.Timedelta is used because `timedelta` is not
# imported anywhere in this notebook's import cell.
ex_rate['Date'] = ex_rate['Date'] + pd.Timedelta(days=1)
ex_rate['Date'] = ex_rate['Date'].astype('str')

In [66]:
# Parse the timestamp strings back into datetimes.
dat['time'] = pd.to_datetime(dat['time'])
# Build a yyyy-mm-dd string key to join against the exchange-rate table.
dat['ymd'] = dat['time'].dt.date.astype('str')
dat = dat.merge(ex_rate, how='left', left_on='ymd', right_on='Date')

# Multiply the four price columns by the joined 'Close' exchange rate.
price_columns = ['open', 'high', 'low', 'close']
dat[price_columns] = dat[price_columns].mul(dat['Close'], axis=0)

In [67]:
# Drop the helper/join columns and rename 'time' to 'date' for the output file.
dat = dat.drop(['ymd','Date','Close'],axis=1).rename(columns={'time':'date'})
# Skip the first row, then remove exact duplicate rows.
dat = dat[1:].drop_duplicates()
# Write the converted candles to CSV without the index column.
dat.to_csv('XRP-Binance-USDT_1min.csv', index=False)

In [69]:
# Display the converted frame as the cell output.
dat

Unnamed: 0,date,open,high,low,close,volume
1,2018-05-04 17:12:00,968.004000,1000.260044,914.226000,968.004000,167543.47
2,2018-05-04 17:13:00,968.004000,1000.270800,957.259156,978.759600,179750.59
3,2018-05-04 17:14:00,978.759600,1613.340000,968.100800,1000.260044,107222.48
4,2018-05-04 17:15:00,1000.260044,1021.792756,978.974712,978.974712,171304.56
5,2018-05-04 17:16:00,978.974712,1000.066444,978.974712,1000.023421,62126.16
...,...,...,...,...,...,...
1877995,2021-11-27 17:15:00,1143.188953,1143.666725,1141.038979,1141.038979,331385.00
1877996,2021-11-27 17:16:00,1140.919536,1141.636194,1139.366777,1140.083435,143167.00
1877997,2021-11-27 17:17:00,1139.725106,1141.038979,1139.008448,1140.083435,296882.00
1877998,2021-11-27 17:18:00,1140.083435,1141.038979,1139.127891,1139.725106,219522.00


In [17]:
# from datetime import datetime, timedelta

# dat['time'] = pd.to_datetime(dat['time'])
# # yyyy-mm-dd 형식 변수 만들어주기
# dat['ymd'] = dat['time'].dt.date.astype('str')



# dfs = pd.DataFrame()
# for df in dat.groupby(['ymd']):
#     # df[0]:날짜, df[1]:dateframe
#     USD = ex_rate.loc[ex_rate['Date']==df[0]]['Close']
#     try:
#         df[1].loc[:, ['open','high','low','close']] *= float(USD)
#         dfs = dfs.append(df[1])
#     except:
#         continue
#     # print(USD)
# dat = dfs[1:].set_index(['time']).drop_duplicates()


# dat = dat.drop(['ymd'],axis=1).reset_index()
# dat.rename(columns={'time':'date'}).to_csv('XRP-Binance-USDT_1min.csv', index=False)

In [82]:
import math
import pyupbit
import time
def read_min_price(ticker, count):
    """Fetch 1-minute candles from Upbit in pages and return them as one frame.

    Each request asks ``pyupbit.get_ohlcv`` for the candles up to the oldest
    timestamp seen so far. ``ceil(count / 200)`` requests are made, so the
    result can hold more than ``count`` rows (presumably 200 rows per page,
    the library default — confirm against pyupbit).

    Args:
        ticker: Upbit market code, e.g. ``'KRW-XRP'``.
        count: Approximate number of candles wanted.

    Returns:
        pandas.DataFrame: The pages concatenated in request order (newest
        page first), with an extra ``date`` column copied from the index.
        Empty if the first request yields nothing.
    """
    date = None
    frames = []
    total_rows = 0
    for i in range(math.ceil(count / 200)):
        df = pyupbit.get_ohlcv(ticker, interval="minute1", to=date)
        # Stop when the API yields nothing (None or an empty frame):
        # there is no older timestamp to continue from.
        if df is None or df.empty:
            break
        frames.append(df)
        total_rows += len(df)
        date = df.index[0]
        time.sleep(0.101)  # stay under the request-rate limit
        if i % 500 == 499:
            print(f"[{ticker}] CONCATENATING {total_rows} rows...")
    # Concatenate once at the end; DataFrame.append no longer exists in
    # pandas 2.x and re-copies the whole frame on every iteration.
    dfs = pd.concat(frames) if frames else pd.DataFrame()
    dfs["date"] = dfs.index
    return dfs

In [87]:
# Request 365*5*60*24 one-minute candles (about five years) for KRW-XRP.
XRP_KRW = read_min_price('KRW-XRP',365*5*60*24)

[KRW-XRP] CONCATENATING 100000 rows...
[KRW-XRP] CONCATENATING 200000 rows...
[KRW-XRP] CONCATENATING 300000 rows...
[KRW-XRP] CONCATENATING 400000 rows...
[KRW-XRP] CONCATENATING 500000 rows...
[KRW-XRP] CONCATENATING 600000 rows...
[KRW-XRP] CONCATENATING 700000 rows...
[KRW-XRP] CONCATENATING 800000 rows...
[KRW-XRP] CONCATENATING 900000 rows...
[KRW-XRP] CONCATENATING 1000000 rows...
[KRW-XRP] CONCATENATING 1100000 rows...
[KRW-XRP] CONCATENATING 1200000 rows...
[KRW-XRP] CONCATENATING 1300000 rows...
[KRW-XRP] CONCATENATING 1400000 rows...
[KRW-XRP] CONCATENATING 1500000 rows...
[KRW-XRP] CONCATENATING 1600000 rows...
[KRW-XRP] CONCATENATING 1700000 rows...
[KRW-XRP] CONCATENATING 1800000 rows...
[KRW-XRP] CONCATENATING 1900000 rows...


In [89]:
# Save the Upbit candles to CSV; the index itself is not written.
XRP_KRW.to_csv('XRP-Upbit-KRW_1min.csv',index=False)

In [86]:
# Same backward scrape as for Binance, but through ccxt's Upbit client.
# NOTE(review): the recorded run of this cell ended in KeyboardInterrupt.
upbit = ccxt.upbit()
all_ohlcv = scrape_ohlcv(upbit, 3, 'KRW-XRP', '1m', '2017-01-01 00:00:00Z', 1000)

100000 KRW-XRP candles in total from 2021-09-16T17:17:00.000Z to 2021-11-25T11:23:00.000Z


KeyboardInterrupt: 

In [91]:
# Display the Upbit frame as the cell output.
XRP_KRW

Unnamed: 0,open,high,low,close,volume,date
2021-11-25 17:15:00,1295.0,1295.0,1290.0,1290.0,237319.885600,2021-11-25 17:15:00
2021-11-25 17:16:00,1295.0,1295.0,1290.0,1295.0,289277.052712,2021-11-25 17:16:00
2021-11-25 17:17:00,1290.0,1295.0,1290.0,1290.0,122817.126053,2021-11-25 17:17:00
2021-11-25 17:18:00,1295.0,1295.0,1290.0,1295.0,140624.675257,2021-11-25 17:18:00
2021-11-25 17:19:00,1295.0,1295.0,1290.0,1295.0,125020.405307,2021-11-25 17:19:00
...,...,...,...,...,...,...
2017-09-26 08:01:00,203.0,203.0,203.0,203.0,0.000100,2017-09-26 08:01:00
2017-09-26 08:04:00,202.0,202.0,202.0,202.0,0.000100,2017-09-26 08:04:00
2017-09-26 08:07:00,202.0,202.0,202.0,202.0,0.000100,2017-09-26 08:07:00
2017-09-26 08:09:00,202.0,202.0,202.0,202.0,0.000100,2017-09-26 08:09:00


In [51]:
def make_candle(df, interval=10, open_time: str = None):
    """Aggregate 1-minute candles into ``interval``-minute candles.

    Args:
        df: Frame with ``open``, ``high``, ``low``, ``close`` and ``volume``
            columns, indexed by timestamp. The label slicing below assumes
            the index is sorted in ascending order.
        interval: Length of each output candle in minutes.
        open_time: Optional ``'HH:MM'`` string. When given, the bucket grid
            is anchored at the first row's timestamp with its hour and
            minute replaced by this value; otherwise at the first row's
            timestamp.

    Returns:
        pandas.DataFrame: One row per non-empty bucket with ``open``,
        ``high``, ``low``, ``close`` and ``volume`` columns, indexed by the
        bucket start time (index name ``date``).

    NOTE(review): the loop starts at bucket 1, so the bucket beginning
    exactly at the anchor time is never emitted. Kept as in the original;
    confirm this is intentional.
    """
    offset = pd.Timedelta(minutes=interval)
    one_minute = pd.Timedelta(minutes=1)
    if open_time is None:
        start_date = df.index[0]
    else:
        start_date = df.index[0].replace(hour=int(open_time[:2]), minute=int(open_time[3:]))
    end_date = df.index[-1]
    counts = (pd.to_datetime(end_date) - pd.to_datetime(start_date)) // offset + 1
    rows = []
    for i in range(1, counts):
        bucket_start = start_date + i * offset
        # .loc label slices include both ends, so stop one minute before
        # the next bucket begins.
        reduced_df = df.loc[bucket_start:bucket_start + offset - one_minute]
        # Older data has missing minute candles; skip buckets with no rows.
        if reduced_df.empty:
            continue
        rows.append((
            bucket_start,
            reduced_df['open'].iloc[0],
            reduced_df['high'].max(),
            reduced_df['low'].min(),
            reduced_df['close'].iloc[-1],
            reduced_df['volume'].sum(),
        ))
    data = pd.DataFrame(rows, columns=['date', 'open', 'high', 'low', 'close', 'volume'])
    data.index = pd.to_datetime(data['date'], format='%Y-%m-%d %H:%M')
    data = data.drop("date", axis=1)
    return data

Unnamed: 0_level_0,open,high,low,close,volume
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2018-05-05,1.199438e+12,1.229040e+12,1.166587e+12,1.217913e+12,1.647264e+07
2018-05-06,1.217247e+12,1.244534e+12,1.115076e+12,1.152451e+12,1.919342e+07
2018-05-07,1.178042e+12,1.185798e+12,1.088512e+12,1.104023e+12,1.592508e+07
2018-05-08,1.097836e+12,1.147581e+12,1.071772e+12,1.091624e+12,1.380833e+07
2018-05-09,1.092708e+12,1.107340e+12,1.027392e+12,1.083206e+12,1.424413e+07
...,...,...,...,...,...
2021-11-19,2.126859e+12,2.190407e+12,2.015651e+12,2.173527e+12,4.884885e+08
2021-11-20,2.173328e+12,2.187428e+12,2.136590e+12,2.157441e+12,2.394293e+08
2021-11-21,2.157441e+12,2.179484e+12,2.103823e+12,2.121696e+12,2.551969e+08
2021-11-22,2.143081e+12,2.177582e+12,2.056226e+12,2.120615e+12,2.864686e+08
