이 문서에서 다루는 내용
- 1. 3번 스크립트에서 수집한 리플(XRP) 데이터의 품질을 확인하고
- 2. 데이터를 Join해서 1분봉 종가 기준으로 1분 단위 김프(김치 프리미엄) 데이터 만들기
- 3. 종가 기준 1분 단위 김프 데이터로 캔들 만들기

In [1]:
import pandas as pd
from datetime import datetime, timedelta

In [2]:
# Minute-bar data produced by notebook #3
XRP_KRW, XRP_USDT = (
    pd.read_csv(csv_path)
    for csv_path in ('XRP-Upbit-KRW_1min.csv', 'XRP-Binance-USDT_1min.csv')
)

In [5]:
# Move the 'date' column into the index, then sort each frame by that index
XRP_KRW = XRP_KRW.set_index(['date'])
XRP_KRW = XRP_KRW.sort_index()
XRP_USDT = XRP_USDT.set_index(['date'])
XRP_USDT = XRP_USDT.sort_index()

In [6]:
def make_fulltime_data(df, freq='min'):
    """Return *df* reindexed onto a gap-free time grid, forward-filling gaps.

    The index of *df* is converted to a ``DatetimeIndex``; a regular grid with
    step *freq* is built from the first to the last timestamp, and rows that
    are missing on that grid are filled with the previous row's values.

    Args:
        df: Frame whose index holds timestamps (strings or datetimes), already
            sorted in ascending order.
        freq: pandas frequency alias for the grid step (default: one minute).

    Returns:
        A new DataFrame indexed by the full grid. The input frame is left
        untouched.

    NOTE(review): forward-filling copies *every* column, so a padded row
    repeats the previous row's values as-is (including any volume column).
    Confirm that this is what downstream aggregation expects.
    """
    time_index = pd.DatetimeIndex(df.index)
    # set_axis returns a new frame, so the caller's index is not rewritten.
    timed = df.set_axis(time_index, axis=0)
    if len(time_index) == 0:
        # No first/last timestamp to span; nothing to fill.
        return timed
    full_idx = pd.date_range(time_index[0], time_index[-1], freq=freq)
    # .ffill() replaces the deprecated fillna(method='pad').
    return timed.reindex(full_idx).ffill()

# Gap-fill both exchanges' minute bars.
XRP_KRW = make_fulltime_data(XRP_KRW)
XRP_USDT = make_fulltime_data(XRP_USDT)

# Drop the other OHLCV columns and give 'close' an exchange-specific name.
XRP_KRW_close = (
    XRP_KRW
    .drop(columns=['open', 'high', 'low', 'volume'])
    .rename(columns={'close': 'KRW_close'})
)
XRP_USDT_close = (
    XRP_USDT
    .drop(columns=['open', 'high', 'low', 'volume'])
    .rename(columns={'close': 'USDT_close'})
)

In [7]:
# Keep only the timestamps present in both frames (inner join on the index).
kimp_dat = XRP_KRW_close.merge(
    XRP_USDT_close,
    how='inner',
    left_index=True,
    right_index=True,
)
# Premium in percent: relative difference of the KRW close over the USDT close.
# NOTE(review): no FX conversion happens here - presumably both closes are
# already expressed in the same currency upstream; confirm.
kimp_dat['kimp'] = (
    (kimp_dat['KRW_close'] - kimp_dat['USDT_close'])
    / kimp_dat['USDT_close']
    * 100
)


In [8]:
# Datasets produced by this notebook (#4).
# The gap-filled minute bars are written back over the per-exchange CSV files
# that were read at the top of the notebook.
XRP_KRW.to_csv('XRP-Upbit-KRW_1min.csv', index_label='date')
# Fix: this previously wrote to 'XRP-Upbit-KRW_1min.csv', clobbering the KRW
# file written on the line above and never saving the Binance data.
XRP_USDT.to_csv('XRP-Binance-USDT_1min.csv', index_label='date')
kimp_dat.to_csv('kimp_dat_1min.csv', index_label='date')

In [9]:
# def close_to_candle()
def make_candle(df,interval = 10, open_time: str = None ):
    """Aggregate 1-minute OHLCV rows into candles of *interval* minutes.

    Windows are laid out back to back starting at the first timestamp of *df*
    (or at that day's *open_time* when given). Each window covers
    ``[start, start + interval - 1 minute]`` inclusive. Windows that contain
    no rows (older data has missing minute bars) are skipped.

    Args:
        df: Frame with a sorted ``DatetimeIndex`` and the columns ``open``,
            ``high``, ``low``, ``close`` and ``volume``.
        interval: Candle width in minutes.
        open_time: Optional ``'HH:MM'`` string. When given, the first window
            starts at this time on the date of the first row; rows before it
            are not included in any candle.

    Returns:
        DataFrame indexed by window start (index name ``date``) with columns
        ``open``, ``high``, ``low``, ``close``, ``volume``. Empty input yields
        an empty frame with the same columns.
    """
    rows = []
    if len(df.index) > 0:
        offset = timedelta(minutes = interval)
        one_minute = timedelta(minutes = 1)
        if open_time is None:
            start_date = df.index[0]
        else:
            start_date = df.index[0].replace(hour = int(open_time[:2]), minute = int(open_time[3:]))
        end_date = df.index[-1]
        counts = (pd.to_datetime(end_date)-pd.to_datetime(start_date)) // offset + 1
        for i in range(counts):
            window_start = start_date + i * offset
            # Label-based slicing is inclusive on both ends, hence the -1 minute.
            window = df.loc[window_start : window_start + offset - one_minute]
            if window.empty:
                # No minute bars inside this window; skip it.
                continue
            rows.append((
                window_start,
                # .iloc is explicit positional access; plain [0] / [-1] on a
                # datetime-indexed Series is deprecated in newer pandas.
                window['open'].iloc[0],
                window['high'].max(),
                window['low'].min(),
                window['close'].iloc[-1],
                window['volume'].sum(),
            ))
    data = pd.DataFrame(rows, columns = ['date', 'open', 'high','low','close', 'volume'])
    data.index = pd.to_datetime(data['date'], format='%Y-%m-%d %H:%M')
    data = data.drop("date", axis =1 )
    return data
def to_candle(df, interval=60*24, open_time='00:00', column='kimp'):
    """Turn a 1-minute value series into open/high/low/close candles.

    Windows of *interval* minutes are laid out back to back starting at the
    first timestamp of *df* (or at that day's *open_time* when given). Each
    window covers ``[start, start + interval - 1 minute]`` inclusive. Windows
    that contain no rows are skipped.

    Args:
        df: Frame with a sorted ``DatetimeIndex`` containing *column*.
        interval: Candle width in minutes (default: one day).
        open_time: ``'HH:MM'`` string giving the time of day at which the
            first window starts, on the date of the first row; ``None`` starts
            at the first row's own timestamp.
        column: Name of the column to aggregate (default ``'kimp'``).

    Returns:
        DataFrame indexed by window start (index name ``date``) with columns
        ``open``, ``high``, ``low``, ``close``. Empty input yields an empty
        frame with the same columns.

    Raises:
        KeyError: If *column* is missing from *df*. (This used to be hidden
            by a bare ``except`` and produced an empty result instead.)
    """
    rows = []
    if len(df.index) > 0:
        offset = timedelta(minutes=interval)
        one_minute = timedelta(minutes=1)
        if open_time is None:
            start_date = df.index[0]
        else:
            start_date = df.index[0].replace(hour = int(open_time[:2]), minute = int(open_time[3:]))
        end_date = df.index[-1]
        counts = (pd.to_datetime(end_date) - pd.to_datetime(start_date)) // offset + 1
        for i in range(0,counts):
            window_start = start_date + (i * offset)
            # Label-based slicing is inclusive on both ends, hence the -1 minute.
            values = df.loc[window_start : window_start + offset - one_minute][column]
            if values.empty:
                # No rows inside this window; skip it instead of relying on
                # an exception from indexing an empty Series.
                continue
            # .iloc is explicit positional access; plain [0] / [-1] on a
            # datetime-indexed Series is deprecated in newer pandas.
            rows.append((window_start, values.iloc[0], values.max(), values.min(), values.iloc[-1]))
    data = pd.DataFrame(rows, columns = ['date', 'open', 'high', 'low', 'close'])
    data.index = pd.to_datetime(data['date'], format = '%Y-%m-%d %H:%M')
    data = data.drop('date', axis=1)
    return data



In [14]:
# NOTE: despite the `_1day` name, interval=6*60 builds 6-hour candles,
# with the first window starting at 00:00 on the date of the first row.
kimp_dat_1day = to_candle(kimp_dat, interval=6*60, open_time='00:00')

In [15]:
kimp_dat_1day

Unnamed: 0_level_0,open,high,low,close
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2018-05-04 12:00:00,-0.087320,-0.087320,-0.087320,-0.087320
2018-05-04 18:00:00,-0.087320,-0.087320,-0.180046,-0.133705
2018-05-05 00:00:00,-0.268338,-0.268338,-0.268338,-0.268338
2018-05-05 06:00:00,-0.268338,-0.268338,-0.268338,-0.268338
2018-05-05 12:00:00,-0.268338,-0.268338,-0.268338,-0.268338
...,...,...,...,...
2021-11-25 18:00:00,-0.145317,-0.145317,-0.187238,-0.145317
2021-11-26 00:00:00,-0.028559,-0.028559,-0.028559,-0.028559
2021-11-26 06:00:00,-0.028559,0.038665,-0.296557,-0.279849
2021-11-26 12:00:00,-0.279849,-0.229692,-0.446675,-0.446675


In [17]:
import plotly.graph_objects as go

# Candlestick chart of the premium candles, one candle per row of `dat`.
dat = kimp_dat_1day
candles = go.Candlestick(
    x=dat.index,
    open=dat['open'],
    high=dat['high'],
    low=dat['low'],
    close=dat['close'],
)
fig = go.Figure(data=candles)

# Hide the range slider under the x axis and leave the y axis zoomable.
fig.update_layout(
    xaxis={'rangeslider': {'visible': False}},
    yaxis={'fixedrange': False},
)
# fig.show()

123

In [179]:
# Rolling mean of the premium over a window of 60*6*5 = 1800 rows
# (30 hours of 1-minute data); rows are NaN until the window is full.
kimp_dat['kimp'].rolling(60*6*5).mean()

2018-05-04 17:12:00         NaN
2018-05-04 17:13:00         NaN
2018-05-04 17:14:00         NaN
2018-05-04 17:15:00         NaN
2018-05-04 17:16:00         NaN
                         ...   
2021-11-25 20:30:00    5.362188
2021-11-25 20:31:00    5.362366
2021-11-25 20:32:00    5.362505
2021-11-25 20:33:00    5.362830
2021-11-25 20:34:00    5.363120
Freq: T, Name: kimp, Length: 1873643, dtype: float64

In [182]:
import plotly.express as px

# Premium minus its 60-row rolling mean, restricted to the last 60*24*10 rows.
kimp_series = kimp_dat['kimp']
deviation = kimp_series - kimp_series.rolling(60).mean()
recent = deviation[-60*24*10:].reset_index()
fig = px.line(recent, x="index", y="kimp")
fig.show()