# 以時間維度分析BTC波動度
2024/01
本報告呈現如何使用幣安交易所API取得價格資料，以時間維度針對比特幣（Bitcoin, BTC）的波動度與成交量進行分析，最後以視覺化圖表展示分析結果。

內容分為三部分：
1. 下載並引用所需套件與模組
2. 下載資料與資料前處理
3. 計算與分析

## Download and import needed packages

幣安API套件

In [1]:

#pip install binance-futures-connector

In [None]:
# import logging
# from binance.lib.utils import config_logging
from binance.um_futures import UMFutures

import numpy as np
import pandas as pd
import matplotlib as plot
from datetime import datetime, timedelta
import time
import pytz

import math
import matplotlib.pyplot as plt
from matplotlib import cm
pd.options.mode.chained_assignment = None

## Download and preprocessing

使用幣安交易所API取得各幣種永續合約15分k價格資料（2021/01/01 ~ 即時資料）  
資料經過前處理後，格式如下：

|  columns   | unit | meaning |
|  ----  | ----  | ---- |
| open_time  | datetime | 開盤時間
| open  | float | 開盤價格
|  high | float | 最高價
|  low | float | 最低價
| close  | float | 收盤價（若還未收盤則是現價）
| volume  | float | 成交量（Symbol）
| close_time  | datetime | 收盤時間
|  #trade | int | 成交筆數
|  taker_buy_vol | float | 主動買入成交量（Symbol）
|  week | int | 星期幾
|  time_zone | str | 世界各地區盤中時間（後有詳述）


將交易時段分成以下幾種，重疊部分只記一次。

|  市場   | 時段（UTC+8）| 代碼
|  ----  | ----  |  ----  | 
|亞盤 |23:00 - 6:00|AS|
|歐亞重疊 |06:00 - 08:00|ASEU|
|歐盤 |08:00 - 12:00|EU|
|歐美重疊 |12:00 - 15:00|EUNA|
|美盤| 15:00 - 21:00|NA|
|其他| 21:00 - 23:00 | OTHERS




In [None]:
def download_data(symbol, interval = '5m'):
    um_futures_client = UMFutures(timeout = 5)
    utc = pytz.timezone('UTC')
    start_time_date = datetime(2024,2, 1, tzinfo = utc)
    klines_raw = []

    while start_time_date < datetime.now(tz=utc):
        start_time_ts = datetime.timestamp(start_time_date)
        start_time_date += timedelta(minutes=5*1500)

        klines_raw += um_futures_client.klines(symbol = symbol, interval = interval, startTime = int(start_time_ts)*1000, limit = 1500, timeZone = 0)
    
    
    headers = ['open_time', 'open', 'high', 'low', 'close', 'volume', 'close_time', 'quote_vol', '#trade', 'taker_buy_vol', 'taker_buy_quote_vol', 'ignore']
    klines_df = pd.DataFrame(klines_raw, columns = headers, dtype=float)
    klines_df.drop(columns=['ignore', 'quote_vol', 'taker_buy_quote_vol'], inplace=True)

    return klines_df

def preprocessing(klines_df, tz = True):
    klines_df['open_time'] = list(map(lambda x: datetime.utcfromtimestamp(x/1000), klines_df['open_time']))
    klines_df['close_time'] = list(map(lambda x: datetime.utcfromtimestamp(x/1000), klines_df['close_time']))   

    klines_df['return'] = klines_df['close'].pct_change()*100
    klines_df.loc[0, 'return'] = (klines_df['close'][0] - klines_df['open'][0]) / klines_df['open'][0] * 100
    klines_df = klines_df[klines_df['return']<20]

    first_day_of_week = 5 # the day of the week of the very first day of the interval
    week = list((klines_df.index + first_day_of_week * 96) // 96 % 7)
    klines_df['week'] = week

    def time_zone(date):
        if date.hour >= 23 or date.hour < 6:
            return 'AS'
        elif date.hour >= 6 and date.hour < 8:
            return 'ASEU'
        elif date.hour >= 8 and date.hour < 12:
            return 'EU'
        elif date.hour >= 12 and date.hour < 15:
            return 'EUNA'
        elif date.hour >= 15 and date.hour < 21:
            return 'NA'
        else:
            return 'OTHERS'
    if tz:
        klines_df['time_zone'] = list(map(time_zone, klines_df['open_time']))

    return klines_df

In [None]:
symbols = ['OPUSDT', 'PORTALUSDT', 'STRKUSDT', 'PYTHUSDT', 'SOLUSDT']

klines_df = dict()
for s in symbols:
    klines_raw = download_data(s)
    klines_df[s] = preprocessing(klines_raw)
    time.sleep(17)
    
# klines_df['BNBUSDT']


In [None]:
import numpy as np
li1 = np.array(list(map(int, klines_df['SOLUSDT'].index)))
li2 = np.array(list(map(int, klines_df['BTCUSDT'].index)))
 
dif1 = np.setdiff1d(li1, li2)
dif2 = np.setdiff1d(li2, li1)
 
temp3 = np.concatenate((dif1, dif2))
print(list(temp3))
sum(list(map(int, klines_df['SOLUSDT'].index)))

#### 最終資料格式

In [None]:
klines_df['OPUSDT']

Unnamed: 0,open_time,open,high,low,close,volume,close_time,#trade,taker_buy_vol,return,week,time_zone
0,2024-02-01 00:00:00,2.8986,2.9151,2.8958,2.9039,376039.0,2024-02-01 00:04:59.999,6080.0,167361.9,0.182847,5,AS
1,2024-02-01 00:05:00,2.9040,2.9100,2.8997,2.9086,133345.2,2024-02-01 00:09:59.999,2650.0,68834.0,0.161851,5,AS
2,2024-02-01 00:10:00,2.9085,2.9147,2.9050,2.9124,176270.3,2024-02-01 00:14:59.999,2753.0,77218.6,0.130647,5,AS
3,2024-02-01 00:15:00,2.9121,2.9185,2.9093,2.9127,178793.1,2024-02-01 00:19:59.999,2762.0,97452.1,0.010301,5,AS
4,2024-02-01 00:20:00,2.9125,2.9170,2.9050,2.9112,228776.2,2024-02-01 00:24:59.999,3500.0,113615.8,-0.051499,5,AS
...,...,...,...,...,...,...,...,...,...,...,...,...
11309,2024-03-11 06:25:00,4.5205,4.5300,4.5128,4.5139,352291.7,2024-03-11 06:29:59.999,3449.0,160515.4,-0.143793,3,ASEU
11310,2024-03-11 06:30:00,4.5138,4.5210,4.5040,4.5043,222927.7,2024-03-11 06:34:59.999,3408.0,104302.8,-0.212676,3,ASEU
11311,2024-03-11 06:35:00,4.5046,4.5055,4.4951,4.4980,373849.1,2024-03-11 06:39:59.999,3967.0,109524.7,-0.139866,3,ASEU
11312,2024-03-11 06:40:00,4.4984,4.5162,4.4954,4.4997,261981.6,2024-03-11 06:44:59.999,4307.0,187109.8,0.037795,3,ASEU


## 計算與分析

### 波動度
使用標準差公式
$$
\begin{aligned}
    \sigma = \sqrt{\frac{1}{n-1}\sum_{i=1}^{n} (x_i - \bar{x})^2}
\end{aligned}
$$
分別計算以下幾種不同情況的波動度：
|  title   |  meaning |
|  ----  | ---- 
| 15m_alltime  | 資料時間內15分K歷史波動度
| 1h_alltime  |  1H歷史波動度
|  1d_alltime | 1日歷史波動度
|week_day| 分別計算一週內不同天的波動度
|time_zone| 針對不同市場活躍時間計算1H波動度

In [None]:
volatility = {}
weekday = ['Sun', 'Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat']
time_zone = ['AS', 'ASEU', 'EU', 'EUNA', 'NA', 'OTHERS']
def vol(klines_df):
    v = {}
    v['15m_alltime'] = klines_df['return'].std()
    v['1h_alltime'] = math.sqrt(4) * v['15m_alltime']
    v['1d_alltime'] = math.sqrt(4*24) * v['15m_alltime']
    v['week_day'] = []
    v['time_zone'] = []

    for i in range(len(weekday)):
        v['week_day'].append(math.sqrt(4*24) * klines_df[klines_df['week'] == i]['return'].std())
    for tz in time_zone:
        v['time_zone'].append(math.sqrt(4) * klines_df[klines_df['time_zone'] == tz]['return'].std())

    return v

for s in symbols:
    volatility[s] = vol(klines_df[s].iloc[-30:,:])
# volatility['BTCUSDT']

In [None]:
total = 0

for s in symbols:
    v = volatility[s]['15m_alltime']
    total += v
    print(f'{s} 1d V: {v}')

total

OPUSDT 1d V: 0.2461401427435989
PORTALUSDT 1d V: 0.43319936482272875
STRKUSDT 1d V: 0.400331227296348
PYTHUSDT 1d V: 0.4535520234801793
SOLUSDT 1d V: 0.25152500067119576


1.7847477590140508

### 視覺化圖表與分析結果


In [None]:
def within_weekday(symbol, klines_df, v):
    plt.figure()
    plt.bar(weekday, v['week_day'])
    plt.axhline(v['1d_alltime'], c = 'r')
    plt.ylabel('Volatility(%)')
    plt.title(f'Volatility within a week({symbol[:3]}_1D)')

    # function to add value labels
    def addlabels(x, y, text):
        for i in range(len(x)):
            plt.text(i, y[i]//2, str(text[i])+'%', ha = 'center')

    week_vol_data = [klines_df[klines_df['week'] == i]['volume'] for i in range(7)]
    avg_vol_day = list(map(np.average, week_vol_data))
    avg_vol = np.average(klines_df['volume'])

    tp_taker_vol_data = [klines_df[klines_df['week'] == i]['taker_buy_vol'] for i in range(7)]
    avg_taker_vol = list(map(np.average, tp_taker_vol_data))

    pct = [round(i,2) for i in np.divide(avg_taker_vol,avg_vol_day)*100]

    plt.figure()
    plt.bar(weekday, avg_vol_day, label = 'Volume')
    plt.bar(weekday, avg_taker_vol, label = 'taker_buy_Volume')
    plt.axhline(avg_vol, c = 'r')
    addlabels(weekday, avg_vol_day, pct)
    plt.title(f'Average trade volume within a week({symbol[:3]}_15m)')
    plt.ylabel(f'avg. vol per day ({symbol[:3]})')

    t = list(map(np.average, [klines_df[klines_df['week'] == i]['#trade'] for i in range(7)]))
    avg_vol = list(map(np.average, week_vol_data))
    data = np.divide(avg_vol, t)

    perTrade = sum(klines_df['volume'])/sum(klines_df['#trade'])

    plt.figure()
    plt.bar(weekday, data, label = 'Volume')
    plt.axhline(perTrade, c = 'r')
    plt.title(f'Average trade volume per trade within a week({symbol[:3]}_15m)')
    plt.ylabel(f'Average Vol per trade ({symbol[:3]})')

    week_data = [klines_df[klines_df['week'] == i]['return'] for i in range(7)]
    plt.figure()
    plt.boxplot(week_data, showfliers = False, labels = weekday)
    plt.title(f'return distribution without fliers({symbol[:3]}_15m)')
    plt.ylabel('return(%))')

    plt.figure()
    plt.violinplot(week_data)
    plt.boxplot(week_data, showcaps = False, showbox = False)
    plt.xticks(range(1,8), weekday)
    plt.title(f'return distribution with fliers({symbol[:3]}_15m)')
    plt.ylabel('return(%)')

    plt.figure()
    plt.bar(time_zone, v['time_zone'])
    plt.axhline(v['1h_alltime'], c = 'r')
    plt.title(f'Volatility with different time zone({symbol[:3]}_1H)')
    plt.xlabel('time zone')
    plt.ylabel('volatility(%)')

    # function to add value labels
    def addlabels(x,y,text):
        for i in range(len(x)):
            plt.text(i, y[i]//2, str(text[i])+'%', ha = 'center')

    tz_vol_data = [klines_df[klines_df['time_zone'] == tz]['volume'] for tz in time_zone]
    avg_vol_tz = list(map(np.average, tz_vol_data))
    avg_vol = np.average(klines_df['volume'])

    tz_taker_vol_data = [klines_df[klines_df['time_zone'] == tz]['taker_buy_vol'] for tz in time_zone]
    avg_taker_vol = list(map(np.average, tz_taker_vol_data))

    pct = [round(i,2) for i in np.divide(avg_taker_vol,avg_vol_tz)*100]
    
    plt.figure()
    plt.bar(time_zone, avg_vol_tz, label = 'Volume')
    plt.bar(time_zone, avg_taker_vol, label = 'taker_buy_Volume')
    plt.axhline(avg_vol, c = 'r')
    addlabels(time_zone,avg_vol_tz, pct)
    plt.title(f'Average trade volume with different time zone({symbol[:3]}_15m)')
    plt.ylabel(f'avg. vol per day ({symbol[:3]})')


    t = list(map(np.average, [klines_df[klines_df['time_zone'] == tz]['#trade'] for tz in time_zone]))
    avg_vol = list(map(np.average, tz_vol_data))
    data = np.divide(avg_vol, t)

    perTrade = sum(klines_df['volume'])/sum(klines_df['#trade'])

    plt.figure()
    plt.bar(time_zone, data, label = 'Volume')
    plt.axhline(perTrade, c = 'r')
    plt.title(f'Average trade volume per trade with different time zone({symbol[:3]}_15m)')
    plt.ylabel(f'Average Vol per trade ({symbol[:3]})')

    tz_return_data = [klines_df[klines_df['time_zone'] == tz]['return'] for tz in time_zone]
    plt.figure()
    plt.boxplot(tz_return_data, vert = True, showfliers = False, labels = time_zone)
    plt.title(f'return distribution within time zone without fliers({symbol[:3]}_15m)')
    plt.xlabel('time zone')
    plt.ylabel('return(%)')

    plt.figure()
    plt.violinplot(tz_return_data)
    plt.boxplot(tz_return_data, showcaps = False, showbox = False)
    plt.xticks(range(1,len(time_zone)+1), time_zone)
    plt.title(f'return distribution within time zone with fliers({symbol[:3]}_15m)')
    plt.xlabel('time zone')
    plt.ylabel('return(%)')

    Aweek = []
    for d in range(len(weekday)):
        Aday = []
        for tz in time_zone:
            Aday.append(math.sqrt(4) * klines_df[(klines_df['time_zone'] == tz) & (klines_df['week'] == d)]['return'].std())
        Aweek.append(Aday)

    Aweek = pd.DataFrame(Aweek)

    fig = plt.figure()

    ax = plt.subplot(projection='3d')
    x = [0,1,2,3,4,5]
    y = list(range(0,21,3))
    z = 0
    for i in range(len(y)):
        ax.bar3d(x, y[i], z, dx = 0.5, dy = 2,dz = Aweek.iloc[i,:])
    plt.xlabel('time zone')
    plt.xticks(range(0,len(time_zone)), time_zone)
    plt.ylabel('day within week')
    plt.title(f'Volatility within a week and with different timezone ({symbol[:3]})')
    plt.show()


within_weekday(symbols[4], klines_df[symbols[4]], volatility[symbols[4]])


## 投資組合

In [None]:
import quantstats as qs

In [None]:
def data_1d(symbol):
    um_futures_client = UMFutures(timeout = 5)
    utc = pytz.timezone('UTC')
    start_time_date = datetime(2021, 1, 1, tzinfo = utc)
    start_time_ts = datetime.timestamp(start_time_date)
    klines_raw = um_futures_client.klines(symbol = symbol, interval = '1d', startTime = int(start_time_ts)*1000, limit = 1500, timeZone = 0)
        
        
    headers = ['open_time', 'open', 'high', 'low', 'close', 'volume', 'close_time', 'quote_vol', '#trade', 'taker_buy_vol', 'taker_buy_quote_vol', 'ignore']
    klines_df = pd.DataFrame(klines_raw, columns = headers, dtype=float)
    klines_df.drop(columns=['ignore', 'quote_vol', 'taker_buy_quote_vol'], inplace=True)

    klines_df['open_time'] = list(map(lambda x: datetime.utcfromtimestamp(x/1000), klines_df['open_time']))
    klines_df['close_time'] = list(map(lambda x: datetime.utcfromtimestamp(x/1000), klines_df['close_time']))   

    klines_df['return'] = klines_df['close'].pct_change()
    klines_df.loc[0, 'return'] = (klines_df['close'][0] - klines_df['open'][0]) / klines_df['open'][0] 


    return klines_df
klines_1d = {}

klines_1d_df = pd.DataFrame()
for s in symbols:
    klines_1d[s] = data_1d(s)
    klines_1d[s].index = klines_1d[s]['open_time']
    klines_1d_df[s] = klines_1d[s]['return']
klines_1d_df.index = klines_1d['BTCUSDT']['open_time']

In [None]:
# Strategy 1: equal weight
s1_return = klines_1d_df.copy()
s1_return['return'] = klines_1d_df.mean(axis=1)

s1_return

Unnamed: 0_level_0,BTCUSDT,ETHUSDT,XRPUSDT,BNBUSDT,SOLUSDT,return
open_time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2021-01-01,0.013437,-0.010052,0.082916,0.010561,0.221964,0.063765
2021-01-02,0.097581,0.062006,-0.070257,0.012594,-0.022087,0.015968
2021-01-03,0.026541,0.264793,0.024887,0.079146,0.210892,0.121252
2021-01-04,-0.030963,0.064137,0.042384,-0.003826,0.140809,0.042509
2021-01-05,0.061454,0.055776,-0.043626,0.015969,-0.140021,-0.010090
...,...,...,...,...,...,...
2024-01-25,-0.003129,-0.007429,-0.008300,-0.002834,-0.020937,-0.008525
2024-01-26,0.046382,0.021957,0.035422,0.034203,0.061863,0.039965
2024-01-27,0.007075,0.000119,-0.003571,0.011951,0.021428,0.007400
2024-01-28,-0.001924,-0.004848,-0.011696,-0.001439,0.018146,-0.000352


In [None]:
s2_return = klines_1d_df.copy()
s2_return['return'] = pd.DataFrame(list(map(lambda s: volatility[s]['1d_alltime']/total*klines_1d_df[s], symbols))).T.sum(axis=1)
s2_return

Unnamed: 0_level_0,BTCUSDT,ETHUSDT,XRPUSDT,BNBUSDT,SOLUSDT,return
open_time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2021-01-01,0.013437,-0.010052,0.082916,0.010561,0.221964,0.082944
2021-01-02,0.097581,0.062006,-0.070257,0.012594,-0.022087,0.004004
2021-01-03,0.026541,0.264793,0.024887,0.079146,0.210892,0.128363
2021-01-04,-0.030963,0.064137,0.042384,-0.003826,0.140809,0.055165
2021-01-05,0.061454,0.055776,-0.043626,0.015969,-0.140021,-0.028143
...,...,...,...,...,...,...
2024-01-25,-0.003129,-0.007429,-0.008300,-0.002834,-0.020937,-0.009970
2024-01-26,0.046382,0.021957,0.035422,0.034203,0.061863,0.041735
2024-01-27,0.007075,0.000119,-0.003571,0.011951,0.021428,0.008325
2024-01-28,-0.001924,-0.004848,-0.011696,-0.001439,0.018146,0.001014


In [None]:
s3_return =klines_1d_df.copy()
r = [s3_return.iloc[0].sum()]
for i in range(1, len(s3_return.index)):
    mask = s3_return.iloc[i-1]<0
    tmp = s3_return.iloc[i][mask].mean()
    if np.isnan(tmp):
        tmp = 0
    r.append(tmp)

s3_return['return'] = r


In [None]:
s3_return.loc['2021-05-20':'2021-05-30']

Unnamed: 0_level_0,BTCUSDT,ETHUSDT,XRPUSDT,BNBUSDT,SOLUSDT,return
open_time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2021-05-20,0.103877,0.133214,0.116374,0.17015,0.271201,0.158963
2021-05-21,-0.081216,-0.121878,-0.147345,-0.167689,-0.124013,0.0
2021-05-22,0.005954,-0.055886,-0.100705,-0.080031,-0.197734,-0.08568
2021-05-23,-0.074293,-0.086495,-0.127664,-0.130372,-0.214828,-0.13984
2021-05-24,0.11913,0.263585,0.257881,0.325083,0.281437,0.249423
2021-05-25,-0.011677,0.021727,-0.019223,-0.005502,-0.04488,0.0
2021-05-26,0.023413,0.065553,0.05726,0.104097,0.180391,0.09129
2021-05-27,-0.017765,-0.049059,-0.053965,-0.024975,-0.04633,0.0
2021-05-28,-0.074139,-0.120721,-0.07787,-0.109816,-0.141686,-0.104846
2021-05-29,-0.029689,-0.055427,-0.075879,-0.07578,-0.055634,-0.058482


In [None]:
qs.reports.html(s3_return['return'], s2_return['return'])