In [1]:
import altair as alt
import datetime as dt
import logging
import numpy as np
import pandas as pd
import requests

from requests.exceptions import RequestException
from json.decoder import JSONDecodeError
from itertools import chain

In [2]:
DOMAIN = "https://api-pub.bitfinex.com/"

# Decorator which jsonifies succesful requests and prints errors if otherwise
def bitfinex_api_handler(f):
    def inner(*args, **kwargs):
        try:
            response = f(*args, **kwargs)
            response.raise_for_status()  # Raises an HTTPError for bad requests
            return response.json()
        except JSONDecodeError:
            logging.error('JSON decoding error')
        except RequestException as e:
            logging.error(f'HTTP error: {e}')
        except Exception as e:
            logging.error(f'Unknown error: {e}')

        return []
    return inner

@bitfinex_api_handler
def fetch(start=None, end=None, ticker='BTCUSD'):
    url = f"{DOMAIN}/v2/trades/t{ticker}/hist"
    params = {'start': start, 'end': end, 'limit' : 10_000, 'sort': -1}
    return requests.get(url, params=params)

def timestamp_now_ms():
    now = dt.datetime.now()
    timestamp = dt.datetime.timestamp(now)
    return timestamp * 1000

def fetch_last(hours=2, ticker='BTCUSD'):
    timestamp = timestamp_now_ms()
    timestamp_stop = timestamp - hours * 3600 * 1000
    data = {}
    while timestamp > timestamp_stop:
        resp = fetch(end=timestamp, ticker=ticker)
        if resp:
            data[timestamp] = resp
            # data received is sorted new to old
            timestamp = resp[-1][1]
        else:
            break
    # flatten
    return list(chain.from_iterable(data.values()))

In [3]:
TICKER = 'BTCUSD'
LAST_HOURS = 4 * 24
data = fetch_last(hours=LAST_HOURS, ticker=TICKER)

In [4]:
COLUMNS = {
    'ID': 'ID',
    'MTS': 'Timestamp',
    'AMOUNT': 'Q',
    'PRICE': 'P'
}

dfs = {}
# Data transform specific to Bitfinex
df = pd.DataFrame(data, columns=COLUMNS.values())
df['T'] = df['Timestamp'].map(lambda ts: dt.datetime.fromtimestamp(int(ts / 1000)))
df['Side'] = df['Q'].map(lambda q: 'B' if q > 0 else 'S')
df['Type'] = df['Side'].map(lambda x: 1 if x == 'S' else 0)
df['Q'] = df['Q'].map(abs)
df['V'] = df['Q'] * df['P']
df['V_buy'] = df['V'] * (df['Side'] == 'B')
df['V_sell'] = df['V'] - df['V_buy']
# Ascending order in time is important for OHLC
dfs['in'] = df.sort_values('T').reset_index(drop=True)
dfs['in']

Unnamed: 0,ID,Timestamp,Q,P,T,Side,Type,V,V_buy,V_sell
0,1474220880,1702560573533,0.002972,42469,2023-12-14 14:29:33,S,1,126.199606,0.000000,126.199606
1,1474220992,1702560573861,0.118862,42403,2023-12-14 14:29:33,B,0,5040.119379,5040.119379,0.000000
2,1474220991,1702560573856,0.023630,42399,2023-12-14 14:29:33,S,1,1001.888370,0.000000,1001.888370
3,1474220988,1702560573853,0.100000,42400,2023-12-14 14:29:33,B,0,4240.000000,4240.000000,0.000000
4,1474220980,1702560573822,0.268883,42400,2023-12-14 14:29:33,B,0,11400.635808,11400.635808,0.000000
...,...,...,...,...,...,...,...,...,...,...
69995,1476452553,1702908213354,0.043891,41430,2023-12-18 15:03:33,S,1,1818.404130,0.000000,1818.404130
69996,1476452554,1702908213426,0.008710,41430,2023-12-18 15:03:33,S,1,360.855300,0.000000,360.855300
69997,1476452555,1702908213439,0.008710,41430,2023-12-18 15:03:33,S,1,360.855300,0.000000,360.855300
69998,1476452556,1702908213456,0.008710,41430,2023-12-18 15:03:33,S,1,360.855300,0.000000,360.855300


In [5]:
def copy(f):
    def inner(df, *args, **kwargs):
        _df = df.copy()
        return f(_df, *args, **kwargs)
    return inner

def upsample(df, resolution=10_000):
    def discretize(df, resolution=resolution):
        # 136K -> 13 * [10K] + [6K]
        split_volume = lambda v: int(v // resolution) * [resolution] + [v % resolution]
        return df['V'].map(split_volume)

    def recalculate(df):
        df['Q'] = df['V'] / df['P']
        df['V_buy'] = df['V'] * (1 - df['Type'])
        df['V_sell'] = df['V'] * df['Type']
        df['Q_buy'] = df['Q'] * (1 - df['Type'])
        df['Q_sell'] = df['Q'] * df['Type']
        return df

    return (df
            .assign(V_split = discretize)
            .explode('V_split')
            .drop('V', axis=1)
            .rename(columns={'V_split': 'V'})
            .pipe(recalculate)
            )

def bin_by_cumulative_volume(df, bin_size):
    max_cum_vol = df['V'].cumsum().iloc[-1]
    stop = np.ceil(max_cum_vol) + bin_size
    bins = np.arange(0, stop, bin_size)
    binned = pd.cut(df['V'].cumsum(), bins=bins)
    df['V_upper_bound'] = binned.map(lambda bin: bin.right)
    return df

def flatten_columns(df):
    df.columns = ['_'.join(col).strip() for col in df.columns.values]
    return df

def aggregate_ohlc(df):
    return (df.groupby('V_upper_bound')
              .agg({
                'P': ['first', 'max', 'min', 'last'],
                'Q': 'sum',
                'V': 'sum',
                'V_buy': 'sum',
                'V_sell': 'sum',
                'T': 'last'
              }).pipe(flatten_columns)
               .rename(columns={
                  'P_first': 'open',
                  'P_max': 'high',
                  'P_min': 'low',
                  'P_last': 'close',
                  'Q_sum': 'Q',
                  'V_sum': 'V',
                  'V_buy_sum': 'V_buy',
                  'V_sell_sum': 'V_sell',
                  'T_last': 'T'
              })
          )

def clean(df):
    return df.fillna(method='ffill').reset_index(drop=True)



BIN_SIZE = 10_000_000
RESOLUTION = BIN_SIZE // 1000

dfs['v'] = (dfs['in']
            .pipe(upsample, resolution=RESOLUTION)
            .pipe(bin_by_cumulative_volume, bin_size=BIN_SIZE)
            .pipe(aggregate_ohlc)
            .pipe(clean)
            .assign(D=lambda df: df['V_buy'] - df['V_sell'])
            )

dfs['v'].head()

Unnamed: 0,open,high,low,close,Q,V,V_buy,V_sell,T,D
0,42469,42669,41475,41710,237.731376,9998272.0,5078245.0,4920027.0,2023-12-14 14:30:51,158218.2
1,41710,42157,41709,42000,238.154316,9998633.0,3213163.0,6785470.0,2023-12-14 14:35:27,-3572306.0
2,42000,42801,41930,42526,235.442367,9996094.0,3406908.0,6589186.0,2023-12-14 16:35:41,-3182278.0
3,42526,43470,42323,43371,232.962995,10005860.0,6774970.0,3230885.0,2023-12-14 18:49:28,3544085.0
4,43365,43398,42806,42806,232.409118,9998717.0,4097653.0,5901064.0,2023-12-15 04:16:16,-1803411.0


In [6]:
def plot_candles(df, x_axis=None, width=800, height=400):
    green = 'forestgreen'
    red = 'rgb(255, 72, 51)'
    darkred = 'firebrick'
    white = 'white'

    open_close_fill = alt.condition(
        "datum.open <= datum.close",
        alt.value(white),
        alt.value(red)
    )

    open_close_rule = alt.condition(
        "datum.open <= datum.close",
        alt.value(green),
        alt.value(darkred)
    )

    open_close_stroke = alt.condition(
        "datum.open <= datum.close",
        alt.value(green),
        alt.value(darkred)
    )

    base = alt.Chart(df, width=width, height=height).encode(
        x=x_axis,
        color=open_close_fill,
    )

    rule = base.mark_rule().encode(
        alt.Y(
            'low:Q',
            title='Price',
            scale=alt.Scale(zero=False),
        ),
        alt.Y2('high:Q'),
        color=open_close_rule
        )

    bar = base.mark_bar(
        size=(width / len(df)) * 0.86
        ).encode(
        alt.Y('open:Q'),
        alt.Y2('close:Q'),
        stroke=open_close_stroke
        )

    return rule + bar


def plot_dollar_candles(df, **kwargs):
    # inferred
    bin_size = df.V.iloc[0]
    x_title = f'${int(round(bin_size / 10**6))}M Bars'
    x_axis = alt.X(
        f'yearmonthdatehoursminutesseconds(T):O',
        axis=alt.Axis(title=x_title)
        )
    return plot_candles(df, x_axis=x_axis, **kwargs)

plot_dollar_candles(dfs['v'], width=1600, height=900)

In [7]:
def plot_deltas(df, y='D', width=800, height=400, **kwargs):
    return alt.Chart(df, width=width, height=height, **kwargs) \
              .mark_bar(fill='lightblue', stroke='grey', opacity=0.3, line=True) \
              .encode(
                  x=alt.X('yearmonthdatehoursminutesseconds(T):O', axis=alt.Axis(title='$10M Bars')),
                  y=y)

plot_dollar_candles(dfs['v'], width=1600, height=900) & \
plot_deltas(dfs['v'], width=1600, height=900)