In [1]:
import sys

sys.path.append("../")

In [2]:
import requests
import time
import pandas as pd
import numpy as np
from statsmodels.tsa.vector_ar.vecm import coint_johansen
from stats_arb.tests import adf_test, kpss_test, cal_half_life, pp_test
from datetime import datetime, timedelta
from ta.volatility import BollingerBands
from datetime import datetime
import seaborn as sns
from IPython.display import clear_output

from bokeh.layouts import column
from bokeh.plotting import figure, output_notebook, show, output_file

from bokeh.models import (  # type: ignore
    ColumnDataSource,
    Span,
    HoverTool,
)

import matplotlib.pyplot as plt


plt.rcParams["figure.figsize"] = (18,7)


In [3]:
# Base URL of the Binance `fapi` v1 REST API; get_batch appends the endpoint name ('klines') to it.
API_BASE = 'https://fapi.binance.com/fapi/v1/'
# Default candle interval used by check_pair when no timeframe is passed.
TIMEFRAME = '1h'

# Column names assigned, in this order, to each row of the klines JSON response (see get_batch).
LABELS = [
    'open_time',
    'open',
    'high',
    'low',
    'close',
    'volume',
    'close_time',
    'quote_asset_volume',
    'number_of_trades',
    'taker_buy_base_asset_volume',
    'taker_buy_quote_asset_volume',
    'ignore'
]

# Columns removed by get_candles; what remains is open_time (used as index) plus
# open/high/low/close/volume.
DROP_COLUMNS=[
    'close_time',
    'quote_asset_volume',
    'number_of_trades',
    'taker_buy_base_asset_volume',
    'taker_buy_quote_asset_volume',
    'ignore'
]


def get_batch(symbol, interval='1m', start_time=0, limit=1000, timeout=30):
    """Fetch one batch of candlesticks from the klines endpoint.

    Args:
        symbol: Trading pair symbol sent to the API, e.g. ``'BTCUSDT'``.
        interval: Candle interval string sent as the ``interval`` query parameter.
        start_time: Value sent as ``startTime`` (the caller passes epoch milliseconds).
        limit: Maximum number of candles requested.
        timeout: Timeout in seconds handed to ``requests.get``.

    Returns:
        A DataFrame with the columns in ``LABELS`` when the response status is 200,
        otherwise an empty DataFrame (the erroneous response is printed).

    Connection errors and timeouts are retried indefinitely, sleeping five minutes
    between attempts. The retry is a loop rather than a recursive call so that a long
    outage cannot grow the call stack.
    """
    params = {
        'symbol': symbol,
        'interval': interval,
        'startTime': start_time,
        'limit': limit
    }

    while True:
        try:
            response = requests.get(f'{API_BASE}klines', params, timeout=timeout)
            break
        except requests.exceptions.ConnectionError:
            print('Connection error, Cooling down for 5 mins...')
            time.sleep(5 * 60)
        except requests.exceptions.Timeout:
            print('Timeout, Cooling down for 5 min...')
            time.sleep(5 * 60)

    if response.status_code == 200:
        return pd.DataFrame(response.json(), columns=LABELS)

    print(f'Got erroneous response back {symbol}: {response}. {response.text}')
    return pd.DataFrame([])


def get_candles(base, quote, start_date: datetime, interval='1m'):
    """Download all candles for ``base + quote`` from ``start_date`` up to now.

    Batches are requested through ``get_batch`` until the API stops returning newer
    candles, an empty batch comes back, or the cursor reaches the current time.

    Args:
        base: Base asset, concatenated with ``quote`` to form the symbol.
        quote: Quote asset.
        start_date: First instant to fetch. A naive datetime is interpreted as UTC
            (the in-file caller builds it from ``datetime.utcnow()``); an aware
            datetime is converted using its own offset.
        interval: Candle interval string forwarded to ``get_batch``.

    Returns:
        A DataFrame indexed by ``open_time`` (datetime, from epoch milliseconds) with the
        columns left after removing ``DROP_COLUMNS``, or ``None`` if nothing was fetched.
    """
    # datetime.timestamp() treats naive values as *local* time, which would shift a
    # utcnow()-based start by the machine's UTC offset; do the epoch arithmetic by hand.
    if start_date.tzinfo is None:
        start_seconds = (start_date - datetime(1970, 1, 1)).total_seconds()
    else:
        start_seconds = start_date.timestamp()
    last_timestamp = int(start_seconds) * 1000

    batches = []
    # stop if the timestamp that comes back from the api is the same as the last one
    previous_timestamp = None

    while previous_timestamp != last_timestamp:
        # Stop once the cursor reaches "now". Compared in epoch milliseconds so the
        # check does not depend on the machine's time zone.
        if last_timestamp >= time.time() * 1000:
            break

        previous_timestamp = last_timestamp

        new_batch = get_batch(
            symbol=base + quote,
            interval=interval,
            start_time=last_timestamp
        )

        # requesting candles from the future returns empty
        # also stop in case response code was not 200
        if new_batch.empty:
            break

        last_timestamp = int(new_batch['open_time'].max())

        # the batch contains nothing newer than what we already have
        if previous_timestamp == last_timestamp:
            break

        batches.append(new_batch)

        # Progress line; fromtimestamp() renders the instant in the local time zone.
        last_datetime = datetime.fromtimestamp(last_timestamp / 1000)
        covering_spaces = 20 * ' '
        print(datetime.now(), base, quote, interval, str(last_datetime) + covering_spaces, end='\r', flush=True)

    if not batches:
        return None

    df = pd.concat(batches, ignore_index=True)
    # Every batch after the first starts at the previous batch's last open_time, so the
    # boundary candle is fetched twice; keep the most recently fetched copy.
    df = df.drop_duplicates(subset='open_time', keep='last')
    df = df.drop(columns=DROP_COLUMNS)
    df['open_time'] = pd.to_datetime(df['open_time'], unit='ms')
    return df.set_index(keys=['open_time'])



In [4]:
def calculate_spread(df, symbols, hedge_ratio, nb_symbols = 2):
    """Return the hedge-ratio-weighted sum of the first ``nb_symbols`` columns.

    For each position ``k < nb_symbols`` the column ``df[symbols[k]]`` is scaled by
    ``hedge_ratio[k]``; the scaled columns are added together and rows where the
    result is NaN are removed.
    """
    weighted_sum = None
    for position in range(nb_symbols):
        leg = df[symbols[position]] * hedge_ratio[position]
        if weighted_sum is not None:
            # Accumulate onto the first leg in place.
            weighted_sum += leg
            continue
        weighted_sum = leg

    return weighted_sum.dropna()

In [5]:
# Use seaborn's "paper" plotting context with fonts scaled by 1.8.
sns.set_context("paper", font_scale=1.8)

# Set up Bokeh to display the plot in a Jupyter notebook
output_notebook()

def zscore(x, window):
    """Rolling z-score of ``x`` against the statistics of the preceding window.

    The rolling mean and population standard deviation (``ddof=0``) over ``window``
    points are shifted by one step, so each value is standardised using only the
    observations before it.
    """
    rolling = x.rolling(window=window)
    lagged_mean = rolling.mean().shift(1)
    lagged_std = rolling.std(ddof=0).shift(1)
    return x.sub(lagged_mean).div(lagged_std)

# def calculate_zscore(spread, mean, std):
#     return (spread - mean) / std

# def zscore(spread, window_size):
#     mean = spread.rolling(window=window_size).mean()
#     std = spread.rolling(window=window_size).std()
#     zscore = calculate_zscore(spread, mean, std)
#     return zscore

def plot_spread(spread, lookback=500):
    """Plot the last ``lookback`` points of ``spread`` with a dashed red line at their mean."""
    recent = spread.tail(lookback)
    # Seaborn's darkgrid style is applied only while this figure is drawn.
    with sns.axes_style("darkgrid"):
        recent.plot()
        average = recent.mean()
        plt.axhline(average, color='red', linestyle='--')
        plt.xlabel('Time')
        plt.legend(['Price Spread', 'Mean'])
        plt.show()

def plot_zscore(spread, half_life, lookback=500, zscore_range=None, plot_name="Spread", band_width=2.5):
    """Plot the spread with rolling bands and, below it, its rolling z-score (Bokeh).

    Args:
        spread: Time-indexed series to plot.
        half_life: Rolling window length used for the z-score, the mean and the bands.
        lookback: Number of most recent points shown.
        zscore_range: Levels drawn as horizontal lines on the z-score plot (a line at 0
            is always added). Defaults to ``[2, -2]``.
        plot_name: File stem of the HTML file written under ``output/``.
        band_width: Multiple of the rolling standard deviation used for the upper and
            lower bands around the rolling mean.

    Prints the latest z-score, then tries to save and show the figure; a failure there
    is reported but does not raise.
    """
    import os  # only needed here, to make sure the output directory exists

    if zscore_range is None:
        zscore_range = [2, -2]

    # Rolling statistics are computed on the full series and trimmed afterwards, so the
    # displayed window has no warm-up NaNs and still works when lookback <= half_life.
    _zscore_s = zscore(spread, half_life).tail(lookback)
    # NOTE(review): the bands use the unshifted rolling mean and std() with its default
    # ddof, while zscore() uses shifted statistics with ddof=0 — confirm this is intended.
    mean = spread.rolling(half_life).mean().tail(lookback)
    std = spread.rolling(half_life).std().tail(lookback)
    spread = spread.tail(lookback)
    print('zscore = ', _zscore_s.iloc[-1])

    formatters = {"$x": "datetime"}
    spread_tooltips = [
        ("Date", "$x{%F %T}"),
        ("Spread", "$y")
    ]
    # The lower plot shows the z-score, so label its hover value accordingly.
    zscore_tooltips = [
        ("Date", "$x{%F %T}"),
        ("Z-score", "$y")
    ]

    upper = mean + band_width * std
    lower = mean - band_width * std
    source = ColumnDataSource(data=dict(
        x=spread.index.values,
        y1=spread,
        y2=_zscore_s,
        y3=mean,
        y4=upper,
        y5=lower,
    ))

    # create plots
    TOOLS = "xpan,xwheel_zoom,box_zoom,undo,redo,reset,save"
    plot1 = figure(title="Plot 1", tools=TOOLS, width=1000, height=400, x_axis_type='datetime')
    plot1.line('x', 'y1', source=source, line_color="blue")
    for band_column in ('y3', 'y4', 'y5'):
        plot1.line('x', band_column, source=source, line_color="red")

    plot1.add_tools(HoverTool(
            point_policy='follow_mouse',
            formatters=formatters,
            tooltips=spread_tooltips,
            mode='vline'))
    plot1.yaxis.axis_label = "y1"

    plot2 = figure(title="Plot 2", tools=TOOLS, width=1000, height=300, x_axis_type='datetime')
    plot2.line('x', 'y2', source=source, line_color="red")

    # list(...) so that a tuple of levels is accepted as well as a list
    hlines = [
        Span(location=level, dimension='width', line_color='blue', line_width=1)
        for level in list(zscore_range) + [0]
    ]
    plot2.renderers.extend(hlines)

    plot2.yaxis.axis_label = "y2"
    plot2.add_tools(HoverTool(
            point_policy='follow_mouse',
            formatters=formatters,
            tooltips=zscore_tooltips,
            mode='vline'))

    # combine plots into a grid
    grid = column(plot1, plot2)

    # show the grid
    output_notebook()
    try:
        os.makedirs("output", exist_ok=True)
        output_file(f"output/{plot_name}.html", title="span.py example")
        show(grid)
    except Exception as exc:
        # Best effort: plotting problems must not abort the analysis, but should be visible.
        print(f"Could not save or show the z-score plot: {exc}")

In [6]:
# r = plot1.multi_line('x', 'y1', source=source)
# tool = FreehandDrawTool(renderers=[r])
# plot1.add_tools(tool)

In [7]:
def check_pair(symbols, hedge_ratio, half_life, timeframe=TIMEFRAME, nb_symbols=2, zscore_lookback=500, zscore_range=None):
    """Fetch recent candles for ``symbols``, build their log-price spread and inspect it.

    For each symbol the ``<symbol>USDT`` candles are downloaded, the natural log of the
    close is taken, and the first ``nb_symbols`` series are combined with ``hedge_ratio``
    via ``calculate_spread``. The spread's z-score plot is shown and the result of
    ``adf_test`` is printed.

    Args:
        symbols: Base asset symbols; quoted against USDT.
        hedge_ratio: Weights applied to the log closes, aligned with ``symbols``.
        half_life: Rolling window passed to ``plot_zscore``.
        timeframe: Candle interval; also selects how many days of history are fetched
            (``'1h'``: 120, ``'15m'``: 30, ``'1d'``: 365, anything else: 30).
        nb_symbols: Number of leading symbols that enter the spread.
        zscore_lookback: Number of most recent points plotted.
        zscore_range: Horizontal levels for the z-score plot.

    Returns:
        The spread series.

    Raises:
        ValueError: If no candles could be fetched for a symbol the spread needs.
    """
    # History depth depends only on the timeframe, so resolve it once for all symbols.
    history_days = {'1h': 4 * 30, '15m': 30, '1d': 365}.get(timeframe, 30)
    start_date = datetime.utcnow() - timedelta(days=history_days)

    data = []
    missing = []
    for symbol in symbols:
        candles = get_candles(base=symbol, quote='USDT', start_date=start_date, interval=timeframe)
        if candles is None:
            missing.append(symbol)
            continue

        # NOTE(review): the float32 cast limits precision of the log prices — confirm it is
        # still wanted now that only a recent window is downloaded.
        log_close = np.log(candles['close'].astype(np.float32)).rename(symbol)
        data.append(log_close)

    # Fail with a clear message instead of a KeyError / "No objects to concatenate" later.
    required_missing = [symbol for symbol in symbols[:nb_symbols] if symbol in missing]
    if required_missing:
        raise ValueError(f'No candle data available for: {", ".join(required_missing)}')

    df = pd.concat(data, axis=1)
    df = df.dropna(axis=1, how='all')

    spread = calculate_spread(df, symbols, hedge_ratio=hedge_ratio, nb_symbols=nb_symbols)
    # plot_spread(spread, lookback=zscore_lookback)
    plot_zscore(spread, half_life, zscore_range=zscore_range, lookback=zscore_lookback, plot_name='-'.join(symbols))
    print(adf_test(spread, verbose=True))
    return spread

In [14]:
# LTC / AVAX on 1h candles.
# NOTE(review): the figures below are presumably results of a backtest that is not part
# of this notebook — confirm where they come from.
# Final Portfolio Value: 1204.49
# Norm. Annual Return: 47.33%
# Max Drawdown: 12.70%

symbols = ['LTC', 'AVAX']
# half_life is used by check_pair as the rolling window (in candles) for the z-score.
half_life = 64
# One weight per symbol, applied to the log close prices when building the spread.
hedge_ratio = [17.57792749841307, -17.330928478907293]

spread = check_pair(
    symbols, hedge_ratio, half_life, timeframe='1h', 
    zscore_range=[2.5, 4, -2.5, -4], zscore_lookback=500
)

zscore =  1.9440614591798013VAX USDT 1h 2023-05-10 09:00:00                    


Results of Dickey-Fuller Test:
Result: The series is  stationary
5.8501538400606204e-05


Start : This command cannot be run due to the error: The system cannot find the file specified.
At line:1 char:1
+ Start "file:///mnt/d/Working/PersonalProjects/Trading/trading-agent/c ...
+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    + CategoryInfo          : InvalidOperation: (:) [Start-Process], InvalidOperationException
    + FullyQualifiedErrorId : InvalidOperationException,Microsoft.PowerShell.Commands.StartProcessCommand
 


In [13]:
# LINK / ADA on 1h candles.
# R = 27.84%, DD = 2.07%
# NOTE(review): R and DD presumably mean return and max drawdown from a backtest that is
# not part of this notebook — confirm.

symbols = ['LINK', 'ADA']
# half_life is used by check_pair as the rolling window (in candles) for the z-score.
half_life = 68
# One weight per symbol, applied to the log close prices when building the spread.
hedge_ratio = [22.738220775708367, -7.6506009277369795]

spread = check_pair(
    symbols, hedge_ratio, half_life, timeframe='1h',
    zscore_range=[2, 4, -2, -4], zscore_lookback=300, nb_symbols=2)

zscore =  -1.2254120928328254A USDT 1h 2023-05-10 09:00:00                     


Results of Dickey-Fuller Test:
Result: The series is  stationary
0.0023728828965472342


Start : This command cannot be run due to the error: The system cannot find the file specified.
At line:1 char:1
+ Start "file:///mnt/d/Working/PersonalProjects/Trading/trading-agent/c ...
+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    + CategoryInfo          : InvalidOperation: (:) [Start-Process], InvalidOperationException
    + FullyQualifiedErrorId : InvalidOperationException,Microsoft.PowerShell.Commands.StartProcessCommand
 


In [12]:
# ETC / XMR on 1h candles.
# NOTE(review): the figures below are presumably results of a backtest that is not part
# of this notebook — confirm where they come from.
# Final Portfolio Value: 1069.91
# Norm. Annual Return: 15.11%
# Max Drawdown: 2.02%

symbols = ['ETC', 'XMR']
# half_life is used by check_pair as the rolling window (in candles) for the z-score.
half_life = 85
# One weight per symbol, applied to the log close prices when building the spread.
hedge_ratio = [16.36272503, -15.12756944]

spread = check_pair(
    symbols, hedge_ratio, half_life, timeframe='1h',
    zscore_range=[2.5, 4, -2.5, -4], zscore_lookback=300, nb_symbols=2)

zscore =  -0.4628901971640382R USDT 1h 2023-05-10 09:00:00                    


Results of Dickey-Fuller Test:
Result: The series is  stationary
0.006348992501298087


Start : This command cannot be run due to the error: The system cannot find the file specified.
At line:1 char:1
+ Start "file:///mnt/d/Working/PersonalProjects/Trading/trading-agent/c ...
+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    + CategoryInfo          : InvalidOperation: (:) [Start-Process], InvalidOperationException
    + FullyQualifiedErrorId : InvalidOperationException,Microsoft.PowerShell.Commands.StartProcessCommand
 


In [9]:
# ETH / XMR on 1d candles.
# R = 29.29%, DD = 1.81%
# NOTE(review): R and DD presumably mean return and max drawdown from a backtest that is
# not part of this notebook — confirm.
symbols = ['ETH', 'XMR']
# half_life is used by check_pair as the rolling window (in candles) for the z-score.
half_life = 10
# One weight per symbol, applied to the log close prices when building the spread.
hedge_ratio = [6.352962304733253, -12.979642907136808]

spread = check_pair(
    symbols, hedge_ratio, half_life, timeframe='1d', 
    zscore_range=[2.5, 4, -2.5, -4], zscore_lookback=500
)

zscore =  -1.406537141008577MR USDT 1d 2023-05-10 07:00:00                    


Results of Dickey-Fuller Test:
Result: The series is  stationary
0.0066755516459907375


Start : This command cannot be run due to the error: The system cannot find the file specified.
At line:1 char:1
+ Start "file:///mnt/d/Working/PersonalProjects/Trading/trading-agent/c ...
+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    + CategoryInfo          : InvalidOperation: (:) [Start-Process], InvalidOperationException
    + FullyQualifiedErrorId : InvalidOperationException,Microsoft.PowerShell.Commands.StartProcessCommand
 
