# Inventory Management Research

Research into a market making strategy that has inventory management based on the
paper _High-frequency trading in a limit order book_.

In [1]:
import warnings
warnings.filterwarnings("ignore")

In [2]:
import os
from enum import Enum
from glob import glob
from importlib import reload
from itertools import product
from json import dumps
from math import floor
from typing import Any, Callable, Dict, List, Optional, Tuple, Union
from uuid import uuid4

import arrow
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import psycopg2 as pg
import pyarrow as pa
import pyarrow.parquet as pq
import seaborn as sns
import yaml
from dotenv import load_dotenv
from matplotlib.ticker import (
    FuncFormatter,
    LogFormatter,
    ScalarFormatter,
    StrMethodFormatter,
)
from numba import jit, jitclass, njit, typed, types
from numba.typed import Dict
from numpy import random
from numpy.lib.stride_tricks import as_strided as stride
from psycopg2 import extensions
from pytz import utc

In [3]:
from scipy import stats
from sklearn.decomposition import PCA, FastICA
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis
from sklearn.ensemble import (
    AdaBoostClassifier,
    BaggingClassifier,
    ExtraTreesClassifier,
    GradientBoostingClassifier,
    RandomForestClassifier,
    RandomForestRegressor,
    VotingClassifier,
)
from sklearn.gaussian_process import GaussianProcessClassifier
from sklearn.gaussian_process.kernels import RBF
from sklearn.linear_model import (
    Lasso,
    LogisticRegression,
    PassiveAggressiveClassifier,
    RidgeClassifier,
    SGDClassifier,
)
from sklearn.metrics import (
    accuracy_score,
    auc,
    classification_report,
    explained_variance_score,
    f1_score,
    log_loss,
    mean_absolute_error,
    mean_squared_error,
    mean_squared_log_error,
    r2_score,
    roc_auc_score,
    roc_curve,
)
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import BernoulliNB, GaussianNB, MultinomialNB
from sklearn.neighbors import KNeighborsClassifier, RadiusNeighborsClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.svm import SVC, SVR, LinearSVC, NuSVC, OneClassSVM
from sklearn.tree import DecisionTreeClassifier, ExtraTreeClassifier
from statsmodels.tsa.ar_model import AR
from statsmodels.tsa.arima_model import ARIMA

In [4]:
from sklearn.covariance import EllipticEnvelope
from sklearn.ensemble import IsolationForest
from sklearn.neighbors import LocalOutlierFactor

In [5]:
from sklearn.decomposition import PCA, IncrementalPCA, KernelPCA, SparsePCA, TruncatedSVD
from sklearn.preprocessing import (
    MaxAbsScaler,
    MinMaxScaler,
    Normalizer,
    RobustScaler,
    StandardScaler,
)

In [6]:
from xgboost import XGBClassifier, XGBRegressor, plot_importance

In [7]:
import cufflinks as cf
import plotly.graph_objs as go
import plotly.grid_objs as gd
from plotly import __version__
from plotly.offline import download_plotlyjs, init_notebook_mode, iplot, plot

# Initialise Plotly's notebook mode and switch cufflinks to offline plotting.
init_notebook_mode(connected=True)
cf.go_offline()
# Persist settings that agree with the offline mode selected above, and keep
# figures private rather than world-readable should online mode ever be used.
cf.set_config_file(offline=True, world_readable=False, theme='polar')

In [8]:
from src.backtest import *
from src.support import *
from src.feature_engineering import *

In [9]:
from src.backtest import _backtest

In [10]:
# Display and formatting
# Raise the pandas display limits so wide / long frames are shown in full.
pd.set_option('display.max_columns', 200)
pd.set_option('display.max_rows', 1200)
# Take five colours from seaborn's "deep" palette and swap the second and third.
palette = sns.color_palette('deep', 5)
palette[1], palette[2] = palette[2], palette[1]
sns.set_style('darkgrid')
# Render matplotlib figures inline in the notebook output.
%matplotlib inline

# Randomness Seed

In [11]:
# Seed for reproducible results; used for the random inventory draws and passed to the backtest
SEED = 42

# Load Data

## TAQ and Signals

Load the TAQ data, signals, OHLCV bars, and any other data.

In [12]:
# Load the TAQ data from parquet (the expected layout is shown in the review cells below)
taq = pd.read_parquet("data/taq/bitmex_BTCUSD_TAQ.parq")

### Review the Data

Ensure that the TAQ data exactly matches the following example, including the data types
and the index name (`time`), which can be set using `taq.index.name = "time"`.

The signals must also have the correct time index. The remaining columns can be arbitrary;
they are passed to your strategy for each new signal.

In [13]:
taq.head()

Unnamed: 0_level_0,price,amount,bid_price,bid_amount,ask_price,ask_amount
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2019-03-20 00:00:05.698,3997.0,8200.0,,,,
2019-03-20 00:00:06.933,3997.0,25.0,3996.5,758874.0,3997.0,885865.0
2019-03-20 00:00:06.973,3997.0,50.0,3996.5,758874.0,3997.0,885865.0
2019-03-20 00:00:07.380,3997.0,123.0,3996.5,783273.0,3997.0,862056.0
2019-03-20 00:00:07.546,3996.371527,-998859.0,3996.5,783273.0,3997.0,862056.0


In [14]:
taq.info(verbose=True)

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 5717845 entries, 2019-03-20 00:00:05.698000 to 2019-05-07 23:59:59.532000
Data columns (total 6 columns):
price         float64
amount        float64
bid_price     float64
bid_amount    float64
ask_price     float64
ask_amount    float64
dtypes: float64(6)
memory usage: 305.4 MB


# Signal Research


In [15]:
# Load the BitMEX OHLCV bars from parquet
ohlcv = pd.read_parquet("data/bitmex_BTCUSD_OHLCV.parq")

In [16]:
# Timestamp is for the end of the bar (close)
ohlcv.head()

Unnamed: 0_level_0,open,high,low,close,volume,volume_buy,volume_sell,count,count_buy,count_sell,twap,vwap
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
2019-02-01 00:01:00,3413.5,3414.0,3413.5,3413.5,405200.0,330431.0,-74769.0,51.0,29.0,-22.0,3413.796972,3413.907738
2019-02-01 00:02:00,3413.5,3416.5,3413.5,3416.5,1283905.0,1160341.0,-123564.0,108.0,83.0,-25.0,3415.592266,3415.127847
2019-02-01 00:03:00,3416.5,3416.5,3416.0,3416.5,979499.0,906606.0,-72893.0,66.0,48.0,-18.0,3416.346058,3416.462791
2019-02-01 00:04:00,3416.5,3417.0,3416.0,3417.0,831404.0,813279.0,-18125.0,58.0,48.0,-10.0,3416.699591,3416.693508
2019-02-01 00:05:00,3417.0,3417.0,3416.5,3417.0,578247.0,532886.0,-45361.0,45.0,35.0,-10.0,3416.956439,3416.960777


### Generate OHLCV Data

In [17]:
# Load the raw trades data (left commented out; the pre-built OHLCV parquet is loaded above)
# trades = pd.read_csv(
#     "data/trades/bitmex_BTCUSD_trades_2019-02-01.csv",
#     dtype={
#         "time": np.int64,
#         "price": np.float64,
#         "amount": np.float64,
#         "uid": str,
#     },
# )

In [18]:
#trades = aggregate_trades(trades, "bitmex", "1us", 0)

In [19]:
#trades.tail()

In [20]:
# Build OHLCV
# bar_size = "15m"
# ohlcv = generate_ohlcv(trades, bar_size)

In [21]:
ohlcv.tail()

Unnamed: 0_level_0,open,high,low,close,volume,volume_buy,volume_sell,count,count_buy,count_sell,twap,vwap
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
2019-05-23 17:41:00,7777.0,7786.5,7776.5,7786.0,2144852.0,2022729.0,-122123.0,138.0,104.0,-34.0,7782.634087,7783.336121
2019-05-23 17:42:00,7786.5,7786.5,7779.5,7780.0,2081668.0,322466.0,-1759202.0,114.0,46.0,-68.0,7782.571515,7782.968803
2019-05-23 17:43:00,7779.5,7781.5,7774.0,7781.0,2063593.0,1097158.0,-966435.0,98.0,36.0,-62.0,7779.756207,7778.7115
2019-05-23 17:44:00,7781.5,7781.5,7770.5,7771.0,421131.0,39662.0,-381469.0,77.0,31.0,-46.0,7776.485427,7775.699162
2019-05-23 17:45:00,7770.5,7771.0,7769.0,7769.0,1387252.0,173718.0,-1213534.0,73.0,26.0,-47.0,7769.677349,7769.983252


## Fix Missing Data

In [22]:
# Check for missing data
ohlcv[ohlcv.isnull().values.any(axis=1)].head()

Unnamed: 0_level_0,open,high,low,close,volume,volume_buy,volume_sell,count,count_buy,count_sell,twap,vwap
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
2019-02-12 17:33:00,,,,,,,,,,,,
2019-02-12 17:34:00,,,,,,,,,,,,
2019-02-12 17:35:00,,,,,,,,,,,,
2019-02-12 17:36:00,,,,,,,,,,,,
2019-02-12 17:37:00,,,,,,,,,,,,


In [23]:
def fix_features(
    features,
    methods={
        "open": "ffill",
        "high": "ffill",
        "low": "ffill",
        "close": "ffill",
        "vwap": "ffill",
        "twap": "ffill",
    },
    skip=["exchange", "trading_pair", "exchange_pair"],
):
    """Clean invalid values in ``features`` in place.

    Steps, in order:
      1. Every +/-infinity in the frame is replaced by NaN.
      2. Each column named in ``methods`` is filled with the given directional
         fill ("ffill"/"pad" or "bfill"/"backfill").
      3. Every NaN that is still left, in any column, is replaced by 0.0.

    Args:
        features: DataFrame to clean. It is modified in place.
        methods: Mapping of column name -> fill method for step 2. Every key
            must be a column of ``features`` (a missing column raises KeyError).
            The default mapping is only read, never mutated.
        skip: Accepted for interface compatibility; it is not used by this
            function.

    Returns:
        None. The changes are applied to ``features`` itself.

    Raises:
        ValueError: If a fill method is not one of the supported names.
    """
    # ``np.NINF`` was removed in NumPy 2.0; ``-np.inf`` is the same value on every version.
    features.replace([np.inf, -np.inf], np.nan, inplace=True)

    for column, method in methods.items():
        series = features[column]
        if method in ("ffill", "pad"):
            filled = series.ffill()
        elif method in ("bfill", "backfill"):
            filled = series.bfill()
        else:
            raise ValueError(f"Invalid fill method for column {column!r}: {method!r}")
        # Assign the filled column back instead of calling
        # ``features[column].fillna(..., inplace=True)``: that form mutates a
        # temporary under copy-on-write and relies on the deprecated ``method=`` keyword.
        features[column] = filled

    # Whatever is still missing (e.g. volumes and counts) becomes 0.0.
    features.replace([np.nan], 0.0, inplace=True)

In [24]:
# Fix missing data, ffill ohlcv, leave volumes as 0
fix_features(ohlcv)

In [25]:
# Check for missing data
ohlcv[ohlcv.isnull().values.any(axis=1)].head()

Unnamed: 0_level_0,open,high,low,close,volume,volume_buy,volume_sell,count,count_buy,count_sell,twap,vwap
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1


In [26]:
ohlcv.loc["2019-02-12 17:32":].head()

Unnamed: 0_level_0,open,high,low,close,volume,volume_buy,volume_sell,count,count_buy,count_sell,twap,vwap
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
2019-02-12 17:32:00,3580.0,3580.0,3579.5,3580.0,172192.0,165679.0,-6513.0,52.0,45.0,-7.0,3579.920493,3579.981088
2019-02-12 17:33:00,3580.0,3580.0,3579.5,3580.0,0.0,0.0,0.0,0.0,0.0,0.0,3579.920493,3579.981088
2019-02-12 17:34:00,3580.0,3580.0,3579.5,3580.0,0.0,0.0,0.0,0.0,0.0,0.0,3579.920493,3579.981088
2019-02-12 17:35:00,3580.0,3580.0,3579.5,3580.0,0.0,0.0,0.0,0.0,0.0,0.0,3579.920493,3579.981088
2019-02-12 17:36:00,3580.0,3580.0,3579.5,3580.0,0.0,0.0,0.0,0.0,0.0,0.0,3579.920493,3579.981088


## Volatility

In [27]:
# Calculate volatility for different windows
def volatility(data, windows=[3, 5, 7, 14, 21, 30]):
    """Add rolling volatility columns of the close-to-close log returns.

    For every window ``w`` a column ``volatility_{w}`` is written to ``data``
    (in place). Its value is the population standard deviation (ddof=0) of the
    log returns over the last ``w`` rows, scaled by ``sqrt(w)``. NaN returns are
    ignored and a single valid return is enough to produce a value, so only
    rows with no valid return in the window are NaN.

    The returns are per row of ``data`` (one bar), whatever the bar size is.

    Args:
        data: DataFrame with a ``close`` column. It is modified in place.
        windows: Rolling window lengths, in rows. The default list is only read.

    Returns:
        None. The new columns are added to ``data``.
    """
    # The log returns do not depend on the window, so compute them once.
    log_returns = np.log(data["close"] / data["close"].shift(1))

    for w in windows:
        # Built-in rolling std with ddof=0 is the vectorised equivalent of
        # applying ``np.nanstd`` to every window from Python.
        rolling_std = log_returns.rolling(w, min_periods=1).std(ddof=0)
        data[f"volatility_{w}"] = rolling_std * np.sqrt(w)

In [28]:
window = 60

In [29]:
# First calculation for sigma
volatility(ohlcv, windows=[window])
ohlcv["sigma"] = ohlcv[f"volatility_{window}"]

In [31]:
# Second calculation for sigma
# ohlcv['log_change'] = np.log(ohlcv['close']) - np.log(ohlcv['close'].shift(1))
# ohlcv['sigma'] = ohlcv['log_change'].rolling(window).std()

In [30]:
ohlcv.tail()

Unnamed: 0_level_0,open,high,low,close,volume,volume_buy,volume_sell,count,count_buy,count_sell,twap,vwap,volatility_60,sigma
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
2019-05-23 17:41:00,7777.0,7786.5,7776.5,7786.0,2144852.0,2022729.0,-122123.0,138.0,104.0,-34.0,7782.634087,7783.336121,0.008603,0.008603
2019-05-23 17:42:00,7786.5,7786.5,7779.5,7780.0,2081668.0,322466.0,-1759202.0,114.0,46.0,-68.0,7782.571515,7782.968803,0.008631,0.008631
2019-05-23 17:43:00,7779.5,7781.5,7774.0,7781.0,2063593.0,1097158.0,-966435.0,98.0,36.0,-62.0,7779.756207,7778.7115,0.008634,0.008634
2019-05-23 17:44:00,7781.5,7781.5,7770.5,7771.0,421131.0,39662.0,-381469.0,77.0,31.0,-46.0,7776.485427,7775.699162,0.008636,0.008636
2019-05-23 17:45:00,7770.5,7771.0,7769.0,7769.0,1387252.0,173718.0,-1213534.0,73.0,26.0,-47.0,7769.677349,7769.983252,0.008608,0.008608


## Create Strategy Signals

Create the signals data frame for backtesting the strategy with.

In [31]:
# Copy explicitly so the in-place fixes below act on an independent frame
# rather than on a column selection of ``ohlcv``.
signals = ohlcv[["sigma"]].copy()

# Fix any missing data / NaNs
fix_features(signals, methods={"sigma": "ffill"})

In [32]:
# Check for NaNs
signals[signals.isnull().values.any(axis=1)].head()

Unnamed: 0_level_0,sigma
time,Unnamed: 1_level_1


In [33]:
# Check signals type -- must be float64
signals.info(verbose=True)

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 160905 entries, 2019-02-01 00:01:00 to 2019-05-23 17:45:00
Data columns (total 1 columns):
sigma    160905 non-null float64
dtypes: float64(1)
memory usage: 7.5 MB


In [34]:
signals.tail()

Unnamed: 0_level_0,sigma
time,Unnamed: 1_level_1
2019-05-23 17:41:00,0.008603
2019-05-23 17:42:00,0.008631
2019-05-23 17:43:00,0.008634
2019-05-23 17:44:00,0.008636
2019-05-23 17:45:00,0.008608


# Monte Carlo Signals

Baseline monte-carlo approach that just randomly changes the inventory.

In [36]:
max_size = 5

# Generate seeded random inventory
random.seed(SEED)
np.random.seed(SEED)
# ``randint`` excludes ``high``, so use ``max_size + 1`` to draw targets from the
# symmetric range [-max_size, max_size] instead of [-max_size, max_size - 1].
random_inv = np.random.randint(-max_size, max_size + 1, size=(len(ohlcv), 1))

# NOTE: this rebinds ``signals``, replacing the sigma signals built earlier.
signals = pd.DataFrame(random_inv, index=ohlcv.index, columns=["inv"])
# ``np.float`` was removed in NumPy 1.24; signals must be float64 anyway.
signals["inv"] = signals["inv"].astype(np.float64)

In [37]:
signals.head()

Unnamed: 0_level_0,inv
time,Unnamed: 1_level_1
2019-02-01 00:15:00,1.0
2019-02-01 00:30:00,-2.0
2019-02-01 00:45:00,2.0
2019-02-01 01:00:00,-1.0
2019-02-01 01:15:00,1.0


In [38]:
signals.info(verbose=True)

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 10727 entries, 2019-02-01 00:15:00 to 2019-05-23 17:45:00
Freq: 15T
Data columns (total 1 columns):
inv    10727 non-null float64
dtypes: float64(1)
memory usage: 487.6 KB


# Strategy

The strategy targets the inventory specified by the signals in an attempt to achieve a return.

### Monte Carlo

A simple strategy that uses the Monte Carlo inventory model to randomly hold long/short inventory,
with some basic risk management (close all positions if losses exceed a threshold).

In [39]:
@njit
def monte_carlo(
    signals: np.ndarray,
    position: typed.Dict,
    balance: typed.Dict,
    nav: float,
    orders: types.List,
    taq: np.ndarray,
    params: typed.Dict,
) -> types.List:
    """
    Strategy callback that steers the position towards the inventory target in the signal.

    The operation/type constants used below (CANCEL, ADD, MARKET, FOLLOW_LIMIT) are not
    defined in this cell; presumably they come from the ``src`` star imports -- confirm.

    Args:
        signals: A numpy vector containing the signals at the current step; element 1 is
            read as the inventory target (element 0 is presumably the timestamp -- confirm)
        position: The current position details; reads "amount" and "weighted_price"
        balance: The current balance details (not used by this strategy)
        nav: The NAV of the account in terms of USD
        orders: The list of active orders on the exchange; reads "order_id" and "amount"
        taq: The current TAQ data, unpacked into seven values
        params: Dictionary of parameters for the strategy; reads "max_size", "pct_loss",
            "threshold" and "level"
    Returns:
        A list of the actions to take, which results in existing orders being removed or amended
    """
    actions = []
    # Target inventory, expressed as a fraction of the maximum size
    inv = signals[1]
    inv_pct = inv / params["max_size"]
    add_order = True
    # Only bid_price is used below; the remaining TAQ fields are unpacked but unused
    ts, price, _, bid_price, bid_amount, ask_price, ask_amount = taq

    # Simple risk management, immediately cancel everything and close position if losses exceed threshold
    if position["weighted_price"] != 0.0:
        # PnL from the difference of inverse prices (entry vs. current bid) times the position size
        # NOTE(review): the bid is used for both long and short positions -- confirm intended
        pnl = ((1.0 / position["weighted_price"]) - (1.0 / bid_price)) * position["amount"]

        # Relative change in NAV once the PnL (converted at the bid) is included
        if ((nav + (pnl * bid_price)) / nav - 1.0) < params["pct_loss"]:
            # Cancel all orders
            for order in orders:
                actions.append({"operation": CANCEL, "order_id": order["order_id"]})
            # Market order to close position
            actions.append({
                "operation": ADD,
                "type": MARKET,
                "order_id": np.float64(np.random.randint(1e9, 2e9)),
                "amount": -1.0 * np.floor(position["amount"]),
            })
            # print("LOSSES EXCEEDED THRESHOLD: ", ((nav + (pnl * bid_price)) / nav - 1.0))
            return actions
    

    # Skip if current inventory is within threshold of target
    if np.abs(inv_pct - (position["amount"] / nav)) <= params["threshold"]:
        return actions

    # Order amount based on current inventory and the target inventory from signal
    amount = np.floor((inv_pct * nav) - position["amount"])

    # Don't cancel orders for the same repeated signal
    # The first order whose amount is within the relative threshold of the target is kept;
    # every other order is cancelled.
    # NOTE(review): divides by order["amount"]; a zero-amount order would divide by zero
    for order in orders:
        if add_order and (np.abs((amount - order["amount"]) / order["amount"]) <= params["threshold"]):
            add_order = False
            continue
        actions.append({"operation": CANCEL, "order_id": order["order_id"]})

    # Create order based on inventory target
    if add_order:
        actions.append(
            {
                "operation": ADD,
                "type": FOLLOW_LIMIT,
                "order_id": np.float64(np.random.randint(1e9, 2e9)),
                "price_level": params["level"],
                "amount": amount,
            }
        )

    return actions

## Dynamic Inventory Management Strategy

The inventory is calculated live at each step, and the fair value (reservation price) and the bid/ask prices and sizes are derived from it.

In [35]:
# TODO: Add support for dynamic calculations at each interval for all parameters
# TODO: Add support to track fill rate based on live trading data with a window


@njit
def inventory_management(
    signals: np.ndarray,
    position: typed.Dict,
    balance: typed.Dict,
    nav: float,
    orders: types.List,
    taq: np.ndarray,
    params: typed.Dict,
) -> types.List:
    """
    Strategy callback that quotes a bid and an ask around an inventory-adjusted fair value.

    The operation/type constants used below (CANCEL, ADD, LIMIT) are not defined in this
    cell; presumably they come from the ``src`` star imports -- confirm.

    Args:
        signals: A numpy vector containing the signals at the current step; unpacked into
            exactly two values, the second being sigma
        position: The current position details; reads "amount"
        balance: The current balance details (not used by this strategy)
        nav: The NAV of the account in terms of USD
        orders: The list of active orders on the exchange; reads "order_id", "amount" and "price"
        taq: The current TAQ data, unpacked into seven values
        params: Dictionary of parameters for the strategy; reads "max_size", "gamma", "k",
            "n", "delta_price" and "delta_size"
    Returns:
        A list of the actions to take, which results in existing orders being removed or amended
    """
    actions = []
    _, sigma = signals
    # Uses the last trade price and the top-of-book bid/ask prices; other fields are ignored
    ts, price, _, bid_price_1, _, ask_price_1, _ = taq
    add_bid, add_ask = True, True

    # TODO: Round to 2 decimals
    # Calculate current inventory
    # Support for maximum leverage
    # Position as a fraction of NAV, scaled to units of max_size
    inv = (position["amount"] / nav) * params["max_size"]

    # Reservation price / fair value
    price_res = price - inv * params["gamma"] * sigma**2

    # Spread, must be based off current bid/ask prices, since no inbetween on Bitmex
    # TODO: Compute k dynamically as part of signal
    spread = params["gamma"] * sigma**2 + (2 / params["gamma"]) * np.log(
        1 + (params["gamma"] / params["k"])
    )
    # Half of the total spread, applied on each side
    spread /= 2.0

    # TODO: Perhaps use bid_price instead of last trade price
    # NOTE(review): both branches are algebraically identical:
    #   ask_spread = spread + (price_res - price), bid_spread = spread - (price_res - price)
    if price_res >= price:
        ask_spread = spread + (price_res - price)
        bid_spread = spread - (price_res - price)
    else:
        ask_spread = spread - (price - price_res)
        bid_spread = spread + (price - price_res)

    # Bid and Ask price - round to 0.5 increments for BitMEX
    # NOTE(review): the spreads above already contain (price_res - price) and are applied
    # around price_res, so the quotes end up centred on 2 * price_res - price -- confirm
    # this doubled skew is intended.
    bid_price = np.round((price_res - bid_spread) * 2.0) / 2.0
    ask_price = np.round((price_res + ask_spread) * 2.0) / 2.0

    # Fix - Cannot set bid / ask beyond current top of books due to BitMEX spread limitation
    # i.e. the bid is capped at the best bid and the ask is floored at the best ask
    bid_price = bid_price_1 if bid_price > bid_price_1 else bid_price
    ask_price = ask_price_1 if ask_price < ask_price_1 else ask_price

    # TODO: Review - sets a cap rather than setting to max_size
    # TODO: round to 2 decimals?
    # Bid and ask size
#     bid_size = np.round(
#         np.where(inv >= params["max_size"], 0, params["max_size"] / np.exp(-params["n"] * inv)), 0
#     )
#     ask_size = np.round(
#         np.where(inv <= -params["max_size"], 0, -params["max_size"] * np.exp(-params["n"] * inv)), 0
#     )

    # TODO: THIS NEEDS REVIEW
    # Bid size (positive): full max_size while short, otherwise max_size / exp(-n * inv)
    bid_size = params["max_size"] if inv < 0 else params["max_size"] / np.exp(-params["n"] * inv)
    bid_size = np.round(bid_size, 0)
    
    # Ask size (negative): full -max_size while long, otherwise -max_size * exp(-n * inv)
    ask_size = -params["max_size"] if inv > 0 else -params["max_size"] * np.exp(-params["n"] * inv)
    ask_size = np.round(ask_size, 0)

    # Optimization - Don't cancel orders that match the current target
    # At most one matching bid and one matching ask are kept; all other orders are cancelled
    for order in orders:
        # TODO: Round to 2 decimals?
        # Current order size as unit in terms of bid/ask size
        order_size = np.round((order["amount"] / nav) * params["max_size"], 0)

        # Keep an existing buy order that is close enough in both price and size
        if (
            add_bid
            and (np.sign(order_size) == 1.0)
            and (np.abs(bid_price - order["price"]) <= params["delta_price"])
            and (np.abs(bid_size - order_size) <= params["delta_size"])
        ):
            add_bid = False
            continue
        
        # Keep an existing sell order that is close enough in both price and size
        if (
            add_ask
            and (np.sign(order_size) == -1.0)
            and (np.abs(ask_price - order["price"]) <= params["delta_price"])
            and (np.abs(ask_size - order_size) <= params["delta_size"])
        ):
            add_ask = False
            continue
            
        actions.append({"operation": CANCEL, "order_id": order["order_id"]})

    # Create order based on inventory target
    # Sizes are converted from max_size units back to an amount as a fraction of NAV
    if add_bid:
        actions.append(
            {
                "operation": ADD,
                "type": LIMIT,
                "order_id": np.float64(np.random.randint(1e9, 2e9)),
                "price": bid_price,
                "amount": np.floor((bid_size / params["max_size"]) * nav),
            }
        )
    if add_ask:
        actions.append(
            {
                "operation": ADD,
                "type": LIMIT,
                "order_id": np.float64(np.random.randint(1e9, 2e9)),
                "price": ask_price,
                "amount": np.floor((ask_size / params["max_size"]) * nav),
            }
        )

    return actions

In [38]:
# TODO: Review, test other functions / methods for bid/ask sizes
# Tabulate the bid/ask sizes produced by the sizing rule over a range of inventories.
max_size = 10
n = -0.4
inventory_levels = (-10, -7, -5, -3, -2, -1, 0, 1, 2, 3, 5, 7, 10)

for inv in inventory_levels:
    # Bid: full size while short, otherwise scaled by the exponential term.
    if inv < 0:
        bid_size = max_size
    else:
        bid_size = max_size / np.exp(-n * inv)
    bid_size = np.round(bid_size, 0)

    # Ask (negative size): full size while long, otherwise scaled by the exponential term.
    if inv > 0:
        ask_size = -max_size
    else:
        ask_size = -max_size * np.exp(-n * inv)
    ask_size = np.round(ask_size, 0)

    print(f"inv: {inv} bid: {bid_size} ask: {ask_size}")

inv: -10 bid: 10 ask: -0.0
inv: -7 bid: 10 ask: -1.0
inv: -5 bid: 10 ask: -1.0
inv: -3 bid: 10 ask: -3.0
inv: -2 bid: 10 ask: -4.0
inv: -1 bid: 10 ask: -7.0
inv: 0 bid: 10.0 ask: -10.0
inv: 1 bid: 7.0 ask: -10
inv: 2 bid: 4.0 ask: -10
inv: 3 bid: 3.0 ask: -10
inv: 5 bid: 1.0 ask: -10
inv: 7 bid: 1.0 ask: -10
inv: 10 bid: 0.0 ask: -10


# Backtest

Backtest the strategy and evaluate the results.

In [36]:
# Start and end of test period
start, end = "2019-04-01 00:00:00", "2019-05-01 00:00:00"

In [37]:
# Convert and align the data
taq_raw, signals_raw = convert_align(taq, signals, start, end)

In [42]:
# Monte Carlo parameters
# NOTE: the dynamic inventory-management parameters cell further down also assigns
# ``strategy_params``; whichever of the two cells ran last is what the backtest receives.
strategy_params = {
    "max_size": 5.0,  # divisor that turns the signal's inventory into a fraction
    "threshold": 0.05,  # tolerance for skipping re-quotes and for keeping an existing order
    "level": 0.0,  # passed as "price_level" on the follow-limit order
    "pct_loss": -0.05,  # relative NAV change below which the position is closed at market
}

In [38]:
# Dynamic inventory management strategy parameters
strategy_params = {
    "max_size": 5.0,  # scales position / NAV into inventory units and caps the quote sizes
    "gamma": 0.1,  # used in both the reservation price and the spread formulas
    "k": 5.0,  # enters the spread through log(1 + gamma / k)
    "n": -0.4,  # exponent coefficient in the bid/ask size rule
    "delta_price": 1.0,  # max price difference for an existing order to be kept
    "delta_size": 1.0,  # max size difference (in max_size units) for an existing order to be kept
}

In [39]:
# Run the backtest
balances, positions, orders = backtest(
    taq_raw,
    signals_raw,
    inventory_management,  # Strategy
    strategy_params,  # Strategy parameters
    bitmex_pnl,  # PnL calculations
    bitmex_nav,  # NAV calculations
    -0.00025,  # Market maker fee
    0.00075,  # Market taker fee
    1.0,  # Volume threshold for order filled
    1.0,  # Starting balance
    0.5,  # Latency in seconds for creating/amending limit orders
    SEED,  # Randomness seed
)

UnsupportedError: Failed in nopython mode pipeline (step: nopython frontend)
[1mUnsupported op-code encountered: build_map(items=[(Var($0.12, /Users/jakemoore/Code/market-making/research/src/backtest.py (410)), Var(balance_start, /Users/jakemoore/Code/market-making/research/src/backtest.py (402))), (Var($0.13, /Users/jakemoore/Code/market-making/research/src/backtest.py (410)), Var(balance_start, /Users/jakemoore/Code/market-making/research/src/backtest.py (402)))], size=2)
[1m
File "src/backtest.py", line 410:[0m
[1mdef _backtest(
    <source elided>
    # Variables for tracking state
[1m    balance = {"total": balance_start, "available": balance_start}
[0m    [1m^[0m[0m
[0m
Unsupported functionality was found in the code Numba was trying to compile.

If this functionality is important to you please file a feature request at:
https://github.com/numba/numba/issues/new


# Review the Results

## Balance and Positions

In [58]:
balances.tail()

Unnamed: 0_level_0,total,available,nav
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2019-04-30 23:15:29.888,0.514374,1.0,2706.0
2019-04-30 23:16:19.746,0.514332,1.0,2706.0
2019-04-30 23:31:11.529,0.514576,1.0,2705.0
2019-04-30 23:37:43.306,0.514741,1.0,2707.0
2019-04-30 23:45:50.908,0.514818,1.0,2710.0


In [45]:
positions.tail()

Unnamed: 0_level_0,amount,weighted_price
time,Unnamed: 1_level_1,Unnamed: 2_level_1
2019-04-30 22:47:03.783,854.0,5264.0
2019-04-30 23:00:07.310,-3429.0,5275.0
2019-04-30 23:16:19.746,-4291.0,5272.689816
2019-04-30 23:38:52.994,857.0,5259.0
2019-04-30 23:55:37.504,-858.0,5263.0


## Orders

The orders have many metrics tracked, such as the slippage, fees, and active duration (number of seconds it was active).

### Orders Summary

In [59]:
# Headline order statistics: count, cancellation rate, mean size, slippage and durations
total_orders = orders.shape[0]
print("Number of orders: \t\t{:,}".format(total_orders))

# Share of all orders that ended up cancelled
cancelled_count = orders[orders["status"] == "cancelled"].shape[0]
print("Percentage cancelled: \t\t{:.2%}".format(cancelled_count / orders.shape[0]))

# Mean absolute order amount
mean_abs_amount = orders["amount"].abs().mean()
print("Avg Order Size: \t\t${:,.2f}".format(mean_abs_amount))

mean_slippage = orders["slippage"].mean()
print("Avg Slippage: \t\t\t${:,.2f}".format(mean_slippage))

# Mean time an order stayed active, over all orders and over filled orders only
mean_duration = orders["active_duration"].mean()
print("Avg Order Duration: \t\t{:.2f}s".format(mean_duration))

filled_orders = orders[orders["status"] == "filled"]
mean_filled_duration = filled_orders["active_duration"].mean()
print("Avg Order (Filled) Duration: \t{:.2f}s".format(mean_filled_duration))

Number of orders: 		5,707
Percentage cancelled: 		18.61%
Avg Order Size: 		$2,874.70
Avg Slippage: 			$0.00
Avg Order Duration: 		283.54s
Avg Order (Filled) Duration: 	139.36s


### Orders Review

In [60]:
# List filled orders
orders[orders["status"] == "filled"].tail()

Unnamed: 0_level_0,order_id,type,status,price,price_level,amount,amount_q,volume_req,volume,fee,slippage,closed,active_duration
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
2019-04-30 23:15:01,1409566646,limit,filled,5263.0,,1627.0,872400.0,116339.0,510917.0,-7.7e-05,0.0,2019-04-30 23:15:29.888,28.888
2019-04-30 23:15:01,1330624294,limit,filled,5263.5,,-2712.0,872400.0,90754.0,118021.0,-0.000129,0.0,2019-04-30 23:16:19.746,78.746
2019-04-30 23:30:01,1739209434,limit,filled,5258.0,,2706.0,695835.0,471176.0,484023.0,-0.000129,0.0,2019-04-30 23:31:11.529,70.529
2019-04-30 23:30:01,1324629148,limit,filled,5259.5,,-1624.0,695835.0,60053.0,73300.0,-7.7e-05,0.0,2019-04-30 23:37:43.306,462.306
2019-04-30 23:45:01,1753862126,limit,filled,5264.5,,1624.0,284648.0,284648.0,363499.0,-7.7e-05,0.0,2019-04-30 23:45:50.908,49.908


In [None]:
# List cancelled orders
orders[orders["status"] == "cancelled"].tail()

In [None]:
# List orders with slippage >= $25
orders[orders["slippage"] >= 25.0].tail()

## Plots

**TODO: Add some basic metrics other than NAV.**

In [61]:
# Plot the total balance and NAV over time
balances[["total", "nav"]].resample("1H").first().iplot(y="nav", secondary_y="total", title="Balance and NAV")

In [62]:
# The balance (BTC) in comparison to the price changes in Bitcoin
# Sample the balance once per minute, carry the last known value through empty
# minutes, then attach the close of the OHLCV bar with the same timestamp.
# ``.ffill()`` and the "min" alias replace the deprecated ``fillna(method=...)``
# and "T" spellings; the result is the same.
tmp = (
    balances[["total"]]
    .resample("1min")
    .first()
    .ffill()
    .join(ohlcv[["close"]], how="left")
)

tmp[["total", "close"]].resample("15min").first().iplot(y="total", secondary_y="close", title="Balance vs. Close Price")

In [None]:
# Positions, weighted price of position and position size
# TODO: Better plot
positions.resample("1H").first().iplot(y="amount", secondary_y="weighted_price")

In [None]:
# Order Reviews, average slippage and active duration
orders[["slippage","active_duration"]].resample("1D").mean().iplot(y="slippage", secondary_y="active_duration",
    yTitle="Slippage (dollars)", secondary_y_title="Open Duration (seconds)", title="Mean Slippage and Open Duration - Daily"
)