## For Crypto market:
- Volatility: high, which can lead to both large profit and significant risk
- Liquidity: Varies between different exchanges. More liquid market (BTC/USDT) are generally safer for MM
- 24/7 Trading: Unlike traditional market, crypto market never close, which means the bot needs to be able to operate continuously or manage the risks of runnning 24/7

In [1]:
import ccxt
import pandas as pd
from datetime import datetime
import os

In [2]:
kraken = ccxt.kraken() #binance has US-restriction
symbol = 'BTC/USD'
order_book = kraken.fetch_order_book(symbol)
print(type(order_book))
df = pd.DataFrame.from_dict(order_book)
df.head(5)

<class 'dict'>


Unnamed: 0,symbol,bids,asks,timestamp,datetime,nonce
0,BTC/USD,"[54261.9, 27.938, 1725757694]","[54262.0, 0.022, 1725757661]",,,
1,BTC/USD,"[54261.4, 0.092, 1725757693]","[54268.5, 0.022, 1725757680]",,,
2,BTC/USD,"[54259.5, 0.01, 1725757693]","[54271.7, 0.001, 1725756601]",,,
3,BTC/USD,"[54258.5, 0.14, 1725757687]","[54272.3, 0.001, 1725757694]",,,
4,BTC/USD,"[54258.4, 0.017, 1725757683]","[54272.4, 0.423, 1725757694]",,,


In [3]:
# Get current time of retrieval
current_time = datetime.now().strftime("%y%m%d_%H:%M:%S")

# Define file name
file_name = f"order_book_{symbol.replace("/", "")}_{current_time}.csv"

# Define save location
save_directory = "../data/raw"

# Full path where the file will be saved
saveOrderBook = os.path.join(save_directory, file_name)

# Convert DataFrame to CSV
df.to_csv(saveOrderBook, index = False)

print(f"Dava saved to {saveOrderBook}")

Dava saved to ../data/raw/order_book_BTCUSD_240907_20:08:17.csv


In [4]:
print(df.info())

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 100 entries, 0 to 99
Data columns (total 6 columns):
 #   Column     Non-Null Count  Dtype 
---  ------     --------------  ----- 
 0   symbol     100 non-null    object
 1   bids       100 non-null    object
 2   asks       100 non-null    object
 3   timestamp  0 non-null      object
 4   datetime   0 non-null      object
 5   nonce      0 non-null      object
dtypes: object(6)
memory usage: 4.8+ KB
None


In [5]:
# ------ Data Processing ------

bids = df['bids'] # bid price, volumn, datetime, and timestamp
asks = df['asks'] # ask price, volumn, datetime, and timestamp

df["bid_price"] = df["bids"].apply(lambda bid: bid[0])
df["bid_vol"] = df["bids"].apply(lambda bid: bid[1])
df["bid_datetime"] = df["bids"].apply(lambda bid: bid[2])
df["bid_datetime"] = pd.to_datetime(df["bid_datetime"], unit = "s")
df["bid_timestamp"] = df["bids"].apply(lambda bid: bid[2])

df["ask_price"] = df["asks"].apply(lambda ask: ask[0])
df["ask_vol"] = df["asks"].apply(lambda ask: ask[1])
df["ask_datetime"] = df["asks"].apply(lambda ask: ask[2])
df["ask_datetime"] = pd.to_datetime(df["ask_datetime"], unit = "s")
df["ask_timestamp"] = df["asks"].apply(lambda bid: bid[2])

df = df.drop(columns=["bids", "asks", "datetime", "timestamp", "nonce"], axis = 0)

print(f"Top bid: {df["bid_price"][0]}, Top ask: {df["bid_price"][0]}")
print(df.head(5))

Top bid: 54261.9, Top ask: 54261.9
    symbol  bid_price  bid_vol        bid_datetime  bid_timestamp  ask_price  \
0  BTC/USD    54261.9   27.938 2024-09-08 01:08:14     1725757694    54262.0   
1  BTC/USD    54261.4    0.092 2024-09-08 01:08:13     1725757693    54268.5   
2  BTC/USD    54259.5    0.010 2024-09-08 01:08:13     1725757693    54271.7   
3  BTC/USD    54258.5    0.140 2024-09-08 01:08:07     1725757687    54272.3   
4  BTC/USD    54258.4    0.017 2024-09-08 01:08:03     1725757683    54272.4   

   ask_vol        ask_datetime  ask_timestamp  
0    0.022 2024-09-08 01:07:41     1725757661  
1    0.022 2024-09-08 01:08:00     1725757680  
2    0.001 2024-09-08 00:50:01     1725756601  
3    0.001 2024-09-08 01:08:14     1725757694  
4    0.423 2024-09-08 01:08:14     1725757694  


In [6]:
print("Bid time span:", df["bid_datetime"][0]-df["bid_datetime"][len(df)-1])

# Separate the bids and asks
bids = df[['bid_price', 'bid_vol', 'bid_datetime', 'bid_timestamp']].sort_values(by='bid_price', ascending=False)
asks = df[['ask_price', 'ask_vol', 'ask_datetime', 'ask_timestamp']].sort_values(by='ask_price', ascending=True)

# Combine the top bid and ask into a new DataFrame for analysis if needed
top_bids_asks = pd.concat([bids.head(1), asks.head(1)], axis=1)

print(top_bids_asks)

Bid time span: 0 days 00:03:21
   bid_price  bid_vol        bid_datetime  bid_timestamp  ask_price  ask_vol  \
0    54261.9   27.938 2024-09-08 01:08:14     1725757694    54262.0    0.022   

         ask_datetime  ask_timestamp  
0 2024-09-08 01:07:41     1725757661  


In [7]:
# Metrics Calculation

# 1. Mid Price
'''
def: fair value of asset @ any given time

'''
mid_price = (top_bids_asks['ask_price'] + top_bids_asks['bid_price'])/2

# 2. Bid-Ask Spread         -- too essential nothing to say
spread = top_bids_asks['ask_price'] - top_bids_asks['bid_price']

# 3. VWAP (Volume-Weighted Average Price)
'''
def: average price, weight by volume over period

frml: (∑(price * vol)) / ∑vol
'''
bids_vwap = (bids['bid_price'] * bids['bid_vol']).sum() / bids['bid_vol'].sum()
asks_vwap = (asks['ask_price'] * asks['ask_vol']).sum() / asks['ask_vol'].sum()

# 4. Order Imbalance 
''' 
def: difference between bid vol and ask vol

use: indicate market sentiment
    - positive: buying pressure
    - negative: selling pressure
'''

order_imbalance = (top_bids_asks['bid_vol'] - top_bids_asks['ask_vol']) / (top_bids_asks['bid_vol'] + top_bids_asks['ask_vol'])
print(order_imbalance)
# 5. Market Depth at Top Levels
'''
def: ∑ volume of orders at different price levels

use: identify support and resistance level, provide detailed view of liquidity
    
'''
top_bid_depth = bids['bid_vol'].sum()
top_ask_depth = asks['ask_vol'].sum()

# 4. Top of Book Volume         --already sorted, so just take the top rows
top_of_book_bid_vol = bids.iloc[0]['bid_vol']
top_of_book_ask_vol = asks.iloc[0]['ask_vol']


# 6. Price volatility
'''
def: measure of price fluctation over a period, good use for Vega in Greeks

frml: calculate the std dev of price changes or returns
'''

import numpy as np

# Calculate percentage change for bid and ask prices
top_bids_asks['bid_price_pct_change'] = top_bids_asks['bid_price'].pct_change()
top_bids_asks['ask_price_pct_change'] = top_bids_asks['ask_price'].pct_change()

# Calculate standard deviation of percentage changes (volatility)
bid_price_volatility = np.std(top_bids_asks['bid_price_pct_change'].dropna())
ask_price_volatility = np.std(top_bids_asks['ask_price_pct_change'].dropna())

# Display the results
print(f"Bid Price Volatility: {bid_price_volatility}")
print(f"Ask Price Volatility: {ask_price_volatility}")

# 7. Time Weighted Average Spread
'''
def: spread weight by time over specific period

frml: (∑(spread * time interval)) / ∑(time interval)

use: how spread behaves over time
'''

# 1. Calculate the Spread
top_bids_asks['spread'] = top_bids_asks['ask_price'] - top_bids_asks['bid_price']

# 2. Calculate the Time Difference between consecutive rows in seconds
top_bids_asks['time_diff'] = top_bids_asks['bid_timestamp'].diff().fillna(0)

# 3. Weight the Spread by the Time Difference
top_bids_asks['weighted_spread'] = top_bids_asks['spread'] * top_bids_asks['time_diff']

# 4. Calculate the Time Weighted Average Spread (TWAS)
total_time = top_bids_asks['time_diff'].sum()


# Display the metrics for the top 5 rows
# columns_to_display = ['bid_price', 'ask_price', 'spread', 'order_imbalance']

0    0.998426
dtype: float64
Bid Price Volatility: nan
Ask Price Volatility: nan


In [8]:
print(top_bids_asks.head(5))

# Print additional metrics
print(f"Top Bid Depth: {top_bid_depth}")
print(f"Top Ask Depth: {top_ask_depth}")
print(f"Top of Book Bid Volume: {top_of_book_bid_vol}")
print(f"Top of Book Ask Volume: {top_of_book_ask_vol}")
print(f"Bid VWAP: {bids_vwap}")
print(f"Ask VWAP: {asks_vwap}")

# Print additional metrics
print(f"Top Bid Depth: {top_bid_depth}")
print(f"Top Ask Depth: {top_ask_depth}")
print(f"Top of Book Bid Volume: {top_of_book_bid_vol}")
print(f"Top of Book Ask Volume: {top_of_book_ask_vol}")

   bid_price  bid_vol        bid_datetime  bid_timestamp  ask_price  ask_vol  \
0    54261.9   27.938 2024-09-08 01:08:14     1725757694    54262.0    0.022   

         ask_datetime  ask_timestamp  bid_price_pct_change  \
0 2024-09-08 01:07:41     1725757661                   NaN   

   ask_price_pct_change  spread  time_diff  weighted_spread  
0                   NaN     0.1        0.0              0.0  
Top Bid Depth: 130.035
Top Ask Depth: 90.56599999999999
Top of Book Bid Volume: 27.938
Top of Book Ask Volume: 0.022
Bid VWAP: 54210.76898604222
Ask VWAP: 54316.84633416515
Top Bid Depth: 130.035
Top Ask Depth: 90.56599999999999
Top of Book Bid Volume: 27.938
Top of Book Ask Volume: 0.022


In [9]:
from datetime import datetime
import os

# Get current time of retrieval
current_time = datetime.now().strftime("%y%m%d_%H:%M:%S")

# Define file name
file_name = f"order_book_{symbol.replace("/", "")}_{current_time}.csv"

# Define save location
save_directory = "../data/processed"

# Full path where the file will be saved
saveOrderBook = os.path.join(save_directory, file_name)

# Convert DataFrame to CSV
df.to_csv(saveOrderBook, index = False)

print(f"Dava saved to {saveOrderBook}")

Dava saved to ../data/processed/order_book_BTCUSD_240907_20:08:17.csv


In [10]:
df

Unnamed: 0,symbol,bid_price,bid_vol,bid_datetime,bid_timestamp,ask_price,ask_vol,ask_datetime,ask_timestamp
0,BTC/USD,54261.9,27.938,2024-09-08 01:08:14,1725757694,54262.0,0.022,2024-09-08 01:07:41,1725757661
1,BTC/USD,54261.4,0.092,2024-09-08 01:08:13,1725757693,54268.5,0.022,2024-09-08 01:08:00,1725757680
2,BTC/USD,54259.5,0.010,2024-09-08 01:08:13,1725757693,54271.7,0.001,2024-09-08 00:50:01,1725756601
3,BTC/USD,54258.5,0.140,2024-09-08 01:08:07,1725757687,54272.3,0.001,2024-09-08 01:08:14,1725757694
4,BTC/USD,54258.4,0.017,2024-09-08 01:08:03,1725757683,54272.4,0.423,2024-09-08 01:08:14,1725757694
...,...,...,...,...,...,...,...,...,...
95,BTC/USD,54158.6,1.343,2024-09-08 01:07:31,1725757651,54368.0,0.062,2024-09-08 01:08:13,1725757693
96,BTC/USD,54158.5,0.001,2024-09-08 01:04:51,1725757491,54368.1,1.204,2024-09-08 01:07:59,1725757679
97,BTC/USD,54158.3,0.971,2024-09-08 01:07:28,1725757648,54373.5,0.488,2024-09-08 01:08:01,1725757681
98,BTC/USD,54155.8,0.001,2024-09-08 01:07:21,1725757641,54374.5,4.598,2024-09-08 01:05:59,1725757559


In [11]:
metrics_csv = f"order_book_metrics_{symbol.replace('/', '')}_metrics.csv"
save_directory = "../data/processed"
saveOrderBookMetrics = os.path.join(save_directory, metrics_csv)

new_data = pd.DataFrame({
    'datetime': [current_time],
    'symbol': [symbol],
    'bid_vol': [top_of_book_bid_vol],
    'bid_price': [top_bids_asks.iloc[0]['bid_price']],
    'ask_price': [top_bids_asks.iloc[0]['ask_price']],
    'ask_vol': [top_of_book_ask_vol],
    'mid_price': [mid_price.iloc[0]],
    'spread': [spread.iloc[0]],
    'order_imbalance': [order_imbalance.iloc[0]],
    'bid_vwap': [bids_vwap],
    'ask_vwap': [asks_vwap],
    'top_bid_depth': [top_bid_depth],
    'top_ask_depth': [top_ask_depth]
})

# Check if the CSV file already exists
if not os.path.isfile(saveOrderBookMetrics):
    # Create the CSV file with headers if it doesn't exist
    new_data.to_csv(saveOrderBookMetrics, index=False)
else:
    # Append new rows without headers if the file exists
    new_data.to_csv(saveOrderBookMetrics, mode='a', header=False, index=False)

print(f"Data saved to {saveOrderBookMetrics}")


Data saved to ../data/processed/order_book_metrics_BTCUSD_metrics.csv


---------------

In [12]:
# ------ Greeks for Risk Management ------

def calculate_delta(inventory, market_price):
    # Delta could be proportional to your inventory and market price
    return inventory * market_price * 0.001  # Arbitrary scaling factor

def calculate_gamma(delta, market_price, previous_price):
    # Gamma could be based on the change in delta with respect to price change
    return (delta - (inventory * previous_price * 0.001)) / (market_price - previous_price)

def calculate_vega(volatility):
    # Vega could be directly proportional to volatility
    return volatility * 0.01  # Arbitrary scaling factor

def calculate_volatility(prices):
    return np.std(prices[-10:])  # Example using last 10 price changes

-----

In [13]:
# Initialize parameters
inventory = 10  # Example initial inventory level
previous_price = df['best_bid_price'].iloc[0]  # Starting with the first bid price
recent_prices = []

# Iterate through the dataframe to simulate the market-making process
for index, row in df.iterrows():
    market_price = (row['best_bid_price'] + row['best_ask_price']) / 2
    
    # Append market price to recent prices for volatility calculation
    recent_prices.append(market_price)
    if len(recent_prices) > 10:
        recent_prices.pop(0)
    
    # Calculate Greeks
    delta = calculate_delta(inventory, market_price)
    gamma = calculate_gamma(delta, market_price, previous_price)
    volatility = calculate_volatility(recent_prices)
    vega = calculate_vega(volatility)
    
    # Base spread adjusted by Vega (for volatility) and Gamma (for Delta sensitivity)
    base_spread = 0.01
    adjusted_spread = base_spread + vega + (gamma / 100)
    
    # Determine buy and sell prices
    bid_price = market_price * (1 - adjusted_spread - delta)
    ask_price = market_price * (1 + adjusted_spread + delta)
    
    # Update previous price for next iteration
    previous_price = market_price
    
    # Pseudo-code for placing orders (replace with actual API calls)
    print(f"Placing buy order at: {bid_price}, sell order at: {ask_price}")

KeyError: 'best_bid_price'