# RESEARCH NOTEBOOK --> PMM Simple Strategy Design

In [7]:
import warnings

warnings.filterwarnings("ignore")

import numpy as np
import pandas as pd
import pandas_ta as ta  # noqa: F401
from core.data_sources import CLOBDataSource

# Initialize the data source
# NOTE(review): the recorded output of this cell logs "Unclosed client session"
# (aiohttp), so a client session appears to be left open — confirm whether
# CLOBDataSource offers a cleanup/close call and invoke it when the analysis ends.
clob = CLOBDataSource()

2025-09-09 13:08:27,343 - asyncio - ERROR - Unclosed client session
client_session: <aiohttp.client.ClientSession object at 0x10bd3dbe0>


In [8]:
# Define the parameters
# All four values are passed, in this order, to clob.get_candles_last_days(...)
exchange = "binance"       # exchange identifier
trading_pair = "POL-USDT"  # market to analyse
timeframe = "1s"           # candle interval (the recorded run returned 86,400 rows for one day)
days = 1                   # look-back length in days

In [9]:
# Get the candles
# Top-level `await`: this relies on the notebook kernel's running event loop.
# NOTE(review): the recorded output logs "Task was destroyed but it is pending!" after
# this call — presumably a background task of the data source is never awaited or
# cancelled; confirm against the CLOBDataSource implementation.
candles = await clob.get_candles_last_days(exchange, trading_pair, timeframe, days)

2025-09-09 13:08:29,577 - asyncio - ERROR - Task was destroyed but it is pending!
task: <Task pending name='Task-5' coro=<safe_wrapper() running at /opt/anaconda3/envs/quants-lab/lib/python3.12/site-packages/hummingbot/core/utils/async_utils.py:9> wait_for=<Future pending cb=[Task.task_wakeup()]>>


In [10]:
# Summary statistics of the raw candle columns (sanity check before feature engineering)
candles.data.describe()

Unnamed: 0,timestamp,open,high,low,close,volume,quote_asset_volume,n_trades,taker_buy_base_volume,taker_buy_quote_volume
count,86400.0,86400.0,86400.0,86400.0,86400.0,86400.0,86400.0,86400.0,86400.0,86400.0
mean,1757390908.5,0.27486262,0.27486472,0.27486053,0.27486267,814.27841898,223.98611446,1.28430556,367.31189583,101.01401057
std,24941.67596614,0.00289302,0.00289292,0.00289316,0.00289301,7321.91534473,2007.40898525,5.93060125,3522.31324735,969.37788096
min,1757347709.0,0.2686,0.2686,0.2686,0.2686,0.0,0.0,0.0,0.0,0.0
25%,1757369308.75,0.2724,0.2724,0.2724,0.2724,0.0,0.0,0.0,0.0,0.0
50%,1757390908.5,0.2746,0.2746,0.2746,0.2746,0.0,0.0,0.0,0.0,0.0
75%,1757412508.25,0.2773,0.2773,0.2773,0.2773,0.0,0.0,0.0,0.0,0.0
max,1757434108.0,0.282,0.282,0.282,0.282,1158790.4,315393.32818,751.0,301080.3,81872.54513


In [11]:
# The candles object has builtin methods to visualize the candles easily
# height/width are forwarded to the plot helper (presumably pixels — confirm)
candles.plot(height=600, width=1200)

In [12]:
# Same plot helper with type="returns" — presumably renders the return series
# instead of the price candles; confirm against the plot() implementation
candles.plot(type="returns", height=400, width=1200)

### 2. Volume Analysis

In [None]:
# Export key metrics for strategy implementation
# Columns handed over to the strategy implementation. They are created by the
# volatility, volume, microstructure and market-making indicator cells, so those
# cells must have been executed before this one.
EXPORT_COLUMNS = [
    'close', 'volume', 'volatility_300s', 'vwap_300s', 'order_flow_imbalance',
    'mm_opportunity_score', 'optimal_spread_300s', 'inventory_risk_300s',
    'hl_spread_bps', 'volume_ratio_300s', 'reversal_rate_300s',
]
# Single source of truth for the output file (used for writing and for the message)
EXPORT_PATH = 'pmm_strategy_metrics.csv'

# Fail with an actionable message instead of a bare KeyError from the selection
missing_columns = [col for col in EXPORT_COLUMNS if col not in df.columns]
if missing_columns:
    raise KeyError(
        f"Columns missing from df (run the feature-engineering cells first): {missing_columns}"
    )

key_metrics = df[EXPORT_COLUMNS].copy()

print("Key Metrics Summary for PMM Strategy:")
print("="*50)
for col in key_metrics.columns:
    if col != 'close':  # Skip price column
        print(f"{col}: mean={key_metrics[col].mean():.4f}, std={key_metrics[col].std():.4f}")

# Save to CSV for further analysis
key_metrics.to_csv(EXPORT_PATH)
print(f"\nMetrics exported to: {EXPORT_PATH}")
print(f"Data points: {len(key_metrics)}")
print(f"Timeframe: {key_metrics.index[0]} to {key_metrics.index[-1]}")

### Market Making Strategy Insights

Based on the analysis above, here are key insights for PMM strategy design:

**Volatility Patterns:**
- Monitor rolling volatility across multiple timeframes (1min, 5min, 15min, 1hour)
- Use ATR and Bollinger Band width as dynamic spread adjustment signals
- Optimal market making occurs in moderate volatility regimes

**Volume & Liquidity:**
- Track volume ratios to identify high-activity periods
- VWAP deviation indicates price efficiency and mean reversion opportunities  
- Order flow imbalance reveals directional pressure and inventory risk

**Risk Management:**
- Inventory risk increases with persistent order flow imbalance
- Adverse selection risk peaks during trend continuation periods
- Position sizing should be inversely related to volatility

**Opportunity Identification:**
- High MM opportunity score indicates favorable conditions
- Optimal spreads should adapt to volatility and inventory position
- Price reversal patterns suggest mean reversion opportunities

In [None]:
# Comprehensive market making dashboard
from plotly.subplots import make_subplots
import plotly.graph_objects as go

# Keep every 60th row so the figure stays light enough to render
sample_df = df.iloc[::60].copy()
sample_times = sample_df.index


def _series_trace(column, label, color, scale=None, dash=None, fill=None):
    """Build a line trace of one ``sample_df`` column against ``sample_times``.

    ``scale`` multiplies the values when given; ``dash`` and ``fill`` are only
    passed on to plotly when they are set.
    """
    values = sample_df[column]
    if scale is not None:
        values = values * scale
    line_style = dict(color=color)
    if dash is not None:
        line_style['dash'] = dash
    extra = {} if fill is None else {'fill': fill}
    return go.Scatter(x=sample_times, y=values, name=label, line=line_style, **extra)


# 4x2 grid: every panel has a secondary y-axis except the volume profile at (3, 1)
fig = make_subplots(
    rows=4, cols=2,
    subplot_titles=(
        'Price & Volatility', 'Volume Analysis',
        'Market Making Opportunity Score', 'Order Flow & Spread',
        'Volume Profile', 'Risk Metrics',
        'Theoretical P&L', 'Market Efficiency'
    ),
    specs=[
        [{"type": "bar"} if (grid_row, grid_col) == (3, 1) else {"secondary_y": True}
         for grid_col in (1, 2)]
        for grid_row in (1, 2, 3, 4)
    ]
)

# Price levels carrying the largest share of traded volume
top_buckets = volume_profile.nlargest(15, 'volume_pct')

# (row, col, secondary_y, trace) in the order the traces are added to the figure
panel_traces = [
    # 1. Price & Volatility
    (1, 1, None, _series_trace('close', 'Price', 'blue')),
    (1, 1, None, _series_trace('bb_upper_300s', 'BB Upper', 'red', dash='dash')),
    (1, 1, None, _series_trace('bb_lower_300s', 'BB Lower', 'red', dash='dash')),
    (1, 1, True, _series_trace('volatility_300s', 'Volatility (x1000)', 'orange', scale=1000)),
    # 2. Volume Analysis
    (1, 2, None, go.Bar(x=sample_times, y=sample_df['volume'],
                        name='Volume', marker_color='lightblue')),
    (1, 2, True, _series_trace('volume_ratio_300s', 'Volume Ratio', 'green')),
    # 3. Market Making Opportunity Score
    (2, 1, None, _series_trace('mm_opportunity_score', 'MM Score', 'purple', fill='tonexty')),
    (2, 1, True, _series_trace('optimal_spread_300s', 'Optimal Spread (bps)', 'red', scale=10000)),
    # 4. Order Flow & Spread
    (2, 2, None, _series_trace('order_flow_imbalance', 'Order Flow', 'darkgreen')),
    (2, 2, True, _series_trace('hl_spread_bps', 'HL Spread (bps)', 'brown')),
    # 5. Volume Profile
    (3, 1, None, go.Bar(x=top_buckets['volume_pct'], y=top_buckets['close'],
                        orientation='h', name='Volume by Price', marker_color='cyan')),
    # 6. Risk Metrics
    (3, 2, None, _series_trace('inventory_risk_300s', 'Inventory Risk', 'red')),
    (3, 2, True, _series_trace('adverse_selection_risk', 'Adverse Selection (x1000)',
                               'orange', scale=1000)),
    # 7. Theoretical P&L
    (4, 1, None, _series_trace('theoretical_pnl_cumsum', 'Cumulative P&L', 'green',
                               fill='tonexty')),
    (4, 1, True, _series_trace('sharpe_ratio_300s', 'Sharpe Ratio', 'blue')),
    # 8. Market Efficiency
    (4, 2, None, _series_trace('reversal_rate_300s', 'Reversal Rate', 'purple')),
    (4, 2, True, _series_trace('amihud_illiq_ma', 'Illiquidity (x100k)', 'red', scale=100000)),
]

for grid_row, grid_col, on_secondary, trace in panel_traces:
    fig.add_trace(trace, row=grid_row, col=grid_col, secondary_y=on_secondary)

# Update layout
fig.update_layout(height=1200, title_text="Market Making Analysis Dashboard", showlegend=False)

# Label the x-axis of every time-based panel; the volume profile at (3, 1) is not time-based
for grid_row in range(1, 5):
    for grid_col in range(1, 3):
        if (grid_row, grid_col) == (3, 1):
            continue
        fig.update_xaxes(title_text="Time", row=grid_row, col=grid_col)

fig.show()

### 5. Comprehensive Visualizations

In [None]:
# Market making specific indicators and opportunity identification

# 1. Inventory Risk Metrics
# The running sum of the order-flow imbalance acts as a simulated inventory path;
# its rolling standard deviation over 60 and 300 rows is the inventory-risk measure.
df['inventory_drift'] = df['order_flow_imbalance'].cumsum()
for horizon in (60, 300):
    df[f'inventory_risk_{horizon}s'] = df['inventory_drift'].rolling(horizon).std()

# 2. Optimal Spread Estimation
# Based on volatility and inventory risk
def optimal_spread(volatility, inventory_risk, risk_aversion=1.0, tick_size=0.0001):
    """Heuristic spread estimate that widens with volatility and inventory risk.

    Computes ``risk_aversion * volatility * (1 + 2 * |inventory_risk|)`` as the sum of
    a volatility term and an inventory term, floored at ``tick_size``. The original
    author describes it as a simplified form inspired by Avellaneda-Stoikov.

    Args:
        volatility: scalar or array-like volatility estimate.
        inventory_risk: scalar or array-like inventory measure; only its magnitude is used.
        risk_aversion: multiplier applied to both terms.
        tick_size: lower bound for the returned spread.

    Returns:
        The element-wise maximum of the estimate and ``tick_size``.
    """
    volatility_term = risk_aversion * volatility
    inventory_term = 2 * risk_aversion * abs(inventory_risk) * volatility
    return np.maximum(volatility_term + inventory_term, tick_size)

# Spread estimates for the 60-row and 300-row horizons; missing inputs are treated as 0,
# which makes the estimate fall back to the tick-size floor
for horizon in (60, 300):
    df[f'optimal_spread_{horizon}s'] = optimal_spread(
        df[f'volatility_{horizon}s'].fillna(0),
        df[f'inventory_risk_{horizon}s'].fillna(0),
    )

# 3. Market Making Signal Generation
# Combine multiple factors for MM opportunity scoring
def mm_opportunity_score(row, vol_q10=None, vol_q90=None, inventory_risk_q30=None):
    """Calculate market making opportunity score (0-100) for one row.

    The score is the sum of five independent components:
    volume ratio (up to 20), volatility inside the middle of its 10%-90%
    quantile band (up to 25), net reversal tendency (20), low inventory
    risk (15) and balanced order flow (20).

    Args:
        row: mapping/Series with the keys 'volume_ratio_300s', 'volatility_300s',
            'reversal_rate_300s', 'inventory_risk_300s' and 'order_flow_imbalance'.
        vol_q10: 10% quantile of 'volatility_300s' over the whole data set.
        vol_q90: 90% quantile of 'volatility_300s' over the whole data set.
        inventory_risk_q30: 30% quantile of 'inventory_risk_300s'.
            Any threshold left as None is computed from the module-level ``df``;
            that is kept only for backward compatibility, because recomputing a
            full-column quantile for every row is extremely slow.

    Returns:
        Integer score, capped at 100. Components whose inputs are NaN add nothing.
    """
    if vol_q10 is None:
        vol_q10 = df['volatility_300s'].quantile(0.1)
    if vol_q90 is None:
        vol_q90 = df['volatility_300s'].quantile(0.9)
    if inventory_risk_q30 is None:
        inventory_risk_q30 = df['inventory_risk_300s'].quantile(0.3)

    score = 0

    # High volume periods (more opportunities)
    if row['volume_ratio_300s'] > 1.5:
        score += 20
    elif row['volume_ratio_300s'] > 1.2:
        score += 10

    # Moderate volatility (not too high/low): position of the row inside the
    # 10%-90% quantile band. A degenerate band (zero width or NaN) scores nothing,
    # which matches the result of the former division by zero but without inf/NaN.
    vol_band = vol_q90 - vol_q10
    if vol_band > 0:
        vol_percentile = (row['volatility_300s'] - vol_q10) / vol_band
        if 0.3 <= vol_percentile <= 0.7:
            score += 25
        elif 0.2 <= vol_percentile <= 0.8:
            score += 15

    # Mean reversion tendency
    if abs(row['reversal_rate_300s']) > 0.1:
        score += 20

    # Low inventory risk
    if abs(row['inventory_risk_300s']) < inventory_risk_q30:
        score += 15

    # Balanced order flow
    if abs(row['order_flow_imbalance']) < 0.1:
        score += 20

    return min(score, 100)


# Compute the data-set level thresholds once instead of once per row,
# then forward them to the scorer through DataFrame.apply's keyword arguments.
score_thresholds = {
    'vol_q10': df['volatility_300s'].quantile(0.1),
    'vol_q90': df['volatility_300s'].quantile(0.9),
    'inventory_risk_q30': df['inventory_risk_300s'].quantile(0.3),
}
df['mm_opportunity_score'] = df.apply(mm_opportunity_score, axis=1, **score_thresholds)

# 4. Risk Management Indicators
# Position cap shrinks as the 300-row volatility grows
vol_300 = df['volatility_300s']
df['max_position_size'] = 1 / (vol_300 * np.sqrt(300))

# Relative distance of the close from the rolling VWAP, and its 300-row mean magnitude
vwap_300 = df['vwap_300s']
df['price_deviation_vwap'] = (df['close'] - vwap_300) / vwap_300
df['adverse_selection_risk'] = df['price_deviation_vwap'].abs().rolling(300).mean()

# 5. Performance Metrics for Market Making
# Simplified theoretical P&L: a fixed share of the high-low range earned on every trade
spread_capture_rate = 0.5  # Assume we capture 50% of the spread
df['theoretical_pnl'] = df['hl_spread'] * spread_capture_rate * df['n_trades']
df['theoretical_pnl_cumsum'] = df['theoretical_pnl'].cumsum()

# Rolling mean over rolling std of the per-row P&L; the epsilon avoids division by zero
pnl_window = df['theoretical_pnl'].rolling(300)
df['sharpe_ratio_300s'] = pnl_window.mean() / (pnl_window.std() + 1e-10)

opportunity = df['mm_opportunity_score']
is_high_opportunity = opportunity > 70
mean_spread = df['optimal_spread_300s'].mean()

print("Market Making Indicators Summary:")
print(f"Average MM Opportunity Score: {opportunity.mean():.1f}/100")
print(f"High Opportunity Periods (>70): {is_high_opportunity.sum()} ({is_high_opportunity.mean()*100:.1f}%)")
print(f"Average Optimal Spread (5min): {mean_spread:.6f} ({mean_spread*10000:.2f} bps)")
print(f"Average Inventory Risk (5min): {df['inventory_risk_300s'].mean():.3f}")
print(f"Average Adverse Selection Risk: {df['adverse_selection_risk'].mean():.6f}")
print(f"Theoretical Daily P&L: {df['theoretical_pnl'].sum():.6f}")
print(f"Average Sharpe Ratio (5min): {df['sharpe_ratio_300s'].mean():.3f}")

### 4. Market Making Specific Indicators

In [None]:
# Market microstructure analysis for market making
# Since we don't have bid/ask data, we'll estimate spread from OHLC data

# Estimated spread metrics (proxy using high-low range)
df['hl_spread'] = df['high'] - df['low']  # High-Low spread as proxy
df['hl_spread_bps'] = (df['hl_spread'] / df['close']) * 10000  # in basis points

# Price impact estimation
df['price_change'] = df['close'] - df['open']
df['price_change_bps'] = (df['price_change'] / df['open']) * 10000

# Market efficiency metrics
# Price reversal detection (mean reversion indicator):
# +1 = up move followed by a down move, -1 = down followed by up, 0 = no reversal
df['price_reversal'] = np.where(
    (df['returns'].shift(1) > 0) & (df['returns'] < 0), 1,  # Up then down
    np.where((df['returns'].shift(1) < 0) & (df['returns'] > 0), -1, 0)  # Down then up
)

# Rolling mean of the signed flags, i.e. the NET reversal direction:
# up-down and down-up reversals offset each other in this number.
df['reversal_rate_300s'] = df['price_reversal'].rolling(window=300).mean()

# Tick analysis (price change patterns)
df['tick_direction'] = np.sign(df['price_change'])
# A new run id starts whenever the direction differs from the previous row
df['tick_runs'] = (df['tick_direction'] != df['tick_direction'].shift()).cumsum()

# Calculate average run length
tick_runs = df.groupby('tick_runs')['tick_direction'].count()
avg_run_length = tick_runs.mean()

# Liquidity indicators
# Amihud illiquidity measure (price impact per unit volume); epsilon avoids division by zero
df['amihud_illiq'] = abs(df['returns']) / (df['volume'] + 1e-10)
df['amihud_illiq_ma'] = df['amihud_illiq'].rolling(window=300).mean()

# Roll spread estimator (effective spread): 2 * sqrt(-cov(r_t, r_{t-1})).
# Bid-ask bounce shows up as NEGATIVE serial covariance, so the covariance is negated
# before the square root; non-negative covariance (estimator undefined) is clipped to 0.
# The previous form, -2 * sqrt(max(cov, 0)), returned 0 exactly where the estimator
# applies and a negative "spread" everywhere else.
# NOTE(review): a 2-observation rolling covariance is extremely noisy — consider a
# longer window (e.g. 300 rows, like the other rolling metrics).
serial_cov = df['returns'].rolling(2).cov(df['returns'].shift(1))
df['roll_spread'] = 2 * np.sqrt(np.maximum(-serial_cov, 0))

# Market making opportunity indicators
# Volatility clustering (GARCH-like): short-window over long-window return volatility
df['vol_cluster'] = df['returns'].rolling(60).std() / df['returns'].rolling(300).std()
# Price efficiency measure (variance ratio)
def variance_ratio(returns, k=2):
    """Calculate variance ratio for k-period returns vs 1-period returns.

    Args:
        returns: pandas Series of 1-period returns without NaN values.
        k: number of periods aggregated into each overlapping k-period return.

    Returns:
        ``Var(k-period return) / (k * Var(1-period return))``, using sample
        variances. Returns 1 when the 1-period variance is zero or undefined
        (constant, empty or single-element input). The result is NaN when fewer
        than two k-period returns can be formed.
    """
    var_1 = returns.var()
    if not var_1 > 0:  # zero or NaN variance: the ratio is undefined, report neutral 1
        return 1

    # k-period overlapping returns
    k_returns = returns.rolling(k).sum().dropna()
    var_k = k_returns.var() / k

    return var_k / var_1

# Calculate variance ratios for different horizons (k counts candle periods, not minutes)
clean_returns = df['returns'].dropna()
vr_2 = variance_ratio(clean_returns, 2)
vr_5 = variance_ratio(clean_returns, 5)
vr_10 = variance_ratio(clean_returns, 10)

# Market making profitability indicators
# Bid-ask bounce proxy: absolute distance of the close from the cumulative VWAP
df['ba_bounce'] = abs(df['close'] - df['vwap'])
df['ba_bounce_ma'] = df['ba_bounce'].rolling(300).mean()

print("Market Microstructure Analysis:")
print(f"Average HL Spread: {df['hl_spread_bps'].mean():.2f} bps")
print(f"Median HL Spread: {df['hl_spread_bps'].median():.2f} bps")
print(f"Average Price Reversal Rate: {df['reversal_rate_300s'].mean():.3f}")
print(f"Average Tick Run Length: {avg_run_length:.2f}")
print(f"Average Amihud Illiquidity: {df['amihud_illiq_ma'].mean():.6f}")
# Labels name the horizon in periods; the former "2min/5min/10min" did not match k
print(f"Variance Ratios - 2-period: {vr_2:.3f}, 5-period: {vr_5:.3f}, 10-period: {vr_10:.3f}")
print(f"Average Bid-Ask Bounce: {df['ba_bounce_ma'].mean():.6f}")

### 3. Spread Analysis & Market Microstructure

In [None]:
# Volume-based metrics for market making
# VWAP (Volume Weighted Average Price), cumulative since the first row.
# It is NaN until the first row with non-zero volume (0 / 0).
price_volume = df['close'] * df['volume']
df['cumulative_volume'] = df['volume'].cumsum()
df['cumulative_pv'] = price_volume.cumsum()
df['vwap'] = df['cumulative_pv'] / df['cumulative_volume']

# Rolling VWAP for different windows (in rows)
for window in [300, 900, 3600]:
    rolling_vol = df['volume'].rolling(window=window).sum()
    rolling_pv = price_volume.rolling(window=window).sum()
    df[f'vwap_{window}s'] = rolling_pv / rolling_vol

# Volume metrics
df['volume_ma_60s'] = df['volume'].rolling(window=60).mean()
df['volume_ma_300s'] = df['volume'].rolling(window=300).mean()
df['volume_ma_3600s'] = df['volume'].rolling(window=3600).mean()

# Relative volume (current vs average)
df['volume_ratio_60s'] = df['volume'] / df['volume_ma_60s']
df['volume_ratio_300s'] = df['volume'] / df['volume_ma_300s']

# Trade intensity metrics
df['trades_per_second'] = df['n_trades']
# Undefined (NaN) for rows without trades, so idle rows do not drag the average to 0
df['avg_trade_size'] = df['volume'] / df['n_trades'].where(df['n_trades'] > 0)
df['trades_ma_300s'] = df['n_trades'].rolling(window=300).mean()

# Taker buy/sell analysis
# Rows with zero volume carry no buy/sell information. Dividing by (volume + epsilon)
# gave them a buy ratio of 0, i.e. "100% selling", which pushed the order-flow
# imbalance to -0.5 on every idle row. The ratios are left undefined there instead.
traded_volume = df['volume'].where(df['volume'] > 0)
df['taker_buy_ratio'] = df['taker_buy_base_volume'] / traded_volume
df['taker_sell_ratio'] = 1 - df['taker_buy_ratio']

# Order flow imbalance, centered around 0; rows without volume count as balanced (0)
# so cumulative and rolling statistics built on this column stay defined.
df['order_flow_imbalance'] = (df['taker_buy_ratio'] - 0.5).fillna(0.0)

# Volume profile analysis (price levels with most volume)
# Create price buckets
price_min, price_max = df['low'].min(), df['high'].max()
n_buckets = 50
price_buckets = np.linspace(price_min, price_max, n_buckets + 1)
# include_lowest=True keeps a close equal to the global low inside the first bucket
df['price_bucket'] = pd.cut(df['close'], bins=price_buckets, labels=False, include_lowest=True)

# Calculate volume by price level
volume_profile = df.groupby('price_bucket').agg({
    'volume': 'sum',
    'n_trades': 'sum',
    'close': 'mean'
}).reset_index()

volume_profile['volume_pct'] = volume_profile['volume'] / volume_profile['volume'].sum() * 100

print("Volume Analysis Summary:")
print(f"Total Volume: {df['volume'].sum():,.0f}")
print(f"Total Trades: {df['n_trades'].sum():,.0f}")
print(f"Average Trade Size: {df['avg_trade_size'].mean():.2f}")
print(f"Average Taker Buy Ratio: {df['taker_buy_ratio'].mean():.3f}")
print(f"Volume Concentration (top 20% buckets): {volume_profile.nlargest(int(n_buckets*0.2), 'volume')['volume_pct'].sum():.1f}%")

In [13]:
# NOTE(review): `df` aliases candles.data (no copy), so every column added below is
# written into the candles object itself. The later `candles.data.copy()` cell depends
# on that: its recorded shape (86400, 26) includes these columns. Switching to a copy
# here requires reordering the cells so the datetime-indexed copy is created first.
df = candles.data
# Calculate returns
df['returns'] = df['close'].pct_change()
df['log_returns'] = np.log(df['close'] / df['close'].shift(1))

# Rolling volatility (multiple windows for different timescales).
# The per-row standard deviation is scaled by sqrt(window), i.e. it estimates the
# volatility over the window's own horizon — it is NOT annualized.
windows = [60, 300, 900, 3600]  # 1min, 5min, 15min, 1hour in seconds
for window in windows:
    df[f'volatility_{window}s'] = df['returns'].rolling(window=window).std() * np.sqrt(window)

# Average True Range (ATR): true range = max of high-low, |high - prev close|, |low - prev close|
df['high_low'] = df['high'] - df['low']
df['high_close_prev'] = abs(df['high'] - df['close'].shift(1))
df['low_close_prev'] = abs(df['low'] - df['close'].shift(1))
df['true_range'] = df[['high_low', 'high_close_prev', 'low_close_prev']].max(axis=1)

for window in [300, 900, 3600]:  # 5min, 15min, 1hour
    df[f'atr_{window}s'] = df['true_range'].rolling(window=window).mean()

# Bollinger Bands (2 standard deviations); width is expressed relative to the rolling mean
for window in [300, 900]:
    rolling_mean = df['close'].rolling(window=window).mean()
    rolling_std = df['close'].rolling(window=window).std()
    df[f'bb_upper_{window}s'] = rolling_mean + (rolling_std * 2)
    df[f'bb_lower_{window}s'] = rolling_mean - (rolling_std * 2)
    df[f'bb_width_{window}s'] = (df[f'bb_upper_{window}s'] - df[f'bb_lower_{window}s']) / rolling_mean

# Calculate volatility percentiles for risk assessment
vol_stats = {}
for window in windows:
    vol_col = f'volatility_{window}s'
    vol_stats[f'{window}s'] = {
        'mean': df[vol_col].mean(),
        'median': df[vol_col].median(),
        'p75': df[vol_col].quantile(0.75),
        'p90': df[vol_col].quantile(0.90),
        'p95': df[vol_col].quantile(0.95),
        'max': df[vol_col].max()
    }

# Label corrected: the values are scaled to each window's horizon, not annualized
print("Volatility Statistics (scaled to window horizon):")
for window, stats in vol_stats.items():
    print(f"\n{window} rolling volatility:")
    for stat, value in stats.items():
        print(f"  {stat}: {value:.4f}")

# Clean up temporary columns (in place, because df is the candles' own frame)
df.drop(['high_low', 'high_close_prev', 'low_close_prev'], axis=1, inplace=True)

Volatility Statistics (annualized):

60s rolling volatility:
  mean: 0.0011
  median: 0.0010
  p75: 0.0012
  p90: 0.0016
  p95: 0.0018
  max: 0.0051

300s rolling volatility:
  mean: 0.0025
  median: 0.0023
  p75: 0.0028
  p90: 0.0032
  p95: 0.0037
  max: 0.0084

900s rolling volatility:
  mean: 0.0043
  median: 0.0041
  p75: 0.0047
  p90: 0.0056
  p95: 0.0063
  max: 0.0106

3600s rolling volatility:
  mean: 0.0087
  median: 0.0084
  p75: 0.0094
  p90: 0.0110
  p95: 0.0124
  max: 0.0154


### 1. Volatility Analysis

In [15]:
import pandas as pd

# Build an independent frame indexed by candle time: the epoch-second `timestamp`
# column is converted to datetimes and becomes the index; candles.data is not modified.
df = (
    candles.data
    .assign(datetime=lambda frame: pd.to_datetime(frame['timestamp'], unit='s'))
    .set_index('datetime')
)

first_ts, last_ts = df.index[0], df.index[-1]
print(f"Data shape: {df.shape}")
print(f"Timeframe: {first_ts} to {last_ts}")
print(f"Total duration: {last_ts - first_ts}")

Data shape: (86400, 26)
Timeframe: 2025-09-08 16:08:29 to 2025-09-09 16:08:28
Total duration: 0 days 23:59:59


## Market Making Metrics Analysis

Now let's analyze key metrics that are crucial for market making strategies:
- **Volatility**: Price movement patterns and risk assessment
- **Volume Analysis**: Liquidity patterns and trading intensity
- **Spread Analysis**: Spread dynamics, estimated from the candle high-low range because no bid/ask data is available
- **Market Microstructure**: Candle-by-candle (1-second) price behavior and taker order flow