# Dynamic Position Sizing for Mean Reversion

In this notebook, we implement a mean reversion strategy that dynamically adjusts position size based on the distance from the fair price of 2000. The further away the price is from 2000, the larger the position size.

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Global plotting defaults: whitegrid theme, 12x6 inch figures at 100 dpi
plt.style.use('seaborn-v0_8-whitegrid')
plt.rcParams.update({
    'figure.figsize': (12, 6),
    'figure.dpi': 100,
})

## 1. Load Data

First, let's load the Squid_Ink price data and limit it to the first 20,000 timestamps (in-sample data).

In [None]:
# Define function to load price data from CSV files
def load_price_data(round_num, day_num, data_path='../../../Prosperity 3 Data'):
    """
    Load one day's price CSV for a given round.

    The file is expected at
    ``<data_path>/Round <round_num>/prices_round_<round_num>_day_<day_num>.csv``
    and is parsed as a semicolon-separated file.

    Parameters:
        round_num (int): Round number, used in both the folder and file name
        day_num (int): Day number, used in the file name
        data_path (str): Root directory containing the ``Round <n>`` folders.
            Defaults to the location this notebook was originally written for.

    Returns:
        pd.DataFrame: Parsed file contents, or an empty DataFrame when the file
        is missing or cannot be read (a message is printed in either case).
    """
    import os

    # Construct file path
    file_path = os.path.join(data_path, f'Round {round_num}/prices_round_{round_num}_day_{day_num}.csv')

    # A missing file is reported separately from a read/parse failure
    if not os.path.exists(file_path):
        print(f"File not found: {file_path}")
        return pd.DataFrame()

    # Only the read itself is guarded. pandas' ParserError / EmptyDataError and
    # UnicodeDecodeError are all ValueError subclasses; OSError covers I/O issues.
    try:
        data = pd.read_csv(file_path, sep=';')
    except (OSError, ValueError) as e:
        print(f"Error loading file {file_path}: {e}")
        return pd.DataFrame()

    print(f"Successfully loaded {len(data)} rows from {file_path}")
    return data

# Load data for all days in round 1
print("Loading price data...")

# Collect the per-day frames and concatenate once at the end. Growing a
# DataFrame with pd.concat inside the loop re-copies all earlier rows each time
# and starts from an empty frame, which recent pandas versions warn about.
day_frames = []
for day in range(-2, 1):
    day_data = load_price_data(1, day)
    if len(day_data) > 0:
        # Add day offset to timestamp for continuity:
        # day -2 -> +0, day -1 -> +1,000,000, day 0 -> +2,000,000
        day_data['timestamp'] += 10**6 * (day + 2)
        day_frames.append(day_data)

# Check if data was loaded successfully
if not day_frames:
    raise ValueError("No data was loaded. Please check the data directory path.")

all_data = pd.concat(day_frames)

# Check the columns in the loaded data
print(f"Columns in loaded data: {all_data.columns.tolist()}")

# Filter for SQUID_INK; the instrument name may live in 'product' or 'symbol'
# ('product' takes priority when both exist)
product_column = next(
    (column for column in ('product', 'symbol') if column in all_data.columns),
    None,
)
if product_column is None:
    raise ValueError("Could not find 'product' or 'symbol' column in the data.")

prices = all_data[all_data[product_column] == 'SQUID_INK'].copy()
print(f"Filtered for SQUID_INK using '{product_column}' column: {len(prices)} rows")

# Check if we have any data after filtering
if len(prices) == 0:
    raise ValueError("No SQUID_INK data found after filtering.")

print(f"Loaded {len(prices)} SQUID_INK price data points")

# Sort by timestamp to ensure chronological order; the index is rebuilt so
# positional slicing below follows time order
prices = prices.sort_values('timestamp').reset_index(drop=True)
print("Sorted data by timestamp")

# Limit to the first 20,000 rows (in-sample data)
in_sample_prices = prices.iloc[:20000]
print(f"Limited to {len(in_sample_prices)} in-sample data points")

In [None]:
# Pick the price series used by the rest of the notebook.
# Preference order: mid_price, then the mean of bid_price_1/ask_price_1,
# then vwap, then price.
print(f"Available columns in price data: {in_sample_prices.columns.tolist()}")

# Fair price the strategy reverts to
FAIR_PRICE = 2000

available_columns = set(in_sample_prices.columns)

if 'mid_price' in available_columns:
    squid_price = in_sample_prices['mid_price']
    print("Using mid_price column for price data")
elif {'bid_price_1', 'ask_price_1'} <= available_columns:
    # No mid_price column: average the level-1 bid and ask instead
    level1_bid = in_sample_prices['bid_price_1']
    level1_ask = in_sample_prices['ask_price_1']
    squid_price = (level1_bid + level1_ask) / 2
    print("Calculated mid price from bid_price_1 and ask_price_1")
else:
    # Last-resort single-column fallbacks, tried in order
    fallback_sources = (
        ('vwap', "Using vwap column for price data"),
        ('price', "Using price column for price data"),
    )
    for fallback_column, fallback_message in fallback_sources:
        if fallback_column in available_columns:
            squid_price = in_sample_prices[fallback_column]
            print(fallback_message)
            break
    else:
        raise ValueError("Could not find appropriate price columns in the data")

print(f"Price range: {squid_price.min()} to {squid_price.max()}")

# Simple and log returns; the first row has no predecessor and is dropped
returns = squid_price.pct_change().dropna()
log_returns = np.log(squid_price).diff().dropna()
print(f"Calculated {len(returns)} return data points")

# Deviation from fair price, in price units and as a percentage of fair price
price_deviation = squid_price - FAIR_PRICE
pct_deviation = price_deviation / FAIR_PRICE * 100

### 1.1 Visualize Price Data

Let's visualize the price data and its deviation from the fair price.

In [None]:
# Figure 1: price series with the fair price drawn as a dashed reference line
price_fig, price_ax = plt.subplots(figsize=(12, 6))
price_ax.plot(squid_price, label='Squid_Ink Price')
price_ax.axhline(y=FAIR_PRICE, color='r', linestyle='--', label='Fair Price (2000)')
price_ax.set_title('Squid_Ink Price')
price_ax.set_xlabel('Timestamp')
price_ax.set_ylabel('Price')
price_ax.legend()
price_ax.grid(True)
plt.show()

# Figure 2: percentage deviation from fair price, with a dashed zero line
deviation_fig, deviation_ax = plt.subplots(figsize=(12, 6))
deviation_ax.plot(pct_deviation, label='Percentage Deviation from Fair Price')
deviation_ax.axhline(y=0, color='r', linestyle='--')
deviation_ax.set_title('Percentage Deviation from Fair Price (2000)')
deviation_ax.set_xlabel('Timestamp')
deviation_ax.set_ylabel('Percentage Deviation (%)')
deviation_ax.legend()
deviation_ax.grid(True)
plt.show()

## 2. Implement Dynamic Position Sizing Strategy

Now, let's implement a mean reversion strategy that dynamically adjusts position size based on the distance from the fair price. The further away the price is from the fair price, the larger the position size.

In [None]:
def dynamic_position_sizing_strategy(prices, fair_price, max_position=1.0, scaling_factor=1.0, cap_pct=10.0):
    """
    Implement a mean reversion strategy with dynamic position sizing based on distance from fair price.

    The position is opposite in sign to the deviation (long below fair price,
    short above) and grows linearly with the percentage deviation. The deviation
    is first capped at +/- cap_pct, then scaled, and the result is finally
    clipped so that it never exceeds max_position in absolute value, even when
    scaling_factor is greater than 1.

    Parameters:
        prices (pd.Series): Series of prices
        fair_price (float): Fair price to revert to (must be non-zero)
        max_position (float): Maximum position size (absolute value)
        scaling_factor (float): Factor to scale the position size (higher = more aggressive)
        cap_pct (float): Cap percentage deviation for position sizing (must be positive)

    Returns:
        pd.Series: Portfolio positions (continuous values between -max_position and max_position),
        indexed like ``prices``. NaN prices yield NaN positions.

    Raises:
        ValueError: If fair_price is zero or cap_pct is not positive.
    """
    # Both values are used as divisors below
    if fair_price == 0:
        raise ValueError("fair_price must be non-zero")
    if cap_pct <= 0:
        raise ValueError("cap_pct must be positive")

    # Calculate percentage deviation from fair price
    pct_deviation = (prices - fair_price) / fair_price * 100

    # Cap the percentage deviation to avoid extreme positions
    capped_pct_deviation = pct_deviation.clip(lower=-cap_pct, upper=cap_pct)

    # Negative deviation = positive position; the further from fair price,
    # the larger the position
    raw_positions = -capped_pct_deviation * scaling_factor / cap_pct * max_position

    # With scaling_factor > 1 the raw size can exceed max_position; enforce the limit
    position_limit = abs(max_position)
    positions = raw_positions.clip(lower=-position_limit, upper=position_limit)

    return positions

### 2.1 Test Different Parameter Combinations

Let's test different parameter combinations to find the optimal strategy.

In [None]:
# Grid search over the strategy parameters
max_positions = [1.0, 2.0, 3.0]  # Maximum position sizes to test
scaling_factors = [0.5, 1.0, 1.5]  # Scaling factors to test
cap_pcts = [5.0, 10.0, 15.0]  # Cap percentages to test

# One dict of metrics per parameter combination
results = []

# Flatten the three parameter lists into a single list of combinations,
# iterating max_position outermost and cap_pct innermost
parameter_grid = [
    (position_limit, scale, cap)
    for position_limit in max_positions
    for scale in scaling_factors
    for cap in cap_pcts
]

for max_position, scaling_factor, cap_pct in parameter_grid:
    positions = dynamic_position_sizing_strategy(
        squid_price, FAIR_PRICE, max_position, scaling_factor, cap_pct
    )

    # The previous row's position earns the current row's return; the first
    # row has no previous position and is dropped
    strategy_returns = (positions.shift(1) * returns).dropna()

    # Compounded return path
    cumulative_returns = (1 + strategy_returns).cumprod() - 1

    # Performance metrics
    # NOTE(review): the factor 252 treats every row as one trading day - confirm
    # this matches the sampling frequency of the data.
    total_return = cumulative_returns.iloc[-1]
    annualized_return = (1 + total_return) ** (252 / len(strategy_returns)) - 1
    annualized_volatility = strategy_returns.std() * np.sqrt(252)
    if annualized_volatility != 0:
        sharpe_ratio = annualized_return / annualized_volatility
    else:
        sharpe_ratio = 0
    # Largest drop of the cumulative-return curve below its running peak
    running_peak = cumulative_returns.cummax()
    max_drawdown = (cumulative_returns - running_peak).min()
    win_rate = (strategy_returns > 0).mean()

    # Mean absolute position over the whole series
    avg_position_size = positions.abs().mean()

    results.append(dict(
        max_position=max_position,
        scaling_factor=scaling_factor,
        cap_pct=cap_pct,
        total_return=total_return,
        annualized_return=annualized_return,
        annualized_volatility=annualized_volatility,
        sharpe_ratio=sharpe_ratio,
        max_drawdown=max_drawdown,
        win_rate=win_rate,
        avg_position_size=avg_position_size,
    ))

# Tabulate the results and rank them by total return, best first
results_df = pd.DataFrame(results).sort_values('total_return', ascending=False)

# Display top 10 results
print("Top 10 Parameter Combinations by Total Return:")
display(results_df.head(10))

## 3. Visualize the Best Strategy

Now, let's visualize the best strategy based on the parameter optimization results.

In [None]:
# The first row of the ranked results table is the best combination by total return
best_params = results_df.iloc[0]
print(f"Best Parameters: max_position = {best_params['max_position']}, scaling_factor = {best_params['scaling_factor']}, cap_pct = {best_params['cap_pct']}")
print(f"Total Return: {best_params['total_return']:.2%}")
print(f"Sharpe Ratio: {best_params['sharpe_ratio']:.2f}")
print(f"Win Rate: {best_params['win_rate']:.2%}")
print(f"Average Position Size: {best_params['avg_position_size']:.2f}")

# Re-run the strategy with the winning parameters to get its position series
best_positions = dynamic_position_sizing_strategy(
    squid_price,
    FAIR_PRICE,
    max_position=best_params['max_position'],
    scaling_factor=best_params['scaling_factor'],
    cap_pct=best_params['cap_pct'],
)

# Previous row's position times current row's return; rows without both are dropped
best_strategy_returns = (best_positions.shift(1) * returns).dropna()

# Compounded return path of the best strategy
best_cumulative_returns = (1 + best_strategy_returns).cumprod() - 1

# Buy and hold benchmark: the raw asset returns, compounded the same way
buy_hold_returns = returns
buy_hold_cumulative_returns = (1 + buy_hold_returns).cumprod() - 1

In [None]:
# Three stacked panels sharing the x-axis: price, position size, cumulative returns
fig, axes = plt.subplots(3, 1, figsize=(15, 12), sharex=True)

# Plot price. This is the series the strategy was actually run on
# (squid_price); the previously referenced `squid_vwap` is never defined in
# this notebook and raised a NameError.
axes[0].plot(squid_price, label='Squid_Ink Price')
axes[0].axhline(y=FAIR_PRICE, color='r', linestyle='--', label='Fair Price (2000)')
axes[0].set_title('Squid_Ink Price')
axes[0].set_ylabel('Price')
axes[0].legend()
axes[0].grid(True)

# Plot positions, with a dashed zero line separating long from short
axes[1].plot(best_positions, label='Position Size')
axes[1].axhline(y=0, color='r', linestyle='--')
axes[1].set_title('Dynamic Position Sizes')
axes[1].set_ylabel('Position Size')
axes[1].legend()
axes[1].grid(True)

# Plot cumulative returns of the strategy against buy & hold
axes[2].plot(best_cumulative_returns, label='Dynamic Position Strategy')
axes[2].plot(buy_hold_cumulative_returns, label='Buy & Hold')
axes[2].set_title('Cumulative Returns')
axes[2].set_xlabel('Timestamp')
axes[2].set_ylabel('Cumulative Return')
axes[2].legend()
axes[2].grid(True)

plt.tight_layout()
plt.show()

## 4. Compare with Fixed Position Strategy

Now, let's compare our dynamic position sizing strategy with a traditional fixed position strategy.

In [None]:
def fixed_position_strategy(prices, fair_price):
    """
    Mean reversion with unit-sized positions around a fair price.

    Parameters:
        prices (pd.Series): Series of prices
        fair_price (float): Fair price to revert to

    Returns:
        pd.Series: Integer positions indexed like ``prices``: -1 (short) where
        the price is above fair price, 1 (long) where it is below, and 0
        everywhere else (price equal to fair price, or not comparable).
    """
    is_above = (prices > fair_price).to_numpy()
    is_below = (prices < fair_price).to_numpy()

    # The two conditions are mutually exclusive; anything matching neither stays flat
    signal = np.select([is_above, is_below], [-1, 1], default=0)

    return pd.Series(signal, index=prices.index, dtype='int64')

# Run the fixed position strategy on the same price series as the dynamic
# strategy (squid_price). The previously referenced `squid_vwap` is never
# defined in this notebook and raised a NameError.
fixed_positions = fixed_position_strategy(squid_price, FAIR_PRICE)

# Previous row's position earns the current row's return; the first row has
# no previous position and is dropped
fixed_strategy_returns = fixed_positions.shift(1) * returns
fixed_strategy_returns = fixed_strategy_returns.dropna()

# Calculate cumulative (compounded) returns
fixed_cumulative_returns = (1 + fixed_strategy_returns).cumprod() - 1

# Calculate performance metrics, using the same formulas as the parameter sweep
# NOTE(review): the factor 252 treats every row as one trading day - confirm
# this matches the sampling frequency of the data.
fixed_total_return = fixed_cumulative_returns.iloc[-1]
fixed_annualized_return = (1 + fixed_total_return) ** (252 / len(fixed_strategy_returns)) - 1
fixed_annualized_volatility = fixed_strategy_returns.std() * np.sqrt(252)
fixed_sharpe_ratio = fixed_annualized_return / fixed_annualized_volatility if fixed_annualized_volatility != 0 else 0
# Largest drop of the cumulative-return curve below its running peak
fixed_max_drawdown = (fixed_cumulative_returns - fixed_cumulative_returns.cummax()).min()
# Fraction of rows with a strictly positive strategy return
fixed_win_rate = (fixed_strategy_returns > 0).mean()

In [None]:
# Print performance metrics of the fixed position strategy
print("Fixed Position Strategy Performance:")
print(f"Total Return: {fixed_total_return:.2%}")
print(f"Annualized Return: {fixed_annualized_return:.2%}")
print(f"Annualized Volatility: {fixed_annualized_volatility:.2%}")
print(f"Sharpe Ratio: {fixed_sharpe_ratio:.2f}")
print(f"Maximum Drawdown: {fixed_max_drawdown:.2%}")
print(f"Win Rate: {fixed_win_rate:.2%}")

# Print dynamic strategy performance for comparison.
# The leading "\n" escape prints a blank separator line; a raw line break
# inside the string literal is a syntax error.
print("\nDynamic Position Strategy Performance:")
print(f"Total Return: {best_params['total_return']:.2%}")
print(f"Annualized Return: {best_params['annualized_return']:.2%}")
print(f"Annualized Volatility: {best_params['annualized_volatility']:.2%}")
print(f"Sharpe Ratio: {best_params['sharpe_ratio']:.2f}")
print(f"Maximum Drawdown: {best_params['max_drawdown']:.2%}")
print(f"Win Rate: {best_params['win_rate']:.2%}")

In [None]:
# Overlay the cumulative return curves of both strategies and the benchmark
comparison_fig, comparison_ax = plt.subplots(figsize=(12, 6))
comparison_ax.plot(best_cumulative_returns, label='Dynamic Position Strategy')
comparison_ax.plot(fixed_cumulative_returns, label='Fixed Position Strategy')
comparison_ax.plot(buy_hold_cumulative_returns, label='Buy & Hold')
comparison_ax.set_title('Comparison of Cumulative Returns')
comparison_ax.set_xlabel('Timestamp')
comparison_ax.set_ylabel('Cumulative Return')
comparison_ax.legend()
comparison_ax.grid(True)
plt.show()

## 5. Implement Transaction Costs

Now, let's implement transaction costs to make our analysis more realistic.

In [None]:
# Define transaction cost (1.5/2000 = 0.075% per dollar traded)
transaction_cost = 1.5/2000  # 0.075% per dollar traded


def _returns_net_of_costs(positions, asset_returns, cost_rate):
    """
    Compute a strategy's returns after proportional transaction costs.

    Parameters:
        positions (pd.Series): Position held at each row
        asset_returns (pd.Series): Per-row asset returns
        cost_rate (float): Cost charged per unit of absolute position change

    Returns:
        tuple: (position_changes, transaction_costs, net_returns, cumulative_net_returns)
    """
    # Row-to-row change in position. The first row has no predecessor, so its
    # change is set to 0 and entering the initial position is not charged.
    position_changes = positions.diff().fillna(0)

    # Cost is proportional to the size of each position change
    transaction_costs = position_changes.abs() * cost_rate

    # The previous row's position earns the current row's return, and the cost
    # of the previous row's trade is deducted in the same row
    net_returns = positions.shift(1) * asset_returns - transaction_costs.shift(1)
    net_returns = net_returns.dropna()

    # Compounded return path after costs
    cumulative_net_returns = (1 + net_returns).cumprod() - 1

    return position_changes, transaction_costs, net_returns, cumulative_net_returns


# Dynamic strategy with transaction costs
(
    dynamic_position_changes,
    dynamic_transaction_costs,
    dynamic_strategy_returns_with_costs,
    dynamic_cumulative_returns_with_costs,
) = _returns_net_of_costs(best_positions, returns, transaction_cost)

# Fixed strategy with transaction costs
(
    fixed_position_changes,
    fixed_transaction_costs,
    fixed_strategy_returns_with_costs,
    fixed_cumulative_returns_with_costs,
) = _returns_net_of_costs(fixed_positions, returns, transaction_cost)

In [None]:
# Performance metrics after transaction costs, computed identically for both strategies
def _performance_summary(period_returns, cumulative):
    """
    Summarize a return series and its compounded path.

    Returns a tuple of (total return, annualized return, annualized volatility,
    Sharpe ratio, maximum drawdown, win rate).
    """
    # NOTE(review): the factor 252 treats every row as one trading day - confirm
    # this matches the sampling frequency of the data.
    total = cumulative.iloc[-1]
    annualized = (1 + total) ** (252 / len(period_returns)) - 1
    volatility = period_returns.std() * np.sqrt(252)
    # A zero-volatility series gets a Sharpe ratio of 0 instead of dividing by zero
    sharpe = annualized / volatility if volatility != 0 else 0
    # Largest drop of the cumulative-return curve below its running peak
    drawdown = (cumulative - cumulative.cummax()).min()
    # Fraction of rows with a strictly positive return
    wins = (period_returns > 0).mean()
    return total, annualized, volatility, sharpe, drawdown, wins


# Dynamic strategy
(
    dynamic_total_return_with_costs,
    dynamic_annualized_return_with_costs,
    dynamic_annualized_volatility_with_costs,
    dynamic_sharpe_ratio_with_costs,
    dynamic_max_drawdown_with_costs,
    dynamic_win_rate_with_costs,
) = _performance_summary(dynamic_strategy_returns_with_costs, dynamic_cumulative_returns_with_costs)

# Fixed strategy
(
    fixed_total_return_with_costs,
    fixed_annualized_return_with_costs,
    fixed_annualized_volatility_with_costs,
    fixed_sharpe_ratio_with_costs,
    fixed_max_drawdown_with_costs,
    fixed_win_rate_with_costs,
) = _performance_summary(fixed_strategy_returns_with_costs, fixed_cumulative_returns_with_costs)

# Number of trades = rows on which the position changed
dynamic_num_trades = (dynamic_position_changes != 0).sum()
fixed_num_trades = (fixed_position_changes != 0).sum()

# Total transaction costs = sum of the per-row costs
dynamic_total_costs = dynamic_transaction_costs.sum()
fixed_total_costs = fixed_transaction_costs.sum()

In [None]:
# Print performance metrics with transaction costs.
# Blank separator lines are produced with "\n" escapes; a raw line break
# inside a string literal is a syntax error.
print("Performance Metrics with Transaction Costs:\n")

print("Dynamic Position Strategy:")
print(f"Total Return: {dynamic_total_return_with_costs:.2%}")
print(f"Annualized Return: {dynamic_annualized_return_with_costs:.2%}")
print(f"Sharpe Ratio: {dynamic_sharpe_ratio_with_costs:.2f}")
print(f"Maximum Drawdown: {dynamic_max_drawdown_with_costs:.2%}")
print(f"Win Rate: {dynamic_win_rate_with_costs:.2%}")
print(f"Number of Trades: {dynamic_num_trades}")
print(f"Total Transaction Costs: {dynamic_total_costs:.2%}")

print("\nFixed Position Strategy:")
print(f"Total Return: {fixed_total_return_with_costs:.2%}")
print(f"Annualized Return: {fixed_annualized_return_with_costs:.2%}")
print(f"Sharpe Ratio: {fixed_sharpe_ratio_with_costs:.2f}")
print(f"Maximum Drawdown: {fixed_max_drawdown_with_costs:.2%}")
print(f"Win Rate: {fixed_win_rate_with_costs:.2%}")
print(f"Number of Trades: {fixed_num_trades}")
print(f"Total Transaction Costs: {fixed_total_costs:.2%}")

In [None]:
# Overlay the after-cost cumulative return curves of both strategies and the benchmark
costs_fig, costs_ax = plt.subplots(figsize=(12, 6))
costs_ax.plot(dynamic_cumulative_returns_with_costs, label='Dynamic Position Strategy (with costs)')
costs_ax.plot(fixed_cumulative_returns_with_costs, label='Fixed Position Strategy (with costs)')
costs_ax.plot(buy_hold_cumulative_returns, label='Buy & Hold')
costs_ax.set_title('Comparison of Cumulative Returns with Transaction Costs')
costs_ax.set_xlabel('Timestamp')
costs_ax.set_ylabel('Cumulative Return')
costs_ax.legend()
costs_ax.grid(True)
plt.show()

## 6. Conclusion

In this notebook, we implemented a mean reversion strategy with dynamic position sizing based on the distance from the fair price of 2000. The key findings are:

1. The dynamic position sizing strategy adjusts position size proportionally to the deviation from the fair price, taking larger positions when the mispricing is more significant.

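2. We tested different parameter combinations for maximum position size, scaling factor, and cap percentage, and selected the combination with the highest total return in a backtest on the in-sample data. Because the selection uses the same data it is evaluated on, these parameters are the best in-sample fit rather than a guaranteed optimum.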

3. Compared to a traditional fixed position strategy, the dynamic position sizing strategy can potentially achieve better risk-adjusted returns by taking advantage of larger mispricings.

4. When accounting for transaction costs, the dynamic position sizing strategy may generate more trades than the fixed strategy, which could impact performance in high-frequency trading scenarios.

Future improvements could include:

1. Testing different position sizing functions (e.g., exponential, logarithmic) instead of linear scaling
2. Combining dynamic position sizing with other indicators (e.g., CMMA, price spikes) to filter trades
3. Implementing a more sophisticated risk management approach
4. Testing the strategy on different assets and time periods