# Price Spike Holding Period Testing

This notebook tests different holding periods for a price spike-based mean reversion strategy.

In [None]:
import sys
import os

# Import our backtester package
sys.path.append(os.path.abspath('../../'))
from backtester import get_price_data, get_vwap
print("Using backtester package")

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Try to import seaborn, but don't fail if it's not available
try:
    import seaborn as sns
    print(f"Seaborn version: {sns.__version__}")
    sns.set(style="whitegrid")
    HAS_SEABORN = True
except ImportError:
    print("Seaborn not available, using matplotlib instead")
    HAS_SEABORN = False

## 1. Load Data

First, let's load the Squid_Ink price data and limit it to the first 20,000 timestamps (in-sample data).

In [None]:
# Number of leading rows kept as the in-sample window
IN_SAMPLE_SIZE = 20000

# Fetch the SQUID_INK price table through the backtester package
print("Loading price data...")
prices = get_price_data('SQUID_INK', 1)
print(f"Loaded {len(prices)} price data points")

# Keep only the in-sample window (the first IN_SAMPLE_SIZE rows)
in_sample_prices = prices.head(IN_SAMPLE_SIZE)
print(f"Limited to {len(in_sample_prices)} in-sample data points")

# The VWAP column of the in-sample window is the price series used below
print("Getting VWAP for SQUID_INK...")
squid_vwap = in_sample_prices['vwap']
print(f"Got VWAP with {len(squid_vwap)} data points")
print(f"VWAP range: {squid_vwap.min()} to {squid_vwap.max()}")

# Log prices and their first differences (log returns) feed the spike measure;
# the leading NaN produced by diff() is dropped
log_prices = np.log(squid_vwap)
log_returns = log_prices.diff().dropna()
print(f"Calculated log returns with {len(log_returns)} data points")

# Simple percentage returns are what the strategy P&L is computed from
returns = squid_vwap.pct_change().dropna()
print(f"Calculated returns with {len(returns)} data points")

## 2. Calculate Price Spikes

Let's calculate price spikes as log returns divided by rolling standard deviation.

In [None]:
def calculate_price_spikes(log_returns, window=20):
    """
    Scale each log return by its trailing rolling standard deviation.

    The denominator is the standard deviation of the last `window` log
    returns, the current one included. The rolling mean is NOT subtracted,
    so the result is a volatility-normalised return rather than a strict
    z-score. Entries are NaN until `window` observations are available.

    Parameters:
        log_returns (pd.Series): Series of log returns
        window (int): Number of observations in the rolling window

    Returns:
        pd.Series: Price spikes, indexed like `log_returns`
    """
    trailing_vol = log_returns.rolling(window).std()
    return log_returns.div(trailing_vol)

# Fixed look-back for the rolling standard deviation (chosen in previous testing)
window = 20
spikes = calculate_price_spikes(log_returns, window=window)

# Preview the leading values of the spike series
spikes.head(10)

## 3. Define Price Spike-Based Mean Reversion Strategy

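Let's define a strategy that only buys (goes long) below the fair price of 2000 when there's a downward price spike and only sells (goes short) above 2000 when there's an upward price spike. A position is opened on the bar after the spike and held for a fixed number of periods; a later spike overrides any position still open.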

In [None]:
def price_spike_strategy(prices, spikes, fair_price, upper_threshold=2.0, lower_threshold=-2.0, holding_period=10):
    """
    Implement a price spike-based mean reversion strategy.

    A short is triggered when the price is above `fair_price` and the spike
    exceeds `upper_threshold`; a long is triggered when the price is below
    `fair_price` and the spike is under `lower_threshold`. The position is
    opened on the bar AFTER the triggering bar and kept for `holding_period`
    bars (truncated at the end of the series). When holding windows overlap,
    the most recent signal overwrites the earlier one.

    Parameters:
        prices (pd.Series): Series of prices
        spikes (pd.Series): Series of price spikes (z-scores); it is aligned
            to `prices.index`, and labels missing from `spikes` (or NaN
            values) never produce a signal
        fair_price (float): Fair price to revert to
        upper_threshold (float): Upper threshold for price spikes
        lower_threshold (float): Lower threshold for price spikes
        holding_period (int): Number of periods to hold the position

    Returns:
        pd.Series: Portfolio positions (1 for long, -1 for short, 0 for no
        position), int64, indexed like `prices`

    Raises:
        ValueError: If `holding_period` is negative (a negative value would
            otherwise turn into a wrap-around slice end and mark unrelated
            bars as held).
    """
    if holding_period < 0:
        raise ValueError("holding_period must be non-negative")

    # Align spikes to the price index; NaN spikes compare False and are skipped
    aligned_spikes = spikes.reindex(prices.index)

    # Vectorised signal detection; the short condition takes priority,
    # mirroring the original if/elif ordering
    short_entry = ((prices > fair_price) & (aligned_spikes > upper_threshold)).to_numpy()
    long_entry = ((prices < fair_price) & (aligned_spikes < lower_threshold)).to_numpy()
    signal = np.where(short_entry, -1, np.where(long_entry, 1, 0))

    # Walk only the signal bars, in chronological order, so that a later
    # signal overwrites the tail of an earlier holding window
    position_values = np.zeros(len(prices), dtype=np.int64)
    for idx in np.flatnonzero(signal):
        # Slices running past the end of the array are clipped automatically
        position_values[idx + 1: idx + 1 + holding_period] = signal[idx]

    return pd.Series(position_values, index=prices.index)

## 4. Test Different Holding Periods

Let's test different holding periods for the price spike-based mean reversion strategy.

In [None]:
# Define the fair price
FAIR_PRICE = 2000

# Define fixed threshold parameters
upper_threshold = 2.0
lower_threshold = -2.0

# Define holding periods to test
holding_periods = [1, 3, 5, 10, 15, 20, 25, 30, 40, 50]

# Results keyed by holding period; each value is a dict of metrics
holding_results = {}

# Test different holding periods
for hp in holding_periods:
    # Get positions
    positions = price_spike_strategy(squid_vwap, spikes, FAIR_PRICE, upper_threshold, lower_threshold, hp)

    # The position held at the previous bar earns the return realised on this
    # bar; bars without both a lagged position and a return are dropped
    lagged_positions = positions.shift(1)
    strategy_returns = (lagged_positions * returns).dropna()

    # Calculate performance metrics
    total_return = strategy_returns.sum()
    # NOTE(review): sqrt(252) annualises as if each bar were one trading day;
    # confirm the bar frequency of this data before comparing to other Sharpe ratios
    sharpe_ratio = strategy_returns.mean() / strategy_returns.std() * np.sqrt(252)

    # Win rate over bars with an open position only. Averaging over every bar
    # (flat bars included) mostly measures time in the market and rises
    # mechanically with the holding period.
    in_market = lagged_positions.reindex(strategy_returns.index) != 0
    active_returns = strategy_returns[in_market]
    win_rate = (active_returns > 0).mean() if len(active_returns) > 0 else np.nan

    # Count position changes (entries, exits and flips). The first diff is NaN
    # and must not be counted as a change, hence fillna(0).
    num_trades = (positions.diff().fillna(0) != 0).sum()

    # Store results
    holding_results[hp] = {
        'Holding Period': hp,
        'Total Return': total_return,
        'Sharpe Ratio': sharpe_ratio,
        'Win Rate': win_rate,
        'Number of Trades': num_trades
    }

# One row per holding period, one column per metric
holding_df = pd.DataFrame(holding_results).T

# Sort by Total Return so the best holding period comes first
holding_df = holding_df.sort_values('Total Return', ascending=False)

# Display results
holding_df

## 5. Visualize Holding Period Results

Let's visualize the results of the holding period testing.

In [None]:
# Metrics to chart, listed in panel order
metrics = ['Total Return', 'Sharpe Ratio', 'Win Rate', 'Number of Trades']

# One column per metric, one row per holding period (in the tested order)
viz_data = {
    metric: [holding_results[hp][metric] for hp in holding_periods]
    for metric in metrics
}
viz_df = pd.DataFrame(viz_data, index=holding_periods)
viz_df.index.name = 'Holding Period'

# 2x2 grid: one panel per metric, each plotted against the holding period
fig, axes = plt.subplots(2, 2, figsize=(15, 10))
for ax, metric in zip(axes.flat, metrics):
    ax.plot(viz_df.index, viz_df[metric], marker='o')
    ax.set_title(f'{metric} by Holding Period')
    ax.set_xlabel('Holding Period')
    ax.set_ylabel(metric)
    ax.grid(True)

fig.tight_layout()
plt.show()

## 6. Analyze Best Holding Period

Let's analyze the performance of the strategy with the best holding period based on total returns.

In [None]:
# holding_df is sorted by Total Return (descending), so its first index label
# is the best holding period
best_hp = holding_df.index[0]

print(f'Best holding period based on total returns: {best_hp}')
print(f'Total Return: {holding_df.loc[best_hp, "Total Return"]:.4f}')
print(f'Sharpe Ratio: {holding_df.loc[best_hp, "Sharpe Ratio"]:.4f}')
print(f'Win Rate: {holding_df.loc[best_hp, "Win Rate"]:.4f}')
print(f'Number of Trades: {int(holding_df.loc[best_hp, "Number of Trades"])}')

# Get positions for the best holding period
best_positions = price_spike_strategy(squid_vwap, spikes, FAIR_PRICE, upper_threshold, lower_threshold, best_hp)

# The position held at the previous bar earns the return realised on this bar
best_returns = best_positions.shift(1) * returns
best_returns = best_returns.dropna()

# Plot cumulative returns
plt.figure(figsize=(15, 10))

plt.subplot(2, 1, 1)
plt.plot(best_returns.cumsum(), label=f'Strategy Returns (Holding Period={best_hp})')
plt.title('Cumulative Returns of Best Price Spike-Based Strategy', fontsize=16)
plt.legend(fontsize=12)
plt.grid(True)

plt.subplot(2, 1, 2)
plt.plot(squid_vwap, label='VWAP')
plt.axhline(y=FAIR_PRICE, color='r', linestyle='--', label='Fair Price (2000)')

# Mark position ENTRIES only. A plain diff() > 0 / < 0 also fires when a short
# is closed (-1 -> 0) or a long is closed (1 -> 0), which would plot exits as
# buy/sell signals.
previous_positions = best_positions.shift(1)
buy_signals = (best_positions == 1) & (previous_positions != 1)
sell_signals = (best_positions == -1) & (previous_positions != -1)

plt.scatter(buy_signals[buy_signals].index, squid_vwap[buy_signals],
            marker='^', s=100, color='green', label='Buy Signal (Below 2000)')
plt.scatter(sell_signals[sell_signals].index, squid_vwap[sell_signals],
            marker='v', s=100, color='red', label='Sell Signal (Above 2000)')

plt.title('Price with Trading Signals', fontsize=16)
plt.legend(fontsize=12)
plt.grid(True)

plt.tight_layout()
plt.show()

## 7. Conclusion

In this notebook, we've tested different holding periods for a price spike-based mean reversion strategy. The strategy only buys below 2000 when there's a downward price spike and only sells above 2000 when there's an upward price spike.

Key findings:
1. The holding period has a significant impact on the strategy's performance
2. The optimal holding period based on total returns is the value printed as `best_hp` in Section 6; it depends on the data, so re-run the notebook to obtain it
3. Shorter holding periods allow for more frequent trading but may exit positions before the full mean reversion effect is realized
4. Longer holding periods capture more of the mean reversion effect but may expose the strategy to adverse price movements
5. The best holding period balances the trade-off between capturing the mean reversion effect and minimizing exposure to adverse price movements

These findings suggest that the holding period is a critical parameter for price spike-based mean reversion strategies. By optimizing the holding period, we can improve the profitability of the strategy.