# Price Spike Threshold Heatmap

This notebook creates heatmaps for price spike threshold parameter testing, focusing on total returns.

In [None]:
import sys
import os

# Import our backtester package
sys.path.append(os.path.abspath('../../'))
from backtester import get_price_data, get_vwap
print("Using backtester package")

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Try to import seaborn, but don't fail if it's not available
try:
    import seaborn as sns
    print(f"Seaborn version: {sns.__version__}")
    sns.set(style="whitegrid")
    HAS_SEABORN = True
except ImportError:
    print("Seaborn not available, using matplotlib instead")
    HAS_SEABORN = False

## 1. Load Data

First, let's load the Squid_Ink price data and limit it to the first 20,000 timestamps (in-sample data).

In [None]:
# Fetch the SQUID_INK price history through the backtester package
print("Loading price data...")
prices = get_price_data('SQUID_INK', 1)
print(f"Loaded {len(prices)} price data points")

# Restrict the analysis to the in-sample window (the first 20,000 rows)
IN_SAMPLE_ROWS = 20000
in_sample_prices = prices.iloc[:IN_SAMPLE_ROWS]
print(f"Limited to {len(in_sample_prices)} in-sample data points")

# The VWAP series is read from the 'vwap' column of the loaded frame
print("Getting VWAP for SQUID_INK...")
squid_vwap = in_sample_prices['vwap']
vwap_low, vwap_high = squid_vwap.min(), squid_vwap.max()
print(f"Got VWAP with {len(squid_vwap)} data points")
print(f"VWAP range: {vwap_low} to {vwap_high}")

# Log prices and their first differences (log returns) feed the spike indicator
log_prices = np.log(squid_vwap)
log_price_changes = log_prices.diff()
log_returns = log_price_changes.dropna()
print(f"Calculated log returns with {len(log_returns)} data points")

# Simple percentage returns are what the strategy evaluation multiplies positions by
simple_changes = squid_vwap.pct_change()
returns = simple_changes.dropna()
print(f"Calculated returns with {len(returns)} data points")

## 2. Calculate Price Spikes

Let's calculate price spikes as log returns divided by rolling standard deviation.

In [None]:
def calculate_price_spikes(log_returns, window=20):
    """
    Calculate price spikes as log returns divided by rolling standard deviation.

    Windows whose standard deviation is zero (or not yet defined) produce NaN
    rather than +/-inf, so a run of identical returns can never register as an
    infinitely large spike.

    Parameters:
        log_returns (pd.Series): Series of log returns
        window (int): Window size for rolling standard deviation

    Returns:
        pd.Series: Price spikes (z-scores), NaN where the rolling standard
        deviation is unavailable or zero
    """
    # Rolling dispersion of the returns; NaN until `window` observations exist
    rolling_std = log_returns.rolling(window=window).std()

    # Mask non-positive dispersion: dividing a non-zero return by 0 would give
    # +/-inf, which compares as beyond every threshold and fakes a signal
    usable_std = rolling_std.where(rolling_std > 0)

    # Scale each return by the dispersion of its trailing window
    price_spikes = log_returns / usable_std

    return price_spikes

# Build one z-score series per rolling-window length, keyed as 'Spike_<window>'
window_sizes = [10, 20, 50, 100]
price_spike_indicators = {
    f'Spike_{size}': calculate_price_spikes(log_returns, size)
    for size in window_sizes
}

# Fix the window length used for the rest of the analysis
window = 20
spikes = price_spike_indicators[f'Spike_{window}']

# Preview the leading values of the selected indicator
spikes.head(10)

## 3. Define Price Spike-Based Mean Reversion Strategy

Let's define a strategy that only buys below 2000 when there's a downward price spike and only sells above 2000 when there's an upward price spike.

In [None]:
def price_spike_strategy(prices, spikes, fair_price, upper_threshold=2.0, lower_threshold=-2.0, holding_period=10):
    """
    Implement a price spike-based mean reversion strategy.

    A short is opened when the price is above `fair_price` and the spike exceeds
    `upper_threshold`; a long is opened when the price is below `fair_price` and
    the spike is under `lower_threshold`. The position starts on the row AFTER
    the signal and lasts `holding_period` rows (clipped at the end of the
    series). Signals are applied in chronological order, so a later signal
    overwrites any rows still held from an earlier one.

    Parameters:
        prices (pd.Series): Series of prices
        spikes (pd.Series): Series of price spikes (z-scores); NaN entries are
            ignored, and every remaining label must exist in `prices.index`
        fair_price (float): Fair price to revert to
        upper_threshold (float): Upper threshold for price spikes
        lower_threshold (float): Lower threshold for price spikes
        holding_period (int): Number of periods to hold the position

    Returns:
        pd.Series: Portfolio positions (1 for long, -1 for short, 0 for no position)
    """
    # Start flat everywhere
    positions = pd.Series(0, index=prices.index)

    # Only timestamps with a defined spike can generate a signal
    valid_spikes = spikes.dropna()
    if valid_spikes.empty:
        return positions

    # Align prices and row numbers to the spike timestamps once, instead of
    # doing per-row label lookups inside a Python loop
    spike_values = valid_spikes.to_numpy()
    price_values = prices.loc[valid_spikes.index].to_numpy()
    row_numbers = prices.index.get_indexer(valid_spikes.index)

    # ONLY short above fair price on an upward spike,
    # ONLY buy below fair price on a downward spike
    short_mask = (price_values > fair_price) & (spike_values > upper_threshold)
    long_mask = (price_values < fair_price) & (spike_values < lower_threshold)
    direction = np.where(short_mask, -1, np.where(long_mask, 1, 0))

    # Walk just the signal rows, in order, so later signals win on overlap
    n_rows = len(positions)
    for signal_pos in np.flatnonzero(direction):
        start = int(row_numbers[signal_pos]) + 1
        stop = min(start + holding_period, n_rows)
        positions.iloc[start:stop] = int(direction[signal_pos])

    return positions

## 4. Test Price Spike Threshold Parameters

Let's test different upper and lower threshold parameters for the price spike indicator, focusing on total returns as the primary metric.

In [None]:
# Define the fair price
FAIR_PRICE = 2000

# Define threshold parameters to test
upper_thresholds = [1.0, 1.5, 2.0, 2.5, 3.0]
lower_thresholds = [-1.0, -1.5, -2.0, -2.5, -3.0]
holding_period = 10  # Use a fixed holding period for threshold testing

# Initialize results dictionary
threshold_results = {}

# Test different threshold combinations
for upper in upper_thresholds:
    for lower in lower_thresholds:
        # A pair is only invalid when the bands cross (upper not above lower).
        # Asymmetric and symmetric pairs such as (2.0, -2.0) are legitimate and
        # must be evaluated, otherwise most of the heatmap stays empty.
        if upper <= lower:
            continue

        strategy_name = f'upper_{upper}_lower_{lower}'

        # Get positions
        positions = price_spike_strategy(squid_vwap, spikes, FAIR_PRICE, upper, lower, holding_period)

        # Calculate strategy returns: the position held at the previous row
        # earns the return realised over the current row
        lagged_positions = positions.shift(1)
        strategy_returns = (lagged_positions * returns).dropna()

        # Calculate performance metrics
        total_return = strategy_returns.sum()

        # NOTE(review): sqrt(252) annualises DAILY returns; these are
        # per-timestamp returns, so confirm the intended scaling factor.
        return_std = strategy_returns.std()
        if return_std > 0:
            sharpe_ratio = strategy_returns.mean() / return_std * np.sqrt(252)
        else:
            # No variation (e.g. no trade was ever opened): ratio is undefined
            sharpe_ratio = np.nan

        # Win rate over periods with an open position only; flat periods have
        # a return of exactly 0 and would otherwise drag the rate towards 0
        in_market = lagged_positions.reindex(strategy_returns.index) != 0
        active_returns = strategy_returns[in_market]
        win_rate = (active_returns > 0).mean() if len(active_returns) else np.nan

        # Count position changes; the first diff is NaN (no previous row) and
        # must not be counted as a trade
        num_trades = int((positions.diff().fillna(0) != 0).sum())

        # Store results
        threshold_results[strategy_name] = {
            'Upper Threshold': upper,
            'Lower Threshold': lower,
            'Total Return': total_return,
            'Sharpe Ratio': sharpe_ratio,
            'Win Rate': win_rate,
            'Number of Trades': num_trades
        }

# Convert results to DataFrame (one row per strategy, per-column dtypes kept)
threshold_df = pd.DataFrame.from_dict(threshold_results, orient='index')

# Sort by Total Return
threshold_df = threshold_df.sort_values('Total Return', ascending=False)

# Display top 10 results
threshold_df.head(10)

## 5. Create Heatmaps for Threshold Parameters

Let's create heatmaps to visualize the results of the threshold parameter testing, focusing on total returns.

In [None]:
# Create pivot tables for heatmap visualization
# (rows: upper threshold, columns: lower threshold)
pivot_data = threshold_df.reset_index()
total_return_pivot = pd.pivot_table(
    pivot_data,
    values='Total Return',
    index='Upper Threshold',
    columns='Lower Threshold'
)

trades_pivot = pd.pivot_table(
    pivot_data,
    values='Number of Trades',
    index='Upper Threshold',
    columns='Lower Threshold'
)


def plot_threshold_heatmap(pivot, title, cmap, value_format, colorbar_label):
    """
    Draw one annotated heatmap of a threshold pivot table.

    Uses seaborn when HAS_SEABORN is true, otherwise falls back to a plain
    matplotlib image with manual tick labels and cell annotations.

    Parameters:
        pivot (pd.DataFrame): Values indexed by upper threshold (rows) and
            lower threshold (columns)
        title (str): Figure title
        cmap (str): Matplotlib colormap name
        value_format (str): Format spec applied to every cell annotation.
            Must be a float format: pivot_table aggregates with the mean, so
            cells are floats and an integer spec such as 'd' raises ValueError.
        colorbar_label (str): Colorbar label for the matplotlib fallback
    """
    plt.figure(figsize=(12, 8))

    if HAS_SEABORN:
        sns.heatmap(pivot, annot=True, cmap=cmap, fmt=value_format, linewidths=.5)
    else:
        plt.imshow(pivot, cmap=cmap, interpolation='nearest')
        plt.colorbar(label=colorbar_label)

        # Add tick labels
        plt.xticks(range(len(pivot.columns)), pivot.columns)
        plt.yticks(range(len(pivot.index)), pivot.index)

        # Add text annotations, skipping combinations that were not tested
        for i in range(len(pivot.index)):
            for j in range(len(pivot.columns)):
                cell = pivot.iloc[i, j]
                if not np.isnan(cell):
                    plt.text(j, i, format(cell, value_format),
                             ha='center', va='center', color='white')

    plt.title(title, fontsize=16)
    plt.xlabel('Lower Threshold', fontsize=14)
    plt.ylabel('Upper Threshold', fontsize=14)
    plt.tight_layout()
    plt.show()


# Create heatmaps
plot_threshold_heatmap(
    total_return_pivot,
    'Total Return by Price Spike Threshold Parameters',
    cmap='viridis',
    value_format='.4f',
    colorbar_label='Total Return',
)

# '.0f' rather than 'd': the pivot holds floats even though counts are whole
plot_threshold_heatmap(
    trades_pivot,
    'Number of Trades by Price Spike Threshold Parameters',
    cmap='Blues',
    value_format='.0f',
    colorbar_label='Number of Trades',
)

## 6. Analyze Best Threshold Parameters

Let's analyze the performance of the strategy with the best threshold parameters based on total returns.

In [None]:
# Get the best threshold parameters based on total returns
# (threshold_df is sorted by 'Total Return' descending, so row 0 is the best)
best_strategy = threshold_df.index[0]
best_upper = threshold_df.loc[best_strategy, 'Upper Threshold']
best_lower = threshold_df.loc[best_strategy, 'Lower Threshold']

print(f'Best threshold parameters based on total returns: Upper = {best_upper}, Lower = {best_lower}')
print(f'Total Return: {threshold_df.loc[best_strategy, "Total Return"]:.4f}')
print(f'Sharpe Ratio: {threshold_df.loc[best_strategy, "Sharpe Ratio"]:.4f}')
print(f'Win Rate: {threshold_df.loc[best_strategy, "Win Rate"]:.4f}')
print(f'Number of Trades: {int(threshold_df.loc[best_strategy, "Number of Trades"])}')

# Get positions for the best threshold parameters
best_positions = price_spike_strategy(squid_vwap, spikes, FAIR_PRICE, best_upper, best_lower, holding_period)

# Calculate strategy returns (previous row's position earns this row's return)
best_returns = (best_positions.shift(1) * returns).dropna()

# Mark position ENTRIES only. A plain diff() > 0 / < 0 would also flag the
# closing of a short as a "buy" and the closing of a long as a "sell", which
# contradicts the "below/above fair price" legend labels.
previous_positions = best_positions.shift(1)
buy_signals = (best_positions == 1) & (previous_positions != 1)
sell_signals = (best_positions == -1) & (previous_positions != -1)

fig, (returns_ax, price_ax) = plt.subplots(2, 1, figsize=(15, 10))

# Top panel: cumulative (summed) strategy returns
returns_ax.plot(best_returns.cumsum(), label=f'Strategy Returns (Upper={best_upper}, Lower={best_lower})')
returns_ax.set_title('Cumulative Returns of Best Price Spike-Based Strategy', fontsize=16)
returns_ax.legend(fontsize=12)
returns_ax.grid(True)

# Bottom panel: price, fair price and entry markers
price_ax.plot(squid_vwap, label='VWAP')
price_ax.axhline(y=FAIR_PRICE, color='r', linestyle='--', label=f'Fair Price ({FAIR_PRICE})')

price_ax.scatter(buy_signals[buy_signals].index, squid_vwap[buy_signals],
                 marker='^', s=100, color='green', label=f'Buy Signal (Below {FAIR_PRICE})')
price_ax.scatter(sell_signals[sell_signals].index, squid_vwap[sell_signals],
                 marker='v', s=100, color='red', label=f'Sell Signal (Above {FAIR_PRICE})')

price_ax.set_title('Price with Trading Signals', fontsize=16)
price_ax.legend(fontsize=12)
price_ax.grid(True)

fig.tight_layout()
plt.show()

## 7. Conclusion

In this notebook, we've explored a price spike-based mean reversion strategy for Squid_Ink. The strategy only buys below 2000 when there's a downward price spike and only sells above 2000 when there's an upward price spike.

Key findings:
1. Price spikes, calculated as log returns divided by rolling standard deviation, provide a good indicator for potential mean reversion opportunities
2. The optimal threshold parameters based on total returns are **TODO: fill in from the Section 6 output after running the notebook**
3. More extreme thresholds (higher upper, lower lower) increase the selectivity of the strategy, because fewer price spikes exceed them
4. More selective strategies produce fewer trades; whether they achieve higher or lower total returns is **TODO: fill in from the total-return heatmap in Section 5**
5. The best threshold combination balances the trade-off between signal quality and signal frequency

These findings suggest that price spikes are a valuable indicator for mean reversion strategies. By only trading when there are significant price spikes, we can improve the quality of our mean reversion signals and potentially increase the profitability of the strategy.