# Oscillating Variance Parameter Exploration

This notebook explores how the choice of volatility window, variance window, and threshold multiplier affects the detection of oscillating variance events in SQUID_INK data. Only the first 20,000 timestamps (the in-sample data) are used for the analysis.

In [None]:
import sys
import os

# Import our backtester package
sys.path.append(os.path.abspath('../../'))
from backtester import get_price_data, get_vwap, relative_entropy_binned
print("Using backtester package")

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy.stats import norm
from itertools import product

## 1. Load Data

First, let's load the Squid_Ink price data and limit it to the first 20,000 timestamps (in-sample data).

In [None]:
# Fetch the SQUID_INK price table through the backtester package.
# NOTE(review): the second argument (1) is presumably a round/day selector — confirm against get_price_data.
print("Loading price data...")
prices = get_price_data('SQUID_INK', 1)
print(f"Loaded {len(prices)} price data points")

# Restrict the analysis to the in-sample portion: the first 20,000 rows
in_sample_prices = prices.head(20000)
print(f"Limited to {len(in_sample_prices)} in-sample data points")

# The VWAP series is read directly from the 'vwap' column of the price table
print("Getting VWAP for SQUID_INK...")
squid_vwap = in_sample_prices['vwap']
print(f"Got VWAP with {len(squid_vwap)} data points")
print(f"VWAP range: {squid_vwap.min()} to {squid_vwap.max()}")

# Log returns are first differences of log(VWAP); dropna() removes the
# leading NaN produced by diff() (and any other NaN entries)
log_vwap = np.log(squid_vwap)
log_ret = log_vwap.diff().dropna()
print(f"Calculated log returns with {len(log_ret)} data points")

## 2. Define Function to Calculate Oscillating Variance

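Let's define a function that calculates oscillating variance for a given pair of window sizes. Here, "oscillating variance" means the rolling variance of rolling volatility: we first take the rolling standard deviation of log returns, then take the rolling variance of that volatility series.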

In [None]:
def calculate_oscillating_variance(returns, vol_window, var_window):
    """
    Compute the rolling variance of rolling volatility.

    Parameters:
    - returns: Series of log returns
    - vol_window: Number of observations in the rolling standard deviation
      (the volatility estimate)
    - var_window: Number of observations in the rolling variance taken over
      that volatility series

    Returns:
    - Series on the same index as `returns`; entries are NaN until both
      rolling windows have been filled
    """
    # Step 1: rolling standard deviation of the returns
    rolling_vol = returns.rolling(vol_window).std()

    # Step 2: rolling variance of the volatility series
    return rolling_vol.rolling(var_window).var()

## 3. Explore Different Parameter Combinations

Let's explore different combinations of volatility window and variance window parameters.

In [None]:
# Window sizes to sweep over
vol_windows = [10, 20, 50, 100]
var_windows = [10, 20, 50, 100]

# One oscillating-variance series per (vol_window, var_window) pair,
# keyed as "vol_<vol_window>_var_<var_window>"
osc_var_results = {
    f"vol_{vol_window}_var_{var_window}": calculate_oscillating_variance(log_ret, vol_window, var_window)
    for vol_window, var_window in product(vol_windows, var_windows)
}

# Collect every series as a column of a single DataFrame
osc_var_df = pd.DataFrame(osc_var_results)

# Preview the first ten rows
osc_var_df.head(10)

## 4. Visualize Oscillating Variance for Different Parameters

Let's visualize how oscillating variance changes with different parameter combinations.

In [None]:
# One stacked panel per volatility window; each panel overlays the curves
# for every variance window
fig = plt.figure(figsize=(15, 15))
panel_count = len(vol_windows)

for panel, vol_window in enumerate(vol_windows, start=1):
    ax = fig.add_subplot(panel_count, 1, panel)

    for var_window in var_windows:
        series = osc_var_df[f"vol_{vol_window}_var_{var_window}"]
        ax.plot(series, label=f"Var Window = {var_window}")

    ax.set_title(f"Oscillating Variance with Volatility Window = {vol_window}")
    ax.legend()
    ax.grid(True)

fig.tight_layout()
plt.show()

In [None]:
# Plot distributions of oscillating variance for different parameters.
# plt.subplots creates its own figure, so no separate plt.figure call is
# made here (an extra call would leave an empty figure in the cell output).
# squeeze=False keeps `axes` two-dimensional even if one of the window
# lists has a single entry, so axes[i, j] indexing always works.
fig, axes = plt.subplots(
    len(vol_windows), len(var_windows), figsize=(15, 15), squeeze=False
)

# Rows follow vol_windows, columns follow var_windows
for i, vol_window in enumerate(vol_windows):
    for j, var_window in enumerate(var_windows):
        param_key = f"vol_{vol_window}_var_{var_window}"
        ax = axes[i, j]
        # Warm-up NaNs are dropped before binning
        ax.hist(osc_var_df[param_key].dropna(), bins=50)
        ax.set_title(f"Vol={vol_window}, Var={var_window}")
        ax.grid(True)

fig.tight_layout()
plt.show()

## 5. Calculate Statistics for Different Parameters

Let's calculate statistics for oscillating variance with different parameter combinations.

In [None]:
# Summary statistics of each oscillating-variance series (NaNs excluded)
stats = []

for vol_window, var_window in product(vol_windows, var_windows):
    valid_values = osc_var_df[f"vol_{vol_window}_var_{var_window}"].dropna()

    # Identify the combination first, then append its descriptive statistics
    summary = {'Vol Window': vol_window, 'Var Window': var_window}
    summary.update({
        'Mean': valid_values.mean(),
        'Median': valid_values.median(),
        'Std Dev': valid_values.std(),
        'Skewness': valid_values.skew(),
        'Kurtosis': valid_values.kurt(),
        'Min': valid_values.min(),
        'Max': valid_values.max(),
        'Count': len(valid_values),
    })
    stats.append(summary)

# One row per parameter combination
stats_df = pd.DataFrame(stats)

# Show the table
stats_df

## 6. Define and Detect Oscillating Variance Events

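Let's define oscillating variance events for different parameter combinations and detect them in the data. An event is any observation where the oscillating variance exceeds its mean plus a chosen multiple of its standard deviation.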

In [None]:
# Threshold-based detector for oscillating variance events
def detect_osc_var_events(osc_var, std_multiplier=2.0):
    """
    Flag observations whose oscillating variance exceeds a threshold.

    The threshold is the mean of `osc_var` plus `std_multiplier` times its
    standard deviation, both computed over the whole series passed in.

    Parameters:
    - osc_var: Series with oscillating variance values
    - std_multiplier: Number of standard deviations above the mean at which
      the threshold is placed

    Returns:
    - Tuple (events, threshold): `events` is an integer Series on the index
      of `osc_var` (1 where the value is strictly above the threshold, 0
      otherwise); `threshold` is the scalar cutoff that was applied
    """
    # Cutoff: mean + k * std of the supplied series
    threshold = osc_var.mean() + std_multiplier * osc_var.std()

    # Strictly-greater comparison, converted from bool to 0/1
    events = osc_var.gt(threshold).astype(int)

    return events, threshold

In [None]:
# Threshold multipliers to sweep over
std_multipliers = [1.5, 2.0, 2.5]

# Event indicators and thresholds, keyed as
# "vol_<vol_window>_var_<var_window>_std_<std_multiplier>"
event_results, threshold_results = {}, {}

for vol_window, var_window in product(vol_windows, var_windows):
    param_key = f"vol_{vol_window}_var_{var_window}"
    valid_osc_var = osc_var_df[param_key].dropna()

    # Run the detector once per multiplier on the NaN-free series
    detections = {
        f"{param_key}_std_{std_multiplier}": detect_osc_var_events(valid_osc_var, std_multiplier)
        for std_multiplier in std_multipliers
    }
    for event_key, (events, threshold) in detections.items():
        event_results[event_key] = events
        threshold_results[event_key] = threshold

# One column of event indicators per configuration
event_df = pd.DataFrame(event_results)

# Preview the first ten rows
event_df.head(10)

## 7. Analyze Event Frequency

Let's analyze the frequency of oscillating variance events for different parameter combinations.

In [None]:
# Calculate event frequency for each parameter combination
event_stats = []

for vol_window, var_window in product(vol_windows, var_windows):
    param_key = f"vol_{vol_window}_var_{var_window}"

    for std_multiplier in std_multipliers:
        event_key = f"{param_key}_std_{std_multiplier}"

        # event_df was assembled from series of different lengths, so rows
        # outside this configuration's own range are NaN after index
        # alignment. Drop them so the denominator only counts observations
        # on which the detector actually ran.
        events = event_df[event_key].dropna()
        observation_count = len(events)

        # Alignment NaNs turn the column into floats; report the count as int
        event_count = int(events.sum())
        if observation_count:
            event_percentage = event_count / observation_count * 100
        else:
            # No valid observations: frequency is undefined
            event_percentage = np.nan

        # Store results
        event_stats.append({
            'Vol Window': vol_window,
            'Var Window': var_window,
            'Std Multiplier': std_multiplier,
            'Event Count': event_count,
            'Event Percentage': event_percentage,
            'Threshold': threshold_results[event_key]
        })

# Create a DataFrame with event statistics
event_stats_df = pd.DataFrame(event_stats)

# Display the event statistics, most frequent configurations first
event_stats_df.sort_values('Event Percentage', ascending=False)

## 8. Visualize Events for Selected Parameters

Let's visualize the oscillating variance events for selected parameter combinations.

In [None]:
# Rank every configuration by event frequency (highest first) and keep the
# three most frequent ones
ranked_by_frequency = event_stats_df.sort_values(by='Event Percentage', ascending=False)
top_params = ranked_by_frequency.iloc[:3]
top_params

In [None]:
# Visualize events for top parameter combinations
fig = plt.figure(figsize=(15, 15))

# The column names contain spaces, so DataFrame.itertuples() would rename
# them to positional fields (_1, _2, ...) and attribute access such as
# row.Vol_Window would raise AttributeError. Select the needed columns and
# unpack plain tuples instead; per-column dtypes are preserved, so the
# rebuilt keys match those created during detection.
top_param_values = top_params[['Vol Window', 'Var Window', 'Std Multiplier']]
panel_count = len(top_param_values)

for i, (vol_window, var_window, std_multiplier) in enumerate(
    top_param_values.itertuples(index=False, name=None)
):
    param_key = f"vol_{vol_window}_var_{var_window}"
    event_key = f"{param_key}_std_{std_multiplier}"

    # Get oscillating variance and events
    osc_var = osc_var_df[param_key]
    events = event_df[event_key]
    threshold = threshold_results[event_key]

    # Get event timestamps
    event_times = events[events == 1].index

    # One panel per selected configuration (count follows top_params)
    ax = fig.add_subplot(panel_count, 1, i + 1)
    ax.plot(osc_var, label='Oscillating Variance')
    ax.axhline(y=threshold, color='r', linestyle='--', label='Threshold')
    ax.scatter(event_times, osc_var.loc[event_times],
               marker='o', s=100, color='red', label='Events')

    ax.set_title(f"Oscillating Variance Events (Vol={vol_window}, Var={var_window}, Std={std_multiplier})")
    ax.legend()
    ax.grid(True)

fig.tight_layout()
plt.show()

## 9. Analyze Post-Event Returns

Let's analyze the returns following oscillating variance events for the top parameter combinations.

In [None]:
# Define function to calculate post-event returns
def _summarize_horizon_returns(values):
    """Return summary statistics for a list of cumulative returns (NaN stats, count 0 if empty)."""
    if not values:
        return {
            'mean': np.nan,
            'median': np.nan,
            'std': np.nan,
            'min': np.nan,
            'max': np.nan,
            'count': 0
        }
    return {
        'mean': np.mean(values),
        'median': np.median(values),
        'std': np.std(values),
        'min': np.min(values),
        'max': np.max(values),
        'count': len(values)
    }


def calculate_post_event_returns(events, returns, horizons=(1, 5, 10, 20, 50)):
    """
    Calculate returns after events for different time horizons.

    For every event and every horizon h, the post-event return is the sum of
    the h entries of `returns` that follow the event's position. Events too
    close to the end of `returns` for a full horizon are left out of that
    horizon's statistics.

    Parameters:
    - events: Series of event indicators; entries equal to 1 mark events
    - returns: Series of log returns whose index contains the event labels
    - horizons: Iterable of horizon lengths, in number of observations

    Returns:
    - Dict mapping each horizon to a dict with keys 'mean', 'median', 'std',
      'min', 'max' and 'count'. The shape is the same when there are no
      usable events: the statistics are NaN and 'count' is 0.
    """
    # Get event timestamps
    event_times = events[events == 1].index

    # Resolve each event label to its position in `returns` once, rather
    # than once per horizon.
    event_positions = []
    for time in event_times:
        try:
            idx = returns.index.get_loc(time)
        except (KeyError, TypeError):
            # Event label is not present in the returns index; skip it
            continue
        # get_loc returns a slice or mask for duplicated labels; such
        # ambiguous events are skipped
        if isinstance(idx, (int, np.integer)):
            event_positions.append(int(idx))

    n_returns = len(returns)
    post_returns = {}

    for horizon in horizons:
        # Only events with a full horizon of data after them contribute
        horizon_returns = [
            returns.iloc[idx + 1:idx + horizon + 1].sum()
            for idx in event_positions
            if idx + horizon < n_returns
        ]
        post_returns[horizon] = _summarize_horizon_returns(horizon_returns)

    return post_returns

In [None]:
# Calculate post-event returns for top parameter combinations
post_event_returns = {}

# The column names contain spaces, so DataFrame.itertuples() would expose
# them as positional fields (_1, _2, ...) and row.Vol_Window would raise
# AttributeError. Select the needed columns and unpack plain tuples instead.
top_param_values = top_params[['Vol Window', 'Var Window', 'Std Multiplier']]

for vol_window, var_window, std_multiplier in top_param_values.itertuples(index=False, name=None):
    param_key = f"vol_{vol_window}_var_{var_window}"
    event_key = f"{param_key}_std_{std_multiplier}"

    # Get events
    events = event_df[event_key]

    # Calculate post-event returns
    post_returns = calculate_post_event_returns(events, log_ret)
    post_event_returns[event_key] = post_returns

# Display post-event returns for the first parameter combination
first_key = list(post_event_returns.keys())[0]
pd.DataFrame(post_event_returns[first_key]).T

In [None]:
# Mean post-event return versus horizon, one line per selected configuration
fig = plt.figure(figsize=(15, 10))
ax = fig.gca()

for event_key, post_returns in post_event_returns.items():
    # Horizons are the dict keys; pull the mean return recorded for each
    horizons = list(post_returns)
    mean_returns = [post_returns[horizon]['mean'] for horizon in horizons]

    ax.plot(horizons, mean_returns, marker='o', label=event_key)

# Zero line as a visual reference for the sign of the average return
ax.axhline(y=0, color='r', linestyle='--')
ax.set_title('Post-Event Returns for Top Parameter Combinations')
ax.set_xlabel('Time Horizon')
ax.set_ylabel('Average Return')
ax.legend()
ax.grid(True)
plt.show()

## 10. Conclusion

In this notebook, we've explored different parameter selections for oscillating variance events in Squid_Ink data. We've analyzed how the choice of volatility window, variance window, and standard deviation multiplier affects the detection of oscillating variance events.

Key findings:
1. Different parameter combinations lead to different event frequencies and characteristics
2. The top parameter combinations based on event frequency are **TODO: fill in after running the notebook**
3. Post-event returns show **TODO: fill in after running the notebook**

Based on this analysis, the optimal parameter selection for oscillating variance events appears to be **TODO: fill in after running the notebook**.