## Distribution of returns: overnight gaps vs regular trading hours

https://claude.ai/chat/1e648f41-8e1c-4e35-b7c1-0f213c93c2f1

In [1]:
import vectorbtpro as vbt
import ttools as tts
#from lightweight_charts import chart, Panel, PlotDFAccessor, PlotSRAccessor
#import talib
import ttools as tts
from ttools.config import DATA_DIR
from ttools.utils import zoneNY, AggType
from ttools.loaders import load_data
from numba import jit
import pandas as pd
import numpy as np
from datetime import datetime
# Global vectorbtpro settings; these mutate library-wide state for the whole kernel session.
# NOTE(review): presumably auto_rangebreaks hides non-trading gaps on time axes — confirm in the vbt docs.
vbt.settings.plotting.auto_rangebreaks = True
# Select the "dark" theme.
vbt.settings.set_theme("dark")
# NOTE(review): presumably turns on plot resampling for large series — confirm in the vbt docs.
vbt.settings.plotting["use_resampler"] = True

TTOOLS: Loaded env variables from file /Users/davidbrazda/Documents/Development/python/.env


Either load locally aggregated data:

In [8]:
# Load locally aggregated bars for BAC through ttools' load_data.
# The names bound here (day_start, day_stop, ...) are reused by later cells.
symbol = ["BAC"]  # passed as a list; the result below is indexed by ticker
# Build naive datetimes, then attach the New York timezone via zoneNY.localize
day_start = datetime(2024, 2, 1, 9, 30, 0)
day_stop = datetime(2024, 10, 20, 16, 0, 0)
day_start = zoneNY.localize(day_start)
day_stop = zoneNY.localize(day_stop)

# Requested aggregation
resolution = 12 # NOTE(review): the old note said both "10min bars" and "12s bars"; presumably seconds — confirm the unit
agg_type = AggType.OHLCV # other types: AggType.OHLCV_VOL, AggType.OHLCV_DOL, AggType.OHLCV_RENKO
# NOTE(review): exclude_conditions is defined but NOT passed to load_data below (the keyword is
# commented out), so the loader's own defaults apply; editing this list currently has no effect.
exclude_conditions = ['C','O','4','B','7','V','P','W','U','Z','F','9','M','6'] # None to use defaults
minsize = 100 # min trade size to include
main_session_only = True
force_remote = False

bac_data = load_data(symbol = symbol,
                     agg_type = agg_type,
                     resolution = resolution,
                     start_date = day_start,
                     end_date = day_stop,
                     #exclude_conditions = None,
                     minsize = minsize,
                     main_session_only = main_session_only,
                     force_remote = force_remote,
                     return_vbt = False, # presumably True would return a vbt object instead — TODO confirm
                     verbose = True
                     )

# Pull the BAC entry out of the per-symbol result
df_local = bac_data["BAC"]

matched agg files 1

File: BAC-AggType.OHLCV-12-2024-02-01T09-30-00-2024-10-20T16-00-00-4679BCFMOPUVWZ-100-True.parquet
Coverage: 2024-02-01 09:30:00 to 2024-10-20 16:00:00
Symbol: BAC
Agg Type: AggType.OHLCV
Resolution: 12
Excludes: 4679BCFMOPUVWZ
Minsize: 100
Main Session Only: True
--------------------------------------------------------------------------------
Loaded from agg_cache /Users/davidbrazda/Library/Application Support/v2realbot/aggcache/BAC-AggType.OHLCV-12-2024-02-01T09-30-00-2024-10-20T16-00-00-4679BCFMOPUVWZ-100-True.parquet


Or load data remotely.

In [13]:
from ttools.external_loaders import load_history_bars
from ttools.config import zoneNY
from datetime import datetime, time
from alpaca.data.timeframe import TimeFrame, TimeFrameUnit

# Remote-load request: one ticker, the same date window as the local-load cell
# (day_start / day_stop must already be defined), 1-minute bars.
symbol = "SPY"
start_date = day_start #zoneNY.localize(datetime(2023, 2, 27, 18, 51, 38))
end_date = day_stop # zoneNY.localize(datetime(2023, 4, 27, 21, 51, 39))
timeframe = TimeFrame(amount=1,unit=TimeFrameUnit.Minute)

# NOTE(review): the meaning of the trailing positional True is not visible here —
# confirm against load_history_bars' signature.
df = load_history_bars(symbol, start_date, end_date, timeframe, True)
# Select this ticker's rows from the symbol-keyed index. Index by `symbol` rather than a
# repeated 'SPY' literal so that changing the ticker above cannot cause a KeyError here.
df = df.loc[(symbol,)]

In [9]:
import pandas as pd
import numpy as np
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from scipy import stats as scipy_stats

def calculate_returns(df):
    """
    Derive daily gap, main-session and close-to-close returns, in percent.

    The input bars are first collapsed to one OHLC row per calendar day
    (first open, max high, min low, last close); days that end up with any
    missing value are discarded before returns are computed.

    Parameters
    ----------
    df : DataFrame with a datetime-like index and 'open', 'high', 'low',
        'close' columns.

    Returns
    -------
    DataFrame indexed by day with columns 'gap_returns' (open vs previous
    close), 'main_session_returns' (close vs same-day open) and
    'total_returns' (close vs previous close). Rows containing NaN -- such as
    the first day, which has no previous close -- are dropped.
    """
    ohlc_rules = {'open': 'first', 'high': 'max', 'low': 'min', 'close': 'last'}
    daily_df = df.resample('D').agg(ohlc_rules).dropna()

    day_open = daily_df['open']
    day_close = daily_df['close']
    # Close of the previous retained day (empty days were already removed above)
    prev_close = day_close.shift(1)

    returns = pd.DataFrame(index=daily_df.index)
    returns['gap_returns'] = (day_open / prev_close - 1) * 100
    returns['main_session_returns'] = (day_close / day_open - 1) * 100
    returns['total_returns'] = (day_close / prev_close - 1) * 100

    return returns.dropna()

def analyze_return_statistics(returns_df, periods=None):
    """
    Calculate summary statistics for each return type, optionally for specific periods

    Parameters
    ----------
    returns_df : DataFrame of return columns; used as the single
        'Full Period' entry when `periods` is not given.
    periods : optional mapping of period name -> DataFrame of return columns.
        When provided, statistics are computed per entry and `returns_df`
        itself is not read.

    Returns
    -------
    dict mapping each period name to a DataFrame whose columns are the return
    columns and whose rows are 'mean', 'median', 'std', 'skew', 'kurtosis',
    'min', 'max', 'Jarque-Bera' and 'JB p-value'.
    """
    if periods is None:
        periods = {'Full Period': returns_df}

    all_stats = {}
    for period_name, period_data in periods.items():
        period_stats = {}
        for col in period_data.columns:
            series = period_data[col]
            # Run the Jarque-Bera normality test once and reuse both outputs
            # (statistic, p-value) instead of evaluating it twice per column.
            jb_stat, jb_pvalue = scipy_stats.jarque_bera(series.dropna())
            period_stats[col] = {
                'mean': series.mean(),
                'median': series.median(),
                'std': series.std(),
                'skew': series.skew(),
                'kurtosis': series.kurtosis(),
                'min': series.min(),
                'max': series.max(),
                'Jarque-Bera': jb_stat,
                'JB p-value': jb_pvalue
            }
        all_stats[period_name] = pd.DataFrame(period_stats)

    return all_stats

def plot_overall_distributions(returns_df):
    """
    Create overall distribution plots using plotly

    Builds a three-panel figure: overlaid histograms spanning the top row,
    box plots bottom-left and normal Q-Q plots bottom-right, one trace per
    return series in each panel.

    Parameters
    ----------
    returns_df : DataFrame containing 'gap_returns', 'main_session_returns'
        and 'total_returns' columns (values in percent).

    Returns
    -------
    The plotly figure produced by make_subplots with all traces added.
    """
    # Create subplots
    fig = make_subplots(rows=2, cols=2,
                       subplot_titles=('Return Distributions',
                                     'Box Plots',
                                     'QQ Plots'),
                       specs=[[{'colspan': 2}, None],
                             [{'type': 'box'}, {'type': 'scatter'}]])

    colors = {'gap_returns': 'blue',
              'main_session_returns': 'green',
              'total_returns': 'red'}

    names = {'gap_returns': 'Gap Returns',
             'main_session_returns': 'Main Session Returns',
             'total_returns': 'Total Returns'}

    for col, color in colors.items():
        label = names[col]

        # Histogram -- coloured from `colors` so a series looks the same in every panel
        fig.add_trace(
            go.Histogram(x=returns_df[col],
                        name=f"{label} (Dist)",
                        opacity=0.7,
                        nbinsx=50,
                        marker_color=color,
                        showlegend=True),
            row=1, col=1
        )

        # Box plot, same per-series colour
        fig.add_trace(
            go.Box(y=returns_df[col],
                  name=f"{label} (Box)",
                  boxpoints='outliers',
                  marker_color=color,
                  showlegend=True),
            row=2, col=1
        )

        # QQ plot: sorted sample values against standard-normal quantiles.
        sorted_returns = np.sort(returns_df[col].dropna())
        n_obs = len(sorted_returns)
        # Plotting positions (i - 0.5) / n. A fixed 1%..99% grid would cap the
        # theoretical tails at about +/-2.33 whatever the sample size, which
        # exaggerates apparent tail heaviness, and would place a lone
        # observation at the 1% quantile instead of the median.
        probabilities = (np.arange(1, n_obs + 1) - 0.5) / max(n_obs, 1)
        theoretical_quantiles = scipy_stats.norm.ppf(probabilities)

        fig.add_trace(
            go.Scatter(x=theoretical_quantiles,
                      y=sorted_returns,
                      mode='markers',
                      name=f"{label} (QQ)",
                      marker=dict(color=color),
                      showlegend=True),
            row=2, col=2
        )

    # Update layout
    fig.update_layout(
        height=800,
        title_text='Overall Return Distribution Analysis',
        showlegend=True,
        barmode='overlay',
        legend=dict(
            yanchor="top",
            y=0.99,
            xanchor="left",
            x=1.0
        )
    )

    fig.update_xaxes(title_text='Returns (%)', row=1, col=1)
    fig.update_yaxes(title_text='Count', row=1, col=1)
    fig.update_xaxes(title_text='Theoretical Quantiles', row=2, col=2)
    fig.update_yaxes(title_text='Sample Quantiles', row=2, col=2)

    return fig

def plot_return_components_evolution(returns_df, close_series):
    """
    Chart the running sums of gap, RTH and total returns with the close price overlaid.

    Gap and RTH are drawn as two line traces sharing one stack group, the
    total as a dashed line, and the close price on a secondary right-hand
    axis. A text breakdown of the final values is printed as a side effect.

    Parameters
    ----------
    returns_df : DataFrame with 'gap_returns', 'main_session_returns' and
        'total_returns' columns (percent).
    close_series : Series of close prices; its index is used as the x values
        of the price trace.

    Returns
    -------
    (fig, final_returns) where final_returns maps 'gap', 'rth' and 'total' to
    the last value of the corresponding running sum.
    """
    # Running sums of the daily percentage returns (additive, not compounded)
    source_columns = {
        'gap': 'gap_returns',
        'rth': 'main_session_returns',
        'total': 'total_returns',
    }
    cum_returns = pd.DataFrame(
        {short: returns_df[column].cumsum() for short, column in source_columns.items()}
    )

    fig = go.Figure()

    # The two components share a stack group, so RTH is drawn on top of gap
    stacked_components = (
        ('gap', 'Gap Returns', 'rgba(0, 0, 255, 0.5)'),
        ('rth', 'RTH Returns', 'rgba(0, 255, 0, 0.5)'),
    )
    for key, label, rgba in stacked_components:
        component_trace = go.Scatter(
            x=cum_returns.index,
            y=cum_returns[key],
            name=label,
            fill=None,
            line=dict(color=rgba, width=1),
            mode='lines',
            stackgroup='one'
        )
        fig.add_trace(component_trace)

    # Total return as a dashed reference line (not part of the stack)
    total_trace = go.Scatter(
        x=cum_returns.index,
        y=cum_returns['total'],
        name='Total Returns',
        line=dict(color='red', width=2, dash='dash'),
        mode='lines'
    )
    fig.add_trace(total_trace)

    # Close price, bound to the secondary axis configured in the layout below
    price_trace = go.Scatter(
        x=close_series.index,
        y=close_series,
        name='Close Price',
        line=dict(color='black', width=1.5),
        mode='lines',
        yaxis='y2'
    )
    fig.add_trace(price_trace)

    price_axis = dict(title='Price', overlaying='y', side='right', showgrid=False)
    legend_box = dict(yanchor="top", y=0.99, xanchor="left", x=1.0)
    fig.update_layout(
        title='Return Components and Price Evolution',
        xaxis_title='Date',
        yaxis_title='Cumulative Return (%)',
        yaxis2=price_axis,
        showlegend=True,
        height=600,
        legend=legend_box,
        hovermode='x unified'
    )

    # Last value of each running sum
    final_returns = {key: cum_returns[key].iloc[-1] for key in ('gap', 'rth', 'total')}

    print("\nFinal Returns Breakdown:")
    for label, key in (('Gap', 'gap'), ('RTH', 'rth'), ('Total', 'total')):
        print(f"{label} Returns: {final_returns[key]:.2f}%")
    print("\nContribution to Total Returns:")
    for label, key in (('Gap', 'gap'), ('RTH', 'rth')):
        print(f"{label}: {(final_returns[key]/final_returns['total']*100):.1f}%")

    return fig, final_returns

def analyze_distributions_over_time(df, period_length='Y'):
    """
    Analyze return distributions for different time periods

    Parameters
    ----------
    df : OHLC DataFrame with a datetime-like index, passed to calculate_returns.
    period_length : pandas frequency string used to bucket the daily returns
        (default 'Y', i.e. one bucket per year).
        NOTE(review): recent pandas releases deprecate the 'Y' alias in favour
        of 'YE'; the default is left unchanged for backward compatibility.

    Returns
    -------
    (returns_df, stats_by_period): the daily returns frame and a dict mapping
    each non-empty period's label to its statistics table. Labels are the
    period's year (e.g. '2024') whenever that identifies each period uniquely;
    otherwise the full period label is used.
    """
    returns_df = calculate_returns(df)

    # Split data into periods, keeping only non-empty ones
    grouped = [
        (label, group)
        for label, group in returns_df.groupby(pd.Grouper(freq=period_length))
        if len(group) > 0
    ]

    # Key by year as long as that is unambiguous. With a sub-yearly frequency
    # several periods share a year and year-only keys would silently overwrite
    # each other, so fall back to the full period label in that case.
    keys = [str(label.year) for label, _ in grouped]
    if len(set(keys)) != len(keys):
        keys = [str(label) for label, _ in grouped]
    periods = {key: group for key, (_, group) in zip(keys, grouped)}

    # Calculate statistics for each period
    stats_by_period = analyze_return_statistics(returns_df, periods)

    return returns_df, stats_by_period

def analyze_return_components(returns_df):
    """
    Break compounded total return into its gap and RTH components and plot them.

    Daily percentage returns are compounded with a cumulative product. The
    figure has two rows: a bar chart of the final compounded value of each
    component, and line charts of the compounded paths over time.

    Parameters
    ----------
    returns_df : DataFrame with 'gap_returns', 'main_session_returns' and
        'total_returns' columns (percent).

    Returns
    -------
    (fig, final_returns) where final_returns maps 'total', 'gap' and 'rth' to
    the final compounded return in percent.
    """
    def _compound(pct_series):
        # Chain daily percentage returns into a cumulative fractional return
        return (1 + pct_series/100).cumprod() - 1

    cum_returns = {
        'total': _compound(returns_df['total_returns']),
        'gap': _compound(returns_df['gap_returns']),
        'rth': _compound(returns_df['main_session_returns'])
    }

    # Last point of each compounded path, converted back to percent
    final_returns = {key: path.iloc[-1] * 100 for key, path in cum_returns.items()}

    fig = make_subplots(
        rows=2, cols=1,
        subplot_titles=('Return Components Breakdown', 'Cumulative Returns Over Time'),
        row_heights=[0.4, 0.6],
        vertical_spacing=0.15
    )

    # Row 1: one bar per component
    components = ['Gap Returns', 'RTH Returns', 'Total Returns']
    values = [final_returns[key] for key in ('gap', 'rth', 'total')]
    bar_labels = [f"{v:.1f}%" for v in values]
    fig.add_trace(
        go.Bar(x=components,
               y=values,
               marker_color=['blue', 'green', 'red'],
               text=bar_labels,
               textposition='auto'),
        row=1, col=1
    )

    # Row 2: compounded path of each component, in percent
    line_specs = (
        ('gap', 'Cumulative Gap Returns', 'blue'),
        ('rth', 'Cumulative RTH Returns', 'green'),
        ('total', 'Cumulative Total Returns', 'red'),
    )
    for key, label, color in line_specs:
        fig.add_trace(
            go.Scatter(x=returns_df.index,
                      y=cum_returns[key] * 100,
                      name=label,
                      line=dict(color=color, width=2)),
            row=2, col=1
        )

    legend_box = dict(yanchor="top", y=0.99, xanchor="left", x=1.0)
    fig.update_layout(
        height=800,
        title_text='Return Components Analysis',
        showlegend=True,
        legend=legend_box
    )

    # Axis titles
    fig.update_yaxes(title_text='Return (%)', row=1, col=1)
    fig.update_yaxes(title_text='Cumulative Return (%)', row=2, col=1)

    return fig, final_returns

def plot_return_components_evolution_v2(returns_df):
    """
    Two-panel view of running return sums and each component's relative share.

    The top panel shows the running sums of gap, RTH and total returns. The
    bottom panel shows, for every date, the absolute gap and RTH sums as a
    percentage of their combined absolute size (fixed to a 0-100 axis).

    Parameters
    ----------
    returns_df : DataFrame with 'gap_returns', 'main_session_returns' and
        'total_returns' columns (percent).

    Returns
    -------
    The plotly figure.
    """
    # Running sums of the daily percentage returns
    cum_returns = pd.DataFrame(index=returns_df.index)
    for short, column in (('gap', 'gap_returns'),
                          ('rth', 'main_session_returns'),
                          ('total', 'total_returns')):
        cum_returns[short] = returns_df[column].cumsum()

    fig = make_subplots(
        rows=2, cols=1,
        subplot_titles=('Cumulative Returns', 'Contribution to Total Return (%)'),
        vertical_spacing=0.15,
        row_heights=[0.6, 0.4]
    )

    # Top panel: the three running sums
    top_lines = (
        ('gap', 'Gap Returns', dict(color='blue', width=2)),
        ('rth', 'RTH Returns', dict(color='green', width=2)),
        ('total', 'Total Returns', dict(color='red', width=2, dash='dash')),
    )
    for key, label, line_style in top_lines:
        fig.add_trace(
            go.Scatter(x=cum_returns.index,
                      y=cum_returns[key],
                      name=label,
                      line=line_style),
            row=1, col=1
        )

    # Relative share of each component by absolute size; where both sums are
    # zero the ratio is undefined (NaN) and is shown as an even 50/50 split
    abs_gap = np.abs(cum_returns['gap'])
    abs_rth = np.abs(cum_returns['rth'])
    total_abs = abs_gap + abs_rth
    contribution_df = pd.DataFrame(index=returns_df.index)
    contribution_df['gap_pct'] = (abs_gap / total_abs * 100).fillna(50)
    contribution_df['rth_pct'] = (abs_rth / total_abs * 100).fillna(50)

    # Bottom panel: filled area per component
    share_areas = (
        ('gap_pct', 'Gap Contribution', 'rgba(0, 0, 255, 0.3)', 'blue'),
        ('rth_pct', 'RTH Contribution', 'rgba(0, 255, 0, 0.3)', 'green'),
    )
    for column, label, fill_rgba, edge_color in share_areas:
        fig.add_trace(
            go.Scatter(x=contribution_df.index,
                      y=contribution_df[column],
                      name=label,
                      fill='tozeroy',
                      fillcolor=fill_rgba,
                      line=dict(color=edge_color, width=1)),
            row=2, col=1
        )

    legend_box = dict(yanchor="top", y=0.99, xanchor="left", x=1.0)
    fig.update_layout(
        height=800,
        title_text='Return Components Analysis',
        showlegend=True,
        legend=legend_box,
        hovermode='x unified'
    )

    # Axis titles
    fig.update_yaxes(title_text='Cumulative Return (%)', row=1, col=1)
    fig.update_yaxes(title_text='Contribution (%)', row=2, col=1)
    fig.update_xaxes(title_text='Date', row=2, col=1)

    # Shares are percentages, so pin the bottom panel to 0-100
    fig.update_yaxes(range=[0, 100], row=2, col=1)

    return fig

In [14]:
# Driver cell: runs the full analysis on `df`, the remotely loaded bars from the cell above.
# Uncomment the next line to analyse the locally aggregated bars instead.
#df = df_local
# df must be an OHLCV dataframe with a datetime index
returns_df, stats_by_period = analyze_distributions_over_time(df)

# Plot overall distributions
fig_overall = plot_overall_distributions(returns_df)
fig_overall.show()

# Plot time evolution.
# The same daily resampling that calculate_returns performs is repeated here,
# only to obtain the daily close series for the price overlay.
daily_df = df.resample('D').agg({
    'open': 'first',
    'high': 'max',
    'low': 'min',
    'close': 'last'
}).dropna()
close_series = daily_df['close']
# This call also prints a breakdown based on summed (cumsum) daily returns
fig, final_returns = plot_return_components_evolution(returns_df, close_series)
fig.show()

fig = plot_return_components_evolution_v2(returns_df)
fig.show()

# Print statistics for all periods
for period, stats in stats_by_period.items():
    print(f"\nStatistics for {period}:")
    print(stats)

# Create and display the compounded-return plot.
# `fig` and `final_returns` are rebound here; final_returns now holds compounded
# (cumprod) values, so the figures printed below differ from the summed
# breakdown printed earlier in this cell.
fig, final_returns = analyze_return_components(returns_df)
fig.show()

# Print detailed breakdown (compounded values)
print("\nFinal Returns Breakdown:")
print(f"Gap Returns: {final_returns['gap']:.2f}%")
print(f"RTH Returns: {final_returns['rth']:.2f}%")
print(f"Total Returns: {final_returns['total']:.2f}%")
print(f"Gap Contribution: {(final_returns['gap']/final_returns['total']*100):.1f}% of total")
print(f"RTH Contribution: {(final_returns['rth']/final_returns['total']*100):.1f}% of total")


Final Returns Breakdown:
Gap Returns: 20.22%
RTH Returns: -1.74%
Total Returns: 18.44%

Contribution to Total Returns:
Gap: 109.7%
RTH: -9.4%



Statistics for 2024:
             gap_returns  main_session_returns  total_returns
mean            0.112328         -9.679420e-03       0.102434
median          0.139257          4.283427e-02       0.097468
std             0.565305          5.914967e-01       0.794481
skew           -1.924296         -7.978903e-01      -0.442806
kurtosis       14.385021          1.456108e+00       1.741532
min            -3.921864         -2.036379e+00      -3.012047
max             1.931439          1.445437e+00       2.675700
Jarque-Bera  1569.884888          3.312267e+01      26.460985
JB p-value      0.000000          6.419526e-08       0.000002



Final Returns Breakdown:
Gap Returns: 22.04%
RTH Returns: -2.04%
Total Returns: 19.56%
Gap Contribution: 112.7% of total
RTH Contribution: -10.4% of total
