In [22]:
from triple_barrier import *
from volatility import get_daily_vol
from load_data import load_bars
from filters import cusum_filter

In [23]:
import pandas as pd
from typing import Tuple, Union
import logging

from abc import ABC, abstractmethod
from typing import Dict, Tuple, Union

import numpy as np
import pandas as pd
import talib
from loguru import logger

class BaseStrategy(ABC):
    """Interface that every trading strategy in this notebook implements.

    Subclasses must override all three methods below; the class itself
    cannot be instantiated.
    """

    @abstractmethod
    def generate_signals(self, data: pd.DataFrame) -> pd.Series:
        """Produce trading signals for ``data``.

        Signal convention: 1 for long, -1 for short, 0 for no position.
        """
        ...

    @abstractmethod
    def get_strategy_name(self) -> str:
        """Return the name identifying this strategy."""
        ...

    @abstractmethod
    def get_objective(self) -> str:
        """Return the objective this strategy pursues."""
        ...

class BollingerMeanReversionStrategy(BaseStrategy):
    """Mean-reversion strategy that trades against moves outside Bollinger Bands.

    Args:
        window: Look-back length passed to ``talib.BBANDS`` as ``timeperiod``.
        num_std: Band width in standard deviations, used for both the upper
            and the lower band.
        objective: Free-form label returned by :meth:`get_objective`.
    """

    def __init__(self, window: int = 20, num_std: float = 2.0, objective: str = "mean_reversion"):
        self.window, self.num_std, self.objective = window, num_std, objective

    def generate_signals(self, data: pd.DataFrame) -> pd.Series:
        """Return an int8 ``side`` series: -1 at/above the upper band, 1 at/below the lower band, else 0.

        Args:
            data: Frame with a ``close`` column; its index is reused for the result.
        """
        prices = data["close"]

        # Same deviation multiplier on both sides of the band.
        band_kwargs = dict(
            timeperiod=self.window, nbdevup=self.num_std, nbdevdn=self.num_std
        )
        upper, _middle, lower = talib.BBANDS(prices, **band_kwargs)

        touches_upper = prices >= upper
        touches_lower = prices <= lower

        position = pd.Series(0, index=data.index, dtype="int8", name="side")
        # Order matters: where both masks are true the later (buy) assignment wins.
        position[touches_upper] = -1  # fade the move up
        position[touches_lower] = 1  # fade the move down
        return position

    def get_strategy_name(self) -> str:
        """Return a name that encodes the window and band width."""
        return f"Bollinger_w{self.window}_std{self.num_std}"

    def get_objective(self) -> str:
        """Return the objective label supplied at construction."""
        return self.objective

def get_entries(
    strategy: 'BaseStrategy',
    data: pd.DataFrame,
    filter_events: bool = False,
    filter_threshold: Union[float, pd.Series, None] = None,
    on_crossover: bool = True,
) -> Tuple[pd.Series, pd.DatetimeIndex]:
    """Get timestamps and position information for entry events.

    The strategy's non-zero signals are the candidate entries. They are then
    narrowed in one of two mutually exclusive ways:

    * ``filter_events=True``: keep only candidates whose timestamp is also
      returned by ``cusum_filter`` run on the close series. ``on_crossover``
      is ignored in this mode.
    * ``filter_events=False`` and ``on_crossover=True``: keep only candidates
      where the signal differs from the previous row.

    With both flags False every non-zero signal is an entry.

    Args:
        strategy (BaseStrategy): The trading strategy object that generates the
            primary signals.
        data (pd.DataFrame): Input data passed to the strategy. Must expose a
            ``close`` column when ``filter_events`` is True.
        filter_events (bool, optional): If True, restrict entries to the
            events selected by the CUSUM filter. Defaults to False.
        filter_threshold (Union[float, pd.Series], optional): Threshold handed
            to the CUSUM filter. Either a real-valued scalar (Python or NumPy
            int/float, but not bool) or a Series; a Series has its NaNs
            dropped and the close series is reindexed to it. Only used when
            ``filter_events`` is True. Defaults to None.
        on_crossover (bool, optional): If True (and ``filter_events`` is
            False), only rows where the signal changes from the previous row
            are entries. Defaults to True.

    Raises:
        AttributeError: If ``filter_events`` is True and ``data`` has no
            ``close`` attribute/column.
        ValueError: If ``filter_events`` is True and ``filter_threshold`` is
            neither a real-valued scalar nor a ``pd.Series``.

    Returns:
        Tuple[pd.Series, pd.DatetimeIndex]: A tuple containing:
            side (pd.Series): int8 Series on ``data.index``. It holds the
                signal of the most recent entry event, forward-filled to
                every later row, and 0 before the first event.
            t_events (pd.DatetimeIndex): Index labels of the detected entry
                events.
    """
    primary_signals = strategy.generate_signals(data)
    signal_mask = primary_signals != 0

    if filter_events:
        try:
            close = data.close
        except AttributeError as e:
            logger.error(f"Dataframe must have a 'close' column: {e}")
            raise

        if isinstance(filter_threshold, pd.Series):
            # dropna() already returns a new object, so the caller's Series is untouched.
            filter_threshold = filter_threshold.dropna()
            close = close.reindex(filter_threshold.index)
        elif isinstance(filter_threshold, bool) or not isinstance(
            filter_threshold, (int, float, np.integer, np.floating)
        ):
            # Accept any real scalar (e.g. np.float32 from a float32 mean),
            # but keep rejecting bool, None and everything else.
            raise ValueError("filter_threshold must be a Series or a float")

        filtered_events = cusum_filter(close, filter_threshold)
        signal_mask &= primary_signals.index.isin(filtered_events)
    elif on_crossover:
        # Keep only rows where the signal differs from the previous row.
        signal_mask &= primary_signals != primary_signals.shift()

    t_events = primary_signals.index[signal_mask]

    # Explicit float dtype: without it the dtype of a data-less Series depends
    # on the pandas version, and NaN is needed as the "no event yet" marker.
    side = pd.Series(np.nan, index=data.index, dtype="float64", name="side")
    side.loc[t_events] = primary_signals.loc[t_events]
    # Carry each event's side forward until the next event; 0 before the first.
    side = side.ffill().fillna(0).astype("int8")

    if filter_events:
        s = " generated by CUSUM filter"
    elif on_crossover:
        s = " generated by crossover"
    else:
        s = ""

    logger.info(f"Generated {len(t_events):,} trade events{s}.")

    return side, t_events

In [24]:
# Load the EURUSD tick bars; `df` is the price frame the later cells read from.
df = load_bars('EURUSD', 'tick')
# Volatility estimate on the close series. It is reused below as the source of
# the CUSUM threshold and as the `target` passed to the labeling step.
# NOTE(review): the exact definition (units, sampling) lives in get_daily_vol — confirm.
target = get_daily_vol(df['close'], lookback=100)

# Collapse the volatility series to one scalar to use as the CUSUM filter threshold.
cusum_filter_threshold = target.mean()

INFO: Loading tick bars from: EURUSD_tick_bars_20251101_170526.csv
INFO: Loaded 686,033 tick bars
INFO:   Start: 2023-01-02 07:33:51.458001
INFO:   End: 2025-10-31 22:58:59.181001
INFO: Loaded 686,033 tick bars
INFO:   Start: 2023-01-02 07:33:51.458001
INFO:   End: 2025-10-31 22:58:59.181001
INFO:   Columns: ['open', 'high', 'low', 'close', 'tick_volume']
INFO:   Columns: ['open', 'high', 'low', 'close', 'tick_volume']


In [25]:
# Create a strategy instance
strategy = BollingerMeanReversionStrategy(window=20, num_std=2.0)

# Generate entry signals: non-zero Bollinger signals that coincide with a
# CUSUM-filtered event.
sides, t_events = get_entries(
    strategy=strategy,
    data=df,
    filter_events=True,
    filter_threshold=cusum_filter_threshold,
    on_crossover=True,  # has no effect here: get_entries only checks it when filter_events is False
)

print(f"Generated {len(t_events):,} entry events")
# `sides` is forward-filled across every bar, so these counts are bars per
# held side, not numbers of entry events.
print(f"\nSide distribution:")
print(sides.value_counts())
print(f"\nFirst few events:")
print(t_events[:5])

INFO: 22,594 CUSUM-filtered events
[32m2025-11-01 18:28:57.758[0m | [1mINFO    [0m | [36m__main__[0m:[36mget_entries[0m:[36m135[0m - [1mGenerated 12,185 trade events generated by CUSUM filter.[0m
[32m2025-11-01 18:28:57.758[0m | [1mINFO    [0m | [36m__main__[0m:[36mget_entries[0m:[36m135[0m - [1mGenerated 12,185 trade events generated by CUSUM filter.[0m


Generated 12,185 entry events

Side distribution:
side
-1    347711
 1    338300
 0        22
Name: count, dtype: int64

First few events:
DatetimeIndex(['2023-01-02 09:47:58.469001', '2023-01-02 10:34:16.916001',
               '2023-01-02 11:02:50.158001', '2023-01-02 11:47:31.464001',
               '2023-01-02 12:23:42.647001'],
              dtype='datetime64[ns]', name='time', freq=None)


In [26]:
# Build the vertical (time-limit) barrier for each entry event.
# NOTE(review): presumably each barrier sits num_bars=50 bars after its event —
# confirm against add_vertical_barrier.
v_barriers = add_vertical_barrier(t_events, df['close'], num_bars=50)
v_barriers.head()

time
2023-01-02 09:47:58.469001   2023-01-02 11:16:17.247001
2023-01-02 10:34:16.916001   2023-01-02 11:55:14.860001
2023-01-02 11:02:50.158001   2023-01-02 13:05:16.449001
2023-01-02 11:47:31.464001   2023-01-02 15:16:27.231001
2023-01-02 12:23:42.647001   2023-01-02 16:03:10.331001
Name: t1, dtype: datetime64[ns]

In [27]:
# Check the target volatility values
print(f"Target volatility stats:")
print(f"  Mean: {target.mean():.6f}")
print(f"  Median: {target.median():.6f}")
print(f"  Min: {target.min():.6f}")
print(f"  Max: {target.max():.6f}")
print(f"\nCUSUM filter threshold: {cusum_filter_threshold:.6f}")
# The 1x / 2x multipliers below are hard-coded to mirror pt_sl=[1, 2] in the
# labeling cell; keep them in sync by hand. They are applied to the MEAN of
# `target`, so they are only indicative — the labeling call receives the full
# `target` series, not this average.
print(f"\nProfit/Loss thresholds (as multiple of target):")
print(f"  Take profit: {1 * target.mean():.6f}")
print(f"  Stop loss: {2 * target.mean():.6f}")

Target volatility stats:
  Mean: 0.000848
  Median: 0.000683
  Min: 0.000005
  Max: 0.008568

CUSUM filter threshold: 0.000848

Profit/Loss thresholds (as multiple of target):
  Take profit: 0.000848
  Stop loss: 0.001696


In [None]:
# Try with more relaxed parameters
# Label each entry event with the triple-barrier method, then print a summary
# and save the labeled events to CSV.
events = triple_barrier_labels(
    close=df['close'],
    target=target,
    t_events=t_events,
    vertical_barrier_times=v_barriers,
    side_prediction=sides,
    pt_sl=[1, 2],  # presumably [profit-take, stop-loss] multiples of target: 1x and 2x, not 2x for both
    min_ret=0.001,  # NOTE(review): nonzero, so a minimum-return filter is still applied; the notes below say 0.0 keeps all events
    min_pct=0.01,  # Reduced from 0.05 to allow smaller classes
    vertical_barrier_zero=False,
    drop=True,  # NOTE(review): True, and the log shows drop_labels ran — an earlier comment said "don't drop"; confirm intent
    verbose=True
)

print(f"\nEvents shape: {events.shape}")
if len(events) > 0:
    print(f"\nLabel distribution:")
    print(events['bin'].value_counts())
    print(f"\nReturn statistics:")
    print(events['ret'].describe())
    print(f"\nFirst few events:")
    print(events.head(10))
    # Save the labeled events; the relative path assumes a `data/` directory
    # exists under the current working directory.
    events.to_csv('data/EURUSD_triple_barrier_events.csv')
else:
    # Nothing was labeled: print troubleshooting hints instead of statistics.
    print("\n⚠️ No events generated!")
    print("\nPossible reasons:")
    print("1. pt_sl thresholds too tight relative to volatility")
    print("2. Vertical barriers being hit before price barriers")
    print("3. CUSUM filter + Bollinger strategy combination too restrictive")

get_events done after 0:00:00.
get_bins done after 0:00:00.
drop_labels done after 0:00:00.

triple_barrier_labels done after 0:00:00.

pt_sl = [1, 2]
Sampled 4,377 of 686,033 (0.64%).

Label distribution:
  0: 46.31%
  1: 53.69%

Accuracy (positive labels): 53.69%

Events shape: (4377, 5)

Label distribution:
bin
1    2350
0    2027
Name: count, dtype: int64

Return statistics:
count    4377.000000
mean        0.000036
std         0.001455
min        -0.011132
25%        -0.000768
50%         0.000114
75%         0.001141
max         0.006353
Name: ret, dtype: float64

First few events:
                                                   t1      trgt       ret  \
time                                                                        
2023-01-03 10:04:56.812001 2023-01-03 10:16:13.477001  0.001016 -0.002159   
2023-01-03 10:05:15.772001 2023-01-03 10:17:36.057001  0.001141 -0.001538   
2023-01-03 10:13:48.968001 2023-01-03 10:27:00.805001  0.001753 -0.001228   
2023-01-03 10:18:52.

## Important Finding: min_ret Parameter

**The `min_ret` parameter is critical for tick/high-frequency data:**
- Forex tick bars have very small returns (typically 0.00001 - 0.001)
- Setting `min_ret=0.001` (0.1%) filtered out ~99% of events
- Setting `min_ret=0.0` keeps all events regardless of return magnitude

**Recommendation for tick data:**
- Use `min_ret=0.0` or very small values (< 0.0001)
- Or use adaptive thresholds based on volatility: `min_ret = 0.5 * target.mean()`

## Analysis of Triple Barrier Results

The labels represent:
- **bin = 1**: Price hit the profit target (moved favorably by 1x volatility, since `pt_sl=[1, 2]` sets the take-profit at 1x and the stop-loss at 2x the target)
- **bin = 0**: Vertical barrier hit (timeout after 50 bars)
- **bin = -1**: Stop loss hit (moved against by 2x volatility) - but these were filtered out

Note: The actual returns (`ret`) show the price movement at the barrier touch, not the full profit/loss of the trade.

In [29]:
# Detailed analysis by side (long vs short)
# Splits the labeled `events` by their `side` column and prints, per side,
# the label counts/percentages and basic statistics of `ret`.
print("Analysis by Trade Direction:")
print("="*60)

# -1 = short, 1 = long (same convention as the strategy signals).
for side_val in [-1, 1]:
    side_name = "SHORT" if side_val == -1 else "LONG"
    side_events = events[events['side'] == side_val]
    
    # Skip a side entirely when it has no labeled events.
    if len(side_events) > 0:
        print(f"\n{side_name} trades: {len(side_events):,}")
        print(f"  Label distribution:")
        for label, count in side_events['bin'].value_counts().sort_index().items():
            pct = count / len(side_events) * 100
            # NOTE(review): these display names assume bin 0 means "timeout" and
            # bin 1 means "profit target hit". The saved output has only labels
            # 0 and 1, and the first printed events have ret close to -2x trgt,
            # which looks like stop-loss exits — confirm the label semantics in
            # triple_barrier_labels before relying on these names.
            label_name = {-1: "Stop Loss", 0: "Timeout", 1: "Profit Target"}[label]
            print(f"    {label_name:15s}: {count:6,} ({pct:5.1f}%)")
        
        print(f"  Return stats:")
        print(f"    Mean: {side_events['ret'].mean():8.6f}")
        print(f"    Std:  {side_events['ret'].std():8.6f}")
        print(f"    Min:  {side_events['ret'].min():8.6f}")
        print(f"    Max:  {side_events['ret'].max():8.6f}")

# Check if the strategy is working as expected
# Both rates are fractions of ALL labeled events (long and short together);
# they are only meaningful when `events` is non-empty.
print("\n" + "="*60)
print("Strategy Effectiveness:")
print(f"  Bollinger mean reversion with pt_sl=[1,2]")
print(f"  Profit target hit rate: {(events['bin']==1).sum() / len(events):.1%}")
print(f"  Timeout rate: {(events['bin']==0).sum() / len(events):.1%}")

Analysis by Trade Direction:

SHORT trades: 2,205
  Label distribution:
    Timeout        :  1,025 ( 46.5%)
    Profit Target  :  1,180 ( 53.5%)
  Return stats:
    Mean: 0.000050
    Std:  0.001434
    Min:  -0.005477
    Max:  0.006353

LONG trades: 2,172
  Label distribution:
    Timeout        :  1,002 ( 46.1%)
    Profit Target  :  1,170 ( 53.9%)
  Return stats:
    Mean: 0.000023
    Std:  0.001476
    Min:  -0.011132
    Max:  0.005670

Strategy Effectiveness:
  Bollinger mean reversion with pt_sl=[1,2]
  Profit target hit rate: 53.7%
  Timeout rate: 46.3%
