In [None]:
# ============ IMPORTS ============
import pandas as pd
import numpy as np
import sys
from scipy.signal import find_peaks
from src.triple_barrier import triple_barrier

# ============ LOAD CACHED DATA ============
# Alternative raw-price source, kept for reference:
# bitcoin_data_raw = pd.read_pickle('data_cache/bitcoin_data.pkl')
# bitcoin_data_raw = bitcoin_data_raw['bitcoin_price_history']
# NOTE(security): unpickling can execute arbitrary code, so only load cache
# files that were produced by this project.
processed_data_cache = pd.read_pickle('data_cache/processed_data.pkl')
bitcoin_price_and_features = processed_data_cache['bitcoin_price_and_features']

# Start with normalised features.
# Work on a copy: the cells below add price and label columns to labelling_df,
# and without the copy those columns would also appear on the frame held in
# processed_data_cache['features_norm'].
labelling_df = processed_data_cache['features_norm'].copy()

# Add Close price for reference (assigned by index alignment)
labelling_df['Close'] = bitcoin_price_and_features[('Close', 'BTC-USD')]

In [None]:
# ============ PEAK DETECTION & LABELING ============
# Tunable parameters for the peak search and the labelled neighbourhood.
PEAK_PROMINENCE_FRACTION = 0.10  # required prominence, as a fraction of the mean Close
PEAK_MIN_DISTANCE = 30           # minimum number of rows between neighbouring peaks
WINDOW_DAYS = 15                 # rows flagged on each side of a detected peak

# Detect peaks
peak_indices, properties = find_peaks(
    labelling_df['Close'].values,
    prominence=labelling_df['Close'].mean() * PEAK_PROMINENCE_FRACTION,
    distance=PEAK_MIN_DISTANCE
)

# Initialize 'Near_Peak' column
labelling_df['Near_Peak'] = 0

# Label near-peak regions.
# The column position does not change inside the loop, so look it up once.
near_peak_col = labelling_df.columns.get_loc('Near_Peak')
for peak_idx in peak_indices:
    start = max(0, peak_idx - WINDOW_DAYS)
    # iloc slices exclude the stop position, so add 1 to flag WINDOW_DAYS rows
    # after the peak as well as before it (symmetric window around the peak).
    end = min(len(labelling_df), peak_idx + WINDOW_DAYS + 1)
    labelling_df.iloc[start:end, near_peak_col] = 1

In [None]:
# ============ TRIPLE BARRIER LABELLING ============
# ---------- Tunable settings ----------
window = 7                    # forwarded as holding_period (days)
profit_target = 1.5           # forwarded as profit_mult
stop_loss = 1.5               # forwarded as stop_mult
min_return_threshold = 0.005  # forwarded as min_ret_threshold (0.5% minimum return)
# --------------------------------------

# Bring the return and volatility columns over from the price/feature frame.
labelling_df['Daily_Return'] = bitcoin_price_and_features['Daily_Return']
labelling_df['Volatility'] = bitcoin_price_and_features['Volatility_7day']

# ---------- Run the labeller ----------
# NOTE(review): the call reads 'Volatility_EWMA_100day', which this cell does
# not create (presumably it arrives with the normalised features), rather than
# the 'Volatility' column added just above -- confirm this is the intended input.
barrier_kwargs = dict(
    price_series=labelling_df['Close'],
    volatility_series=labelling_df['Volatility_EWMA_100day'],
    holding_period=window,
    profit_mult=profit_target,
    stop_mult=stop_loss,
    min_ret_threshold=min_return_threshold,
)
labels, returns, hit_day = triple_barrier(**barrier_kwargs)

# Attach the three outputs; the tuple order fixes the resulting column order.
for _column, _values in (
    ('Actual_Return_7day', returns),
    ('Barrier_Hit_Day', hit_day),
    ('Label_7day', labels),
):
    labelling_df[_column] = _values

In [None]:
# Persist the labelled frame, index included, as a CSV file.
labelled_csv_path = 'data_cache/labelled_df.csv'
labelling_df.to_csv(labelled_csv_path, index=True)