In [447]:
import locale
import itertools
import pandas as pd
import numpy as np
import scipy.stats as stats
import matplotlib.pyplot as plt
import plotly.graph_objects as go
from plotly.subplots import make_subplots

In [197]:
swaps = pd.read_csv(r'C:\Users\david\Desktop\phi_wsol_swaps.csv')

# Convert string timestamp to datetime format
swaps['blockTimestamp'] = pd.to_datetime(swaps['blockTimestamp'])

# Set block timestamp as new index for aggregation
swaps.set_index('blockTimestamp', inplace=True)

In [None]:
# Aggregate & Calculate trading signals
def algo_backtest(
        swaps: pd.DataFrame, 
        candle_interval='5min', 
        vwap_window=20, 
        z_window=20, 
        std=2,
        volume_col='usd_volume'
        ) -> pd.DataFrame:
    """
    Generates trading signals by aggregating swap data into OHLC candles, computing VWAP-based z-scores, and filtering signal cycles to avoid disruptive transitions.  
    Returns a DataFrame with enriched signal logic and a dictionary of specified configuration parameters.
    """
    # ––––––––––––––––––––––––––––
    # Aggregate base token prices into specified OHLC candles interval AND aggregate usd volume
    ohlc = swaps.resample(candle_interval).agg({
        'baseTokenPriceUsd': 'ohlc', # Build OHLS candles
        'totalValueUsd': 'sum'       # Add USD volumes
    })

    ohlc.columns = ['open', 'high', 'low', 'close', 'usd_volume']

    # ––––––––––––––––––––––––––––
    # Assign z-scores to deviations from rolling VWAP (but actually from the mean of these deviations based on std() formula and z-window size)
    signal_df = vwap_zscore_signals(ohlc, volume_col=volume_col, vwap_window=vwap_window, z_window=z_window, std=std)

    # ––––––––––––––––––––––––––––
    # Make intermediate signals that occur between a long (1) and a short (-1) or vice versa distinguishable to avoid long-short/short-long cycle disruptions
    # e.g. [000_1011101-1_00] (recurring ones between 1 and -1 disrupt the cycle)
    sig_filtered = []
    leading_sig = None

    for sig in signal_df.signal:
        if sig == 0:
            sig_filtered.append(0)
        elif sig == leading_sig:
            sig_filtered.append(sig * 11)
        else:
            sig_filtered.append(sig)
            leading_sig = sig

    signal_df.loc[:, 'signal_cycle'] = sig_filtered
    # ––––––––––––––––––––––––––––

    # Gather parameter values for descriptive plot labels later
    parameters = {
        'candle_interval': candle_interval, 
        'volume_col': volume_col, 
        'vwap_window': vwap_window, 
        'z_window': z_window, 
        'std': std
    }

    return signal_df, parameters

# Used in algo_backtest() function for algo calculation
def vwap_zscore_signals(df: pd.DataFrame, volume_col: str, vwap_window: int, z_window: int, std: int) -> pd.DataFrame:
    """
    Calculates trading signals by comparing price deviations from a rolling VWAP using z-scores (mean-reverse concept).  
    Flags long and short entries based on std thresholds, enriching the input DataFrame with signal logic and supporting metrics.
    """
    # Fill NaNs with the most recent non-null value
    cols = ['open', 'high', 'low', 'close']
    df[cols] = df[cols].ffill()
    
    # Typical price (H+L+C)/3 for VWAP
    df['typ_price'] = (df['high'] + df['low'] + df['close']) / 3

    # Cumulative VWAP over chosen window
    df['vwap'] = (df['typ_price'] * df[volume_col]).rolling(vwap_window).sum() / \
                 df[volume_col].rolling(vwap_window).sum()

    # Deviation from VWAP
    df['vwap_dev'] = df['close'] - df['vwap']

    # Rolling std of deviations
    df['dev_std'] = df['vwap_dev'].rolling(z_window).std()

    # z-score
    df['zscore'] = df['vwap_dev'] / df['dev_std']

    # Signals
    df['signal'] = 0
    df.loc[df['zscore'] > std, 'signal'] = -1 # short
    df.loc[df['zscore'] < -std, 'signal'] = 1 # long

    return df

# Plot given signals on OHLC candles
def ohlc_signal_plot(df: pd.DataFrame, parameters: dict) -> plt.Figure:
    """
    Visualizes trading signals on a dual-axis chart combining candlestick price data, VWAP, and volume.  
    Highlights long and short entries with distinct markers and overlays recurring signals for cycle clarity.
    """
    # Create subplot with 2 rows: one for candles, one for volume
    fig = make_subplots(
        rows=2, cols=1,
        shared_xaxes=True,
        row_heights=[0.7, 0.3],
        vertical_spacing=0.162
    )

    # Candlestick chart
    fig.add_trace(
        go.Candlestick(
            x=df.index,
            open=df['open'],
            high=df['high'],
            low=df['low'],
            close=df['close'],
            name=f'Price (interval = {parameters['candle_interval']})'
        ),
        row=1, col=1
    )

    # Long signals (1) – green markers below candle lows
    long_mask = df['signal_cycle'] == 1
    fig.add_trace(
        go.Scatter(
            x=df.index[long_mask],
            y=df['low'][long_mask] * 0.97, # a touch below candle low
            mode='markers',
            marker=dict(color='green', size=10, symbol='triangle-up'),
            name='Long signal'
        )
    )

    # Recurring long signals within a long-short cycle – grey markers below candle highs
    rec_long_mask = df['signal_cycle'] == 11
    fig.add_trace(
        go.Scatter(
            x=df.index[rec_long_mask],
            y=df['low'][rec_long_mask] * 0.97, # a touch below candle low
            mode='markers',
            marker=dict(color='grey', size=10, symbol='triangle-up'),
            opacity=0.5,
            name='Recurring long signal'
        )
    )

    # Short signals (-1) – red markers above candle highs
    short_mask = df['signal_cycle'] == -1
    fig.add_trace(
        go.Scatter(
            x=df.index[short_mask],
            y=df['high'][short_mask] * 1.03, # a touch above candle high
            mode='markers',
            marker=dict(color='red', size=10, symbol='triangle-down'),
            name='Short signal'
        )
    )

    # Recurring short signals within a short-long – grey markers above candle highs
    rec_short_mask = df['signal_cycle'] == -11
    fig.add_trace(
        go.Scatter(
            x=df.index[rec_short_mask],
            y=df['high'][rec_short_mask] * 1.03, # a touch above candle high
            mode='markers',
            marker=dict(color='grey', size=10, symbol='triangle-down'),
            opacity=0.5,
            name='Recurring short signal'
        )
    )

    # VWAP
    fig.add_trace(
        go.Scatter(
        x=df.index,
        y=df.vwap,
        mode='lines',
        line=dict(color='blue', width=3),
        opacity=0.4,
        name=f'VWAP (window = {parameters['vwap_window']})'
        ),
        row=1, col=1
    )

    # Volume bars
    fig.add_trace(
        go.Bar(
            x=df.index,
            y=df['usd_volume'],
            marker_color='lightblue',
            opacity=0.9,
            name='Volume'
        ),
        row=2, col=1
    )

    # Proper datetime formats fot the dates in the title below
    locale.setlocale(locale.LC_TIME, 'en_US.UTF-8')

    # Layout tweaks
    fig.update_layout(
        title=f'PHI/WSOL Signals Backtest | {df.index.min().strftime('%d %b %Y, %#I %p')} – {df.index.max().strftime('%d %b %Y, %#I %p')} (UTC)',
        xaxis=dict(
            showticklabels=False,
            rangeslider=dict(
                visible=True,
                bgcolor='rgba(128,128,128,0.15)', # grey with transparency
                thickness=0.1
            )
        ),
        xaxis2=dict(title='Time'),
        yaxis=dict(title='Price (USD)'),
        yaxis2=dict(title='Volume (USD)'),
        showlegend=True,
        height=700
    )

    return fig

In [None]:
signal_df, parameters = algo_backtest(swaps, '5min', 30, 10, 1.5)

# Visualize signals
ohlc_signal_plot(signal_df, parameters)

In [463]:
# Grid search over backtest parameter combinations
def param_grid_result(
        candle_intervals: list = ['1min', '3min', '5min', '7min'],
        vwap_windows: list     = [10, 15, 20, 25, 30],
        z_windows: list        = [10, 15, 20, 25, 30],
        std_thresholds: list   = [1, 1.5, 2, 2.5, 3]
        ) -> pd.DataFrame:
    """
    Performs a grid search over multiple backtest parameter combinations to evaluate trading strategy performance.  
    Iterates through all permutations of candle intervals, VWAP windows, Z-score windows, and standard deviation thresholds.  
    Returns a DataFrame of metrics including win rate, trade count, returns, and drawdown for each configuration.
    """
    param_grid = list(itertools.product(candle_intervals, vwap_windows, z_windows, std_thresholds))

    results = []

    for candle_interval, vwap_window, z_window, std in param_grid:
        df, params = algo_backtest(
            swaps,
            candle_interval=candle_interval,
            vwap_window=vwap_window,
            z_window=z_window,
            std=std
        )
        win_rate, n_trades, median_change, max_change, max_drawdown = evaluate_performance(df)
        results.append({
            'win_rate_%': win_rate,
            'n_trades': n_trades,
            'median_change_%': median_change,
            'max_change_%': max_change,
            'max_drawdown_%': max_drawdown,
            **params
        })

    return pd.DataFrame(results).drop(columns='volume_col')

# Signal analysis (performance metrics). Used in param_grid_result() function
def evaluate_performance(df):
    """
    Analyzes trading signals to compute performance metrics from executed trades.  
    Tracks entries, exits, PnL, and percentage changes, while calculating win rate, median and max returns, and maximum drawdown.  
    Returns a compact set of key trading statistics.
    """
    trades = []
    entry_price = None
    entry_type = None

    for price, sig in zip(df['close'], df['signal_cycle']):
        if sig in (0, 11, -11): # skip "noise"
            continue 

        # Close & reverse in one bar
        if entry_type == 'long' and sig == -1:
            trades.append({'type': 'long', 'entry': entry_price, 'exit': price, 'pnl': price - entry_price, 'change': round((price - entry_price) / price * 100)})
            entry_price, entry_type = price, 'short' # reverse immediately
            continue

        if entry_type == 'short' and sig == 1:
            trades.append({'type': 'short', 'entry': entry_price, 'exit': price, 'pnl': entry_price - price, 'change': round((entry_price - price) / entry_price * 100)})
            entry_price, entry_type = price, 'long'
            continue

        # Entry
        if entry_type is None:
            if sig == 1:
                entry_price, entry_type = price, 'long'
            elif sig == -1:
                entry_price, entry_type = price, 'short'
    
    trades_df = pd.DataFrame(trades)

    # Metrics
    if 'change' in trades_df.columns:
        win_rate = round((trades_df['change'] > 0).mean() * 100)
        median_change = trades_df['change'].median()
        max_change = trades_df['change'].max()
    else:
        win_rate = median_change = max_change = np.nan

    n_trades = len(trades_df)

    # Drawdown
    if 'pnl' in trades_df.columns:
        cumsum = trades_df['pnl'].cumsum() # running equity
        cummax = cumsum.cummax() # running peak equity
        drawdown = (cummax - cumsum) / cummax * 100
        max_drawdown = round(np.clip(drawdown.max(), 0, 100))
    else:
        max_drawdown = np.nan

    return win_rate, n_trades, median_change, max_change, max_drawdown

results_df = param_grid_result()

In [None]:
# Scoring results by rank to filter out the most productive parameters (only first 4 metric columns are ranked)
# note: it's best to mix ranking with weightening column/metric importance in the future (e.g. [0.1, 0.3, 0.2, 0.15])
row_rank_scores = results_df.iloc[:, :4].rank(pct=True).sum(axis=1)
top_5_params = results_df.loc[row_rank_scores.nlargest(5).index]

In [None]:
from IPython.display import display, Markdown

display(Markdown("""
### Top 5 Parameter Configurations

**Metric Descriptions:**
- **`win_rate_%`**: Percentage of trades that closed profitably.
- **`n_trades`**: Total number of executed trades in the backtest.
- **`median_change_%`**: Median percentage return per trade.
- **`max_change_%`**: Highest percentage return from a single trade.
- **`max_drawdown_%`**: Largest peak-to-trough equity decline during the test.
"""))
top_5_params.style.background_gradient(cmap='Blues', subset=['win_rate_%', 'n_trades', 'median_change_%', 'max_change_%', 'max_drawdown_%'])


### Top 5 Parameter Configurations

**Metric Descriptions:**
- **`win_rate_%`**: Percentage of trades that closed profitably.
- **`n_trades`**: Total number of executed trades in the backtest.
- **`median_change_%`**: Median percentage return per trade.
- **`max_change_%`**: Highest percentage return from a single trade.
- **`max_drawdown_%`**: Largest peak-to-trough equity decline during the test.


Unnamed: 0,win_rate_%,n_trades,median_change_%,max_change_%,max_drawdown_%,candle_interval,vwap_window,z_window,std
138,100.0,4,22.5,29.0,0.0,3min,10,20,2.5
302,100.0,6,13.0,24.0,0.0,5min,20,10,2.0
287,100.0,5,20.0,23.0,0.0,5min,15,20,2.0
282,100.0,5,19.0,23.0,0.0,5min,15,15,2.0
351,100.0,11,9.0,21.0,0.0,5min,30,10,1.5
