# Statistical Algos

## Notebook's Environment

In [None]:
INSTALL_DEPS = False
if INSTALL_DEPS:
  %pip install matplotlib==3.8.3
  %pip installnumpy==1.26.4
  %pip installpandas==2.2.1
  %pip installpandas_market_calendars==4.4.0
  %pip installpytz==2024.1
  %pip installscipy==1.12.0
  %pip installta==0.11.0
  %pip installyfinance==0.2.37

!python --version

## Cloud Environment Setup

In [None]:
import os
import sys
import warnings

# Silences every warning for the rest of the notebook (including pandas
# deprecation warnings) - remove this line when debugging.
warnings.filterwarnings("ignore")

# Detect where the notebook runs and set DATA_PATH / MODEL_PATH accordingly.
# Exactly one of: Colab, Kaggle, or a local machine.
IN_KAGGLE = IN_COLAB = False
try:
    # https://www.tensorflow.org/install/pip#windows-wsl2
    # Importing google.colab only succeeds inside a Colab runtime.
    import google.colab
    from google.colab import drive

    drive.mount("/content/drive")
    DATA_PATH = "/content/drive/MyDrive/EDT dataset"
    MODEL_PATH = "/content/drive/MyDrive/models"
    IN_COLAB = True
    print("Colab!")
except ImportError:
    # Not running in Colab.
    IN_COLAB = False
except Exception as colab_error:
    # Still best-effort (fall through to Kaggle/local), but no longer swallows
    # KeyboardInterrupt/SystemExit and no longer hides why the mount failed.
    IN_COLAB = False
    print(f"Colab setup failed, falling back to Kaggle/local paths: {colab_error!r}")

if not IN_COLAB:
    MODEL_PATH = "./models"
    if "KAGGLE_KERNEL_RUN_TYPE" in os.environ:
        print("Running in Kaggle...")
        # List every input file so the available datasets show up in the output.
        for dirname, _, filenames in os.walk("/kaggle/input"):
            for filename in filenames:
                print(os.path.join(dirname, filename))
        DATA_PATH = "/kaggle/input/"
        IN_KAGGLE = True
        print("Kaggle!")
    else:
        IN_KAGGLE = False
        DATA_PATH = "./data/"
        print("running localhost!")

# Instruments

In [None]:
from constants import *

# Bar interval used for the data file name and passed to the Sharpe helper below.
# NOTE(review): YFinanceOptions and TARGET_FUT are not defined in the visible
# cells; presumably both come from `from constants import *` - confirm.
INTERVAL = YFinanceOptions.M15
# Echo the selected instrument and interval as the cell output.
TARGET_FUT, INTERVAL

## Data Load

In [None]:
import pandas as pd
import numpy as np

# Build the CSV path with os.path.join so a DATA_PATH with or without a
# trailing separator yields a clean path (no "./data//futures_..." doubling).
filename = os.path.join(DATA_PATH, f"futures_{INTERVAL}.csv")
print(filename)
# The "Date" column becomes a parsed datetime index.
futs_df = pd.read_csv(filename, index_col="Date", parse_dates=True)

print(futs_df.shape)
print(futs_df.columns)
futs_df.head(2)

In [None]:
import matplotlib.pyplot as plt

# Full-history close price of the target future, drawn via the explicit
# figure/axes interface.
fig, ax = plt.subplots(figsize=(18, 8))

ax.plot(futs_df[f'{TARGET_FUT}_Close'], label=f'{TARGET_FUT} Close', alpha=0.7)
ax.set_title(f'{TARGET_FUT} Price')
ax.set_xlabel('Date')
ax.set_ylabel('Price')
ax.legend()
ax.grid(True)

fig.tight_layout()
plt.show()

In [None]:
# Keep only the close and volume columns of the target future.
target_fut_df = futs_df.loc[:, [f"{TARGET_FUT}_Close", f"{TARGET_FUT}_Volume"]]
target_fut_df

In [None]:
from quant_equations import get_ou, get_annualized_factor, calc_annualized_sharpe, deflated_sharpe_ratio, modulate_std

# get_ou returns a pair that this notebook treats as (half-life, Hurst exponent)
# for the close series of the target future.
HALF_LIFE, HURST = get_ou(
    target_fut_df,
    f"{TARGET_FUT}_Close",
)

for _label, _value in (("Half-Life:", HALF_LIFE), ("Hurst:", HURST)):
    print(_label, _value)

# Kalman Filter

The Kalman filter consists of states, observations, and transitions.

$$
X_t = H_t Z_t + V_t
$$

$$
Z_t = F_{t-1} Z_{t-1} + G_{t-1} U_{t-1} + W_{t-1}
$$

- $ Z_t $ is the unobservable state, mapped by $ H_t $ to $ X_t $, representing the internal hidden states.
- $ X_t $ is the price.
- $ U_t $ are the control parameters, which are 0 in this case. $ G_t $ links $ U_t $ to $ Z_t $, which is also 0.
- $ F_{t-1} $ is the state transition matrix.
- $ W_t $ and $ V_t $ are the noise terms with covariance matrices $ Q_t $ and $ R_t $, respectively.

The model equations:

1. **Prediction of the Error Covariance:**
$$
P_t = F_{t-1} P_{t-1}^+ F_{t-1}^T + Q_{t-1}
$$
   - Predicts the new error covariance $ P_t $, representing the accuracy of the next prediction.
   - $ Q_{t-1} $ is the process noise covariance.
   - $ F_{t-1} $ is the state transition matrix.
   - $ P_{t-1}^+ $ is the previous (updated) error covariance.

2. **Update of the Error Covariance:**
$$
P_t^+ = (I - K_t H_t) P_t (I - K_t H_t)^T + K_t R_t K_t^T
$$
   - Updates the error covariance.
   - $ P_t^+ $ is the new level of uncertainty.
   - $ K_t $ is the Kalman gain.
   - $ H_t $ is the observation matrix.
   - $ R_t $ is the observation noise covariance.
   - $ I $ is the identity matrix.

3. **Kalman Gain:**
$$
K_t = P_t H_t^T (H_t P_t H_t^T + R_t)^{-1}
$$
   - The gain matrix that determines the correction applied to the prediction.
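   - A small gain means the observation noise $ R_t $ is large relative to the predicted covariance $ P_t $, so the observation $ X_t $ is trusted less and the prediction is barely corrected; a large gain means the observation dominates.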

4. **State Prediction:**
$$
Z_t^- = F_{t-1} Z_{t-1}^+ + G_{t-1} U_{t-1}
$$
   - Predicts the next state from the previous corrected state $ Z_{t-1}^+ $, using the state transition matrix $ F_{t-1} $.
   - $ G_{t-1} U_{t-1} $ represents the control input, which is 0 in this case.

5. **State Update:**
$$
Z_t^+ = Z_t^- + K_t (X_t - H_t Z_t^-) = Z_t^- (1 - K_t H_t) + K_t X_t
$$
   - Updates the state $ Z_t^+ $ after incorporating the new observation $ X_t $.
   - $ X_t - H_t Z_t^- $ is the residual (or innovation), representing the difference between the observed and predicted values.


## Kalman no-lookback MA


$$
y(t) = m(t) + \epsilon(t), \quad \text{("Measurement")} 
$$

$$
m(t) = m(t-1) + \omega(t-1). \quad \text{("State transition")}
$$



$$
m(t \mid t) = m(t \mid t-1) + K(t) (y(t) - m(t \mid t-1)). \quad \text{("State update")}
$$



Variance of the Forecast Error
$$
Q(t) = \text{var}(m(t)) + V_{\epsilon}. \tag{3.17}
$$


The **Kalman gain** is
$$
K(t) = \frac{R(t \mid t-1)}{R(t \mid t-1) + V_{\epsilon}}, \tag{3.18}
$$


and the state variance update is

$$
R(t \mid t) = (1 - K(t)) R(t \mid t-1). \tag{3.19}
$$

Uncertainty:
$$
V_{\epsilon} = R(t \mid t-1) \left( \frac{\tau}{T_{\text{max}} - 1} \right) \tag{3.20}
$$

In [None]:
from signals import signal_kf_bollinger_bands


# NOTE(review): `window` is not referenced again in the visible cells.
window = abs(HALF_LIFE)

# Kalman-filter based Bollinger bands; the band width factor is derived from HURST.
bb_df = signal_kf_bollinger_bands(
    target_fut_df,
    f"{TARGET_FUT}_Close",
    f"{TARGET_FUT}_Volume",
    std_factor=modulate_std(HURST),
)

# %B is the observable fed to the Kalman filter later; fill gaps at both ends.
spread = bb_df["%B"].bfill().ffill()
# Keep uncertainty unscaled, i want it to impact more.
volumes = target_fut_df[f"{TARGET_FUT}_Volume"].to_numpy()

# Neither input of the filter may contain NaNs.
assert not (np.isnan(spread).any() or np.isnan(volumes).any())

bb_df.tail(5)

## Visualize KF BB

In [None]:
# To make the plots easier to read, chart only the most recent bars.
# The zoomed slice gets its own name so the full `bb_df` computed above is not
# overwritten (re-running cells or later analysis still sees every row).
ZOOM = 200
bb_zoom_df = bb_df.tail(ZOOM)

fig = plt.figure(figsize=(18, 14))

# Plot for price and Bollinger Bands (top 8 of 18 grid rows)
ax1 = plt.subplot2grid((18, 1), (0, 0), rowspan=8, colspan=1, fig=fig)
ax1.plot(bb_zoom_df[f'{TARGET_FUT}_Close'], label=f'{TARGET_FUT} Close', color='blue', alpha=0.6, linestyle='--')
ax1.plot(bb_zoom_df['MA'], label='Moving Average', color='red', linestyle='-.')
ax1.plot(bb_zoom_df['U'], label='Upper Bollinger Band', color='green')
ax1.plot(bb_zoom_df['L'], label='Lower Bollinger Band', color='green', alpha=0.7)
ax1.set_title(f'Bollinger Bands for {TARGET_FUT}')
ax1.set_xlabel('Date')
ax1.set_ylabel('Price')
ax1.legend()
ax1.grid(True)

# Plot for volume (next 4 grid rows), sharing the time axis with the price panel
ax2 = plt.subplot2grid((18, 1), (8, 0), rowspan=4, colspan=1, sharex=ax1, fig=fig)
ax2.bar(bb_zoom_df.index, bb_zoom_df[f'{TARGET_FUT}_Volume'], label=f'{TARGET_FUT} Volume', color='grey')
ax2.set_title(f'Volume for {TARGET_FUT}')
ax2.set_xlabel('Date')
ax2.set_ylabel('Volume')
ax2.legend()
ax2.grid(True)

plt.tight_layout()
plt.show()

## Backtest

In [None]:
from tqdm import tqdm
import itertools
import math
from scipy.stats import skew, kurtosis

def bollinger_band_backtest(price_df, target_col, volume_col, std_factor, stoploss_pct=0.9, t_max=0.1):
    """Backtest a mean-reversion strategy on Kalman-filter Bollinger bands.

    Entry: go long when the price crosses below the lower band ('SB'), go short
    when it crosses above the upper band ('SS'). Exit: when the price crosses
    back over the moving average ('SBS' for longs, 'SSB' for shorts) or when
    the stop-loss is hit. A return is booked only on the bar a trade closes.

    Args:
        price_df: frame holding `target_col` and `volume_col`; it is copied,
            not modified.
        target_col: name of the price column.
        volume_col: name of the volume column (forwarded to
            `signal_kf_bollinger_bands`).
        std_factor: band width factor forwarded to `signal_kf_bollinger_bands`.
        stoploss_pct: adverse move, as a fraction of the entry price, that
            closes the position.
        t_max: forwarded to `signal_kf_bollinger_bands`.

    Returns:
        (df, stats_df): the per-bar frame with bands, signals, positions and
        returns, and a one-row frame of summary statistics.
    """
    df = price_df.copy()
    bb_df = signal_kf_bollinger_bands(df, target_col, volume_col, std_factor, t_max=t_max)

    for band_col in ('MA', 'SD', 'U', 'L'):
        df[band_col] = bb_df[band_col]
    # Each signal is the rising edge of a condition: non-zero only on the bar
    # where the condition turns from False to True.
    df['SB'] = (df[target_col] < bb_df['L']).astype(int).diff().clip(0) * +1
    df['SS'] = (df[target_col] > bb_df['U']).astype(int).diff().clip(0) * -1
    df['SBS'] = (df[target_col] > bb_df['MA']).astype(int).diff().clip(0) * -1
    df['SSB'] = (df[target_col] < bb_df['MA']).astype(int).diff().clip(0) * +1
    df['Closed'] = 0
    df['Position'] = 0
    df['Ret'] = 0.
    entry = position = 0
    for i, row in df.iterrows():
        price = row[target_col]
        is_long = position == 1
        is_short = position == -1
        signal_exit = (is_long and row['SBS'] == -1) or (is_short and row['SSB'] == 1)
        # Stop-loss is measured from the ENTRY price. The previous version
        # compared the price with itself minus an offset, which can never be
        # true for a positive entry, so the stop never triggered.
        stop_exit = (is_long and price <= entry * (1 - stoploss_pct)) or \
                    (is_short and price >= entry * (1 + stoploss_pct))
        if signal_exit or stop_exit:
            if is_long:
                df.loc[i, 'Ret'] = (price - entry) / entry
                df.loc[i, 'Closed'] = 1
            else:
                df.loc[i, 'Ret'] = (entry - price) / entry
                df.loc[i, 'Closed'] = -1
            position = 0

        # A new position may open on the same bar a previous one closed.
        if position == 0 and (row['SB'] == 1 or row['SS'] == -1):
            entry = price
            position = 1 if row['SB'] == 1 else -1
        df.loc[i, 'Position'] = position
        # TODO: add unrealized returns to check for DDs.

    # Compounded equity curve of the realised trade returns.
    equity = (1 + df['Ret']).cumprod()
    df['cRets'] = equity - 1

    variance = df['Ret'].var()
    df['Drawdown'] = equity.div(equity.cummax()) - 1
    max_drawdown = df['Drawdown'].min()
    # Longest run of consecutive bars spent below the previous equity peak.
    drawdown_length = (df['Drawdown'] < 0).astype(int).groupby(df['Drawdown'].eq(0).cumsum()).cumsum().max()
    sharpe = calc_annualized_sharpe(df['Ret'], period=INTERVAL)
    # A trade is counted on every bar where the position changes to non-flat.
    trades = (df['Position'].diff().ne(0) & df['Position'].ne(0)).sum()
    rets = df['Ret'].to_numpy()
    stats_df = pd.DataFrame({
        "T_max": [t_max],
        "Standard_Factor": [std_factor],
        "stoploss_pct": [stoploss_pct],
        "Cumulative_Returns": [df['cRets'].iloc[-1]],
        "Max Ret": [df['Ret'].max()],
        "Max Loss": [df['Ret'].min()],
        "Variance": [variance],
        "STD": [np.sqrt(variance)],
        "Max_Drawdown": [max_drawdown],
        "Drawdown_Length": [drawdown_length],
        "Sharpe": [sharpe],
        "Trades_Count": [trades],
        "Trades_per_Interval": [trades / len(df)],
        "Trading_Intervals": [len(df)],
        "Rets": [rets],
        "Rets_Skew": [skew(rets)],
        "Rets_Kurt": [kurtosis(rets)],
    })

    return df, stats_df

def param_search_bbs(df, target_col, volume_col, hurst=HURST):
    """Grid-search the Bollinger backtest and report the best run per metric.

    Every combination of `t_max` and std adjustment is backtested with
    `bollinger_band_backtest`. The best run is kept separately for Sharpe,
    cumulative returns and max drawdown (all maximised; drawdown is <= 0 so
    the largest value is the shallowest). Because several configurations are
    tried, the Sharpe of each winner is replaced by its deflated Sharpe ratio.

    Args:
        df: price frame forwarded to `bollinger_band_backtest`.
        target_col: name of the price column.
        volume_col: name of the volume column.
        hurst: Hurst exponent passed to `modulate_std` (the default is the
            value of HURST at the time this function is defined).

    Returns:
        DataFrame with one row per metric that produced a winner, labelled in
        the 'Metric' column. A metric whose score was NaN for every
        combination is omitted; if no metric has a winner, an empty frame with
        only a 'Metric' column is returned.
    """
    std_adjustments = [0.05, 0.25, 0.5]
    t_maxs = [0.1, 0.5, 0.9]
    combinations = list(itertools.product(t_maxs, std_adjustments))
    n_tests = len(combinations)

    # Report label -> stats column to maximise (order defines the row order).
    objectives = {
        'Sharpe': 'Sharpe',
        'Cumulative Returns': 'Cumulative_Returns',
        'Max Drawdown': 'Max_Drawdown',
    }
    best_scores = {label: -float('inf') for label in objectives}
    best_stats = {label: None for label in objectives}
    sharpes = []

    for t_max, adjustment in tqdm(combinations, desc="param_search_bbs"):
        std_factor = modulate_std(hurst, adjustment=adjustment)
        _, run_stats = bollinger_band_backtest(df, target_col, volume_col, std_factor, t_max=t_max)

        sharpes.append(run_stats['Sharpe'].iloc[0])
        for label, column in objectives.items():
            score = run_stats[column].iloc[0]
            # NaN never compares greater, so NaN scores are never selected.
            if score > best_scores[label]:
                best_scores[label] = score
                best_stats[label] = run_stats.copy()

    # We're datamining, we need to deflate the sharpe!
    winners = []
    for label, winner in best_stats.items():
        if winner is None:
            # Every combination scored NaN for this metric (e.g. no trades);
            # skip it instead of failing on `None`.
            continue
        winner['Sharpe'] = deflated_sharpe_ratio(winner['Sharpe'].iloc[0],
                                                 len(winner['Rets'].iloc[0]),
                                                 winner['Rets_Skew'].iloc[0],
                                                 winner['Rets_Kurt'].iloc[0],
                                                 sharpes,
                                                 n_tests)
        winners.append(winner.assign(Metric=label))

    if not winners:
        return pd.DataFrame(columns=['Metric'])
    return pd.concat(winners, ignore_index=True)


# Run the grid search, then show the configuration with the best cumulative
# return (the raw per-bar returns array is hidden from the display).
stats_df = param_search_bbs(
    futs_df,
    f'{TARGET_FUT}_Close',
    f'{TARGET_FUT}_Volume',
    hurst=HURST,
)
cumret_df = stats_df.loc[stats_df["Metric"].eq("Cumulative Returns")]
cumret_df.drop(columns=["Rets"])

# KF

$$ x(t+1) = x(t) + \omega(t)$$ 

where $ x(t) $ is the stock close price at time $ t $, with a normal distribution $ N(\mu_t, P_t) $, and $ \omega(t) $ is the process noise, distributed as $ N(0, Q_t) $. 

The spread between consecutive close observables is white noise with a time-varying variance.

The measurement $ z(t) $ would be the observed daily close price. 
Because the observed price itself carries no measurement uncertainty, we interpret the measurement noise, $ N(0, R_t) $, as our belief in how meaningful the observed price is. If the observed price has a large impact on future observables, i.e., if $R_t$ is small, the Kalman filter moves our prediction closer to the observed price. Conversely, if we are not sure, i.e., if $ R_t $ is large, the observed price contributes little to the updated prediction. 

Assuming that a close price with a large trading volume compared to the previous day is more likely to affect the next day's price:

$$
R_t = P_t \ast \frac{V_{t-1}}{\min(V_{t-1}, V_t)}
$$

where $ V_t $ is the daily trading volume on day $ t $ (Sinclair, 2010).

Init:
- $\mu_0 = z(0)$: The last close price on the first daily.
- $P_0$: The variance of  the least interval.
- $Q_t$: the variance of the process noise $\omega(t)$, initialised as $Q_0 = \frac{\sigma}{1-\sigma}$, where $\sigma$ is $10^{-4}$ (Chan, 2013).

https://thescipub.com/pdf/jcssp.2023.739.748.pdf

For a system with position, velocity, and acceleration, the state transition matrix can be derived from the kinematic equations.
Assuming a discrete-time system with constant time step $ \Delta t $:
- Position (price) $ x_t $
- Velocity (speed) $ v_t $
- Acceleration $ a_t $

The state vector $ Z_t $ is:

$$
Z_t = \begin{bmatrix} x_t \\ v_t \\ a_t \end{bmatrix}
$$

The kinematic equations for constant acceleration are:

$$
x_{t+1} = x_t + v_t \Delta t + \frac{1}{2} a_t \Delta t^2
$$

$$
v_{t+1} = v_t + a_t \Delta t
$$

$$
a_{t+1} = a_t \quad \text{(assuming acceleration remains constant over the time step)}
$$

These equations can be represented in matrix form as:

$$
\begin{bmatrix} x_{t+1} \\ v_{t+1} \\ a_{t+1} \end{bmatrix} = \begin{bmatrix} 1 & \Delta t & \frac{1}{2} \Delta t^2 \\ 0 & 1 & \Delta t \\ 0 & 0 & 1 \end{bmatrix} \begin{bmatrix} x_t \\ v_t \\ a_t \end{bmatrix}
$$

Thus, the state transition matrix $ F $ is:

$$
F = \begin{bmatrix} 1 & \Delta t & \frac{1}{2} \Delta t^2 \\ 0 & 1 & \Delta t \\ 0 & 0 & 1 \end{bmatrix}
$$

The observation matrix $ H $ maps the state vector $ Z_t $ to the observation $ X_t $.

Since the \%B measure directly relates to the position component of the state vector, the observation matrix $ H $ is:
$$
H = \begin{bmatrix} 1 & 0 & 0 \end{bmatrix}
$$

This means that the observed \%B measure is a direct observation of the position without direct observation of velocity or acceleration.


In [None]:
from sklearn.metrics import mean_squared_error
from pykalman import KalmanFilter

def signal_kf(observables, volumes, em_train_perc=0.80, em_iter=15, delta_t=1, q_t=0.0001, r_t=0.1):
    """Filter a series with a constant-acceleration Kalman filter and volume-scaled noise.

    The first `em_train_perc` share of `observables` is used to fit the noise
    covariances and the initial state with EM. Each remaining observation is
    then filtered one step at a time. At step i the update starts from the
    standard filtered state of observations[:i] (EM-fitted observation noise)
    and uses an observation noise rescaled by volume:
    Rt = P * V[i-1] / min(V[i-1], V[i]), where P is the position variance
    left by the previous adaptive update.

    Args:
        observables: 1-D pandas Series or array-like of observations.
        volumes: 1-D array-like of volumes, same length as `observables`.
        em_train_perc: share of the series used for EM fitting.
        em_iter: number of EM iterations.
        delta_t: time step used in the state transition matrix.
        q_t: initial process-noise variance (diagonal of Q) before EM.
        r_t: initial observation-noise variance before EM.

    Returns:
        DataFrame with one row per out-of-sample observation: 'X'
        (observation), 'Filtered_X' (filtered position of the PREVIOUS step,
        i.e. lagged by one observation so it never uses the current X), 'Z1' /
        'Z2' (filtered velocity / acceleration), 'Uncertainty' (filtered
        position variance), 'Residuals' (X - Filtered_X) and 'KG_X' / 'KG_Z1' /
        'KG_Z2' (Kalman gain per state component). When `observables` is a
        Series its index is kept on the result.

    Raises:
        ValueError: on a length mismatch or an empty training window.
    """
    # Work on plain float arrays: positional integer indexing of a
    # label-indexed Series is deprecated in pandas and ambiguous.
    obs = np.asarray(observables, dtype=float)
    vols = np.asarray(volumes, dtype=float)
    if len(obs) != len(vols):
        raise ValueError(f"observables and volumes must have the same length, got {len(obs)} and {len(vols)}")
    train_size = int(len(obs) * em_train_perc)
    if train_size < 1:
        raise ValueError("em_train_perc leaves no observations to fit the filter on")
    out_index = observables.index[train_size:] if isinstance(observables, pd.Series) else None

    # State transition matrix (position, velocity, acceleration)
    F = np.array([
        [1, delta_t, 0.5 * delta_t**2],
        [0, 1, delta_t],
        [0, 0, 1]
    ])
    # Observation matrix: only the position is observed
    H = np.array([[1, 0, 0]])
    # Initial values don't have that much affect down the line.
    initial_x = np.mean(obs[:train_size])
    initial_var = np.var(obs[:train_size])
    # https://pykalman.github.io/
    kf = KalmanFilter(
        transition_matrices=F,
        observation_matrices=H,
        initial_state_mean=np.array([initial_x, 0, 0]),  # initial velocity and acceleration are zero
        initial_state_covariance=np.eye(3) * initial_var,  # Covariance matrix for the state
        observation_covariance=np.array([[r_t]]),  # Observation Noise.
        transition_covariance=np.eye(3) * q_t,  # Q, Process Noise.
        em_vars=['transition_covariance', 'observation_covariance',
                 'initial_state_mean', 'initial_state_covariance']
    )

    # 'Train'. EM to find the best Model Var
    kf = kf.em(obs[:train_size], n_iter=em_iter)
    train_means, train_covariances = kf.filter(obs[:train_size])
    # `base_*` always equals kf.filter(obs[:i])[-1]; it is advanced one
    # observation per step instead of re-filtering the whole history (O(n^2)).
    base_mean, base_covariance = train_means[-1], train_covariances[-1]
    state_covariance = base_covariance
    Q = np.asarray(kf.transition_covariance)  # process noise as fitted by EM

    filtered_x = [base_mean[0]]  # seed: last in-sample estimate
    hidden_1 = []
    hidden_2 = []
    position_variances = []
    kalman_gains = []

    for i in tqdm(range(train_size, len(obs))):
        # Rt = Pt * Vt-1 / min(Vt-1, Vt)
        if vols[i-1] != 0 and vols[i] != 0:
            Rt = state_covariance[0, 0] * vols[i-1] / min(vols[i-1], vols[i])
        else:
            Rt = state_covariance[0, 0]
        assert not np.isnan(Rt).any(), f"{Rt} = {state_covariance[0, 0] } * {vols[i-1]} / {min(vols[i-1], vols[i])} at {i}"
        R = np.array([[Rt]])
        observation = np.array([obs[i]])

        # Predict + update with the volume-adaptive observation noise.
        state_mean, state_covariance = kf.filter_update(
            filtered_state_mean=base_mean,
            filtered_state_covariance=base_covariance,
            observation=observation,
            observation_matrix=H,
            observation_covariance=R
        )

        # Kalman gain K = P^- H^T (H P^- H^T + R)^-1 uses the PREDICTED
        # covariance P^- = F P F^T + Q, not the already-updated one.
        predicted_covariance = F @ base_covariance @ F.T + Q
        kalman_gain = predicted_covariance @ H.T @ np.linalg.inv(H @ predicted_covariance @ H.T + R)

        kalman_gains.append(kalman_gain[:, 0])
        filtered_x.append(state_mean[0])
        position_variances.append(state_covariance[0, 0])
        hidden_1.append(state_mean[1])
        hidden_2.append(state_mean[2])

        # Advance the baseline with the EM-fitted (non-adaptive) noise.
        base_mean, base_covariance = kf.filter_update(
            filtered_state_mean=base_mean,
            filtered_state_covariance=base_covariance,
            observation=observation
        )

    # Align with the observations: the seed shifts every estimate by exactly
    # one row, so drop the last one. (Slicing by `delta_t` only worked for 1.)
    lagged_x = np.array(filtered_x[:-1])
    out_of_sample = obs[train_size:]

    results = pd.DataFrame({
        'X': out_of_sample,
        'Z1': hidden_1,
        'Z2': hidden_2,
        'Filtered_X': lagged_x,
        'Uncertainty': position_variances,
        'Residuals': out_of_sample - lagged_x,
        'KG_X': [kg[0] for kg in kalman_gains],
        'KG_Z1': [kg[1] for kg in kalman_gains],
        'KG_Z2': [kg[2] for kg in kalman_gains]
    }, index=out_index)

    return results

# Filter the %B spread and score the filtered estimate against the observations.
results = signal_kf(spread, volumes=volumes)
mse = mean_squared_error(y_true=results['X'], y_pred=results['Filtered_X'])
print(f'MSE {mse}')
results.head()

## Visuals

In [None]:
import matplotlib.pyplot as plt

# Five stacked panels: price, observed vs filtered spread, hidden states,
# residuals and Kalman gain.
fig, axs = plt.subplots(5, gridspec_kw={'height_ratios': [3, 3, 1, 1, 1]}, figsize=(18, 16))

# Plot the future's close prices over the same (out-of-sample) window as `results`
axs[0].plot(futs_df.index[-len(results['X']):], futs_df[f'{TARGET_FUT}_Close'].tail(len(results['X'])), label='Future Close', color='blue')
axs[0].set_title(f'{TARGET_FUT} Close Prices')
axs[0].legend()

# Plot the actual vs filtered observables with uncertainty tunnel
# NOTE(review): 'Uncertainty' holds the filtered position variance, so the
# tunnel is estimate +/- variance, not +/- one standard deviation.
axs[1].plot(results.index, results['X'], label='Actual Spread', linestyle="-.")
axs[1].plot(results.index, results['Filtered_X'], label='Kalman Filtered Spread', alpha=0.7)
axs[1].fill_between(results.index,
                    results['Filtered_X'] - results['Uncertainty'],
                    results['Filtered_X'] + results['Uncertainty'],
                    label='Uncertainty', color="gray", alpha=0.5)
axs[1].set_title(f'{TARGET_FUT} Actual vs Kalman Filtered Spread')
axs[1].legend()
axs[1].set_ylim(-0.5,2)

# Plot the hidden states (this panel was previously mislabelled "Residuals")
axs[2].plot(results.index, results['Z1'], label='Hidden 1')
axs[2].plot(results.index, results['Z2'], label='Hidden 2', alpha=0.7)
axs[2].set_title(f'{TARGET_FUT} Kalman Filter Hidden States')
axs[2].axhline(y=0, color='black', linestyle='--')
axs[2].legend()

# Plot the residuals
axs[3].plot(results.index, results['Residuals'], label='Residuals', color='red')
axs[3].set_title(f'{TARGET_FUT} Kalman Filter Residuals')
axs[3].axhline(y=0, color='black', linestyle='--')
axs[3].legend()

# Kalman Gain, one bar series per state component
axs[4].bar(results.index, results['KG_X'], label='Measure')
axs[4].bar(results.index, results['KG_Z1'], label='Hidden 1')
axs[4].bar(results.index, results['KG_Z2'], label='Hidden 2')
axs[4].set_title(f'{TARGET_FUT} Kalman Gain')
axs[4].legend()

plt.tight_layout()
plt.show()

# Backtests

In [None]:
from scipy.stats import skew, kurtosis

def kalman_backtest(observables, volumes, thresholds=[0, 0.5, 1], stoploss_pct=0.5, delta_t=1, q_t=0.00010001000100010001, em_train_perc=0.2, em_iter=15):
    """Backtest threshold crossings of the Kalman-filtered observable.

    `signal_kf` is run first; signals are rising edges of the filtered value
    against `thresholds` (lower, middle, upper): long entry at or below the
    lower level, short entry at or above the upper level, exit of either side
    when the middle level is crossed, or on the stop-loss. Entry prices and
    returns are taken from the 'X' column of the `signal_kf` output.

    NOTE(review): returns are divided by the entry value of 'X'; if that
    observable can be zero or negative the returns are not meaningful - confirm.

    Returns:
        (df, stats_df): the per-row frame with signals, positions and returns,
        and a one-row frame of summary statistics.
    """
    def _rising_edge(mask):
        # Positive only on the row where `mask` flips from False to True.
        return mask.astype(int).diff().clip(0)

    df = signal_kf(observables, volumes, em_train_perc, em_iter, delta_t, q_t).copy()

    filtered = df['Filtered_X']
    df['SB'] = _rising_edge(filtered <= thresholds[0]) * +1
    df['SS'] = _rising_edge(filtered >= thresholds[2]) * -1
    df['SBS'] = _rising_edge(filtered >= thresholds[1]) * -1
    df['SSB'] = _rising_edge(filtered <= thresholds[1]) * +1
    df['Closed'] = 0
    df['Position'] = 0
    df['Ret'] = 0.0

    target_col = 'X'
    entry = position = 0
    for i, row in tqdm(df.iterrows(), desc="kalman_backtest"):
        price = row[target_col]
        is_long = position == 1
        is_short = position == -1
        signal_exit = (is_long and row['SBS'] == -1) or (is_short and row['SSB'] == 1)
        stop_exit = (is_long and price <= entry * (1 - stoploss_pct)) or \
                    (is_short and price >= entry * (1 + stoploss_pct))
        if signal_exit or stop_exit:
            if is_long:
                df.loc[i, 'Ret'] = (price - entry) / entry
                df.loc[i, 'Closed'] = 1
            else:
                df.loc[i, 'Ret'] = (entry - price) / entry
                df.loc[i, 'Closed'] = -1
            position = 0

        # Only open when flat; this may happen on the row a trade just closed.
        if position == 0 and (row['SB'] == 1 or row['SS'] == -1):
            entry = price
            position = 1 if row['SB'] == 1 else -1
        df.loc[i, 'Position'] = position

    # Compounded curve of the realised trade returns.
    equity = (1 + df['Ret']).cumprod()
    df['cRets'] = equity - 1

    variance = df['Ret'].var()
    df['Drawdown'] = equity.div(equity.cummax()) - 1
    max_drawdown = df['Drawdown'].min()
    # Longest run of consecutive rows spent below the previous equity peak.
    drawdown_length = (df['Drawdown'] < 0).astype(int).groupby(df['Drawdown'].eq(0).cumsum()).cumsum().max()
    sharpe = calc_annualized_sharpe(df['Ret'], period=INTERVAL)
    trades = (df['Position'].diff().ne(0) & df['Position'].ne(0)).sum()
    n_rows = len(df)
    stats_df = pd.DataFrame({
        "Thresholds": [thresholds],
        "Stoploss_pct": [stoploss_pct],
        "Cumulative_Returns": [df['cRets'].iloc[-1]],
        "Max Ret": [df['Ret'].max()],
        "Max Loss": [df['Ret'].min()],
        "Variance": [variance],
        "STD": [np.sqrt(variance)],
        "Max_Drawdown": [max_drawdown],
        "Drawdown_Length": [drawdown_length],
        "Sharpe": [sharpe],
        "Trades_Count": [trades],
        "Trades_per_Interval": [trades / n_rows],
        "Trading_Intervals": [n_rows],
        "Rets": [df['Ret'].to_numpy()],
        "Rets_Skew": [skew(df['Ret'].to_numpy())],
        "Rets_Kurt": [kurtosis(df['Ret'].to_numpy())],
    })

    return df, stats_df

# Backtest the Kalman signal on the %B spread with the default thresholds and
# show the summary without the raw returns array.
df, stats_df = kalman_backtest(observables=spread, volumes=volumes)
stats_df.drop(columns=["Rets"])