# Backtest Statistics

### Loading libraries

In [1]:
# Randomness
import random

# Numerical Computing
import numpy as np

# Data Manipulation
import pandas as pd
from pandas import Timestamp

# Data Visualization
import seaborn as sns
import matplotlib.pyplot as plt

import plotly.graph_objects as go
import plotly.io as pio
%matplotlib inline

# Date & Time
from datetime import datetime, timedelta

# Typing
from typing import Tuple, List, Dict, Union, Optional, Any, Generator

# Scikit-Learn
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import MinMaxScaler
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import BaggingClassifier
from sklearn.datasets import make_classification
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.model_selection._split import _BaseKFold
from sklearn.model_selection import StratifiedKFold, GridSearchCV, RandomizedSearchCV
from sklearn.metrics import accuracy_score, precision_score, recall_score, plot_roc_curve, log_loss

# Scientific Statistical Python
import scipy.cluster.hierarchy as sch
from scipy.stats import jarque_bera
from scipy.stats import rv_continuous, kstest, norm

### Types of Backtest Statistics: General Characteristics
#### Deriving the Timing of Bets From a Series of Target Positions

In [2]:
def get_bets_timing(tPos: pd.Series) -> pd.Index:
    """Return the timestamps at which bets end in a series of target positions.

    A bet is treated as finished when the position is flattened (a non-zero
    position becomes zero) or flipped (the position changes sign). The last
    timestamp of the series is always included so the final bet is counted.

    Args:
        tPos: Target positions indexed by time.

    Returns:
        Sorted index of bet-ending timestamps. Empty when ``tPos`` is empty.
    """
    if tPos.shape[0] == 0:
        return tPos.index[:0]

    # Flattening: the position is zero now and was non-zero one step earlier.
    flat_now = tPos[tPos == 0].index
    # The first observation has no predecessor. A plain shift leaves NaN
    # there, and NaN != 0 is True, which would wrongly mark a series that
    # starts flat as ending a bet at its first timestamp. Treat the missing
    # predecessor as "no position" instead.
    prev_pos = tPos.shift(1, fill_value=0)
    held_before = prev_pos[prev_pos != 0].index
    bets = flat_now.intersection(held_before)

    # Flips: consecutive positions with opposite signs have a negative product.
    sign_product = tPos.iloc[1:] * tPos.iloc[:-1].values
    bets = bets.union(sign_product[sign_product < 0].index).sort_values()

    # The final observation always closes the last bet.
    if tPos.index[-1] not in bets:
        bets = bets.append(tPos.index[-1:])
    return bets

In [3]:
def get_holding_period(tPos: pd.Series) -> float:
    """Estimate the average holding period, in days, of a position series.

    The function tracks a position-weighted average entry time. Whenever the
    position is reduced or flipped, it records the time elapsed since that
    average entry together with the size that was closed. The result is the
    size-weighted mean of those elapsed times.

    Args:
        tPos: Target positions indexed by timestamps (the index must support
            subtraction yielding timedeltas).

    Returns:
        Weighted average holding period in days, or ``np.nan`` when the
        series is empty or no position was ever reduced.
    """
    n_obs = tPos.shape[0]
    if n_obs == 0:
        return np.nan

    pos = tPos.to_numpy(dtype=float)
    # Time elapsed since the first observation, expressed in days.
    elapsed = np.asarray(
        (tPos.index - tPos.index[0]) / np.timedelta64(1, 'D'), dtype=float
    )

    # Accumulate in plain lists: growing a DataFrame one row at a time is
    # quadratic and produces object-dtype columns.
    durations: List[float] = []
    weights: List[float] = []
    entry_time = 0.0
    for i in range(1, n_obs):
        prev, cur = pos[i - 1], pos[i]
        delta = cur - prev
        if delta * prev >= 0:    # increased or unchanged
            if cur != 0:
                # Blend the old entry time with the time of the added size.
                entry_time = (entry_time * prev + elapsed[i] * delta) / cur
        elif cur * prev < 0:    # flip: the whole previous position is closed
            durations.append(elapsed[i] - entry_time)
            weights.append(abs(prev))
            entry_time = elapsed[i]    # the new position starts now
        else:    # partial or full reduction without changing side
            durations.append(elapsed[i] - entry_time)
            weights.append(abs(delta))

    held = np.asarray(durations, dtype=float)
    size = np.asarray(weights, dtype=float)
    # nansum skips records produced by missing positions.
    total_size = np.nansum(size)
    if total_size > 0:
        return float(np.nansum(held * size) / total_size)
    return np.nan

#### Deriving `HHI` Concentration

In [4]:
def get_HHI(betRet: pd.Series) -> float:
    """Compute the normalized HHI concentration of a series of bet returns.

    Each return is converted to its share of the total, the squared shares
    are summed, and the sum is rescaled using ``1 / N`` where ``N`` is the
    number of observations.

    Args:
        betRet: Returns per bet.

    Returns:
        The normalized concentration, or ``np.nan`` when there are two or
        fewer observations.
    """
    n_obs = betRet.shape[0]
    if n_obs <= 2:
        return np.nan

    inv_n = n_obs ** (-1)
    shares = betRet / betRet.sum()
    raw_hhi = (shares ** 2).sum()
    # Rescale using the 1/N value of the raw index.
    return (raw_hhi - inv_n) / (1.0 - inv_n)

### Drawdown & Time Under Water
#### Deriving The Sequence of `DD & TuW`

In [7]:
def compute_DD_TuW(series: pd.Series, dollars: bool = False) -> Tuple[pd.Series, pd.Series]:
    """Compute the drawdowns of a series and the time between them.

    For every high-water mark (running maximum) that is followed by a lower
    value, the drawdown is measured from that mark to the lowest value
    observed while the mark stood.

    Args:
        series: Performance series indexed by timestamps.
        dollars: When True, drawdowns are absolute differences
            (``hwm - min``); otherwise they are fractions of the high-water
            mark (``1 - min / hwm``).

    Returns:
        A tuple ``(dd, tuw)``. ``dd`` holds the drawdowns indexed by the time
        each high-water mark was first reached. ``tuw`` holds the time, in
        years, between consecutive entries of ``dd``, indexed by the earlier
        one; it has one entry fewer than ``dd``.
    """
    frame = series.to_frame('pnl')
    frame['hwm'] = series.expanding().max()

    # Lowest value observed while each high-water mark was in force.
    troughs = frame.groupby('hwm').min().reset_index()
    troughs.columns = ['hwm', 'min']
    troughs.index = frame['hwm'].drop_duplicates(keep='first').index    # time of hwm
    troughs = troughs[troughs['hwm'] > troughs['min']]    # hwm followed by a drawdown

    if dollars:
        dd = troughs['hwm'] - troughs['min']
    else:
        dd = 1 - troughs['min'] / troughs['hwm']

    # Recent pandas versions reject the ambiguous 'Y' unit of
    # np.timedelta64, so use an explicit year of 365.2425 days, the same
    # length numpy assigns to that unit.
    one_year = pd.Timedelta(days=365.2425)
    # NOTE(review): this measures the gap between successive high-water marks
    # that were followed by a drawdown, not the time until the series
    # recovers - confirm this is the intended definition.
    tuw = ((troughs.index[1:] - troughs.index[:-1]) / one_year).values    # in years
    tuw = pd.Series(tuw, index=troughs.index[:-1])
    return dd, tuw