In [27]:
import pandas as pd
from backtesting import Backtest, Strategy


# Reading in data and setting up training and testing sets
- Here we have set up a 70 / 30 training / testing set data split
- The reason for setting up training and testing sets is so we can optimise parameters on the training set, and then test them on our testing set to see if the parameters work on unseen data

In [28]:
df = pd.read_csv('ETHUSDT_1h_data.csv', parse_dates=True, index_col=0)
print(df)

#only use data from 2024 onwards
# df = df[df.index > pd.Timestamp('12-01-2024')]

split_date = df.index[int(len(df) * 0.7)]

df_train = df.loc[:split_date]
df_test  = df.loc[split_date:]

print('Training set:')
print(df_train.describe())
print('Testing set:')
print(df_test.describe())





                        Open     High      Low    Close
timestamp                                              
2020-10-30 18:00:00   381.67   383.94   381.39   382.75
2020-10-30 19:00:00   382.70   383.98   382.10   383.18
2020-10-30 20:00:00   383.14   384.52   382.32   383.93
2020-10-30 21:00:00   383.94   384.34   381.17   383.91
2020-10-30 22:00:00   383.92   384.78   382.81   382.91
...                      ...      ...      ...      ...
2025-10-29 13:00:00  4016.01  4029.41  3983.14  3995.17
2025-10-29 14:00:00  3995.16  4000.08  3973.53  3988.57
2025-10-29 15:00:00  3988.57  3994.56  3941.61  3951.71
2025-10-29 16:00:00  3951.72  3977.27  3926.00  3962.21
2025-10-29 17:00:00  3962.20  3975.43  3959.83  3972.72

[43780 rows x 4 columns]
Training set:
               Open          High           Low         Close
count  30646.000000  30646.000000  30646.000000  30646.000000
mean    2181.524342   2194.012414   2168.158066   2181.609021
std      927.241303    932.898516    921.01212

# Defining our strategy and its parameters
- We define our strategy as `SMAStrategy` which is an instance of `Strategy` (a built in class)
- We define the parameters that our strategy will use
- Our strategy has to have these two functions: `init` and `next`
## `init` function
- init (initialise) is the function ran when the strategy is created
- we use this function to define the logic that our trading bot will use
- in this example we precompute short and long simple moving averages (SMAs) of the Close price
- these precomputed values can then easily be used in the next function

## `next` function
- during the backtest, the next function will be ran on every single data point
- this is where we include the actual trade execution logic
- here we will enter a position (buy) when the short SMA crosses above the long SMA
- we will close the position (sell) when the short SMA crosses below the long SMA
- NOTE: this is simple logic where we only buy and sell spot, no shorts, we always buy with maximum available capital

In [29]:
class SMAStrategy(Strategy):
    sma_short_period = 10
    sma_long_period = 30

    def init(self):
        close_prices = pd.Series(self.data.Close, index=self.data.index)
        sma_short = close_prices.rolling(window=self.sma_short_period).mean()
        sma_long = close_prices.rolling(window=self.sma_long_period).mean()
        self.sma_short = self.I(lambda: sma_short, name='SMA_SHORT')
        self.sma_long = self.I(lambda: sma_long, name='SMA_LONG')

    def next(self):

        #short term sma crosses above long term, signals reversal in momentum
        #buy when crossover first happens, when prev candle did not cross
        if self.sma_short[-1] > self.sma_long[-1] and self.sma_short[-2] <= self.sma_long[-2]:
            self.buy()
        elif self.sma_short[-1] < self.sma_long[-1] and self.sma_short[-2] >= self.sma_long[-2]:
            self.position.close()

# Running a backtest on the strategy
## Backtest function parameter explanation:
- `df_train` - our dataset that we will run the strategy on
- `SMAStrategy` - the instance of Strategy that we defined, the trading logic to be backtested
- `cash` - how much money the backtest will begin with
- `exclusive_orders` - whether or not we can have multiple trades open at the same time, in this instance True means we cannot
- `finalize_trades` - at the end of the backtest any open trades will be closed at the final price in the dataset

## Displaying key performance metrics
- here we print the stats from the backtest
- the power of backtesting.py is these metrics, they are all calculated for us allowing us to easily evaluate a strategy across these metrics

In [30]:
bt_train = Backtest(df_train, SMAStrategy, cash=1000000, exclusive_orders=True, finalize_trades=True)
stats = bt_train.run()
print(stats)

Start                     2020-10-30 18:00:00
End                       2024-04-30 11:00:00
Duration                   1277 days 17:00:00
Exposure Time [%]                    54.04947
Equity Final [$]                   6886758.21
Equity Peak [$]                    8233099.47
Return [%]                          588.67582
Buy & Hold Return [%]               678.20473
Return (Ann.) [%]                    73.44109
Volatility (Ann.) [%]               109.16883
CAGR [%]                             73.53767
Sharpe Ratio                          0.67273
Sortino Ratio                         2.28299
Calmar Ratio                          1.26382
Alpha [%]                           278.60017
Beta                                   0.4572
Max. Drawdown [%]                   -58.11019
Avg. Drawdown [%]                    -4.44206
Max. Drawdown Duration      520 days 20:00:00
Avg. Drawdown Duration        9 days 14:00:00
# Trades                                  607
Win Rate [%]                      

# Plotting the strategy to see a visualisation of the performance

In [31]:
bt_train.plot()




# Now to optimise the strategy
- Our strategy has parameters (e.g. `sma_short_period`, `sma_long_period`) we can optimise the strategy to find the best parameters
- We can optimise for whatever we would like, in this instance we optimise for Sharpe Ratio
- The optimise function will simply run through all possible combinations of parameters and find the one with the highest Sharpe Ratio
- This can be slow, so we can add constraints for parameter combinations that we will allow. e.g. `sma_short_period` must be lower than `sma_long_period`


In [32]:
def optimise(series):
    return series['Sharpe Ratio']

def constraint(series):
    return series['sma_short_period'] < series['sma_long_period']


# Running the optimisation
- We provide ranges of values for each parameter that we want to be tested
- `sma_short_period=range(10,20,5)` will test all combinations of the short SMA period from 10 to 20 incrementing by 5 each time. That is 10, 15, 20
- as it is doing an exhaustive search, running backtests for all combinations, it can take a long time if we specify lots of values
- take note that we specified to maximize for our optimise function (`maximize=optimise`), so it will find the parameters that maximise for whatever our optimise function returns. 
- It is important to note that if we wanted to optimise to minimise a metric we would change this, e.g. minimise the maximum drawdown faced by the strategy


In [33]:
stats_optimised = bt_train.optimize(
    sma_short_period=range(10, 20, 5), 
    sma_long_period=range(20,50,5), 
    maximize=optimise, 
    constraint=constraint)

  output = _optimize_grid()


In [37]:
print(stats_optimised)
stats_optimised._strategy

Start                     2020-10-30 18:00:00
End                       2024-04-30 11:00:00
Duration                   1277 days 17:00:00
Exposure Time [%]                    54.28767
Equity Final [$]                   8308869.89
Equity Peak [$]                   12048115.97
Return [%]                          730.88699
Buy & Hold Return [%]               667.65704
Return (Ann.) [%]                    82.98612
Volatility (Ann.) [%]               118.30584
CAGR [%]                             83.09793
Sharpe Ratio                          0.70145
Sortino Ratio                         2.51258
Calmar Ratio                          1.33353
Alpha [%]                           427.02689
Beta                                  0.45511
Max. Drawdown [%]                   -62.23046
Avg. Drawdown [%]                    -3.11657
Max. Drawdown Duration      768 days 03:00:00
Avg. Drawdown Duration        7 days 10:00:00
# Trades                                  846
Win Rate [%]                      

<Strategy SMAStrategy(sma_short_period=10,sma_long_period=20)>

# Now we will test the optimized parameters on the testing set
- Here, we pass in `df_test`, our testing dataset which contains unseen data.  
- We again call `.run()`, but this time we pass in our optimized parameters `.run(**stats_optimised._strategy._params)`. This runs the backtest using those parameters rather than the ones defined in the Strategy class.  
- Finally, we print the performance statistics of this backtest to see how our optimized parameters performed on the testing set.  

## Did we overfit?
- We want to determine whether the optimized parameters were overfitted to the training set.  
- Overfitting occurs when a strategy learns patterns that are too specific to the training data, failing to capture the general, repeatable signals we are aiming for.  
- As a result, it performs well on the training data but fails to replicate that performance on unseen testing data.
- We can compare performance on training set and testing set to determine if the parameters were overfit


In [35]:
bt_test = Backtest(df_test, SMAStrategy, cash=1000000, exclusive_orders=True, finalize_trades=True)
test_stats = bt_test.run(**stats_optimised._strategy._params)

#printing all stats side by side for comparison
print(f"{'Metric':<30} | {'TRAINING (Before)':<25} | {'TRAINING (Optimized)':<25} | {'TEST SET':<25}")
print("-" * 115)

for metric in stats.index:
    if metric not in ['_strategy', '_equity_curve', '_trades']:
        train_before_val = stats[metric]
        train_after_val = stats_optimised[metric]
        test_val = test_stats[metric]
        print(f"{metric:<30} | {str(train_before_val):<25} | {str(train_after_val):<25} | {str(test_val):<25}")


Metric                         | TRAINING (Before)         | TRAINING (Optimized)      | TEST SET                 
-------------------------------------------------------------------------------------------------------------------
Start                          | 2020-10-30 18:00:00       | 2020-10-30 18:00:00       | 2024-04-30 11:00:00      
End                            | 2024-04-30 11:00:00       | 2024-04-30 11:00:00       | 2025-10-29 17:00:00      
Duration                       | 1277 days 17:00:00        | 1277 days 17:00:00        | 547 days 06:00:00        
Exposure Time [%]              | 54.04946811981988         | 54.2876721268681          | 55.37875904073088        
Equity Final [$]               | 6886758.209999994         | 8308869.890000001         | 1992495.6899999988       
Equity Peak [$]                | 8233099.469999996         | 12048115.970000006        | 2306951.6299999994       
Return [%]                     | 588.6758209999995         | 730.8869890000001 

# Plotting the optimised strategy on our test set to visualise its performance


In [36]:
bt_test.plot()


