In [2]:
# Imports and set up

# Start by importing NumPy and vectorbt. You’ll use SciPy to test the statistical significance of the results.

import numpy as np
import scipy.stats as stats

import vectorbt as vbt

In [3]:
# Then create an array of moving average windows to test and download price data.

windows = np.arange(10, 50)

price = vbt.YFData.download('AAPL').get('Close')

In [4]:
# Build the functions

# Create the data splits for the walk-forward optimization.

(in_price, in_indexes), (out_price, out_indexes) = price.vbt.rolling_split(
    n=30,
    window_len=365 * 2,
    set_lens=(180,),
    left_to_right=False,
)

In [5]:
# This code segments the prices into 30 splits, each two years long, and reserves 180 days for the test.

# Now create the functions that run the backtest.

def simulate_all_params(price, windows, **kwargs):
    fast_ma, slow_ma = vbt.MA.run_combs(
        price, windows, r=2, short_names=["fast", "slow"]
    )
    entries = fast_ma.ma_crossed_above(slow_ma)
    exits = fast_ma.ma_crossed_below(slow_ma)

    pf = vbt.Portfolio.from_signals(price, entries, exits, **kwargs)
    return pf.sharpe_ratio()

In [6]:
# This function builds two moving averages for each window you pass in.

# Then it creates DataFrames showing where the fast-moving average crosses above the slow-moving average. These are the trade entries. It does the opposite for the trade exits.

# After the backtest is run, the function returns the Sharpe ratio.

# Next, you need to figure out the combination of windows that maximizes the Sharpe ratio.

def get_best_index(performance, higher_better=True):
    if higher_better:
        return performance[performance.groupby('split_idx').idxmax()].index
    return performance[performance.groupby('split_idx').idxmin()].index

def get_best_params(best_index, level_name):
    return best_index.get_level_values(level_name).to_numpy()
# The first function returns the indexes in the DataFrame for the windows in each data split that maximizes the Sharpe ratio. The second function returns the window values.

In [7]:
# Finally, create a function that runs the backtest with the windows that maximize the Sharpe ratio.

def simulate_best_params(price, best_fast_windows, best_slow_windows, **kwargs):

    fast_ma = vbt.MA.run(price, window=best_fast_windows, per_column=True)
    slow_ma = vbt.MA.run(price, window=best_slow_windows, per_column=True)

    entries = fast_ma.ma_crossed_above(slow_ma)
    exits = fast_ma.ma_crossed_below(slow_ma)

    pf = vbt.Portfolio.from_signals(price, entries, exits, **kwargs)
    return pf.sharpe_ratio()
# This function creates the moving average values that maximize the Sharpe ratio, runs the backtest, and returns the Sharpe ratio.



In [8]:
# Run the analysis

# Start by optimizing the moving average windows on the in-sample data.

in_sharpe = simulate_all_params(
    in_price,
    windows,
    direction="both",
    freq="d"
)
# The result is a DataFrame that has the Sharpe ratio for the best combination of windows for each split.

# Now you can get the optimized windows and test them with out-of-sample data.

in_best_index = get_best_index(in_sharpe)

in_best_fast_windows = get_best_params(
    in_best_index,
    'fast_window'
)
in_best_slow_windows = get_best_params(
    in_best_index,
    'slow_window'
)
in_best_window_pairs = np.array(
    list(
        zip(
            in_best_fast_windows,
            in_best_slow_windows
        )
    )
)

In [11]:
# Running this code gives you the parameter values for the fast-moving average and slow-moving average you can test with the out-of-sample data.

out_test_sharpe = simulate_best_params(
    out_price,
    in_best_fast_windows,
    in_best_slow_windows,
    direction="both",
    freq="d"
)
# The result is a DataFrame that has the Sharpe ratio for the backtest using out-of-sample test data and the window values that optimize the Sharpe ratio from the in-sample data.
out_test_sharpe

ma_window  ma_window  split_idx
15         30         0            2.934171
17         19         1           -1.182685
19         22         2            1.825433
35         36         3           -0.379473
10         11         4           -2.212113
11         13         5           -0.823432
17         18         6           -2.268702
19         20         7            0.150229
13         15         8            1.595456
10         24         9            0.779228
11         32         10           0.402868
10         45         11           0.483384
26         32         12          -0.557833
48         49         13           0.291812
13         15         14          -0.564042
29         36         15          -2.069703
12         14         16          -0.695318
35         36         17           2.460269
31         32         18           0.063564
14         49         19                inf
38         49         20           2.714624
41         44         21           0.777409


In [12]:
# Compare the results

# The whole point of this analysis is to understand if the parameters you fit on the in-sample data can be used in real life to make money.

# The most common issue in backtesting is overfitting to random data. (Especially when using technical analysis.)

# You can run a simple t-test to understand if the out-of-sample Sharpe ratio is statistically greater than the in-sample Sharpe ratio. If it were, it would give you some measure of confidence that you did not overfit to random data.

in_sample_best = in_sharpe[in_best_index].values
out_sample_test = out_test_sharpe.values

t, p = stats.ttest_ind(
    a=out_sample_test,
    b=in_sample_best,
    alternative="greater"
)

  a_zero_mean = a - mean


In [13]:
in_sample_best

array([2.28468017, 2.37503945, 1.3664512 , 2.78876011, 1.46241364,
       1.04446549, 2.2527809 , 2.28616777, 1.78566952, 1.50841107,
       0.97898887, 1.41685958, 1.36869151, 1.23858968, 1.45397882,
       1.33697608, 1.39323022, 2.65541281, 1.74225941, 1.65555823,
       3.00510862, 1.49608238, 2.05083162, 1.83479291, 1.70201641,
       1.33762371, 1.12723992, 2.04648867, 2.20829972, 0.95648917])

In [14]:
out_sample_test

array([ 2.93417072, -1.1826848 ,  1.82543294, -0.3794729 , -2.21211286,
       -0.82343175, -2.26870223,  0.15022907,  1.59545584,  0.77922779,
        0.40286777,  0.48338428, -0.55783331,  0.29181243, -0.564042  ,
       -2.06970252, -0.69531819,  2.46026949,  0.06356449,         inf,
        2.71462436,  0.77740906,  0.57824441,  0.50305597, -1.23974177,
        0.9206466 ,  2.03716192,  1.77725919, -0.42339654,  1.68085737])