In [1]:
import numpy as np
import scipy.stats as stats
import vectorbt as vbt

In [3]:
# Fetch AAPL market data through vectorbt's Yahoo Finance wrapper and keep
# only the "Close" column for the rest of the analysis.
start = "2015-01-01 UTC"
end = "2024-01-01 UTC"
aapl_data = vbt.YFData.download("AAPL", start=start, end=end)
prices = aapl_data.get("Close")

In [29]:
# Split the price series into rolling windows for walk-forward optimization.
# NOTE(review): per the vectorbt splitter docs, `window_len` is the length of the
# WHOLE window (in-sample + out-of-sample) and all lengths are counted in rows of
# `prices`, not calendar days. For a series with one row per trading day,
# 365 * 2 rows covers noticeably more than two calendar years — confirm this is
# the intended horizon.
split_kwargs = dict(
    n=20,  # number of rolling windows
    window_len=365 * 2,  # rows per window, in-sample and out-of-sample together
    set_lens=(180,),  # rows reserved for the out-of-sample (test) set
    # Resolve `set_lens` from the right: the last set gets the fixed 180 rows and
    # the first (in-sample) set takes whatever remains of the window.
    left_to_right=False,
)
in_set, out_set = prices.vbt.rolling_split(**split_kwargs)
in_price, in_indexes = in_set
out_price, out_indexes = out_set

In [7]:
# Backtest an MA-crossover strategy for every pair of candidate windows and
# report the Sharpe ratio of each pair.
def simulate_all_params(price, windows, **kwargs):
    """Return the Sharpe ratio of the crossover strategy for every window pair.

    Args:
        price: Price data handed to ``vbt.MA.run_combs`` and
            ``vbt.Portfolio.from_signals``.
        windows: Candidate moving-average window lengths; they are combined
            two at a time (``r=2``).
        **kwargs: Forwarded unchanged to ``vbt.Portfolio.from_signals``
            (the callers in this notebook pass ``direction`` and ``freq``).

    Returns:
        Whatever ``Portfolio.sharpe_ratio()`` returns for the simulated
        portfolio.
    """
    fast_ma, slow_ma = vbt.MA.run_combs(
        price,
        windows,
        r=2,  # take the windows two at a time
        short_names=["short", "long"],
    )
    # Enter when the first MA of a pair crosses above the second one,
    # exit on the opposite cross.
    portfolio = vbt.Portfolio.from_signals(
        price,
        fast_ma.ma_crossed_above(slow_ma),
        fast_ma.ma_crossed_below(slow_ma),
        **kwargs,
    )
    return portfolio.sharpe_ratio()

In [9]:
# helper functions that return the indexes and parameters where the performance is maximized
def get_best_index(performance):
    """Return the index labels of the best entry within each split.

    Args:
        performance: Performance metric (a pandas Series in this notebook)
            whose index contains a ``split_idx`` level.

    Returns:
        The index of ``performance`` restricted to the per-split maxima.
    """
    # For every split_idx group, idxmax() gives the label of its largest value.
    best_labels = performance.groupby("split_idx").idxmax()
    return performance[best_labels].index

# Pull a single parameter (one index level) out of the best-per-split index.
def get_best_params(best_index, level_name):
    """Return the values of index level ``level_name`` as a NumPy array.

    Args:
        best_index: Index exposing ``get_level_values`` (the result of
            ``get_best_index`` in this notebook).
        level_name: Name of the level to extract.
    """
    level_values = best_index.get_level_values(level_name)
    return level_values.to_numpy()

In [11]:
# Backtest the MA crossover with pre-selected short/long windows and return
# the resulting Sharpe ratio(s).
def simulate_best_params(price, best_short_windows, best_long_windows, **kwargs):
    """Run the crossover strategy with the given short and long windows.

    Args:
        price: Price data handed to ``vbt.MA.run`` and
            ``vbt.Portfolio.from_signals``.
        best_short_windows: Window length(s) for the short moving average.
        best_long_windows: Window length(s) for the long moving average.
        **kwargs: Forwarded unchanged to ``vbt.Portfolio.from_signals``.

    Returns:
        Whatever ``Portfolio.sharpe_ratio()`` returns for the simulated
        portfolio.
    """
    def run_ma(window_lengths):
        # per_column=True: vectorbt is asked to apply one window per column
        # of `price` rather than every window to every column.
        return vbt.MA.run(price, window=window_lengths, per_column=True)

    fast_ma = run_ma(best_short_windows)
    slow_ma = run_ma(best_long_windows)
    buy_signals = fast_ma.ma_crossed_above(slow_ma)
    sell_signals = fast_ma.ma_crossed_below(slow_ma)
    portfolio = vbt.Portfolio.from_signals(
        price, buy_signals, sell_signals, **kwargs
    )
    return portfolio.sharpe_ratio()

In [13]:
# Run the in-sample sweep: pass a range of moving-average windows to
# simulate_all_params, which returns the Sharpe ratio for every combination.

windows = np.arange(10, 40)  # candidate window lengths 10..39

in_sample_kwargs = dict(
    direction="both",  # allow both long and short positions
    freq="d",  # daily frequency
)
in_sharpe = simulate_all_params(in_price, windows, **in_sample_kwargs)

In [15]:
# Select the best in-sample moving-average windows for every split and
# combine them into one array.

# Index labels of the highest in-sample Sharpe ratio within each split.
in_best_index = get_best_index(in_sharpe)

# Best short / long window length per split, read from the index levels.
in_best_short_windows = get_best_params(in_best_index, "short_window")
in_best_long_windows = get_best_params(in_best_index, "long_window")

# One (short, long) row per split. column_stack builds the two-column array
# directly instead of going through a Python list of tuples, and it keeps the
# (n, 2) shape even when there are no rows.
in_best_window_pairs = np.column_stack(
    (in_best_short_windows, in_best_long_windows)
)

In [17]:
# Take the window pairs chosen on the in-sample data and see how well they
# perform on the held-out (out-of-sample) data: this yields the out-of-sample
# Sharpe ratios for the optimized moving-average windows.
out_sample_kwargs = dict(direction="both", freq="d")
out_test_sharpe = simulate_best_params(
    out_price,  # test dataset used to validate the strategy
    in_best_short_windows,
    in_best_long_windows,
    **out_sample_kwargs,
)

In [19]:
# Show the out-of-sample Sharpe ratios computed with the in-sample-optimal windows.
display(out_test_sharpe)

ma_window  ma_window  split_idx
10         11         0            1.854818
           17         1           -0.111151
           11         2           -2.377270
11         16         3           -1.339048
18         23         4            1.868990
23         26         5            0.670555
24         25         6            0.095804
23         26         7            2.307156
20         26         8            1.022533
22         25         9            1.716510
23         24         10           0.571056
18         19         11           1.695676
35         37         12          -1.986084
18         20         13           1.036891
                      14           0.666488
                      15          -0.006653
37         39         16          -1.523036
20         21         17           2.275778
19         21         18           1.029716
16         18         19          -3.461469
Name: sharpe_ratio, dtype: float64

In [21]:
# Compare the strategy's in-sample results with its out-of-sample results.

# Sharpe ratios of the best window pair per split on the in-sample data.
in_sample_best = in_sharpe[in_best_index].values
# Sharpe ratios obtained with those same windows on the out-of-sample data.
out_sample_test = out_test_sharpe.values

# One-sided two-sample t-test; the alternative hypothesis is that the mean
# out-of-sample Sharpe ratio is greater than the mean in-sample one.
ttest_result = stats.ttest_ind(
    out_sample_test,
    in_sample_best,
    alternative="greater",
)
t, p = ttest_result

In [23]:
# Show the p-value of the one-sided t-test (out-of-sample > in-sample).
display(p)

0.9996137090509504

In [25]:
# Print the mean Sharpe ratio of each sample, in-sample first.
for label, sharpes in (
    ("In-sample Sharpe:", in_sample_best),
    ("Out-sample Sharpe:", out_sample_test),
):
    print(label, sharpes.mean())

In-sample Sharpe: 1.7123868915703866
Out-sample Sharpe: 0.3003629394867363
