In [5]:
import requests
import json
import math
import pandas as pd
import numpy as np
import datetime as dt
import matplotlib.pyplot as plt
from qgridnext import show_grid
from datetime import datetime, timedelta
import sys  
import os
import pandas_ta as ta
import sklearn as sk

# Ensure the directories are in the system path
sys.path.append(os.path.abspath(os.path.join(os.getcwd(), '..', 'Data_Management')))
sys.path.append(os.path.abspath(os.path.join(os.getcwd(), '..', 'Universe_Selection')))
sys.path.append(os.path.abspath(os.path.join(os.getcwd(), '..', 'Signal_Generation')))
sys.path.append(os.path.abspath(os.path.join(os.getcwd(), '..', 'Risk_Management')))
sys.path.append(os.path.abspath(os.path.join(os.getcwd(), '..', 'Portfolio_Optimization')))

# Import the modules
from data import Data
from calculations import Calculations
from coarse import Coarse_1 as Coarse
from fine import Fine_1 as Fine
from entry_signal import Trend_Following, Mean_Reversion
from tail_risk import Stop_Loss, Take_Profit
from manage_trade import Manage_Trade
# from portfolio_management import portfolio_management, portfolio_optimization

---

### Walk Forward Optimization (wfo)

To perform walk-forward optimization (WFO), you essentially need three core components:

**Trading Strategy Function**:\
This function implements the trading strategy (e.g., moving average crossover) and generates buy/sell signals based on the strategy's logic. It should also calculate the returns and apply them to simulate trades.


**Objective Function**:\
The objective function calculates the performance metric (e.g., Sharpe ratio, cumulative returns) that you want to optimize. It uses the signals generated by the trading strategy and evaluates the performance based on historical data.

**Walk-Forward Optimization Loop**:\
This function performs the actual walk-forward optimization process. It splits the data into in-sample (training) and out-of-sample (testing) sets, runs optimization on the training set, evaluates on the testing set, and repeats the process over multiple folds (using cross-validation).

Below is a simple wfo for a crossover strategy.

Note that TimeSeriesSplit increase the size of the training, so it is not technically a wfo, it is more like a increasing window.

In [48]:
import yfinance as yf
data = yf.download("AAPL", start="2024-01-01", end="2024-12-20")

[*********************100%***********************]  1 of 1 completed


In [6]:
data

Price,Adj Close,Close,High,Low,Open,Volume
Ticker,AAPL,AAPL,AAPL,AAPL,AAPL,AAPL
Date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
2023-01-03 00:00:00+00:00,123.768463,125.070000,130.899994,124.169998,130.279999,112117500
2023-01-04 00:00:00+00:00,125.045036,126.360001,128.660004,125.080002,126.889999,89113600
2023-01-05 00:00:00+00:00,123.718979,125.019997,127.769997,124.760002,127.129997,80962700
2023-01-06 00:00:00+00:00,128.271118,129.619995,130.289993,124.889999,126.010002,87754700
2023-01-09 00:00:00+00:00,128.795593,130.149994,133.410004,129.889999,130.470001,70790800
...,...,...,...,...,...,...
2023-12-22 00:00:00+00:00,192.656174,193.600006,195.410004,192.970001,195.179993,37122800
2023-12-26 00:00:00+00:00,192.108871,193.050003,193.889999,192.830002,193.610001,28919300
2023-12-27 00:00:00+00:00,192.208359,193.149994,193.500000,191.089996,192.490005,48087700
2023-12-28 00:00:00+00:00,192.636292,193.580002,194.660004,193.169998,194.139999,34049900


In [None]:
import numpy as np
import pandas as pd
import yfinance as yf
from sklearn.model_selection import TimeSeriesSplit
from skopt import gp_minimize
from skopt.space import Integer

def sharpe_ratio(returns, risk_free_rate=0.0):
    """Calculates the Sharpe Ratio."""
    excess_returns = returns - risk_free_rate
    return np.sqrt(252) * np.mean(excess_returns) / np.std(excess_returns)

def moving_average_crossover(data, windows):
    """Generates trading signals based on MA crossover."""
    if len(windows) != 2:
        raise ValueError("windows list must contain exactly two elements (fast and slow window).")
    fast_window, slow_window = windows

    fast_ma = data.rolling(window=fast_window).mean()
    slow_ma = data.rolling(window=slow_window).mean()

    # Reindex to align indices
    fast_ma = fast_ma.reindex(data.index)
    slow_ma = slow_ma.reindex(data.index)

    signals = pd.DataFrame(index=data.index)
    signals['signals'] = np.where(fast_ma > slow_ma, 1, -1)
    signals = signals.shift(1)  # Prevent lookahead bias
    signals = signals.fillna(0)

    return signals['signals']

def objective_function(data, params):
    """Objective function to minimize (negative Sharpe Ratio). Takes a list of params."""
    params = [10, 50]
    if len(params) != 2:
        raise ValueError("params list must contain exactly two elements (fast and slow window).")
    fast_window, slow_window = params
    # if fast_window >= slow_window:
    #     return 10000  # Very bad result
    signals = moving_average_crossover(data, params)
    new_data = pd.concat([data, signals], axis=1)
    returns = new_data['signals'] * new_data['AAPL'].pct_change()
    returns = returns.dropna()
    sharpe = sharpe_ratio(returns)
    return -sharpe  # Minimize the negative Sharpe Ratio

def walk_forward_optimization(data, space, objective_function, n_splits=5, n_calls=20):
    """Performs walk-forward optimization. Takes objective function as argument."""
    tscv = TimeSeriesSplit(n_splits=n_splits)
    best_params_over_time = []
    sharpes_over_time = []
    returns_over_time = []

    for train_index, test_index in tscv.split(data):
        train_data = data.iloc[train_index]
        test_data = data.iloc[test_index]

        # Perform Bayesian optimization
        result = gp_minimize(lambda x: objective_function(train_data, x), space, n_calls=n_calls, random_state=0)
        best_params = result.x
        best_params_over_time.append(best_params)

        # Evaluate on the test set
        signals_test = moving_average_crossover(test_data, best_params)
        new_data = pd.concat([data, signals_test], axis=1)
        returns = new_data['signals'] * new_data['AAPL'].pct_change()
        returns = returns.dropna()
        sharpe_test = sharpe_ratio(returns)
        sharpes_over_time.append(sharpe_test)
        returns_over_time.append(returns)
        print(f"Fold: Best Params: {best_params}, Sharpe Ratio (Test): {sharpe_test}")

    total_returns_test = pd.concat(returns_over_time)

    return best_params_over_time, sharpes_over_time, total_returns_test

# Download historical data
ticker = "AAPL"
data = yf.download(ticker, period="2y")
data = data['Close']
data = data.dropna()

# Define the search space
space = [Integer(2, 50, name='fast_window'), Integer(5, 200, name='slow_window')]

# Perform walk-forward optimization
best_params, sharpes, total_returns_test = walk_forward_optimization(data, space, objective_function, n_splits=5, n_calls=20)

print("Best parameters over time:", best_params)
print("Sharpe ratios over time:", sharpes)
print(f"Average Sharpe Ratio over the folds {np.mean(sharpes)}")

[*********************100%***********************]  1 of 1 completed


Fold: Best Params: [30, 170], Sharpe Ratio (Test): -1.6231721035520568




Fold: Best Params: [30, 170], Sharpe Ratio (Test): -0.2138513416868408
Fold: Best Params: [30, 170], Sharpe Ratio (Test): 0.6229423893853532
Fold: Best Params: [30, 170], Sharpe Ratio (Test): -2.9096071754978965
Fold: Best Params: [30, 170], Sharpe Ratio (Test): -2.140565323273122
Best parameters over time: [[30, 170], [30, 170], [30, 170], [30, 170], [30, 170]]
Sharpe ratios over time: [-1.6231721035520568, -0.2138513416868408, 0.6229423893853532, -2.9096071754978965, -2.140565323273122]
Average Sharpe Ratio over the folds -1.2528507109249127


In [67]:
total_returns_test.cumsum()

Date
2023-05-08 00:00:00+00:00   -0.000000
2023-05-09 00:00:00+00:00    0.009971
2023-05-10 00:00:00+00:00   -0.000450
2023-05-11 00:00:00+00:00   -0.001544
2023-05-12 00:00:00+00:00    0.005247
                               ...   
2024-12-23 00:00:00+00:00   -0.437835
2024-12-24 00:00:00+00:00   -0.449313
2024-12-26 00:00:00+00:00   -0.452489
2024-12-27 00:00:00+00:00   -0.439247
2024-12-30 00:00:00+00:00   -0.425983
Length: 415, dtype: float64

In [70]:
tscv = TimeSeriesSplit(n_splits=5)
for train_index, test_index in tscv.split(data):
    print("TRAIN:", train_index, "TEST:", test_index)

TRAIN: [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47
 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71
 72 73 74 75 76 77 78 79 80 81 82 83 84 85] TEST: [ 86  87  88  89  90  91  92  93  94  95  96  97  98  99 100 101 102 103
 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121
 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139
 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157
 158 159 160 161 162 163 164 165 166 167 168]
TRAIN: [  0   1   2   3   4   5   6   7   8   9  10  11  12  13  14  15  16  17
  18  19  20  21  22  23  24  25  26  27  28  29  30  31  32  33  34  35
  36  37  38  39  40  41  42  43  44  45  46  47  48  49  50  51  52  53
  54  55  56  57  58  59  60  61  62  63  64  65  66  67  68  69  70  71
  72  73  74  75  76  77  78  79  80  81  82  83  84  85  86  87  88  89
  90  91  92  

In [32]:
import pandas as pd
windows = [10, 50]


"""Generates trading signals based on MA crossover."""
if len(windows) != 2:
    raise ValueError("windows list must contain exactly two elements (fast and slow window).")
fast_window, slow_window = windows

fast_ma = data.rolling(window=fast_window).mean()
slow_ma = data.rolling(window=slow_window).mean()

# Reindex to align indices
fast_ma = fast_ma.reindex(data.index)
slow_ma = slow_ma.reindex(data.index)

signals = pd.DataFrame(index=data.index)
signals['signals'] = np.where(fast_ma > slow_ma, 1, -1)
signals = signals.shift(1)  # Prevent lookahead bias
signals = signals.fillna(0)



In [34]:
signals

Unnamed: 0_level_0,signals
Date,Unnamed: 1_level_1
2023-01-03 00:00:00+00:00,-1
2023-01-04 00:00:00+00:00,-1
2023-01-05 00:00:00+00:00,-1
2023-01-06 00:00:00+00:00,-1
2023-01-09 00:00:00+00:00,-1
...,...
2024-12-23 00:00:00+00:00,1
2024-12-24 00:00:00+00:00,1
2024-12-26 00:00:00+00:00,1
2024-12-27 00:00:00+00:00,1


In [54]:
"""Objective function to minimize (negative Sharpe Ratio). Takes a list of params."""
params = [10, 50]
if len(params) != 2:
    raise ValueError("params list must contain exactly two elements (fast and slow window).")
fast_window, slow_window = params
# if fast_window >= slow_window:
#     return 10000  # Very bad result
signals = moving_average_crossover(data, params)
new_data = pd.concat([data, signals], axis=1)
returns = new_data['signals'] * new_data['AAPL'].pct_change()
returns = returns.dropna()
sharpe = sharpe_ratio(returns)

In [55]:
sharpe

-0.025851094107977715

---

Another method to implement WFO from scratch (which is preferred)

In [40]:
len(data)

253

In [205]:
import numpy as np
import pandas as pd
from sklearn.model_selection import ParameterGrid
from skopt import gp_minimize
from skopt.space import Integer, Categorical, Real
from skopt.utils import use_named_args

#Helper method that converts a dictionary of parameters to a list of tuples
def dict_to_param_space(param_dict):
    param_space = []
    for param_name, param_range in param_dict.items():
        if isinstance(param_range, tuple) and len(param_range) == 2:
            # Create an Integer parameter range
            param_space.append(Integer(param_range[0], param_range[1], name=param_name))

        elif isinstance(param_range, range):
            # Convert range to min and max bounds
            param_space.append(Integer(min(param_range), max(param_range), name=param_name))
        else:
            raise ValueError(f"Invalid range for parameter '{param_name}': {param_range}")
    return param_space

In [206]:
# 1. Load historical data
def load_data(file_path):
    data = pd.read_csv(file_path, parse_dates=["date"], index_col="date")
    return data

# 2. Define a sample trading strategy
def trading_strategy(data, long_window, short_window):
    data["SMA_Short"] = data["Close"].rolling(window=short_window).mean()
    data["SMA_Long"] = data["Close"].rolling(window=long_window).mean()
    data["Signal"] = np.where(data["SMA_Short"] > data["SMA_Long"], 1, 0)  # 1 for Buy, 0 for Sell
    data["return"] = data["Close"].pct_change()
    data["Strategy_Return"] = data["Signal"].shift(1) * data["return"]
    data["creturns"] = (1 + data["Strategy_Return"]).cumprod() - 1  # Calculate cumulative returns
    return data

# 3. Split data into in-sample and out-of-sample periods
def split_data(data, train_size, test_size, step_size):
    start = 0
    while start + train_size + test_size <= len(data):
        train = data.iloc[start:start + train_size]
        test = data.iloc[start + train_size:start + train_size + test_size]
        yield train, test
        start += step_size

In [207]:


# 4. Optimize parameters on in-sample data
def optimize_parameters(train_data, param_grid):
    best_params = None
    best_performance = -np.inf
    for params in ParameterGrid(param_grid):
        result = trading_strategy(train_data.copy(), **params)
        performance = result['creturns'].iloc[-1]  # Get the last value of cumulative returns
        print(performance, best_performance, params)
        if performance > best_performance:
            best_performance = performance
            best_params = params
        
    print(f'In sample best performance: {best_performance}')
    return best_params

# 4. Optimize parameters on in-sample data using gp_minimize
def optimize_parameters_gp(train_data, param_space):

    if isinstance(param_space, dict):
        param_space = dict_to_param_space(param_space)

    @use_named_args(param_space)
    def objective(**params):
        result = trading_strategy(train_data.copy(), **params)
        # Use negative performance because gp_minimize minimizes
        performance = result["creturns"].iloc[-1]
        return -performance if not pd.isnull(performance) else np.inf  # Handle invalid values

    # Run gp_minimize
    result = gp_minimize(
        func=objective,
        dimensions=param_space,
        n_calls=50,  # Number of evaluations
        random_state=42,
    )
    
    # Extract the best parameters
    best_params = {dim.name: val for dim, val in zip(param_space, result.x)}
    return best_params

# Example Usage
# Load data

In [208]:
# 5. Test optimized parameters on out-of-sample data
def test_strategy(test_data, best_params):
    result = trading_strategy(test_data.copy(), **best_params)
    if "creturns" in result.columns:
        return result["creturns"].iloc[-1]
    else:
        return np.nan

# 6. Walk-forward optimization loop
def walk_forward_optimization(data, train_size, test_size, step_size, param_grid, optimize_fn):
    results = []
    for train, test in split_data(data, train_size, test_size, step_size):
        print(f"Training on data from {train.index[0]} to {train.index[-1]}")
        print(f"Testing on data from {test.index[0]} to {test.index[-1]}")
        
        # Optimize on training data
        best_params = optimize_fn(train, param_grid)
        print(f"Best params: {best_params}")
        
        # Test on out-of-sample data
        performance = test_strategy(test, best_params)
        print(f"Out-of-sample performance: {performance}")
        
        results.append(performance)
    return results

In [209]:
data = yf.download("AAPL", start="2020-01-01", end="2024-12-20")
data.columns = data.columns.droplevel(1)

[*********************100%***********************]  1 of 1 completed


In [210]:
# Parameter grid for optimization
param_grid = {"short_window": range(5, 21, 5), "long_window": range(20, 101, 20)}

# Walk-forward optimization
train_size = 100  # Number of training days
test_size = 100  # Number of testing days
step_size = 100   # Step size for walk forward
### NOTE: Make sure that the train size and test_size are longer than the largest parameter window to ensure that the rolling averages are calculated correctly

results = walk_forward_optimization(data.dropna(), train_size, test_size, step_size, param_grid, optimize_parameters)

# Aggregate and analyze results
if results: 
    print(f"Average out-of-sample performance: {np.mean(results):.2f}")
else:
    print("No results to display.")

Training on data from 2020-01-02 00:00:00+00:00 to 2020-05-26 00:00:00+00:00
Testing on data from 2020-05-27 00:00:00+00:00 to 2020-10-15 00:00:00+00:00
0.09597608917022904 -inf {'long_window': 20, 'short_window': 5}
0.08595394125699607 0.09597608917022904 {'long_window': 20, 'short_window': 10}
0.04738620577558095 0.09597608917022904 {'long_window': 20, 'short_window': 15}
0.0 0.09597608917022904 {'long_window': 20, 'short_window': 20}
0.10339670599382011 0.09597608917022904 {'long_window': 40, 'short_window': 5}
0.11997887078481284 0.10339670599382011 {'long_window': 40, 'short_window': 10}
0.18019904143167564 0.11997887078481284 {'long_window': 40, 'short_window': 15}
0.15161986834932173 0.18019904143167564 {'long_window': 40, 'short_window': 20}
0.10078893022173463 0.18019904143167564 {'long_window': 60, 'short_window': 5}
0.07804637217860377 0.18019904143167564 {'long_window': 60, 'short_window': 10}
0.07804637217860377 0.18019904143167564 {'long_window': 60, 'short_window': 15}
0

In [158]:
results

[0.10123625644240564,
 0.044742036949854125,
 0.0948417189329791,
 0.18427941922088653,
 -0.05170333556220752,
 0.0,
 0.09244964158255331,
 0.18607734804593812,
 0.00603670710166937,
 0.040131620444971094,
 -0.15580976930670032]

In [162]:
for train, test in split_data(data, train_size, test_size, step_size):
    print(f"Training on data from {train.index[0]} to {train.index[-1]}")
    print(f"Testing on data from {test.index[0]} to {test.index[-1]}")

Training on data from 2020-01-02 00:00:00+00:00 to 2020-05-26 00:00:00+00:00
Testing on data from 2020-05-27 00:00:00+00:00 to 2020-10-15 00:00:00+00:00
Training on data from 2020-05-27 00:00:00+00:00 to 2020-10-15 00:00:00+00:00
Testing on data from 2020-10-16 00:00:00+00:00 to 2021-03-11 00:00:00+00:00
Training on data from 2020-10-16 00:00:00+00:00 to 2021-03-11 00:00:00+00:00
Testing on data from 2021-03-12 00:00:00+00:00 to 2021-08-03 00:00:00+00:00
Training on data from 2021-03-12 00:00:00+00:00 to 2021-08-03 00:00:00+00:00
Testing on data from 2021-08-04 00:00:00+00:00 to 2021-12-23 00:00:00+00:00
Training on data from 2021-08-04 00:00:00+00:00 to 2021-12-23 00:00:00+00:00
Testing on data from 2021-12-27 00:00:00+00:00 to 2022-05-18 00:00:00+00:00
Training on data from 2021-12-27 00:00:00+00:00 to 2022-05-18 00:00:00+00:00
Testing on data from 2022-05-19 00:00:00+00:00 to 2022-10-11 00:00:00+00:00
Training on data from 2022-05-19 00:00:00+00:00 to 2022-10-11 00:00:00+00:00
Testi

In [163]:
row_number = data.index.get_loc('2020-05-26')

In [164]:
row_number

99

In [171]:
data_2 = data.iloc[200: 300].copy()

In [169]:
data.iloc[200]

Price
Adj Close    1.161839e+02
Close        1.190200e+02
High         1.215500e+02
Low          1.188100e+02
Open         1.212800e+02
Volume       1.153938e+08
Name: 2020-10-16 00:00:00+00:00, dtype: float64

In [None]:
best_params = None
best_performance = -np.inf
for params in ParameterGrid(param_grid):
    result = trading_strategy(data_2.copy(), **params)
    performance = result['creturns'].iloc[-1]  # Get the last value of cumulative returns
    print(performance, best_performance, params)
    if performance > best_performance:
        best_performance = performance
        best_params = params
    
print(f'In sample best performance: {best_performance}')

-0.005632179910099899 -inf {'long_window': 20, 'short_window': 5}
0.044742036949854125 -0.005632179910099899 {'long_window': 20, 'short_window': 10}
0.030264978096686157 0.044742036949854125 {'long_window': 20, 'short_window': 15}
0.0 0.044742036949854125 {'long_window': 20, 'short_window': 20}
0.05963567382865387 0.044742036949854125 {'long_window': 40, 'short_window': 5}
0.029327638513925613 0.05963567382865387 {'long_window': 40, 'short_window': 10}
0.028183946123923942 0.05963567382865387 {'long_window': 40, 'short_window': 15}
-0.011600406468898372 0.05963567382865387 {'long_window': 40, 'short_window': 20}
-0.021739153613471784 0.05963567382865387 {'long_window': 60, 'short_window': 5}
-0.06063668480543316 0.05963567382865387 {'long_window': 60, 'short_window': 10}
-0.007841631306674701 0.05963567382865387 {'long_window': 60, 'short_window': 15}
-0.05232923395549616 0.05963567382865387 {'long_window': 60, 'short_window': 20}
-0.10635941790970926 0.05963567382865387 {'long_window'

In [172]:
trading_strategy(data_2, 40, 5).creturns.iloc[-1]

0.05963567382865387

In [121]:
data['SMA_Short']

Date
2024-01-02 00:00:00+00:00           NaN
2024-01-03 00:00:00+00:00           NaN
2024-01-04 00:00:00+00:00           NaN
2024-01-05 00:00:00+00:00    183.244999
2024-01-08 00:00:00+00:00    183.224998
                                ...    
2024-12-13 00:00:00+00:00    247.587505
2024-12-16 00:00:00+00:00    248.405003
2024-12-17 00:00:00+00:00    250.152500
2024-12-18 00:00:00+00:00    250.174999
2024-12-19 00:00:00+00:00    250.589996
Name: SMA_Short, Length: 245, dtype: float64

Testing the class

In [211]:
import numpy as np
import pandas as pd
from sklearn.model_selection import ParameterGrid
from skopt import gp_minimize
from skopt.space import Integer, Categorical, Real
from skopt.utils import use_named_args
import yfinance as yf



class WFO():
    def __init__(self, data, trading_strategy, param_grid, train_size, test_size, step_size, optimize_fn="grid"):
        """
        This class performs a walk-forward optimization on a trading strategy.

        Parameters:
        data (pd.DataFrame): The historical data to be used for backtesting.
        trading_strategy (object): The trading strategy to be optimized.
        param_grid (dict): The grid of parameters to be optimized.
        train_size (int): The number of data points to be used for training.
        test_size (int): The number of data points to be used for testing.
        step_size (int): The number of data points to step forward in each iteration.
        """
        self.data = data
        self.trading_strategy = trading_strategy
        self.param_grid = param_grid
        self.train_size = train_size
        self.test_size = test_size
        self.step_size = step_size
        self.optimize_fn = optimize_fn

        max_param = max(
        param.high if isinstance(param, (Integer, Real)) else max(param) #To handle all different cases
        for param in param_grid.values()
        )
        if step_size + train_size + test_size > len(data):
            raise ValueError("Invalid train, test, or step size.")
        if (train_size > max_param or test_size > max_param):
            raise ValueError("Parameter range exceeds train size or Test size.")
        if optimize_fn not in ["grid", "gp"]:
            raise ValueError("Invalid optimization function")
        
                             

    #### Helper Methods ####
    def dict_to_param_space(self, param_dict):
        param_space = []
        for param_name, param_range in param_dict.items():
            if isinstance(param_range, tuple) and len(param_range) == 2:
                if isinstance(param_range[0], int) and isinstance(param_range[1], int):
                    # Create an Integer parameter range
                    param_space.append(Integer(param_range[0], param_range[1], name=param_name))

                elif isinstance(param_range[0], float) and isinstance(param_range[1], float):
                    # Create a Real parameter range for floats
                    param_space.append(Real(param_range[0], param_range[1], name=param_name))

                elif isinstance(param_range[0], Categorical) and isinstance(param_range[1], Categorical):
                    # Create a Categorical parameter range
                    param_space.append(Categorical(param_range, name=param_name))

            elif isinstance(param_range, range):
                # Convert range to min and max bounds
                param_space.append(Integer(min(param_range), max(param_range), name=param_name))
            else:
                raise ValueError(f"Invalid range for parameter '{param_name}': {param_range}")
        return param_space
    

    def split_data(self, data, train_size, test_size, step_size):
        start = 0
        while start + train_size + test_size <= len(data):
            train = data.iloc[start:start + train_size]
            test = data.iloc[start + train_size:start + train_size + test_size]
            yield train, test
            start += step_size


    #### Optimization Methods ####
    def optimize_parameters_grid(self, train_data, param_grid):
        best_params = None
        best_performance = -np.inf
        for params in ParameterGrid(param_grid):
            result = self.trading_strategy(train_data.copy(), **params)
            performance = result['creturns'].iloc[-1]  # Get the last value of cumulative returns
            if performance > best_performance:
                best_performance = performance
                best_params = params
        return best_params

    def optimize_parameters_gp(self, train_data, param_space):

        if isinstance(param_space, dict):
            param_space = self.dict_to_param_space(param_space)

        @use_named_args(param_space)
        def objective(**params):
            result = self.trading_strategy(train_data.copy(), **params)
            # Use negative performance because gp_minimize minimizes
            performance = result["creturns"].iloc[-1]
            return -performance if not pd.isnull(performance) else np.inf  # Handle invalid values

        # Run gp_minimize
        result = gp_minimize(
            func=objective,
            dimensions=param_space,
            n_calls=50,  # Number of evaluations
            random_state=42,
        )
        
        # Extract the best parameters
        best_params = {dim.name: val for dim, val in zip(param_space, result.x)}
        return best_params

   
    def test_strategy(self, test_data, best_params):
        result = self.trading_strategy(test_data.copy(), **best_params)
        if "creturns" in result.columns:
            return result["creturns"].iloc[-1]
        else:
            return np.nan


    def walk_forward_optimization(self):
        """
        Perform a walk-forward optimization on a dataset.
        """
        results = []
        for train, test in self.split_data(self.data, self.train_size, self.test_size, self.step_size):
            # Optimize on training data
            if self.optimize_fn == "grid":
                best_params = self.optimize_parameters_grid(train, self.param_grid)
            elif self.optimize_fn == "gp":
                best_params = self.optimize_parameters_gp(train, self.param_grid)
            
            # Test on out-of-sample data
            performance = self.test_strategy(test, best_params)
            print(f"Out-of-sample performance: {performance}")
            
            results.append(performance)
        return results
 
# Initialize the WFO class
wfo = WFO(data, trading_strategy, param_grid, train_size=100, test_size=50, step_size=10)

In [212]:
max(param_grid['long_window'])

100

In [213]:
wfo.walk_forward_optimization()

Out-of-sample performance: 0.1314862991688981
Out-of-sample performance: 0.05128900981775253
Out-of-sample performance: 0.3508442276411028
Out-of-sample performance: 0.0481317488731543
Out-of-sample performance: -0.0030032283369400004
Out-of-sample performance: -0.09034392152651427
Out-of-sample performance: -0.04549358842710538
Out-of-sample performance: -0.04549358842710538
Out-of-sample performance: -0.09940816634577476
Out-of-sample performance: 0.0333815999618583
Out-of-sample performance: 0.15393292374693313
Out-of-sample performance: 0.10472602365027295
Out-of-sample performance: 0.06284113782341239
Out-of-sample performance: -0.08265598721322709
Out-of-sample performance: -0.10635941790970926
Out-of-sample performance: 0.0
Out-of-sample performance: 0.0
Out-of-sample performance: 0.0
Out-of-sample performance: 0.023468467709071295
Out-of-sample performance: -0.030598573453252897
Out-of-sample performance: -0.03302359903265306
Out-of-sample performance: 0.0
Out-of-sample perform

[0.1314862991688981,
 0.05128900981775253,
 0.3508442276411028,
 0.0481317488731543,
 -0.0030032283369400004,
 -0.09034392152651427,
 -0.04549358842710538,
 -0.04549358842710538,
 -0.09940816634577476,
 0.0333815999618583,
 0.15393292374693313,
 0.10472602365027295,
 0.06284113782341239,
 -0.08265598721322709,
 -0.10635941790970926,
 0.0,
 0.0,
 0.0,
 0.023468467709071295,
 -0.030598573453252897,
 -0.03302359903265306,
 0.0,
 0.040667080233110164,
 0.0,
 0.14961061161003797,
 0.0,
 0.024095565167760125,
 0.07535010080497151,
 0.046851712165749904,
 0.0008861955386292308,
 0.0,
 -0.004214541356903245,
 -0.010436162718636677,
 0.08335568015788808,
 0.1727241545237055,
 0.19172526339515272,
 0.06212175748381665,
 -0.048770782787985745,
 -0.05805212738806964,
 0.0,
 0.0,
 0.0,
 -0.023968546970034144,
 -0.053972114633049295,
 -0.053972114633049295,
 -0.055809461948557515,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 -0.0778625702142346,
 -0.007336906997227777,
 -0.0615

In [217]:
# Example lists
list1 = [1, 2, 3]
list2 = ['a', 'b', 'c']
list3 = [True, False, True]

# Using zip()
zipped = zip(list1, list2)

# Convert to a list to see the output
result = dict(zipped)

# Print the result
result


{1: 'a', 2: 'b', 3: 'c'}