In [2]:
import numpy as np
import pandas as pd
from skopt import gp_minimize
from skopt.space import Real, Integer
from skopt.utils import use_named_args
from skopt.plots import plot_convergence, plot_objective
import matplotlib.pyplot as plt

def create_objective_function(master_data, price_df, failure_penalty=10.0):
    """
    Create the objective function for Bayesian optimization

    Parameters:
    -----------
    master_data : dict
        Input data forwarded unchanged to ``calculate_mom6_parameterized``.
    price_df : pd.DataFrame
        Price data forwarded unchanged to ``evaluate_performance``.
    failure_penalty : float
        Objective value reported when an evaluation raises or yields a
        non-finite Sharpe ratio. The objective is ``-Sharpe`` and is
        minimized, so the penalty must be a positive number that is worse
        than any realistic result (10.0 corresponds to a Sharpe of -10).

    Returns:
    --------
    tuple
        ``(objective, space)`` where ``objective`` accepts a list of
        parameter values ordered like ``space`` and returns the value to
        minimize, and ``space`` is the list of skopt dimensions.
    """
    # Define the parameter space
    space = [
        Integer(60, 252, name='lookback_period'),  # 3M to 12M
        Real(0.01, 0.1, name='winsorize_lower'),   # 1% to 10%
        Real(0.9, 0.99, name='winsorize_upper')    # 90% to 99%
    ]

    # Define the objective function
    @use_named_args(space)
    def objective(**params):
        try:
            # Calculate factor with current parameters
            factor = calculate_mom6_parameterized(
                master_data,
                lookback_period=int(params['lookback_period']),
                winsorize_lower=params['winsorize_lower'],
                winsorize_upper=params['winsorize_upper']
            )

            # Evaluate performance
            perf = evaluate_performance(factor, price_df)
            sharpe = float(perf['sharpe'])  # or use perf['ic_mean'] if preferred
        except Exception as e:
            # Best-effort: one bad parameter set must not abort the whole
            # search, but it must also never look attractive. Returning 0.0
            # here would beat every parameter set with a negative Sharpe.
            print(f"Error with parameters {params}: {str(e)}")
            return failure_penalty

        if not np.isfinite(sharpe):
            # NaN/inf would poison the Gaussian-process fit.
            print(f"Non-finite Sharpe with parameters {params}; "
                  f"using failure penalty")
            return failure_penalty

        # We want to maximize Sharpe, so return negative for minimization
        return -sharpe

    return objective, space

def run_bayesian_optimization(master_data, price_df, n_calls=50):
    """
    Run Bayesian optimization for factor parameters

    Parameters:
    -----------
    master_data : dict
        Dictionary containing OHLC data
    price_df : pd.DataFrame
        Price data for factor evaluation
    n_calls : int
        Number of optimization iterations

    Returns:
    --------
    tuple
        ``(result, best_params)`` where ``result`` is the object returned by
        ``gp_minimize`` and ``best_params`` maps each dimension name of the
        search space to its best value found.
    """
    # Create objective function and space
    objective, space = create_objective_function(master_data, price_df)

    # gp_minimize rejects a budget smaller than the number of random
    # starting points, so shrink the random phase for small n_calls.
    n_random_starts = min(10, n_calls)

    # Run optimization
    print("Starting Bayesian Optimization...")
    result = gp_minimize(
        func=objective,
        dimensions=space,
        n_calls=n_calls,
        n_random_starts=n_random_starts,
        noise=0.1,
        random_state=42
    )

    # Extract results, keyed by the dimension names declared in the space so
    # the mapping cannot drift from the space definition.
    best_params = {dim.name: value for dim, value in zip(space, result.x)}

    print("\nOptimization Results:")
    print("Best parameters found:")
    for param, value in best_params.items():
        print(f"{param}: {value}")
    print(f"Best Sharpe ratio: {-result.fun}")  # Negative because we minimized negative Sharpe

    # Plot optimization results.
    # plot_convergence draws on the current axes, so give it its own figure.
    plt.figure(figsize=(6, 5))
    plot_convergence(result)
    plt.title('Convergence plot')
    plt.tight_layout()

    # plot_objective creates its own figure holding a grid of partial
    # dependence plots; it cannot be placed inside an existing subplot.
    # It needs a fitted surrogate model, so skip it when none is available.
    if result.models:
        plot_objective(result, show_points=True)
        plt.suptitle('Parameter space')

    plt.show()

    return result, best_params

def analyze_optimization_results(result, master_data, price_df):
    """
    Analyze the results of Bayesian optimization

    Rebuilds the factor with the best parameters found, renders an alphalens
    full tear sheet for it, and returns a table of every evaluated point.

    Parameters:
    -----------
    result : OptimizeResult
        Object returned by ``gp_minimize``; ``x``, ``x_iters`` and
        ``func_vals`` are read.
    master_data : dict
        Input data forwarded to ``calculate_mom6_parameterized``.
    price_df : pd.DataFrame
        Price data passed to alphalens as ``prices``.

    Returns:
    --------
    pd.DataFrame
        One row per evaluated parameter set, with a ``score`` column holding
        the negated objective value (i.e. the Sharpe ratio).
    """
    # NOTE(review): `al` is expected to be the alphalens module imported in
    # an earlier cell — confirm it is in scope before running.

    # Get parameter names
    param_names = ['lookback_period', 'winsorize_lower', 'winsorize_upper']

    # Create DataFrame of all evaluations
    evaluations = pd.DataFrame(
        [dict(zip(param_names, point)) for point in result.x_iters]
    )
    evaluations['score'] = -np.array(result.func_vals)  # Convert back to Sharpe ratio

    # Calculate performance with best parameters
    best_params = dict(zip(param_names, result.x))
    best_factor = calculate_mom6_parameterized(master_data, **best_params)

    # Prepare data for alphalens analysis. alphalens documents `factor` as a
    # Series indexed by (date, asset), so build that directly instead of a
    # one-column DataFrame.
    # Assumes best_factor is a DataFrame with dates as rows and assets as
    # columns — TODO confirm.
    factor_series = best_factor.stack()
    factor_series.index = factor_series.index.set_names(['date', 'asset'])
    factor_series.name = 'factor'

    factor_data_aligned = al.utils.get_clean_factor_and_forward_returns(
        factor=factor_series,
        prices=price_df,
        periods=(1, 5, 10, 20),
        quantiles=10,
        max_loss=0.5
    )

    # Create tear sheet
    al.tears.create_full_tear_sheet(
        factor_data=factor_data_aligned,
        long_short=True,
        group_neutral=False
    )

    return evaluations

# Example usage:
def optimize_factor_bayesian(master_data, price_df, n_calls=50):
    """
    Complete pipeline for Bayesian optimization of factor parameters

    Parameters:
    -----------
    master_data : dict
        Input data forwarded to the optimization and analysis steps.
    price_df : pd.DataFrame
        Price data forwarded to the optimization and analysis steps.
    n_calls : int
        Number of optimization iterations, forwarded to
        ``run_bayesian_optimization``.

    Returns:
    --------
    tuple
        ``(result, best_params, evaluations)``: the first two come from
        ``run_bayesian_optimization``, the last from
        ``analyze_optimization_results``.
    """
    # Run optimization
    result, best_params = run_bayesian_optimization(
        master_data, price_df, n_calls=n_calls
    )

    # Analyze results
    evaluations = analyze_optimization_results(result, master_data, price_df)

    return result, best_params, evaluations

### Load data 

In [None]:
# TODO: replace the placeholders below with the real data loading code.
# master_data is passed to the factor calculation; price_df is the price
# table used for evaluation. The name must be `price_df` because that is
# what the optimization call further down uses.
master_data = None
price_df = None

### Run the optimization

In [None]:

# Execute the full pipeline: search the parameter space, then analyze the
# best parameter set found.
result, best_params, evaluations = optimize_factor_bayesian(
    master_data,
    price_df,
)

# Show the highest-scoring evaluations first
ranked_evaluations = evaluations.sort_values('score', ascending=False)
print("\nAll evaluations:")
print(ranked_evaluations.head())