In [6]:
import yfinance as yf
import pandas as pd
import numpy as np
import random
from deap import base, creator, tools, algorithms
import matplotlib.pyplot as plt

# Define the fitness function for NSGA-II
if "FitnessMulti" not in creator.__dict__:
    creator.create("FitnessMulti", base.Fitness, weights=(1.0, -1.0))  # Maximize Sharpe Ratio, Minimize Drawdown
if "Individual" not in creator.__dict__:
    creator.create("Individual", list, fitness=creator.FitnessMulti)

# Toolbox setup
toolbox = base.Toolbox()
toolbox.register("attr_bool", random.randint, 0, 1)
toolbox.register("individual", tools.initRepeat, creator.Individual, toolbox.attr_bool, n=52)
toolbox.register("population", tools.initRepeat, list, toolbox.individual)
toolbox.register("mate", tools.cxTwoPoint)
toolbox.register("mutate", tools.mutFlipBit, indpb=0.05)
toolbox.register("select", tools.selNSGA2)

def download_data(ticker, start_date, end_date):
    print(f"Downloading data for {ticker} from {start_date} to {end_date}...")
    data = yf.download(ticker, start=start_date, end=end_date)
    data.reset_index(inplace=True)
    print(f"Downloaded {len(data)} rows of data.")
    return data

def decode_chromosome(individual):
    """
    Decodes the chromosome into rules and indicators.
    Adjust bit counts to match the number of indicator columns.
    """
    # Adjust to match the defined signals
    buy_structure_bits = individual[:9]  # 9 bits for structure
    buy_logic_bits = individual[9:12]    # 3 bits for logic
    buy_indicator_bits = individual[12:16]  # 4 bits for indicators
    
    sell_structure_bits = individual[16:25]  # 9 bits for structure
    sell_logic_bits = individual[25:28]      # 3 bits for logic
    sell_indicator_bits = individual[28:32]  # 4 bits for indicators

    def parse_structure(struct_bits, logic_bits):
        # Parse structure (AND/OR logic and required bits)
        logic = ['AND' if bit == 0 else 'OR' for bit in logic_bits]
        required = struct_bits  # Required T/F bits for each indicator
        return logic, required

    def parse_indicators(ind_bits):
        # Extract active indicators
        return [i for i, bit in enumerate(ind_bits) if bit == 1]

    buy_logic, buy_required = parse_structure(buy_structure_bits, buy_logic_bits)
    sell_logic, sell_required = parse_structure(sell_structure_bits, sell_logic_bits)
    buy_active_indicators = parse_indicators(buy_indicator_bits)
    sell_active_indicators = parse_indicators(sell_indicator_bits)

    return {
        'buy_logic': buy_logic,
        'buy_required': buy_required,
        'buy_indicators': buy_active_indicators,
        'sell_logic': sell_logic,
        'sell_required': sell_required,
        'sell_indicators': sell_active_indicators,
    }

def compute_indicator_signals(df):
    """
    Adds indicators as signals in the DataFrame.
    """
    df['SMA_cross'] = (df['SMA9'] > df['SMA40']).astype(int)
    df['MACD_signal'] = (df['MACD'] > df['Signal_Line']).astype(int)
    df['Momentum_signal'] = (df['Close'] > df['Close'].shift(1)).astype(int)
    df['RSI_signal'] = (df['RSI'] < 30).astype(int)  # Oversold condition
    
    # Add signals to ensure 4 indicator columns align with the chromosome
    return df

def apply_rule(df, logic, required, active_indicators, indicator_columns):
    """
    Applies the decoded rules to generate buy/sell signals.
    """
    if not active_indicators or max(active_indicators) >= len(indicator_columns):
        # No valid active indicators
        return pd.Series(False, index=df.index)

    # Generate individual conditions
    conditions = []
    for idx in active_indicators:
        col_name = indicator_columns[idx]
        if col_name in df.columns:  # Ensure the column exists
            if required[idx] == 1:
                conditions.append(df[col_name] == 1)  # Condition must be true
            else:
                conditions.append(df[col_name] == 0)  # Condition must be false

    if not conditions:
        return pd.Series(False, index=df.index)

    # Combine conditions using the logic
    combined_condition = conditions[0]
    for i, condition in enumerate(conditions[1:], start=1):
        if logic[i - 1] == 'AND':
            combined_condition = combined_condition & condition
        elif logic[i - 1] == 'OR':
            combined_condition = combined_condition | condition

    return combined_condition.astype(int)

# Adjust the total chromosome length
toolbox.unregister("individual")
toolbox.register("individual", tools.initRepeat, creator.Individual, toolbox.attr_bool, n=32)

def evaluate_strategy(individual, df):
    """
    Evaluates the trading strategy.
    """
    try:
        df = df.copy()

        # Decode chromosome
        decoded = decode_chromosome(individual)
        indicator_columns = ['SMA_cross', 'MACD_signal', 'Momentum_signal', 'RSI_signal']

        # Compute buy and sell signals
        buy_signal = apply_rule(
            df, 
            decoded['buy_logic'], 
            decoded['buy_required'], 
            decoded['buy_indicators'], 
            indicator_columns
        )
        sell_signal = apply_rule(
            df, 
            decoded['sell_logic'], 
            decoded['sell_required'], 
            decoded['sell_indicators'], 
            indicator_columns
        )

        # Assign signals to the DataFrame
        df['Buy_Signal'] = buy_signal
        df['Sell_Signal'] = sell_signal

        # Calculate positions
        df['Position'] = 0
        
        # If you never want the second level:
        df.columns = df.columns.droplevel('Ticker')

        position = 0
        for i in range(1, len(df)):
            if df.iloc[i]['Buy_Signal'] == 1:
                position = 1
            elif df.iloc[i]['Sell_Signal'] == 1:
                position = -1
            df.iat[i, df.columns.get_loc('Position')] = position

        # Calculate returns
        df['Daily_Return'] = df['Close'].pct_change()
        df['Portfolio_Return'] = df['Position'].shift(1) * df['Daily_Return']
        df = df.dropna()

        if df.empty:
            return -np.inf, -np.inf

        # Calculate performance metrics
        returns = df['Portfolio_Return']
        if returns.std() == 0:
            sharpe_ratio = -np.inf
        else:
            sharpe_ratio = np.sqrt(252) * (returns.mean() / returns.std())

        cum_returns = (1 + returns).cumprod()
        running_max = cum_returns.cummax()
        drawdowns = (cum_returns - running_max) / running_max
        max_drawdown = drawdowns.min()

        return sharpe_ratio, max_drawdown

    except Exception as e:
        print(f"Error in evaluate_strategy: {e}")
        return -np.inf, -np.inf
        
def calculate_indicators(df):
    print("Calculating indicators...")
    df = df.copy()
    df['SMA9'] = df['Close'].rolling(window=9).mean()
    df['SMA40'] = df['Close'].rolling(window=40).mean()
    df['EMA12'] = df['Close'].ewm(span=12, adjust=False).mean()
    df['EMA26'] = df['Close'].ewm(span=26, adjust=False).mean()
    df['MACD'] = df['EMA12'] - df['EMA26']
    df['Signal_Line'] = df['MACD'].ewm(span=9, adjust=False).mean()

    # Calculate RSI example
    delta = df['Close'].diff()
    gain = (delta.where(delta > 0, 0)).rolling(14).mean()
    loss = (-delta.where(delta < 0, 0)).rolling(14).mean()
    rs = gain / loss
    df['RSI'] = 100 - (100 / (1 + rs))

    df.fillna(method='ffill', inplace=True)
    df.fillna(method='bfill', inplace=True)

    print("Indicators calculated.")
    df = compute_indicator_signals(df)  # add signals columns
    return df

def fitness(individual):
    global training_data
    try:
        sharpe, drawdown = evaluate_strategy(individual, training_data)
        # Ensure valid results before returning
        if isinstance(sharpe, (int, float)) and isinstance(drawdown, (int, float)):
            return sharpe, drawdown
        else:
            raise ValueError("Invalid Sharpe or Drawdown values.")
    except Exception as e:
        print(f"Error in fitness calculation: {e}")
        return -np.inf, -np.inf

def run_nsga2(data, generations=50, pop_size=100):
    global training_data
    training_data = data

    print("Initializing NSGA-II...")
    toolbox.register("evaluate", fitness)
    population = toolbox.population(n=pop_size)

    print(f"Starting optimization for {generations} generations with population size {pop_size}...")
    algorithms.eaMuPlusLambda(
        population, toolbox, mu=pop_size, lambda_=pop_size, cxpb=0.9, mutpb=0.1, ngen=generations, verbose=True
    )
    print("Optimization complete.")

    pareto_front = tools.sortNondominated(population, len(population), first_front_only=True)[0]
    print(f"Pareto front contains {len(pareto_front)} solutions.")
    return pareto_front

# Main execution
if __name__ == "__main__":
    ticker = "^GSPC"
    start_date = "2000-01-01"
    end_date = "2020-12-31"

    print("Starting main execution...")
    data = download_data(ticker, start_date, end_date)
    data = calculate_indicators(data)

    window_size = 3 * 252
    results = []

    for start in range(0, len(data) - window_size, 252):
        print(f"Processing rolling window starting at index {start}...")
        train_data = data.iloc[start:start + 2 * 252]
        test_data = data.iloc[start + 2 * 252:start + window_size]

        print("Running NSGA-II for training data...")
        pareto_front = run_nsga2(train_data)

        print("Evaluating Pareto front on test data...")
        best_individual = max(pareto_front, key=lambda ind: ind.fitness.values[0])
        sharpe, drawdown = evaluate_strategy(best_individual, test_data)
        results.append((sharpe, drawdown))

        print(f"Test Period {start} - {start + window_size}: Sharpe = {sharpe}, Drawdown = {drawdown}")

    sharpe_ratios, drawdowns = zip(*results)
    print(f"Average Sharpe Ratio: {np.mean(sharpe_ratios)}")
    print(f"Average Max Drawdown: {np.mean(drawdowns)}")


[*********************100%***********************]  1 of 1 completed

Starting main execution...
Downloading data for ^GSPC from 2000-01-01 to 2020-12-31...
Downloaded 5283 rows of data.
Calculating indicators...
Indicators calculated.
Processing rolling window starting at index 0...
Running NSGA-II for training data...
Initializing NSGA-II...
Starting optimization for 50 generations with population size 100...



  df.fillna(method='ffill', inplace=True)
  df.fillna(method='bfill', inplace=True)


gen	nevals
0  	100   
1  	100   
2  	100   
3  	100   
4  	100   
5  	100   
6  	100   
7  	100   
8  	100   
9  	100   
10 	100   
11 	100   
12 	100   
13 	100   
14 	100   
15 	100   
16 	100   
17 	100   
18 	100   
19 	100   
20 	100   
21 	100   
22 	100   
23 	100   
24 	100   
25 	100   
26 	100   
27 	100   
28 	100   
29 	100   
30 	100   
31 	100   
32 	100   
33 	100   
34 	100   
35 	100   
36 	100   
37 	100   
38 	100   
39 	100   
40 	100   
41 	100   
42 	100   
43 	100   
44 	100   
45 	100   
46 	100   
47 	100   
48 	100   
49 	100   
50 	100   
Optimization complete.
Pareto front contains 100 solutions.
Evaluating Pareto front on test data...
Test Period 0 - 756: Sharpe = 0.26379708709872846, Drawdown = -0.18367689838925905
Processing rolling window starting at index 252...
Running NSGA-II for training data...
Initializing NSGA-II...
Starting optimization for 50 generations with population size 100...
gen	nevals
0  	100   
1  	100   
2  	100   
3  	100   
4  	100  

KeyboardInterrupt: 