In [14]:
import yfinance as yf
import pandas as pd
import numpy as np
import random
from deap import base, creator, tools, algorithms
import matplotlib.pyplot as plt

# Define the fitness function for NSGA-II
if "FitnessMulti" not in creator.__dict__:
    creator.create("FitnessMulti", base.Fitness, weights=(1.0, -1.0))
if "Individual" not in creator.__dict__:
    creator.create("Individual", list, fitness=creator.FitnessMulti)

# Toolbox setup
toolbox = base.Toolbox()
toolbox.register("attr_bool", random.randint, 0, 1)
toolbox.register("individual", tools.initRepeat, creator.Individual, toolbox.attr_bool, n=52)
toolbox.register("population", tools.initRepeat, list, toolbox.individual)
toolbox.register("mate", tools.cxTwoPoint)
toolbox.register("mutate", tools.mutFlipBit, indpb=0.05)
toolbox.register("select", tools.selNSGA2)

def download_data(ticker, start_date, end_date):
    print(f"Downloading data for {ticker} from {start_date} to {end_date}...")
    data = yf.download(ticker, start=start_date, end=end_date)
    data.reset_index(inplace=True)
    print(f"Downloaded {len(data)} rows of data.")
    return data

def decode_chromosome(individual):
    """
    Decode a 52-bit chromosome into the Buy and Sell rule structures.

    Layout (indices in the chromosome):
       Buy_Active        = bits 0..8   (9 bits)
       Buy_TrueFalse     = bits 9..17  (9 bits)
       Buy_Operators     = bits 18..25 (8 bits)
       Sell_Active       = bits 26..34 (9 bits)
       Sell_TrueFalse    = bits 35..43 (9 bits)
       Sell_Operators    = bits 44..51 (8 bits)
    """
    # Buy side
    buy_active_bits     = individual[0:9]
    buy_truefalse_bits  = individual[9:18]
    buy_operator_bits   = individual[18:26]

    # Sell side
    sell_active_bits    = individual[26:35]
    sell_truefalse_bits = individual[35:44]
    sell_operator_bits  = individual[44:52]

    # Create dictionary structure
    return {
        "buy_active":     buy_active_bits,
        "buy_truefalse":  buy_truefalse_bits,
        "buy_ops":        buy_operator_bits,

        "sell_active":    sell_active_bits,
        "sell_truefalse": sell_truefalse_bits,
        "sell_ops":       sell_operator_bits,
    }


def compute_indicator_signals(df):
    """
    Adds indicators as signals in the DataFrame.
    """
    df['SMA_cross'] = (df['SMA9'] > df['SMA40']).astype(int)
    df['MACD_signal'] = (df['MACD'] > df['Signal_Line']).astype(int)
    df['Momentum_signal'] = (df['Close'] > df['Close'].shift(1)).astype(int)
    df['RSI_signal'] = (df['RSI'] < 30).astype(int)  # Oversold condition
    
    # Add signals to ensure 4 indicator columns align with the chromosome
    return df

def apply_boolean_rule(df, indicator_cols, active_bits, truefalse_bits, operator_bits):
    """
    Given 9 'active_bits' and 9 'truefalse_bits' plus 8 'operator_bits',
    build a final boolean series indicating whether the rule is satisfied.
    
    - indicator_cols: A list of 9 DataFrame columns, e.g.:
      ["SMA_signal", "MACD_signal", "MO_signal", ... "BB_signal"]
    - active_bits[i] = 1 => we use that indicator
    - truefalse_bits[i] = 1 => we want the indicator == 1, else == 0
    - operator_bits[i] = 0 => AND, 1 => OR, connecting indicator i and i+1
    """

    # Step 1) Create a list of partial conditions, one per indicator
    partial_conditions = []
    for i in range(9):
        if active_bits[i] == 1:
            # That means we care about this indicator
            desired_val = truefalse_bits[i]  # 0 or 1
            col = indicator_cols[i]
            cond = (df[col] == desired_val)
            partial_conditions.append(cond)
        else:
            # If it's not active, we skip it entirely
            # or treat it as "True" so it doesn't break the chain
            # The simpler approach is to skip it:
            pass

    # If no indicators are active, return all False (or all True, depending on your convention)
    if len(partial_conditions) == 0:
        return pd.Series(False, index=df.index)

    # Step 2) Combine them with the operator bits
    # We'll combine them in a chain from left to right:
    final_cond = partial_conditions[0]
    # operator_bits has length 8, connecting consecutive indicators in partial_conditions
    # BUT note we may have fewer than 9 partial_conditions if some were inactive.
    # So we actually combine up to partial_conditions[-1].
    # We'll do a simpler approach: pairwise combine them in order:
    idx_op = 0
    for i in range(1, len(partial_conditions)):
        op = operator_bits[idx_op] if idx_op < len(operator_bits) else 0
        if op == 0:
            # AND
            final_cond = final_cond & partial_conditions[i]
        else:
            # OR
            final_cond = final_cond | partial_conditions[i]
        idx_op += 1

    return final_cond.astype(int)

def evaluate_strategy(individual, df):
    # 1) Decode the chromosome
    decoded = decode_chromosome(individual)

    # 2) Decide which 9 columns in df correspond to the 9 indicators
    indicator_cols = [
        "SMA_signal", 
        "MACD_signal", 
        "MO_signal",
        "PO_signal",
        "SO_signal",
        "RSI_signal",
        "CCI_signal",
        "LW_signal",
        "BB_signal"
    ]
    
    # 3) Generate buy signal
    df = df.copy()
    buy_signal = apply_boolean_rule(
        df, 
        indicator_cols,
        decoded["buy_active"],
        decoded["buy_truefalse"],
        decoded["buy_ops"]
    )
    
    # 4) Generate sell signal
    sell_signal = apply_boolean_rule(
        df,
        indicator_cols,
        decoded["sell_active"],
        decoded["sell_truefalse"],
        decoded["sell_ops"]
    )

    # 5) Combine the signals into final positions, e.g.:
    #    +1 if buy=1 and sell=0, -1 if sell=1 and buy=0, else 0
    #    (Details may vary; just ensure no lookahead bias.)
    positions = []
    current_pos = 0
    for i in range(len(df)):
        if buy_signal.iloc[i] == 1 and sell_signal.iloc[i] == 0:
            current_pos = 1
        elif buy_signal.iloc[i] == 0 and sell_signal.iloc[i] == 1:
            current_pos = -1
        # else hold prior position or go flat if both signals come up
        positions.append(current_pos)
    df["Position"] = positions

    # 6) Calculate daily returns for these positions.
    df["Daily_Return"] = df["Close"].pct_change() * df["Position"].shift(1)
    df.dropna(inplace=True)

    # 7) Compute Sharpe ratio & Max Drawdown. (Same as your existing code)
    returns = df["Daily_Return"]
    if returns.std() == 0:
        sharpe_ratio = -np.inf
    else:
        sharpe_ratio = np.sqrt(252) * returns.mean() / returns.std()

    cum_returns = (1 + returns).cumprod()
    running_max = cum_returns.cummax()
    drawdowns = (cum_returns - running_max) / running_max
    max_drawdown = drawdowns.min()

    return sharpe_ratio, max_drawdown
        
def calculate_indicators(df):
    print("Calculating indicators...")
    df = df.copy()
    
    # Simple Moving Averages (SMA)
    df['SMA9'] = df['Close'].rolling(window=9).mean()
    df['SMA40'] = df['Close'].rolling(window=40).mean()
    df['SMA_signal'] = ((df['SMA9'].shift(1) < df['SMA40'].shift(1)) & (df['SMA9'] > df['SMA40'])).astype(int)

    # Exponential Moving Averages (EMA) and MACD
    df['EMA12'] = df['Close'].ewm(span=12, adjust=False).mean()
    df['EMA26'] = df['Close'].ewm(span=26, adjust=False).mean()
    df['MACD'] = df['EMA12'] - df['EMA26']
    df['Signal_Line'] = df['MACD'].ewm(span=9, adjust=False).mean()
    df['MACD_signal'] = ((df['MACD'].shift(1) < df['Signal_Line'].shift(1)) & (df['MACD'] > df['Signal_Line'])).astype(int)

    # Momentum Oscillator
    df['MO_signal'] = ((df['Close'].diff(10).shift(1) < 0) & (df['Close'].diff(10) > 0)).astype(int)

    # Price Oscillator
    df['PO'] = (df['EMA12'] - df['EMA26']) / df['EMA26']
    df['PO_signal'] = ((df['PO'].shift(1) < 0) & (df['PO'] > 0)).astype(int)

    # Stochastic Oscillator
    low_min = df['Low'].rolling(window=14).min()
    high_max = df['High'].rolling(window=14).max()
    df['K'] = 100 * ((df['Close'] - low_min) / (high_max - low_min))
    df['D'] = df['K'].rolling(window=3).mean()
    df['D_slow'] = df['D'].rolling(window=3).mean()
    df['SO_signal'] = ((df['D'].shift(1) < df['D_slow'].shift(1)) & 
                       (df['D'] > df['D_slow']) & 
                       (df['D'] < 20) & 
                       (df['D_slow'] < 20)).astype(int)

    # Relative Strength Index (RSI)
    delta = df['Close'].diff()
    gain = delta.where(delta > 0, 0).rolling(window=14).mean()
    loss = -delta.where(delta < 0, 0).rolling(window=14).mean()
    rs = gain / loss
    df['RSI'] = 100 - (100 / (1 + rs))
    df['RSI_signal'] = ((df['RSI'].shift(1) < 30) & (df['RSI'] > 30)).astype(int)

    # Commodity Channel Index (CCI)
    typical_price = (df['High'] + df['Low'] + df['Close']) / 3
    tp_ma = typical_price.rolling(window=20).mean()
    tp_std = typical_price.rolling(window=20).std()
    df['CCI'] = (typical_price - tp_ma) / (0.015 * tp_std)
    df['CCI_signal'] = ((df['CCI'].shift(1) < 100) & (df['CCI'] > 100)).astype(int)

    # Larry Williams %R
    high_max = df['High'].rolling(window=14).max()
    low_min = df['Low'].rolling(window=14).min()
    df['LW'] = -100 * (high_max - df['Close']) / (high_max - low_min)
    df['LW_signal'] = ((df['LW'].shift(1) < -80) & (df['LW'] > -80)).astype(int)

    # Bollinger Bands
    df['SMA20'] = df['Close'].rolling(window=20).mean()
    df['BBstd'] = df['Close'].rolling(window=20).std()
    df['BB_upper'] = df['SMA20'] + (2 * df['BBstd'])
    df['BB_lower'] = df['SMA20'] - (2 * df['BBstd'])
    
    # Fixed BB signal calculation with proper alignment
    close_prev = df['Close'].shift(1)
    bb_lower_prev = df['BB_lower'].shift(1)
    df['BB_signal'] = ((close_prev < bb_lower_prev) & (df['Close'] > df['BB_lower'])).astype(int)

    # Fill NaN values created during rolling computations
    df.fillna(method='ffill', inplace=True)
    df.fillna(method='bfill', inplace=True)

    print("Indicators calculated.")
    return df

def fitness(individual):
    global training_data
    try:
        sharpe, drawdown = evaluate_strategy(individual, training_data)
        # Ensure valid results before returning
        if isinstance(sharpe, (int, float)) and isinstance(drawdown, (int, float)):
            return sharpe, drawdown
        else:
            raise ValueError("Invalid Sharpe or Drawdown values.")
    except Exception as e:
        print(f"Error in fitness calculation: {e}")
        return -np.inf, -np.inf

def run_nsga2(data, generations=50, pop_size=100):
    global training_data
    training_data = data

    print("Initializing NSGA-II...")
    toolbox.register("evaluate", fitness)
    population = toolbox.population(n=pop_size)

    print(f"Starting optimization for {generations} generations with population size {pop_size}...")
    algorithms.eaMuPlusLambda(
        population, toolbox, mu=pop_size, lambda_=pop_size, cxpb=0.9, mutpb=0.1, ngen=generations, verbose=True
    )
    print("Optimization complete.")

    pareto_front = tools.sortNondominated(population, len(population), first_front_only=True)[0]
    print(f"Pareto front contains {len(pareto_front)} solutions.")
    return pareto_front

# Main execution
if __name__ == "__main__":
    ticker = "^GSPC"
    start_date = "2000-01-01"
    end_date = "2020-12-31"

    print("Starting main execution...")
    data = download_data(ticker, start_date, end_date)
    data = calculate_indicators(data)

    window_size = 3 * 252
    results = []

    for start in range(0, len(data) - window_size, 252):
        print(f"Processing rolling window starting at index {start}...")
        train_data = data.iloc[start:start + 2 * 252]
        test_data = data.iloc[start + 2 * 252:start + window_size]

        print("Running NSGA-II for training data...")
        pareto_front = run_nsga2(train_data)

        print("Evaluating Pareto front on test data...")
        best_individual = max(pareto_front, key=lambda ind: ind.fitness.values[0])
        sharpe, drawdown = evaluate_strategy(best_individual, test_data)
        results.append((sharpe, drawdown))

        print(f"Test Period {start} - {start + window_size}: Sharpe = {sharpe}, Drawdown = {drawdown}")

    sharpe_ratios, drawdowns = zip(*results)
    print(f"Average Sharpe Ratio: {np.mean(sharpe_ratios)}")
    print(f"Average Max Drawdown: {np.mean(drawdowns)}")


[*********************100%***********************]  1 of 1 completed

Starting main execution...
Downloading data for ^GSPC from 2000-01-01 to 2020-12-31...
Downloaded 5283 rows of data.
Calculating indicators...





ValueError: Operands are not aligned. Do `left, right = left.align(right, axis=1, copy=False)` before operating.