In [None]:
import random
import numpy as np
import pandas as pd
from sklearn.cluster import KMeans
from deap import base, creator, tools, algorithms
from optimize_portfolio import optimize_portfolio


In [None]:
"""
Data Setup
"""

# Read the asset index table and the A-share trading calendar from Excel.
asset_index = pd.read_excel("data/asset_index.xlsx")
trade_dt = pd.read_excel("data/ashare_trading_calender.xlsx")

# Keep only the rows whose TRADE_DT also appears in the calendar's TRADE_DT column.
asset_index = asset_index.loc[asset_index['TRADE_DT'].isin(trade_dt['TRADE_DT'])]

In [None]:
"""
Clustering (Optional)
"""


In [None]:
"""
GA Initialization
"""

# Objective Setup
# DEAP maximizes objectives with a positive weight and minimizes those with a
# negative weight, so the third objective (volatility) is minimized.
creator.create("FitnessMulti", base.Fitness, weights=(1.0, 1.0, -1.0)) # MAX: return, sharpe; MIN: volatility
creator.create("Individual", list, fitness=creator.FitnessMulti)

# GA Initialization
toolbox = base.Toolbox()
# Each gene is a 0/1 inclusion flag.
toolbox.register("attr_bool", random.randint, 0, 1)
# One gene per column of asset_index: the fitness function pairs genes with the
# labels produced by iterating the frame (its columns), whereas
# len(asset_index) counts rows and would size the chromosome by the number of
# dates instead.
# NOTE(review): if TRADE_DT is still a regular column it is counted here as a
# candidate too -- confirm whether it should be excluded.
toolbox.register("individual", tools.initRepeat, creator.Individual, toolbox.attr_bool, n=len(asset_index.columns))
toolbox.register("population", tools.initRepeat, list, toolbox.individual)

In [None]:
"""
Fitness Function with Regularization (Optional)
"""

# Fitness Function
def evaluate(individual):
    """Score one candidate asset selection for the multi-objective GA.

    Args:
        individual: Sequence of 0/1 flags. A truthy flag keeps the label it is
            paired with when iterating over ``asset_index``.

    Returns:
        tuple: ``(expected_return, sharpe_ratio, volatility)`` taken from the
        evaluation returned by ``optimize_portfolio``, in the same order as
        the fitness weights ``(1.0, 1.0, -1.0)``.
    """
    # Iterating a DataFrame yields its column labels, so each gene switches
    # one column label on or off; zip stops at the shorter of the two.
    # NOTE(review): an all-zero individual produces an empty selection --
    # confirm that optimize_portfolio copes with that case.
    selected_assets = [asset for asset, include in zip(asset_index, individual) if include]

    # Model output (new weights) plus its evaluation metrics.
    new_weights, evaluation = optimize_portfolio('MVO', selected_assets, asset_index)

    # Expected Return, Sharpe Ratio, Volatility
    expected_return, sharpe_ratio, volatility = evaluation

    # Return volatility as-is: its fitness weight is already -1.0, which makes
    # DEAP minimize it. Negating it here as well would cancel that out and
    # reward high-volatility portfolios.
    return expected_return, sharpe_ratio, volatility

# optimize_portfolio(model_type, data, constraints)

In [None]:
"""
Genetic Algorithm Setup
"""

# NSGA-II
# Selection operator: non-dominated sorting selection from DEAP.
toolbox.register("select", tools.selNSGA2)
# Variation operators: two-point crossover, and bit-flip mutation where each
# gene is flipped with probability 0.05.
toolbox.register("mate", tools.cxTwoPoint)
toolbox.register("mutate", tools.mutFlipBit, indpb=0.05)
# Fitness function used to score every individual.
toolbox.register("evaluate", evaluate)

In [None]:
""""
Run Genetic Algorithm
"""

# Running the GA
def run_ga(pop_size, num_generations, cxpb=0.7, mutpb=0.2, seed=None):
    """Evolve a population with NSGA-II selection in a (mu + lambda) loop.

    Args:
        pop_size: Number of individuals kept each generation; the same number
            of offspring is produced per generation.
        num_generations: Number of generations to run.
        cxpb: Probability that an offspring is produced by crossover.
        mutpb: Probability that an offspring is produced by mutation.
            ``cxpb + mutpb`` must not exceed 1.0 (required by
            ``eaMuPlusLambda``).
        seed: Optional seed for Python's ``random`` module, which the
            registered gene initializer draws from. ``None`` leaves the
            generator state untouched.

    Returns:
        tuple: ``(pop, hof, stats)`` -- the final population, the Pareto front
        collected over the run, and the ``tools.Statistics`` object that was
        used for per-generation logging.
    """
    if seed is not None:
        random.seed(seed)

    pop = toolbox.population(n=pop_size)
    hof = tools.ParetoFront()

    # Column-wise statistics over the fitness tuples of the population.
    stats = tools.Statistics(lambda ind: ind.fitness.values)
    stats.register("avg", np.mean, axis=0)
    stats.register("std", np.std, axis=0)
    stats.register("min", np.min, axis=0)
    stats.register("max", np.max, axis=0)

    # eaSimple asks the selector for len(pop) individuals out of pop; NSGA-II
    # selection then returns everyone and applies no pressure. The
    # (mu + lambda) loop instead selects pop_size survivors from parents plus
    # offspring, which is where NSGA-II ranking actually discriminates.
    # The population list is updated in place by the algorithm.
    algorithms.eaMuPlusLambda(
        pop,
        toolbox,
        mu=pop_size,
        lambda_=pop_size,
        cxpb=cxpb,
        mutpb=mutpb,
        ngen=num_generations,
        stats=stats,
        halloffame=hof,
        verbose=True,
    )

    return pop, hof, stats

# Example run: evolve 50 individuals for 100 generations
population, pareto_front, stats = run_ga(pop_size=50, num_generations=100)

In [None]:
# Extract Best Portfolio
