# Genetic Algorithm Portfolio Optimization

In [51]:
import importlib
import os
import sys
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# The project root is taken to be the parent of the current working directory.
# It must be on sys.path BEFORE the project packages below are imported,
# otherwise a fresh kernel fails with ModuleNotFoundError.
ROOT_PATH = os.path.abspath(os.path.join(os.getcwd(), '..'))
if ROOT_PATH not in sys.path:
    sys.path.insert(0, ROOT_PATH)

import experiments.problem.experiment_executor
import experiments.problem.experiment_loader
import pop.dataset.dataset_manager

# Reload the project modules so that edits made on disk are picked up
# without restarting the kernel.
importlib.reload(experiments.problem.experiment_executor)
importlib.reload(experiments.problem.experiment_loader)
importlib.reload(pop.dataset.dataset_manager)

# Bind the class names only after reloading, so they refer to the fresh definitions.
from experiments.problem.experiment_executor import ExperimentExecutor
from experiments.problem.experiment_loader import ExperimentLoader
from pop.dataset.dataset_manager import DatasetManager

## Experiment Configuration

In [48]:
# Which experiment configuration file to use.
num_exp = 1

# Config file location (relative to the project root) and output directory.
# NOTE(review): RESULTS_PATH is resolved against the current working directory,
# whereas the config path is joined to ROOT_PATH — confirm this is intended.
CONFIG_PATH = f"experiments/problem/configurations/ga/experiment_{num_exp}.csv"
RESULTS_PATH = "experiments/results/ga"

# Create the output directory (and any missing parents); no error if it already exists.
results_dir = Path(RESULTS_PATH)
results_dir.mkdir(parents=True, exist_ok=True)

# Dataset access object.
dataset_manager = DatasetManager("dataset")

# Read the GA experiment definitions from the configuration file under the project root.
config_file = os.path.join(ROOT_PATH, CONFIG_PATH)
ga_experiments = ExperimentLoader.load_experiments(config_file)

# Executor shared by the cells below; they read its `results` attribute.
ga_executor = ExperimentExecutor()

In [49]:
# Preview the first loaded configuration. Guarded like the run cells so that
# an empty configuration list gives a message instead of an IndexError.
if ga_experiments:
    print(ga_experiments[0])
else:
    print("No experiments found to run.")

{'num_assets': 10, 'correlation_level': 'high', 'pop_size': 100, 'max_generations': 50, 'mutation_rate': 0.1, 'repair_method': 'normalize', 'tournament_size': 3, 'num_elites': 1}


## Run one execution

In [50]:
# Run a single experiment
num_runs = 1

# Use the shared executor rather than a throwaway instance: the save and plot
# cells that follow read `ga_executor.results`.
# NOTE(review): presumably the executor records each run in `.results` — confirm.
executor = ga_executor  # kept as an alias for any code that still refers to `executor`

if ga_experiments:
    print("Running a single GA experiment...")
    result = ga_executor.run_single_experiment(
        algorithm_type="ga",
        num_companies=10,
        sharpe_ratios=3,
        experiment=ga_experiments[0],
        seed=0,
    )
    print(f"Experiment result: {result}")
else:
    print("No experiments found to run.")


Running a single GA experiment...
Experiment result: {'experiment_id': 'unknown', 'num_assets': 10, 'correlation_level': 'high', 'pop_size': 100, 'max_generations': 50, 'mutation_rate': 0.1, 'repair_method': 'normalize', 'tournament_size': 3, 'num_elites': 1, 'seed': 0, 'sharpe_ratio': [-inf, -inf, -inf, -inf, -inf, -inf, -inf, -inf, -inf, -inf]}


In [None]:
# Destination of the results table; the name encodes experiment number and run count.
results_file = f"{RESULTS_PATH}/experiment_{num_exp}_{num_runs}_iterations.csv"

# Tabulate everything the executor has collected and write it out
# without the DataFrame index column.
results_df = pd.DataFrame(ga_executor.results)
results_df.to_csv(results_file, index=False)

# Tell the user where the file went.
print(f"Results saved to {results_file}")

## Visualize fitness evolution

In [None]:
# Plot fitness evolution for each experiment.
# Results that carry no 'convergence_history' entry are skipped instead of
# raising KeyError, mirroring the guard used for 'diversity_history'.
for result in ga_executor.results:
    if 'convergence_history' in result:
        plt.plot(
            result['convergence_history'],
            label=f"Experiment {result.get('experiment_id', 'N/A')}",
        )

# Add labels and title
plt.title(f"GA Fitness Evolution ({num_runs} Iterations)")
plt.xlabel("Generations")
plt.ylabel("Fitness")
plt.legend()

In [None]:
# Save the plot
# Writes pyplot's current figure to a PNG under RESULTS_PATH, then clears the
# figure so the next plot starts from an empty canvas.
# NOTE(review): this saves whatever the *current* figure is. If the notebook
# backend has already closed the figure drawn in the previous cell, the saved
# image may be empty — confirm, or save in the same cell that plots.
results_file = f"{RESULTS_PATH}/experiment_{num_exp}_{num_runs}_iterations_fitness.png"
plt.savefig(results_file)
plt.clf()

print(f"Fitness evolution plot saved to {results_file}")

## Visualize diversity evolution

In [None]:
# Plot diversity evolution.
# Only results that actually carry a 'diversity_history' entry are drawn.
for result in ga_executor.results:
    if 'diversity_history' in result:
        # .get() supplies the 'N/A' fallback; subscripting with
        # ['experiment_id', 'N/A'] would look up a tuple key and raise KeyError.
        plt.plot(
            result['diversity_history'],
            label=f"Experiment {result.get('experiment_id', 'N/A')}",
        )

# Add labels and title
plt.title(f"GA Diversity Evolution ({num_runs} Iterations)")
plt.xlabel("Generations")
plt.ylabel("Diversity")
plt.legend()

In [None]:
# Save the plot
# Writes pyplot's current figure to a PNG under RESULTS_PATH, then clears the
# figure so the next plot starts from an empty canvas.
# NOTE(review): this saves whatever the *current* figure is. If the notebook
# backend has already closed the figure drawn in the previous cell, the saved
# image may be empty — confirm, or save in the same cell that plots.
results_file = f"{RESULTS_PATH}/experiment_{num_exp}_{num_runs}_iterations_diversity.png"
plt.savefig(results_file)
plt.clf()

print(f"Diversity plot saved to {results_file}")

## Repeat experiment several times

In [None]:
# Run a single experiment 10 times
num_runs = 10

if ga_experiments:
    print(f"Running GA experiments with {num_runs} iterations...")
    repeated_results = ga_executor.run_repeated_experiment(ga_experiments[0], num_runs=num_runs)

    # A run's 'sharpe_ratio' may be a single number or a sequence of numbers
    # (the single-run output above shows a list). np.ravel handles both, so the
    # statistics below always operate on a flat list of floats.
    fitness_values = [
        float(value)
        for result in repeated_results
        for value in np.ravel(result['sharpe_ratio'])
    ]

    if fitness_values:
        print(f"Best fitness: {max(fitness_values):.4f}")
        print(f"Mean fitness: {np.mean(fitness_values):.4f} ± {np.std(fitness_values):.4f}")
    else:
        # max() on an empty sequence would raise ValueError.
        print("No fitness values were returned.")
else:
    print("No experiments found to run.")

In [None]:
# Persist everything the executor has collected so far as a CSV table
# (no index column); the file name encodes experiment number and run count.
results_file = f"{RESULTS_PATH}/experiment_{num_exp}_{num_runs}_iterations.csv"
results_df = pd.DataFrame(ga_executor.results)
results_df.to_csv(results_file, index=False)
print(f"Results saved to {results_file}")

## Run all experiments

In [None]:
# Number of repetitions requested for every loaded experiment.
num_runs = 31

if not ga_experiments:
    # Nothing was loaded from the configuration file.
    print("No experiments found to run.")
else:
    print(f"Running all GA experiments with {num_runs} iterations...")
    ga_executor.run_all_experiments(ga_experiments, num_runs=num_runs)

In [None]:
# Dump the executor's accumulated results into one CSV (index column omitted).
results_df = pd.DataFrame(data=ga_executor.results)

results_file = f"{RESULTS_PATH}/ga_results_{num_runs}_iterations.csv"
results_df.to_csv(path_or_buf=results_file, index=False)

# Report the output location.
print(f"Results saved to {results_file}")