# Portfolio Optimization Experiments

In [74]:
# Install if needed
# !pip install pyyaml

In [94]:
%reload_ext autoreload
%autoreload 2

In [76]:
from config_manager import Config
import pandas as pd
import numpy as np

In [77]:
# Load data
data = pd.read_csv('data.csv', index_col=0, parse_dates=True)
print(f"Data shape: {data.shape}")
print(f"Date range: {data.index[0]} to {data.index[-1]}")

Data shape: (4527, 42)
Date range: 2007-12-20 00:00:00 to 2025-12-18 00:00:00


## Configuration

Set `RUN_MODE` to switch between quick test and full overnight run.

In [80]:
RUN_MODE = 'overnight'  # 'quick' or 'overnight'

In [92]:
if RUN_MODE == 'quick':
    # Quick test - few minutes to check everything works
    cfg = Config(
        validation={
            'strategy': 'holdout',
            'n_seeds': 1,
            'random_seeds': [42]
        },
        ddpg={
            'total_timesteps': 5000,
            'warmup_steps': 500,
            'buffer_size': 10000
        },
        div_ddpg={
            'total_timesteps': 5000,
            'warmup_steps': 500,
            'buffer_size': 10000
        },
        pga_map_elites={
            'n_iterations': 20,
            'archive': {'n_niches': 50},
            'initial_population': 10,
            'batch_size_eval': 8
        },
        logging={'log_freq': 100, 'eval_freq': 500}
    )
    
else:
    cfg = Config(
        validation={
            'strategy': 'expanding_window',
            'n_seeds': 3,
            'random_seeds': [42, 123, 456]
        },
        ddpg={
            'num_epochs': 500,
            'patience': 20,
            'actor_lr': 0.0001,
            'critic_lr': 0.001,
            'batch_size': 64,
            'tau': 0.005,
            'gamma': 0.99,
            'hidden_sizes' :[64, 64],
            'verbose' : 1
        },
        div_ddpg={
            'num_epochs': 500,
            'patience': 20,
            'actor_lr': 0.0001,
            'critic_lr': 0.001,
            'alpha': 5.0,  # stronger diversity pressure
            'scaling_method': 'fixed'
        },
        pga_map_elites={
            'max_evals': 20000,
            'n_niches': 512,
            'random_init': 500,
            'eval_batch_size': 100
        }
    )

print(f"Run mode: {RUN_MODE}")
#print(f"DDPG timesteps: {cfg.ddpg.total_timesteps}")
print(f"PGA iterations: {cfg.pga_map_elites.n_iterations}")
print(f"Seeds: {cfg.validation.random_seeds}")

Run mode: overnight
PGA iterations: 1000
Seeds: [42, 123, 456]


## Data Split

In [82]:
def split_data(data, train_ratio=0.7, val_ratio=0.15):
    """Simple holdout split."""
    n = len(data)
    train_end = int(n * train_ratio)
    val_end = int(n * (train_ratio + val_ratio))
    
    train = data.iloc[:train_end]
    val = data.iloc[train_end:val_end]
    test = data.iloc[val_end:]
    
    return train, val, test

train_data, val_data, test_data = split_data(data)
print(f"Train: {len(train_data)} days ({train_data.index[0].date()} to {train_data.index[-1].date()})")
print(f"Val: {len(val_data)} days ({val_data.index[0].date()} to {val_data.index[-1].date()})")
print(f"Test: {len(test_data)} days ({test_data.index[0].date()} to {test_data.index[-1].date()})")

Train: 3168 days (2007-12-20 to 2020-07-22)
Val: 679 days (2020-07-23 to 2023-04-03)
Test: 680 days (2023-04-04 to 2025-12-18)


## Run Experiments

In [83]:
results = {}
seed = cfg.validation.random_seeds[0]

### 1. Equal Weights Baseline

In [84]:
# Equal weights - no training needed
n_assets = train_data.shape[1]
ew_weights = np.ones(n_assets) / n_assets

# Evaluate on validation set
ew_returns = (val_data * ew_weights).sum(axis=1)
results['equal_weights'] = {
    'cumulative_return': (1 + ew_returns).prod() - 1,
    'sharpe': ew_returns.mean() / ew_returns.std() * np.sqrt(252),
    'weights': ew_weights
}
print(f"Equal Weights - Sharpe: {results['equal_weights']['sharpe']:.3f}")

Equal Weights - Sharpe: 0.528


### 2. DDPG

In [95]:
from models.ddpg import DDPG
from models.networks import NeuralNetwork

ddpg_agent = DDPG(
    lookback_window=cfg.data.lookback_window,
    predictor=NeuralNetwork,
    batch_size=cfg.ddpg.batch_size,
    short_selling=False,
    verbose=1,
    seed=seed,
    hidden_sizes=cfg.ddpg.actor_hidden_dims
)

ddpg_agent.train(
    train_data=train_data,
    val_data=val_data,
    actor_lr=cfg.ddpg.actor_lr,
    critic_lr=cfg.ddpg.critic_lr,
    tau=cfg.ddpg.tau,
    gamma=cfg.ddpg.gamma,
    soft_update=True,
    num_epochs=cfg.ddpg.num_epochs,
    early_stopping=False,
    patience=cfg.ddpg.patience
)

# Evaluate
spo_results, dpo_results = ddpg_agent.evaluate(test_data, dpo=True)

results['ddpg'] = {
    'spo_profit': spo_results[0],
    'spo_sharpe': spo_results[1],
    'dpo_profit': dpo_results[0],
    'dpo_sharpe': dpo_results[1]
}
print(f"DDPG - SPO Sharpe: {results['ddpg']['spo_sharpe']:.3f}, DPO Sharpe: {results['ddpg']['dpo_sharpe']:.3f}")

RuntimeError: The size of tensor a (1280) must match the size of tensor b (64) at non-singleton dimension 0

### 3. Div-DDPG

In [86]:
from models.div_ddpg import DivDDPG
from models.networks import NeuralNetwork

div_ddpg_agent = DivDDPG(
    lookback_window=cfg.data.lookback_window,
    predictor=NeuralNetwork,
    batch_size=cfg.div_ddpg.batch_size,
    short_selling=False,
    verbose=1,
    seed=seed,
    hidden_sizes=cfg.div_ddpg.actor_hidden_dims
)

div_ddpg_agent.train(
    train_data=train_data,
    val_data=val_data,
    actor_lr=cfg.div_ddpg.actor_lr,
    critic_lr=cfg.div_ddpg.critic_lr,
    tau=cfg.div_ddpg.tau,
    gamma=cfg.div_ddpg.gamma,
    soft_update=cfg.div_ddpg.soft_update,
    num_epochs=cfg.div_ddpg.num_epochs,
    early_stopping=cfg.div_ddpg.early_stopping,
    patience=cfg.div_ddpg.patience,
    # Diversity params
    alpha=cfg.div_ddpg.diversity.alpha,
    alpha_final=cfg.div_ddpg.diversity.alpha_final,
    scaling_method=cfg.div_ddpg.diversity.scaling_method,
    n_prior_samples=cfg.div_ddpg.diversity.n_prior_samples
)

spo_results, dpo_results = div_ddpg_agent.evaluate(test_data, dpo=True)

results['div_ddpg'] = {
    'spo_profit': spo_results[0],
    'spo_sharpe': spo_results[1],
    'dpo_profit': dpo_results[0],
    'dpo_sharpe': dpo_results[1]
}
print(f"Div-DDPG - SPO Sharpe: {results['div_ddpg']['spo_sharpe']:.3f}, DPO Sharpe: {results['div_ddpg']['dpo_sharpe']:.3f}")

Epoch 1/500, Actor: 0.067764, Critic: 0.037132, Diversity: -0.000000, Alpha: 1.0000, Val: 0.022819
Epoch 2/500, Actor: 0.032715, Critic: 0.012844, Diversity: -0.000004, Alpha: 1.0000, Val: 0.045447
Epoch 3/500, Actor: 0.053467, Critic: 0.005136, Diversity: -0.000034, Alpha: 1.0000, Val: 0.031672
Epoch 4/500, Actor: 0.059809, Critic: 0.006661, Diversity: -0.000183, Alpha: 1.0000, Val: 0.036120
Epoch 5/500, Actor: -0.007309, Critic: 0.004476, Diversity: -0.001485, Alpha: 0.9998, Val: 0.056933
Epoch 6/500, Actor: 0.042062, Critic: 0.005319, Diversity: -0.005309, Alpha: 0.9985, Val: 0.039361
Epoch 7/500, Actor: 0.060838, Critic: 0.008194, Diversity: -0.006219, Alpha: 0.9947, Val: 0.036847
Epoch 8/500, Actor: 0.025450, Critic: 0.006844, Diversity: -0.002705, Alpha: 0.9938, Val: 0.074271
Epoch 9/500, Actor: 0.033072, Critic: 0.003919, Diversity: -0.007063, Alpha: 0.9973, Val: 0.074149
Epoch 10/500, Actor: 0.022796, Critic: 0.004080, Diversity: -0.014258, Alpha: 0.9930, Val: 0.032578
Epoch 11

### 4. PGA-MAP-Elites

In [87]:
#!pip install scikit-learn

In [88]:
from models.pga_map_elites.try_pga_out import PortfolioEnv
from models.pga_map_elites.pga_map_elites import main, config

# Create environment with your data split
env = PortfolioEnv(
    data_path="data.csv",  # or pass the dataframe directly
    lookback=cfg.data.lookback_window,
    episode_len=50
)

# Update config
config["state_dim"] = env.state_dim
config["action_dim"] = env.action_dim
config["eval_batch_size"] = 20
config["max_evals"] = 1000
config["random_init"] = 200
config["eval_batch_size"] = 50


# Run
archive = main(env, config)

PGA-MAP-Elites Initialization
Computing CVT with 256 niches, 2D behavior space...
  CVT computed in 0.0s
  Replay buffer initialized (max size: 1,000,000)
  Critic initialized (TD3-style twin critics)

Config Summary:
  Max evaluations: 1,000
  Random init: 200 evals
  Batch size: 50
  Variation split: 50% GA, 50% PG
  Critic training: 300 steps/iter
  PG variation: 10 steps/offspring

Iteration 1 | Evals: 0/1000
Phase: Random Initialization (0/200)

  [Evaluation] Evaluating 50 policies...
    Evaluated 25/50...
    Evaluated 50/50...
    Time: 0.1s
    Batch fitness: mean=0.0134, max=0.1352, min=-0.1779

  [Summary]
    Iteration time: 0.2s (total: 0.2s)
  Archive Metrics:
    Coverage: 5/256 (2.0%)
    Fitness: max=0.1352, mean=0.0401, median=0.0699, std=0.1150
    QD-Score: 0.2004
    BD Distribution:
      volatility: mean=0.133, std=0.113, range=[0.059, 0.358]
      diversification: mean=0.867, std=0.025, range=[0.822, 0.895]
  Replay Buffer: 2,500/1,000,000 (0.2%)
  Random polic

KeyboardInterrupt: 

In [None]:
#!pip install torch



In [None]:
import torch
# Get best policy from archive
best_key = max(archive.keys(), key=lambda k: archive[k].fitness)
best_policy = archive[best_key].x
best_fitness = archive[best_key].fitness
best_bd = archive[best_key].desc

print(f"Best policy: fitness={best_fitness:.4f}, BD={best_bd}")

# Evaluate on test data
test_env = PortfolioEnv(data_path="data.csv", lookback=cfg.data.lookback_window, episode_len=len(test_data)-cfg.data.lookback_window-1)

# Run evaluation
state = test_env.reset()
test_returns = []
test_weights = []

for _ in range(len(test_data) - cfg.data.lookback_window - 1):
    with torch.no_grad():
        action = best_policy(torch.FloatTensor(state).unsqueeze(0)).cpu().numpy().flatten()
    weights = np.abs(action) / (np.sum(np.abs(action)) + 1e-8)
    test_weights.append(weights)
    
    next_state, reward, done, info = test_env.step(action)
    test_returns.append(info['portfolio_return'])
    state = next_state
    if done:
        break

test_returns = np.array(test_returns)
pga_sharpe = np.mean(test_returns) / (np.std(test_returns) + 1e-8) * np.sqrt(252)
pga_cum_return = (1 + test_returns).prod() - 1

results['pga_map_elites'] = {
    'sharpe': pga_sharpe,
    'cumulative_return': pga_cum_return,
    'coverage': len(archive) / config['n_niches'],
    'qd_score': sum(ind.fitness for ind in archive.values())
}

print(f"PGA-MAP-Elites - Sharpe: {pga_sharpe:.3f}, Return: {pga_cum_return*100:.1f}%")

Best policy: fitness=0.2651, BD=[0.20122131 0.        ]
PGA-MAP-Elites - Sharpe: 0.672, Return: 25.7%


## Results Summary

In [None]:
# Compare results
for method, res in results.items():
    if 'sharpe' in res:
        print(f"{method}: Sharpe={res['sharpe']:.3f}, Return={res['cumulative_return']*100:.1f}%")

equal_weights: Sharpe=0.528, Return=17.2%


In [None]:
print("\n" + "="*50)
print("RESULTS SUMMARY")
print("="*50)
for method, res in results.items():
    if 'sharpe' in res:
        print(f"{method:20s}: Sharpe={res['sharpe']:.3f}")
    elif 'spo_sharpe' in res:
        print(f"{method:20s}: SPO Sharpe={res['spo_sharpe']:.3f}, DPO Sharpe={res['dpo_sharpe']:.3f}")


RESULTS SUMMARY
equal_weights       : Sharpe=0.528
ddpg                : SPO Sharpe=0.066, DPO Sharpe=0.068
div_ddpg            : SPO Sharpe=0.080, DPO Sharpe=0.080
pga_map_elites      : Sharpe=0.672


In [None]:
# Save config for reproducibility
cfg.save(f'experiments/{RUN_MODE}_config.yaml')