# Portfolio Experiments

In [1]:
%reload_ext autoreload
%autoreload 2

In [2]:
import yaml, numpy as np, pandas as pd

def load_config(path='default_config.yaml', **ov):
    """Load a YAML config file and apply top-level keyword overrides.

    Args:
        path: Path of the YAML file to read.
        **ov: Top-level keys to set or replace in the loaded config. The merge
            is shallow (``dict.update``), so overriding a nested section
            replaces that whole section rather than merging into it.

    Returns:
        dict: The parsed config with the overrides applied. An empty file
        yields just the overrides.

    Raises:
        TypeError: If the top level of the YAML document is not a mapping.
    """
    with open(path, encoding='utf-8') as f:
        cfg = yaml.safe_load(f)
    if cfg is None:
        # safe_load returns None for an empty document; treat it as "no settings"
        cfg = {}
    if not isinstance(cfg, dict):
        raise TypeError(
            f"config root in {path!r} must be a mapping, got {type(cfg).__name__}"
        )
    cfg.update(ov)
    return cfg

In [3]:
# Load the YAML-backed experiment config into the notebook-global `cfg`.
# NOTE: overrides are applied at the top level only. run_ddpg reads num_epochs from
# cfg['ddpg'], so a top-level num_epochs=50 override is not picked up by it.
# NOTE: the PGA-MAP-Elites config cell later in this notebook rebinds `cfg` to an inline
# dict without a 'ddpg' section; re-run this cell before re-running the DDPG cells.
cfg = load_config()  # or: load_config(num_epochs=50)

In [4]:
# =============================================================================
# Cell 2: Data Loading & Cross-Validation Splits
# =============================================================================
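# Expanding-window validation for financial time series.
# Each fold trains only on rows that precede its validation window, so no future
# rows leak into training, and successive windows cover multiple market regimes.
# Note: no purge/embargo gap is left between the training and validation rows.
# Reference: de Prado (2018) "Advances in Financial Machine Learning", Ch. 7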

def create_folds(data, test_years=2, init_train_years=10, val_years=1, step_years=1):
    """
    Expanding window cross-validation for time series.

    The final ``test_years`` of rows are held out. On the remaining rows, each
    fold trains on everything from the first row up to ``train_end`` and
    validates on the ``val_years`` that follow; ``train_end`` then grows by
    ``step_years``. Rows are split by position, so ``data`` must already be in
    chronological order.

    Args:
        data: pandas object supporting ``len`` and ``.iloc`` slicing.
        test_years: years held out at the end (0 keeps no test set).
        init_train_years: length of the first training window, in years.
        val_years: length of every validation window, in years.
        step_years: growth of the training window between folds, in years.
            Year counts are converted to rows at 252 trading days per year
            and rounded to whole rows, so fractional years are accepted.

    Returns:
        folds: list of (train_df, val_df) tuples
        test: held-out test set (final test_years of data)

    Raises:
        ValueError: if a window size is negative, or the step is not at least
            one row (which would never terminate).
    """
    days_per_year = 252  # trading days

    # Row counts must be ints for .iloc, even when a year count is fractional
    test_size = int(round(test_years * days_per_year))
    train_end = int(round(init_train_years * days_per_year))
    val_size = int(round(val_years * days_per_year))
    step = int(round(step_years * days_per_year))

    if min(test_size, train_end, val_size) < 0:
        raise ValueError("test_years, init_train_years and val_years must be non-negative")
    if step <= 0:
        raise ValueError("step_years must advance the training window by at least one row")

    # Hold out final test set. Slice from an explicit split index: with
    # test_size == 0, `iloc[-0:]` would return the whole frame as "test" and
    # `iloc[:-0]` would leave nothing to build folds from.
    split = max(len(data) - test_size, 0)
    test = data.iloc[split:]
    remaining = data.iloc[:split]

    # Create expanding folds
    folds = []
    while train_end + val_size <= len(remaining):
        train = remaining.iloc[:train_end]
        val = remaining.iloc[train_end:train_end + val_size]
        folds.append((train, val))
        train_end += step

    return folds, test

# Read the data file; the first column becomes the (date-parsed) index.
data = pd.read_csv('data.csv', index_col=0, parse_dates=True)

# create_folds slices rows by position, so the rows must already be oldest-first.
assert data.index.is_monotonic_increasing, "data.csv rows must be sorted by date (oldest first)"

folds, test = create_folds(data)
# Fail here rather than later with empty per-fold result lists.
assert folds, "not enough history to build a single train/validation fold"
print(f"Created {len(folds)} folds, test set: {len(test)} days")

Created 5 folds, test set: 504 days


In [5]:
# =============================================================================
# Cell 3: Equal Weights Baseline
# =============================================================================
# 1/N portfolio - surprisingly hard to beat
# Reference: DeMiguel, Garlappi & Uppal (2009) "Optimal Versus Naive 
#            Diversification", Review of Financial Studies

# One identical weight per column of the test frame
n_assets = test.shape[1]
w = np.ones(n_assets) / n_assets

# Weighted row sums give the portfolio's per-row value
r = test.mul(w, axis='columns').sum(axis=1)

# Mean over standard deviation, scaled by sqrt(252 trading days)
daily_mean, daily_std = r.mean(), r.std()
ew_sharpe = daily_mean / daily_std * np.sqrt(252)
print(f"Equal Weights Sharpe: {ew_sharpe:.3f}")

Equal Weights Sharpe: 1.645


In [6]:
# =============================================================================
# Cell 4: DDPG / DDES-DDPG Runner
# =============================================================================
# DDPG: Lillicrap et al. (2015) "Continuous control with deep reinforcement learning"
# DDES: Hong et al. (2018) "Diversity-Driven Exploration Strategy for Deep RL"
#
# Key difference:
#   DDPG  - exploration via Gaussian noise on actions
#   DDES  - exploration via diversity term in actor loss: -Q(s,a) + α * D(a, a_prior)

from models.div_ddpg import DDPG
from models.networks import NeuralNetwork

def run_ddpg(train, val, test, cfg, use_ddes=False):
    """Build a DDPG agent, train it on (train, val) and return its Sharpe on `test`.

    Args:
        train, val: data passed positionally to ``agent.train``.
        test: data passed to ``agent.evaluate``.
        cfg: config mapping. Top-level keys read here: lookback_window,
            hidden_sizes, seeds, actor_lr, critic_lr, gamma, tau. Keys read
            from the nested ``cfg['ddpg']`` section: batch_size, num_epochs,
            patience, noise, ddes_alpha.
        use_ddes: forwarded to ``agent.train`` to switch on the DDES variant.

    Returns:
        The second element of the second item returned by ``agent.evaluate``.
    """
    ddpg_cfg = cfg['ddpg']  # nested DDPG-specific section

    # Only the first configured seed is used
    agent_kwargs = dict(
        lookback_window=cfg['lookback_window'],
        predictor=NeuralNetwork,
        batch_size=ddpg_cfg['batch_size'],
        hidden_sizes=cfg['hidden_sizes'],
        seed=cfg['seeds'][0],
    )
    agent = DDPG(**agent_kwargs)

    train_kwargs = dict(
        actor_lr=cfg['actor_lr'],
        critic_lr=cfg['critic_lr'],
        gamma=cfg['gamma'],
        tau=cfg['tau'],
        soft_update=True,  # hard-coded here; not read from the config
        num_epochs=ddpg_cfg['num_epochs'],
        patience=ddpg_cfg['patience'],
        noise=ddpg_cfg['noise'],
        use_ddes=use_ddes,
        ddes_alpha=ddpg_cfg['ddes_alpha'],
    )
    agent.train(train, val, **train_kwargs)

    _, (_, sharpe) = agent.evaluate(test)
    return sharpe

In [7]:
# =============================================================================
# Cell 5: Run Experiments Across Folds
# =============================================================================
# Per-method results: a scalar for the baseline, one Sharpe per fold for the agents
results = {'EW': ew_sharpe, 'DDPG': [], 'DDES': []}

for i, (train, val) in enumerate(folds):
    print(f"\n--- Fold {i+1}/{len(folds)} ---")
    print(f"Train: {len(train)} days, Val: {len(val)} days")

    # Plain DDPG first, then the DDES variant; every fold is scored on the same `test`
    for method, ddes_flag in (('DDPG', False), ('DDES', True)):
        results[method].append(run_ddpg(train, val, test, cfg, use_ddes=ddes_flag))


--- Fold 1/5 ---
Train: 2520 days, Val: 252 days
Epoch 1/300, Actor: 0.005788, Critic: 0.036189, Val: 0.045153
Epoch 2/300, Actor: 0.049707, Critic: 0.056402, Val: 0.046739
Epoch 3/300, Actor: 0.001892, Critic: 0.046224, Val: 0.046439
Epoch 4/300, Actor: -0.001583, Critic: 0.042579, Val: 0.045639
Epoch 5/300, Actor: -0.001441, Critic: 0.038466, Val: 0.048347
Epoch 6/300, Actor: 0.015573, Critic: 0.038178, Val: 0.065709
Epoch 7/300, Actor: -0.070278, Critic: 0.037387, Val: 0.071060
Epoch 8/300, Actor: -0.055066, Critic: 0.051828, Val: 0.043571
Epoch 9/300, Actor: 0.003323, Critic: 0.047668, Val: 0.046628
Epoch 10/300, Actor: 0.010712, Critic: 0.035800, Val: 0.042717
Epoch 11/300, Actor: -0.006359, Critic: 0.041845, Val: 0.093141
Epoch 12/300, Actor: -0.030091, Critic: 0.039331, Val: 0.048419
Epoch 13/300, Actor: -0.084612, Critic: 0.037894, Val: 0.050554
Epoch 14/300, Actor: -0.052886, Critic: 0.042026, Val: 0.046830
Epoch 15/300, Actor: -0.052584, Critic: 0.040559, Val: 0.050193
Epoch

In [8]:
# =============================================================================
# Cell 6: Results Summary
# =============================================================================
# Fixed-width table: method name, mean Sharpe across folds, std across folds
bar = "=" * 50
print("\n" + bar)
print(f"{'Method':<12} {'Mean Sharpe':>12} {'Std':>10}")
print("-" * 50)
# The baseline is a single number, so it has no spread to report
print(f"{'EW':<12} {results['EW']:>12.3f} {'-':>10}")
for method in ('DDPG', 'DDES'):
    fold_sharpes = results[method]
    print(f"{method:<12} {np.mean(fold_sharpes):>12.3f} {np.std(fold_sharpes):>10.3f}")
print(bar)


Method        Mean Sharpe        Std
--------------------------------------------------
EW                  1.645          -
DDPG                1.328      0.340
DDES                1.003      0.175


In [9]:
#!pip install cloudpickle


In [10]:
# =============================================================================
# Cell 1: Imports + Config
# =============================================================================
import numpy as np
import pandas as pd
import torch
from sklearn.neighbors import KDTree

from models.pga_map_elites.official_networks import Actor, Critic
from models.pga_map_elites.official_utils import (
    ReplayBuffer, Individual, add_to_archive, cvt
)
from models.pga_map_elites.official_variational_operators import VariationalOperator
from models.pga_map_elites.portfolio_env import PortfolioEnv

# Inline configuration for the PGA-MAP-Elites cells below.
# NOTE: this rebinds the notebook-global `cfg` that was loaded from YAML earlier. This dict
# has no 'ddpg' section, so run_ddpg (which reads cfg['ddpg']) cannot be re-run after this
# cell without reloading the YAML config first.
cfg = {
    'seeds': [42],              # only seeds[0] is used, to seed torch and numpy
    'hidden_sizes': [128, 128],  # passed to every Actor constructor
    'gamma': 0.99,              # passed to Critic as `discount`
    'tau': 0.005,               # passed to Critic as `tau`
    'actor_lr': 0.001,          # passed to VariationalOperator as `learning_rate`
    'n_niches': 128,            # number of CVT centroids; also the coverage denominator
    'max_evals': 20000,         # main loop stops once this many policies were evaluated
    'random_init': 1000,        # evaluations done with fresh random Actors before variation starts
    'batch_size': 100,          # offspring created per main-loop iteration
    'proportion_evo': 0.5,      # passed to the variation operator call
    'iso_sigma': 0.005,         # passed to VariationalOperator
    'line_sigma': 0.05,         # passed to VariationalOperator
    'nr_of_steps_crit': 300,    # passed to critic.train
    'nr_of_steps_act': 10,      # passed to replay_buffer.sample_state and the variation operator
    'train_batch_size': 256,    # critic batch size; critic training is skipped until the buffer exceeds it
    'policy_noise': 0.2,        # passed to Critic
    'noise_clip': 0.5,          # passed to Critic
    'policy_freq': 2,           # NOTE(review): not referenced by any cell shown here
}





In [11]:
# =============================================================================
# Cell 2: eval_policy (only function not in utils)
# =============================================================================
def eval_policy(actor, env):
    """Roll out `actor` in `env` for one episode and collect every transition.

    Source: parallel_worker() in original PGA-MAP-Elites.

    Returns:
        A 3-tuple ``(env.tot_reward, copy of env.desc, transitions)``, where
        ``transitions`` is ``(states, actions, next_states, rewards, not_dones)``
        as arrays; rewards and not_dones are reshaped to one column.
    """
    obs = env.reset()
    steps = []  # one (state, action, next_state, reward, not_done) tuple per step
    finished = False

    while not finished:
        act = actor.select_action(np.array(obs))
        nxt, rew, finished, _info = env.step(act)

        # Once the step counter reaches the episode limit, the transition is
        # recorded as not-done regardless of the `done` flag.
        if env._step < env._max_episode_steps:
            terminal = float(finished)
        else:
            terminal = 0

        steps.append((obs.copy(), act.copy(), nxt.copy(), rew, 1.0 - terminal))
        obs = nxt

    # Transpose the per-step tuples into one sequence per field
    obs_seq, act_seq, nxt_seq, rew_seq, mask_seq = (list(col) for col in zip(*steps))
    transitions = (
        np.array(obs_seq),
        np.array(act_seq),
        np.array(nxt_seq),
        np.array(rew_seq).reshape(-1, 1),
        np.array(mask_seq).reshape(-1, 1),
    )
    return env.tot_reward, env.desc.copy(), transitions

In [12]:
# =============================================================================
# Cell 3: Setup
# =============================================================================
# Sizes that were bare literals; named so the intent is visible at the call site.
TEST_DAYS = 504    # trailing rows kept out of the training environment (2 * 252)
LOOKBACK = 20      # passed to PortfolioEnv as `lookback`
EPISODE_LEN = 50   # passed to PortfolioEnv as `episode_len`

# Seed first, so any randomness inside the constructors below is reproducible too.
torch.manual_seed(cfg['seeds'][0])
np.random.seed(cfg['seeds'][0])

# NOTE: unlike the earlier load, this read does not pass parse_dates, and it rebinds `data`.
data = pd.read_csv('data.csv', index_col=0)
env = PortfolioEnv(data.iloc[:-TEST_DAYS], lookback=LOOKBACK, episode_len=EPISODE_LEN)

critic = Critic(env.state_dim, env.action_dim, max_action=1.0,
                discount=cfg['gamma'], tau=cfg['tau'],
                policy_noise=cfg['policy_noise'], noise_clip=cfg['noise_clip'])
replay_buffer = ReplayBuffer(env.state_dim, env.action_dim)

# KD-tree over the CVT centroids; handed to add_to_archive in the main loop
kdt = KDTree(cvt(cfg['n_niches'], dim=2, samples=25000))
archive = {}

# Variation operator; actor_fn builds a fresh Actor with the configured hidden sizes
var_op = VariationalOperator(
    actor_fn=lambda: Actor(env.state_dim, env.action_dim, 1.0, cfg['hidden_sizes']),
    num_cpu=4,
    iso_sigma=cfg['iso_sigma'],
    line_sigma=cfg['line_sigma'],
    learning_rate=cfg['actor_lr'],
)


Mutation operator: False
Crossover operator: <bound method VariationalOperator.iso_dd of <models.pga_map_elites.official_variational_operators.VariationalOperator object at 0x169f7f230>>


In [13]:

# =============================================================================
# Cell 4: Main loop
# =============================================================================
LOG_EVERY = 500  # print a progress line each time this many further evaluations are done
n_evals = 0

while n_evals < cfg['max_evals']:

    if n_evals < cfg['random_init']:
        # Initial phase: evaluate freshly constructed Actors
        offspring = [Actor(env.state_dim, env.action_dim, 1.0, cfg['hidden_sizes'])
                     for _ in range(cfg['batch_size'])]
    else:
        # Train the critic only once the buffer holds more than one training batch
        if replay_buffer.size > cfg['train_batch_size']:
            critic.train(archive, replay_buffer, cfg['nr_of_steps_crit'], cfg['train_batch_size'])
            states = replay_buffer.sample_state(cfg['train_batch_size'], cfg['nr_of_steps_act'])
        else:
            states = None

        offspring = var_op(archive, cfg['batch_size'], cfg['proportion_evo'],
                          critic=critic, states=states, nr_of_steps_act=cfg['nr_of_steps_act'])

    # Evaluate each candidate, store its transitions and offer it to the archive
    for actor in offspring:
        fitness, desc, transitions = eval_policy(actor, env)
        replay_buffer.add(transitions)
        add_to_archive(Individual(actor, desc, fitness), desc, archive, kdt)

    evals_before = n_evals
    n_evals += len(offspring)

    # Log whenever a LOG_EVERY boundary was crossed in this iteration. A plain
    # `n_evals % 500 == 0` test only fires when the batch size divides 500.
    if n_evals // LOG_EVERY > evals_before // LOG_EVERY:
        best = max((x.fitness for x in archive.values()), default=0)
        print(f"[{n_evals}] Archive: {len(archive)} | Best: {best:.4f}")


[500] Archive: 10 | Best: 0.2264
[1000] Archive: 10 | Best: 0.2443
[1500] Archive: 11 | Best: 0.2443
[2000] Archive: 11 | Best: 0.2598
[2500] Archive: 12 | Best: 0.2651
[3000] Archive: 12 | Best: 0.2651
[3500] Archive: 12 | Best: 0.2651
[4000] Archive: 12 | Best: 0.2651
[4500] Archive: 12 | Best: 0.2651
[5000] Archive: 12 | Best: 0.2651
[5500] Archive: 12 | Best: 0.2651
[6000] Archive: 12 | Best: 0.2651
[6500] Archive: 12 | Best: 0.2651
[7000] Archive: 12 | Best: 0.2651
[7500] Archive: 12 | Best: 0.2651
[8000] Archive: 12 | Best: 0.2671
[8500] Archive: 12 | Best: 0.2671
[9000] Archive: 12 | Best: 0.2671
[9500] Archive: 12 | Best: 0.2671
[10000] Archive: 12 | Best: 0.2671
[10500] Archive: 12 | Best: 0.2671
[11000] Archive: 12 | Best: 0.2671
[11500] Archive: 12 | Best: 0.2671
[12000] Archive: 12 | Best: 0.2671
[12500] Archive: 12 | Best: 0.2671
[13000] Archive: 12 | Best: 0.2671
[13500] Archive: 12 | Best: 0.2671
[14000] Archive: 12 | Best: 0.2671
[14500] Archive: 12 | Best: 0.2671
[1500

In [14]:
# One line per filled niche: the archive key and the stored individual's descriptor
for niche, elite in archive.items():
    print(f"Niche: {niche}, Desc: {elite.desc}")

Niche: (0.29679727167350234, 0.9455219444258902), Desc: [0.28535311 0.99992669]
Niche: (0.22130476551078593, 0.9602005137577128), Desc: [0.20213107 0.99973565]
Niche: (0.4615543901621344, 0.9653754951248095), Desc: [0.50497935 0.99993211]
Niche: (0.38239283555189124, 0.9490894019967555), Desc: [0.34375136 0.99993032]
Niche: (0.124652237668541, 0.9607944990415933), Desc: [0.16737247 0.99966186]
Niche: (0.6059602532881218, 0.9521211446663399), Desc: [0.610335   0.99974883]
Niche: (0.9556708238445168, 0.9618753838900318), Desc: [1.         0.99968344]
Niche: (0.8707613757428194, 0.9632791279727301), Desc: [0.83423697 0.99986643]
Niche: (0.6973072120769641, 0.96263217162625), Desc: [0.70408646 0.99965018]
Niche: (0.5271468842241742, 0.9395767853518899), Desc: [0.53643557 0.99978822]
Niche: (0.04006899421329474, 0.949899604597551), Desc: [0.07636287 0.99975598]
Niche: (0.793282349960764, 0.9515608105759343), Desc: [0.7506817  0.99971271]


In [15]:

# =============================================================================
# Cell 5: Results
# =============================================================================
# Archive key of the individual with the highest fitness
best_key = max(archive, key=lambda niche: archive[niche].fitness)
best_fitness = archive[best_key].fitness
print(f"Best fitness: {best_fitness:.4f}")

# Share of the configured niches that hold an individual
coverage_pct = 100*len(archive)/cfg['n_niches']
print(f"Coverage: {coverage_pct:.1f}%")

Best fitness: 0.2723
Coverage: 9.4%


In [16]:
# =============================================================================
# Cell 6: Evaluate best PGA policy on test set (same as DDPG evaluation)
# =============================================================================
from utilities.metrics import calculate_test_performance

# Split off the last 504 rows as the test set; everything before is training data
lookback = 20
train_data, test_data = data.iloc[:-504], data.iloc[-504:]

# Best actor from the archive, switched to eval mode
best_actor = archive[best_key].x
best_actor.eval()

# The policy input is the last `lookback` training rows, flattened to one float32 vector
input_state = train_data.iloc[-lookback:].values.flatten().astype(np.float32)

# Single forward pass without gradient tracking
with torch.no_grad():
    state_tensor = torch.FloatTensor(input_state).unsqueeze(0)
    raw_action = best_actor(state_tensor).numpy().flatten()

# Softmax over the raw action; subtracting the max first keeps exp() from overflowing
raw_action = raw_action - np.max(raw_action)
exp_action = np.exp(raw_action)
weights = exp_action / np.sum(exp_action)

# Score this single, fixed weight vector on the whole test set
profit, sharpe = calculate_test_performance(test_data, weights)

print(f"\nPGA-MAP-Elites Test Results:")
print(f"Profit p.a.: {profit*100:.4f}%")
print(f"Sharpe Ratio: {sharpe:.4f}")

# Store next to the other methods for the comparison table
results['PGA'] = sharpe


PGA-MAP-Elites Test Results:
Profit p.a.: 18.7936%
Sharpe Ratio: 1.6148


In [17]:
# =============================================================================
# Cell 7: Updated Results Summary
# =============================================================================
# Comparison table driven by whatever `results` holds, in insertion order, so a new
# method only needs a `results[...]` entry and no edit to this cell.
print("\n" + "="*50)
print(f"{'Method':<12} {'Mean Sharpe':>12} {'Std':>10}")
print("-"*50)
for method, scores in results.items():
    if np.ndim(scores) == 0:
        # A single Sharpe value (e.g. EW, PGA): no spread to report
        print(f"{method:<12} {scores:>12.3f} {'-':>10}")
    else:
        # A list of per-fold Sharpe values (e.g. DDPG, DDES)
        print(f"{method:<12} {np.mean(scores):>12.3f} {np.std(scores):>10.3f}")
print("="*50)


Method        Mean Sharpe        Std
--------------------------------------------------
EW                  1.645          -
DDPG                1.328      0.340
DDES                1.003      0.175
PGA                 1.615          -


In [18]:
# One line per filled niche: archive key, stored descriptor and fitness
for niche, elite in archive.items():
    print(f"Niche: {niche}, Desc: {elite.desc}, Fitness: {elite.fitness:.4f}")

Niche: (0.29679727167350234, 0.9455219444258902), Desc: [0.28535311 0.99992669], Fitness: 0.1771
Niche: (0.22130476551078593, 0.9602005137577128), Desc: [0.20213107 0.99973565], Fitness: 0.1406
Niche: (0.4615543901621344, 0.9653754951248095), Desc: [0.50497935 0.99993211], Fitness: 0.1921
Niche: (0.38239283555189124, 0.9490894019967555), Desc: [0.34375136 0.99993032], Fitness: 0.1397
Niche: (0.124652237668541, 0.9607944990415933), Desc: [0.16737247 0.99966186], Fitness: 0.0917
Niche: (0.6059602532881218, 0.9521211446663399), Desc: [0.610335   0.99974883], Fitness: 0.2723
Niche: (0.9556708238445168, 0.9618753838900318), Desc: [1.         0.99968344], Fitness: 0.1964
Niche: (0.8707613757428194, 0.9632791279727301), Desc: [0.83423697 0.99986643], Fitness: 0.1463
Niche: (0.6973072120769641, 0.96263217162625), Desc: [0.70408646 0.99965018], Fitness: 0.2040
Niche: (0.5271468842241742, 0.9395767853518899), Desc: [0.53643557 0.99978822], Fitness: 0.2207
Niche: (0.04006899421329474, 0.949899604

In [19]:
print("\nPGA Portfolio Weights:")
min_weight = 0.01  # only show >1%
for i, ticker in enumerate(test_data.columns):
    weight = weights[i]
    if not weight > min_weight:
        continue
    print(f"{ticker:<6} {weight*100:>6.2f}%")


PGA Portfolio Weights:
XLK      2.80%
XLF      2.30%
XLV      2.43%
XLE      2.16%
XLI      2.70%
XLP      2.23%
XLY      2.61%
XLU      2.10%
XLB      2.95%
SPY      2.56%
IWM      2.25%
MDY      2.58%
VTV      2.68%
VUG      2.19%
EFA      2.64%
EEM      2.44%
VGK      2.42%
EWJ      2.60%
FXI      2.43%
EWY      2.47%
EWZ      2.52%
EWG      2.13%
EWU      2.47%
AGG      2.07%
TLT      2.53%
IEF      2.63%
SHY      2.13%
LQD      2.13%
HYG      2.28%
MUB      2.08%
TIP      2.48%
EMB      1.93%
BND      2.16%
GLD      2.01%
SLV      2.34%
VNQ      2.58%
DBA      2.10%
GSG      2.36%
IAU      2.58%
GDX      2.44%
QQQ      2.23%
VWO      2.30%
