# Portfolio Experiments

In [6]:
%reload_ext autoreload
%autoreload 2

In [7]:
import yaml, numpy as np, pandas as pd

def load_config(path='default_config.yaml', **ov):
    """Load a YAML config file and apply top-level keyword overrides.

    Args:
        path: Path of the YAML file to read.
        **ov: Overrides applied with ``dict.update`` semantics: each keyword
            replaces the matching top-level key wholesale. Nested sections are
            replaced, not merged.

    Returns:
        dict: The parsed configuration with the overrides applied.

    Raises:
        TypeError: If the top level of the YAML document is not a mapping.
    """
    with open(path) as f:
        cfg = yaml.safe_load(f)

    # safe_load yields None for an empty document; treat that as "no settings"
    # rather than failing on None.update(...).
    if cfg is None:
        cfg = {}
    if not isinstance(cfg, dict):
        raise TypeError(
            f"Top level of {path!r} must be a mapping, got {type(cfg).__name__}"
        )

    cfg.update(ov)
    return cfg

In [8]:
# Overrides passed to load_config() are applied to top-level keys only.
# NOTE(review): run_ddpg reads num_epochs from cfg['ddpg'], so a top-level
# num_epochs override would not reach it — confirm against default_config.yaml.
cfg = load_config()  # or: load_config(num_epochs=50)

In [9]:
# =============================================================================
# Cell 2: Data Loading & Cross-Validation Splits
# =============================================================================
# Expanding window validation for financial time series
# Avoids look-ahead bias and tests across multiple market regimes
# Reference: de Prado (2018) "Advances in Financial Machine Learning", Ch. 7

def create_folds(data, test_years=2, init_train_years=10, val_years=1, step_years=1,
                 days_per_year=252):
    """
    Expanding window cross-validation for time series.

    The last ``test_years`` of rows are held out as the test set. On the rest,
    every fold trains on rows ``[0, train_end)`` and validates on the
    ``val_years`` of rows that follow; ``train_end`` grows by ``step_years``
    per fold until the validation window no longer fits.

    Args:
        data: Row-ordered pandas object (oldest first) supporting ``.iloc``.
        test_years: Length of the held-out test set, in years (0 = no test set).
        init_train_years: Length of the first training window, in years.
        val_years: Length of every validation window, in years.
        step_years: Growth of the training window between folds, in years.
        days_per_year: Rows per year (252 trading days by default).

    Returns:
        folds: list of (train_df, val_df) tuples
        test: held-out test set (final test_years of data)

    Raises:
        ValueError: If a window length is negative or the step is not positive.
    """
    # Work in whole rows so fractional year counts still give valid positions.
    test_size = int(test_years * days_per_year)
    train_end = int(init_train_years * days_per_year)
    val_size = int(val_years * days_per_year)
    step = int(step_years * days_per_year)

    if min(test_size, train_end, val_size) < 0:
        raise ValueError("test_years, init_train_years and val_years must be non-negative")
    if step <= 0:
        # A zero step would never advance the window and loop forever.
        raise ValueError("step_years * days_per_year must be at least one row")

    # Split on an explicit position: with test_size == 0, negative slicing
    # (iloc[-0:]) would return the whole frame as "test" and leave nothing to fold.
    split_at = max(len(data) - test_size, 0)
    remaining = data.iloc[:split_at]
    test = data.iloc[split_at:]

    # Create expanding folds: one per training end point whose validation
    # window still fits inside the non-test rows.
    last_train_end = len(remaining) - val_size
    folds = [
        (remaining.iloc[:end], remaining.iloc[end:end + val_size])
        for end in range(train_end, last_train_end + 1, step)
    ]

    return folds, test

# Load the dataset; the first CSV column becomes the index and is parsed as dates.
data = pd.read_csv('data.csv', index_col=0, parse_dates=True)
# Build the validation folds and the held-out test set with create_folds' defaults.
folds, test = create_folds(data)
print(f"Created {len(folds)} folds, test set: {len(test)} days")

Created 5 folds, test set: 504 days


In [10]:
# =============================================================================
# Cell 3: Equal Weights Baseline
# =============================================================================
# 1/N portfolio - surprisingly hard to beat
# Reference: DeMiguel, Garlappi & Uppal (2009) "Optimal Versus Naive 
#            Diversification", Review of Financial Studies

# One equal weight per column of the test frame.
n_assets = test.shape[1]
w = np.full(n_assets, 1.0 / n_assets)

# Row-wise weighted sum of the test frame.
# NOTE(review): assumes `test` holds per-period returns — confirm against data.csv.
r = test.mul(w, axis="columns").sum(axis="columns")

# Mean/std ratio scaled by sqrt(252).
ew_sharpe = r.mean() / r.std() * np.sqrt(252)
print(f"Equal Weights Sharpe: {ew_sharpe:.3f}")

Equal Weights Sharpe: 1.645


In [13]:
# =============================================================================
# Cell 4: DDPG / DDES-DDPG Runner
# =============================================================================
# DDPG: Lillicrap et al. (2015) "Continuous control with deep reinforcement learning"
# DDES: Hong et al. (2018) "Diversity-Driven Exploration Strategy for Deep RL"
#
# Key difference:
#   DDPG  - exploration via Gaussian noise on actions
#   DDES  - exploration via diversity term in actor loss: -Q(s,a) + α * D(a, a_prior)

from models.div_ddpg import DDPG
from models.networks import NeuralNetwork

def run_ddpg(train, val, test, cfg, use_ddes=False, seed=None):
    """Train and evaluate DDPG or DDES-DDPG.

    Args:
        train: Training data passed to ``agent.train``.
        val: Validation data passed to ``agent.train``.
        test: Data passed to ``agent.evaluate``.
        cfg: Config mapping. Reads the top-level keys ``lookback_window``,
            ``hidden_sizes``, ``seeds``, ``actor_lr``, ``critic_lr``, ``gamma``
            and ``tau``, plus ``batch_size``, ``num_epochs``, ``patience``,
            ``noise`` and ``ddes_alpha`` from the nested ``cfg['ddpg']`` section.
        use_ddes: Forwarded to ``agent.train`` to select the DDES variant.
        seed: Seed for the agent. Defaults to ``cfg['seeds'][0]``, so existing
            callers are unaffected; pass a value to run other seeds.

    Returns:
        The value unpacked as the Sharpe ratio from ``agent.evaluate(test)``.
    """
    ddpg_cfg = cfg['ddpg']  # Extract nested config
    if seed is None:
        seed = cfg['seeds'][0]

    agent = DDPG(
        lookback_window=cfg['lookback_window'],
        predictor=NeuralNetwork,
        batch_size=ddpg_cfg['batch_size'],
        hidden_sizes=cfg['hidden_sizes'],
        seed=seed,
    )
    agent.train(
        train, val,
        actor_lr=cfg['actor_lr'],
        critic_lr=cfg['critic_lr'],
        gamma=cfg['gamma'],
        tau=cfg['tau'],
        soft_update=True,  # fixed here; not read from the config
        num_epochs=ddpg_cfg['num_epochs'],
        patience=ddpg_cfg['patience'],
        noise=ddpg_cfg['noise'],
        use_ddes=use_ddes,
        ddes_alpha=ddpg_cfg['ddes_alpha'],
    )
    # evaluate() returns a nested pair; only the inner second item is kept.
    _, (_, sharpe) = agent.evaluate(test)
    return sharpe

In [14]:
# =============================================================================
# Cell 5: Run Experiments Across Folds
# =============================================================================
# Per-method Sharpe ratios: one scalar for the baseline, one entry per fold otherwise.
results = {'EW': ew_sharpe, 'DDPG': [], 'DDES': []}

n_folds = len(folds)
for fold_no, (train, val) in enumerate(folds, start=1):
    print(f"\n--- Fold {fold_no}/{n_folds} ---")
    print(f"Train: {len(train)} days, Val: {len(val)} days")

    # Plain DDPG first, then the DDES variant; both are scored on the same test set.
    for label, ddes_flag in (('DDPG', False), ('DDES', True)):
        results[label].append(run_ddpg(train, val, test, cfg, use_ddes=ddes_flag))


--- Fold 1/5 ---
Train: 2520 days, Val: 252 days
Epoch 1/100, Actor: -0.093808, Critic: 0.057956, Val: 0.070732
Epoch 2/100, Actor: -0.095086, Critic: 0.003127, Val: 0.070831
Epoch 3/100, Actor: -0.097400, Critic: 0.137951, Val: 0.070674
Epoch 4/100, Actor: -0.098356, Critic: 0.006272, Val: 0.070586
Epoch 5/100, Actor: -0.101630, Critic: 0.046575, Val: 0.070459
Epoch 6/100, Actor: -0.103872, Critic: 0.008254, Val: 0.070380
Epoch 7/100, Actor: -0.103924, Critic: 0.054903, Val: 0.070321
Epoch 8/100, Actor: -0.106650, Critic: 0.116009, Val: 0.070310
Epoch 9/100, Actor: -0.108326, Critic: 0.000044, Val: 0.070400
Epoch 10/100, Actor: -0.109485, Critic: 0.034090, Val: 0.070481
Epoch 11/100, Actor: -0.112760, Critic: 0.026783, Val: 0.070635
Epoch 12/100, Actor: -0.112713, Critic: 0.005577, Val: 0.070845
Epoch 13/100, Actor: -0.112391, Critic: 0.022135, Val: 0.071065
Epoch 14/100, Actor: -0.115446, Critic: 0.142780, Val: 0.071286
Epoch 15/100, Actor: -0.118737, Critic: 0.070524, Val: 0.071593

In [15]:
# =============================================================================
# Cell 6: Results Summary
# =============================================================================
rule = "=" * 50

print("\n" + rule)
print(f"{'Method':<12} {'Mean Sharpe':>12} {'Std':>10}")
print("-" * 50)
# The baseline is a single number, so it has no spread to report.
print(f"{'EW':<12} {results['EW']:>12.3f} {'-':>10}")
# Learned methods: mean and standard deviation over the per-fold Sharpe ratios.
for label in ('DDPG', 'DDES'):
    fold_scores = results[label]
    print(f"{label:<12} {np.mean(fold_scores):>12.3f} {np.std(fold_scores):>10.3f}")
print(rule)


Method        Mean Sharpe        Std
--------------------------------------------------
EW                  1.645          -
DDPG                1.264      0.707
DDES                0.806      0.594


In [None]:
#!pip install cloudpickle


Collecting cloudpickle
  Using cached cloudpickle-3.1.2-py3-none-any.whl.metadata (7.1 kB)
Using cached cloudpickle-3.1.2-py3-none-any.whl (22 kB)
Installing collected packages: cloudpickle
Successfully installed cloudpickle-3.1.2


In [1]:
# =============================================================================
# Cell 1: Imports + Config
# =============================================================================
import numpy as np
import pandas as pd
import torch
from sklearn.neighbors import KDTree

from models.pga_map_elites.official_networks import Actor, Critic
from models.pga_map_elites.official_utils import (
    ReplayBuffer, Individual, add_to_archive, cvt
)
from models.pga_map_elites.official_variational_operators import VariationalOperator
from models.pga_map_elites.portfolio_env import PortfolioEnv

# Settings for the PGA-MAP-Elites cells below.
# NOTE(review): this rebinds the notebook-level name `cfg` that the DDPG cells
# obtain from load_config(); run_ddpg indexes cfg['ddpg'], which this dict does
# not define, so re-running the DDPG cells after this one would fail.
cfg = {
    'seeds': [42],  # first entry seeds torch and numpy in the setup cell
    'hidden_sizes': [128, 128],  # passed to every Actor(...) constructor
    'gamma': 0.99,  # passed to Critic as `discount`
    'tau': 0.005,  # passed to Critic as `tau`
    'actor_lr': 0.001,  # passed to VariationalOperator as `learning_rate`
    'n_niches': 128,  # number of points requested from cvt(); denominator of coverage
    'max_evals': 5000,  # main loop stops once this many policies were evaluated
    'random_init': 200,  # below this many evaluations, offspring are fresh Actors
    'batch_size': 100,  # offspring requested per main-loop iteration
    'proportion_evo': 0.5,  # forwarded to the variation operator call
    'iso_sigma': 0.005,  # passed to VariationalOperator
    'line_sigma': 0.05,  # passed to VariationalOperator
    'nr_of_steps_crit': 300,  # forwarded to critic.train()
    'nr_of_steps_act': 10,  # forwarded to replay_buffer.sample_state() and the variation operator
    'train_batch_size': 256,  # minimum buffer size before training; batch size for critic.train()
    'policy_noise': 0.2,  # passed to Critic
    'noise_clip': 0.5,  # passed to Critic
    'policy_freq': 2,  # NOTE(review): not referenced by any cell visible here — confirm it is still needed
}





In [2]:
# =============================================================================
# Cell 2: eval_policy (only function not in utils)
# =============================================================================
def eval_policy(actor, env):
    """Roll out one episode of `actor` in `env` and collect its transitions.

    Source: parallel_worker() in original PGA-MAP-Elites.

    Returns:
        (env.tot_reward, copy of env.desc, transitions), where transitions is
        the tuple (states, actions, next_states, rewards, not_dones) of arrays;
        rewards and not_dones are reshaped to column vectors.
    """
    obs = env.reset()
    trajectory = []
    finished = False

    while not finished:
        act = actor.select_action(np.array(obs))
        nxt, rew, finished, _ = env.step(act)

        # An episode cut off by the step limit is not recorded as terminal.
        if env._step < env._max_episode_steps:
            terminal = float(finished)
        else:
            terminal = 0

        trajectory.append((obs.copy(), act.copy(), nxt.copy(), rew, 1.0 - terminal))
        obs = nxt

    # Transpose the per-step records into one array per field.
    s, a, s_next, r, nd = (np.array(column) for column in zip(*trajectory))
    transitions = (s, a, s_next, r.reshape(-1, 1), nd.reshape(-1, 1))
    return env.tot_reward, env.desc.copy(), transitions

In [3]:
# =============================================================================
# Cell 3: Setup
# =============================================================================
# NOTE(review): re-reads data.csv and rebinds `data`, this time without
# parse_dates=True (the earlier load in this notebook parses the index as dates).
data = pd.read_csv('data.csv', index_col=0)
# The environment sees every row except the last 504, which the later
# evaluation cell uses as test data.
env = PortfolioEnv(data.iloc[:-504], lookback=20, episode_len=50)

# Seed torch and numpy with the first configured seed.
# NOTE(review): seeding happens after PortfolioEnv is constructed; if its
# constructor draws random numbers they are not covered by this seed — confirm.
torch.manual_seed(cfg['seeds'][0])
np.random.seed(cfg['seeds'][0])

# Critic sized from the environment's state/action dimensions.
critic = Critic(env.state_dim, env.action_dim, max_action=1.0,
                discount=cfg['gamma'], tau=cfg['tau'],
                policy_noise=cfg['policy_noise'], noise_clip=cfg['noise_clip'])
# Buffer that receives the transitions of every evaluated policy in the main loop.
replay_buffer = ReplayBuffer(env.state_dim, env.action_dim)
# KD-tree built over the points returned by cvt(); handed to add_to_archive.
kdt = KDTree(cvt(cfg['n_niches'], dim=2, samples=25000))
# Archive of individuals, filled by add_to_archive in the main loop.
archive = {}

# Variation operator (uses your existing iso_dd and pg_variation)
var_op = VariationalOperator(
    actor_fn=lambda: Actor(env.state_dim, env.action_dim, 1.0, cfg['hidden_sizes']),
    num_cpu=4,
    iso_sigma=cfg['iso_sigma'],
    line_sigma=cfg['line_sigma'],
    learning_rate=cfg['actor_lr'],
)


Mutation operator: False
Crossover operator: <bound method VariationalOperator.iso_dd of <models.pga_map_elites.official_variational_operators.VariationalOperator object at 0x1625086e0>>


In [4]:

# =============================================================================
# Cell 4: Main loop
# =============================================================================
n_evals = 0

# Progress is reported whenever the evaluation count reaches the next threshold.
# A plain `n_evals % 500 == 0` test only fires when the count lands exactly on a
# multiple of 500, which stops happening once the number of offspring per
# iteration does not divide 500.
log_every = 500
next_log = log_every

while n_evals < cfg['max_evals']:

    if n_evals < cfg['random_init']:
        # Bootstrap phase: evaluate freshly constructed actors.
        offspring = [Actor(env.state_dim, env.action_dim, 1.0, cfg['hidden_sizes'])
                     for _ in range(cfg['batch_size'])]
    else:
        # Train the critic only once the buffer holds more than one training batch.
        if replay_buffer.size > cfg['train_batch_size']:
            critic.train(archive, replay_buffer, cfg['nr_of_steps_crit'], cfg['train_batch_size'])
            states = replay_buffer.sample_state(cfg['train_batch_size'], cfg['nr_of_steps_act'])
        else:
            states = None

        # Produce the next batch of candidate actors from the archive.
        offspring = var_op(archive, cfg['batch_size'], cfg['proportion_evo'],
                          critic=critic, states=states, nr_of_steps_act=cfg['nr_of_steps_act'])

    # Evaluate every candidate, store its transitions, and offer it to the archive.
    for actor in offspring:
        fitness, desc, transitions = eval_policy(actor, env)
        replay_buffer.add(transitions)
        add_to_archive(Individual(actor, desc, fitness), desc, archive, kdt)

    n_evals += len(offspring)

    if n_evals >= next_log:
        best = max((x.fitness for x in archive.values()), default=0)
        print(f"[{n_evals}] Archive: {len(archive)} | Best: {best:.4f}")
        # Move the threshold past the current count (it may have jumped several intervals).
        while next_log <= n_evals:
            next_log += log_every


[500] Archive: 1 | Best: 0.2279
[1000] Archive: 1 | Best: 0.2491
[1500] Archive: 1 | Best: 0.2491
[2000] Archive: 1 | Best: 0.2491
[2500] Archive: 1 | Best: 0.2491
[3000] Archive: 1 | Best: 0.2491
[3500] Archive: 1 | Best: 0.2547
[4000] Archive: 1 | Best: 0.2547
[4500] Archive: 1 | Best: 0.2561
[5000] Archive: 1 | Best: 0.2561


In [5]:

# =============================================================================
# Cell 5: Results
# =============================================================================
# Niche key whose stored individual has the highest fitness.
best_key = max(archive, key=lambda niche: archive[niche].fitness)
best_fitness = archive[best_key].fitness
print(f"Best fitness: {best_fitness:.4f}")

# Share of the configured niches that hold an individual, in percent.
filled_pct = 100 * len(archive) / cfg['n_niches']
print(f"Coverage: {filled_pct:.1f}%")

Best fitness: 0.2561
Coverage: 0.8%


In [16]:
# =============================================================================
# Cell 6: Evaluate best PGA policy on the test set (same final 504 rows as the DDPG experiments)
# =============================================================================
from utilities.metrics import calculate_test_performance

# Split the data: the final 504 rows are the test window (same as the DDPG
# experiments), everything before them is training data.
train_data, test_data = data.iloc[:-504], data.iloc[-504:]

# Policy stored under the best archive key.
best_actor = archive[best_key].x
best_actor.eval()

# The policy input is the last `lookback` training rows, flattened to one float32 vector.
lookback = 20
input_state = train_data.iloc[-lookback:].values.flatten().astype(np.float32)

# Single forward pass without gradient tracking; add a batch axis of size one.
with torch.no_grad():
    state_tensor = torch.from_numpy(input_state).unsqueeze(0)
    raw_action = best_actor(state_tensor).numpy().flatten()

# Softmax over the raw action, shifted by its maximum before exponentiating.
# NOTE(review): the original comment says env.step normalizes the same way — confirm.
raw_action = raw_action - np.max(raw_action)
exp_action = np.exp(raw_action)
weights = exp_action / np.sum(exp_action)

# Score this one fixed weight vector on the test window.
profit, sharpe = calculate_test_performance(test_data, weights)

print(f"\nPGA-MAP-Elites Test Results:")
print(f"Profit p.a.: {profit*100:.4f}%")
print(f"Sharpe Ratio: {sharpe:.4f}")

# Store next to the other methods for the comparison table.
results['PGA'] = sharpe


PGA-MAP-Elites Test Results:
Profit p.a.: 19.4541%
Sharpe Ratio: 1.6435


In [17]:
# =============================================================================
# Cell 7: Updated Results Summary
# =============================================================================
rule = "=" * 50

print("\n" + rule)
print(f"{'Method':<12} {'Mean Sharpe':>12} {'Std':>10}")
print("-" * 50)
# Single-number entries (no spread to report) go first and last, as before.
print(f"{'EW':<12} {results['EW']:>12.3f} {'-':>10}")
# Per-fold methods: mean and standard deviation over the fold Sharpe ratios.
for label in ('DDPG', 'DDES'):
    fold_scores = results[label]
    print(f"{label:<12} {np.mean(fold_scores):>12.3f} {np.std(fold_scores):>10.3f}")
print(f"{'PGA':<12} {results['PGA']:>12.3f} {'-':>10}")
print(rule)


Method        Mean Sharpe        Std
--------------------------------------------------
EW                  1.645          -
DDPG                1.264      0.707
DDES                0.806      0.594
PGA                 1.643          -


In [18]:
# List every occupied niche with its stored descriptor and fitness.
for niche, individual in archive.items():
    print(f"Niche: {niche}, Desc: {individual.desc}, Fitness: {individual.fitness:.4f}")

Niche: (0.04006899421329474, 0.949899604597551), Desc: [0.01889021 0.97561234], Fitness: 0.2561


In [19]:
print("\nPGA Portfolio Weights:")
for idx, symbol in enumerate(test_data.columns):
    share = weights[idx]
    # Skip positions that are not above the 1% display threshold.
    if not share > 0.01:
        continue
    print(f"{symbol:<6} {share*100:>6.2f}%")


PGA Portfolio Weights:
XLK      3.06%
XLF      2.44%
XLV      2.83%
XLE      2.06%
XLI      2.43%
XLP      2.65%
XLY      2.18%
XLU      2.17%
XLB      2.86%
SPY      3.17%
IWM      2.55%
MDY      2.55%
VTV      2.44%
VUG      2.64%
EFA      2.18%
EEM      2.80%
VGK      1.99%
EWJ      2.26%
FXI      1.70%
EWY      2.52%
EWZ      2.98%
EWG      2.71%
EWU      2.02%
AGG      2.46%
TLT      2.18%
IEF      2.13%
SHY      1.92%
LQD      1.92%
HYG      2.35%
MUB      1.99%
TIP      2.31%
EMB      2.02%
BND      1.59%
GLD      2.28%
SLV      3.00%
VNQ      2.33%
DBA      2.23%
GSG      2.35%
IAU      2.58%
GDX      2.79%
QQQ      1.73%
VWO      2.63%
