# Portfolio Experiments

In [12]:
# Reload the autoreload extension and select mode 2 so that edits to imported
# project modules are picked up without restarting the kernel.
%reload_ext autoreload
%autoreload 2

In [13]:
import yaml, numpy as np, pandas as pd

def load_config(path='default_config.yaml', **ov):
    """Load experiment settings from a YAML file and apply keyword overrides.

    Args:
        path: Path of the YAML configuration file to read.
        **ov: Top-level keys to add or replace after loading,
            e.g. ``load_config(num_epochs=50)``.

    Returns:
        dict: The parsed configuration with the overrides applied.

    Raises:
        TypeError: If the top level of the YAML document is not a mapping.
    """
    # YAML documents are UTF-8 by convention; do not depend on the locale default.
    with open(path, encoding='utf-8') as f:
        cfg = yaml.safe_load(f)

    # safe_load returns None for an empty document; treat that as "no settings"
    # so that overrides still work instead of failing on None.update().
    if cfg is None:
        cfg = {}
    if not isinstance(cfg, dict):
        raise TypeError(
            f"Config file {path!r} must contain a mapping at the top level, "
            f"got {type(cfg).__name__}"
        )

    cfg.update(ov)
    return cfg

In [15]:
# Load the default configuration; keyword arguments override individual keys.
cfg = load_config()  # or: load_config(num_epochs=50)

In [4]:
# =============================================================================
# Cell 2: Data Loading & Cross-Validation Splits
# =============================================================================
# Expanding window validation for financial time series
# Avoids look-ahead bias and tests across multiple market regimes
# Reference: López de Prado (2018) "Advances in Financial Machine Learning", Ch. 7

def create_folds(data, test_years=2, init_train_years=10, val_years=1, step_years=1):
    """
    Expanding window cross-validation for time series.

    The last ``test_years`` of rows are held out as the test set. On the rows
    before it, each fold trains on everything from the start up to a cut-off
    and validates on the ``val_years`` that immediately follow; the cut-off
    then advances by ``step_years`` for the next fold.

    Args:
        data: Row-ordered (oldest first) DataFrame; rows are sliced by position.
        test_years: Length of the held-out test set, in years (may be 0).
        init_train_years: Length of the first training window, in years.
        val_years: Length of each validation window, in years.
        step_years: How far the training cut-off advances per fold, in years.

    Returns:
        folds: list of (train_df, val_df) tuples
        test: held-out test set (final test_years of data)

    Raises:
        ValueError: If a window length is negative, or if the step is not
            positive (a zero step would never terminate).
    """
    days_per_year = 252  # trading days

    # Convert to whole row counts so fractional years (e.g. 0.5) work with iloc.
    test_size = int(test_years * days_per_year)
    val_size = int(val_years * days_per_year)
    step = int(step_years * days_per_year)
    train_end = int(init_train_years * days_per_year)

    if test_size < 0 or val_size < 0 or train_end < 0:
        raise ValueError("test_years, init_train_years and val_years must be non-negative")
    if step <= 0:
        raise ValueError("step_years must advance the window by at least one trading day")

    # Split by an explicit position: with a zero-length test set, data.iloc[-0:]
    # would return the whole frame and data.iloc[:-0] an empty one.
    split = max(len(data) - test_size, 0)
    test = data.iloc[split:]
    remaining = data.iloc[:split]

    # Create expanding folds
    folds = []
    while train_end + val_size <= len(remaining):
        train = remaining.iloc[:train_end]
        val = remaining.iloc[train_end:train_end + val_size]
        folds.append((train, val))
        train_end += step

    return folds, test

# Read the dataset, using the first CSV column as the index and parsing it as dates.
data = pd.read_csv('data.csv', index_col=0, parse_dates=True)
# Build the expanding-window folds and the held-out test set with default window sizes.
folds, test = create_folds(data)
print(f"Created {len(folds)} folds, test set: {len(test)} days")

Created 5 folds, test set: 504 days


In [5]:
# =============================================================================
# Cell 3: Equal Weights Baseline
# =============================================================================
# 1/N portfolio - surprisingly hard to beat
# Reference: DeMiguel, Garlappi & Uppal (2009) "Optimal Versus Naive 
#            Diversification", Review of Financial Studies

# One identical weight per column of the test frame (1/N allocation).
n_assets = test.shape[1]
w = np.ones(n_assets) / n_assets

# Per-row portfolio value: weight every column, then add across columns.
weighted = test * w
r = weighted.sum(axis=1)

# Mean over standard deviation of the per-row series, scaled by sqrt(252).
daily_ratio = r.mean() / r.std()
ew_sharpe = daily_ratio * np.sqrt(252)
print(f"Equal Weights Sharpe: {ew_sharpe:.3f}")

Equal Weights Sharpe: 1.645


In [6]:
# =============================================================================
# Cell 4: DDPG / DDES-DDPG Runner
# =============================================================================
# DDPG: Lillicrap et al. (2015) "Continuous control with deep reinforcement learning"
# DDES: Hong et al. (2018) "Diversity-Driven Exploration Strategy for Deep RL"
#
# Key difference:
#   DDPG  - exploration via Gaussian noise on actions
#   DDES  - exploration via diversity term in actor loss: -Q(s,a) + α * D(a, a_prior)

from models.div_ddpg import DDPG
from models.networks import NeuralNetwork

def run_ddpg(train, val, test, cfg, use_ddes=False):
    """Fit a DDPG agent (optionally with the DDES term) and score it on ``test``.

    Args:
        train: Training data passed to ``agent.train``.
        val: Validation data passed to ``agent.train``.
        test: Data passed to ``agent.evaluate``.
        cfg: Mapping with the agent and training hyper-parameters read below.
        use_ddes: Forwarded to ``agent.train`` as ``use_ddes``.

    Returns:
        The second element of the second item returned by ``agent.evaluate``,
        which the notebook treats as the Sharpe ratio.
    """
    # Constructor arguments; only the first configured seed is used.
    agent_kwargs = {
        'lookback_window': cfg['lookback_window'],
        'predictor': NeuralNetwork,
        'batch_size': cfg['batch_size'],
        'hidden_sizes': cfg['hidden_sizes'],
        'seed': cfg['seeds'][0],
    }
    agent = DDPG(**agent_kwargs)

    # Training hyper-parameters are copied from cfg under the same names.
    hyper_keys = (
        'actor_lr', 'critic_lr', 'gamma', 'tau',
        'soft_update', 'num_epochs', 'patience', 'noise',
    )
    train_kwargs = {key: cfg[key] for key in hyper_keys}
    train_kwargs['use_ddes'] = use_ddes
    train_kwargs['ddes_alpha'] = cfg['ddes_alpha']
    agent.train(train, val, **train_kwargs)

    # evaluate() yields a pair whose second item is itself a pair.
    _first, (_inner_first, test_sharpe) = agent.evaluate(test)
    return test_sharpe

In [7]:
# =============================================================================
# Cell 5: Run Experiments Across Folds
# =============================================================================
# Sharpe ratios per method: a scalar for the baseline, one entry per fold otherwise.
results = {'EW': ew_sharpe, 'DDPG': [], 'DDES': []}

n_folds = len(folds)
for i, (train, val) in enumerate(folds):
    print(f"\n--- Fold {i+1}/{n_folds} ---")
    print(f"Train: {len(train)} days, Val: {len(val)} days")

    # Plain DDPG first, then the DDES variant, both scored on the same test set.
    for method, ddes_flag in (('DDPG', False), ('DDES', True)):
        fold_sharpe = run_ddpg(train, val, test, cfg, use_ddes=ddes_flag)
        results[method].append(fold_sharpe)


--- Fold 1/5 ---
Train: 2520 days, Val: 252 days
Epoch 1/100, Actor: 0.029523, Critic: 0.056732, Val: 0.063947
Epoch 2/100, Actor: 0.023541, Critic: 0.038785, Val: 0.063158
Epoch 3/100, Actor: 0.053638, Critic: 0.038965, Val: 0.064363
Epoch 4/100, Actor: 0.048778, Critic: 0.039997, Val: 0.062898
Epoch 5/100, Actor: 0.028295, Critic: 0.028644, Val: 0.061725
Epoch 6/100, Actor: 0.023837, Critic: 0.038226, Val: 0.069670
Epoch 7/100, Actor: 0.062768, Critic: 0.046346, Val: 0.051127
Epoch 8/100, Actor: 0.011503, Critic: 0.036531, Val: 0.038885
Epoch 9/100, Actor: 0.052739, Critic: 0.049695, Val: 0.029758
Epoch 10/100, Actor: 0.127211, Critic: 0.034204, Val: 0.026378
Epoch 11/100, Actor: 0.070279, Critic: 0.040387, Val: 0.028334
Epoch 12/100, Actor: 0.111693, Critic: 0.042556, Val: 0.024831
Epoch 13/100, Actor: 0.081022, Critic: 0.038051, Val: 0.048630
Epoch 14/100, Actor: 0.062855, Critic: 0.032015, Val: 0.049173
Epoch 15/100, Actor: 0.022099, Critic: 0.041674, Val: 0.036584
Epoch 16/100, 

In [8]:
# =============================================================================
# Cell 6: Results Summary
# =============================================================================
# Summary table: the baseline is a single number, the RL methods report the
# mean and standard deviation of their per-fold Sharpe ratios.
heavy_rule = "=" * 50
light_rule = "-" * 50

print("\n" + heavy_rule)
print(f"{'Method':<12} {'Mean Sharpe':>12} {'Std':>10}")
print(light_rule)
print(f"{'EW':<12} {results['EW']:>12.3f} {'-':>10}")
for method in ('DDPG', 'DDES'):
    fold_sharpes = results[method]
    print(f"{method:<12} {np.mean(fold_sharpes):>12.3f} {np.std(fold_sharpes):>10.3f}")
print(heavy_rule)


Method        Mean Sharpe        Std
--------------------------------------------------
EW                  1.645          -
DDPG                1.083      0.196
DDES                1.439      0.483


In [17]:
# =============================================================================
# Cell: PGA-MAP-Elites Setup
# =============================================================================
# PGA-MAP-Elites: Nilsson & Cully (2021) "Policy Gradient Assisted MAP-Elites"
# Combines TD3-style critics with MAP-Elites archive
# Reference: https://arxiv.org/abs/2102.10587

import torch
from models.pga_map_elites.pga_map_elites import main as pga_main, config as pga_config
from models.pga_map_elites.try_pga_out import PortfolioEnv

def run_pga(train, val, test, cfg, seed=42):
    """Run PGA-MAP-Elites on train+val and evaluate the fittest policy on ``test``.

    The data is handed to ``PortfolioEnv`` through CSV files. They are written
    to a private temporary directory that is removed when the function exits,
    including when training or evaluation raises.

    Args:
        train: DataFrame with the training rows.
        val: DataFrame with the validation rows; appended to ``train`` because
            PGA training does not use a separate validation set.
        test: DataFrame replayed step by step to score the selected policy.
        cfg: Mapping providing ``lookback_window`` and the ``pga_*`` settings
            copied into ``pga_config``.
        seed: Stored as ``pga_config['seed']`` and used in the results path.

    Returns:
        tuple: ``(sharpe, coverage, mean_weights)`` where ``sharpe`` is the
        annualised (sqrt(252)) ratio of mean to standard deviation of the
        per-step ``info['portfolio_return']`` values, ``coverage`` is the number
        of archive entries divided by ``cfg['pga_n_niches']``, and
        ``mean_weights`` is the average over steps of the normalised absolute
        actions.

    Raises:
        ValueError: If the archive returned by PGA-MAP-Elites is empty.
    """
    import os
    import tempfile

    lookback = cfg['lookback_window']
    # Number of evaluation steps; also used as the test episode length.
    n_test_steps = len(test) - lookback - 1

    with tempfile.TemporaryDirectory(prefix='pga_') as tmp_dir:
        train_csv = os.path.join(tmp_dir, 'temp_train.csv')
        test_csv = os.path.join(tmp_dir, 'temp_test.csv')

        # Combine train+val for PGA training (it doesn't need separate val)
        pd.concat([train, val]).to_csv(train_csv)

        # Create environment
        env = PortfolioEnv(train_csv, lookback=lookback, episode_len=50)

        # Update PGA config.
        # NOTE(review): this mutates the dict imported from the PGA module, so
        # the settings persist for later calls in the same kernel.
        pga_config['state_dim'] = env.state_dim
        pga_config['action_dim'] = env.action_dim
        pga_config['max_action'] = 1.0
        for name in (
            'n_niches', 'max_evals', 'random_init', 'eval_batch_size',
            'proportion_evo', 'nr_of_steps_crit', 'nr_of_steps_act',
            'iso_sigma', 'line_sigma',
        ):
            pga_config[name] = cfg[f'pga_{name}']
        pga_config['seed'] = seed
        pga_config['verbose'] = False  # reduce output
        pga_config['save_path'] = f'./results/pga_seed{seed}'

        # Run PGA-MAP-Elites
        archive = pga_main(env, pga_config)
        if len(archive) == 0:
            raise ValueError("PGA-MAP-Elites returned an empty archive; no policy to evaluate")

        # Get best policy from archive
        best_key = max(archive.keys(), key=lambda k: archive[k].fitness)
        best_policy = archive[best_key].x
        best_fitness = archive[best_key].fitness
        coverage = len(archive) / cfg['pga_n_niches']

        print(f"  Archive: {len(archive)} niches ({coverage*100:.1f}% coverage)")
        print(f"  Best training fitness: {best_fitness:.4f}")

        # Evaluate on test set
        test.to_csv(test_csv)
        test_env = PortfolioEnv(test_csv, lookback=lookback, episode_len=n_test_steps)

        state = test_env.reset()
        returns = []
        weights_history = []

        for _ in range(n_test_steps):
            with torch.no_grad():
                action = best_policy(torch.FloatTensor(state).unsqueeze(0)).cpu().numpy().flatten()
            # Normalize to portfolio weights (recorded for reporting only).
            weights = np.abs(action) / (np.sum(np.abs(action)) + 1e-8)
            weights_history.append(weights)

            # NOTE(review): the raw action, not `weights`, is passed to step();
            # presumably the environment normalises internally - confirm.
            state, reward, done, info = test_env.step(action)
            returns.append(info['portfolio_return'])
            if done:
                break

    returns = np.array(returns)
    # The epsilon guards against division by zero when returns are constant.
    sharpe = returns.mean() / (returns.std() + 1e-8) * np.sqrt(252)

    return sharpe, coverage, np.mean(weights_history, axis=0)

In [None]:
# =============================================================================
# Cell: Run PGA-MAP-Elites (takes longer - ~10-30 min per fold)
# =============================================================================
# Start with single fold for quick test
# Use the last fold, i.e. the one with the longest training window.
train, val = folds[-1]

print("Running PGA-MAP-Elites...")
# run_pga is called with its default seed; it returns the test Sharpe ratio,
# the archive coverage and the mean portfolio weights.
pga_sharpe, pga_coverage, pga_weights = run_pga(train, val, test, cfg)
print(f"\nPGA-MAP-Elites: Sharpe = {pga_sharpe:.3f}")

Running PGA-MAP-Elites...
PGA-MAP-Elites Initialization
Computing CVT with 128 niches, 2D behavior space...
Computing CVT (this can take a while...): /Users/ekaterinabasova/Desktop/untitled folder/master_thesis/models/pga_map_elites/CVT/centroids_128_2.dat
Initialization complete
Iteration 0, inertia 38.593280436349595.
Iteration 1, inertia 34.59361164187381.
Iteration 2, inertia 33.64976192842315.
Iteration 3, inertia 33.15567020805344.
Iteration 4, inertia 32.88090147210166.
Iteration 5, inertia 32.68559076226745.
Iteration 6, inertia 32.540346822875264.
Iteration 7, inertia 32.42608667732519.
Iteration 8, inertia 32.339230630860165.
Iteration 9, inertia 32.269545966147206.
Iteration 10, inertia 32.210762173031156.
Iteration 11, inertia 32.16753108118966.
Iteration 12, inertia 32.12624382857882.
Iteration 13, inertia 32.0871548299414.
Iteration 14, inertia 32.05561467030343.
Iteration 15, inertia 32.03092343637521.
Iteration 16, inertia 32.01303594205345.
Iteration 17, inertia 31.993

In [None]:
# =============================================================================
# Cell: Compare Portfolio Diversity
# =============================================================================
# Herfindahl Index: lower = more diversified (EW = 1/N ≈ 0.024 for 42 assets)

# Herfindahl index = sum of squared weights, computed for the 1/N allocation
# and for the mean PGA weights.
n_assets = test.shape[1]
ew_weights = np.ones(n_assets) / n_assets
ew_herfindahl = np.square(ew_weights).sum()

pga_herfindahl = np.square(pga_weights).sum()

print("Herfindahl Index (lower = more diverse):")
print(f"  Equal Weights: {ew_herfindahl:.4f}")
print(f"  PGA-MAP-Elites: {pga_herfindahl:.4f}")
print("\nTop 5 PGA allocations:")

# Positions of the five largest mean weights, largest first.
top_idx = np.argsort(pga_weights)[::-1][:5]
for i in top_idx:
    asset_name = test.columns[i]
    share_pct = pga_weights[i]*100
    print(f"  {asset_name}: {share_pct:.1f}%")

In [None]:
# =============================================================================
# Cell: Full Comparison (optional - run PGA on all folds)
# =============================================================================
# Warning: This takes a while

# pga_results = []
# for i, (train, val) in enumerate(folds):
#     print(f"\n--- Fold {i+1}/{len(folds)} ---")
#     sharpe, coverage, _ = run_pga(train, val, test, cfg, seed=cfg['seeds'][0])
#     pga_results.append(sharpe)
#     print(f"Sharpe: {sharpe:.3f}, Coverage: {coverage*100:.1f}%")
# 
# results['PGA'] = pga_results

In [None]:
# =============================================================================
# Cell: Final Results Table
# =============================================================================
# Final comparison table. Every figure is read from `results` and the PGA run
# rather than typed in by hand, so the table stays in sync when the notebook is
# re-executed and all "vs EW" entries use the same formula and rounding.
ew_final = results['EW']

print("\n" + "="*60)
print("RESULTS: Open-Endedness in Portfolio Optimization")
print("="*60)
print(f"{'Method':<20} {'Sharpe':>10} {'Std':>10} {'vs EW':>10}")
print("-"*60)
print(f"{'Equal Weights':<20} {ew_final:>10.3f} {'-':>10} {'-':>10}")
for label, key in (('DDPG', 'DDPG'), ('DDES-DDPG', 'DDES')):
    # Mean and population standard deviation across folds, as in the summary cell.
    mean_sharpe = np.mean(results[key])
    std_sharpe = np.std(results[key])
    print(f"{label:<20} {mean_sharpe:>10.3f} {std_sharpe:>10.3f} {(mean_sharpe/ew_final-1)*100:>+9.0f}%")
# PGA was run on a single fold, so it has no across-fold standard deviation.
print(f"{'PGA-MAP-Elites':<20} {pga_sharpe:>10.3f} {'-':>10} {(pga_sharpe/ew_final-1)*100:>+9.0f}%")
print("="*60)
print(f"\nPGA Archive Coverage: {pga_coverage*100:.1f}%")