# Portfolio Experiments

In [12]:
%reload_ext autoreload
%autoreload 2

In [13]:
import yaml, numpy as np, pandas as pd

def load_config(path='default_config.yaml', **ov):
    """Load a YAML config file and apply keyword overrides on top of it.

    Args:
        path: Path of the YAML file to read.
        **ov: Top-level keys to add or replace in the loaded config,
            e.g. ``load_config(num_epochs=50)``.

    Returns:
        dict: The parsed top-level mapping with the overrides applied.
        Overrides are applied with ``dict.update``, so a nested mapping is
        replaced as a whole rather than merged.

    Raises:
        TypeError: If the top-level YAML node is not a mapping.
    """
    with open(path) as f:
        cfg = yaml.safe_load(f)

    # An empty (or comment-only) YAML document parses to None. Treat it as an
    # empty config so that overrides still work instead of failing on
    # `None.update(...)`.
    if cfg is None:
        cfg = {}
    if not isinstance(cfg, dict):
        raise TypeError(
            f"Config file {path!r} must contain a mapping at the top level, "
            f"got {type(cfg).__name__}"
        )

    cfg.update(ov)
    return cfg

In [15]:
# Load the run configuration from the default YAML file; keyword arguments
# passed to load_config() override individual top-level keys.
cfg = load_config()  # or: load_config(num_epochs=50)

In [4]:
# =============================================================================
# Cell 2: Data Loading & Cross-Validation Splits
# =============================================================================
# Expanding window validation for financial time series
# Avoids look-ahead bias and tests across multiple market regimes
# Reference: de Prado (2018) "Advances in Financial Machine Learning", Ch. 7

def create_folds(data, test_years=2, init_train_years=10, val_years=1, step_years=1,
                 days_per_year=252):
    """
    Expanding window cross-validation for time series.

    The last ``test_years`` of rows are held out as the test set. On the rows
    before it, every fold trains on everything from the first row up to a
    cut-off and validates on the ``val_years`` immediately after that cut-off;
    the cut-off then moves forward by ``step_years``.

    Args:
        data: Row-ordered (oldest first) object supporting ``len()`` and
            ``.iloc`` slicing, e.g. a pandas DataFrame.
        test_years: Length of the held-out test set, in years. ``0`` means
            no test set (an empty frame is returned for ``test``).
        init_train_years: Length of the first fold's training window, in years.
        val_years: Length of each validation window, in years.
        step_years: How far the training cut-off advances per fold, in years.
            Must be positive.
        days_per_year: Rows per year (252 trading days by default).

    Returns:
        folds: list of (train_df, val_df) tuples
        test: held-out test set (final test_years of data)

    Raises:
        ValueError: If ``test_years`` is negative or ``step_years`` does not
            advance the window by at least one row.
    """
    # Convert year counts to whole numbers of rows so fractional years
    # (e.g. val_years=0.5) yield valid integer slice bounds.
    test_size = int(round(test_years * days_per_year))
    val_size = int(round(val_years * days_per_year))
    step = int(round(step_years * days_per_year))
    train_end = int(round(init_train_years * days_per_year))

    if test_size < 0:
        raise ValueError("test_years must be non-negative")
    if step <= 0:
        # A non-positive step would never move the window: infinite loop.
        raise ValueError("step_years must advance the window by at least one row")

    # Hold out final test set. Slice from an explicit split position: with
    # negative indexing, test_size == 0 would turn `iloc[-0:]` into "all rows"
    # and `iloc[:-0]` into "no rows".
    split = max(len(data) - test_size, 0)
    test = data.iloc[split:]
    remaining = data.iloc[:split]

    # Create expanding folds
    folds = []
    while train_end + val_size <= len(remaining):
        train = remaining.iloc[:train_end]
        val = remaining.iloc[train_end:train_end + val_size]
        folds.append((train, val))
        train_end += step

    return folds, test

# Read the dataset using the first CSV column as the index; parse_dates=True
# asks pandas to parse that index as dates.
data = pd.read_csv('data.csv', index_col=0, parse_dates=True)
# Split with create_folds' defaults (2 test years, 10 initial training years,
# 1 validation year, 1-year step).
folds, test = create_folds(data)
print(f"Created {len(folds)} folds, test set: {len(test)} days")

Created 5 folds, test set: 504 days


In [5]:
# =============================================================================
# Cell 3: Equal Weights Baseline
# =============================================================================
# 1/N portfolio - surprisingly hard to beat
# Reference: DeMiguel, Garlappi & Uppal (2009) "Optimal Versus Naive 
#            Diversification", Review of Financial Studies

# One equal weight per column of the test frame (1/N).
# NOTE(review): assumes `test` holds per-period asset returns, not prices -- confirm.
n_assets = test.shape[1]
w = np.ones(n_assets) / n_assets

# Portfolio series: weighted sum across columns for every row.
r = test.mul(w, axis=1).sum(axis=1)

# Mean over standard deviation, scaled by sqrt(252) to annualise.
per_period_ratio = r.mean() / r.std()
ew_sharpe = per_period_ratio * np.sqrt(252)
print(f"Equal Weights Sharpe: {ew_sharpe:.3f}")

Equal Weights Sharpe: 1.645


In [6]:
# =============================================================================
# Cell 4: DDPG / DDES-DDPG Runner
# =============================================================================
# DDPG: Lillicrap et al. (2015) "Continuous control with deep reinforcement learning"
# DDES: Hong et al. (2018) "Diversity-Driven Exploration Strategy for Deep RL"
#
# Key difference:
#   DDPG  - exploration via Gaussian noise on actions
#   DDES  - exploration via diversity term in actor loss: -Q(s,a) + α * D(a, a_prior)

from models.div_ddpg import DDPG
from models.networks import NeuralNetwork

def run_ddpg(train, val, test, cfg, use_ddes=False):
    """Build a DDPG agent from ``cfg``, train it, and score it on ``test``.

    Args:
        train: Training data handed to ``agent.train``.
        val: Validation data handed to ``agent.train``.
        test: Data handed to ``agent.evaluate``.
        cfg: Mapping that must provide ``lookback_window``, ``batch_size``,
            ``hidden_sizes``, ``seeds`` (only the first entry is used),
            ``actor_lr``, ``critic_lr``, ``gamma``, ``tau``, ``soft_update``,
            ``num_epochs``, ``patience``, ``noise`` and ``ddes_alpha``.
        use_ddes: Forwarded to ``agent.train`` as ``use_ddes``.

    Returns:
        The second element of the second item returned by ``agent.evaluate``
        (named ``sharpe`` here).
    """
    # Constructor arguments.
    agent_kwargs = {
        'lookback_window': cfg['lookback_window'],
        'predictor': NeuralNetwork,
        'batch_size': cfg['batch_size'],
        'hidden_sizes': cfg['hidden_sizes'],
        'seed': cfg['seeds'][0],
    }
    agent = DDPG(**agent_kwargs)

    # Training hyper-parameters copied from cfg under the same names.
    copied_keys = (
        'actor_lr', 'critic_lr', 'gamma', 'tau',
        'soft_update', 'num_epochs', 'patience', 'noise',
    )
    train_kwargs = {key: cfg[key] for key in copied_keys}
    train_kwargs['use_ddes'] = use_ddes
    train_kwargs['ddes_alpha'] = cfg['ddes_alpha']
    agent.train(train, val, **train_kwargs)

    # evaluate() returns a pair whose second item is itself a pair; only the
    # last value is kept.
    _first, (_second, sharpe) = agent.evaluate(test)
    return sharpe

In [7]:
# =============================================================================
# Cell 5: Run Experiments Across Folds
# =============================================================================
# Sharpe per method: a single number for the equal-weights baseline, one
# entry per fold for the two learned variants.
results = {'EW': ew_sharpe, 'DDPG': [], 'DDES': []}

# (results key, use_ddes flag) in the order the variants are run on each fold.
variants = (('DDPG', False), ('DDES', True))

for i, (train, val) in enumerate(folds):
    print(f"\n--- Fold {i+1}/{len(folds)} ---")
    print(f"Train: {len(train)} days, Val: {len(val)} days")

    # Every variant is trained on this fold and scored on the same `test` set.
    for label, ddes_on in variants:
        results[label].append(run_ddpg(train, val, test, cfg, use_ddes=ddes_on))


--- Fold 1/5 ---
Train: 2520 days, Val: 252 days
Epoch 1/100, Actor: 0.029523, Critic: 0.056732, Val: 0.063947
Epoch 2/100, Actor: 0.023541, Critic: 0.038785, Val: 0.063158
Epoch 3/100, Actor: 0.053638, Critic: 0.038965, Val: 0.064363
Epoch 4/100, Actor: 0.048778, Critic: 0.039997, Val: 0.062898
Epoch 5/100, Actor: 0.028295, Critic: 0.028644, Val: 0.061725
Epoch 6/100, Actor: 0.023837, Critic: 0.038226, Val: 0.069670
Epoch 7/100, Actor: 0.062768, Critic: 0.046346, Val: 0.051127
Epoch 8/100, Actor: 0.011503, Critic: 0.036531, Val: 0.038885
Epoch 9/100, Actor: 0.052739, Critic: 0.049695, Val: 0.029758
Epoch 10/100, Actor: 0.127211, Critic: 0.034204, Val: 0.026378
Epoch 11/100, Actor: 0.070279, Critic: 0.040387, Val: 0.028334
Epoch 12/100, Actor: 0.111693, Critic: 0.042556, Val: 0.024831
Epoch 13/100, Actor: 0.081022, Critic: 0.038051, Val: 0.048630
Epoch 14/100, Actor: 0.062855, Critic: 0.032015, Val: 0.049173
Epoch 15/100, Actor: 0.022099, Critic: 0.041674, Val: 0.036584
Epoch 16/100, 

In [8]:
# =============================================================================
# Cell 6: Results Summary
# =============================================================================
# Fixed-width table: method name, mean Sharpe over folds, std over folds.
heavy_rule = "=" * 50

print("\n" + heavy_rule)
print(f"{'Method':<12} {'Mean Sharpe':>12} {'Std':>10}")
print("-"*50)
# The baseline is a single number, so it has no spread to report.
print(f"{'EW':<12} {results['EW']:>12.3f} {'-':>10}")
for method in ('DDPG', 'DDES'):
    fold_scores = results[method]
    print(f"{method:<12} {np.mean(fold_scores):>12.3f} {np.std(fold_scores):>10.3f}")
print(heavy_rule)


Method        Mean Sharpe        Std
--------------------------------------------------
EW                  1.645          -
DDPG                1.083      0.196
DDES                1.439      0.483


In [21]:
# =============================================================================
# Cell 1: Imports + Config
# =============================================================================
import numpy as np
import pandas as pd
import torch
from sklearn.neighbors import KDTree

from models.pga_map_elites.official_networks import Actor, Critic
from models.pga_map_elites.official_utils import (
    ReplayBuffer, Individual, add_to_archive, cvt
)
from models.pga_map_elites.official_variational_operators import VariationalOperator
from models.pga_map_elites.portfolio_env import PortfolioEnv

# Configuration for the PGA-MAP-Elites experiment below.
# NOTE: this rebinds the `cfg` loaded from YAML earlier in the notebook; this
# dict has no 'lookback_window' key, which run_ddpg() reads, so the DDPG cells
# cannot be re-run after this cell without reloading the YAML config.
cfg = dict(
    # Seeding and network shape (read by the setup cell and the Actor factory).
    seeds=[42],
    hidden_sizes=[128, 128],
    # Passed to Critic as discount / tau, and to VariationalOperator as learning_rate.
    gamma=0.99,
    tau=0.005,
    actor_lr=0.001,
    # Archive size and evaluation budget (read by cvt() and the main loop).
    n_niches=128,
    max_evals=5000,
    random_init=200,
    batch_size=100,
    # Variation settings.
    proportion_evo=0.5,
    iso_sigma=0.005,
    line_sigma=0.05,
    # Critic / actor update step counts and batch size (read by the main loop).
    nr_of_steps_crit=300,
    nr_of_steps_act=10,
    train_batch_size=256,
    # Passed to Critic; 'policy_freq' is not read by any cell visible here.
    policy_noise=0.2,
    noise_clip=0.5,
    policy_freq=2,
)





In [22]:
# =============================================================================
# Cell 2: eval_policy (only function not in utils)
# =============================================================================
def eval_policy(actor, env):
    """Roll out one episode of ``actor`` in ``env`` and collect its transitions.

    Source: parallel_worker() in original PGA-MAP-Elites.

    Args:
        actor: Object exposing ``select_action(state_array)``.
        env: Environment exposing ``reset()``, ``step(action)``, the counters
            ``_step`` / ``_max_episode_steps`` and, after the episode,
            ``tot_reward`` and ``desc``.

    Returns:
        tuple: ``(env.tot_reward, copy of env.desc, transitions)`` where
        ``transitions`` is ``(states, actions, next_states, rewards, not_dones)``
        as arrays with one row per step; rewards and not_dones are reshaped
        to a single column.
    """
    observation = env.reset()
    finished = False
    steps = []  # one (state, action, next_state, reward, not_done) tuple per step

    while not finished:
        chosen = actor.select_action(np.array(observation))
        following, reward, finished, info = env.step(chosen)
        # The done flag only counts as terminal while the step counter is
        # below the episode limit; at the limit it is recorded as 0.
        if env._step < env._max_episode_steps:
            terminal = float(finished)
        else:
            terminal = 0

        steps.append((
            observation.copy(),
            chosen.copy(),
            following.copy(),
            reward,
            1.0 - terminal,
        ))
        observation = following

    # Turn the per-step tuples into per-field sequences.
    state_seq, action_seq, next_seq, reward_seq, not_done_seq = zip(*steps)
    transitions = (
        np.array(state_seq),
        np.array(action_seq),
        np.array(next_seq),
        np.array(reward_seq).reshape(-1, 1),
        np.array(not_done_seq).reshape(-1, 1),
    )
    return env.tot_reward, env.desc.copy(), transitions

In [23]:
# =============================================================================
# Cell 3: Setup
# =============================================================================
# Reload the dataset and build the environment on everything except the last
# 504 rows. This rebinds `data`; unlike the earlier load, the index is not
# parsed as dates here.
# NOTE(review): 504 presumably equals 2 * 252, i.e. the same held-out test
# window create_folds() uses by default -- confirm and consider deriving it.
data = pd.read_csv('data.csv', index_col=0)
env = PortfolioEnv(data.iloc[:-504], lookback=20, episode_len=50)

# Seed torch and numpy with the first configured seed.
# NOTE(review): seeding happens after PortfolioEnv is constructed -- confirm
# the environment draws no random numbers in its constructor.
torch.manual_seed(cfg['seeds'][0])
np.random.seed(cfg['seeds'][0])

# Shared critic, replay buffer, and the archive with its centroid lookup.
critic = Critic(env.state_dim, env.action_dim, max_action=1.0,
                discount=cfg['gamma'], tau=cfg['tau'],
                policy_noise=cfg['policy_noise'], noise_clip=cfg['noise_clip'])
replay_buffer = ReplayBuffer(env.state_dim, env.action_dim)
# KD-tree over the n_niches centroids returned by cvt() (2-D descriptor space).
kdt = KDTree(cvt(cfg['n_niches'], dim=2, samples=25000))
archive = {}

# Variation operator; actor_fn builds a fresh Actor with the environment's
# state/action dimensions and the configured hidden sizes.
# NOTE(review): presumably wraps the project's iso_dd and pg_variation -- confirm.
var_op = VariationalOperator(
    actor_fn=lambda: Actor(env.state_dim, env.action_dim, 1.0, cfg['hidden_sizes']),
    iso_sigma=cfg['iso_sigma'],
    line_sigma=cfg['line_sigma'],
    learning_rate=cfg['actor_lr'],
)


Computing CVT (this can take a while...): /Users/ekaterinabasova/Desktop/untitled folder/master_thesis/models/pga_map_elites/CVT/centroids_128_2.dat
Initialization complete
Iteration 0, inertia 38.593280436349595.
Iteration 1, inertia 34.593611641873814.
Iteration 2, inertia 33.64976192842315.
Iteration 3, inertia 33.15567020805343.
Iteration 4, inertia 32.88090147210166.
Iteration 5, inertia 32.68559076226745.
Iteration 6, inertia 32.54034682287526.
Iteration 7, inertia 32.42608667732518.
Iteration 8, inertia 32.339230630860165.
Iteration 9, inertia 32.269545966147206.
Iteration 10, inertia 32.210762173031156.
Iteration 11, inertia 32.16753108118965.
Iteration 12, inertia 32.12624382857882.
Iteration 13, inertia 32.08715482994139.
Iteration 14, inertia 32.055614670303434.
Iteration 15, inertia 32.030923436375204.
Iteration 16, inertia 32.01303594205345.
Iteration 17, inertia 31.993393563155557.
Iteration 18, inertia 31.973892175353853.
Iteration 19, inertia 31.953653270214026.
Iterati

In [24]:

# =============================================================================
# Cell 4: Main loop
# =============================================================================
# Evaluation-budget loop: produce a batch of actors, evaluate each one, store
# its transitions and try to insert it into the archive, until max_evals
# evaluations have been spent.
# NOTE(review): the saved output of this cell is
# "AttributeError: 'Critic' object has no attribute 'Q1'". Nothing in this
# cell reads `Q1`, so the failure presumably comes from inside critic.train()
# or var_op() -- confirm that the imported Critic matches what they expect.
n_evals = 0

while n_evals < cfg['max_evals']:
    
    if n_evals < cfg['random_init']:
        # Initial phase: a batch of freshly constructed actors.
        offspring = [Actor(env.state_dim, env.action_dim, 1.0, cfg['hidden_sizes']) 
                     for _ in range(cfg['batch_size'])]
    else:
        # Train the critic and sample states only once the replay buffer holds
        # more than one training batch; otherwise no states are passed on.
        if replay_buffer.size > cfg['train_batch_size']:
            critic.train(archive, replay_buffer, cfg['nr_of_steps_crit'], cfg['train_batch_size'])
            states = replay_buffer.sample_state(cfg['train_batch_size'], cfg['nr_of_steps_act'])
        else:
            states = None
        
        # Offspring are produced from the current archive by the variation operator.
        offspring = var_op(archive, cfg['batch_size'], cfg['proportion_evo'],
                          critic=critic, states=states, nr_of_steps_act=cfg['nr_of_steps_act'])
    
    for actor in offspring:
        fitness, desc, transitions = eval_policy(actor, env)
        replay_buffer.add(transitions)
        add_to_archive(Individual(actor, desc, fitness), desc, archive, kdt)
    
    n_evals += len(offspring)
    
    # Progress line, printed only when the running total is an exact multiple
    # of 500 (true every fifth batch while each batch has 100 offspring).
    if n_evals % 500 == 0:
        best = max((x.fitness for x in archive.values()), default=0)
        print(f"[{n_evals}] Archive: {len(archive)} | Best: {best:.4f}")


AttributeError: 'Critic' object has no attribute 'Q1'

In [None]:

# =============================================================================
# Cell 5: Results
# =============================================================================
# Report the best individual in the archive and the share of niches filled.
# The archive can still be empty here (for example when the main loop stopped
# on an error before inserting anything); max() over an empty dict would raise
# ValueError, so that case is reported explicitly instead.
if archive:
    best_key = max(archive, key=lambda k: archive[k].fitness)
    print(f"Best fitness: {archive[best_key].fitness:.4f}")
else:
    best_key = None
    print("Best fitness: n/a (archive is empty)")
print(f"Coverage: {100*len(archive)/cfg['n_niches']:.1f}%")