In [23]:
# Standard imports
import numpy as np
import matplotlib.pyplot as plt

# Project imports
from portfolio_environment import PortfolioEnv
from rl_agent import PPOAgent
from hrp_optimizer import optimizeHRP
import config

In [24]:
# Environment settings handed to PortfolioEnv; every value is read from config.py
env_config = dict(
    assets=config.ASSETS,
    initial_capital=config.INITIAL_CAPITAL,
    transaction_cost=config.TRANSACTION_COST,
    lookback_window=config.LOOKBACK_WINDOW,
    hrp_lookback=config.HRP_LOOKBACK,
    start_date=config.START_DATE,
    end_date=config.END_DATE,
)

In [25]:
# Function to train the RL agent
def train_agent(episodes: int = 100, train_every: int = 5, log_every: int = 10) -> tuple:
    """Train a PPO agent on the portfolio environment.

    Builds a fresh ``PortfolioEnv`` from the notebook-level ``env_config`` and a
    ``PPOAgent`` from ``config.STATE_DIM``, ``config.ACTION_DIM`` and
    ``config.LEARNING_RATE``, then runs ``episodes`` rollouts. Every transition
    is stored in the agent via ``agent.store_transition``.

    Args:
        episodes: Number of episodes to run.
        train_every: Episodes whose index is a multiple of this value are
            training episodes: during them ``agent.train`` is called after each
            step once ``agent.memory`` holds more than ``config.BATCH_SIZE``
            transitions. The default (5) reproduces the previous hard-coded
            behaviour.
        log_every: A progress line is printed for episodes whose index is a
            multiple of this value. The default (10) reproduces the previous
            hard-coded behaviour.

    Returns:
        ``(agent, episode_rewards)`` where ``episode_rewards`` is a list with
        the summed reward of each episode, in order.

    Raises:
        ValueError: If ``train_every`` or ``log_every`` is smaller than 1
            (both are used as modulo divisors).
    """
    if train_every < 1 or log_every < 1:
        raise ValueError("train_every and log_every must be >= 1")

    env = PortfolioEnv(env_config)
    agent = PPOAgent(config.STATE_DIM, config.ACTION_DIM, config.LEARNING_RATE)

    episode_rewards = []

    for episode in range(episodes):
        # Updates only happen during every `train_every`-th episode; the other
        # episodes just collect transitions into the agent's memory.
        is_training_episode = episode % train_every == 0

        state = env.reset()
        episode_reward = 0
        done = False

        while not done:
            action = agent.get_action(state)
            next_state, reward, done, info = env.step(action)

            agent.store_transition(state, action, reward, next_state, done)

            state = next_state
            episode_reward += reward

            # Train only once the memory holds more than one batch of samples.
            if is_training_episode and len(agent.memory) > config.BATCH_SIZE:
                agent.train(config.BATCH_SIZE)

        episode_rewards.append(episode_reward)

        if episode % log_every == 0:
            # `info` is the dict returned by the final step of this episode.
            print(f"Episode {episode}, Reward: {episode_reward:.2f}, "
                  f"Portfolio Value: ${info['portfolio_value']:.2f}")

    return agent, episode_rewards

In [None]:
# Train the RL agent for 100 episodes. `agent` is the trained PPOAgent and
# `training_rewards` is the list of per-episode summed rewards returned by
# train_agent.
agent, training_rewards = train_agent(episodes=100)

In [None]:
# Test both strategies on one environment built from the same env_config.
#
# NOTE(review): the import cell only brings in `optimizeHRP`, so the name
# `HRPOptimizer` used below was undefined and this cell raised NameError.
# The import here assumes the `hrp_optimizer` module exposes an `HRPOptimizer`
# class with an `optimize(returns)` method — TODO confirm against the module,
# then move this import into the notebook's import cell.
from hrp_optimizer import HRPOptimizer

env = PortfolioEnv(env_config)
hrp_optimizer = HRPOptimizer()
n_assets = len(config.ASSETS)

# RL strategy performance: roll the trained agent through one full episode,
# recording the portfolio value reported after every step.
state = env.reset()
rl_values = [env.initial_capital]
done = False

while not done:
    action = agent.get_action(state)
    state, _, done, info = env.step(action)
    rl_values.append(info['portfolio_value'])

# HRP strategy performance: same environment, reset to the start.
env.reset()
hrp_values = [env.initial_capital]
done = False

while not done:
    if env.current_step >= env.hrp_lookback:
        # Enough history: optimise on the trailing window of returns, i.e.
        # rows [current_step - hrp_lookback, current_step).
        window_start = env.current_step - env.hrp_lookback
        recent_returns = env.returns.iloc[window_start:env.current_step].values
        hrp_weights = hrp_optimizer.optimize(recent_returns)
    else:
        # Not enough history yet: fall back to an equal-weight allocation.
        hrp_weights = np.ones(n_assets) / n_assets

    state, _, done, info = env.step(hrp_weights)
    hrp_values.append(info['portfolio_value'])

In [None]:
# Plot results: training curve, portfolio value paths, total return and
# volatility for the RL strategy versus the HRP baseline.
fig, axes = plt.subplots(2, 2, figsize=(12, 8))
(ax_training, ax_values), (ax_returns, ax_volatility) = axes

ax_training.plot(training_rewards)
ax_training.set_title('RL Training Progress')
ax_training.set_xlabel('Episode')
ax_training.set_ylabel('Episode Reward')

ax_values.plot(rl_values, label='RL Strategy', alpha=0.8)
ax_values.plot(hrp_values, label='HRP Baseline', alpha=0.8)
ax_values.set_title('Portfolio Value Comparison')
ax_values.set_xlabel('Time Steps')
ax_values.set_ylabel('Portfolio Value ($)')
ax_values.legend()

# Total return over the whole evaluation run, in percent.
rl_return = (rl_values[-1] - rl_values[0]) / rl_values[0] * 100
hrp_return = (hrp_values[-1] - hrp_values[0]) / hrp_values[0] * 100

strategies = ['RL Strategy', 'HRP Baseline']
returns = [rl_return, hrp_return]
ax_returns.bar(strategies, returns, color=['blue', 'orange'], alpha=0.7)
ax_returns.set_title('Total Returns (%)')
ax_returns.set_ylabel('Return (%)')

# Per-step simple returns; volatility is their (population) standard
# deviation, in percent.
rl_step_returns = np.diff(rl_values) / np.asarray(rl_values[:-1], dtype=float)
hrp_step_returns = np.diff(hrp_values) / np.asarray(hrp_values[:-1], dtype=float)
rl_step_std = np.std(rl_step_returns)
hrp_step_std = np.std(hrp_step_returns)
rl_volatility = rl_step_std * 100
hrp_volatility = hrp_step_std * 100

volatilities = [rl_volatility, hrp_volatility]
ax_volatility.bar(strategies, volatilities, color=['blue', 'orange'], alpha=0.7)
ax_volatility.set_title('Volatility (%)')
ax_volatility.set_ylabel('Volatility (%)')

fig.tight_layout()
fig.savefig('portfolio_optimization_results.png', dpi=300, bbox_inches='tight')
plt.show()

# Sharpe ratio: mean over standard deviation of returns measured on the SAME
# horizon (here: one environment step), with a zero risk-free rate. The
# previous version divided the cumulative total return by the per-step
# volatility, which mixes horizons and grows with the length of the run.
# Not annualised, because the step frequency is not known in this notebook.
rl_sharpe = np.mean(rl_step_returns) / rl_step_std if rl_step_std > 0 else float('nan')
hrp_sharpe = np.mean(hrp_step_returns) / hrp_step_std if hrp_step_std > 0 else float('nan')

print("\n=== Performance Summary ===")
print(f"RL Strategy - Return: {rl_return:.2f}%, Volatility: {rl_volatility:.2f}%")
print(f"HRP Baseline - Return: {hrp_return:.2f}%, Volatility: {hrp_volatility:.2f}%")
print(f"Sharpe Ratio (RL, per-step, rf=0): {rl_sharpe:.3f}")
print(f"Sharpe Ratio (HRP, per-step, rf=0): {hrp_sharpe:.3f}")