In [None]:
# Install required packages (run this cell first if packages are missing)
import subprocess
import sys

def install_package(package):
    """Install ``package`` with pip into the interpreter running this kernel.

    Args:
        package: A pip requirement specifier, e.g. ``"numpy>=1.24.0"``.

    Returns:
        bool: True when pip exited successfully, False when it reported an
        error. Callers that ignore the return value behave as before.
    """
    try:
        # sys.executable makes pip target the same environment as the kernel.
        subprocess.check_call([sys.executable, "-m", "pip", "install", package])
    except subprocess.CalledProcessError:
        print(f"❌ Failed to install {package}")
        return False
    print(f"✅ {package} installed successfully")
    return True

# List of required packages
required_packages = [
    "yfinance>=0.2.0",
    "stable-baselines3>=2.0.0",
    "gymnasium>=0.29.0",
    "gym>=0.21.0",
    "matplotlib>=3.7.0",
    "seaborn>=0.12.0",
    "plotly>=5.15.0",
    "numpy>=1.24.0",
    "pandas>=2.0.0"
]

print("🔧 Installing required packages...")
failed_packages = []
for package in required_packages:
    # Distribution name without the version pin. The import check below only
    # tests that the module can be imported; it does not verify the version.
    package_name = package.split(">=")[0]
    try:
        __import__(package_name.replace("-", "_"))
        print(f"✅ {package_name} already available")
    except ImportError:
        print(f"📦 Installing {package}...")
        if not install_package(package):
            failed_packages.append(package)

# Report honestly: do not claim success when pip failed for some packages.
if failed_packages:
    print(f"⚠️ Package installation finished with failures: {', '.join(failed_packages)}")
else:
    print("🎉 Package installation complete!")

# Reinforcement Learning for Asset Allocation

## 🎯 Objective

In this laboratory, we'll develop a **Reinforcement Learning agent** that learns to optimally allocate a portfolio of assets to maximize cumulative returns while considering:
- **Risk management** (volatility, drawdown)
- **Transaction costs** (realistic trading friction)
- **Market dynamics** (changing market conditions)

## 🤖 Why Reinforcement Learning for Finance?

Unlike supervised learning, which predicts future prices, RL focuses on **sequential decision-making**:

| Approach | Goal | Challenge |
|----------|------|----------|
| **Supervised Learning** | Predict next price | Prediction ≠ Optimal trading |
| **Reinforcement Learning** | Learn optimal actions | Direct optimization of trading performance |

**Key advantages of RL:**
- Learns from **trial and error** in market simulation
- Optimizes **long-term cumulative rewards**
- Naturally handles **sequential dependencies**
- Can incorporate **risk constraints** and **transaction costs**

## 📚 Import Required Libraries

In [None]:
# Core libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
warnings.filterwarnings('ignore')

# Financial data
import yfinance as yf
from datetime import datetime, timedelta

# RL libraries
import gymnasium
from gymnasium import spaces
from stable_baselines3 import PPO, DQN
from stable_baselines3.common.env_checker import check_env
from stable_baselines3.common.callbacks import EvalCallback
from stable_baselines3.common.vec_env import DummyVecEnv

# Plotting
import plotly.graph_objects as go
import plotly.express as px
from plotly.subplots import make_subplots
import plotly.offline as pyo
pyo.init_notebook_mode()

# Set style
try:
    # Style name used by recent matplotlib releases
    plt.style.use('seaborn-v0_8')
except OSError:
    # Fallback for OLDER matplotlib versions, which only know the style
    # under its original name 'seaborn'
    plt.style.use('seaborn')
sns.set_palette("husl")

print("✅ All libraries imported successfully!")

## 📈 Data Collection and Preprocessing

We'll use **Yahoo Finance** to download historical data for a diversified portfolio of assets.

In [None]:
# Asset universe and date range used throughout the notebook
ASSETS = ['SPY', 'AAPL', 'MSFT', 'GOOGL']
START_DATE, END_DATE = '2018-01-01', '2023-12-31'
SPLIT_DATE = '2022-01-01'  # Train/test split

# Echo the configuration so the run is self-describing
_asset_list = ', '.join(ASSETS)
for _config_line in (
    f"📊 Downloading data for {len(ASSETS)} assets: {_asset_list}",
    f"📅 Period: {START_DATE} to {END_DATE}",
    f"🔄 Train/Test split: {SPLIT_DATE}",
):
    print(_config_line)

In [None]:
def download_data(symbols, start_date, end_date):
    """
    Download and preprocess financial data from Yahoo Finance.

    Args:
        symbols: A ticker symbol or a list of ticker symbols.
        start_date: Passed to ``yf.download`` as ``start``.
        end_date: Passed to ``yf.download`` as ``end``.

    Returns:
        tuple: ``(prices, returns, tech_indicators)`` DataFrames.
            * ``prices`` - one column per asset, in the order given in
              ``symbols`` (when the downloaded columns are ticker names),
              rows with any missing value removed.
            * ``returns`` - ``prices.pct_change()`` without the first row.
            * ``tech_indicators`` - per asset ``_SMA_10``, ``_SMA_30``,
              ``_VOL_10`` and ``_MOMENTUM`` columns, rows with NaN removed.

    Raises:
        ValueError: If nothing was downloaded, or if no row has a price for
            every asset.
    """
    # Accept a bare ticker string and drop duplicate tickers, keeping order.
    if isinstance(symbols, str):
        symbols = [symbols]
    symbols = list(dict.fromkeys(symbols))

    print("🔄 Downloading data...")

    # Download data
    data = yf.download(symbols, start=start_date, end=end_date, progress=False)

    # Check if data was downloaded successfully
    if data is None or data.empty:
        raise ValueError("No data downloaded. Please check symbols and date range.")

    print(f"📊 Data structure: {data.shape}")
    print(f"📊 Columns: {data.columns.tolist()}")
    print(f"📊 Column levels: {data.columns.nlevels}")

    if isinstance(data.columns, pd.MultiIndex):
        # Two-level columns: first level is the price type, second the ticker.
        # This layout is handled the same way for one or many symbols.
        available_columns = data.columns.get_level_values(0).unique()
        print(f"📊 Available price types: {available_columns.tolist()}")

        if 'Adj Close' in available_columns:
            price_col = 'Adj Close'
        elif 'Close' in available_columns:
            price_col = 'Close'
        else:
            # Use the first available price column
            price_col = available_columns[0]
            print(f"⚠️ Using {price_col} as no Close/Adj Close found")

        prices = data[price_col].copy()
        if isinstance(prices, pd.Series):
            prices = prices.to_frame()
        if len(symbols) == 1 and prices.shape[1] == 1:
            prices.columns = symbols
    elif len(symbols) == 1:
        # Flat columns for a single symbol: pick the price column directly.
        price_col = 'Adj Close' if 'Adj Close' in data.columns else 'Close'
        prices = data[price_col].to_frame()
        prices.columns = symbols
    else:
        # Simple columns structure
        prices = data.copy()

    # Put the columns in the caller's order. The downloaded frame may order
    # tickers differently (e.g. alphabetically); anything that later labels
    # per-asset arrays with the requested symbol list relies on this order.
    ordered = [symbol for symbol in symbols if symbol in prices.columns]
    if ordered:
        prices = prices[ordered]

    # Remove any missing data
    prices = prices.dropna()
    if prices.empty:
        raise ValueError(
            "No rows with a price for every asset. "
            "Please check symbols and date range."
        )

    # Calculate returns
    returns = prices.pct_change().dropna()

    # Calculate technical indicators; column order is asset-major and must
    # stay stable because it defines the layout of the RL observation.
    features = {}
    for asset in prices.columns:
        asset_prices = prices[asset]
        asset_returns = returns[asset]

        # Simple moving averages, expressed relative to the current price
        features[f'{asset}_SMA_10'] = asset_prices.rolling(10).mean() / asset_prices - 1
        features[f'{asset}_SMA_30'] = asset_prices.rolling(30).mean() / asset_prices - 1

        # Volatility (rolling std of returns)
        features[f'{asset}_VOL_10'] = asset_returns.rolling(10).std()

        # Short-term momentum: sum of the last 5 daily returns
        features[f'{asset}_MOMENTUM'] = asset_returns.rolling(5).sum()

    # Align every feature to the price index, then remove warm-up rows (NaN)
    tech_indicators = pd.DataFrame(features, index=prices.index).dropna()

    print(f"✅ Downloaded {len(prices)} days of data")
    print(f"📊 Assets: {list(prices.columns)}")

    return prices, returns, tech_indicators

# Fetch prices and derive returns / indicators for the configured universe
prices, returns, tech_indicators = download_data(
    symbols=ASSETS,
    start_date=START_DATE,
    end_date=END_DATE,
)

In [None]:
# Summary statistics: price levels first, then daily returns
_rule = "=" * 50
for _heading, _frame in (
    ("📊 Asset Price Statistics", prices),
    ("\n📈 Daily Returns Statistics", returns),
):
    print(_heading)
    print(_rule)
    display(_frame.describe())

In [None]:
# Visualize price evolution
# The grid grows with the number of assets instead of assuming exactly four.
n_plot_cols = 2
n_plot_rows = max(1, (len(ASSETS) + n_plot_cols - 1) // n_plot_cols)

fig = make_subplots(
    rows=n_plot_rows, cols=n_plot_cols,
    subplot_titles=ASSETS,
    # Shrink the spacing for tall grids so it stays within the available height
    vertical_spacing=min(0.1, 1.0 / n_plot_rows)
)

# Normalize prices to start at 100 for comparison
normalized_prices = (prices / prices.iloc[0]) * 100

for i, asset in enumerate(ASSETS):
    # Fill the grid row by row
    row = (i // n_plot_cols) + 1
    col = (i % n_plot_cols) + 1

    fig.add_trace(
        go.Scatter(
            x=normalized_prices.index,
            y=normalized_prices[asset],
            name=asset,
            line=dict(width=2)
        ),
        row=row, col=col
    )

fig.update_layout(
    height=300 * n_plot_rows,  # 600 for the default two-row layout
    title_text="📈 Asset Price Evolution (Normalized to 100)",
    showlegend=False
)

fig.show()

## 🏗️ Portfolio Management Environment

We'll create a custom **Gymnasium environment** (Gymnasium is the maintained successor to OpenAI Gym) for portfolio management. The agent will learn to:
- **Observe** market conditions and current portfolio state
- **Act** by choosing new portfolio allocations
- **Receive rewards** based on risk-adjusted returns minus costs

In [None]:
class PortfolioEnv(gymnasium.Env):
    """
    Custom Portfolio Management Environment

    State: [current_weights, tech_indicators (previous day), returns_history]
    Action: New portfolio weights (clipped to [0, 1], then rescaled to sum to 1)
    Reward: Risk-adjusted returns minus transaction costs

    Timing convention: the observation for step ``t`` only contains
    information available before day ``t`` (indicators of day ``t-1`` and the
    returns of the ``lookback_window`` days before ``t``). The action chosen
    from it then earns the return of day ``t``.

    Between steps the held weights drift with the realized asset returns, so
    re-submitting ``env.weights`` as the action means "do not trade".
    """

    def __init__(self, prices, returns, tech_indicators,
                 initial_balance=100000, transaction_cost=0.001,
                 lookback_window=10):
        """
        Args:
            prices: DataFrame of asset prices, one column per asset.
            returns: DataFrame of asset returns with the same columns.
            tech_indicators: DataFrame of features, indexed by date.
            initial_balance: Portfolio value at the start of every episode.
            transaction_cost: Cost per unit of turnover (sum of absolute
                weight changes), deducted from the period return.
            lookback_window: Number of past return rows in the observation.

        Raises:
            ValueError: If the three inputs share too few dates to take even
                one step.
        """
        super().__init__()

        self.prices = prices
        self.returns = returns
        self.tech_indicators = tech_indicators
        self.initial_balance = initial_balance
        self.transaction_cost = transaction_cost
        self.lookback_window = lookback_window

        # Asset information
        self.n_assets = len(prices.columns)
        self.asset_names = list(prices.columns)

        # Find common dates across all dataframes
        common_dates = prices.index.intersection(returns.index).intersection(tech_indicators.index)
        self.dates = sorted(common_dates)

        # An episode starts at index `lookback_window` and needs at least one
        # later date to step to.
        if len(self.dates) < self.lookback_window + 2:
            raise ValueError(
                f"Not enough common dates ({len(self.dates)}) for "
                f"lookback_window={self.lookback_window}"
            )

        # Filter data to common dates
        self.prices = self.prices.loc[self.dates]
        self.returns = self.returns.loc[self.dates]
        self.tech_indicators = self.tech_indicators.loc[self.dates]

        # Define action and observation spaces
        # Action: portfolio weights (continuous, normalized inside step())
        self.action_space = spaces.Box(low=0, high=1, shape=(self.n_assets,), dtype=np.float32)

        # Observation: current weights + tech indicators + recent returns
        n_tech_features = len(self.tech_indicators.columns)
        n_return_features = self.n_assets * self.lookback_window
        obs_dim = self.n_assets + n_tech_features + n_return_features

        self.observation_space = spaces.Box(
            low=-np.inf, high=np.inf, shape=(obs_dim,), dtype=np.float32
        )

        # Initialize state
        self.reset()

    def reset(self, seed=None, options=None):
        """Reset environment to initial state.

        Returns:
            tuple: ``(observation, info)`` with an empty ``info`` dict.
        """
        # Let Gymnasium seed its own generator (self.np_random) instead of
        # touching NumPy's global random state.
        super().reset(seed=seed)

        self.current_step = self.lookback_window
        self.balance = self.initial_balance

        # Start with equal weight portfolio
        self.weights = np.ones(self.n_assets) / self.n_assets

        # Track portfolio value history
        self.portfolio_values = [self.initial_balance]
        self.weight_history = [self.weights.copy()]

        observation = self._get_observation()
        info = {}

        return observation, info

    def _get_observation(self):
        """Build the observation vector for ``self.current_step``.

        Layout: current weights, then the indicator row of the previous date,
        then the flattened returns of the preceding ``lookback_window`` rows
        (left-padded with zeros when fewer rows exist).
        """
        if self.current_step >= len(self.dates):
            return np.zeros(self.observation_space.shape[0], dtype=np.float32)

        # Current portfolio weights
        obs = list(self.weights)

        # Technical indicators of the PREVIOUS date. The indicators of the
        # current date are derived from the current date's price/return, which
        # is exactly the return step() is about to pay out; exposing them here
        # would leak the outcome into the decision.
        indicator_date = self.dates[max(self.current_step - 1, 0)]
        tech_values = self.tech_indicators.loc[indicator_date].values
        obs.extend(tech_values)

        # Recent returns history (rows strictly before the current step)
        start_idx = max(0, self.current_step - self.lookback_window)
        recent_returns = self.returns.iloc[start_idx:self.current_step].values

        # Flatten and pad if necessary
        recent_returns_flat = recent_returns.flatten()
        expected_length = self.n_assets * self.lookback_window

        if len(recent_returns_flat) < expected_length:
            # Pad with zeros if we don't have enough history
            padding = np.zeros(expected_length - len(recent_returns_flat))
            recent_returns_flat = np.concatenate([padding, recent_returns_flat])

        obs.extend(recent_returns_flat)

        return np.array(obs, dtype=np.float32)

    def step(self, action):
        """Execute one step in the environment.

        Args:
            action: Desired portfolio weights; clipped to [0, 1] and rescaled.

        Returns:
            tuple: ``(observation, reward, terminated, truncated, info)``.
        """
        if self.current_step >= len(self.dates) - 1:
            # Data exhausted: this is a natural end of the episode, not a
            # time-limit truncation.
            return self._get_observation(), 0.0, True, False, {}

        # Normalize action to ensure weights sum to 1
        target_weights = np.clip(np.asarray(action, dtype=np.float64), 0, 1)
        target_weights = target_weights / (target_weights.sum() + 1e-8)

        # Transaction costs are charged on the trade from the weights actually
        # held (self.weights, drifted since the last step) to the new target.
        weight_changes = np.abs(target_weights - self.weights)
        transaction_costs = np.sum(weight_changes) * self.transaction_cost

        # Calculate portfolio return
        current_date = self.dates[self.current_step]
        asset_returns = self.returns.loc[current_date].values
        portfolio_return = float(np.dot(target_weights, asset_returns))

        # Update portfolio value
        self.balance = self.balance * (1 + portfolio_return - transaction_costs)
        self.portfolio_values.append(self.balance)
        self.weight_history.append(target_weights.copy())

        # Let the held weights drift with the realized returns. Without this,
        # repeating the previous weights would silently rebalance every day at
        # zero cost, and a buy-and-hold policy could not be expressed.
        growth = 1.0 + portfolio_return
        if growth > 0:
            self.weights = target_weights * (1.0 + asset_returns) / growth
        else:
            self.weights = target_weights

        # Calculate reward (Sharpe-like ratio with transaction cost penalty)
        if len(self.portfolio_values) >= 20:  # Need some history
            recent_values = np.array(self.portfolio_values[-20:])
            recent_returns = np.diff(recent_values) / recent_values[:-1]
            mean_return = np.mean(recent_returns)
            std_return = np.std(recent_returns) + 1e-8
            sharpe_ratio = mean_return / std_return
            reward = sharpe_ratio - transaction_costs * 100  # Scale transaction cost penalty
        else:
            reward = portfolio_return - transaction_costs * 100

        # Move to next step
        self.current_step += 1

        # Check if episode is done
        terminated = self.current_step >= len(self.dates) - 1
        truncated = False  # We don't truncate episodes

        info = {
            'portfolio_value': self.balance,
            'portfolio_return': portfolio_return,
            'transaction_costs': transaction_costs,
            'weights': target_weights.copy()
        }

        return self._get_observation(), float(reward), terminated, truncated, info

    def get_portfolio_stats(self):
        """Calculate portfolio performance statistics for the current episode.

        Returns:
            dict: ``total_return``, ``annualized_return``, ``volatility``,
            ``sharpe_ratio``, ``max_drawdown`` and ``final_value``; an empty
            dict when fewer than two portfolio values were recorded.
        """
        if len(self.portfolio_values) < 2:
            return {}

        values = np.array(self.portfolio_values)
        returns = np.diff(values) / values[:-1]

        total_return = (values[-1] / values[0]) - 1
        # 252 = trading days per year used for annualization
        annualized_return = (1 + total_return) ** (252 / len(returns)) - 1
        volatility = np.std(returns) * np.sqrt(252)
        sharpe_ratio = annualized_return / volatility if volatility > 0 else 0

        # Maximum drawdown: largest relative drop below the running peak
        cumulative = values / np.maximum.accumulate(values)
        max_drawdown = (1 - np.min(cumulative))

        return {
            'total_return': total_return,
            'annualized_return': annualized_return,
            'volatility': volatility,
            'sharpe_ratio': sharpe_ratio,
            'max_drawdown': max_drawdown,
            'final_value': values[-1]
        }

print("✅ Portfolio Environment created successfully!")

## 🧪 Environment Setup and Testing

Let's create training and testing environments and verify they work correctly.

In [None]:
# Chronological train/test split at SPLIT_DATE.
# Each frame is masked on its own index because returns and tech_indicators
# are shorter than prices (pct_change() and the rolling windows drop rows).
train_mask_prices, test_mask_prices = prices.index < SPLIT_DATE, prices.index >= SPLIT_DATE
train_mask_returns, test_mask_returns = returns.index < SPLIT_DATE, returns.index >= SPLIT_DATE
train_mask_tech, test_mask_tech = tech_indicators.index < SPLIT_DATE, tech_indicators.index >= SPLIT_DATE

# Rows before the split date form the training set
train_prices, train_returns, train_tech = (
    prices[train_mask_prices],
    returns[train_mask_returns],
    tech_indicators[train_mask_tech],
)

# Rows on or after the split date form the test set
test_prices, test_returns, test_tech = (
    prices[test_mask_prices],
    returns[test_mask_returns],
    tech_indicators[test_mask_tech],
)

_splits = (
    ("Training", train_prices, train_returns, train_tech),
    ("Testing", test_prices, test_returns, test_tech),
)
for _label, _price_part, _, _ in _splits:
    print(f"📊 {_label} period: {_price_part.index[0].date()} to {_price_part.index[-1].date()}")
for _label, _price_part, _, _ in _splits:
    print(f"📈 {_label} days: {len(_price_part)}")
for _label, _, _return_part, _tech_part in _splits:
    print(f"📈 {_label} returns: {len(_return_part)} | tech indicators: {len(_tech_part)}")

In [None]:
# Create environments
# Both environments share the same account settings; only the data differs.
env_settings = dict(initial_balance=100000, transaction_cost=0.001)

train_env = PortfolioEnv(
    prices=train_prices,
    returns=train_returns,
    tech_indicators=train_tech,
    **env_settings
)

test_env = PortfolioEnv(
    prices=test_prices,
    returns=test_returns,
    tech_indicators=test_tech,
    **env_settings
)

# Verify environment
print("🔍 Checking environment...")
check_env(train_env)
print("✅ Environment passed all checks!")

print(f"\n📊 Environment Details:")
print(f"   • Assets: {train_env.asset_names}")
print(f"   • Observation space: {train_env.observation_space.shape}")
print(f"   • Action space: {train_env.action_space.shape}")
# len(train_env.dates) counts dates in the training data, not episodes
print(f"   • Training days: {len(train_env.dates)}")

## 📊 Baseline Strategies

Before training our RL agent, let's implement baseline strategies to compare against:
1. **Buy and Hold**: Invest equally and never rebalance
2. **Equal Weight**: Rebalance to equal weights periodically
3. **Random**: Random allocations (a naive reference point with no strategy behind it)

In [None]:
def evaluate_baseline_strategy(env, strategy='equal_weight', rebalance_freq=20, seed=None):
    """
    Run one episode of a rule-based baseline on ``env``.

    Strategies:
    - 'buy_hold': Re-submit the weights the environment currently reports
    - 'equal_weight': Rebalance to equal weights every ``rebalance_freq`` steps,
      otherwise re-submit the environment's current weights
    - 'random': Random allocations, normalized to sum to 1

    Args:
        env: Environment exposing ``reset()``, ``step(action)``, ``n_assets``,
            ``weights``, ``get_portfolio_stats()``, ``portfolio_values`` and
            ``weight_history``.
        strategy: One of the strategy names above.
        rebalance_freq: Steps between rebalances for 'equal_weight'.
        seed: Optional seed for the 'random' strategy so results can be
            reproduced. ``None`` keeps the allocations non-deterministic.

    Returns:
        tuple: ``(stats, portfolio_values, weight_history)`` taken from ``env``
        after the episode.

    Raises:
        ValueError: If ``strategy`` is not a known strategy name.
    """
    # Fail fast, before the environment is reset or stepped.
    if strategy not in ('buy_hold', 'equal_weight', 'random'):
        raise ValueError(f"Unknown strategy: {strategy}")

    # Local generator: reproducible with a seed and independent of NumPy's
    # global random state.
    rng = np.random.default_rng(seed)

    env.reset()
    done = False
    step_count = 0

    # Initial equal weights
    equal_weights = np.ones(env.n_assets) / env.n_assets

    while not done:
        if strategy == 'random':
            action = rng.random(env.n_assets)
            action = action / action.sum()
        elif strategy == 'equal_weight' and step_count % rebalance_freq == 0:
            action = equal_weights
        else:
            # 'buy_hold', or 'equal_weight' between rebalancing dates:
            # hand back whatever weights the environment currently reports.
            action = env.weights

        _, _, terminated, truncated, _ = env.step(action)
        done = terminated or truncated
        step_count += 1

    return env.get_portfolio_stats(), env.portfolio_values, env.weight_history

print("📊 Evaluating baseline strategies...")

# Run every baseline on the training environment and file its outputs by name
strategies = ['buy_hold', 'equal_weight', 'random']
baseline_results = {}

for strategy in strategies:
    print(f"   • Evaluating {strategy} strategy...")
    outcome = evaluate_baseline_strategy(train_env, strategy)
    baseline_results[strategy] = dict(zip(('stats', 'values', 'weights'), outcome))

print("✅ Baseline evaluation complete!")

In [None]:
# Tabulate the baseline statistics: one row per strategy, one column per metric
baseline_stats_by_strategy = {
    name: outcome['stats'] for name, outcome in baseline_results.items()
}
baseline_df = pd.DataFrame(baseline_stats_by_strategy).T

print("📊 Baseline Strategy Performance (Training Period)")
print("=" * 60)
display(baseline_df.round(4))

In [None]:
# One line per baseline: portfolio value against the step index
fig = go.Figure(
    data=[
        go.Scatter(
            x=list(range(len(outcome['values']))),
            y=outcome['values'],
            name=label.replace('_', ' ').title(),
            line=dict(width=2),
        )
        for label, outcome in baseline_results.items()
    ]
)

fig.update_layout(
    title="📈 Baseline Strategy Performance (Training Period)",
    xaxis_title="Days",
    yaxis_title="Portfolio Value ($)",
    height=500,
    showlegend=True,
)

fig.show()

## 🤖 Reinforcement Learning Agent Training

Now let's train our RL agent using **PPO (Proximal Policy Optimization)**, which is well-suited for continuous action spaces like portfolio allocation.

In [None]:
# Create vectorized environment for stable-baselines3
vec_env = DummyVecEnv([lambda: train_env])

# Fixed seed so that training can be repeated with the same random numbers
PPO_SEED = 42

# Initialize PPO agent
model = PPO(
    "MlpPolicy",
    vec_env,
    learning_rate=3e-4,
    n_steps=2048,
    batch_size=64,
    n_epochs=10,
    gamma=0.99,
    gae_lambda=0.95,
    clip_range=0.2,
    verbose=1,
    seed=PPO_SEED,
    tensorboard_log=None  # Disable tensorboard logging
)

print("🤖 PPO Agent initialized!")
print(f"   • Policy: MLP (Multi-Layer Perceptron)")
print(f"   • Learning rate: {model.learning_rate}")
print(f"   • Batch size: {model.batch_size}")

In [None]:
# Train the agent
for _status in ("🚀 Starting training...", "   This may take a few minutes..."):
    print(_status)

# Training budget, counted in environment steps
total_timesteps = 50_000
model.learn(total_timesteps=total_timesteps)

print("✅ Training completed!")

# Persist the trained policy to disk
model_path = "data/ppo_portfolio_agent"
model.save(model_path)
print("💾 Model saved!")

## 📈 Agent Evaluation and Testing

Let's evaluate our trained agent on both training and testing data to see how it performs.

In [None]:
def evaluate_rl_agent(model, env, deterministic=True):
    """
    Roll the trained policy through one full episode of ``env``.

    Args:
        model: Object with ``predict(obs, deterministic=...)`` returning
            ``(action, state)``.
        env: Environment to run; it is reset at the start.
        deterministic: Forwarded to ``model.predict``.

    Returns:
        tuple: ``(stats, portfolio_values, weight_history)`` read from ``env``
        once the episode has ended.
    """
    observation, _ = env.reset()

    while True:
        chosen_action, _state = model.predict(observation, deterministic=deterministic)
        observation, _reward, terminated, truncated, _info = env.step(chosen_action)
        # Stop at the first end-of-episode signal of either kind
        if terminated or truncated:
            break

    return env.get_portfolio_stats(), env.portfolio_values, env.weight_history

print("🧪 Evaluating RL agent...")

# In-sample run on the data the agent was trained on
train_stats, train_values, train_weights = evaluate_rl_agent(model=model, env=train_env)

# Out-of-sample run on the held-out test period
test_stats, test_values, test_weights = evaluate_rl_agent(model=model, env=test_env)

print("✅ RL agent evaluation complete!")

In [None]:
# Compare RL agent with baselines
print("📊 RL Agent vs Baselines (Training Data)")
print("=" * 50)

# Add RL results to comparison
comparison_data = {
    'Buy & Hold': baseline_results['buy_hold']['stats'],
    'Equal Weight': baseline_results['equal_weight']['stats'],
    'Random': baseline_results['random']['stats'],
    'RL Agent': train_stats
}

comparison_df = pd.DataFrame(comparison_data).T
display(comparison_df.round(4))

print("\n📊 RL Agent Performance (Testing Data - Out of Sample)")
print("=" * 50)
# No explicit index here: with a dict of dicts the inner keys become the row
# labels. Passing index=[0] would reindex them to a single row 0 that matches
# none of the metric names, leaving only NaN in the table.
test_df = pd.DataFrame({'RL Agent (Test)': test_stats}).T
display(test_df.round(4))

## 📊 Performance Visualization and Analysis

Let's create comprehensive visualizations to analyze the performance of our RL agent.

In [None]:
# Portfolio value comparison: training period on top, testing period below
fig = make_subplots(
    rows=2, cols=1,
    subplot_titles=['Training Period Performance', 'Testing Period Performance'],
    vertical_spacing=0.15
)


def _value_trace(values, name, **line_style):
    """Line trace of portfolio value against the step index."""
    return go.Scatter(
        x=list(range(len(values))),
        y=values,
        name=name,
        line=line_style,
    )


# (subplot row, trace) pairs in drawing order: baselines, RL train, RL test
panel_traces = [
    (1, _value_trace(outcome['values'], label.replace('_', ' ').title(), width=2))
    for label, outcome in baseline_results.items()
]
panel_traces.append((1, _value_trace(train_values, 'RL Agent (Train)', width=3, color='red')))
panel_traces.append((2, _value_trace(test_values, 'RL Agent (Test)', width=3, color='darkred')))

for panel_row, trace in panel_traces:
    fig.add_trace(trace, row=panel_row, col=1)

fig.update_layout(
    height=800,
    title_text="🚀 Portfolio Performance: RL Agent vs Baselines",
    showlegend=True
)

fig.update_xaxes(title_text="Days", row=2, col=1)
fig.update_yaxes(title_text="Portfolio Value ($)")

fig.show()

In [None]:
# Portfolio allocation heatmap
def plot_allocation_heatmap(weights_history, asset_names, title):
    """
    Build a heatmap figure of portfolio weights over time.

    Args:
        weights_history: Sequence of weight vectors, one per time step.
        asset_names: Column labels for the weight vectors; shown on the y-axis.
        title: Figure title.

    Returns:
        The plotly figure (not shown; the caller decides when to display it).
    """
    allocation_table = pd.DataFrame(weights_history, columns=asset_names)
    time_axis = list(range(len(allocation_table)))

    # Transpose so assets run along the y-axis and time along the x-axis
    heatmap = go.Heatmap(
        z=allocation_table.T.values,
        x=time_axis,
        y=asset_names,
        colorscale='RdYlBu_r',
        colorbar=dict(title="Weight"),
    )

    fig = go.Figure(data=heatmap)
    fig.update_layout(
        title=title,
        xaxis_title="Time Steps",
        yaxis_title="Assets",
        height=400,
    )

    return fig

# Plot allocation heatmaps
# Label the rows with each environment's own asset order: the weight vectors
# follow the column order of the price data, which need not match ASSETS.
fig1 = plot_allocation_heatmap(
    train_weights,
    train_env.asset_names,
    "🎯 RL Agent Portfolio Allocation (Training)"
)
fig1.show()

fig2 = plot_allocation_heatmap(
    test_weights,
    test_env.asset_names,
    "🎯 RL Agent Portfolio Allocation (Testing)"
)
fig2.show()

In [None]:
# Risk-Return scatter plot
def calculate_risk_return_metrics(values):
    """Annualized return and annualized volatility of a value series.

    Args:
        values: Sequence of portfolio values in time order.

    Returns:
        tuple: ``(annualized_return, volatility)``; ``(0, 0)`` when fewer than
        two values are given.
    """
    if len(values) < 2:
        return 0, 0

    series = np.asarray(values)
    period_returns = np.diff(series) / series[:-1]

    total_return = (series[-1] / series[0]) - 1
    # 252 periods per year are used for both annualizations
    periods_per_year = 252
    annualized_return = (1 + total_return) ** (periods_per_year / len(period_returns)) - 1
    volatility = period_returns.std() * np.sqrt(periods_per_year)

    return annualized_return, volatility

# Collect one record per strategy: label, annualized return, volatility, group
risk_return_data = []

# Baselines
for label, outcome in baseline_results.items():
    baseline_return, baseline_vol = calculate_risk_return_metrics(outcome['values'])
    risk_return_data.append(dict(
        Strategy=label.replace('_', ' ').title(),
        Return=baseline_return,
        Volatility=baseline_vol,
        Type='Baseline',
    ))

# RL Agent, in-sample and out-of-sample
train_return, train_vol = calculate_risk_return_metrics(train_values)
test_return, test_vol = calculate_risk_return_metrics(test_values)

risk_return_data += [
    dict(Strategy='RL Agent (Train)', Return=train_return, Volatility=train_vol, Type='RL Agent'),
    dict(Strategy='RL Agent (Test)', Return=test_return, Volatility=test_vol, Type='RL Agent'),
]

risk_return_df = pd.DataFrame(risk_return_data)

# Scatter of annualized return against annualized volatility
axis_labels = {
    'Return': 'Annualized Return',
    'Volatility': 'Annualized Volatility',
}
fig = px.scatter(
    risk_return_df,
    x='Volatility',
    y='Return',
    color='Type',
    text='Strategy',
    title="📊 Risk-Return Analysis: All Strategies",
    labels=axis_labels,
)

fig.update_traces(textposition="top center")
fig.update_layout(height=500)
fig.show()

print("📈 Risk-Return Metrics")
print("=" * 40)
display(risk_return_df.round(4))

## 📋 Performance Summary and Analysis

Let's create a comprehensive summary of our results and analyze what the RL agent learned.

In [None]:
# Create comprehensive performance table
def _format_stats_column(stats):
    """Render one stats dict as display strings, in the row order of 'Metric'."""
    def as_percent(key):
        return f"{stats[key]*100:.2f}%"

    return [
        as_percent('total_return'),
        as_percent('annualized_return'),
        as_percent('volatility'),
        f"{stats['sharpe_ratio']:.3f}",
        as_percent('max_drawdown'),
        f"${stats['final_value']:,.0f}",
    ]


summary_data = {
    'Metric': [
        'Total Return (%)',
        'Annualized Return (%)',
        'Volatility (%)',
        'Sharpe Ratio',
        'Max Drawdown (%)',
        'Final Value ($)'
    ]
}

# Baseline columns first, then the RL agent (train and test)
summary_data.update({
    strategy.replace('_', ' ').title(): _format_stats_column(baseline_results[strategy]['stats'])
    for strategy in ['buy_hold', 'equal_weight']
})
summary_data['RL Agent (Train)'] = _format_stats_column(train_stats)
summary_data['RL Agent (Test)'] = _format_stats_column(test_stats)

summary_df = pd.DataFrame(summary_data).set_index('Metric')

print("🏆 FINAL PERFORMANCE SUMMARY")
print("=" * 80)
display(summary_df)

In [None]:
# Analyze allocation patterns
print("🎯 Portfolio Allocation Analysis")
print("=" * 50)

# Average allocations
# Columns are labelled with each environment's own asset order; the weight
# vectors follow the price-data column order, which need not match ASSETS.
train_weights_df = pd.DataFrame(train_weights, columns=train_env.asset_names)
test_weights_df = pd.DataFrame(test_weights, columns=test_env.asset_names)

print("📊 Average Asset Allocations:")
print(f"Training Period:")
train_avg = train_weights_df.mean()
for asset, weight in train_avg.items():
    print(f"   • {asset}: {weight:.1%}")

print(f"\nTesting Period:")
test_avg = test_weights_df.mean()
for asset, weight in test_avg.items():
    print(f"   • {asset}: {weight:.1%}")

# Allocation volatility (how much the agent rebalances)
# Dedicated names: train_vol / test_vol already hold the scalar portfolio
# volatilities from the risk-return cell and should not be overwritten with
# per-asset Series.
print(f"\n📈 Allocation Volatility (Rebalancing Frequency):")
train_weight_std = train_weights_df.std()
test_weight_std = test_weights_df.std()

print(f"Training Period:")
for asset, vol in train_weight_std.items():
    print(f"   • {asset}: {vol:.3f}")

print(f"Testing Period:")
for asset, vol in test_weight_std.items():
    print(f"   • {asset}: {vol:.3f}")

## 🔍 Key Insights and Observations

Let's analyze what our RL agent learned and identify key insights from the experiment.

In [None]:
# Performance comparison insights
print("🧠 KEY INSIGHTS FROM RL ASSET ALLOCATION")
print("=" * 60)

# Training-period figures
buy_hold_return = baseline_results['buy_hold']['stats']['total_return']
equal_weight_return = baseline_results['equal_weight']['stats']['total_return']
rl_train_return = train_stats['total_return']
rl_test_return = test_stats['total_return']

# Test-period Buy & Hold benchmark. The RL test result has to be compared with
# a baseline run over the SAME period; the stored baselines cover the training
# period only. (This resets test_env; test_stats/test_values were taken before
# and are not affected.)
buy_hold_test_stats, _, _ = evaluate_baseline_strategy(test_env, 'buy_hold')
buy_hold_test_return = buy_hold_test_stats['total_return']

print("📈 RETURN ANALYSIS:")
print(f"   • Buy & Hold achieved {buy_hold_return:.1%} (train) / {buy_hold_test_return:.1%} (test) total return")
print(f"   • Equal Weight achieved {equal_weight_return:.1%} total return (train)")
print(f"   • RL Agent achieved {rl_train_return:.1%} (train) / {rl_test_return:.1%} (test)")

if rl_test_return > buy_hold_test_return:
    print(f"   ✅ RL Agent outperformed Buy & Hold on the test period by {(rl_test_return - buy_hold_test_return):.1%}")
else:
    print(f"   ❌ RL Agent underperformed Buy & Hold on the test period by {(buy_hold_test_return - rl_test_return):.1%}")

# Compare Sharpe ratios (both measured on the test period)
buy_hold_sharpe = buy_hold_test_stats['sharpe_ratio']
rl_test_sharpe = test_stats['sharpe_ratio']

print(f"\n📊 RISK-ADJUSTED RETURNS (Sharpe Ratio, test period):")
print(f"   • Buy & Hold: {buy_hold_sharpe:.3f}")
print(f"   • RL Agent (Test): {rl_test_sharpe:.3f}")

if rl_test_sharpe > buy_hold_sharpe:
    print(f"   ✅ RL Agent achieved better risk-adjusted returns")
else:
    print(f"   ❌ Buy & Hold had better risk-adjusted returns")

# Analyze overfitting
# Annualized returns are used because the train and test periods differ in
# length, which makes total returns incomparable.
performance_gap = abs(train_stats['annualized_return'] - test_stats['annualized_return'])
print(f"\n🎯 OVERFITTING ANALYSIS:")
print(f"   • Train vs Test annualized return gap: {performance_gap:.1%}")
if performance_gap > 0.05:  # 5% threshold
    print(f"   ⚠️  Potential overfitting detected (gap > 5%)")
else:
    print(f"   ✅ Good generalization (gap < 5%)")

# Transaction cost impact
# Turnover per step = sum of absolute weight changes, the same quantity the
# environment multiplies by its transaction_cost rate.
weight_path = np.asarray(train_weights, dtype=float)
if len(weight_path) > 1:
    avg_rebalancing = np.abs(np.diff(weight_path, axis=0)).sum(axis=1).mean()
else:
    avg_rebalancing = 0.0
cost_rate = train_env.transaction_cost
print(f"\n💰 TRANSACTION COST IMPACT:")
print(f"   • Average daily turnover (sum of |Δweight|): {avg_rebalancing:.3f}")
print(f"   • Estimated daily transaction costs: ~{avg_rebalancing * cost_rate:.4f} ({avg_rebalancing * cost_rate * 100:.2f}%)")

## 🚀 Conclusions and Future Extensions

### What We Accomplished

1. **Custom RL Environment**: Built a realistic portfolio management environment with:
   - Transaction costs
   - Technical indicators
   - Risk-adjusted rewards

2. **Comprehensive Comparison**: Evaluated RL against traditional strategies:
   - Buy and Hold
   - Equal Weight Rebalancing
   - Random allocation

3. **Rigorous Testing**: Used out-of-sample testing to validate generalization

### Key Learnings

- **Sequential Decision Making**: RL naturally handles the temporal nature of portfolio management
- **Risk-Return Optimization**: The agent learned to balance returns with risk and transaction costs
- **Market Adaptation**: The agent can adapt to changing market conditions

### Limitations

1. **Market Assumptions**: Assumes historical patterns will continue
2. **Limited Features**: Only used basic technical indicators
3. **Transaction Costs**: Simplified cost model
4. **Sample Period**: Limited to specific market conditions

### 🔄 Potential Extensions

1. **Enhanced Features**:
   - Macro-economic indicators
   - Sentiment analysis
   - Alternative data sources

2. **Advanced RL Algorithms**:
   - SAC (Soft Actor-Critic)
   - TD3 (Twin Delayed DDPG)
   - Multi-agent approaches

3. **Risk Management**:
   - VaR constraints
   - Drawdown limits
   - Stress testing

4. **Multi-Asset Classes**:
   - Bonds, commodities, crypto
   - Currency hedging
   - International diversification

### 💡 Practical Considerations

- **Model Validation**: Always test on out-of-sample data
- **Risk Management**: Implement proper position sizing and stop-losses
- **Market Regime Changes**: Monitor for structural breaks
- **Transaction Costs**: Use realistic cost assumptions

**Remember**: This is an educational demonstration. Real trading requires careful risk management, regulatory compliance, and thorough backtesting!

In [None]:
# Save results for future analysis
import json
import os

results_path = 'data/experiment_results.json'

results_summary = {
    'experiment_date': datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
    'assets': ASSETS,
    'train_period': f"{train_prices.index[0].date()} to {train_prices.index[-1].date()}",
    'test_period': f"{test_prices.index[0].date()} to {test_prices.index[-1].date()}",
    'baseline_results': {k: v['stats'] for k, v in baseline_results.items()},
    'rl_train_results': train_stats,
    'rl_test_results': test_stats,
    'model_parameters': {
        'algorithm': 'PPO',
        'total_timesteps': total_timesteps,
        # Read from the environment so the record matches what was simulated
        'transaction_cost': train_env.transaction_cost,
        'initial_balance': train_env.initial_balance
    }
}

# Save to file; create the output directory first so the write cannot fail
# merely because 'data/' does not exist yet.
os.makedirs(os.path.dirname(results_path), exist_ok=True)
with open(results_path, 'w') as f:
    # default=str serializes values json cannot encode natively
    json.dump(results_summary, f, indent=2, default=str)

print("💾 Experiment results saved to 'data/experiment_results.json'")
print("\n🎉 Laboratory completed successfully!")
print("\n" + "="*60)
print("🏆 REINFORCEMENT LEARNING FOR ASSET ALLOCATION - COMPLETE")
print("="*60)