# Portfolio Optimization Comparative Analysis

This notebook provides a detailed comparison of different portfolio optimization techniques:
- Monte Carlo Simulation
- Simulated Annealing
- Genetic Algorithm
- Particle Swarm Optimization

We'll analyze their performance, efficiency, and practical implications for portfolio management.

In [None]:
# Import required libraries
from datetime import datetime, timedelta

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from scipy import stats as scipy_stats

# Import project modules
from data.data_loader import DataLoader
from optimizers.monte_carlo import MonteCarloOptimizer
from optimizers.simulated_annealing import SimulatedAnnealingOptimizer
from optimizers.genetic_algorithm import GeneticAlgorithmOptimizer
from optimizers.particle_swarm import ParticleSwarmOptimizer
from utils.metrics import PortfolioMetrics
from utils.visualization import PortfolioVisualizer
from config.config import CONFIG

%matplotlib inline
plt.style.use('seaborn')
sns.set_palette('husl')

## 1. Data Collection and Preprocessing

First, we'll load historical price data for our selected assets and prepare it for analysis.

In [None]:
# Create the loader and take the asset universe from the project config
data_loader = DataLoader()
tickers = CONFIG.default_tickers

# Fetch prices for period='5y', then ask the loader for the return series
price_data = data_loader.load_data(tickers, period='5y')
returns_data = data_loader.calculate_returns()

# Benchmark returns and the risk-free rate used by the ratios further down
market_returns, risk_free_rate = data_loader.get_market_data()

# Quick sanity summary of what was loaded
print(
    "Dataset Summary:",
    f"Number of assets: {len(tickers)}",
    f"Date range: {returns_data.index[0]} to {returns_data.index[-1]}",
    f"Number of trading days: {len(returns_data)}",
    f"Risk-free rate: {risk_free_rate:.2%}",
    sep="\n",
)

### 1.1 Exploratory Data Analysis

In [None]:
# Per-asset summary statistics. Mean and volatility are annualised from the
# return series using `annual_factor` periods per year.
annual_factor = 252  # Trading days in a year
asset_annual_return = returns_data.mean() * annual_factor
asset_annual_vol = returns_data.std() * np.sqrt(annual_factor)

stats = pd.DataFrame({
    'Annual Return': asset_annual_return,
    'Annual Volatility': asset_annual_vol,
    # risk_free_rate is subtracted from the annualised return, so it is
    # assumed to be an annual rate -- TODO confirm against DataLoader.
    'Sharpe Ratio': (asset_annual_return - risk_free_rate) / asset_annual_vol,
    'Skewness': returns_data.skew(),
    'Kurtosis': returns_data.kurtosis()
})

# Only return and volatility are percentages. The Sharpe ratio and the moment
# statistics are unitless, so they are shown as plain decimals instead of
# being multiplied by 100 by a blanket percent format.
display(stats.style
        .format({
            'Annual Return': '{:.2%}',
            'Annual Volatility': '{:.2%}',
            'Sharpe Ratio': '{:.2f}',
            'Skewness': '{:.2f}',
            'Kurtosis': '{:.2f}'
        })
        .background_gradient(cmap='RdYlGn'))

In [None]:
# Two stacked panels: per-asset return densities on top, growth of $1 below
fig, axes = plt.subplots(2, 1, figsize=(12, 10))
density_ax, growth_ax = axes

# Top panel: one kernel density curve per ticker
for ticker in tickers:
    sns.kdeplot(data=returns_data[ticker], ax=density_ax, label=ticker)
density_ax.set_title('Return Distributions')
density_ax.set_xlabel('Daily Return')
density_ax.legend()

# Bottom panel: compounded value of $1 held in each asset
cumulative_returns = (1 + returns_data).cumprod()
cumulative_returns.plot(ax=growth_ax)
growth_ax.set_title('Cumulative Returns')
growth_ax.set_ylabel('Growth of $1')

plt.tight_layout()

### 1.2 Correlation Analysis

In [None]:
# Pairwise correlation of the asset return series
correlation_matrix = returns_data.corr()

# Annotated heatmap with the colour scale centred on zero correlation
heatmap_options = dict(annot=True, cmap='RdYlBu', center=0, fmt='.2f')

plt.figure(figsize=(10, 8))
heatmap_ax = sns.heatmap(correlation_matrix, **heatmap_options)
heatmap_ax.set_title('Asset Correlation Matrix')
plt.tight_layout()

## 2. Portfolio Optimization

Now we'll implement and compare different optimization approaches.

In [None]:
def run_optimizer(optimizer_class, returns_data, risk_free_rate, **kwargs):
    """Instantiate an optimizer, run it, and package its output.

    Args:
        optimizer_class: Callable invoked as
            ``optimizer_class(returns_data, risk_free_rate)``; the object it
            returns must provide ``optimize(**kwargs)`` returning a
            ``(weights, metrics)`` pair.
        returns_data: Return data whose ``columns`` label the weights.
        risk_free_rate: Passed through to the optimizer constructor.
        **kwargs: Forwarded unchanged to ``optimize``.

    Returns:
        dict with ``'weights'`` (a ``pd.Series`` indexed by
        ``returns_data.columns``) and ``'metrics'`` (whatever ``optimize``
        returned). A ``'history'`` entry is added only when the optimizer
        exposes ``get_optimization_history``.
    """
    solver = optimizer_class(returns_data, risk_free_rate)
    best_weights, best_metrics = solver.optimize(**kwargs)

    outcome = {
        'weights': pd.Series(best_weights, index=returns_data.columns),
        'metrics': best_metrics,
    }

    # Not every optimizer records a trajectory; return early when it does not.
    if not hasattr(solver, 'get_optimization_history'):
        return outcome

    outcome['history'] = solver.get_optimization_history()
    return outcome

# Optimizers to compare: display name -> (optimizer class, optimize() kwargs).
# Insertion order is the order in which they are run and later displayed.
# NOTE(review): no random seed is set in this notebook; if these optimizers
# draw from a global RNG, results will differ between runs -- confirm.
optimizer_specs = {
    'Monte Carlo': (MonteCarloOptimizer, {'n_portfolios': 10000}),
    'Simulated Annealing': (SimulatedAnnealingOptimizer, {'max_iterations': 1000}),
    'Genetic Algorithm': (GeneticAlgorithmOptimizer, {'n_generations': 100}),
    'Particle Swarm': (ParticleSwarmOptimizer, {'n_iterations': 100}),
}

# Results keyed by method name; each value is the dict built by run_optimizer
optimization_results = {}
for method_name, (optimizer_cls, optimize_kwargs) in optimizer_specs.items():
    optimization_results[method_name] = run_optimizer(
        optimizer_cls,
        returns_data,
        risk_free_rate,
        **optimize_kwargs
    )

### 2.1 Compare Optimal Portfolios

In [None]:
# Weights table: one column per optimization method, one row per asset
weights_by_method = {}
for name, results in optimization_results.items():
    weights_by_method[name] = results['weights']
weights_comparison = pd.DataFrame(weights_by_method)

# Grouped bar chart of the weights each method assigned to each asset
ax = weights_comparison.plot(kind='bar', figsize=(12, 6))
ax.set_title('Portfolio Weights by Optimization Method')
ax.set_xlabel('Asset')
ax.set_ylabel('Weight')
ax.legend(title='Method')
plt.xticks(rotation=45)
plt.tight_layout()

# Metrics table: transposed so that each method becomes a row
metrics_by_method = {}
for name, results in optimization_results.items():
    metrics_by_method[name] = results['metrics']
metrics_comparison = pd.DataFrame(metrics_by_method).transpose()

styled_metrics = metrics_comparison.style.format('{:.4f}')
display(styled_metrics.background_gradient(cmap='RdYlGn'))

### 2.2 Convergence Analysis

In [None]:
# Plot convergence for the optimizers that recorded a history.
# The grid is sized from the number of methods (two panels per row) so that
# adding an optimizer does not index past the last axis; with four methods
# this is the same 2x2, 15x10 figure as before.
panel_columns = 2
panel_rows = max(1, (len(optimization_results) + panel_columns - 1) // panel_columns)
fig, axes = plt.subplots(panel_rows, panel_columns,
                         figsize=(15, 5 * panel_rows),
                         squeeze=False)
axes = axes.ravel()

# Series to draw when present: (history column, keyword arguments for plot)
fitness_series = (
    ('best_fitness', {'label': 'Best Fitness'}),
    ('avg_fitness', {'label': 'Average Fitness', 'alpha': 0.7}),
)

for idx, (name, results) in enumerate(optimization_results.items()):
    panel = axes[idx]
    has_curve = False

    if 'history' in results:
        # The history object is accessed through `.columns`, so it is
        # expected to be DataFrame-like.
        history = results['history']
        for column, plot_kwargs in fitness_series:
            if column in history.columns:
                history[column].plot(ax=panel, **plot_kwargs)
                has_curve = True

    if not has_curve:
        # Nothing to draw for this method: hide the panel instead of leaving
        # a blank frame (and avoid calling legend() with no artists).
        panel.set_visible(False)
        continue

    panel.set_title(f'{name} Convergence')
    panel.set_xlabel('Iteration')
    panel.set_ylabel('Fitness Value')
    panel.legend()
    panel.grid(True)

# Hide any trailing panels left over when the method count is odd
for spare_panel in axes[len(optimization_results):]:
    spare_panel.set_visible(False)

plt.tight_layout()

## 3. Risk Analysis of Optimal Portfolios

In [None]:
# Return series of each optimized portfolio: asset returns weighted by the
# method's weights. Built directly as a DataFrame (one column per method) so
# the name is never bound to an intermediate dict.
portfolio_returns = pd.DataFrame({
    name: returns_data.dot(results['weights'])
    for name, results in optimization_results.items()
})

# Compounded growth of $1 invested in each portfolio.
# Note: this rebinds `cumulative_returns`, which the EDA cell used for the
# per-asset growth curves.
cumulative_returns = (1 + portfolio_returns).cumprod()

# DataFrame.plot() opens its own figure unless it is handed an axis, which
# would leave a preceding plt.figure(figsize=...) empty. Create the axis
# explicitly so the requested size and the labels apply to the drawn plot.
fig, ax = plt.subplots(figsize=(12, 6))
cumulative_returns.plot(ax=ax)
ax.set_title('Cumulative Returns of Optimized Portfolios')
ax.set_xlabel('Date')
ax.set_ylabel('Growth of $1')
ax.grid(True)
ax.legend(title='Optimization Method')
fig.tight_layout()

In [None]:
# Rolling risk metrics over a one-year (252 observation) window
window = 252  # One year rolling window

rolling_view = portfolio_returns.rolling(window)
rolling_vol = rolling_view.std() * np.sqrt(252)
rolling_sharpe = (rolling_view.mean() * 252 - risk_free_rate) / rolling_vol

# One panel per metric: (data, panel title, y-axis label)
fig, axes = plt.subplots(2, 1, figsize=(12, 10))
rolling_panels = (
    (rolling_vol, 'Rolling Annual Volatility', 'Volatility'),
    (rolling_sharpe, 'Rolling Sharpe Ratio', 'Sharpe Ratio'),
)

for panel, (metric, panel_title, panel_ylabel) in zip(axes, rolling_panels):
    metric.plot(ax=panel)
    panel.set_title(panel_title)
    panel.set_ylabel(panel_ylabel)
    panel.grid(True)

plt.tight_layout()

## 4. Performance Analysis and Risk Metrics

Let's analyze the risk-adjusted performance metrics for each optimization method in more detail.

In [None]:
def _portfolio_risk_metrics(returns, risk_free_rate, periods_per_year=252):
    """Summarise the risk/return profile of one periodic return series.

    Args:
        returns: pd.Series of periodic (here: daily) portfolio returns.
        risk_free_rate: Rate subtracted from the annualised return; assumed to
            be annual because it is compared with annualised figures.
        periods_per_year: Annualisation factor (252 trading days by default).

    Returns:
        dict mapping metric name to value. Ratios are NaN when their
        denominator is not strictly positive.
    """
    # Drawdown: distance of the growth curve below its running peak
    growth = (1 + returns).cumprod()
    running_peak = growth.expanding().max()
    max_drawdown = ((growth - running_peak) / running_peak).min()

    annual_return = returns.mean() * periods_per_year
    annual_vol = returns.std() * np.sqrt(periods_per_year)
    excess_return = annual_return - risk_free_rate

    # Downside deviation for the Sortino ratio: root-mean-square shortfall
    # below a zero target, averaged over ALL periods. Taking the standard
    # deviation of only the negative returns measures dispersion around the
    # average loss and ignores how often losses occur.
    shortfall = returns.clip(upper=0)
    downside_vol = np.sqrt((shortfall ** 2).mean()) * np.sqrt(periods_per_year)

    # 95% one-period Value at Risk and expected shortfall. Series.quantile
    # skips missing values, unlike np.percentile which returns NaN.
    var_95 = returns.quantile(0.05)
    cvar_95 = returns[returns <= var_95].mean()

    return {
        'Annual Return': annual_return,
        'Annual Volatility': annual_vol,
        'Sharpe Ratio': excess_return / annual_vol if annual_vol > 0 else np.nan,
        'Sortino Ratio': excess_return / downside_vol if downside_vol > 0 else np.nan,
        'Maximum Drawdown': max_drawdown,
        'VaR (95%)': var_95,
        'CVaR (95%)': cvar_95
    }


# Calculate comprehensive performance metrics for each portfolio
performance_metrics = {
    name: _portfolio_risk_metrics(returns, risk_free_rate)
    for name, returns in portfolio_returns.items()
}

# Convert to DataFrame (one row per optimization method)
performance_df = pd.DataFrame(performance_metrics).T

# Volatility is the only column where a larger value is worse, so it gets the
# reversed colour map; drawdown, VaR and CVaR are negative numbers for which
# a larger (closer to zero) value is better.
lower_is_better = ['Annual Volatility']
higher_is_better = [column for column in performance_df.columns
                    if column not in lower_is_better]

# Display formatted results
display(performance_df.style
        .format({
            'Annual Return': '{:.2%}',
            'Annual Volatility': '{:.2%}',
            'Sharpe Ratio': '{:.2f}',
            'Sortino Ratio': '{:.2f}',
            'Maximum Drawdown': '{:.2%}',
            'VaR (95%)': '{:.2%}',
            'CVaR (95%)': '{:.2%}'
        })
        .background_gradient(cmap='RdYlGn', subset=higher_is_better)
        .background_gradient(cmap='RdYlGn_r', subset=lower_is_better))

### 4.1 Risk Distribution Analysis

In [None]:
# Plot return distributions next to a normality check
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(15, 6))

# Left panel: kernel density estimate of each portfolio's returns
for name in portfolio_returns.columns:
    sns.kdeplot(data=portfolio_returns[name], ax=ax1, label=name)
ax1.set_title('Return Distributions')
ax1.set_xlabel('Daily Return')
ax1.set_ylabel('Density')
ax1.legend(title='Method')

# Right panel: Q-Q plot against the normal distribution.
# scipy.stats comes from the notebook's import cell under the alias
# `scipy_stats`, so it does not rebind the `stats` DataFrame built in the
# exploratory analysis.
for name in portfolio_returns.columns:
    # Drop missing values so they cannot distort the ordered sample quantiles
    returns = portfolio_returns[name].dropna()
    (theoretical_quantiles, sample_quantiles), _fit = scipy_stats.probplot(
        returns, dist='norm')
    ax2.plot(theoretical_quantiles, sample_quantiles, label=name)

ax2.set_title('Q-Q Plot vs Normal Distribution')
ax2.set_xlabel('Theoretical Quantiles')
ax2.set_ylabel('Sample Quantiles')
ax2.legend(title='Method')

plt.tight_layout()

### 4.2 Drawdown Analysis

In [None]:
# Drawdown of each portfolio: fractional distance of the growth curve below
# its running peak. Computed for all columns at once rather than growing a
# DataFrame one column at a time.
portfolio_growth = (1 + portfolio_returns).cumprod()
running_peak = portfolio_growth.expanding().max()
drawdowns = (portfolio_growth - running_peak) / running_peak

# DataFrame.plot() opens its own figure unless it is handed an axis, which
# would leave a preceding plt.figure(figsize=...) empty. Create the axis
# explicitly so the requested size and the labels apply to the drawn plot.
fig, ax = plt.subplots(figsize=(12, 6))
drawdowns.plot(ax=ax)
ax.set_title('Portfolio Drawdowns')
ax.set_xlabel('Date')
ax.set_ylabel('Drawdown')
ax.grid(True)
ax.legend(title='Method')
fig.tight_layout()

## 5. Comparative Analysis and Conclusions

Let's summarize the key findings from our analysis of different optimization methods.

In [None]:
# Summary table with one row per optimization method, in run order
summary = pd.DataFrame(index=optimization_results.keys())

method_results = list(optimization_results.values())
method_weights = [entry['weights'] for entry in method_results]

# Diversification = number of assets holding more than a 1% weight
summary['Portfolio Diversification'] = [
    int((weights > 0.01).sum()) for weights in method_weights
]
summary['Max Asset Weight'] = [weights.max() for weights in method_weights]

# Sharpe ratio as reported by the optimizer itself
summary['Sharpe Ratio'] = [
    entry['metrics']['sharpe_ratio'] for entry in method_results
]

# Realised return and risk from the performance table, aligned on method name
summary['Annual Return'] = performance_df['Annual Return']
summary['Annual Risk'] = performance_df['Annual Volatility']

summary_formats = {
    'Max Asset Weight': '{:.2%}',
    'Sharpe Ratio': '{:.2f}',
    'Annual Return': '{:.2%}',
    'Annual Risk': '{:.2%}'
}
display(summary.style.format(summary_formats).background_gradient(cmap='RdYlGn'))

### Key Findings:

1. **Optimization Effectiveness**
   - Compare Sharpe ratios and risk-adjusted returns across methods
   - Analyze the trade-off between return and risk for each approach

2. **Portfolio Characteristics**
   - Evaluate diversification levels achieved by each method
   - Compare concentration risk and maximum position sizes

3. **Risk Management**
   - Assess downside protection capabilities
   - Compare drawdown characteristics

4. **Implementation Considerations**
   - Discuss computational efficiency
   - Consider stability of solutions
   - Evaluate practical implementation challenges

## 6. Out-of-Sample Testing

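Let's evaluate how these portfolios perform over a separate one-year window. For this to be a genuine out-of-sample test, that window must not overlap the five-year period used to optimize the weights above.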

In [None]:
# Load test period data through a dedicated loader.
# calculate_returns() takes no arguments, so it presumably works on whatever
# the loader last loaded; reusing `data_loader` here would replace the state
# it holds for the 5y data used earlier in the notebook.
# NOTE(review): if period='1y' means the most recent year, this window lies
# inside the '5y' window the weights were optimized on, so the test would not
# be out-of-sample -- confirm DataLoader.load_data semantics.
test_loader = DataLoader()
test_data = test_loader.load_data(tickers, period='1y')
test_returns = test_loader.calculate_returns()

# Out-of-sample portfolio returns: test-period asset returns weighted by each
# method's optimized weights (one column per method)
oos_returns = pd.DataFrame({
    name: test_returns.dot(results['weights'])
    for name, results in optimization_results.items()
})

# Compounded growth of $1 over the test period
oos_cumulative = (1 + oos_returns).cumprod()

# DataFrame.plot() opens its own figure unless it is handed an axis, which
# would leave a preceding plt.figure(figsize=...) empty. Create the axis
# explicitly so the requested size and the labels apply to the drawn plot.
fig, ax = plt.subplots(figsize=(12, 6))
oos_cumulative.plot(ax=ax)
ax.set_title('Out-of-Sample Portfolio Performance')
ax.set_xlabel('Date')
ax.set_ylabel('Growth of $1')
ax.grid(True)
ax.legend(title='Method')
fig.tight_layout()