# Portfolio Optimization 

## Config & Imports
Set up the configuration parameters and import the necessary libraries. 
The configuration defines stocks that will be analyzed, the time period, and key parameters like the risk-free rate.

In [1]:
# Import required libraries 
import os
from datetime import datetime, timedelta

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import yfinance as yf
from fredapi import Fred
from scipy.optimize import minimize

# Configuration
# Every tunable setting for the analysis lives in this one dict so that later
# cells read their inputs from a single place.
CONFIG = {
    'TICKERS': ['AAPL', 'MSFT', 'GOOGL', 'AMZN', 'TSLA', 'META', 'NVDA', 'BRK-B'],
    'START_DATE': datetime.now() - timedelta(days=5*365),  # 5 x 365 days back (leap days ignored)
    'END_DATE': datetime.now(),  # Today
    'INTERVAL': '1mo',  # Monthly data
    'RF_ANNUAL': None,  # Will fetch from FRED
    'ALLOW_SHORTS': False,  # Long-only portfolios
    'FREQUENCY': 'monthly',  # Selects the annualization factor; keep in step with INTERVAL
    # Credentials must not be committed with the notebook: export FRED_API_KEY
    # in the environment before starting the kernel. The value is None when
    # the variable is unset.
    'FRED_API_KEY': os.environ.get('FRED_API_KEY'),
}

def annualization_factor(freq):
    """Return the number of return periods per year for a sampling frequency.

    Parameters
    ----------
    freq : str
        One of 'daily', 'weekly' or 'monthly'.

    Returns
    -------
    int
        252 for daily, 52 for weekly, 12 for monthly.

    Raises
    ------
    ValueError
        If ``freq`` is not one of the supported labels.
    """
    periods_per_year = (('daily', 252), ('weekly', 52), ('monthly', 12))
    for label, periods in periods_per_year:
        if freq == label:
            return periods
    raise ValueError(f"Unsupported frequency: {freq}")

# Set risk-free rate 
def get_rf_rate(api_key=None):
    """Fetch the most recent 10-year Treasury yield from FRED as a decimal.

    Parameters
    ----------
    api_key : str, optional
        FRED API key. When omitted, CONFIG['FRED_API_KEY'] is used; if that is
        also None, fredapi falls back to the FRED_API_KEY environment variable.

    Returns
    -------
    float
        Latest non-missing observation of the 'GS10' series divided by 100
        (for example 0.0412 for a 4.12% yield).

    Raises
    ------
    ValueError
        If the series contains no usable observations.
    """
    if api_key is None:
        # Single source of truth for the credential instead of a second literal.
        api_key = CONFIG.get('FRED_API_KEY')
    fred = Fred(api_key=api_key)

    # GS10 is quoted in percent; dividing by 100 converts it to a decimal rate.
    # Missing observations are dropped so the "latest" value is never NaN.
    ten_year_treasury_rate = fred.get_series('GS10').dropna() / 100
    if ten_year_treasury_rate.empty:
        raise ValueError("FRED returned no observations for series 'GS10'")

    # .iloc selects by position. Plain [-1] on a date-indexed Series relies on
    # a deprecated fallback from label lookup to positional lookup.
    return float(ten_year_treasury_rate.iloc[-1])

# Replace the RF_ANNUAL placeholder with the value returned by get_rf_rate()
CONFIG['RF_ANNUAL'] = get_rf_rate()

# Checkpoint: echo the configuration back, withholding the credential
print("Current Configuration:")
for key, value in CONFIG.items():
    if key == 'FRED_API_KEY':
        continue  # never print the API key
    print(f"  {key}: {value}")
print(f"\nAnnualization factor: {annualization_factor(CONFIG['FREQUENCY'])}")
print(f"Date range: {CONFIG['START_DATE'].strftime('%Y-%m-%d')} to {CONFIG['END_DATE'].strftime('%Y-%m-%d')}")
print(f"Risk-free rate: {CONFIG['RF_ANNUAL']:.4f} ({CONFIG['RF_ANNUAL']*100:.2f}%)")
      


Current Configuration:
  TICKERS: ['AAPL', 'MSFT', 'GOOGL', 'AMZN', 'TSLA', 'META', 'NVDA', 'BRK-B']
  START_DATE: 2020-10-17 22:13:13.274921
  END_DATE: 2025-10-16 22:13:13.274941
  INTERVAL: 1mo
  RF_ANNUAL: 0.0412
  ALLOW_SHORTS: False
  FREQUENCY: monthly

Annualization factor: 12
Date range: 2020-10-17 to 2025-10-16
Risk-free rate: 0.0412 (4.12%)


  rf_rate = ten_year_treasury_rate[-1] # Convert to decimal


## Data Import (yfinance)
Fetch actual stock price data from yfinance. 
We download adjusted closing prices for all tickers over the 5-year period, then clean the data by forward-filling any missing values and dropping any remaining NaNs. 

In [2]:
# Data import 
def fetch_prices(tickers, start_date, end_date, interval):
    """Download adjusted closing prices from yfinance and clean them.

    Parameters
    ----------
    tickers : str or list of str
        Symbol or symbols to download.
    start_date, end_date : datetime-like
        Passed to ``yf.download`` as ``start`` and ``end``.
    interval : str
        Bar size passed to ``yf.download`` (for example '1mo').

    Returns
    -------
    pandas.DataFrame
        One column per ticker holding the 'Close' field, forward-filled, with
        any row that still contains a NaN removed.

    Raises
    ------
    ValueError
        If the download returns no rows, or if no complete row survives
        cleaning.
    """
    # A bare string is one ticker; len() on it would count characters.
    n_tickers = 1 if isinstance(tickers, str) else len(tickers)
    print(f"Fetching data for {n_tickers} tickers...")

    # auto_adjust is passed explicitly: newer yfinance releases default it to
    # True and warn when it is left implicit. With it enabled, 'Close' is
    # already split/dividend adjusted, so there is no separate 'Adj Close'.
    prices = yf.download(tickers, start=start_date, end=end_date,
                         interval=interval, auto_adjust=True, progress=False)
    if prices.empty:
        raise ValueError(f"yfinance returned no data for {tickers}")

    if prices.columns.nlevels > 1:
        # Multi-level columns: select the 'Close' field, leaving one column per ticker.
        prices = prices['Close']
    else:
        # Single-level columns are the price fields of one ticker; keep only
        # 'Close' and label it with the ticker so the result has the same
        # layout as the multi-ticker case.
        only_ticker = tickers if isinstance(tickers, str) else tickers[0]
        prices = prices[['Close']].rename(columns={'Close': only_ticker})

    # Forward-fill gaps, then drop rows that are still incomplete (typically
    # leading rows before a ticker has any history).
    prices = prices.ffill().dropna()
    if prices.empty:
        raise ValueError(
            "No complete rows left after cleaning; check that every ticker "
            "has data for the requested period"
        )

    # NOTE(review): with a monthly interval and an end_date in the middle of a
    # month, the last row is presumably a partial month - confirm whether it
    # should be dropped before returns are computed.
    return prices

# Download prices for the configured tickers, date window and interval
prices = fetch_prices(
    CONFIG['TICKERS'],
    CONFIG['START_DATE'],
    CONFIG['END_DATE'],
    CONFIG['INTERVAL'],
)

# Checkpoint: summarize what came back and confirm it is fully populated
print(f"\nData shape: {prices.shape}")
first_date, last_date = prices.index[0], prices.index[-1]
print(f"Date range: {first_date.strftime('%Y-%m-%d')} to {last_date.strftime('%Y-%m-%d')}")
print(f"Number of observations: {prices.shape[0]}")
print(f"Number of assets: {prices.shape[1]}")
print("\nFirst few rows:")
print(prices.head())
assert not prices.isnull().values.any(), "Should have no missing values after cleaning"

  prices = yf.download(tickers, start=start_date, end=end_date,


Fetching data for 8 tickers...

Data shape: (60, 8)
Date range: 2020-11-01 to 2025-10-01
Number of observations: 60
Number of assets: 8

First few rows:
Ticker            AAPL        AMZN       BRK-B       GOOGL        META  \
Date                                                                     
2020-11-01  115.802055  158.401993  228.910004   87.119133  275.273041   
2020-12-01  129.292618  162.846497  231.869995   87.031738  271.486359   
2021-01-01  128.581314  160.309998  227.869995   90.742142  256.747223   
2021-02-01  118.155258  154.646500  240.509995  100.403008  256.041565   
2021-03-01  119.200317  154.703995  255.470001  102.419601  292.725464   

Ticker            MSFT       NVDA        TSLA  
Date                                           
2020-11-01  205.255737  13.360716  189.199997  
2020-12-01  213.820267  13.015271  235.223328  
2021-01-01  222.991440  12.954045  264.510010  
2021-02-01  223.395203  13.676803  225.166672  
2021-03-01  227.176117  13.311560  222.6

## Returns and Annualization
Convert the price data into returns: calculate simple returns (percentage changes), then annualize both the expected returns and the covariance matrix by multiplying by the annualization factor.

In [3]:
# Returns & Annualization
def calculate_returns(prices):
    """Turn a price table into period-over-period simple returns.

    Each value is the fractional change from the previous row. Rows that
    contain any NaN are removed, which includes the first row because it has
    no predecessor.
    """
    simple_returns = prices.pct_change()
    complete_rows = simple_returns.dropna()
    return complete_rows

# Calculate returns
R = calculate_returns(prices)
print(f"Returns shape: {R.shape}")  
print(f"Date range: {R.index[0].strftime('%Y-%m-%d')} to {R.index[-1].strftime('%Y-%m-%d')}")

# Annualization factor (periods per year for the configured sampling frequency)
ann_factor = annualization_factor(CONFIG['FREQUENCY'])
print(f"Annualization factor: {ann_factor}")

# Calculate annualized expected returns and covariance matrix.
# Both the mean and the covariance are scaled linearly by periods per year.
mu = R.mean() * ann_factor  # Annualized expected returns 
Sigma = R.cov() * ann_factor  # Annualized covariance matrix 

print(f"\nExpected returns shape: {mu.shape}")
print(f"Covariance matrix shape: {Sigma.shape}")  

# Checkpoint: Display key statistics
print("\nAnnualized Expected Returns (%):")
print((mu * 100).round(2))
print("\nAnnualized Volatilities (%):")
# Volatility is the square root of each variance on the covariance diagonal.
print((np.sqrt(np.diag(Sigma)) * 100).round(2))  

# Verify covariance matrix is positive semi-definite.
# eigvalsh is the solver for symmetric matrices: it always returns real
# eigenvalues, whereas the general eigvals solver may return complex values
# that cannot be compared against the tolerance below.
eigenvals = np.linalg.eigvalsh(Sigma.to_numpy())
min_eigenval = np.min(eigenvals)
print(f"\nMinimum eigenvalue: {min_eigenval:.2e}")
if min_eigenval < -1e-8:
    print("WARNING: Covariance matrix not positive semi-definite!")
    # Adding c*I shifts every eigenvalue up by c, so the ridge must exceed
    # |min_eigenval| to make the matrix PSD. A fixed 1e-8 cannot repair an
    # eigenvalue that is already below -1e-8.
    ridge = 1e-8 - min_eigenval
    Sigma = Sigma + ridge * np.eye(Sigma.shape[0])
    print(f"Added ridge regularization ({ridge:.2e})")
else:
    print("Covariance matrix is positive semi-definite ✓")

Returns shape: (59, 8)
Date range: 2020-12-01 to 2025-10-01
Annualization factor: 12

Expected returns shape: (8,)
Covariance matrix shape: (8, 8)

Annualized Expected Returns (%):
Ticker
AAPL     18.58
AMZN     10.80
BRK-B    17.25
GOOGL    25.39
META     28.08
MSFT     21.10
NVDA     66.64
TSLA     34.48
dtype: float64

Annualized Volatilities (%):
[25.05 30.83 18.75 27.05 40.99 22.28 50.77 61.12]

Minimum eigenvalue: 1.76e-02
Covariance matrix is positive semi-definite ✓
