# Portfolio Optimization 

## Config & Imports
This cell sets up configuration parameters and imports the necessary libraries. 
The configuration defines the stocks that will be analyzed, the time period, and key parameters like the risk-free rate.

In [2]:
# Import required libraries 
import os
from datetime import datetime, timedelta

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import yfinance as yf
from fredapi import Fred
from scipy.optimize import minimize

# Configuration
# Capture "now" once so START_DATE and END_DATE are measured from the same
# instant and the window is exactly 5*365 days long.
_NOW = datetime.now()

CONFIG = {
    'TICKERS': ['AAPL', 'MSFT', 'GOOGL', 'AMZN', 'TSLA', 'META', 'NVDA', 'BRK-B'],
    'START_DATE': _NOW - timedelta(days=5*365),  # 5 years ago (1825 days; leap days ignored)
    'END_DATE': _NOW,  # Today
    'INTERVAL': '1mo',  # Monthly data
    'RF_ANNUAL': None,  # Will fetch from FRED
    'ALLOW_SHORTS': False,  # Long-only portfolios
    'FREQUENCY': 'monthly',
    # The credential is read from the environment rather than stored in the
    # notebook, so it is never committed or shared with the file.
    # NOTE(review): the key that was previously embedded here should be
    # treated as exposed and rotated.
    'FRED_API_KEY': os.environ.get('FRED_API_KEY'),
}

def annualization_factor(freq):
    """Return the number of periods per year for a sampling frequency.

    Parameters
    ----------
    freq : str
        One of 'daily', 'weekly' or 'monthly'.

    Returns
    -------
    int
        252 for daily, 52 for weekly, 12 for monthly data.

    Raises
    ------
    ValueError
        If ``freq`` is not one of the supported labels.
    """
    periods_per_year = (
        ('daily', 252),
        ('weekly', 52),
        ('monthly', 12),
    )
    for label, periods in periods_per_year:
        if freq == label:
            return periods
    raise ValueError(f"Unsupported frequency: {freq}")

# Set risk-free rate 
def get_rf_rate(api_key=None):
    """Return the latest 10-year Treasury yield from FRED as a decimal.

    Parameters
    ----------
    api_key : str, optional
        FRED API key. When omitted, ``CONFIG['FRED_API_KEY']`` is used so the
        credential is defined in a single place.

    Returns
    -------
    The most recent non-missing observation of the ``GS10`` series
    (10-year Treasury Constant Maturity Rate) divided by 100,
    e.g. 4.12 (percent) -> 0.0412.

    Raises
    ------
    ValueError
        If FRED returns no usable observations for ``GS10``.
    """
    if api_key is None:
        api_key = CONFIG.get('FRED_API_KEY')
    fred = Fred(api_key=api_key)

    # Drop missing observations so a trailing gap cannot become the rate.
    ten_year_treasury_rate = fred.get_series('GS10').dropna()
    if ten_year_treasury_rate.empty:
        raise ValueError("FRED returned no observations for series 'GS10'")

    # Use .iloc for positional access: plain [-1] on a date-indexed Series is
    # treated as a label lookup in newer pandas and emits a FutureWarning.
    rf_rate = ten_year_treasury_rate.iloc[-1] / 100  # Convert percent to decimal
    return rf_rate

# Resolve the risk-free rate now so later cells can read it from CONFIG
CONFIG['RF_ANNUAL'] = get_rf_rate()

# Checkpoint: echo the active settings, keeping the API key out of the output
print("Current Configuration:")
for key, value in CONFIG.items():
    if key == 'FRED_API_KEY':
        continue
    print(f"  {key}: {value}")

# Derived values, shown in a human-friendly format
print(f"\nAnnualization factor: {annualization_factor(CONFIG['FREQUENCY'])}")
print(f"Date range: {CONFIG['START_DATE'].strftime('%Y-%m-%d')} to {CONFIG['END_DATE'].strftime('%Y-%m-%d')}")
print(f"Risk-free rate: {CONFIG['RF_ANNUAL']:.4f} ({CONFIG['RF_ANNUAL']*100:.2f}%)")
      


Current Configuration:
  TICKERS: ['AAPL', 'MSFT', 'GOOGL', 'AMZN', 'TSLA', 'META', 'NVDA', 'BRK-B']
  START_DATE: 2020-10-17 21:34:56.489310
  END_DATE: 2025-10-16 21:34:56.489319
  INTERVAL: 1mo
  RF_ANNUAL: 0.0412
  ALLOW_SHORTS: False
  FREQUENCY: monthly

Annualization factor: 12
Date range: 2020-10-17 to 2025-10-16
Risk-free rate: 0.0412 (4.12%)


  rf_rate = ten_year_treasury_rate[-1] # Convert to decimal


## Data Import (yfinance)
This cell fetches actual stock price data from yfinance. 
It downloads adjusted closing prices for all tickers over the 5-year period, then cleans the data by forward-filling any missing values and dropping any remaining NaNs. 

In [7]:
# Data import 
def fetch_prices(tickers, start_date, end_date, interval):
    """Download closing prices for ``tickers`` and return a gap-free table.

    Parameters
    ----------
    tickers : list of str
        Symbols passed to ``yf.download``.
    start_date, end_date
        Bounds of the download window, forwarded as ``start`` / ``end``.
    interval : str
        Bar size forwarded to ``yf.download`` (e.g. ``'1mo'``).

    Returns
    -------
    pandas.DataFrame
        The ``Close`` series of each ticker, forward-filled, with any row that
        still contains a NaN removed.
    """
    print(f"Fetching data for {len(tickers)} tickers...")

    # Pass auto_adjust explicitly: the result then does not depend on the
    # installed yfinance default, and the warning about the changed default
    # is not emitted. With auto_adjust=True the 'Close' column is already
    # adjusted for splits and dividends.
    prices = yf.download(tickers, start=start_date, end=end_date,
                         interval=interval, auto_adjust=True, progress=False)

    if prices.columns.nlevels > 1:
        # Multi-level columns (field, ticker): keep only the Close field.
        prices = prices['Close']
    elif 'Close' in prices.columns:
        # Flat columns (Open/High/Low/Close/Volume): keep only Close so the
        # return value has the same meaning as in the multi-level case.
        names = [tickers] if isinstance(tickers, str) else list(tickers)
        prices = prices[['Close']]
        if len(names) == 1:
            prices = prices.rename(columns={'Close': names[0]})

    # Forward-fill missing values, then drop any rows that still contain NaNs
    # (typically leading rows before a ticker has any data).
    prices = prices.ffill().dropna()

    return prices

# Pull the price history for the configured tickers and date window
prices = fetch_prices(
    tickers=CONFIG['TICKERS'],
    start_date=CONFIG['START_DATE'],
    end_date=CONFIG['END_DATE'],
    interval=CONFIG['INTERVAL'],
)

# Checkpoint: summarize what came back and confirm the table has no gaps
print(f"\nData shape: {prices.shape}")
print(f"Date range: {prices.index[0].strftime('%Y-%m-%d')} to {prices.index[-1].strftime('%Y-%m-%d')}")
print(f"Number of observations: {len(prices)}")
print(f"Number of assets: {prices.shape[1]}")
print("\nFirst few rows:")
print(prices.head())
assert not prices.isnull().values.any(), "Should have no missing values after cleaning"

Fetching data for 8 tickers...


  prices = yf.download(tickers, start=start_date, end=end_date,



Data shape: (60, 8)
Date range: 2020-11-01 to 2025-10-01
Number of observations: 60
Number of assets: 8

First few rows:
Ticker            AAPL        AMZN       BRK-B       GOOGL        META  \
Date                                                                     
2020-11-01  115.802048  158.401993  228.910004   87.119125  275.273010   
2020-12-01  129.292572  162.846497  231.869995   87.031738  271.486389   
2021-01-01  128.581299  160.309998  227.869995   90.742142  256.747223   
2021-02-01  118.155243  154.646500  240.509995  100.403008  256.041565   
2021-03-01  119.200340  154.703995  255.470001  102.419601  292.725403   

Ticker            MSFT       NVDA        TSLA  
Date                                           
2020-11-01  205.255783  13.360714  189.199997  
2020-12-01  213.820312  13.015269  235.223328  
2021-01-01  222.991455  12.954044  264.510010  
2021-02-01  223.395187  13.676804  225.166672  
2021-03-01  227.176147  13.311560  222.643326  
