## Setup and Imports

In [28]:
# Core libraries
import pandas as pd
import numpy as np
from pathlib import Path
from datetime import datetime, date
import warnings
warnings.filterwarnings('ignore')

# For XIRR calculations
import numpy_financial as npf

# For data fetching
import os
import requests
import time
from typing import Dict, List, Tuple

# For visualization
import matplotlib.pyplot as plt
import seaborn as sns
sns.set_style('whitegrid')

print("All libraries imported successfully")

All libraries imported successfully


In [29]:
def load_transactions(transactions_dir='/Users/bhargav/Git/investments/transactions'):
    """Load every transaction CSV in a directory into one date-sorted DataFrame.

    Args:
        transactions_dir: Directory holding the ``*.csv`` exports. The default is
            the location this notebook was written against, so a bare
            ``load_transactions()`` call keeps working; pass another path to run
            the notebook elsewhere.

    Returns:
        A DataFrame containing the rows of all CSV files (read in sorted
        file-name order) with ``'Trade Date'`` parsed to datetime and the rows
        sorted by that column. Rows sharing a date keep their file order.

    Raises:
        FileNotFoundError: If the directory contains no ``*.csv`` file.
    """
    transactions_dir = Path(transactions_dir)
    all_transactions = []

    for csv_file in sorted(transactions_dir.glob('*.csv')):
        df = pd.read_csv(csv_file)
        # Clean column names (remove a UTF-8 byte-order mark if present)
        df.columns = df.columns.str.replace('\ufeff', '', regex=False)
        all_transactions.append(df)
        print(f"Loaded {csv_file.name}: {len(df)} transactions")

    if not all_transactions:
        # pd.concat([]) would fail with an unhelpful "No objects to concatenate"
        raise FileNotFoundError(f"No CSV files found in {transactions_dir}")

    # Combine all years
    transactions = pd.concat(all_transactions, ignore_index=True)

    # Convert date column and sort; a stable sort keeps same-day rows in file order
    transactions['Trade Date'] = pd.to_datetime(transactions['Trade Date'])
    transactions = transactions.sort_values('Trade Date', kind='mergesort')

    return transactions

# Read every yearly export into one combined, date-sorted frame
transactions = load_transactions()
total_loaded = len(transactions)
print(f"\nTotal transactions loaded: {total_loaded}")

Loaded 2018.csv: 42 transactions
Loaded 2019.csv: 42 transactions
Loaded 2020.csv: 195 transactions
Loaded 2021.csv: 218 transactions
Loaded 2022.csv: 289 transactions
Loaded 2023.csv: 257 transactions
Loaded 2024.csv: 382 transactions
Loaded 2025.csv: 251 transactions

Total transactions loaded: 1676


In [30]:
# First look at the raw data: how often does each transaction code occur?
type_counts = transactions['Type'].value_counts()
print("Transaction types and counts:")
print(type_counts)

# The date sort left the index shuffled; renumber rows 0..n-1 in place
transactions.reset_index(drop=True, inplace=True)

print("\nSample transactions:")
transactions.head(10)

Transaction types and counts:
Type
Dividend        530
Reinvest        415
Buy             398
DBS              70
WDL              68
Interest         57
Sell             21
DBT              19
DEPOSIT          17
ADR              14
BNK              13
WHT              12
Deposit          10
CAP              10
STK SPLT          5
FWT               4
Split             3
SPLT              2
LIQ               2
CIL               2
Exchange          2
Distribution      1
MER               1
Name: count, dtype: int64

Sample transactions:


Unnamed: 0,Trade Date,Type,Ticker,Security Type,Price USD,Quantity,Amount USD
0,2018-07-19,Buy,SCHF,ETF,33.25,74.0,-2460.5
1,2018-07-19,Buy,SCHX,ETF,67.0,74.0,-4958.0
2,2018-07-24,Buy,SCHA,ETF,75.0,25.0,-1875.0
3,2018-09-12,Buy,FTEC,ETF,59.42,40.0,-2376.8
4,2018-09-27,Reinvest,FTEC,ETF,60.19,0.0937,-5.64
5,2018-10-01,Reinvest,SCHX,ETF,70.06,0.3273,-22.93
6,2018-10-01,Reinvest,SCHA,ETF,76.28,0.0725,-5.53
7,2018-10-02,Buy,SCHA,ETF,74.77,30.0,-2243.1
8,2018-10-08,Buy,VGT,ETF,194.6,20.0,-3892.0
9,2018-10-09,Buy,VOX,ETF,85.25,20.0,-1705.0


In [31]:
def consolidate_transaction_types(df):
    """Collapse broker-specific transaction codes into a small upper-case vocabulary.

    Each value of ``df['Type']`` is stripped of surrounding whitespace and
    upper-cased, then looked up in the mapping below. Codes that are not in the
    mapping are kept, in their stripped/upper-cased form. Matching is therefore
    insensitive to case and padding (``' dbs '`` and ``'DBS'`` both become
    ``'DIVIDEND'``).

    Args:
        df: DataFrame with a string ``'Type'`` column. It is modified in place.

    Returns:
        The same DataFrame object, with ``'Type'`` rewritten to labels such as
        ``'BUY'``, ``'DIVIDEND'`` or ``'CAPITAL GAIN'``. Missing values stay missing.
    """
    # Raw code -> consolidated label (labels are upper-cased before use)
    type_mapping = {
        # Deposits - consolidate variations
        'DEPOSIT': 'Deposit',
        'Deposit': 'Deposit',

        # Splits - consolidate variations
        'STK SPLT': 'Split',
        'SPLT': 'Split',
        'Split': 'Split',

        # Dividends - DBS/DBT are folded into the plain dividend label
        'Dividend': 'Dividend',
        'DBS': 'Dividend',
        'DBT': 'Dividend',

        # Reinvestments
        'Reinvest': 'Reinvest',

        # Buys and Sells
        'Buy': 'Buy',
        'Sell': 'Sell',
        'LIQ': 'Sell',      # Liquidation

        # Interest
        'Interest': 'Interest',

        # Distributions and capital gains
        'Distribution': 'Distribution',
        'CAP': 'Capital Gain',

        # Tax-related
        'WHT': 'Tax Withheld',     # Withholding tax
        'FWT': 'Tax Withheld',     # Foreign withholding tax

        # Fees
        'ADR': 'Fee',          # ADR fee
        'MER': 'Fee',    # Management expense ratio

        # Cash movements
        'WDL': 'Withdrawal',       # Withdrawal
        'BNK': 'Bank Transfer',    # Bank transfer

        # Corporate actions
        'CIL': 'Corporate Action',     # Cash in lieu
        'Exchange': 'Corporate Action',     # Currency exchange or security exchange
    }

    # Key the lookup on the normalised code so that padding or letter case in the
    # raw export cannot make a known code slip past the mapping.
    lookup = {raw.strip().upper(): label.upper() for raw, label in type_mapping.items()}

    # Normalise first, then map; unknown codes fall back to their normalised form
    normalized = df['Type'].str.strip().str.upper()
    df['Type'] = normalized.map(lookup).fillna(normalized)

    return df

In [32]:
# Normalise the raw broker codes, then preview the first five rows
transactions = consolidate_transaction_types(transactions)
transactions.head(5)

Unnamed: 0,Trade Date,Type,Ticker,Security Type,Price USD,Quantity,Amount USD
0,2018-07-19,BUY,SCHF,ETF,33.25,74.0,-2460.5
1,2018-07-19,BUY,SCHX,ETF,67.0,74.0,-4958.0
2,2018-07-24,BUY,SCHA,ETF,75.0,25.0,-1875.0
3,2018-09-12,BUY,FTEC,ETF,59.42,40.0,-2376.8
4,2018-09-27,REINVEST,FTEC,ETF,60.19,0.0937,-5.64


In [33]:
# Frequency of each consolidated transaction type
consolidated_type_counts = transactions['Type'].value_counts()
print(consolidated_type_counts)

Type
DIVIDEND            619
REINVEST            415
BUY                 398
WITHDRAWAL           68
INTEREST             57
DEPOSIT              27
SELL                 23
TAX WITHHELD         16
FEE                  15
BANK TRANSFER        13
SPLIT                10
CAPITAL GAIN         10
CORPORATE ACTION      4
DISTRIBUTION          1
Name: count, dtype: int64


## Step 2: Filter Relevant Transactions

In [34]:
# List of money market funds to exclude (as per requirements)
# NOTE(review): 'VMSXX' appears in this notebook's filtered ticker output and looks
# like another money market fund - confirm whether it should be listed here too.
MONEY_MARKET_FUNDS = ['VMFXX', 'QACDS', 'SPAXX', 'FDRXX', 'SWVXX', 'VMMXX']

# Consolidated transaction types that count as trading activity
EQUITY_TRANSACTION_TYPES = ['BUY', 'SELL', 'DIVIDEND', 'REINVEST', 'CAPITAL GAIN']


def _normalized_labels(series):
    """Return the series as stripped, upper-cased strings (missing values become '')."""
    return series.fillna('').astype(str).str.strip().str.upper()


def filter_equity_transactions(df, excluded_tickers=None, relevant_types=None):
    """Filter for equity/ETF transactions only.

    A row is kept when it has a non-blank ticker that is not an excluded fund and
    its type is one of the relevant trading types. Tickers and types are compared
    after stripping whitespace and upper-casing, so ``' vmfxx'`` is still
    recognised as a money market fund. The values in the returned rows are left
    exactly as they were in ``df``.

    Args:
        df: Transactions with ``'Ticker'`` and ``'Type'`` columns.
        excluded_tickers: Tickers to drop; defaults to ``MONEY_MARKET_FUNDS``.
        relevant_types: Type labels to keep; defaults to ``EQUITY_TRANSACTION_TYPES``.

    Returns:
        A copy of the matching rows (original index labels preserved).
    """
    excluded = MONEY_MARKET_FUNDS if excluded_tickers is None else excluded_tickers
    wanted = EQUITY_TRANSACTION_TYPES if relevant_types is None else relevant_types
    excluded_norm = {str(ticker).strip().upper() for ticker in excluded}
    wanted_norm = {str(label).strip().upper() for label in wanted}

    ticker_norm = _normalized_labels(df['Ticker'])
    type_norm = _normalized_labels(df['Type'])

    keep = (
        (ticker_norm != '')                   # rows without a ticker (missing or blank)
        & ~ticker_norm.isin(excluded_norm)    # money market funds
        & type_norm.isin(wanted_norm)         # bank transfers and other non-trading activity
    )
    return df[keep].copy()

# Keep only the trading rows, then list which tickers are left
equity_transactions = filter_equity_transactions(transactions)
unique_tickers = sorted(equity_transactions['Ticker'].unique())
print(f"Filtered to {len(equity_transactions)} equity transactions")
print(f"\nUnique tickers ({len(unique_tickers)}):")
print(unique_tickers)

Filtered to 1284 equity transactions

Unique tickers (83):
[' NOV 24 PUT 517.50', 'AAPL', 'ABNB', 'ADSK', 'ADYEY', 'AMD', 'AMZN', 'ANET', 'APPN', 'ASML', 'ATVI', 'AVB', 'BABA', 'BRKB', 'BTCO', 'CFLT', 'CHGG', 'CRM', 'CRWD', 'DIS', 'DXCM', 'ETSY', 'FB', 'FBSOX', 'FCOM', 'FHLC', 'FSLY', 'FTEC', 'FTNT', 'FUBO', 'FVRR', 'GS', 'HUBS', 'IDXX', 'IYW', 'JATAX', 'JD', 'LKNCY', 'LMND', 'MA', 'MELI', 'META', 'MKC', 'MSFT', 'MTN', 'NET', 'NFLX', 'NICE', 'NTDOY', 'NVDA', 'OKTA', 'PAYC', 'PINS', 'POTX', 'PYPL', 'QQQ', 'ROKU', 'SCHA', 'SCHF', 'SCHX', 'SCHZ', 'SHOP', 'SNBR', 'SNPS', 'TDOC', 'TEAM', 'TSLA', 'TTD', 'TWLO', 'TXG', 'UAL', 'UBER', 'UPST', 'VDC', 'VGT', 'VMSXX', 'VOO', 'VOX', 'VTI', 'WEX', 'ZBRA', 'ZM', 'ZNGA']


## Step 3: Nasdaq Data Link Setup

In [None]:
class NasdaqDataFetcher:
    """Fetch historical data from Nasdaq Data Link (Sharadar).

    Results are memoised per ``(ticker, start_date, end_date)`` in ``self.cache``.
    Without an API key the fetcher produces random dummy prices so the rest of the
    notebook can be exercised; those numbers carry no meaning.

    NOTE(review): this queries the SHARADAR/SEP table for every ticker, including
    ETFs such as VOO, and requests a ``divamt`` column - confirm against the
    table documentation that both are available there.
    NOTE(review): only the first page of a response is read; confirm the
    per-request row limit is large enough for the date ranges used.
    """

    # Columns requested from the API and produced by the dummy-data fallback
    PRICE_COLUMNS = ['ticker', 'date', 'closeadj', 'divamt']

    def __init__(self, api_key=None, timeout=30):
        """Create a fetcher.

        Args:
            api_key: Nasdaq Data Link key; falls back to the
                ``NASDAQ_DATA_LINK_API_KEY`` environment variable.
            timeout: Seconds to wait for each HTTP request before giving up.
        """
        self.api_key = api_key or os.environ.get('NASDAQ_DATA_LINK_API_KEY')
        if not self.api_key:
            print("WARNING: No API key found. Set NASDAQ_DATA_LINK_API_KEY environment variable.")
            print("You can get a free API key at: https://data.nasdaq.com/sign-up")
        self.base_url = "https://data.nasdaq.com/api/v3"
        self.timeout = timeout
        self.cache = {}

    def get_price_history(self, ticker: str, start_date: str, end_date: str) -> pd.DataFrame:
        """Get daily price history for a ticker including adjusted close and dividends.

        Args:
            ticker: Symbol to look up.
            start_date: First date to include, as ``YYYY-MM-DD``.
            end_date: Last date to include, as ``YYYY-MM-DD``.

        Returns:
            A DataFrame sorted by ``date`` with the columns in ``PRICE_COLUMNS``
            (or whatever column names the API reports). An empty DataFrame is
            returned when the request fails or the response has no data table;
            failures are printed, not raised, and are not cached.
        """
        # Check cache first
        cache_key = f"{ticker}_{start_date}_{end_date}"
        if cache_key in self.cache:
            return self.cache[cache_key]

        if not self.api_key:
            # Return dummy data for testing without API key
            print(f"Returning dummy data for {ticker} (no API key)")
            dates = pd.date_range(start=start_date, end=end_date, freq='B')
            df = pd.DataFrame({
                'ticker': ticker,
                'date': dates,
                'closeadj': np.random.uniform(100, 200, len(dates)),
                'divamt': np.random.choice([0, 0, 0, 0.5], len(dates))
            })
            self.cache[cache_key] = df
            return df

        # Sharadar SEP table for daily prices
        endpoint = f"{self.base_url}/datatables/SHARADAR/SEP"

        params = {
            'ticker': ticker,
            'date.gte': start_date,
            'date.lte': end_date,
            'api_key': self.api_key,
            'qopts.columns': ','.join(self.PRICE_COLUMNS)
        }

        try:
            # Without a timeout a stalled connection would block the notebook forever
            response = requests.get(endpoint, params=params, timeout=self.timeout)
            response.raise_for_status()
            data = response.json()

            if 'datatable' in data and 'data' in data['datatable']:
                datatable = data['datatable']
                # Prefer the column names the API reports so values cannot be
                # mislabelled if the response order differs from the request.
                reported = [col['name'] for col in datatable.get('columns', [])]
                df = pd.DataFrame(datatable['data'], columns=reported or self.PRICE_COLUMNS)
                df['date'] = pd.to_datetime(df['date'])
                df = df.sort_values('date')
                self.cache[cache_key] = df
                return df
            else:
                return pd.DataFrame()

        except Exception as e:
            # Best effort: report the failure and let the caller skip this ticker
            print(f"Error fetching {ticker}: {e}")
            return pd.DataFrame()

        finally:
            # Rate limiting
            time.sleep(0.1)

# Create the shared fetcher instance and report whether real data is available
fetcher = NasdaqDataFetcher()
key_status = "Set" if fetcher.api_key else "Not set (using dummy data)"
print("Nasdaq Data Fetcher initialized")
print("API Key status:", key_status)

## Step 4: Portfolio Tracking Functions

In [None]:
class Portfolio:
    """Track portfolio holdings and the cash flows needed for a money-weighted return.

    ``holdings`` maps ticker -> share count, ``cash_flows`` is a list of
    ``(date, amount)`` tuples where money invested is negative and money received
    is positive, and ``history`` is reserved for daily values (nothing in this
    class writes to it).
    """

    # Transaction types are compared after strip/upper-casing, so both the raw
    # labels ('Buy', 'CAP') and the consolidated ones ('BUY', 'CAPITAL GAIN') work.
    _BUY_TYPES = frozenset({'BUY'})
    _REINVEST_TYPES = frozenset({'REINVEST'})
    _SELL_TYPES = frozenset({'SELL'})
    _DIVIDEND_TYPES = frozenset({'DIVIDEND'})
    _CAPITAL_GAIN_TYPES = frozenset({'CAP', 'CAPITAL GAIN'})

    def __init__(self):
        self.holdings = {}  # ticker -> shares
        self.cash_flows = []  # List of (date, amount) for XIRR
        self.history = []  # Daily portfolio values

    @staticmethod
    def _as_number(value):
        """Return ``value`` as a float, treating None/NaN (blank CSV cells) as 0.0."""
        if pd.isna(value):
            return 0.0
        return float(value)

    def process_transaction(self, date, txn_type, ticker, quantity, amount):
        """Apply one transaction to the holdings and cash-flow ledger.

        Args:
            date: Transaction date (anything that supports subtraction with the
                other dates passed in, e.g. ``pd.Timestamp`` or ``datetime.date``).
            txn_type: Transaction label; matched case-insensitively.
            ticker: Security the transaction refers to.
            quantity: Share count; sign is ignored, missing values count as 0.
            amount: Cash amount; sign is ignored, missing values count as 0.

        Unrecognised transaction types are ignored (the ticker still gets a
        zero-share holdings entry).
        """
        kind = str(txn_type).strip().upper()
        shares = abs(self._as_number(quantity))
        cash = abs(self._as_number(amount))

        if ticker not in self.holdings:
            self.holdings[ticker] = 0

        if kind in self._BUY_TYPES:
            # Money leaves account (negative cash flow)
            self.holdings[ticker] += shares
            self.cash_flows.append((date, -cash))

        elif kind in self._REINVEST_TYPES:
            # Dividend reinvested - no net cash flow for XIRR
            # But we do increase share count
            # NOTE(review): if the export also contains a separate DIVIDEND row for
            # the same payout, that row is booked below as cash received while the
            # shares are added here - confirm the data never pairs the two.
            self.holdings[ticker] += shares

        elif kind in self._SELL_TYPES:
            # Money enters account (positive cash flow)
            self.holdings[ticker] -= shares
            self.cash_flows.append((date, cash))

        elif kind in self._DIVIDEND_TYPES and shares == 0:
            # Cash dividend (not reinvested)
            self.cash_flows.append((date, cash))

        elif kind in self._CAPITAL_GAIN_TYPES:
            # Capital gains distribution (usually mutual funds)
            if shares != 0:
                # Reinvested: more shares, no cash flow
                self.holdings[ticker] += shares
            elif cash != 0:
                # Paid out in cash: money received, same as a cash dividend
                self.cash_flows.append((date, cash))

    def calculate_value(self, date, price_data):
        """Calculate total portfolio value on a given date.

        Args:
            date: Valuation date.
            price_data: Mapping ticker -> DataFrame with ``'date'`` and
                ``'closeadj'`` columns; each frame is expected to be sorted by
                date because the last row on or before ``date`` is used.

        Returns:
            Sum of shares * price over positions with a positive share count.
            Tickers missing from ``price_data``, or with no price on or before
            ``date``, contribute nothing, so the result is understated when
            prices are incomplete.

        NOTE(review): ``closeadj`` is presumably adjusted for corporate actions;
        multiplying it by historical share counts is only exact near the most
        recent date - confirm this is acceptable for the dates being valued.
        """
        total_value = 0
        for ticker, shares in self.holdings.items():
            if shares > 0 and ticker in price_data:
                # Get most recent price up to this date
                ticker_prices = price_data[ticker]
                prices_before = ticker_prices[ticker_prices['date'] <= date]
                if len(prices_before) > 0:
                    price = prices_before['closeadj'].iloc[-1]
                    total_value += shares * price
        return total_value

    @staticmethod
    def _xnpv(rate, flows):
        """Net present value of dated flows at an annual ``rate`` (365-day years)."""
        first_date = flows[0][0]
        return sum(
            amount / (1.0 + rate) ** ((when - first_date).days / 365.0)
            for when, amount in flows
        )

    @classmethod
    def _solve_xirr(cls, flows):
        """Find the annual rate where the NPV of ``flows`` is zero, or None.

        Uses bisection between -99.9999% and an upper bound that is widened until
        the NPV changes sign; gives up (None) if no sign change is found.
        """
        amounts = [amount for _, amount in flows]
        if any(pd.isna(amount) for amount in amounts):
            return None
        # A rate of return only exists when money went both in and out
        if not (any(a > 0 for a in amounts) and any(a < 0 for a in amounts)):
            return None

        low, high = -0.999999, 1.0
        npv_low = cls._xnpv(low, flows)
        npv_high = cls._xnpv(high, flows)
        while (npv_low > 0) == (npv_high > 0) and high < 1e6:
            high *= 10
            npv_high = cls._xnpv(high, flows)
        if (npv_low > 0) == (npv_high > 0):
            return None

        for _ in range(200):
            mid = (low + high) / 2.0
            npv_mid = cls._xnpv(mid, flows)
            if npv_mid == 0 or (high - low) < 1e-12:
                return mid
            if (npv_mid > 0) == (npv_low > 0):
                low, npv_low = mid, npv_mid
            else:
                high = mid
        return (low + high) / 2.0

    def calculate_xirr(self, end_date, end_value):
        """Calculate XIRR including final portfolio value.

        The final value is treated as money received on ``end_date``. The rate is
        solved directly because numpy_financial does not provide an ``xirr``
        function.

        Returns:
            The annualised money-weighted return in percent; ``0`` when no cash
            flows were recorded; ``None`` when no rate can be determined (for
            example all flows have the same sign).
        """
        if len(self.cash_flows) == 0:
            return 0

        # Add final portfolio value as positive cash flow
        all_flows = self.cash_flows + [(end_date, end_value)]

        try:
            ordered = sorted(all_flows, key=lambda flow: flow[0])
            rate = self._solve_xirr(ordered)
        except (TypeError, ValueError, AttributeError, ZeroDivisionError, OverflowError):
            # Incomparable dates or numerically extreme flows: no usable rate
            return None

        if rate is None:
            return None
        return rate * 100  # Convert to percentage

print("Portfolio tracking class defined")

## Step 5: Fetch Price Data

In [None]:
# Get date range and tickers
start_date = equity_transactions['Trade Date'].min().strftime('%Y-%m-%d')
end_date = datetime.now().strftime('%Y-%m-%d')
tickers = equity_transactions['Ticker'].unique().tolist()

# Demo cap on how many holdings are fetched; set to None to fetch every ticker.
# Tickers without price data are left out of the portfolio valuation, so the cap
# must be lifted before the results are taken seriously.
MAX_TICKERS_TO_FETCH = 10
tickers_to_fetch = tickers if MAX_TICKERS_TO_FETCH is None else tickers[:MAX_TICKERS_TO_FETCH]

print(f"Fetching data for {len(tickers_to_fetch)} of {len(tickers)} tickers")
print(f"Date range: {start_date} to {end_date}")

# Fetch price data: ticker -> price history frame
price_data = {}

# Always fetch VOO for benchmark
print("\nFetching VOO benchmark data...")
voo_prices = fetcher.get_price_history('VOO', start_date, end_date)
if not voo_prices.empty:
    price_data['VOO'] = voo_prices

# Fetch data for holdings
for i, ticker in enumerate(tickers_to_fetch, start=1):
    print(f"Fetching {ticker}... ({i}/{len(tickers_to_fetch)})")
    prices = fetcher.get_price_history(ticker, start_date, end_date)
    if not prices.empty:
        price_data[ticker] = prices

print(f"\nSuccessfully fetched data for {len(price_data)} tickers")

# Make the gap visible instead of silently valuing those positions at nothing
tickers_without_prices = [t for t in tickers if t not in price_data]
if tickers_without_prices:
    print(f"WARNING: no price data for {len(tickers_without_prices)} tickers; "
          "they are left out of portfolio valuations")

## Step 6: Calculate Actual Portfolio Performance

In [None]:
# Replay every filtered transaction into a fresh portfolio
actual_portfolio = Portfolio()

for _, row in equity_transactions.iterrows():
    # Quantity / Amount fall back to 0 when the column is absent from the row
    shares_traded = row.get('Quantity', 0)
    cash_amount = row.get('Amount USD', 0)
    actual_portfolio.process_transaction(
        date=row['Trade Date'],
        txn_type=row['Type'],
        ticker=row['Ticker'],
        quantity=shares_traded,
        amount=cash_amount,
    )

# Value the holdings as of the last transaction date
final_date = equity_transactions['Trade Date'].max()
final_value = actual_portfolio.calculate_value(final_date, price_data)

# Money-weighted return, counting the final value as the closing cash flow
actual_xirr = actual_portfolio.calculate_xirr(final_date, final_value)

# A falsy XIRR (None or 0) is reported as "could not calculate"
xirr_line = (
    f"  XIRR (Annualized Return): {actual_xirr:.2f}%"
    if actual_xirr
    else f"  XIRR: Could not calculate"
)

print(f"Actual Portfolio Summary:")
print(f"  Final Value: ${final_value:,.2f}")
print(f"  Number of transactions: {len(equity_transactions)}")
print(f"  Number of cash flows: {len(actual_portfolio.cash_flows)}")
print(xirr_line)

## Step 7: Simulate VOO Benchmark Portfolio

In [None]:
def simulate_voo_portfolio(transactions_df, voo_prices):
    """Simulate portfolio if all Buy transactions went to VOO instead.

    Every buy is replayed as a VOO purchase of the same dollar amount at the most
    recent VOO price on or before the trade date, and every VOO dividend is
    reinvested. Buys and dividends are processed in one chronological pass so that
    each dividend is paid only on the shares held at that point in time.

    Args:
        transactions_df: Transactions with ``'Type'``, ``'Trade Date'`` and
            ``'Amount USD'`` columns; buy rows are matched case-insensitively.
        voo_prices: VOO history with ``'date'``, ``'closeadj'`` and ``'divamt'``
            columns.

    Returns:
        A ``Portfolio`` holding only VOO. It is empty when there is no price
        data; buys dated before the first available price are skipped.

    NOTE(review): shares are bought at ``closeadj``; if that series is already
    adjusted for dividends, reinvesting ``divamt`` on top counts dividends twice -
    confirm which adjustment the data source applies.
    """
    voo_portfolio = Portfolio()

    if voo_prices is None or voo_prices.empty:
        return voo_portfolio

    prices = voo_prices.sort_values('date')

    # Get all buy transactions (labels may be 'Buy' or the consolidated 'BUY')
    is_buy = transactions_df['Type'].astype(str).str.strip().str.upper() == 'BUY'
    buy_transactions = transactions_df[is_buy]

    # Merge buys and dividend payments into one timeline. On a shared date the
    # dividend goes first (order key 0 before 1), i.e. shares bought that day do
    # not earn it - assumes 'divamt' is reported on the ex-dividend date, TODO confirm.
    events = []
    for _, txn in buy_transactions.iterrows():
        events.append((txn['Trade Date'], 1, abs(txn['Amount USD'])))
    for _, div_row in prices[prices['divamt'] > 0].iterrows():
        events.append((div_row['date'], 0, div_row))
    events.sort(key=lambda event: (event[0], event[1]))

    for when, order, payload in events:
        if order == 1:
            # Find VOO price on that date (latest close on or before it)
            voo_on_date = prices[prices['date'] <= when]
            if len(voo_on_date) == 0:
                continue
            voo_price = voo_on_date['closeadj'].iloc[-1]
            amount = payload

            # Simulate buying VOO
            voo_portfolio.process_transaction(
                date=when,
                txn_type='Buy',
                ticker='VOO',
                quantity=amount / voo_price,
                amount=amount
            )
        else:
            # Only pay a dividend on shares actually held at this point
            shares_held = voo_portfolio.holdings.get('VOO', 0)
            if shares_held > 0:
                div_amount = shares_held * payload['divamt']
                new_shares = div_amount / payload['closeadj']
                # Reinvest dividend
                voo_portfolio.process_transaction(
                    date=when,
                    txn_type='Reinvest',
                    ticker='VOO',
                    quantity=new_shares,
                    amount=div_amount
                )

    return voo_portfolio

# Build and value the benchmark, provided VOO prices were fetched
if 'VOO' not in price_data:
    print("VOO price data not available")
else:
    voo_portfolio = simulate_voo_portfolio(equity_transactions, price_data['VOO'])
    voo_final_value = voo_portfolio.calculate_value(final_date, price_data)
    voo_xirr = voo_portfolio.calculate_xirr(final_date, voo_final_value)

    voo_shares = voo_portfolio.holdings.get('VOO', 0)
    print(f"VOO Benchmark Portfolio:")
    print(f"  Final Value: ${voo_final_value:,.2f}")
    print(f"  Total VOO shares: {voo_shares:.4f}")
    # A falsy XIRR (None or 0) is simply not printed
    if voo_xirr:
        print(f"  XIRR (Annualized Return): {voo_xirr:.2f}%")

## Step 8: Compare Results

In [None]:
def compare_portfolios(actual_xirr, voo_xirr, actual_value, voo_value, transactions_df):
    """Compare actual vs VOO benchmark returns and print a report.

    Args:
        actual_xirr: Annualised return of the real portfolio in percent, or a
            falsy value (None / 0) when it could not be calculated.
        voo_xirr: Same for the VOO benchmark.
        actual_value: Final value of the real portfolio.
        voo_value: Final value of the benchmark portfolio.
        transactions_df: Transactions with ``'Type'``, ``'Amount USD'`` and
            datetime ``'Trade Date'`` columns; buy rows are matched
            case-insensitively.

    Returns:
        None. The report is written to stdout.
    """
    if transactions_df.empty:
        print("No transactions to compare")
        return

    # Calculate total invested (labels may be 'Buy' or the consolidated 'BUY')
    is_buy = transactions_df['Type'].astype(str).str.strip().str.upper() == 'BUY'
    total_invested = abs(transactions_df.loc[is_buy, 'Amount USD'].sum())

    def total_return_text(value):
        # A return relative to nothing invested is undefined, not inf/nan
        if total_invested > 0:
            return f"{((value/total_invested - 1) * 100):.2f}%"
        return "n/a (nothing invested)"

    # Calculate time period
    start_date = transactions_df['Trade Date'].min()
    end_date = transactions_df['Trade Date'].max()
    years = (end_date - start_date).days / 365.25

    print("="*60)
    print("PORTFOLIO COMPARISON RESULTS")
    print("="*60)
    print(f"\nInvestment Period: {start_date.date()} to {end_date.date()} ({years:.1f} years)")
    print(f"Total Invested: ${total_invested:,.2f}")

    print(f"\nActual Portfolio:")
    print(f"  Final Value: ${actual_value:,.2f}")
    print(f"  Total Return: {total_return_text(actual_value)}")
    if actual_xirr:
        print(f"  XIRR (Annualized): {actual_xirr:.2f}%")

    print(f"\nVOO Benchmark:")
    print(f"  Final Value: ${voo_value:,.2f}")
    print(f"  Total Return: {total_return_text(voo_value)}")
    if voo_xirr:
        print(f"  XIRR (Annualized): {voo_xirr:.2f}%")

    # The head-to-head verdict needs both annualised returns
    if actual_xirr and voo_xirr:
        print(f"\nPerformance Analysis:")
        outperformance = actual_xirr - voo_xirr
        if outperformance > 0:
            print(f"  ✅ Your portfolio OUTPERFORMED VOO by {outperformance:.2f}% annually")
        else:
            print(f"  ❌ Your portfolio UNDERPERFORMED VOO by {abs(outperformance):.2f}% annually")

        # Calculate dollar difference
        dollar_diff = actual_value - voo_value
        if dollar_diff > 0:
            print(f"  💰 You have ${dollar_diff:,.2f} more than the VOO strategy")
        else:
            print(f"  💸 You have ${abs(dollar_diff):,.2f} less than the VOO strategy")

# Run comparison - only possible when the benchmark cell defined voo_xirr
benchmark_ready = 'voo_xirr' in locals()
if benchmark_ready:
    compare_portfolios(
        actual_xirr,
        voo_xirr,
        final_value,
        voo_final_value,
        equity_transactions,
    )

## Step 9: Visualizations

In [None]:
# Buy rows drive every "invested" chart below. Types are matched
# case-insensitively because the consolidation step upper-cases them.
buy_mask = equity_transactions['Type'].astype(str).str.strip().str.upper() == 'BUY'
buys_only = equity_transactions[buy_mask].sort_values('Trade Date').copy()

if buys_only.empty:
    # Plotting empty selections would raise or draw misleading blank charts
    print("No BUY transactions found - skipping portfolio charts")
else:
    # Portfolio composition analysis: money put into each ticker (buys only, so
    # sales and dividends do not net against the amount invested)
    holdings_summary = buys_only.groupby('Ticker')['Amount USD'].sum().abs().sort_values(ascending=False)
    top_holdings = holdings_summary.head(10)

    # Create visualization
    fig, axes = plt.subplots(2, 2, figsize=(15, 12))

    # 1. Top holdings by investment amount
    ax1 = axes[0, 0]
    top_holdings.plot(kind='barh', ax=ax1, color='steelblue')
    ax1.set_title('Top 10 Holdings by Total Investment', fontsize=14, fontweight='bold')
    ax1.set_xlabel('Total Invested ($)')
    ax1.set_ylabel('Ticker')

    # 2. Transaction types distribution
    ax2 = axes[0, 1]
    txn_types = equity_transactions['Type'].value_counts()
    colors = plt.cm.Set3(range(len(txn_types)))
    txn_types.plot(kind='pie', ax=ax2, autopct='%1.1f%%', colors=colors)
    ax2.set_title('Transaction Types Distribution', fontsize=14, fontweight='bold')
    ax2.set_ylabel('')

    # 3. Investment timeline
    ax3 = axes[1, 0]
    monthly_buys = buys_only.copy()
    monthly_buys['YearMonth'] = monthly_buys['Trade Date'].dt.to_period('M')
    monthly_investment = monthly_buys.groupby('YearMonth')['Amount USD'].sum().abs()
    monthly_investment.plot(kind='bar', ax=ax3, color='green', alpha=0.7)
    ax3.set_title('Monthly Investment Pattern', fontsize=14, fontweight='bold')
    ax3.set_xlabel('Month')
    ax3.set_ylabel('Amount Invested ($)')
    ax3.tick_params(axis='x', rotation=45)

    # 4. Cumulative investment over time
    ax4 = axes[1, 1]
    buys_only['Cumulative'] = buys_only['Amount USD'].abs().cumsum()
    ax4.plot(buys_only['Trade Date'], buys_only['Cumulative'], linewidth=2, color='navy')
    ax4.fill_between(buys_only['Trade Date'], 0, buys_only['Cumulative'], alpha=0.3, color='navy')
    ax4.set_title('Cumulative Investment Over Time', fontsize=14, fontweight='bold')
    ax4.set_xlabel('Date')
    ax4.set_ylabel('Cumulative Investment ($)')
    ax4.grid(True, alpha=0.3)

    plt.tight_layout()
    plt.show()

## Step 10: Detailed Analysis by Year

In [None]:
# Analyze returns by year
def analyze_by_year(transactions_df):
    """Break down investment activity by year.

    Args:
        transactions_df: Transactions with a datetime ``'Trade Date'`` column plus
            ``'Type'`` and ``'Amount USD'``. Types are matched case-insensitively,
            so both ``'Buy'`` and the consolidated ``'BUY'`` are recognised.

    Returns:
        One row per calendar year with the columns ``Year``, ``Invested`` (buys),
        ``Sold`` (sells), ``Dividends`` (dividend and reinvest rows summed
        together), ``Net Cash Flow`` (sold minus invested) and ``Transactions``
        (row count). All money columns are absolute values except
        ``Net Cash Flow``.

    NOTE(review): ``Dividends`` takes the absolute value of the summed amounts;
    if dividend and reinvest rows carry opposite signs for the same payout they
    cancel each other - confirm against the export format.
    """
    columns = ['Year', 'Invested', 'Sold', 'Dividends', 'Net Cash Flow', 'Transactions']

    df = transactions_df.copy()
    df['Year'] = df['Trade Date'].dt.year
    txn_type = df['Type'].astype(str).str.strip().str.upper()

    yearly_summary = []
    for year in sorted(df['Year'].unique()):
        in_year = df['Year'] == year
        buys = df.loc[in_year & (txn_type == 'BUY'), 'Amount USD'].sum()
        sells = df.loc[in_year & (txn_type == 'SELL'), 'Amount USD'].sum()
        dividends = df.loc[in_year & txn_type.isin(['DIVIDEND', 'REINVEST']), 'Amount USD'].sum()

        yearly_summary.append({
            'Year': year,
            'Invested': abs(buys),
            'Sold': abs(sells),
            'Dividends': abs(dividends),
            'Net Cash Flow': -abs(buys) + abs(sells),
            'Transactions': int(in_year.sum())
        })

    # Explicit columns keep the frame usable even when there are no rows
    yearly_df = pd.DataFrame(yearly_summary, columns=columns)
    return yearly_df

# Tabulate the per-year activity
yearly_analysis = analyze_by_year(equity_transactions)
print("Investment Activity by Year:")
print(yearly_analysis.to_string(index=False))

# Visualize yearly activity as paired bars: invested to the left, sold to the right
fig, ax = plt.subplots(figsize=(12, 6))
x = yearly_analysis['Year']
width = 0.35
bar_specs = [
    (-width/2, 'Invested', 'green'),
    (width/2, 'Sold', 'red'),
]
for offset, column, color in bar_specs:
    ax.bar(x + offset, yearly_analysis[column], width, label=column, color=color, alpha=0.7)
ax.set_xlabel('Year', fontsize=12)
ax.set_ylabel('Amount ($)', fontsize=12)
ax.set_title('Annual Investment and Sales Activity', fontsize=14, fontweight='bold')
ax.legend()
ax.grid(True, alpha=0.3)
plt.show()

## Conclusions and Next Steps

This notebook provides a comprehensive analysis of your investment portfolio performance using XIRR (money-weighted returns) compared to a VOO benchmark strategy.

### Key Insights:
1. **XIRR vs CAGR**: We use XIRR because it accounts for the timing and size of your cash flows, giving a more accurate picture of your investment performance.
2. **Dividend Reinvestment**: Properly handled by tracking share increases without affecting XIRR cash flows
3. **Fair Comparison**: The VOO benchmark simulates investing the same amounts on the same dates as your actual investments

### To Run This Analysis:
1. Set your Nasdaq Data Link API key: `export NASDAQ_DATA_LINK_API_KEY="your_key"`
2. Run each cell sequentially to see results
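3. The analysis will run with randomly generated dummy prices if no API key is provided; this is only for testing that the notebook executes, and the resulting values and returns are not meaningful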

### Potential Enhancements:
- Add support for stock splits and corporate actions
- Include transaction costs in the analysis
- Compare against multiple benchmarks (QQQ, IWM, etc.)
- Add risk-adjusted return metrics (Sharpe ratio, etc.)
- Generate a detailed PDF report of findings