<a href="https://colab.research.google.com/github/johnflem/FrameworkBenchmarks/blob/master/ef%20comparisons.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [7]:
pip install pandas yfinance PyPortfolioOpt

Collecting PyPortfolioOpt
  Downloading pyportfolioopt-1.5.6-py3-none-any.whl.metadata (22 kB)
Collecting ecos<3.0.0,>=2.0.14 (from PyPortfolioOpt)
  Downloading ecos-2.0.14-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (8.0 kB)
Downloading pyportfolioopt-1.5.6-py3-none-any.whl (62 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m62.7/62.7 kB[0m [31m4.1 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading ecos-2.0.14-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (220 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m220.1/220.1 kB[0m [31m12.4 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: ecos, PyPortfolioOpt
Successfully installed PyPortfolioOpt-1.5.6 ecos-2.0.14


In [3]:
import pandas as pd
import yfinance as yf
from pypfopt import EfficientFrontier
from pypfopt import risk_models
from pypfopt import expected_returns
from datetime import date, timedelta

def get_sp500_tickers():
    """
    Retrieves the list of S&P 500 tickers from Wikipedia.

    Returns:
        list: A list of S&P 500 ticker symbols.
    """
    try:
        # Scrapes the table of S&P 500 companies from the Wikipedia page
        payload = pd.read_html('https://en.wikipedia.org/wiki/List_of_S%26P_500_companies')
        sp500_tickers = payload[0]['Symbol'].values.tolist()
        # Cleans ticker symbols for yfinance compatibility (e.g., 'BRK.B' -> 'BRK-B')
        sp500_tickers = [ticker.replace('.', '-') for ticker in sp500_tickers]
        return sp500_tickers
    except Exception as e:
        print(f"Error retrieving S&P 500 tickers: {e}")
        return None

def get_historical_data(tickers, years=5):
    """
    Downloads the daily closing prices for a list of tickers for the past n years.

    Args:
        tickers (list): A list of stock tickers.
        years (int): The number of years of historical data to download.

    Returns:
        pandas.DataFrame: A DataFrame containing the daily closing prices for each ticker.
    """
    end_date = date.today()
    start_date = end_date - timedelta(days=years * 365)
    print(f"Downloading data from {start_date} to {end_date}...")
    try:
        # Download historical data. yfinance now auto-adjusts prices by default.
        data = yf.download(tickers, start=start_date, end=end_date, progress=True)
        prices = data['Close'] # Use 'Close' as it now contains the adjusted close price

        # Drop columns that are completely empty (for tickers that may have been delisted or have no data)
        initial_tickers = len(prices.columns)
        prices.dropna(axis=1, how='all', inplace=True)
        final_tickers = len(prices.columns)
        print(f"\nSuccessfully downloaded data for {final_tickers} of {initial_tickers} tickers.")

        # Fill any remaining missing values in the time series data
        prices.fillna(method='ffill', inplace=True)
        prices.fillna(method='bfill', inplace=True)

        return prices
    except Exception as e:
        print(f"Error downloading or processing historical data: {e}")
        return None

def find_optimal_portfolio(prices):
    """
    Uses the Efficient Frontier to find the portfolio with the maximum Sharpe ratio.

    Args:
        prices (pandas.DataFrame): A DataFrame of historical closing prices for multiple stocks.

    Returns:
        dict: A dictionary containing the optimal weights for each stock.
        EfficientFrontier: The efficient frontier object with weights set.
    """
    if prices is None or prices.empty:
        return None, None

    print("\nCalculating expected returns and risk...")
    try:
        # Calculate expected annualized returns and the annualized sample covariance matrix
        mu = expected_returns.mean_historical_return(prices)
        S = risk_models.sample_cov(prices)

        print("Optimizing portfolio for maximum Sharpe ratio...")
        # Initialize the EfficientFrontier object
        ef = EfficientFrontier(mu, S)

        # Find the portfolio that maximizes the Sharpe ratio
        weights = ef.max_sharpe()
        cleaned_weights = ef.clean_weights()

        return cleaned_weights, ef
    except Exception as e:
        print(f"Error during portfolio optimization: {e}")
        return None, None

if __name__ == "__main__":
    print("Fetching S&P 500 tickers...")
    sp500_tickers = get_sp500_tickers()

    if sp500_tickers:
        print(f"Found {len(sp500_tickers)} tickers in the S&P 500 index.")

        # --- This is the key change: Using all tickers from the S&P 500 ---
        tickers_to_download = sp500_tickers

        price_data = get_historical_data(tickers_to_download)

        if price_data is not None and not price_data.empty:
            optimal_weights, ef_instance = find_optimal_portfolio(price_data)

            if optimal_weights:
                print("\n" + "="*50)
                print("    OPTIMAL PORTFOLIO (MAXIMUM SHARPE RATIO)")
                print("="*50)
                print("\nRecommended Portfolio Weights:")
                # Print only the stocks with a non-zero weight
                for ticker, weight in optimal_weights.items():
                    if weight > 0:
                        print(f"  - {ticker:<6}: {weight:.4f} ({weight:>6.2%})")

                print("\n" + "-"*50)
                print("Expected Portfolio Performance:")
                # The verbose output is redirected to the print function
                ef_instance.portfolio_performance(verbose=True)
                print("-"*50)

Fetching S&P 500 tickers...
Found 502 tickers in the S&P 500 index.
Downloading data from 2020-08-10 to 2025-08-09...


  data = yf.download(tickers, start=start_date, end=end_date, progress=True)
[*********************100%***********************]  502 of 502 completed
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  prices.dropna(axis=1, how='all', inplace=True)
  prices.fillna(method='ffill', inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  prices.fillna(method='ffill', inplace=True)
  prices.fillna(method='bfill', inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  prices.fillna(method='bfill', inplace=True)


Successfully downloaded data for 502 of 502 tickers.

Calculating expected returns and risk...
Optimizing portfolio for maximum Sharpe ratio...

    OPTIMAL PORTFOLIO (MAXIMUM SHARPE RATIO)

Recommended Portfolio Weights:
  - ABBV  : 0.0169 ( 1.70%)
  - AVGO  : 0.0320 ( 3.20%)
  - AXON  : 0.0416 ( 4.16%)
  - AZO   : 0.0161 ( 1.61%)
  - CBOE  : 0.1561 (15.61%)
  - CEG   : 0.0184 ( 1.84%)
  - GE    : 0.0258 ( 2.58%)
  - GEV   : 0.1290 (12.90%)
  - HWM   : 0.0417 ( 4.17%)
  - K     : 0.0115 ( 1.15%)
  - KR    : 0.0601 ( 6.01%)
  - LLY   : 0.0379 ( 3.79%)
  - MCK   : 0.1745 (17.45%)
  - NVDA  : 0.0213 ( 2.12%)
  - ORLY  : 0.0341 ( 3.41%)
  - PGR   : 0.0047 ( 0.47%)
  - PLTR  : 0.0221 ( 2.21%)
  - PM    : 0.0775 ( 7.75%)
  - SMCI  : 0.0033 ( 0.33%)
  - TKO   : 0.0102 ( 1.02%)
  - TPR   : 0.0245 ( 2.45%)
  - TRGP  : 0.0405 ( 4.05%)

--------------------------------------------------
Expected Portfolio Performance:
Expected annual return: 38.0%
Annual volatility: 13.9%
Sharpe Ratio: 2.73
---

In [1]:
import pandas as pd
import yfinance as yf
from pypfopt import EfficientFrontier
from pypfopt import risk_models
from pypfopt import expected_returns
from datetime import date, timedelta

def get_sp500_tickers():
    """
    Retrieves the list of S&P 500 tickers from Wikipedia.

    Returns:
        list: A list of S&P 500 ticker symbols.
    """
    try:
        # Scrapes the table of S&P 500 companies from the Wikipedia page
        payload = pd.read_html('https://en.wikipedia.org/wiki/List_of_S%26P_500_companies')
        sp500_tickers = payload[0]['Symbol'].values.tolist()
        # Cleans ticker symbols for yfinance compatibility (e.g., 'BRK.B' -> 'BRK-B')
        sp500_tickers = [ticker.replace('.', '-') for ticker in sp500_tickers]
        return sp500_tickers
    except Exception as e:
        print(f"Error retrieving S&P 500 tickers: {e}")
        return None

def get_historical_data(tickers, from_year=None, years=5):
    """
    Downloads the annual closing prices for a list of tickers.

    Args:
        tickers (list): A list of stock tickers.
        from_year (int, optional): The starting year for the data. If None, uses the last 5 years from today.
        years (int): The number of years of historical data to download.

    Returns:
        pandas.DataFrame: A DataFrame containing the annual closing prices for each ticker.
    """
    end_date = date.today()
    if from_year:
        start_date = date(from_year, 1, 1)
        end_date = date(from_year + years, 1, 1) - timedelta(days=1) # Get data up to the end of the 5th year
    else:
        start_date = end_date - timedelta(days=years * 365)


    print(f"Downloading data from {start_date} to {end_date}...")
    try:
        # Download historical data on a yearly interval
        data = yf.download(tickers, start=start_date, end=end_date, interval='1y', progress=True)
        prices = data['Close'] # Use 'Close' as it now contains the adjusted close price

        # Drop columns that are completely empty (for tickers that may have been delisted or have no data)
        initial_tickers = len(prices.columns)
        prices.dropna(axis=1, how='all', inplace=True)
        final_tickers = len(prices.columns)
        print(f"\nSuccessfully downloaded data for {final_tickers} of {initial_tickers} tickers.")

        # Fill any remaining missing values in the time series data
        prices.fillna(method='ffill', inplace=True)
        prices.fillna(method='bfill', inplace=True)

        return prices
    except Exception as e:
        print(f"Error downloading or processing historical data: {e}")
        return None

def find_optimal_portfolio(prices):
    """
    Uses the Efficient Frontier to find the portfolio with the maximum Sharpe ratio.

    Args:
        prices (pandas.DataFrame): A DataFrame of historical closing prices for multiple stocks.

    Returns:
        dict: A dictionary containing the optimal weights for each stock.
        EfficientFrontier: The efficient frontier object with weights set.
    """
    if prices is None or prices.empty:
        return None, None

    print("\nCalculating expected returns and risk...")
    try:
        # Calculate expected annualized returns and the annualized sample covariance matrix
        mu = expected_returns.mean_historical_return(prices)
        S = risk_models.sample_cov(prices)

        print("Optimizing portfolio for maximum Sharpe ratio...")
        # Initialize the EfficientFrontier object
        ef = EfficientFrontier(mu, S)

        # Find the portfolio that maximizes the Sharpe ratio
        weights = ef.max_sharpe()
        cleaned_weights = ef.clean_weights()

        return cleaned_weights, ef
    except Exception as e:
        print(f"Error during portfolio optimization: {e}")
        return None, None

if __name__ == "__main__":
    print("Fetching S&P 500 tickers...")
    sp500_tickers = get_sp500_tickers()

    if sp500_tickers:
        print(f"Found {len(sp500_tickers)} tickers in the S&P 500 index.")

        # --- Using all tickers from the S&P 500 ---
        tickers_to_download = sp500_tickers

        # --- Get data for the last 5 years (annual timeframe) ---
        # To specify a starting year, change `from_year=None` to `from_year=YYYY`
        price_data = get_historical_data(tickers_to_download, from_year=None, years=5)


        if price_data is not None and not price_data.empty:
            optimal_weights, ef_instance = find_optimal_portfolio(price_data)

            if optimal_weights:
                print("\n" + "="*50)
                print("    OPTIMAL PORTFOLIO (MAXIMUM SHARPE RATIO)")
                print("="*50)
                print("\nRecommended Portfolio Weights:")
                # Print only the stocks with a non-zero weight
                for ticker, weight in optimal_weights.items():
                    if weight > 0:
                        print(f"  - {ticker:<6}: {weight:.4f} ({weight:>6.2%})")

                print("\n" + "-"*50)
                print("Expected Portfolio Performance:")
                # The verbose output is redirected to the print function
                ef_instance.portfolio_performance(verbose=True)
                print("-"*50)

ModuleNotFoundError: No module named 'pypfopt'

In [8]:
!pip install pypfopt

[31mERROR: Could not find a version that satisfies the requirement pypfopt (from versions: none)[0m[31m
[0m[31mERROR: No matching distribution found for pypfopt[0m[31m
[0m

In [16]:
import pandas as pd
import yfinance as yf
from pypfopt import EfficientFrontier, risk_models, expected_returns

def get_sp500_tickers():
    """Retrieves the list of S&P 500 tickers."""
    try:
        payload = pd.read_html('https://en.wikipedia.org/wiki/List_of_S%26P_500_companies')
        return [ticker.replace('.', '-') for ticker in payload[0]['Symbol'].values.tolist()]
    except Exception as e:
        print(f"Error retrieving S&P 500 tickers: {e}")
        return None

def get_historical_data(tickers, start_date, end_date):
    """Downloads and prepares daily closing prices for a given date range."""
    print(f"  Downloading data from {start_date.date()} to {end_date.date()}...")
    try:
        prices = yf.download(tickers, start=start_date, end=end_date, progress=False, auto_adjust=True, group_by='ticker')['Close']
        prices.dropna(axis=1, how='all', inplace=True)
        prices.ffill(inplace=True)
        prices.bfill(inplace=True)
        return prices
    except Exception as e:
        print(f"    Error during data download: {e}")
        return None

def create_final_portfolio(price_data):
    """Creates the optimal, filtered, and rebalanced portfolio from price data."""
    mu = expected_returns.mean_historical_return(price_data)
    S = risk_models.sample_cov(price_data)
    ef = EfficientFrontier(mu, S)

    try:
        initial_weights = ef.max_sharpe()
    except Exception:
        return None # Portfolio could not be optimized

    weights_series = pd.Series(initial_weights)
    filtered_weights = weights_series[weights_series >= 0.02]

    if filtered_weights.empty: return None

    top_weights = filtered_weights.sort_values(ascending=False).head(12)
    return (top_weights / top_weights.sum()).to_dict()

def run_backtest(portfolio_weights, comparison_year, benchmark_ticker='SPY'):
    """
    Runs a backtest for a given year and returns the performance of the portfolio and benchmark.
    """
    print(f"  Running backtest for year: {comparison_year}...")
    start_date = pd.Timestamp(f'{comparison_year}-01-01')

    # If comparison year is the current year, test YTD. Otherwise, full year.
    if comparison_year == pd.Timestamp.today().year:
        end_date = pd.Timestamp.today()
    else:
        end_date = pd.Timestamp(f'{comparison_year}-12-31')

    # Get data for portfolio and benchmark
    portfolio_tickers = list(portfolio_weights.keys())
    all_tickers = portfolio_tickers + [benchmark_ticker]
    comparison_prices = get_historical_data(all_tickers, start_date, end_date)

    if comparison_prices is None or benchmark_ticker not in comparison_prices.columns:
        print(f"    Could not download sufficient data for {comparison_year}. Skipping.")
        return None, None

    # Isolate benchmark and portfolio prices
    benchmark_prices = comparison_prices[[benchmark_ticker]].squeeze()
    portfolio_prices = comparison_prices[portfolio_tickers]

    # Calculate returns
    portfolio_daily_returns = portfolio_prices.pct_change().dropna()
    portfolio_total_return = (portfolio_daily_returns.dot(pd.Series(portfolio_weights)) + 1).prod() - 1
    benchmark_total_return = (benchmark_prices.pct_change().dropna() + 1).prod() - 1

    return portfolio_total_return, benchmark_total_return

def display_summary(results):
    """
    Displays a summary table of all backtest cycles and calculates overall performance.
    """
    if not results:
        print("No results to display.")
        return

    df = pd.DataFrame(results)

    # --- Calculate Overall Performance ---
    # We use the .prod() method on the returns (plus 1) to find the geometric cumulative return
    cumulative_portfolio = (1 + df['Portfolio Return']).prod() - 1
    cumulative_sp500 = (1 + df['S&P 500 Return']).prod() - 1

    # --- Format the DataFrame for Display ---
    df['Winner'] = df.apply(lambda row: 'Portfolio' if row['Portfolio Return'] > row['S&P 500 Return'] else 'S&P 500', axis=1)
    df['Portfolio Return'] = df['Portfolio Return'].map('{:.2%}'.format)
    df['S&P 500 Return'] = df['S&P 500 Return'].map('{:.2%}'.format)

    print("\n" + "="*65)
    print("    MULTI-YEAR BACKTESTING SUMMARY")
    print("="*65)
    print(df.to_string(index=False))
    print("-"*65)

    print("\n--- Overall Performance Conclusion ---")
    print(f"Total Cumulative Return of Portfolio Strategy: {cumulative_portfolio:.2%}")
    print(f"Total Cumulative Return of S&P 500 (Buy & Hold): {cumulative_sp500:.2%}")

    if cumulative_portfolio > cumulative_sp500:
        print(f"\n✅ Over the entire tested period, the Portfolio strategy was SUPERIOR.")
    else:
        print(f"\n❌ Over the entire tested period, a simple S&P 500 (Buy & Hold) strategy was SUPERIOR.")
    print("="*65)


if __name__ == "__main__":
    ANALYSIS_START_YEARS = [2016, 2017, 2018, 2019, 2020]
    all_results = []

    sp500_tickers = get_sp500_tickers()

    if sp500_tickers:
        for start_year in ANALYSIS_START_YEARS:
            analysis_end_year = start_year + 4
            comparison_year = analysis_end_year + 1

            print("\n" + "#"*40)
            print(f"# Starting Cycle: {start_year}-{analysis_end_year} (Test on {comparison_year})")
            print("#"*40)

            # Get data for the 5-year analysis period
            analysis_start_date = pd.Timestamp(f'{start_year}-01-01')
            analysis_end_date = pd.Timestamp(f'{analysis_end_year}-12-31')
            analysis_prices = get_historical_data(sp500_tickers, analysis_start_date, analysis_end_date)

            if analysis_prices is not None:
                # Create the optimal portfolio
                final_portfolio = create_final_portfolio(analysis_prices)

                if final_portfolio:
                    # Run the backtest for the following year
                    portfolio_return, sp500_return = run_backtest(final_portfolio, comparison_year)
                    if portfolio_return is not None:
                        all_results.append({
                            'Test Period': str(comparison_year),
                            'Portfolio Return': portfolio_return,
                            'S&P 500 Return': sp500_return
                        })
                else:
                    print("  Could not create a valid portfolio for this period.")

    # Finally, display the summary table and conclusion
    display_summary(all_results)


########################################
# Starting Cycle: 2016-2020 (Test on 2021)
########################################
  Downloading data from 2016-01-01 to 2020-12-31...


ERROR:yfinance:
8 Failed downloads:
ERROR:yfinance:['VLTO', 'EXE', 'COIN', 'GEV', 'SOLV', 'KVUE', 'CEG', 'GEHC']: YFPricesMissingError('possibly delisted; no price data found  (1d 2016-01-01 00:00:00 -> 2020-12-31 00:00:00) (Yahoo error = "Data doesn\'t exist for startDate = 1451624400, endDate = 1609390800")')


    Error during data download: 'Close'

########################################
# Starting Cycle: 2017-2021 (Test on 2022)
########################################
  Downloading data from 2017-01-01 to 2021-12-31...


ERROR:yfinance:
6 Failed downloads:
ERROR:yfinance:['VLTO', 'GEV', 'SOLV', 'KVUE', 'CEG', 'GEHC']: YFPricesMissingError('possibly delisted; no price data found  (1d 2017-01-01 00:00:00 -> 2021-12-31 00:00:00) (Yahoo error = "Data doesn\'t exist for startDate = 1483246800, endDate = 1640926800")')


    Error during data download: 'Close'

########################################
# Starting Cycle: 2018-2022 (Test on 2023)
########################################
  Downloading data from 2018-01-01 to 2022-12-31...


ERROR:yfinance:
4 Failed downloads:
ERROR:yfinance:['VLTO', 'GEV', 'SOLV', 'KVUE']: YFPricesMissingError('possibly delisted; no price data found  (1d 2018-01-01 00:00:00 -> 2022-12-31 00:00:00) (Yahoo error = "Data doesn\'t exist for startDate = 1514782800, endDate = 1672462800")')


    Error during data download: 'Close'

########################################
# Starting Cycle: 2019-2023 (Test on 2024)
########################################
  Downloading data from 2019-01-01 to 2023-12-31...


ERROR:yfinance:
2 Failed downloads:
ERROR:yfinance:['GEV', 'SOLV']: YFPricesMissingError('possibly delisted; no price data found  (1d 2019-01-01 00:00:00 -> 2023-12-31 00:00:00) (Yahoo error = "Data doesn\'t exist for startDate = 1546318800, endDate = 1703998800")')


    Error during data download: 'Close'

########################################
# Starting Cycle: 2020-2024 (Test on 2025)
########################################
  Downloading data from 2020-01-01 to 2024-12-31...
    Error during data download: 'Close'
No results to display.


In [7]:
import pandas as pd
import yfinance as yf
from pypfopt import EfficientFrontier, risk_models, expected_returns

def get_sp500_tickers():
    """Retrieves the list of S&P 500 tickers."""
    try:
        payload = pd.read_html('https://en.wikipedia.org/wiki/List_of_S%26P_500_companies')
        return [ticker.replace('.', '-') for ticker in payload[0]['Symbol'].values.tolist()]
    except Exception as e:
        print(f"Error retrieving S&P 500 tickers: {e}")
        return None

def get_master_historical_data(tickers, start_date, end_date):
    """Performs a single, large download for all required data."""
    print(f"Performing one master download for all data from {start_date.date()} to {end_date.date()}...")
    print("This may take several minutes due to the long date range...")
    try:
        data = yf.download(tickers, start=start_date, end=end_date, progress=True, auto_adjust=True)
        if data.empty:
            print("yfinance returned no data. Cannot proceed.")
            return None

        prices = data['Close']
        prices.dropna(axis=1, how='all', inplace=True)
        prices.ffill(inplace=True)
        prices.bfill(inplace=True)
        print("\nMaster download complete.")
        return prices
    except Exception as e:
        print(f"Error during master data download: {e}")
        return None

def create_final_portfolio(price_data):
    """Creates the optimal, filtered, and rebalanced portfolio."""
    if price_data.empty or len(price_data) < 2: return None
    mu = expected_returns.mean_historical_return(price_data)
    S = risk_models.sample_cov(price_data)
    ef = EfficientFrontier(mu, S)

    try:
        initial_weights = ef.max_sharpe()
    except Exception: return None

    weights_series = pd.Series(initial_weights)
    filtered_weights = weights_series[weights_series >= 0.02]
    if filtered_weights.empty: return None

    top_weights = filtered_weights.sort_values(ascending=False).head(12)
    return (top_weights / top_weights.sum()).to_dict()

def calculate_period_return(portfolio_weights, prices, benchmark_ticker='SPY'):
    """Calculates the total return for a portfolio and benchmark over a given price series."""
    if prices.empty or benchmark_ticker not in prices.columns:
        return None, None

    # Ensure all portfolio tickers are present in the price data
    valid_tickers = [t for t in portfolio_weights.keys() if t in prices.columns]
    if len(valid_tickers) != len(portfolio_weights):
        return None, None # Incomplete data for this period

    benchmark_prices = prices[benchmark_ticker].squeeze()
    portfolio_prices = prices[valid_tickers]

    portfolio_daily_returns = portfolio_prices.pct_change().dropna()
    portfolio_total_return = (portfolio_daily_returns.dot(pd.Series(portfolio_weights)) + 1).prod() - 1
    benchmark_total_return = (benchmark_prices.pct_change().dropna() + 1).prod() - 1

    return portfolio_total_return, benchmark_total_return

def display_historical_summary(results):
    """Displays the summary table and conclusion for the multi-year backtest."""
    if not results:
        print("\nNo historical results could be calculated.")
        return

    df = pd.DataFrame(results)
    cumulative_portfolio = (1 + df['Portfolio Return']).prod() - 1
    cumulative_sp500 = (1 + df['S&P 500 Return']).prod() - 1

    df['Winner'] = df.apply(lambda row: 'Portfolio' if row['Portfolio Return'] > row['S&P 500 Return'] else 'S&P 500', axis=1)
    df['Portfolio Return'] = df['Portfolio Return'].map('{:.2%}'.format)
    df['S&P 500 Return'] = df['S&P 500 Return'].map('{:.2%}'.format)

    print("\n" + "="*65)
    print("    PART 1: 10-YEAR HISTORICAL BACKTESTING SUMMARY")
    print("="*65)
    print(df.to_string(index=False))
    print("-"*65)

    print("\n--- Historical Conclusion ---")
    print(f"Total Cumulative Return of Portfolio Strategy: {cumulative_portfolio:.2%}")
    print(f"Total Cumulative Return of S&P 500 (Buy & Hold): {cumulative_sp500:.2%}")

    if cumulative_portfolio > cumulative_sp500:
        print(f"\n✅ Historically, the Portfolio strategy was SUPERIOR.")
    else:
        print(f"\n❌ Historically, a simple S&P 500 (Buy & Hold) strategy was SUPERIOR.")
    print("="*65)

def run_current_ytd_analysis(all_prices, benchmark_ticker='SPY'):
    """Creates a portfolio from the last 5 years and tests its YTD performance."""
    print("\n" + "="*65)
    print("    PART 2: CURRENT 'LIVE' PORTFOLIO & YTD ANALYSIS")
    print("="*65)

    end_date = pd.Timestamp.today()
    start_date = end_date - pd.DateOffset(years=5)
    print(f"\nCreating 'Live' Portfolio based on data from {start_date.date()} to {end_date.date()}...")
    live_analysis_prices = all_prices.loc[start_date:end_date]

    live_portfolio = create_final_portfolio(live_analysis_prices)

    if not live_portfolio:
        print("\nCould not create a 'Live' portfolio from the most recent 5-year data.")
        return

    print("\n--- 'Live' Model Portfolio Recommendation ---")
    for ticker, weight in live_portfolio.items():
        print(f"  - {ticker:<6}: {weight:.4f} ({weight:>7.2%})")

    print(f"\n--- Comparing YTD Performance (Year {end_date.year}) ---")
    ytd_start_date = pd.Timestamp(f'{end_date.year}-01-01')
    ytd_prices = all_prices.loc[ytd_start_date:end_date]

    portfolio_return, sp500_return = calculate_period_return(live_portfolio, ytd_prices, benchmark_ticker)

    if portfolio_return is not None:
        print(f"'Live' Portfolio YTD Return: {portfolio_return:.2%}")
        print(f"S&P 500 ({benchmark_ticker}) YTD Return:  {sp500_return:.2%}")

        print("\n--- YTD Conclusion ---")
        if portfolio_return > sp500_return:
            print(f"✅ So far this year, the 'Live' portfolio is OUTPERFORMING the S&P 500.")
        else:
            print(f"❌ So far this year, the 'Live' portfolio is UNDERPERFORMING the S&P 500.")
    else:
        print("Could not calculate YTD performance.")
    print("="*65)


if __name__ == "__main__":
    # --- THIS IS THE ONLY LINE THAT NEEDED TO CHANGE ---
    # Defines the start years for 10 cycles of 5-year analysis periods.
    ANALYSIS_START_YEARS = [2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019]
    # ---

    historical_results = []
    sp500_tickers = get_sp500_tickers()

    if sp500_tickers:
        master_start_date = pd.Timestamp(f'{ANALYSIS_START_YEARS[0]}-01-01')
        master_end_date = pd.Timestamp.today()
        all_prices = get_master_historical_data(sp500_tickers + ['SPY'], master_start_date, master_end_date)

        if all_prices is not None:
            # PART 1: Run the 10-year historical backtest
            for start_year in ANALYSIS_START_YEARS:
                analysis_end_year = start_year + 4
                comparison_year = analysis_end_year + 1

                print("\n" + "#"*45)
                print(f"# Historical Cycle: {start_year}-{analysis_end_year} (Test on {comparison_year})")
                print("#"*45)

                analysis_start_date = pd.Timestamp(f'{start_year}-01-01')
                analysis_end_date = pd.Timestamp(f'{analysis_end_year}-12-31')
                analysis_prices = all_prices.loc[analysis_start_date:analysis_end_date]

                print("  Creating portfolio from historical slice...")
                final_portfolio = create_final_portfolio(analysis_prices)

                if final_portfolio:
                    backtest_start = pd.Timestamp(f'{comparison_year}-01-01')
                    backtest_end = pd.Timestamp(f'{comparison_year}-12-31')
                    backtest_prices = all_prices.loc[backtest_start:backtest_end]

                    portfolio_return, sp500_return = calculate_period_return(final_portfolio, backtest_prices)
                    if portfolio_return is not None:
                        historical_results.append({
                            'Test Period': str(comparison_year),
                            'Portfolio Return': portfolio_return,
                            'S&P 500 Return': sp500_return
                        })
                else:
                    print("  Could not create a valid portfolio for this historical period.")

            display_historical_summary(historical_results)

            # PART 2: Run the current "live" analysis
            run_current_ytd_analysis(all_prices)

Performing one master download for all data from 2010-01-01 to 2025-08-10...
This may take several minutes due to the long date range...


[*********************100%***********************]  503 of 503 completed
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  prices.dropna(axis=1, how='all', inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  prices.ffill(inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  prices.bfill(inplace=True)



Master download complete.

#############################################
# Historical Cycle: 2010-2014 (Test on 2015)
#############################################
  Creating portfolio from historical slice...

#############################################
# Historical Cycle: 2011-2015 (Test on 2016)
#############################################
  Creating portfolio from historical slice...

#############################################
# Historical Cycle: 2012-2016 (Test on 2017)
#############################################
  Creating portfolio from historical slice...

#############################################
# Historical Cycle: 2013-2017 (Test on 2018)
#############################################
  Creating portfolio from historical slice...

#############################################
# Historical Cycle: 2014-2018 (Test on 2019)
#############################################
  Creating portfolio from historical slice...

#############################################
# Hist




    PART 1: 10-YEAR HISTORICAL BACKTESTING SUMMARY
Test Period Portfolio Return S&P 500 Return    Winner
       2015           -1.16%          1.29%   S&P 500
       2016            2.16%         13.59%   S&P 500
       2017           12.50%         20.78%   S&P 500
       2018            5.23%         -5.25% Portfolio
       2019            5.44%         31.09%   S&P 500
       2020           14.13%         17.24%   S&P 500
       2021           10.39%         30.51%   S&P 500
       2022           -6.58%        -18.65% Portfolio
       2023            8.82%         26.71%   S&P 500
       2024           34.31%         25.59% Portfolio
-----------------------------------------------------------------

--- Historical Conclusion ---
Total Cumulative Return of Portfolio Strategy: 116.84%
Total Cumulative Return of S&P 500 (Buy & Hold): 241.87%

❌ Historically, a simple S&P 500 (Buy & Hold) strategy was SUPERIOR.

    PART 2: CURRENT 'LIVE' PORTFOLIO & YTD ANALYSIS

Creating 'Live' Portf