<a href="https://colab.research.google.com/github/dheepdatascigit/etfperformcomnotebook/blob/main/etftrend_portfolio_prod.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# ETF portfolio comparison against benchmark indices

import yfinance as yf
import pandas as pd
import matplotlib.pyplot as plt
from datetime import datetime

# -------------------------------------------
# Configuration
# -------------------------------------------

# Earlier allocation, kept for reference (only VAS and VEU carry weight):
# allocation = {
#     'FANG.AX': 0.00,
#     'VGS.AX': 0.00,
#     'IVV.AX': 0.00,
#     'VAS.AX': 0.50,
#     'VEU.AX': 0.50,
#     'RDV.AX': 0.00,
#     'MXT.AX': 0.00,
#     'WAM.AX': 0.00,
#     'VAF.AX': 0.00,
#     'VHY.AX': 0.00
# }

# Portfolio weights keyed by Yahoo Finance ticker; the weights sum to 1.0.
allocation = {
    'VAS.AX': 0.10,
    'IVV.AX': 0.15,
    'VHY.AX': 0.05,
    'FANG.AX': 0.40,
    'VGS.AX': 0.25,
    'VAF.AX': 0.05
}

# Benchmark display name -> Yahoo Finance symbol to download.
# NOTE(review): '^' normally prefixes index symbols; DHHF is presumably the
# ASX-listed ETF, whose symbol would be 'DHHF.AX' -- confirm that '^DHHF'
# actually returns data.
benchmarks = {
    'S&P 500': '^GSPC',
    'ASX 200': '^AXJO',
    'DHHF': '^DHHF'
}

# ETF categories
# Earlier grouping, matching the earlier allocation above:
# categories = {
#     'Growth': ['FANG.AX', 'VGS.AX', 'IVV.AX', 'VAS.AX', 'VEU.AX'],
#     'Income': ['RDV.AX', 'MXT.AX', 'WAM.AX'],
#     'Defensive': ['VAF.AX', 'VHY.AX']
# }

# Category name -> tickers in that category. Every ticker in `allocation`
# appears in exactly one category.
categories = {
    'Growth': ['VAS.AX', 'IVV.AX', 'VGS.AX', 'FANG.AX'],
    'Income': ['VHY.AX'],
    'Defensive': ['VAF.AX']
}


# Date range handed to the price download as `start` / `end`.
start_date = '2020-07-01'
end_date = '2025-08-13'

# -------------------------------------------
# Functions
# -------------------------------------------

def download_adjusted_close(tickers, start, end):
    """Download closing prices from Yahoo Finance, one column per ticker.

    The download is requested with ``auto_adjust=True`` and
    ``group_by='ticker'``.

    Args:
        tickers: A single ticker symbol or a sequence of ticker symbols.
        start: Start of the date range, passed through to ``yf.download``.
        end: End of the date range, passed through to ``yf.download``.

    Returns:
        A DataFrame sharing the index of the downloaded data, with one
        column per ticker named after that ticker. Tickers whose ``Close``
        column is absent from the download are left out.
    """
    requested = [tickers] if isinstance(tickers, str) else list(tickers)
    data = yf.download(tickers, start=start, end=end, group_by='ticker', auto_adjust=True)

    # Depending on the yfinance version, a single-ticker download comes back
    # with either flat columns or (ticker, field) MultiIndex columns. Handle
    # both so the result always has ticker-named columns.
    if not isinstance(data.columns, pd.MultiIndex):
        if not requested or 'Close' not in data.columns:
            return pd.DataFrame(index=data.index)
        return data[['Close']].rename(columns={'Close': requested[0]})

    available = [ticker for ticker in requested if (ticker, 'Close') in data.columns]
    return pd.DataFrame(
        {ticker: data[(ticker, 'Close')] for ticker in available},
        index=data.index,
    )

def normalize(df):
    """Rescale prices so that every series starts at 100.

    Each column (or the Series itself) is divided by its first non-missing
    value. Dividing by the literal first row would turn a whole column into
    NaN whenever that column has no price on the first date of the index.

    Args:
        df: A DataFrame of prices (one column per instrument) or a single
            price Series.

    Returns:
        An object of the same shape with values scaled to 100 at each
        series' first valid observation. Leading missing values stay NaN.
    """
    # Back-filling pulls each column's first valid price up into row 0.
    base = df.bfill().iloc[0]
    return df / base * 100

def calculate_portfolio(df, weights):
    """Combine per-instrument series into one weighted portfolio series.

    Args:
        df: DataFrame with one column per instrument.
        weights: Mapping of column name to weight.

    Returns:
        A Series holding, for every row, the sum of each column multiplied
        by its weight.
    """
    weight_vector = pd.Series(weights)
    # Weights are aligned to the columns by label before multiplying.
    contributions = df.mul(weight_vector, axis='columns')
    return contributions.sum(axis='columns')

def calculate_annual_return(series):
    """Compute the compound annual growth rate of a date-indexed series.

    Missing values are dropped first, so the rate is measured between the
    first and last valid observations rather than between the raw end
    points, which may be NaN.

    Args:
        series: A Series of values indexed by timestamps.

    Returns:
        The annualised return as a fraction (0.10 means 10 % per year), or
        NaN when there is no valid data or the valid data spans zero days.
    """
    valid = series.dropna()
    if valid.empty:
        return float('nan')

    days = (valid.index[-1] - valid.index[0]).days
    if days == 0:
        # A zero-length span has no meaningful annualised rate.
        return float('nan')

    years = days / 365.25
    start = valid.iloc[0]
    end = valid.iloc[-1]
    return ((end / start) ** (1 / years)) - 1

def annotate_endpoints(ax, series_dict):
    """Label the end of each plotted series with its name and annual return.

    Each annotation is drawn in the colour of the line it belongs to. The
    line is found by matching the series label against the line labels on
    ``ax``. If no label matches and the axes holds exactly one line per
    series, the line at the same position is used. Otherwise the colour of
    the last line is used.

    Args:
        ax: The matplotlib axes the series were plotted on.
        series_dict: Mapping of label to date-indexed Series, in the order
            the series were plotted.
    """
    lines = ax.get_lines()
    color_by_label = {line.get_label(): line.get_color() for line in lines}
    # Position is only a trustworthy key when lines and series pair up 1:1.
    one_line_per_series = len(lines) == len(series_dict)

    for position, (label, series) in enumerate(series_dict.items()):
        # Anchor on the last valid point; a NaN end point cannot be drawn.
        valid = series.dropna()
        if valid.empty:
            continue

        if label in color_by_label:
            color = color_by_label[label]
        elif one_line_per_series:
            color = lines[position].get_color()
        else:
            color = lines[-1].get_color() if lines else None

        annual_return = calculate_annual_return(valid) * 100
        label_with_return = f"{label} ({annual_return:.1f}%)"
        ax.annotate(label_with_return,
                    xy=(valid.index[-1], valid.iloc[-1]),
                    xytext=(5, 0),
                    textcoords="offset points",
                    fontsize=9,
                    color=color)

def calculate_category_trends(normalized_df, allocation, categories):
    """Build one weighted trend series per ETF category.

    Within each category the allocation weights are rescaled to sum to 1,
    so the category trend is comparable to the other normalised series.

    Args:
        normalized_df: DataFrame of normalised prices, one column per ticker.
        allocation: Mapping of ticker to portfolio weight.
        categories: Mapping of category name to a list of tickers.

    Returns:
        A dict mapping category name to its weighted trend Series.
        Tickers missing from ``allocation`` or from ``normalized_df`` are
        ignored, and a category whose remaining weights sum to zero is
        left out instead of raising ``ZeroDivisionError``.
    """
    category_trends = {}

    for category, tickers in categories.items():
        held = [
            ticker for ticker in tickers
            if ticker in allocation and ticker in normalized_df.columns
        ]
        total_weight = sum(allocation[ticker] for ticker in held)
        if total_weight == 0:
            # Nothing is allocated to this category, so there is no trend.
            continue

        norm_weights = {ticker: allocation[ticker] / total_weight for ticker in held}
        category_trends[category] = calculate_portfolio(normalized_df[held], norm_weights)

    return category_trends

def plot_all(normalized_data, portfolio_trend, benchmarks_data):
    """Plot every ETF, every benchmark and the total portfolio on one chart.

    Args:
        normalized_data: DataFrame of normalised ETF prices, one column per
            ticker. A trailing ``.AX`` is stripped from the ticker for display.
        portfolio_trend: Series of the weighted total portfolio.
        benchmarks_data: Mapping of benchmark name to normalised Series.
    """
    fig, ax = plt.subplots(figsize=(16, 8))

    # Plot individual ETFs
    for column in normalized_data.columns:
        ax.plot(normalized_data[column], label=column.replace('.AX', ''), alpha=0.8)

    # Plot benchmarks
    for name, series in benchmarks_data.items():
        ax.plot(series, label=name, linestyle='--')

    # Plot portfolio trend
    ax.plot(portfolio_trend, label='Total Portfolio', color='black', linewidth=3)

    # Annotate annual returns; the series are listed in plotting order.
    all_series = {col.replace('.AX', ''): normalized_data[col] for col in normalized_data.columns}
    all_series.update(benchmarks_data)
    all_series['Total Portfolio'] = portfolio_trend
    annotate_endpoints(ax, all_series)

    ax.set_title("ETF Portfolio vs S&P500 and ASX200 (Scaled to 100)")
    ax.set_ylabel("Scaled Price (100 = start value)")
    ax.set_xlabel("Date")
    # The lines carry labels, so show them, as the category chart does.
    ax.legend()
    ax.grid(True)
    plt.tight_layout()
    plt.show()


def plot_category_vs_benchmarks(category_trends, total_portfolio, benchmark_data):
    """Chart the category portfolios against the total portfolio and benchmarks.

    Args:
        category_trends: Mapping of category name to its trend Series.
        total_portfolio: Series of the weighted total portfolio.
        benchmark_data: Mapping of benchmark name to normalised Series.
    """
    figure, axes = plt.subplots(figsize=(16, 8))

    # Drawing order: categories, then the total portfolio, then benchmarks.
    for category, trend in category_trends.items():
        axes.plot(trend, label=f"{category} Portfolio")
    axes.plot(total_portfolio, label='Total Portfolio', color='black', linewidth=3)
    for benchmark, trend in benchmark_data.items():
        axes.plot(trend, label=benchmark, linestyle='--')

    # Collect the series in the same order for the end-point annotations.
    annotated = dict(category_trends)
    annotated['Total Portfolio'] = total_portfolio
    annotated.update(benchmark_data)
    annotate_endpoints(axes, annotated)

    axes.set_title("Category Portfolios vs Benchmarks (Total Return incl. Dividends, Scaled to 100)")
    axes.set_ylabel("Scaled Value (100 = start)")
    axes.set_xlabel("Date")
    axes.legend()
    axes.grid(True)
    plt.tight_layout()
    plt.show()


# -------------------------------------------
# Main Program
# -------------------------------------------

# Script entry point: download prices, draw both charts, then print a table
# of annualised returns. Every name assigned here is module-level.
if __name__ == "__main__":
    etf_tickers = list(allocation.keys())
    benchmark_tickers = list(benchmarks.values())

    # Download ETF and benchmark data
    etf_data = download_adjusted_close(etf_tickers, start_date, end_date)
    benchmark_data = download_adjusted_close(benchmark_tickers, start_date, end_date)

    # Normalize prices
    etf_normalized = normalize(etf_data)
    # Re-key the benchmarks by display name. A benchmark whose column is
    # missing from the download raises KeyError here.
    benchmark_normalized = {
        name: normalize(benchmark_data[ticker]) for name, ticker in benchmarks.items()
    }

    # Calculate portfolio trend
    portfolio_trend = calculate_portfolio(etf_normalized, allocation)



    # Plot everything
    plot_all(etf_normalized, portfolio_trend, benchmark_normalized)

    # Category portfolio trends
    category_trends = calculate_category_trends(etf_normalized, allocation, categories)

    # Plot category vs total vs benchmarks
    plot_category_vs_benchmarks(category_trends, portfolio_trend, benchmark_normalized)

    # Calculate annualized returns
    # One row (dict) per instrument; turned into a DataFrame below.
    returns = []

    # ETF returns
    for col in etf_normalized.columns:
        annual_ret = calculate_annual_return(etf_normalized[col]) * 100
        returns.append({'Name': col.replace('.AX', ''), 'Type': 'ETF', 'Annualized Return (%)': round(annual_ret, 2)})

    # Benchmark returns
    for name, series in benchmark_normalized.items():
        annual_ret = calculate_annual_return(series) * 100
        returns.append({'Name': name, 'Type': 'Benchmark', 'Annualized Return (%)': round(annual_ret, 2)})

    # Portfolio return
    portfolio_ret = calculate_annual_return(portfolio_trend) * 100
    returns.append({'Name': 'Total Portfolio', 'Type': 'Portfolio', 'Annualized Return (%)': round(portfolio_ret, 2)})

    # Display as DataFrame
    # Sorted so the best performer is listed first.
    returns_df = pd.DataFrame(returns)
    print("\nAnnualized Returns (%):")
    print(returns_df.sort_values(by="Annualized Return (%)", ascending=False).to_string(index=False))

