# 📊 Tech Giants HRP Analysis

This notebook visualizes the Hierarchical Risk Parity (HRP) process step-by-step.
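We will:
1. Download historical price data.
2. View the correlation matrix and build the hierarchical clusters (dendrogram).
3. Optimize the portfolio weights with HRP.
4. Backtest the performance against the benchmark (equity curve).
5. Compare annualized key metrics (return, volatility, Sharpe, Sortino).
6. Simulate how the allocation evolves under monthly rebalancing.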

In [None]:
import yfinance as yf
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.figure_factory as ff
import matplotlib.pyplot as plt
from pypfopt import HRPOpt, risk_models
from scipy.cluster.hierarchy import linkage

# ---------------------------------------------------------------------------
# Configuration: everything a reader might want to tune lives here.
# ---------------------------------------------------------------------------

# Investment universe: 50 symbols requested from yfinance. The list order is
# the column order of the price table built from it.
TICKERS = [
    "AAPL", "MSFT", "NVDA", "AMZN", "GOOGL",
    "META", "TSLA", "BRK-B", "LLY", "AVGO",
    "JPM", "UNH", "V", "XOM", "HD",
    "PG", "COST", "JNJ", "ABBV", "MRK",
    "AMD", "WMT", "KO", "NFLX", "BAC",
    "PEP", "CVX", "TMO", "CRM", "WFC",
    "LIN", "CSCO", "MCD", "DIS", "ABT",
    "INTU", "QCOM", "VZ", "CMCSA", "IBM",
    "AMAT", "PFE", "UBER", "HON", "GE",
    "UNP", "TXN", "NOW", "SPGI", "PM",
]

# Symbol the portfolio is compared against.
BENCHMARK = "SPY"

# Download window: the two years ending today, as 'YYYY-MM-DD' strings.
_DATE_FORMAT = '%Y-%m-%d'
END_DATE = pd.Timestamp.now().strftime(_DATE_FORMAT)
START_DATE = (pd.Timestamp.now() - pd.DateOffset(years=2)).strftime(_DATE_FORMAT)

## 1. Data Fetching

In [None]:
print("Downloading data...")
all_tickers = TICKERS + [BENCHMARK]
data = yf.download(all_tickers, start=START_DATE, end=END_DATE, auto_adjust=True, progress=False)

# When the columns are a MultiIndex whose first level holds the price field,
# keep only the closing prices so that `data` has one column per symbol.
if hasattr(data, "columns") and isinstance(data.columns, pd.MultiIndex):
    price_fields = data.columns.get_level_values(0)
    if 'Close' in price_fields:
        data = data['Close']
    elif 'Adj Close' in price_fields:
        data = data['Adj Close']
    else:
        # Fail here with a clear message instead of a confusing KeyError below.
        raise KeyError(
            f"Neither 'Close' nor 'Adj Close' found in downloaded fields: {sorted(set(price_fields))}"
        )

# A symbol without any data (absent or all-NaN column) would make the row-wise
# dropna() below discard every row, so such symbols are excluded and reported.
available_tickers = [t for t in TICKERS if t in data.columns and data[t].notna().any()]
missing_tickers = [t for t in TICKERS if t not in available_tickers]
if missing_tickers:
    print(f"Warning: no price data for {missing_tickers}; excluding them from the analysis.")
if not available_tickers:
    raise ValueError("No price data was downloaded for any ticker in TICKERS.")
if BENCHMARK not in data.columns or not data[BENCHMARK].notna().any():
    raise ValueError(f"No price data was downloaded for the benchmark {BENCHMARK!r}.")

# Keep only dates on which every remaining symbol has a price.
prices = data[available_tickers].dropna()
benchmark = data[BENCHMARK].dropna()

if prices.empty:
    raise ValueError("No date has prices for every ticker; the price table is empty.")

print(f"Data Downloaded. Shape: {prices.shape}")
prices.tail()

## 2. Cluster Analysis (Dendrogram)
HRP relies on the hierarchical structure of correlations.

In [None]:
# Calculate Correlation: daily simple returns and their pairwise correlation
returns = prices.pct_change().dropna()
corr = returns.corr()

# Plot Heatmap
fig = px.imshow(corr, text_auto=True, title="Asset Correlation Matrix")
fig.show()


def _correlation_to_condensed_distance(corr_matrix):
    """Turn a square correlation array into a condensed correlation-distance vector.

    Uses d = sqrt((1 - rho) / 2), clipped to [0, 1], and returns the strict upper
    triangle in row-major order, which is the condensed layout `linkage` accepts.
    """
    distance = np.sqrt(np.clip((1.0 - corr_matrix) / 2.0, 0.0, 1.0))
    return distance[np.triu_indices_from(distance, k=1)]


# Plot Dendrogram
# plotly's defaults (Euclidean distance between rows + complete linkage) would draw
# a different tree from the one HRP allocates on. Supplying the correlation distance
# and single linkage makes the chart reflect the clustering used in the optimization
# step (assuming HRPOpt is left at its default linkage method).
fig_dendro = ff.create_dendrogram(
    corr.values,
    labels=list(corr.columns),
    distfun=_correlation_to_condensed_distance,
    linkagefun=lambda condensed: linkage(condensed, method="single"),
)
fig_dendro.update_layout(title="Hierarchical Clustering Dendrogram")
fig_dendro.show()

## 3. Optimization (HRP)
We use `PyPortfolioOpt` to perform the recursive bisection allocation.

In [None]:
# Fit HRP on the full return history. The result is used below as a
# per-asset weight mapping (it is wrapped in a pandas Series).
optimizer = HRPOpt(returns)
weights = optimizer.optimize()

print("Optimized Weights:")
# Show the largest allocations first.
weights_ranked = pd.Series(weights).sort_values(ascending=False)
weights_ranked

## 4. Backtest (Equity Curve)
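Compares the HRP portfolio against the SPY benchmark (Base 100). Note that the weights are fitted on the same period they are evaluated on, so this curve is an in-sample illustration rather than an out-of-sample track record.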

In [None]:
# Portfolio Returns: weighted sum of the asset returns on each date
port_weights = pd.Series(weights)
port_ret = returns @ port_weights

# Benchmark Returns
bench_ret = benchmark.pct_change().dropna()

# Restrict both series to their shared dates BEFORE compounding. The benchmark's
# history can be longer than the portfolio's (prices are limited to dates where
# every asset has data); compounding first and aligning afterwards would leave
# the two curves on different bases and the comparison would be skewed.
common_dates = port_ret.index.intersection(bench_ret.index)
port_ret = port_ret.loc[common_dates]
bench_ret = bench_ret.loc[common_dates]

# Growth of 100 units invested at the start of the shared window
port_cum_ret = (1 + port_ret).cumprod() * 100
bench_cum_ret = (1 + bench_ret).cumprod() * 100

# Align and Plot
comparison = pd.DataFrame({
    "HRP Portfolio": port_cum_ret,
    "SPY": bench_cum_ret
}).dropna()

fig = px.line(comparison, title="Cumulative Performance (Base 100)")
fig.show()

## 5. Key Metrics Comparison
Annualized Metrics (Return, Volatility, Sharpe, Sortino)

In [None]:
def calculate_metrics(returns, name="Portfolio", periods_per_year=252):
    """Summarize a series of periodic returns as formatted annualized metrics.

    Parameters
    ----------
    returns : pandas.Series
        Simple returns per period (one value per observation).
    name : str, optional
        Label for the series. Accepted for call-site readability; it does not
        affect the result.
    periods_per_year : int, optional
        Number of return observations per year used for annualization
        (252 by default).

    Returns
    -------
    dict
        Keys "Return (Ann)", "Volatility (Ann)", "Sharpe Ratio" and
        "Sortino Ratio", each mapped to a formatted string. A ratio whose
        denominator is zero (or undefined) is reported as "nan".
    """
    annualization = np.sqrt(periods_per_year)

    # Annualized Return (arithmetic mean scaled to a year)
    ann_ret = returns.mean() * periods_per_year

    # Annualized Volatility
    ann_vol = returns.std() * annualization

    # Downside deviation with a zero target: root-mean-square of the negative
    # returns. Taking .std() of the clipped series would subtract its mean and
    # measure dispersion around that mean instead of shortfall below zero.
    shortfall = returns.clip(upper=0)
    downside_dev = np.sqrt((shortfall ** 2).mean()) * annualization

    # Sharpe / Sortino Ratio (Rf=0); undefined when the risk measure is zero.
    sharpe = ann_ret / ann_vol if ann_vol > 0 else np.nan
    sortino = ann_ret / downside_dev if downside_dev > 0 else np.nan

    return {
        "Return (Ann)": f"{ann_ret*100:.2f}%",
        "Volatility (Ann)": f"{ann_vol*100:.2f}%",
        "Sharpe Ratio": f"{sharpe:.2f}",
        "Sortino Ratio": f"{sortino:.2f}"
    }

# Compute the same metric set for the strategy and for the benchmark.
metrics_hrp = calculate_metrics(port_ret, name="HRP")
metrics_spy = calculate_metrics(bench_ret, name="SPY")

# One row per strategy, one column per metric.
row_labels = ["HRP Portfolio", "SPY Benchmark"]
metric_rows = [metrics_hrp, metrics_spy]
results = pd.DataFrame(metric_rows, index=row_labels)
results

## 6. Rolling Allocation (Backtest)
Simulating how the HRP allocation would have evolved over time (Rebalanced Monthly).

In [None]:
allocations = []
dates = []
skipped = []  # (rebalance date, error) pairs for windows where the optimizer failed

print("Running Rolling Backtest (Monthly Rebalance)...")
# Start from 6 months in to have some data
start_idx = 126
rebalance_freq = 21 # Approx 1 month

for i in range(start_idx, len(returns), rebalance_freq):
    current_date = returns.index[i]

    # Expanding window: all history strictly before the rebalance date, so the
    # weights for a date never use that date's (or any later) return.
    window_returns = returns.iloc[:i]

    try:
        opt = HRPOpt(window_returns)
        w = opt.optimize()
        allocations.append(w)
        dates.append(current_date)
    except Exception as e:
        # Keep the backtest running, but record the failure instead of hiding it.
        skipped.append((current_date, repr(e)))

if skipped:
    print(f"Warning: optimization failed on {len(skipped)} rebalance date(s):")
    for skipped_date, error in skipped:
        print(f"  {skipped_date}: {error}")

df_alloc_hist = pd.DataFrame(allocations, index=dates)
df_alloc_hist.index.name = "Date"

if df_alloc_hist.empty:
    # Happens when there are not more than `start_idx` return rows, or when
    # every optimization failed; there is nothing meaningful to plot.
    print(f"No allocations were produced (return rows: {len(returns)}, required: > {start_idx}).")
else:
    # Plot Stacked Area Chart
    fig_area = px.area(df_alloc_hist, title="Asset Allocation Evolution (Expanding HRP)")
    fig_area.update_layout(yaxis=dict(range=[0, 1]))
    fig_area.show()