Cell 1: Install and import dependencies

In [None]:
!pip install yfinance numpy pandas matplotlib scipy seaborn

import yfinance as yf
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.cluster.hierarchy import linkage, dendrogram


Cell 2: Download historical price data

In [None]:
# Universe of assets to allocate across.
tickers = ['AAPL', 'MSFT', 'GOOG', 'TSLA', 'NVDA']

# Recent yfinance releases default to auto_adjust=True, which drops the
# 'Adj Close' column and makes the lookup below raise KeyError. Requesting
# unadjusted output explicitly keeps 'Adj Close' available on both old and
# new versions.
data = yf.download(
    tickers,
    start='2022-01-01',
    end='2023-12-31',
    auto_adjust=False,
)['Adj Close']

# Simple period-over-period returns; the first row is NaN and is dropped.
returns = data.pct_change().dropna()


Cell 3: Compute correlation and distance matrix

In [None]:
# Pairwise correlation of the asset return series.
corr = returns.corr()

# Correlation-based distance: 0 when correlation is +1, 1 when it is -1.
half_gap = (1 - corr) / 2
dist = np.sqrt(half_gap)

# Annotated heatmap of the correlation matrix.
heatmap_ax = sns.heatmap(corr, annot=True, cmap='coolwarm')
heatmap_ax.set_title("Asset Correlation Matrix")
plt.show()


Cell 4: Build linkage matrix and plot dendrogram

In [None]:
from scipy.spatial.distance import squareform

# linkage() interprets a 2-D array as raw observation vectors and would
# compute Euclidean distances between the *rows* of the distance matrix.
# Converting to the condensed (upper-triangle) form makes it cluster on the
# correlation distances themselves. checks=False tolerates tiny floating-point
# deviations from a perfectly symmetric, zero-diagonal matrix.
condensed_dist = squareform(np.asarray(dist), checks=False)
link = linkage(condensed_dist, method='single')

plt.figure(figsize=(10, 5))
dendrogram(link, labels=returns.columns.tolist())
plt.title("Hierarchical Clustering Dendrogram")
plt.show()


Cell 5: Implement HRP Weighting Logic

In [None]:
def get_cluster_var(cov, cluster_items):
    """Return the variance of a cluster under inverse-variance weighting.

    The covariance matrix is restricted to ``cluster_items``, each item is
    weighted proportionally to the reciprocal of its own variance (weights
    normalised to sum to one), and the resulting portfolio variance
    ``w' * Sigma * w`` is returned.

    Args:
        cov: Covariance matrix supporting label-based ``.loc`` selection.
        cluster_items: Labels of the assets that make up the cluster.

    Returns:
        The scalar variance of the inverse-variance-weighted cluster.
    """
    sub_cov = cov.loc[cluster_items, cluster_items]
    inverse_variances = 1. / np.diag(sub_cov)
    ivp = inverse_variances / inverse_variances.sum()
    # Quadratic form w' * Sigma * w, evaluated left to right.
    weighted_rows = np.dot(ivp, sub_cov)
    return np.dot(weighted_rows, ivp.T)

def recursive_bisection(cov, items):
    """Allocate weights by recursively bisecting an ordered list of assets.

    The list is split in half; each half's variance is measured with
    ``get_cluster_var`` and the halves share the allocation in inverse
    proportion to those variances. The procedure then recurses into each
    half, scaling the sub-allocations by the half's share.

    Args:
        cov: Covariance matrix supporting label-based ``.loc`` selection.
        items: Ordered asset labels (any sequence or iterable).

    Returns:
        Dict mapping each label to its weight. The weights sum to 1 for a
        non-empty input; an empty input yields an empty dict.
    """
    items = list(items)
    if not items:
        # Without this guard both halves of an empty list are empty again
        # and the recursion never terminates.
        return {}
    if len(items) == 1:
        return {items[0]: 1.0}

    split = len(items) // 2
    left, right = items[:split], items[split:]
    var_left = get_cluster_var(cov, left)
    var_right = get_cluster_var(cov, right)
    # Share of the left half: the lower-variance half gets the larger share.
    alpha = 1 - var_left / (var_left + var_right)

    weights = {k: v * alpha
               for k, v in recursive_bisection(cov, left).items()}
    weights.update({k: v * (1 - alpha)
                    for k, v in recursive_bisection(cov, right).items()})
    return weights


Cell 6: Sort assets by cluster and compute weights

In [None]:
from scipy.cluster.hierarchy import leaves_list

# Covariance of returns drives the variance-based allocation.
cov = returns.cov()

# Leaf order of the linkage tree, mapped back to ticker labels.
ordered_indices = leaves_list(link)
ordered_tickers = corr.columns.take(ordered_indices).tolist()

weights = recursive_bisection(cov, ordered_tickers)

# Rescale the allocations so that they sum to 1.
total_weight = sum(weights.values())
normalized_weights = {
    ticker: raw_weight / total_weight
    for ticker, raw_weight in weights.items()
}


Cell 7: Display and save results

In [None]:
# One row per asset, largest allocation first.
df_weights = (
    pd.DataFrame.from_dict(
        normalized_weights,
        orient='index',
        columns=['Weight'],
    )
    .sort_values(by='Weight', ascending=False)
)

# Show the table, then persist it next to the notebook.
print(df_weights)
df_weights.to_csv("hrp_weights.csv")
print("✅ HRP weights saved.")
