# Efficient Frontier — S&P 500 Technology (Manual Ticker List, One-by-One Download)

This notebook computes the **Markowitz mean–variance efficient frontier** for a **manually specified list** of tickers (intended to match your pasted S&P 500 Information Technology table).

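**Prices:** downloaded **one ticker at a time** from Yahoo Finance via `yfinance`, with `auto_adjust=True` (so the **Close** column is split/dividend-adjusted). The download cell itself is not shown below; the notebook reads the price table from the cached file `data/Tech_industry_prices-2005.csv`, which is presumably the saved result of that download.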



## 2) Imports

In [1]:
import warnings
warnings.filterwarnings("ignore")

import time
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import yfinance as yf
import cvxpy as cp

from sklearn.covariance import LedoitWolf
from scipy.cluster.hierarchy import linkage, leaves_list

# Matplotlib defaults (figure size in inches, grid on) for any matplotlib figure.
plt.rcParams["figure.figsize"] = (10, 6)
plt.rcParams["axes.grid"] = True

# Factor used to scale daily return means/covariances to annual figures.
ANNUALIZATION = 252

def dot_to_dash(ticker: str) -> str:
    """Return ``ticker`` with every "." replaced by "-" (e.g. "BRK.B" -> "BRK-B")."""
    return "-".join(ticker.split("."))


In [2]:
import numpy as np
import pandas as pd

import plotly.express as px
import plotly.graph_objects as go

from scipy.cluster.hierarchy import linkage, leaves_list
from scipy.spatial.distance import squareform

from src import style
style.set_plotly_defaults()

## 3) Parameters

In [3]:
# NOTE(review): START/END are not referenced by any cell shown in this notebook;
# presumably they bounded the original price download — confirm before relying on them.
START = "2005-01-01"
END   = None
# Minimum number of non-missing price observations a ticker needs to be kept.
MIN_HISTORY_DAYS = 252

# LONG_ONLY is forwarded to the solvers as `long_only` (adds the w >= 0 constraint).
LONG_ONLY = True
# Annual risk-free rate used by the Sharpe calculations and the portfolio metrics.
RF = 0.05
# Number of target-return points on the frontier grid.
N_FRONTIER = 60

# NOTE(review): not referenced by any cell shown here — these look like
# throttling/retry settings for a download loop; confirm.
SLEEP_BETWEEN = 0.25
RETRIES = 2

# Seed NumPy's global random number generator.
RANDOM_SEED = 7
np.random.seed(RANDOM_SEED)


In [4]:
# Load the cached price table: the first CSV column is used as the index and
# parsed as dates; every other column is one ticker's price series
# (NaN where a ticker has no data, as the recorded output shows).
prices = pd.read_csv("data/Tech_industry_prices-2005.csv", index_col=0, parse_dates=True)
prices

Unnamed: 0_level_0,ACN,ADBE,AMD,AKAM,APH,ADI,AAPL,AMAT,APP,ANET,...,TEL,TDY,TER,TXN,TRMB,TYL,VRSN,WDC,WDAY,ZBRA
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2005-01-03,18.431828,30.838949,21.410000,12.820000,1.977094,22.443081,0.949067,12.184169,,,...,,29.820000,15.252403,15.051235,8.082500,8.190000,27.409229,6.492141,,55.509998
2005-01-04,17.998463,30.024111,20.209999,12.200000,1.915191,21.832512,0.958813,11.952365,,,...,,29.180000,14.331919,14.690203,8.030000,8.020000,26.491657,6.328929,,54.470001
2005-01-05,17.928574,29.859142,19.750000,12.000000,1.870665,21.931196,0.967212,11.749535,,,...,,28.400000,14.138618,14.248246,7.962500,7.860000,25.792957,6.183853,,52.570000
2005-01-06,17.767807,29.364239,19.719999,11.940000,1.843514,21.894196,0.967961,11.662611,,,...,,28.420000,13.963725,14.192228,7.820000,7.830000,25.742451,6.129451,,52.650002
2005-01-07,18.599581,29.384233,19.920000,12.150000,1.846228,21.925028,1.038440,11.698831,,,...,,28.070000,14.267488,14.266931,7.660000,7.770000,26.053919,6.105269,,53.099998
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2026-01-14,288.540009,304.440002,223.600006,90.650002,146.750000,297.989990,259.959991,301.890015,617.760010,125.089996,...,237.300003,562.539978,230.190002,193.449997,79.760002,452.000000,249.309998,215.000000,193.990005,262.510010
2026-01-15,287.769989,304.089996,227.919998,91.529999,154.220001,302.100006,258.209991,319.079987,606.989990,130.589996,...,241.919998,569.719971,227.699997,189.119995,78.570000,446.149994,249.220001,222.100006,192.720001,258.850006
2026-01-16,286.209991,296.119995,231.830002,93.489998,154.389999,300.250000,255.529999,327.010010,568.760010,129.830002,...,241.009995,581.719971,228.149994,191.580002,73.889999,440.010010,249.470001,221.509995,186.860001,251.210007
2026-01-20,272.709991,290.369995,231.919998,89.599998,152.330002,295.670013,246.699997,318.230011,565.520020,127.519997,...,233.270004,566.229980,223.979996,189.589996,69.589996,428.470001,248.119995,222.970001,182.990005,238.710007


## Returns + moments

In [5]:
# Keep only tickers with at least MIN_HISTORY_DAYS non-missing prices.
valid = (prices.notna().sum() >= MIN_HISTORY_DAYS)
prices_f = prices.loc[:, valid].copy()

# Daily log returns. NOTE: dropna() discards every date on which ANY remaining
# ticker is missing, so only dates with a complete cross-section survive
# (1197 rows x 68 tickers in the recorded output, not the full history since 2005).
rets = np.log(prices_f / prices_f.shift(1)).dropna()

# Annualized sample mean vector and sample covariance matrix.
mu = rets.mean() * ANNUALIZATION
Sigma = rets.cov() * ANNUALIZATION

# Ledoit-Wolf shrinkage covariance fitted on the same daily returns, then
# annualized and relabelled with the ticker names.
lw = LedoitWolf().fit(rets.values)
Sigma_lw = pd.DataFrame(lw.covariance_ * ANNUALIZATION, index=rets.columns, columns=rets.columns)

print("Returns shape:", rets.shape)
mu.describe()

Returns shape: (1197, 68)


count    68.000000
mean      0.122441
std       0.157915
min      -0.223625
25%       0.021733
50%       0.093443
75%       0.220224
max       0.508517
dtype: float64

## Diagnostics

In [6]:
# Per-asset annualized volatility (square root of the sample-covariance diagonal)
# paired with the annualized mean, one row per ticker.
# NOTE: the two plotting cells that follow repeat these same assignments.
vol = np.sqrt(np.diag(Sigma.values))
asset_stats = pd.DataFrame({"mu": mu.values, "sigma": vol}, index=mu.index)

# Pairwise correlations of daily returns; corr_vals keeps only the entries
# strictly above the diagonal, i.e. each asset pair once.
corr = rets.corr()
corr_vals = corr.values[np.triu_indices_from(corr.values, k=1)]


In [7]:
# Per-asset annualized volatility (sample covariance diagonal) and mean return.
vol = np.sqrt(np.diag(Sigma.values))
asset_stats = pd.DataFrame({"mu": mu.values, "sigma": vol}, index=mu.index)

# Risk/return scatter: one marker per ticker, ticker shown on hover.
fig_assets = px.scatter(
    asset_stats,
    x="sigma",
    y="mu",
    hover_name=asset_stats.index,
    title="Assets: annualized volatility vs annualized mean return",
    labels={"sigma": "Volatility (σ)", "mu": "Mean return (μ)"},
)
fig_assets.update_traces(marker=dict(size=9))
fig_assets.show()


In [8]:
# Pairwise correlations of daily returns; the strictly-upper-triangle entries
# give each asset pair exactly once (the diagonal is excluded by k=1).
corr = rets.corr()
corr_vals = corr.values[np.triu_indices_from(corr.values, k=1)]

# Histogram of those pairwise correlations.
fig_corr_hist = px.histogram(
    x=corr_vals,
    nbins=50,
    title="Distribution of pairwise correlations (daily returns)",
    labels={"x": "Correlation", "y": "Count"},
)
fig_corr_hist.show()


In [9]:
def clustered_heatmap_plotly(corr: pd.DataFrame, title: str = ""):
    """Show a correlation heatmap whose rows/columns are ordered by hierarchical clustering.

    Distances are ``1 - correlation``; assets are ordered by the leaves of an
    average-linkage tree built on those distances. The figure is displayed with
    ``fig.show()`` and nothing is returned.

    Parameters
    ----------
    corr : pd.DataFrame
        Square correlation matrix with matching index and columns.
    title : str
        Figure title; an empty string falls back to the default title.
    """
    # Correlation -> distance with an exactly-zero diagonal, then condensed form.
    distances = 1.0 - corr.values
    np.fill_diagonal(distances, 0.0)
    condensed = squareform(distances, checks=False)

    # Leaf order of the average-linkage dendrogram drives the row/column order.
    leaf_order = leaves_list(linkage(condensed, method="average"))
    reordered = corr.iloc[leaf_order, leaf_order]

    heading = title or "Clustered correlation heatmap (daily returns)"
    fig = px.imshow(
        reordered.values,
        x=reordered.columns,
        y=reordered.index,
        aspect="auto",
        color_continuous_scale="RdBu",
        zmin=-1,
        zmax=1,
        title=heading,
    )
    fig.update_layout(xaxis_title="", yaxis_title="")
    fig.show()

clustered_heatmap_plotly(corr, "Clustered correlation heatmap (daily returns)")


In [10]:
# Eigenvalues of the Ledoit-Wolf covariance, sorted largest first, then expressed
# as shares of their sum and as a running (cumulative) share.
# NOTE: the next cell repeats these four statements before plotting.
evals = np.linalg.eigvalsh(Sigma_lw.values)
evals = np.sort(evals)[::-1]
explained = evals / evals.sum()
cum_explained = np.cumsum(explained)

In [11]:
# Eigenvalues of the Ledoit-Wolf covariance (Sigma_lw), largest first, as
# individual and cumulative shares of the eigenvalue sum.
# NOTE(review): the first figure title says "(Σ)" but the spectrum plotted is
# that of the shrunk matrix Sigma_lw, not the sample covariance Sigma.
evals = np.linalg.eigvalsh(Sigma_lw.values)
evals = np.sort(evals)[::-1]
explained = evals / evals.sum()
cum_explained = np.cumsum(explained)

# Plot at most the 50 largest eigenvalues.
k = min(50, len(explained))

# Figure 1: share of each of the top-k eigenvalues.
fig_eigs = go.Figure()
fig_eigs.add_trace(go.Scatter(
    x=np.arange(1, k + 1),
    y=explained[:k],
    mode="lines+markers",
    name="Share",
))
fig_eigs.update_layout(
    title="Top eigenvalue shares of total variance (Σ)",
    xaxis_title="Eigenvalue rank",
    yaxis_title="Variance share",
)
fig_eigs.show()

# Figure 2: cumulative share across the top-k eigenvalues (y-axis fixed to [0, 1.01]).
fig_cumeigs = go.Figure()
fig_cumeigs.add_trace(go.Scatter(
    x=np.arange(1, k + 1),
    y=cum_explained[:k],
    mode="lines+markers",
    name="Cumulative share",
))
fig_cumeigs.update_layout(
    title="Cumulative variance explained by top eigenvalues",
    xaxis_title="Number of components",
    yaxis_title="Cumulative share",
    yaxis=dict(range=[0, 1.01]),
)
fig_cumeigs.show()


## 9) Efficient frontier

In [12]:
def solve_gmv(mu: pd.Series, Sigma: pd.DataFrame, long_only: bool = True, solver: str = "OSQP"):
    """Solve for the global minimum-variance (GMV) portfolio.

    Minimizes ``w' Sigma w`` subject to ``sum(w) == 1`` and, when ``long_only``
    is true, ``w >= 0``.

    Parameters
    ----------
    mu : pd.Series
        Only its length and index are used, to size and label the weight vector.
    Sigma : pd.DataFrame
        Covariance matrix; its ``.values`` are passed to the quadratic form.
    long_only : bool
        Add the non-negativity constraint on the weights.
    solver : str
        Solver name forwarded to ``Problem.solve``.

    Returns
    -------
    pd.Series
        Optimal weights indexed by ``mu.index``.

    Raises
    ------
    RuntimeError
        If the solver leaves the weight variable without a value.
    """
    cov = Sigma.values
    weights = cp.Variable(len(mu))

    budget = [cp.sum(weights) == 1]
    bounds = [weights >= 0] if long_only else []

    problem = cp.Problem(cp.Minimize(cp.quad_form(weights, cov)), budget + bounds)
    problem.solve(solver=solver, verbose=False)

    if weights.value is None:
        raise RuntimeError("GMV optimization failed. Try OSQP/ECOS/SCS.")
    return pd.Series(np.array(weights.value).ravel(), index=mu.index)

def solve_frontier(mu: pd.Series, Sigma: pd.DataFrame, R_targets: np.ndarray,
                   long_only: bool = True, solver: str = "OSQP"):
    """Trace the minimum-variance frontier over a grid of target returns.

    For every target ``R`` in ``R_targets`` this solves

        minimize  w' Sigma w
        s.t.      sum(w) == 1,  mu' w >= R,  (w >= 0 when ``long_only``)

    Parameters
    ----------
    mu : pd.Series
        Expected returns; its index labels the weight columns.
    Sigma : pd.DataFrame
        Covariance matrix; its ``.values`` are passed to the quadratic form.
    R_targets : np.ndarray
        Target returns, one optimization per entry.
    long_only : bool
        Add the non-negativity constraint on the weights.
    solver : str
        Solver name forwarded to ``Problem.solve``.

    Returns
    -------
    frontier : pd.DataFrame
        Columns ``"sigma"`` and ``"mu"``: volatility and return of each solved
        portfolio. Rows are NaN for targets where the solver produced no solution.
    W : pd.DataFrame
        One row of weights per target (columns = ``mu.index``); NaN rows for
        targets without a solution.
    """
    Sigma_np = Sigma.values
    mu_np = mu.values
    n_assets = len(mu)

    w = cp.Variable(n_assets)
    # The target return is a Parameter, so the problem is constructed and
    # canonicalized once; each solve only updates the parameter value instead
    # of rebuilding an identical problem for every grid point.
    R_param = cp.Parameter()
    constraints = [cp.sum(w) == 1, mu_np @ w >= R_param]
    if long_only:
        constraints.append(w >= 0)
    prob = cp.Problem(cp.Minimize(cp.quad_form(w, Sigma_np)), constraints)

    frontier = []
    weights = []
    for R in R_targets:
        R_param.value = float(R)
        prob.solve(solver=solver, verbose=False)
        if w.value is None:
            # No solution for this target (e.g. infeasible): keep a NaN
            # placeholder so rows stay aligned with R_targets.
            frontier.append((np.nan, np.nan))
            weights.append(np.full(n_assets, np.nan))
            continue
        wv = np.array(w.value).reshape(-1)
        frontier.append((float(np.sqrt(wv @ Sigma_np @ wv)), float(mu_np @ wv)))
        weights.append(wv)

    frontier = pd.DataFrame(frontier, columns=["sigma","mu"])
    W = pd.DataFrame(weights, columns=mu.index)
    return frontier, W

# Choose the covariance used by every optimization below:
# the Ledoit-Wolf estimate when USE_SHRINKAGE is true, else the sample covariance.
USE_SHRINKAGE = True
Sigma_use = Sigma_lw if USE_SHRINKAGE else Sigma

# Global minimum-variance portfolio and its annualized return / volatility.
w_gmv = solve_gmv(mu, Sigma_use, long_only=LONG_ONLY)
mu_gmv = float(mu @ w_gmv)
sig_gmv = float(np.sqrt(w_gmv.values @ Sigma_use.values @ w_gmv.values))

# Target-return grid from the GMV return up to the largest single-asset mean.
R_targets = np.linspace(mu_gmv, float(mu.max()), N_FRONTIER)
frontier, W = solve_frontier(mu, Sigma_use, R_targets, long_only=LONG_ONLY)

# Drop targets the solver could not solve (NaN rows) and renumber, keeping
# `frontier` and `W` aligned row-for-row.
mask = frontier.notna().all(axis=1)
frontier = frontier.loc[mask].reset_index(drop=True)
W = W.loc[mask].reset_index(drop=True)

frontier.head()


Unnamed: 0,sigma,mu
0,0.156916,0.105283
1,0.157016,0.112118
2,0.157314,0.118952
3,0.157781,0.125787
4,0.158368,0.132621


In [13]:
def sharpe(mu_p, sig_p, rf=RF):
    """Return the Sharpe ratio ``(mu_p - rf) / sig_p``.

    Note: the default ``rf`` is bound to the value of ``RF`` at the moment this
    ``def`` runs; later changes to ``RF`` do not affect the default.
    """
    return (mu_p - rf) / sig_p

# Per-asset annualized volatility (sample covariance diagonal) and mean return.
vol = np.sqrt(np.diag(Sigma.values))
asset_stats = pd.DataFrame({"mu": mu.values, "sigma": vol}, index=mu.index)

# Sharpe ratio of every frontier point; NaN where sigma is not positive.
frontier["sharpe"] = frontier.apply(lambda r: sharpe(r["mu"], r["sigma"], RF) if r["sigma"] > 0 else np.nan, axis=1)

# Max-Sharpe portfolio = best point on the sampled frontier grid (not a
# separate optimization), with its return, volatility and Sharpe recomputed
# from the weights.
# NOTE: the following plotting cell re-derives these same names.
i_ms = int(frontier["sharpe"].idxmax())
w_ms = W.loc[i_ms]
mu_ms = float(mu @ w_ms)
sig_ms = float(np.sqrt(w_ms.values @ Sigma_use.values @ w_ms.values))
sh_ms = float(sharpe(mu_ms, sig_ms, RF))


In [14]:
def sharpe(mu_p, sig_p, rf=RF):
    """Return the Sharpe ratio ``(mu_p - rf) / sig_p``.

    Works on scalars and element-wise on pandas Series / NumPy arrays.
    Note: the default ``rf`` is bound to the value of ``RF`` when this ``def`` runs.
    """
    return (mu_p - rf) / sig_p

# Per-asset stats for the overlay. Volatilities are taken from Sigma_use — the
# same covariance the frontier was optimized on — so asset markers and the
# frontier curve are measured with one risk model and are directly comparable.
vol = np.sqrt(np.diag(Sigma_use.values))
asset_stats = pd.DataFrame({"mu": mu.values, "sigma": vol}, index=mu.index)

# Sharpe ratio of every frontier point, vectorized; points whose sigma is not
# strictly positive (or is NaN) get NaN.
frontier = frontier.copy()
frontier["sharpe"] = sharpe(frontier["mu"], frontier["sigma"], RF).where(frontier["sigma"] > 0)

# Max-Sharpe portfolio = best point on the sampled frontier grid (not a
# separate optimization); its stats are recomputed from the weights.
i_ms = int(frontier["sharpe"].idxmax())
w_ms = W.loc[i_ms]
mu_ms = float(mu @ w_ms)
sig_ms = float(np.sqrt(w_ms.values @ Sigma_use.values @ w_ms.values))
sh_ms = float(sharpe(mu_ms, sig_ms, RF))

# 5a) Frontier curve with individual assets, the GMV portfolio and the
#     max-Sharpe grid point overlaid on the same volatility/return axes.
fig_frontier = go.Figure()

# Frontier as a line through the solved grid points.
fig_frontier.add_trace(go.Scatter(
    x=frontier["sigma"],
    y=frontier["mu"],
    mode="lines",
    name="Frontier",
))

# Individual assets as semi-transparent markers; ticker shown on hover.
fig_frontier.add_trace(go.Scatter(
    x=asset_stats["sigma"],
    y=asset_stats["mu"],
    mode="markers",
    name="Assets",
    text=asset_stats.index,
    hovertemplate="<b>%{text}</b><br>σ=%{x:.4f}<br>μ=%{y:.4f}<extra></extra>",
    marker=dict(size=7, opacity=0.55),
))

# Global minimum-variance portfolio.
fig_frontier.add_trace(go.Scatter(
    x=[sig_gmv],
    y=[mu_gmv],
    mode="markers",
    name="GMV",
    marker=dict(symbol="star", size=14),
    hovertemplate="GMV<br>σ=%{x:.4f}<br>μ=%{y:.4f}<extra></extra>",
))

# Max-Sharpe grid point; its Sharpe ratio is shown in the legend label.
fig_frontier.add_trace(go.Scatter(
    x=[sig_ms],
    y=[mu_ms],
    mode="markers",
    name=f"Max Sharpe (S={sh_ms:.3f})",
    marker=dict(symbol="star", size=14),
    hovertemplate="Max Sharpe<br>σ=%{x:.4f}<br>μ=%{y:.4f}<extra></extra>",
))

# Horizontal legend placed just above the plot area.
fig_frontier.update_layout(
    title="Efficient frontier with assets overlay",
    xaxis_title="Volatility (σ)",
    yaxis_title="Mean return (μ)",
    legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="left", x=0),
)
fig_frontier.show()

# 5b) The same frontier points as a scatter, colored by their Sharpe ratio.
fig_frontier_sh = px.scatter(
    frontier,
    x="sigma",
    y="mu",
    color="sharpe",
    title="Frontier colored by Sharpe",
    labels={"sigma": "Volatility (σ)", "mu": "Mean return (μ)", "sharpe": "Sharpe"},
)
fig_frontier_sh.update_traces(marker=dict(size=9))
fig_frontier_sh.show()


In [15]:
# --- identify key frontier portfolios ---
# Placeholder only: the GMV portfolio is not a row of `frontier`; use the
# existing w_gmv / mu_gmv / sig_gmv instead.
i_gmv = None  # no frontier-row index for GMV

# Max-Sharpe point on the sampled frontier grid and its weights.
i_ms = int(frontier["sharpe"].idxmax())
w_ms = W.loc[i_ms]

# Highest-return point on the computed frontier grid and its weights.
i_maxret = int(frontier["mu"].idxmax())     # "biggest expected return" on the computed frontier grid
w_maxret = W.loc[i_maxret]

# Equal-weight baseline: 1/N in every asset.
w_eq = pd.Series(1 / len(mu), index=mu.index)

# Annualized return and volatility of the max-return portfolio (volatility
# under Sigma_use), e.g. for adding a marker to the frontier plot.
mu_maxret = float(mu @ w_maxret)
sig_maxret = float(np.sqrt(w_maxret.values @ Sigma_use.values @ w_maxret.values))


In [16]:
import numpy as np
import pandas as pd

# Trading days per year, used to convert between annual and daily figures.
TRADING_DAYS = 252

def to_simple_returns(log_rets: pd.DataFrame) -> pd.DataFrame:
    """Convert log returns to simple returns, ``exp(r) - 1``.

    ``np.expm1`` is used instead of ``np.exp(r) - 1`` so that small (daily-sized)
    returns keep full floating-point precision rather than losing digits in the
    subtraction.
    """
    return np.expm1(log_rets)

def rf_daily(rf_annual: float) -> float:
    """Convert an annual rate to the equivalent per-trading-day compounded rate.

    Solves ``(1 + d) ** TRADING_DAYS == 1 + rf_annual`` for ``d``.
    """
    return (1.0 + rf_annual) ** (1.0 / TRADING_DAYS) - 1.0

def portfolio_simple_returns(simple_rets: pd.DataFrame, w: pd.Series) -> pd.Series:
    """Return the weighted sum of asset simple returns for each row (date).

    ``w`` is aligned to the columns of ``simple_rets``; assets missing from
    ``w`` get weight 0.
    """
    aligned = w.reindex(simple_rets.columns).fillna(0.0)
    return simple_rets.dot(aligned)

def max_drawdown(wealth: pd.Series) -> float:
    """Return the largest peak-to-trough decline of ``wealth`` as a fraction (<= 0)."""
    running_peak = wealth.cummax()
    drawdown = wealth.div(running_peak).sub(1.0)
    return float(drawdown.min())

def historical_var_cvar(r: pd.Series, alpha: float = 0.05) -> tuple[float, float]:
    """Return the historical ``(VaR, CVaR)`` of a return series at level ``alpha``.

    VaR is the ``alpha``-quantile of ``r``; CVaR is the mean of the returns at
    or below that quantile (falls back to the VaR itself if there are none).
    Both are on the return scale, so losses are negative numbers.
    """
    var_level = float(r.quantile(alpha))
    tail = r[r <= var_level]
    if tail.empty:
        return var_level, var_level
    return var_level, float(tail.mean())

def portfolio_metrics(
    r: pd.Series,
    rf_annual: float = 0.0,
    mar_annual: float | None = None,     # MAR = minimum acceptable return for Sortino; default=RF
) -> dict:
    """
    Summarize a daily simple-return series with performance and risk statistics.

    r: daily simple returns (NaNs are dropped first)
    rf_annual: annual risk-free rate
    mar_annual: annual MAR for Sortino (if None => rf_annual)

    Returns a dict with the keys AnnReturn, AnnVol, Sharpe, Sortino(MAR=RF),
    MaxDrawdown, Calmar, VaR(5%), CVaR(5%), WinRate, Skew, Kurtosis and
    FinalWealth($1). Ratios whose denominator is not positive are NaN. If no
    observations remain after dropping NaNs, FinalWealth($1) is NaN as well
    instead of raising IndexError.
    """
    r = r.dropna()
    rf_d = rf_daily(rf_annual)
    mar_d = rf_daily(rf_annual if mar_annual is None else mar_annual)

    mu_d = float(r.mean())
    sig_d = float(r.std(ddof=1))

    # Annualized return compounds the arithmetic daily mean (an approximation,
    # not the realized geometric growth rate).
    ann_ret = float((1.0 + mu_d) ** TRADING_DAYS - 1.0)  # mean-based approx
    ann_vol = float(sig_d * np.sqrt(TRADING_DAYS))

    # Sharpe on daily excess returns, annualized by sqrt(TRADING_DAYS).
    ex = r - rf_d
    ex_std = ex.std(ddof=1)
    sharpe_ratio = float(ex.mean() / ex_std * np.sqrt(TRADING_DAYS)) if ex_std > 0 else np.nan

    # Sortino: downside deviation is the root-mean-square shortfall below the daily MAR.
    downside = (r - mar_d).clip(upper=0.0)
    dd_d = float(np.sqrt((downside**2).mean()))
    sortino = float((r.mean() - mar_d) / dd_d * np.sqrt(TRADING_DAYS)) if dd_d > 0 else np.nan

    # Wealth path of $1 invested; drawdown-based statistics derive from it.
    wealth = (1.0 + r).cumprod()
    mdd = max_drawdown(wealth)
    calmar = float(ann_ret / abs(mdd)) if mdd < 0 else np.nan

    var95, cvar95 = historical_var_cvar(r, alpha=0.05)

    win = float((r > 0).mean())
    skew = float(r.skew())
    kurt = float(r.kurtosis())

    # With no observations the wealth path is empty; report NaN rather than
    # indexing past the end.
    final_wealth = float(wealth.iloc[-1]) if len(wealth) else np.nan

    return {
        "AnnReturn": ann_ret,
        "AnnVol": ann_vol,
        "Sharpe": sharpe_ratio,
        "Sortino(MAR=RF)": sortino,
        "MaxDrawdown": mdd,
        "Calmar": calmar,
        "VaR(5%)": var95,
        "CVaR(5%)": cvar95,
        "WinRate": win,
        "Skew": skew,
        "Kurtosis": kurt,
        "FinalWealth($1)": final_wealth,
    }


# Convert the daily log returns to simple returns so portfolio returns can be
# formed as weighted sums across assets.
simple_rets = to_simple_returns(rets)

# Portfolios to compare, as weight vectors indexed by ticker.
portfolios = {
    "GMV": w_gmv,
    "MaxSharpe": w_ms,
    "EqualWeight": w_eq,
}

# Daily simple-return series of each portfolio with its weights held fixed.
# NOTE: these are computed on the same returns the weights were estimated from.
series = {name: portfolio_simple_returns(simple_rets, w) for name, w in portfolios.items()}

# One row of summary statistics per portfolio.
metrics = pd.DataFrame({name: portfolio_metrics(r, rf_annual=RF) for name, r in series.items()}).T
metrics


Unnamed: 0,AnnReturn,AnnVol,Sharpe,Sortino(MAR=RF),MaxDrawdown,Calmar,VaR(5%),CVaR(5%),WinRate,Skew,Kurtosis,FinalWealth($1)
GMV,0.14512,0.15698,0.552624,0.788246,-0.206914,0.701355,-0.014852,-0.022262,0.538847,-0.106465,4.789765,1.795336
MaxSharpe,0.464193,0.238787,1.393698,2.068444,-0.274967,1.688176,-0.021962,-0.033192,0.565581,-0.049451,4.513876,5.34475
EqualWeight,0.225959,0.251011,0.617546,0.894014,-0.337442,0.669624,-0.024234,-0.034791,0.54553,0.229876,5.286981,2.267381


In [17]:
def plot_cum_wealth_plotly(series: dict[str, pd.Series], title: str = "Cumulative wealth (start = $1)"):
    """Plot the cumulative wealth path of each return series and show the figure.

    Each value of ``series`` is a simple-return series; NaNs are dropped and
    wealth is the cumulative product of ``1 + return``. Nothing is returned.
    """
    fig = go.Figure()
    for label, daily in series.items():
        growth = daily.dropna().add(1.0).cumprod()
        hover = f"<b>{label}</b><br>Date=%{{x}}<br>Wealth=%{{y:.4f}}<extra></extra>"
        fig.add_trace(
            go.Scatter(
                x=growth.index,
                y=growth.values,
                mode="lines",
                name=label,
                hovertemplate=hover,
            )
        )
    fig.update_layout(title=title, xaxis_title="", yaxis_title="Wealth", hovermode="x unified")
    fig.show()

plot_cum_wealth_plotly(series, "Cumulative wealth of selected portfolios (start = $1)")


In [18]:
def plot_drawdowns_plotly(series: dict[str, pd.Series], title: str = "Drawdowns"):
    """Plot the drawdown path of each return series and show the figure.

    Drawdown is ``wealth / running peak - 1`` where wealth is the cumulative
    product of ``1 + return`` after dropping NaNs. Nothing is returned.
    """
    fig = go.Figure()
    for label, daily in series.items():
        growth = daily.dropna().add(1.0).cumprod()
        underwater = growth.div(growth.cummax()).sub(1.0)
        hover = f"<b>{label}</b><br>Date=%{{x}}<br>DD=%{{y:.2%}}<extra></extra>"
        fig.add_trace(
            go.Scatter(
                x=underwater.index,
                y=underwater.values,
                mode="lines",
                name=label,
                hovertemplate=hover,
            )
        )
    fig.update_layout(title=title, xaxis_title="", yaxis_title="Drawdown", hovermode="x unified")
    # Show the y-axis ticks as whole percentages.
    fig.update_yaxes(tickformat=".0%")
    fig.show()

plot_drawdowns_plotly(series, "Drawdowns of selected portfolios")

In [19]:
def plot_return_hists_plotly(series: dict[str, pd.Series], bins: int = 60, title: str = "Daily simple return distributions"):
    """Overlay one count histogram per return series and show the figure.

    NaNs are dropped from each series; ``bins`` is passed as ``nbinsx``.
    Nothing is returned.
    """
    fig = go.Figure()
    for label, daily in series.items():
        fig.add_trace(
            go.Histogram(
                x=daily.dropna().values,
                nbinsx=bins,
                name=label,
                opacity=0.45,
                histnorm="",  # counts
            )
        )
    layout = dict(
        title=title,
        xaxis_title="Daily return",
        yaxis_title="Count",
        barmode="overlay",
        hovermode="x",
    )
    fig.update_layout(**layout)
    fig.show()

plot_return_hists_plotly(series, title="Daily simple return distributions of selected portfolios")

## 10) Weights

In [20]:
def plot_top_weights_plotly(w: pd.Series, title: str, k: int = 15):
    """Show a horizontal bar chart of the ``k`` largest weights in ``w``.

    The figure is displayed with ``fig.show()``; nothing is returned.
    """
    # Take the k largest weights, then order them ascending for the horizontal bars.
    largest = w.sort_values(ascending=False).head(k).sort_values()
    bars = largest.rename_axis("asset").reset_index(name="weight")

    fig = px.bar(
        bars,
        x="weight",
        y="asset",
        orientation="h",
        title=title,
        labels={"weight": "Weight", "asset": ""},
    )
    fig.update_layout(yaxis=dict(categoryorder="total ascending"))
    fig.show()

plot_top_weights_plotly(w_gmv, "GMV — top weights", k=15)
plot_top_weights_plotly(w_ms, "Max Sharpe (sampled) — top weights", k=15)


In [21]:
# 15 largest GMV weights; entries around 1e-23 in the output are solver round-off, effectively zero.
w_gmv.sort_values(ascending=False).head(15)

ROP     2.070828e-01
IBM     2.040559e-01
MSI     1.438235e-01
CSCO    1.196090e-01
VRSN    9.148111e-02
GEN     6.869088e-02
TDY     5.082798e-02
MSFT    3.704157e-02
GLW     2.975414e-02
AKAM    2.203776e-02
FSLR    1.881911e-02
GDDY    6.776196e-03
MPWR    8.290142e-23
NVDA    7.610488e-23
TRMB    4.688608e-23
dtype: float64

In [22]:
# 15 largest max-Sharpe weights; the tiny negative entries in the output are solver round-off around zero.
w_ms.sort_values(ascending=False).head(15)

IBM     3.471900e-01
APH     3.306416e-01
NVDA    1.330942e-01
AVGO    9.817138e-02
STX     4.367974e-02
JBL     3.032694e-02
ANET    1.115738e-02
APP     5.738758e-03
PLTR   -2.993057e-23
SMCI   -5.647691e-23
WDC    -2.166341e-22
KLAC   -3.675753e-22
FSLR   -5.148568e-22
FICO   -6.078928e-22
MU     -6.310152e-22
Name: 32, dtype: float64