In [32]:
# ===================== City Temperatures — All Interactive Plots =====================
# pip install pandas numpy plotly openpyxl scikit-learn ipywidgets

from pathlib import Path
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
from sklearn.impute import SimpleImputer

# ----------------------------- Constants & Parsing -----------------------------
# Three-letter month labels, index 0 = January; used for axis/category labels.
MONTH_NAMES = ["Jan","Feb","Mar","Apr","May","Jun","Jul","Aug","Sep","Oct","Nov","Dec"]
# Lower-cased header spellings (names, abbreviations, digit strings) -> month number.
MONTH_MAP = {
    "jan":1,"january":1,"1":1,"01":1,
    "feb":2,"february":2,"2":2,"02":2,
    "mar":3,"march":3,"3":3,"03":3,
    "apr":4,"april":4,"4":4,"04":4,
    "may":5,"5":5,"05":5,
    "jun":6,"june":6,"6":6,"06":6,
    "jul":7,"july":7,"7":7,"07":7,
    "aug":8,"august":8,"8":8,"08":8,
    "sep":9,"sept":9,"september":9,"9":9,"09":9,
    "oct":10,"october":10,"10":10,
    "nov":11,"november":11,"11":11,
    "dec":12,"december":12,"12":12,
}
def parse_month_header(colname: str):
    """
    Map a column header to a month number (1-12), or np.nan if it is not a month.

    Accepted forms:
      * numeric labels: 3, "3", "03", and float-looking labels such as "3.0";
      * month names: the leading run of letters must be a known spelling in
        MONTH_MAP or an abbreviation (>= 3 letters) of one, so "Jan", "Sept",
        "Janu", "Jan-2020" and "Jan.1" all resolve.

    Headers that merely start with the same three letters as a month
    ("Marbella", "Decade", "Junk") are rejected instead of being read as months.
    """
    s = str(colname).strip()

    # Numeric header. float() also covers labels like "3.0"; nan/inf and
    # non-integral values fail is_integer() and are reported as "not a month".
    try:
        number = float(s)
    except ValueError:
        number = None
    if number is not None:
        if number.is_integer() and 1 <= number <= 12:
            return int(number)
        return np.nan

    # Name header: isolate the leading alphabetic token ("jan" from "jan-2020").
    lowered = s.lower()
    end = 0
    while end < len(lowered) and lowered[end].isalpha():
        end += 1
    token = lowered[:end]
    if len(token) < 3:
        return np.nan
    if token in MONTH_MAP:
        return MONTH_MAP[token]
    # Abbreviation of a known spelling; three letters already identify a month uniquely.
    for name, month in MONTH_MAP.items():
        if name.startswith(token):
            return month
    return np.nan

# ----------------------------- Robust Path Resolver -----------------------------
def resolve_xlsx_path(xlsx_path=None, filename_candidates=("CityTemp.xlsx","citytemp.xlsx")) -> Path:
    """
    Locate the Excel workbook.

    Search order:
      1. ``xlsx_path`` itself, when given and it is an existing file;
      2. ``<base>/data/<name>`` then ``<base>/<name>`` for each candidate name;
      3. a recursive search (``rglob``, unbounded depth) under ``<base>``.

    ``<base>`` is the directory of this file when ``__file__`` is defined,
    otherwise the current working directory.

    Returns the first matching path. Raises FileNotFoundError listing every
    location that was checked directly, including the explicit path.
    """
    tried = []
    if xlsx_path:
        explicit = Path(xlsx_path)
        # is_file(): a directory that happens to exist is not a usable workbook.
        if explicit.is_file():
            return explicit
        tried.append(explicit)

    base = Path(__file__).resolve().parent if "__file__" in globals() else Path.cwd()
    for fn in filename_candidates:
        for candidate in (base / "data" / fn, base / fn):
            if candidate.is_file():
                return candidate
            tried.append(candidate)

    for fn in filename_candidates:
        # Sorted so the choice does not depend on filesystem enumeration order.
        matches = sorted(p for p in base.rglob(fn) if p.is_file())
        if matches:
            return matches[0]

    listing = "\n  - " + "\n  - ".join(str(p) for p in tried)
    raise FileNotFoundError(
        f"Could not find Excel. Tried:{listing}\nAlso searched recursively under: {base}"
    )

# ----------------------------- Load & Tidy Data -----------------------------
def load_city_temps(xlsx_path=None, sheet=0):
    """
    Load the city-temperature workbook in wide form.

    The first column is taken as the city column. Month columns are detected
    from their headers with parse_month_header; when a month is matched by
    several columns only the first (left-most) one is kept. If fewer than 12
    distinct months are detected and at least 12 other columns exist, the
    first 12 non-city columns are used as Jan..Dec instead.

    Month columns are coerced to numeric (unparseable cells become NaN).

    Returns (df, city_col, month_cols, col_to_num), where month_cols is ordered
    by month number and col_to_num maps each month column to 1..12.
    """
    xlsx_path = resolve_xlsx_path(xlsx_path)
    df = pd.read_excel(xlsx_path, sheet_name=sheet)
    df.columns = [str(c).strip() for c in df.columns]
    city_col = df.columns[0]
    cand = [c for c in df.columns if c != city_col]

    col_to_num = {}
    seen_months = set()
    for c in cand:
        n = parse_month_header(c)
        if pd.isna(n):
            continue
        month = int(n)
        # Keep only the first column per month ("Jan" wins over a later "January").
        if 1 <= month <= 12 and month not in seen_months:
            seen_months.add(month)
            col_to_num[c] = month

    if len(seen_months) < 12 and len(cand) >= 12:
        # Header detection was incomplete: fall back to positional Jan..Dec.
        month_cols = cand[:12]
        col_to_num = {c: i + 1 for i, c in enumerate(month_cols)}
    else:
        month_cols = sorted(col_to_num, key=lambda c: col_to_num[c])

    for c in month_cols:
        df[c] = pd.to_numeric(df[c], errors="coerce")

    print(f"Loaded: {xlsx_path.resolve()}")
    return df, city_col, month_cols, col_to_num

def to_long(df, city_col, month_cols, col_to_num):
    """
    Reshape the wide table into one row per (city, month column).

    Rows without a temperature are dropped. Adds "MonthNum" (looked up in
    col_to_num) and "Month" (the MONTH_NAMES label for that number), and
    returns the frame sorted by month number, then city.
    """
    def month_label(num):
        # Unmapped columns have no month number; fall back to the raw value as text.
        if pd.notna(num):
            return MONTH_NAMES[int(num) - 1]
        return str(num)

    selected = [city_col] + month_cols
    long_df = df[selected].melt(
        id_vars=[city_col],
        value_vars=month_cols,
        var_name="MonthCol",
        value_name="Temperature",
    ).dropna(subset=["Temperature"])

    long_df["MonthNum"] = long_df["MonthCol"].map(col_to_num)
    long_df["Month"] = long_df["MonthNum"].apply(month_label)

    sort_keys = ["MonthNum", city_col]
    return long_df.sort_values(sort_keys)

# ----------------------------- Figures -----------------------------
def fig_line_monthly(long_df, city_col, S=800):
    """
    Line chart of temperature per month, one line per city.

    The x axis is categorical in MONTH_NAMES order. Two buttons above the plot
    show or hide every city at once. The figure is S*1.5 wide and S high.
    """
    fig = px.line(
        long_df,
        x="Month",
        y="Temperature",
        color=city_col,
        markers=True,
        title="Monthly Temperature by City",
        labels={"Month": "Month", city_col: "City"},
    )

    trace_count = len(fig.data)

    def visibility_button(label, state):
        # One visibility flag per trace, applied through an "update" call.
        return dict(label=label, method="update", args=[{"visible": [state] * trace_count}])

    toggle_menu = dict(
        type="buttons", direction="right",
        x=1, xanchor="right", y=1.15, yanchor="top",
        buttons=[visibility_button("Show All", True), visibility_button("Hide All", False)],
    )

    fig.update_layout(
        xaxis=dict(type="category", categoryorder="array", categoryarray=MONTH_NAMES),
        hovermode="x unified",
        legend_title_text="Click a city to toggle",
        margin=dict(l=40, r=20, t=80, b=40),
        updatemenus=[toggle_menu],
        width=S * 1.5,
        height=S,
        autosize=False,
    )
    return fig

def fig_scatter3d_month_picker(df, city_col, col_to_num, S=800):
    """
    3D scatter (one trace per city) with dropdowns choosing the month on X/Y/Z.

    Parameters
    ----------
    df : wide DataFrame with a city column and month columns.
    city_col : name of the city column.
    col_to_num : mapping month column -> month number; when empty, the first
        12 non-city columns are taken as Jan..Dec.
    S : figure height in pixels (width is S*1.75).

    Only months that actually have a column are offered in the dropdowns.
    The axes start on the first three available months (Jan/Feb/Mar when all
    are present) and each dropdown is initialised to the month its axis shows.

    Raises ValueError when no month column is available.
    """
    if not col_to_num:
        month_cols = [c for c in df.columns if c != city_col][:12]
        col_to_num = {c: i + 1 for i, c in enumerate(month_cols)}
    num_to_col = {v: k for k, v in col_to_num.items()}
    months = sorted(m for m in num_to_col if 1 <= m <= 12)
    if not months:
        raise ValueError("No month columns available for the 3D month picker.")
    num_to_label = {m: MONTH_NAMES[m - 1] for m in months}
    cities = df[city_col].astype(str).tolist()
    n_cities = len(cities)
    # Per-month value lists, precomputed once and reused by every button.
    M = {m: df[num_to_col[m]].astype(float).tolist() for m in months}

    # Initial month per axis; with fewer than three months the last one repeats.
    start = {axis: months[min(pos, len(months) - 1)] for pos, axis in enumerate("xyz")}

    # Hover shows coordinates only. A dropdown button carries a fixed payload
    # and cannot know which months the other two axes currently display, so
    # per-axis month names in the hover text would go stale; the scene axis
    # titles carry the month instead.
    traces = [go.Scatter3d(
        x=[M[start["x"]][i]], y=[M[start["y"]][i]], z=[M[start["z"]][i]],
        mode="markers", name=city, text=[city], textfont=dict(size=10),
        marker=dict(size=6),
        hovertemplate="<b>%{text}</b><br>X: %{x}<br>Y: %{y}<br>Z: %{z}<extra></extra>"
    ) for i, city in enumerate(cities)]
    fig = go.Figure(traces)

    def axis_button(axis, m):
        # One single-point coordinate list per city trace, plus the new axis title.
        vals = [[M[m][i]] for i in range(n_cities)]
        return dict(method="update",
                    args=[{axis: vals},
                          {f"scene.{axis}axis.title.text": num_to_label[m]}],
                    label=num_to_label[m])

    def axis_dropdown(axis, xpos):
        return dict(type="dropdown", x=xpos, y=1.12, xanchor="center", yanchor="top",
                    direction="down", showactive=True,
                    # Highlight the month this axis starts on (the default is button 0).
                    active=months.index(start[axis]),
                    buttons=[axis_button(axis, m) for m in months],
                    pad={"t":4,"b":2,"r":2,"l":2})

    def label_anno(txt, xpos):
        return dict(text=txt, x=xpos, y=1.18, xref="paper", yref="paper",
                    xanchor="center", yanchor="top", showarrow=False)

    # show/hide & labels on/off
    showhide_menu = dict(type="buttons", x=0.93, y=1.12, xanchor="center", yanchor="top",
                         direction="right",
                         buttons=[dict(label="Show All", method="update", args=[{"visible":[True]*n_cities}]),
                                  dict(label="Hide All", method="update", args=[{"visible":[False]*n_cities}])])

    def labels_button(on=True):
        modes = [("markers+text" if on else "markers")] * n_cities
        return dict(label=("Labels: On" if on else "Labels: Off"), method="update", args=[{"mode": modes}])

    labels_menu = dict(type="buttons", x=0.78, y=1.12, xanchor="center", yanchor="top",
                       direction="right", buttons=[labels_button(True), labels_button(False)])

    xpos_x, xpos_y, xpos_z = 0.14, 0.39, 0.64
    fig.update_layout(
        scene=dict(xaxis_title=num_to_label[start["x"]],
                   yaxis_title=num_to_label[start["y"]],
                   zaxis_title=num_to_label[start["z"]],
                   xaxis=dict(showspikes=False), yaxis=dict(showspikes=False), zaxis=dict(showspikes=False)),
        legend_title_text="Toggle cities",
        margin=dict(l=10,r=10,t=140,b=10),
        updatemenus=[axis_dropdown('x', xpos_x), axis_dropdown('y', xpos_y), axis_dropdown('z', xpos_z),
                     labels_menu, showhide_menu],
        annotations=[label_anno("X Month", xpos_x), label_anno("Y Month", xpos_y), label_anno("Z Month", xpos_z),
                     label_anno("Point Labels", 0.78), label_anno("Cities", 0.93)],
        title_y=0.92, width=S*1.75, height=S, autosize=False
    )
    return fig

# ----------------------------- PCA Utilities & Plots -----------------------------
def compute_pca_matrix(df, city_col, month_cols, retain=0.95):
    """
    Build the city x month matrix and run PCA on it.

    Pipeline: coerce to numeric -> mean-impute missing values -> standardize
    -> PCA with all components (for the variance curves) -> PCA with k components.

    k is the smallest number of components whose cumulative explained-variance
    ratio reaches ``retain``, raised to at least 2 and capped at the number of
    components the data supports. The cap matters when ``retain`` is 1.0 (or
    floating-point rounding keeps the cumulative sum just below it) and when
    fewer than two components exist.

    Returns a dict with:
      X_proc        imputed + standardized matrix
      Z, P          scores and loadings (components transposed) of the k-component model
      eig, var      eigenvalues and explained variance (%) of the k components
      k, cities, retain
      full_var_all, full_eig_all, cumvar_all
                    per-component %, eigenvalues and cumulative % of the full model
    """
    X = df[month_cols].apply(pd.to_numeric, errors="coerce").values
    cities = df[city_col].astype(str).values
    X_proc = SimpleImputer(strategy="mean").fit_transform(X)
    X_proc = StandardScaler().fit_transform(X_proc)

    # Full PCA for EVR curves
    pca_full = PCA().fit(X_proc)
    evr = pca_full.explained_variance_ratio_
    cum_evr = np.cumsum(evr)
    full_var_all = evr * 100.0                       # per-PC %
    full_eig_all = pca_full.explained_variance_      # eigenvalues
    cumvar_all   = cum_evr * 100.0

    # Choose k to reach retain (min 2), never more components than exist.
    k = int(np.searchsorted(cum_evr, retain)) + 1
    k = min(max(2, k), len(evr))

    # Fit final PCA with k comps (used by axis picker & residual map)
    pca = PCA(n_components=k, random_state=0)
    Z = pca.fit_transform(X_proc)        # scores
    P = pca.components_.T                # loadings
    eig = pca.explained_variance_
    var = pca.explained_variance_ratio_ * 100.0

    return {
        "X_proc":X_proc, "Z":Z, "P":P, "eig":eig, "var":var, "k":k, "cities":cities,
        "full_var_all": full_var_all, "full_eig_all": full_eig_all, "cumvar_all": cumvar_all,
        "retain": retain
    }

def fig_pca_axis_picker(Z, cities, var, S=1000):
    """
    2D scatter of PCA scores with one dropdown per axis.

    Each dropdown lists every component twice (as-is and sign-flipped); picking
    an entry swaps that axis' data and title. Starts on PC1 vs PC2 with city
    names drawn next to the points. The figure is S x S pixels.
    """
    n_pcs = Z.shape[1]
    first_x, first_y = 1, 2

    def title_pc(idx, neg=False):
        sign = "-" if neg else ""
        return f"{sign}PC{idx} ({var[idx-1]:.1f}%)"

    fig = go.Figure()
    fig.add_trace(go.Scatter(
        x=Z[:, first_x - 1],
        y=Z[:, first_y - 1],
        mode="markers+text",
        text=cities,
        textposition="top center",
        textfont=dict(size=10),
        marker=dict(size=8, opacity=0.9),
        hovertemplate="<b>%{text}</b><br>PCX=%{x:.3f}<br>PCY=%{y:.3f}<extra></extra>",
    ))
    fig.add_hline(y=0)
    fig.add_vline(x=0)
    fig.update_xaxes(title_text=title_pc(first_x))
    fig.update_yaxes(title_text=title_pc(first_y))

    # Compact styling shared by both dropdowns.
    compact = dict(font=dict(size=10), pad={"t":1,"b":1,"l":1,"r":1}, bgcolor="rgba(255,255,255,0.6)")

    def pc_dropdown(axis, xpos):
        # Anything other than 'x' drives the y axis.
        coord = "x" if axis == 'x' else "y"
        title_key = f"{coord}axis.title.text"
        buttons = []
        for j in range(1, n_pcs + 1):
            scores = Z[:, j - 1]
            buttons.append(dict(label=f"PC{j} ({var[j-1]:.0f}%)", method="update",
                                args=[{coord: [scores]}, {title_key: title_pc(j)}]))
            buttons.append(dict(label=f"-PC{j}", method="update",
                                args=[{coord: [-scores]}, {title_key: title_pc(j, True)}]))
        menu = dict(type="dropdown", x=xpos, y=1.08, xanchor="center", yanchor="top",
                    direction="down", showactive=True, buttons=buttons)
        menu.update(compact)
        return menu

    def label_anno(txt, xpos):
        return dict(text=txt, x=xpos, y=1.13, xref="paper", yref="paper",
                    xanchor="center", yanchor="top", showarrow=False, font=dict(size=11))

    x_menu_pos, y_menu_pos = 0.34, 0.58
    fig.update_layout(
        title="PCA — Pick PCs (± direction) • City names shown",
        updatemenus=[pc_dropdown('x', x_menu_pos), pc_dropdown('y', y_menu_pos)],
        annotations=[label_anno("X PC", x_menu_pos), label_anno("Y PC", y_menu_pos)],
        margin=dict(l=60, r=20, t=105, b=60),
        hovermode="closest",
        width=S, height=S, autosize=False,
    )
    return fig

def fig_pca_residual_map(X_proc, Z, P, eig, cities, alpha=0.01, S=700):
    """
    Outlier map: Hotelling T² (x) against Q residual / SPE (y), one point per city.

    Q is the squared reconstruction error of each row from the retained
    components; T² is the sum of squared scores scaled by their eigenvalues.
    Thresholds are the empirical (1 - alpha) quantiles of each statistic, drawn
    as dashed lines. Rows above either threshold are printed as outliers.
    The figure is 900 wide and S high.
    """
    reconstruction = Z @ P.T
    residual = X_proc - reconstruction
    Q = np.sum(np.square(residual), axis=1)
    T2 = np.sum(np.square(Z) / eig, axis=1)

    cutoff = 1 - alpha
    Q_thr = float(np.quantile(Q, cutoff))
    T2_thr = float(np.quantile(T2, cutoff))

    out_df = pd.DataFrame({"City": cities, "Q_residual": Q, "T2": T2})
    flagged = (Q > Q_thr) | (T2 > T2_thr)
    outliers = out_df[flagged].sort_values(["Q_residual", "T2"], ascending=False)
    if len(outliers) == 0:
        print(f"No outliers at alpha = {alpha:.2%}")
    else:
        print(f"Outliers (alpha = {alpha:.2%})\n")
        print(outliers.to_string(index=False))

    points = go.Scatter(
        x=T2, y=Q,
        mode="markers+text",
        text=cities, textposition="top center", textfont=dict(size=10),
        marker=dict(size=8, opacity=0.9),
        hovertemplate="<b>%{text}</b><br>T²=%{x:.3f}<br>Q=%{y:.3f}<extra></extra>",
    )
    fig = go.Figure()
    fig.add_trace(points)
    fig.add_vline(x=T2_thr, line_dash="dash")
    fig.add_hline(y=Q_thr, line_dash="dash")
    fig.update_xaxes(title="Hotelling T²")
    fig.update_yaxes(title="Q residual (SPE)")
    fig.update_layout(
        title=f"PCA Residual Outlier Map (α={alpha}, k={Z.shape[1]})",
        margin=dict(l=60, r=20, t=80, b=60),
        hovermode="closest",
        width=900, height=S, autosize=False,
    )
    return fig

# -------- Explained Variance Plots (scree and cumulative) --------
def fig_pca_scree(full_var_all, k_selected, S=700):
    """
    Bar chart of explained variance (%) per principal component.

    A dashed vertical line just after bar k_selected marks the retained
    components. The figure is 900 wide and S high.
    """
    component_ids = np.arange(1, len(full_var_all) + 1)

    fig = go.Figure()
    fig.add_trace(go.Bar(x=component_ids, y=full_var_all, name="Explained variance (%)"))
    # Offset by half a bar so the marker sits between component k and k+1.
    fig.add_vline(
        x=k_selected + 0.5,
        line_dash="dash",
        annotation_text=f"k = {k_selected}",
        annotation_position="top",
    )
    fig.update_xaxes(title="Principal component")
    fig.update_yaxes(title="Explained variance (%)", rangemode="tozero")
    fig.update_layout(
        title="PCA Scree Plot (per-component explained variance)",
        margin=dict(l=60, r=20, t=80, b=60),
        hovermode="x unified",
        width=900, height=S, autosize=False,
        showlegend=False,
    )
    return fig

def fig_pca_cumulative(cumvar_all, k_selected, retain=0.95, S=700):
    """
    Line chart of cumulative explained variance (%) by number of components.

    A dashed horizontal line marks the retain threshold (as a percentage) and a
    dashed vertical line just after k_selected marks the retained components.
    The figure is 900 wide and S high.
    """
    component_ids = np.arange(1, len(cumvar_all) + 1)
    threshold_pct = retain * 100.0

    fig = go.Figure()
    fig.add_trace(go.Scatter(
        x=component_ids, y=cumvar_all,
        mode="lines+markers", name="Cumulative EVR (%)",
    ))
    fig.add_hline(
        y=threshold_pct,
        line_dash="dash",
        annotation_text=f"{retain*100:.0f}% threshold",
        annotation_position="bottom right",
    )
    fig.add_vline(
        x=k_selected + 0.5,
        line_dash="dash",
        annotation_text=f"k = {k_selected}",
        annotation_position="top",
    )
    fig.update_xaxes(title="Principal component")
    fig.update_yaxes(title="Cumulative explained variance (%)", rangemode="tozero")
    fig.update_layout(
        title="PCA Cumulative Explained Variance",
        margin=dict(l=60, r=20, t=80, b=60),
        hovermode="x unified",
        width=900, height=S, autosize=False,
        showlegend=False,
    )
    return fig

# ----------------------------- LOO PCA — Clickable Widget -----------------------------
def fig_loo_pca_widget(df, city_col, month_cols, S=800):
    """
    Interactive leave-one-out (LOO) PCA on two components.

    Blue points are the scores of a model trained on all cities. Clicking a
    blue point refits imputer, scaler and PCA without that city, projects every
    city (including the removed one) through the refit model, and moves the red
    points to those LOO scores. Axis titles and the info line show LOO vs full
    explained variance.

    LOO components are sign-aligned with the full model before plotting: the
    sign of a principal component is arbitrary, so without alignment a refit
    can show the whole cloud mirrored even though the model barely changed.

    Requires ipywidgets (imported here so the module loads without it).
    Returns a VBox(info line, FigureWidget) to display in Jupyter.
    """
    from ipywidgets import HTML, VBox

    X = df[month_cols].apply(pd.to_numeric, errors="coerce").values
    cities = df[city_col].astype(str).values
    n = X.shape[0]
    idx_all = np.arange(n)

    def project_with_pca_2pc(X, train_idx):
        # Fit every preprocessing step on the training rows only, then apply to all rows.
        imp = SimpleImputer(strategy="mean").fit(X[train_idx])
        X_imp_train = imp.transform(X[train_idx]); X_imp_full = imp.transform(X)
        scaler = StandardScaler().fit(X_imp_train)
        X_std_train = scaler.transform(X_imp_train); X_std_full = scaler.transform(X_imp_full)
        pca = PCA(n_components=2, random_state=0).fit(X_std_train)
        scores_full = pca.transform(X_std_full); var = pca.explained_variance_ratio_ * 100.0
        return scores_full, var

    def align_signs(scores, reference):
        # Flip any component whose scores are anti-correlated with the reference model.
        signs = np.where(np.sum(scores * reference, axis=0) < 0, -1.0, 1.0)
        return scores * signs

    # The full model does not depend on the clicked city: fit it once.
    full_scores, full_var = project_with_pca_2pc(X, idx_all)
    fig = go.FigureWidget()
    # Trace 0: LOO (red) — starts on top of the full model
    fig.add_scatter(x=full_scores[:,0], y=full_scores[:,1], mode="markers+text",
                    text=cities, textposition="top center", textfont=dict(size=10),
                    marker=dict(size=8, opacity=0.9, color="red"),
                    name="LOO (train without selected city)")
    # Trace 1: Full (blue) — clickable
    fig.add_scatter(x=full_scores[:,0], y=full_scores[:,1], mode="markers+text",
                    text=cities, textposition="bottom center", textfont=dict(size=10),
                    marker=dict(size=8, opacity=0.9, color="blue"),
                    name="Full (train with all cities)")
    fig.add_hline(y=0, line_width=1); fig.add_vline(x=0, line_width=1)
    fig.update_xaxes(title=f"PC1 (full {full_var[0]:.1f}%)")
    fig.update_yaxes(title=f"PC2 (full {full_var[1]:.1f}%)", scaleanchor="x", scaleratio=1)
    fig.update_layout(title="LOO PCA (click a blue city to recompute)",
                      margin=dict(l=60,r=20,t=80,b=60), hovermode="closest",
                      legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="left", x=0.0),
                      width=S, height=S, autosize=False)
    info = HTML(value="<b>Click a blue city to compute the LOO model…</b>")

    def handle_click(trace, points, selector):
        if not points.point_inds:
            return
        i = points.point_inds[0]
        city = cities[i]
        train_idx = np.delete(idx_all, i)
        loo_scores, loo_var = project_with_pca_2pc(X, train_idx)
        loo_scores = align_signs(loo_scores, full_scores)
        with fig.batch_update():
            # Only the red trace moves; the blue (full-model) trace never changes.
            fig.data[0].x, fig.data[0].y = loo_scores[:,0], loo_scores[:,1]
            fig.layout.xaxis.title.text = f"PC1 (LOO {loo_var[0]:.1f}% | full {full_var[0]:.1f}%)"
            fig.layout.yaxis.title.text = f"PC2 (LOO {loo_var[1]:.1f}% | full {full_var[1]:.1f}%)"
            fig.layout.title.text = f"LOO PCA — removed: {city}"
        info.value = (f"<b>Removed:</b> {city} &nbsp; | &nbsp; "
                      f"<b>LOO var:</b> ({loo_var[0]:.1f}%, {loo_var[1]:.1f}%) &nbsp; | &nbsp; "
                      f"<b>Full var:</b> ({full_var[0]:.1f}%, {full_var[1]:.1f}%)")

    fig.data[1].on_click(handle_click)  # attach to blue trace
    return VBox([info, fig])

# ----------------------------- LOO PCA — Dropdown Fallback (no widgets) -----------------------------
def fig_loo_pca_dropdown(df, city_col, month_cols, S=800):
    """
    Leave-one-out (LOO) PCA on two components, without ipywidgets.

    A dropdown picks the city to leave out. Red = every city projected through
    a model (imputer, scaler, PCA) trained without the selected city; blue =
    the model trained on all cities. All LOO models are precomputed and
    embedded in the dropdown buttons, so the figure works as static HTML.

    LOO components are sign-aligned with the full model before plotting: the
    sign of a principal component is arbitrary, so without alignment a refit
    can show the whole cloud mirrored even though the model barely changed.
    """
    # Data matrix
    X = df[month_cols].apply(pd.to_numeric, errors="coerce").values
    cities = df[city_col].astype(str).values
    n = X.shape[0]
    idx_all = np.arange(n)

    def project_with_pca_2pc(X, train_idx):
        # Fit every preprocessing step on the training rows only, then apply to all rows.
        imp = SimpleImputer(strategy="mean").fit(X[train_idx])
        X_imp_train = imp.transform(X[train_idx]); X_imp_full = imp.transform(X)
        scaler = StandardScaler().fit(X_imp_train)
        X_std_train = scaler.transform(X_imp_train); X_std_full = scaler.transform(X_imp_full)
        pca = PCA(n_components=2, random_state=0).fit(X_std_train)
        scores_full = pca.transform(X_std_full)          # (n_cities, 2)
        var = pca.explained_variance_ratio_ * 100.0      # (2,)
        return scores_full, var

    def align_signs(scores, reference):
        # Flip any component whose scores are anti-correlated with the reference model.
        signs = np.where(np.sum(scores * reference, axis=0) < 0, -1.0, 1.0)
        return scores * signs

    # Full model once
    full_scores, full_var = project_with_pca_2pc(X, idx_all)

    # Precompute LOO for each city
    loo_scores = []
    loo_vars = []
    for i in range(n):
        s, v = project_with_pca_2pc(X, np.delete(idx_all, i))
        loo_scores.append(align_signs(s, full_scores))
        loo_vars.append(v)

    # Base fig with two traces
    fig = go.Figure()
    # Trace 0: LOO (red) — start as full
    fig.add_trace(go.Scatter(
        x=full_scores[:,0], y=full_scores[:,1],
        mode="markers+text", text=cities, textposition="top center", textfont=dict(size=10),
        marker=dict(size=8, opacity=0.9, color="red"),
        name="LOO (train without selected city)"
    ))
    # Trace 1: Full (blue)
    fig.add_trace(go.Scatter(
        x=full_scores[:,0], y=full_scores[:,1],
        mode="markers+text", text=cities, textposition="bottom center", textfont=dict(size=10),
        marker=dict(size=8, opacity=0.9, color="blue"),
        name="Full (train with all cities)"
    ))

    # Crosshairs + layout
    fig.add_hline(y=0, line_width=1); fig.add_vline(x=0, line_width=1)
    fig.update_xaxes(title=f"PC1 (full {full_var[0]:.1f}%)")
    fig.update_yaxes(title=f"PC2 (full {full_var[1]:.1f}%)", scaleanchor="x", scaleratio=1)
    fig.update_layout(
        title="LOO PCA — choose a city to leave out",
        margin=dict(l=60, r=20, t=90, b=60),
        hovermode="closest",
        legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="left", x=0.0),
        width=S, height=S, autosize=False
    )

    # Build dropdown buttons.
    # Button layout args run in the browser, where newer plotly.js releases no
    # longer accept a bare string for "title"; "title.text" works on old and new.
    buttons = []
    # Button 0: reset (no LOO)
    buttons.append(dict(
        label="— Select city —",
        method="update",
        args=[
            {"x":[full_scores[:,0], full_scores[:,0]],
             "y":[full_scores[:,1], full_scores[:,1]],
             "text":[cities, cities]},
            {"xaxis.title.text": f"PC1 (full {full_var[0]:.1f}%)",
             "yaxis.title.text": f"PC2 (full {full_var[1]:.1f}%)",
             "title.text": "LOO PCA — choose a city to leave out"}
        ]
    ))
    # One button per city
    for city, loo_s, loo_v in zip(cities, loo_scores, loo_vars):
        buttons.append(dict(
            label=str(city),
            method="update",
            args=[
                {"x":[loo_s[:,0], full_scores[:,0]], "y":[loo_s[:,1], full_scores[:,1]],
                 "text":[cities, cities]},
                {"xaxis.title.text": f"PC1 (LOO {loo_v[0]:.1f}% | full {full_var[0]:.1f}%)",
                 "yaxis.title.text": f"PC2 (LOO {loo_v[1]:.1f}% | full {full_var[1]:.1f}%)",
                 "title.text": f"LOO PCA — removed: {city}"}
            ]
        ))

    fig.update_layout(
        updatemenus=[dict(
            type="dropdown", direction="down",
            x=0.02, xanchor="left", y=1.18, yanchor="top",
            buttons=buttons, showactive=True
        )],
        annotations=[dict(
            text="Leave out:", x=0.02, y=1.23, xref="paper", yref="paper",
            xanchor="left", yanchor="top", showarrow=False
        )]
    )
    return fig

# ----------------------------- Run Everything -----------------------------
if __name__ == "__main__":
    # Default workbook: data/CityTemp.xlsx next to this file, or under the
    # current directory when __file__ is undefined (e.g. inside a notebook).
    # load_city_temps searches further locations if that path is missing.
    if "__file__" in globals():
        BASE_DIR = Path(__file__).resolve().parent
    else:
        BASE_DIR = Path.cwd()
    XLSX = BASE_DIR / "data" / "CityTemp.xlsx"

    df, city_col, month_cols, col_to_num = load_city_temps(XLSX, sheet=0)
    long_df = to_long(df, city_col, month_cols, col_to_num)

    # Figure 1: monthly line chart
    fig1 = fig_line_monthly(long_df, city_col)
    fig1.show()

    # Figure 2: 3D scatter with per-axis month dropdowns
    fig2 = fig_scatter3d_month_picker(df, city_col, col_to_num)
    fig2.show()

    # PCA model shared by figures 3-6
    p = compute_pca_matrix(df, city_col, month_cols, retain=0.95)

    # Figure 3: PCA score plot with component pickers
    fig3 = fig_pca_axis_picker(p["Z"], p["cities"], p["var"])
    fig3.show()

    # Figure 4: scree plot
    fig4 = fig_pca_scree(p["full_var_all"], p["k"])
    fig4.show()

    # Figure 5: cumulative explained variance
    fig5 = fig_pca_cumulative(p["cumvar_all"], p["k"], retain=p["retain"])
    fig5.show()

    # Figure 6: residual outlier map (also prints the outlier table)
    fig6 = fig_pca_residual_map(p["X_proc"], p["Z"], p["P"], p["eig"], p["cities"])
    fig6.show()

    # Figure 7: leave-one-out PCA. Prefer the clickable widget (needs ipywidgets
    # and IPython); on any failure fall back to the plain dropdown figure.
    try:
        from IPython.display import display
        loo_widget = fig_loo_pca_widget(df, city_col, month_cols, S=800)
        display(loo_widget)
    except Exception as e:
        print("LOO PCA widget unavailable:", e)
        print("Falling back to dropdown-based figure (no widgets needed).")
        fig_loo = fig_loo_pca_dropdown(df, city_col, month_cols, S=800)
        fig_loo.show()

    # Optional: save standalone HTMLs
    # fig1.write_html("city_temps_interactive.html", include_plotlyjs="cdn")
    # fig2.write_html("city_temps_3d.html", include_plotlyjs="cdn")
    # fig3.write_html("pca_axis_picker.html", include_plotlyjs="cdn")
    # fig4.write_html("pca_scree.html", include_plotlyjs="cdn")
    # fig5.write_html("pca_cumulative.html", include_plotlyjs="cdn")
    # fig6.write_html("pca_residual_map.html", include_plotlyjs="cdn")


Loaded: D:\Codes\ham\PCA\data\CityTemp.xlsx


Outliers (alpha = 1.00%)

    City  Q_residual       T2
  Vienna    5.603110 1.152162
Marbella    0.301382 8.483886


VBox(children=(HTML(value='<b>Click a blue city to compute the LOO model…</b>'), FigureWidget({
    'data': [{…