In [1]:
!pip -q install streamlit pyngrok PyPortfolioOpt

In [2]:
import os
from getpass import getpass

from pyngrok import ngrok

# SECURITY: never store the ngrok auth token in the notebook itself.
# The token is read from the NGROK_AUTHTOKEN environment variable, falling back
# to a non-echoing interactive prompt. Any token that was previously saved in
# this notebook must be considered leaked: revoke it in the ngrok dashboard and
# generate a new one.
ngrok_auth_token = os.environ.get("NGROK_AUTHTOKEN") or getpass("ngrok auth token: ")
ngrok.set_auth_token(ngrok_auth_token)


In [5]:
%%writefile app.py
import streamlit as st
import pandas as pd
import numpy as np
import plotly.express as px
import yfinance as yf

from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA

from pypfopt.efficient_frontier import EfficientFrontier
from pypfopt import risk_models

# Page-level settings (browser tab title, full-width layout). Keep this ahead of
# every other Streamlit output call in the script.
st.set_page_config(
    layout="wide",
    page_title="Clustering + LSTM Dashboard",
)

# =========================
# DATA LOADING
# =========================
@st.cache_data
def load_data():
    """Load the three dashboard inputs from the current working directory.

    Reads ``df1_roles.parquet`` and ``predictions.parquet`` (both must carry a
    ``pd.MultiIndex``; the rest of the app addresses its levels as Date and
    Ticker) and ``backtest_daily.csv`` (first column used as a parsed date index).

    Returns:
        tuple: ``(df_roles, preds, bt)``, each sorted by its index.

    Raises:
        ValueError: if either parquet frame is not indexed by a MultiIndex.
    """
    df_roles = pd.read_parquet("df1_roles.parquet")
    preds = pd.read_parquet("predictions.parquet")
    bt = pd.read_csv("backtest_daily.csv", index_col=0, parse_dates=True)

    # Fail early with a readable message: every later .xs(date, level=0) call
    # relies on a (Date, Ticker) MultiIndex.
    for frame, filename in ((df_roles, "df1_roles.parquet"), (preds, "predictions.parquet")):
        if not isinstance(frame.index, pd.MultiIndex):
            raise ValueError(f"{filename} doit être indexé par (Date, Ticker).")

    return df_roles.sort_index(), preds.sort_index(), bt.sort_index()

# Load every input once; st.cache_data on load_data keeps the frames across reruns.
df_roles, preds, bt = load_data()

# Title text repaired: the emoji and the em dash were stored as mis-decoded UTF-8.
st.title("📊 Dashboard — Clustering + LSTM Expected Return + Backtest")

# =========================
# HELPERS
# =========================
def month_period(d):
    """Return the first and last calendar day of the month that follows ``d``.

    Args:
        d: anything ``pd.Timestamp`` accepts (the end-of-month signal date).

    Returns:
        tuple: ``(start, end)`` as midnight-normalized ``pd.Timestamp`` values,
        where ``start`` is the next month-begin after ``d`` and ``end`` is the
        last day of that same month.
    """
    d = pd.Timestamp(d)
    start = (d + pd.offsets.MonthBegin(1)).normalize()
    # Derive the end from `start`, not from `d`: `d + MonthEnd(1)` only reaches
    # the NEXT month when `d` is itself a calendar month-end. For a last trading
    # day such as Jan 30 it returned Jan 31, i.e. an end date before `start`.
    end = start + pd.offsets.MonthEnd(1)
    return start, end

def select_role(df, score_col, top_frac, min_n, max_n):
    """Keep the best-scored rows of ``df`` according to ``score_col``.

    Rows whose score is missing are discarded and the rest are ranked from
    highest to lowest score. The number of rows kept is ``ceil(len * top_frac)``,
    raised to at least ``min_n``, then capped at ``max_n``, and finally limited
    to the number of rows available.

    Returns:
        DataFrame: the top rows in descending score order (empty if no row has a score).
    """
    ranked = df.dropna(subset=[score_col]).sort_values(score_col, ascending=False)
    if ranked.empty:
        return ranked

    available = len(ranked)
    target = int(np.ceil(available * top_frac))
    # Clamp order matters when min_n > max_n: the cap (max_n) wins over the floor.
    keep = min(available, min(max_n, max(min_n, target)))
    return ranked.head(keep)

def to_base1_from_logrets(logrets: pd.Series) -> pd.Series:
    """Turn a series of log returns into a cumulative growth curve.

    Each output value is ``exp`` of the running sum of the log returns up to and
    including that row.
    """
    cumulative_log_return = logrets.cumsum()
    return np.exp(cumulative_log_return)

def perf_metrics(logrets, freq=252):
    """Summarize a log-return series with four performance figures.

    Args:
        logrets: series of periodic log returns; missing values are ignored.
        freq: number of periods per year used for annualization.

    Returns:
        pd.Series with keys ``AnnReturn`` (exp(mean * freq) - 1), ``AnnVol``
        (population std * sqrt(freq)), ``Sharpe`` (annualized mean log return
        over AnnVol, no risk-free rate) and ``MaxDrawdown`` (most negative
        drop of the cumulative curve from its running peak). All four are NaN
        when no observation is left after dropping missing values.
    """
    metric_names = ("AnnReturn", "AnnVol", "Sharpe", "MaxDrawdown")

    clean = logrets.dropna()
    if not len(clean):
        return pd.Series(dict.fromkeys(metric_names, np.nan))

    annual_log_mean = clean.mean() * freq
    annual_vol = clean.std(ddof=0) * np.sqrt(freq)
    annual_return = np.exp(annual_log_mean) - 1
    # The tiny epsilon keeps the ratio finite for a zero-volatility series.
    sharpe_ratio = annual_log_mean / (annual_vol + 1e-12)

    curve = np.exp(clean.cumsum())
    running_peak = curve.cummax()
    worst_drawdown = (curve / running_peak - 1).min()

    values = (annual_return, annual_vol, sharpe_ratio, worst_drawdown)
    return pd.Series(dict(zip(metric_names, values)))

@st.cache_data
def get_spy_logrets(start, end):
    """Download SPY adjusted closes and return daily log returns named "SPY".

    Args:
        start, end: date bounds forwarded to ``yf.download``.

    Returns:
        pd.Series: ``log(price).diff()`` with missing values dropped, named
        ``"SPY"`` so it can be joined as a column. Empty if no price came back.
    """
    spy = yf.download("SPY", start=start, end=end, auto_adjust=False, progress=False)["Adj Close"]

    # NOTE(review): depending on the yfinance version, selecting "Adj Close" for
    # a single ticker yields either a Series or a one-column DataFrame. Collapse
    # the DataFrame case so this function always returns a Series.
    if isinstance(spy, pd.DataFrame):
        if spy.shape[1] == 0:
            return pd.Series(dtype=float, name="SPY")
        spy = spy.iloc[:, 0]

    r = np.log(spy.dropna()).diff().dropna()
    r.name = "SPY"
    return r

def optimize_portfolio(prices_lb, mu_pred_monthly, max_weight=0.10, objective="Max Sharpe",
                       winners=None, stables=None, alloc_winners=0.50, annualize_mu=True):
    """
    Compute long-only portfolio weights with PyPortfolioOpt's EfficientFrontier.

    Args:
        prices_lb: DataFrame of daily Adj Close prices over the lookback window,
            one column per ticker. Columns containing any NaN are dropped.
        mu_pred_monthly: Series indexed by ticker (monthly expected return).
        max_weight: upper bound of each individual weight (lower bound is 0).
        objective: "Min Vol" minimizes volatility; any other value maximizes Sharpe.
        winners, stables: optional ticker lists. When both are given and both
            still contain tickers present in ``prices_lb``, the winners' weights
            are constrained to sum to ``alloc_winners`` and the stables' weights
            to ``1 - alloc_winners``.
        alloc_winners: share of the portfolio allocated to ``winners``.
        annualize_mu: compound the monthly expected returns to annual ones so
            that they are in the same unit as the annualized covariance.

    Returns:
        tuple: ``(weights, info)``. ``weights`` is a Series of the strictly
        positive cleaned weights, sorted descending. ``info`` holds
        ``ExpReturn``/``ExpVol``/``ExpSharpe`` on success, ``{"error": msg}``
        when the solver fails, and is ``{}`` (with empty weights) when fewer
        than two usable tickers remain.
    """
    tickers = [t for t in mu_pred_monthly.index if t in prices_lb.columns]
    prices_lb = prices_lb[tickers].dropna(axis=1, how="any")
    if prices_lb.shape[1] < 2:
        return pd.Series(dtype=float), {}

    # Covariance (annualized by frequency=252)
    S = risk_models.sample_cov(prices_lb, frequency=252)

    mu = mu_pred_monthly.reindex(prices_lb.columns).fillna(mu_pred_monthly.mean())

    # IMPORTANT: keep units consistent (mu annual if S annual)
    if annualize_mu:
        # Clip the monthly return itself at -99% BEFORE compounding. Clipping
        # the gross return (1 + mu) at -0.99 left negative bases in place, and
        # a negative base to the 12th power becomes a large positive return.
        mu = (1 + mu.clip(lower=-0.99)) ** 12 - 1  # monthly -> annual approx

    ef = EfficientFrontier(mu, S, weight_bounds=(0.0, max_weight), solver="SCS")

    # optional constraint winners/stables split
    if winners is not None and stables is not None:
        winners = [t for t in winners if t in prices_lb.columns]
        stables = [t for t in stables if t in prices_lb.columns]

        if len(winners) > 0 and len(stables) > 0:
            idx = list(prices_lb.columns)
            w_idx = [idx.index(t) for t in winners]
            s_idx = [idx.index(t) for t in stables]
            ef.add_constraint(lambda w: sum(w[i] for i in w_idx) == alloc_winners)
            ef.add_constraint(lambda w: sum(w[i] for i in s_idx) == 1 - alloc_winners)

    try:
        if objective == "Min Vol":
            ef.min_volatility()
        else:
            ef.max_sharpe()

        w = pd.Series(ef.clean_weights(), dtype=float)
        w = w[w > 0].sort_values(ascending=False)

        # perf
        perf = ef.portfolio_performance(verbose=False)  # (ret, vol, sharpe)
        info = {"ExpReturn": perf[0], "ExpVol": perf[1], "ExpSharpe": perf[2]}
        return w, info

    except Exception as e:
        # Infeasible bounds/constraints or solver failures are reported to the
        # caller instead of crashing the dashboard.
        return pd.Series(dtype=float), {"error": str(e)}

# =========================
# SIDEBAR CONTROLS (dates safe)
# =========================
# Signal dates available in each input, then the dates present in BOTH: only
# those can be cross-sectioned from df_roles and preds further down.
dates_roles = df_roles.index.get_level_values("Date").unique().sort_values()
dates_preds = preds.index.get_level_values("Date").unique().sort_values()

dates_ok = dates_roles.intersection(dates_preds)
if len(dates_ok) == 0:
    # Nothing to display: report it in the page and halt this script run.
    st.error("Aucune date commune entre df_roles et preds. Vérifie tes exports parquet.")
    st.stop()

st.sidebar.header("Contrôles")

hide_early = st.sidebar.checkbox("Masquer périodes avec peu de données", value=True)

# Rule used to hide early periods: skip the first 12 months after the earliest
# common date; fall back to every common date if that leaves nothing.
if hide_early:
    min_date = dates_ok.min() + pd.DateOffset(months=12)  # buffer
    dates_ui = dates_ok[dates_ok >= min_date]
    if len(dates_ui) == 0:
        dates_ui = dates_ok
else:
    dates_ui = dates_ok

# Default selection: the 12th date from the end (or the first one if fewer exist).
default_idx = max(0, len(dates_ui) - 12)
date_sel = st.sidebar.selectbox("Date signal (fin de mois)", dates_ui, index=default_idx)

# Per-role selection size: fraction of the role, bounded by min/max counts.
top_frac = st.sidebar.slider("Top fraction par rôle", 0.10, 1.00, 0.50, 0.05)
min_n = st.sidebar.number_input("Min actions par rôle", 3, 60, 10, 1)
max_n = st.sidebar.number_input("Max actions par rôle", 10, 300, 60, 5)

# Portfolio optimization settings.
st.sidebar.subheader("Portefeuille (optionnel)")
objective = st.sidebar.selectbox("Objectif", ["Max Sharpe", "Min Vol"])
max_weight = st.sidebar.slider("Max poids par action", 0.02, 0.20, 0.10, 0.01)
alloc_winners = st.sidebar.slider("Allocation Winners", 0.0, 1.0, 0.50, 0.05)
lookback_months = st.sidebar.slider("Lookback risque (mois)", 3, 24, 12, 1)
annualize_mu = st.sidebar.checkbox("Annualiser mu_pred (recommandé)", value=True)

# =========================
# FILTER MONTH DATA
# =========================
# Cross-sections of both inputs at the selected signal date (index level 0).
m = df_roles.xs(date_sel, level=0).copy()
pred_m = preds.xs(date_sel, level=0).copy()  # date_sel comes from the common dates

# NOTE(review): these labels are compared with the values stored in the "role"
# column, so they must match the data exactly. The accented character in the
# second label looks mis-encoded here; verify it against df1_roles.parquet.
role_labels = m["role"]
winners_all = m.loc[role_labels == "Gagnants dynamiques"].copy()
stables_all = m.loc[role_labels == "Stables de qualitÃ©"].copy()

# Best names of each role, ranked on the score that defines the role.
w_sel = select_role(winners_all, "TrendScore", top_frac, min_n, max_n)
s_sel = select_role(stables_all, "QualityScore", top_frac, min_n, max_n)

# Winners first, then stables, each row enriched with its predicted return
# (NaN when the ticker has no prediction on that date).
selected = pd.concat([w_sel, s_sel]).copy()
predicted_next = pred_m["pred_return_next"]
selected["pred_return_next"] = predicted_next.reindex(selected.index)

# Universe summary: size of each role before and after selection.
st.subheader("0) Universe du mois (Winners / Stables)")

universe_counts = [
    ("Winners (total)", len(winners_all)),
    ("Stables (total)", len(stables_all)),
    ("Winners sélectionnées", len(w_sel)),
    ("Stables sélectionnées", len(s_sel)),
]
# One metric tile per count, laid out left to right.
for column, (label, count) in zip(st.columns(4), universe_counts):
    column.metric(label, count)

st.caption("Astuce : si tu vois des mois avec 0 sélection, c’est souvent au tout début (pas assez d’historique pour retours/betas/LSTM).")

st.divider()


# Scores chart (shown instead of a table)
st.subheader("2) Scores moyens par rôle (visuel)")

# Only the score columns actually present in this month's data are plotted.
score_cols = [c for c in ["TrendScore", "QualityScore", "RiskScore", "StressScore"] if c in m.columns]
if score_cols:
    role_scores = m.groupby("role")[score_cols].mean(numeric_only=True).reset_index()
    # Long format: one row per (role, score) pair, as expected by px.bar's color grouping.
    role_scores_long = role_scores.melt(id_vars="role", var_name="Score", value_name="Value")
    fig_scores = px.bar(role_scores_long, x="role", y="Value", color="Score",
                        title="Scores moyens par rôle (ce mois)")
    st.plotly_chart(fig_scores, use_container_width=True)
else:
    st.info("Colonnes de scores non trouvées dans df_roles.")

st.divider()

# =========================
# TABLE OF SELECTED STOCKS
# =========================
st.subheader("3) Actions sélectionnées (scores + expected return LSTM)")

# Display only the columns that exist in the selection, in this fixed order.
show_cols = [c for c in ["cluster","role","pred_return_next","atr_pct","GK","rsi",
                         "return_12m","return_6m","return_3m","TrendScore","QualityScore","RiskScore"]
             if c in selected.columns]

# Highest predicted next-month return first.
tbl = selected[show_cols].copy()
tbl = tbl.sort_values("pred_return_next", ascending=False)
st.dataframe(tbl, use_container_width=True, height=420)

st.divider()


# =========================
# RISK vs EXPECTED RETURN
# =========================
st.subheader("5) Risk vs Expected Return (LSTM) — sélection")

# tbl only contains the columns that exist in the data, so the columns this
# plot needs are checked explicitly instead of letting plotly raise.
scatter_required = ["atr_pct", "pred_return_next", "role"]
scatter_missing = [c for c in scatter_required if c not in tbl.columns]

if len(tbl) == 0:
    st.info("Aucune action sélectionnée pour cette date.")
elif scatter_missing:
    st.info("Colonnes manquantes pour ce graphique : " + ", ".join(scatter_missing))
else:
    scatter_df = tbl.reset_index()
    fig = px.scatter(
        scatter_df,
        x="atr_pct", y="pred_return_next",
        color="role",
        # The ticker label is only available when the index level is named "Ticker".
        hover_name="Ticker" if "Ticker" in scatter_df.columns else None,
        title="Expected return (LSTM) vs Risk (atr_pct)"
    )
    st.plotly_chart(fig, use_container_width=True)

st.divider()


# =========================
# PRICE PLOT NEXT MONTH (remove max=2)
# =========================
st.subheader("7) Prix (mois suivant) — plusieurs actions (normalisé)")

if len(tbl):
    tickers = tbl.index.tolist()
    pick = st.multiselect("Choisir des actions (évite >10 pour la lisibilité)", tickers, default=tickers[:3])

    start_hold, end_hold = month_period(date_sel)
    st.caption(f"Période du mois suivant: {start_hold.date()} → {end_hold.date()}")

    if pick:
        px_data = yf.download(
            pick,
            start=start_hold.strftime("%Y-%m-%d"),
            # One extra day is requested so that end_hold itself is part of the window.
            end=(end_hold + pd.Timedelta(days=1)).strftime("%Y-%m-%d"),
            auto_adjust=False,
            progress=False
        )["Adj Close"]

        if isinstance(px_data, pd.Series):
            px_data = px_data.to_frame()

        # Drop days without any quote and tickers without any quote.
        px_data = px_data.dropna(how="all").dropna(axis=1, how="all")

        if not px_data.empty:
            # Normalize each ticker by ITS OWN first available price (base 1).
            # Dividing by the first row turned a ticker's whole line into NaN
            # whenever that ticker had no quote on the first day.
            first_valid = px_data.bfill().iloc[0]
            px_norm = px_data / first_valid
            figp = px.line(px_norm, title="Adj Close normalisé (base 1) — mois suivant")
            st.plotly_chart(figp, use_container_width=True)
        else:
            st.warning("Pas de données de prix pour la sélection sur ce mois.")
else:
    st.info("Aucune action sélectionnée, donc rien à tracer.")

st.divider()

# =========================
# BACKTEST + SPY (always show SPY)
# =========================
st.subheader("8) Backtest — Strategy vs Baseline vs SPY")

# Rows without any value carry no information; drop them before anything else.
bt2 = bt.dropna(how="all").copy()

# An empty frame or a non-date index (CSV dates not parsed) cannot be used:
# the date bounds below need real timestamps.
if bt2.empty or not isinstance(bt2.index, pd.DatetimeIndex):
    st.warning("backtest_daily.csv est vide ou mal formaté.")
else:
    # add SPY if missing
    start = bt2.index.min().strftime("%Y-%m-%d")
    end = (bt2.index.max() + pd.Timedelta(days=1)).strftime("%Y-%m-%d")

    if "SPY" not in bt2.columns:
        try:
            spy = get_spy_logrets(start, end)
        except Exception as exc:
            # A failed benchmark download must not take the whole backtest down.
            spy = None
            st.warning(f"SPY indisponible : {exc}")
        # Joining an empty benchmark would add an all-NaN column and the
        # dropna() below would then remove every backtest row.
        if spy is not None and len(spy) > 0:
            bt2 = bt2.join(spy, how="left")

    # keep only numeric columns, and only the dates where all of them are known
    cols = [c for c in bt2.columns if pd.api.types.is_numeric_dtype(bt2[c])]
    bt2 = bt2[cols].dropna()

    if bt2.shape[0] == 0:
        st.warning("Backtest vide après nettoyage (NaN).")
    else:
        # Columns are treated as log returns and compounded into base-1 curves.
        base1 = bt2.apply(to_base1_from_logrets, axis=0)
        figbt = px.line(base1, title="Performance cumulée (base 1)")
        st.plotly_chart(figbt, use_container_width=True)

        st.subheader("9) Métriques (annualisé / drawdown)")
        met = pd.DataFrame({c: perf_metrics(bt2[c]) for c in bt2.columns}).T
        met_pct = met.copy()
        # Percent display for returns, volatility and drawdown; Sharpe stays a ratio.
        for pct_col in ("AnnReturn", "AnnVol", "MaxDrawdown"):
            met_pct[pct_col] = (100 * met_pct[pct_col]).round(2)
        met_pct["Sharpe"] = met_pct["Sharpe"].round(2)

        st.dataframe(met_pct, use_container_width=True)

st.caption("Signaux fin de mois, prédiction LSTM du mois+1, sélection winners/stables, portefeuille optimisé et comparaison à SPY.")


Overwriting app.py


In [6]:
import socket
import subprocess
import time

STREAMLIT_PORT = 8501
STARTUP_TIMEOUT_S = 30

# Launch Streamlit in the background
process = subprocess.Popen([
    "streamlit", "run", "app.py",
    "--server.port", str(STREAMLIT_PORT),
    "--server.address", "0.0.0.0",
    "--server.headless", "true",
    "--browser.gatherUsageStats", "false"
])

# Wait until the server really accepts connections instead of sleeping a fixed
# 3 seconds: a slow start would otherwise expose a dead port, and an app that
# exits right away would go unnoticed.
deadline = time.monotonic() + STARTUP_TIMEOUT_S
while True:
    if process.poll() is not None:
        raise RuntimeError(f"Streamlit exited early with code {process.returncode}")
    try:
        with socket.create_connection(("127.0.0.1", STREAMLIT_PORT), timeout=1):
            break
    except OSError:
        if time.monotonic() > deadline:
            process.terminate()
            raise TimeoutError(
                f"Streamlit did not listen on port {STREAMLIT_PORT} within {STARTUP_TIMEOUT_S}s"
            ) from None
        time.sleep(0.5)

# Expose the Streamlit port through an ngrok tunnel
public_url = ngrok.connect(STREAMLIT_PORT)
print("✅ Ouvre ce lien :", public_url)


âœ… Ouvre ce lien : NgrokTunnel: "https://uninfused-brooklyn-tawdrily.ngrok-free.dev" -> "http://localhost:8501"
