# 03 – Pipeline TDA (Persistence Landscapes)
Pipeline completo para SEIO 2023: descarga, retornos, PCA y TDA (H₀ y H₁), con corrección en PersLandscapeExact.

In [None]:
# ===============================================================
# 0. Importaciones y carpetas
# ===============================================================
from pathlib import Path
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import yfinance as yf
from sklearn.decomposition import PCA
from ripser import ripser
from persim import PersLandscapeExact
from tqdm import tqdm

# Data folders: everything lives under BASE, split by processing stage.
BASE = Path("/content/data")
RAW, PROCESSED, ANALYSIS = (BASE / sub for sub in ("raw", "processed", "analysis"))

# Create each folder (and any missing parents); existing folders are left as-is.
for folder in (RAW, PROCESSED, ANALYSIS):
    folder.mkdir(parents=True, exist_ok=True)

print("Carpetas listas en /content/data/")

In [None]:
# ===============================================================
# 1. Robust download (BTC, ETH, LTC, DOGE)
# ===============================================================
# Short asset name -> symbol passed to yf.download.
tickers = dict(BTC="BTC-USD", ETH="ETH-USD", LTC="LTC-USD", DOGE="DOGE-USD")

start_date = "2018-01-01"
end_date = "2020-12-30"

# Candidate price columns, most preferred first.
PRICE_COLUMNS = ("Adj Close", "Close", "Price")

prices = {}

for name, ticker in tickers.items():
    print(f"\nDescargando {name} ({ticker})...")

    data = yf.download(ticker, start=start_date, end=end_date, auto_adjust=False)

    # Abort the whole run as soon as one asset has no rows.
    if data.empty:
        raise ValueError(f"❌ {name}: sin datos en rango.")

    # With a MultiIndex header, keep only its first level as column names.
    if isinstance(data.columns, pd.MultiIndex):
        data.columns = data.columns.get_level_values(0)

    # Pick the first candidate column that is actually present.
    chosen = next((col for col in PRICE_COLUMNS if col in data.columns), None)
    if chosen is None:
        raise ValueError(f"❌ {name}: columnas = {data.columns}")

    prices[name] = data[chosen]

# One column per asset; rows with a missing value in any asset are dropped.
prices_df = pd.DataFrame(prices).dropna()
prices_df.to_csv(RAW / "criptos_precios_2018_2020.csv")

print("\nPrecios listos:")
print(prices_df.head())

In [None]:
# ===============================================================
# 2. Log returns
# ===============================================================
# Log of each price divided by the previous row's price. The first row has
# no predecessor, so it is NaN and gets removed by dropna().
log_returns = np.log(prices_df.div(prices_df.shift(1))).dropna()

# Persist the same table in both formats.
parquet_path = PROCESSED / "retornos_logaritmicos.parquet"
csv_path = PROCESSED / "criptos_retornos_log_2018_2020.csv"
log_returns.to_parquet(parquet_path)
log_returns.to_csv(csv_path)

print("Retornos listos.")

In [None]:
# ===============================================================
# 3. PCA on R⁴
# ===============================================================

# Project the return vectors (one per row) onto two principal components.
pca = PCA(n_components=2)
proj = pca.fit_transform(log_returns.values)
df_pca = pd.DataFrame(
    {"PC1": proj[:, 0], "PC2": proj[:, 1]},
    index=log_returns.index,
)

df_pca.to_csv(PROCESSED / "pca_nube_R4.csv")

# Scatter plot of the projected cloud. The figure is saved but intentionally
# not closed here.
fig, ax = plt.subplots(figsize=(10, 6))
ax.scatter(df_pca["PC1"], df_pca["PC2"], s=10, alpha=0.5)
ax.set_title("PCA de la nube R⁴ (BTC–ETH–LTC–DOGE)")
ax.set_xlabel("PC1")
ax.set_ylabel("PC2")
ax.grid(True)
fig.savefig(ANALYSIS / "pca_nube_R4.png", dpi=300)

print("PCA completado.")

In [None]:
# ===============================================================
# 4. TDA — Persistence Landscapes (H₀ y H₁)
# ===============================================================

def compute_norms(returns_df, window_size=60, step=1, hom_deg=1):
    """Compute L1 and L2 persistence-landscape norms over sliding windows.

    Each run of ``window_size`` consecutive rows of ``returns_df`` is treated
    as a point cloud (one point per row). ``ripser`` computes its persistence
    diagrams, the exact persistence landscape of degree ``hom_deg`` is built
    from the finite bars, and its 1-norm and 2-norm are recorded.

    Args:
        returns_df: DataFrame whose rows are the points of the cloud; its
            index supplies the labels of the result.
        window_size: Number of consecutive rows in each window.
        step: Offset, in rows, between the starts of consecutive windows.
        hom_deg: Homology degree whose landscape is measured (0, 1, ...).

    Returns:
        DataFrame with columns ``"L1"`` and ``"L2"``, one row per window,
        indexed by the label of the row at position
        ``start + window_size // 2`` of that window. The frame is empty when
        ``returns_df`` has fewer than ``window_size`` rows.

    Raises:
        TypeError: If ``hom_deg`` is not an ``int``.
        ValueError: If ``hom_deg`` is negative.
    """
    # Validate before any work is done; these are the same exception types
    # persim raises for an invalid degree.
    if not isinstance(hom_deg, int):
        raise TypeError("hom_deg must be an integer")
    if hom_deg < 0:
        raise ValueError("hom_deg must be non-negative")

    values = returns_df.values
    idx = returns_df.index
    n = len(returns_df)
    half_window = window_size // 2

    dates_c, l1_norms, l2_norms = [], [], []

    for start in tqdm(range(0, n - window_size + 1, step), desc=f"TDA H{hom_deg}"):
        cloud = values[start:start + window_size, :]

        # Only degrees 0..hom_deg are needed. Asking ripser for exactly that
        # avoids computing H1 when only H0 is requested and makes degrees
        # above 1 available.
        dgms = ripser(cloud, maxdim=hom_deg)["dgms"]

        # PersLandscapeExact takes a list of diagrams indexed by degree.
        # Bars with an infinite death are dropped and each remaining bar is
        # passed as a (birth, death) tuple.
        dgms_persim = [
            [tuple(bar) for bar in dgm[np.isfinite(dgm[:, 1])]]
            for dgm in dgms
        ]

        if len(dgms_persim) > hom_deg and dgms_persim[hom_deg]:
            pl = PersLandscapeExact(dgms=dgms_persim, hom_deg=hom_deg)
            l1_value = pl.p_norm(1)
            l2_value = pl.p_norm(2)
        else:
            # No finite bars in this degree: the landscape is identically
            # zero, so both norms are 0. The constructor is skipped because
            # persim's exact landscape construction is not expected to cope
            # with an empty diagram.
            l1_value = l2_value = 0.0

        l1_norms.append(l1_value)
        l2_norms.append(l2_value)
        # Label the window by its central row.
        dates_c.append(idx[start + half_window])

    return pd.DataFrame({"L1": l1_norms, "L2": l2_norms}, index=dates_c)


# Both homology degrees share the same rolling-window settings.
window_settings = {"window_size": 60, "step": 1}

# H1 is computed first, then H0; nothing is written until both succeed.
tda_H1 = compute_norms(log_returns, hom_deg=1, **window_settings)
tda_H0 = compute_norms(log_returns, hom_deg=0, **window_settings)

for norms_table, csv_name in (
    (tda_H1, "tda_landscapes_H1_norms.csv"),
    (tda_H0, "tda_landscapes_H0_norms.csv"),
):
    norms_table.to_csv(PROCESSED / csv_name)

print("TDA completado.")

In [None]:
# ===============================================================
# 5. Final figures
# ===============================================================

# One entry per figure: norms table, the two legend labels, title, file name.
figure_specs = [
    (tda_H1, "H1 L1", "H1 L2", "TDA – Normas L1 y L2 (H1)", "tda_H1_norms.png"),
    (tda_H0, "H0 L1", "H0 L2", "TDA – Normas L1 y L2 (H0)", "tda_H0_norms.png"),
]

for norms, label_l1, label_l2, title, filename in figure_specs:
    fig, ax = plt.subplots(figsize=(12, 5))
    ax.plot(norms.index, norms["L1"], label=label_l1)
    ax.plot(norms.index, norms["L2"], label=label_l2)
    ax.legend()
    ax.grid(True)
    ax.set_title(title)
    fig.savefig(ANALYSIS / filename, dpi=300)
    # Close each figure once it has been written to disk.
    plt.close(fig)

print("Figuras exportadas.")