In [8]:
# === 01. IMPORTACIONES Y CONFIGURACI√ìN ===
import pandas as pd
import numpy as np
import json
from pathlib import Path
import os

# Rutas principales
BASE_DIR = Path("C:/Users/User/Downloads/Mini reinanse")  # Ajusta si cambia
NOTEBOOKS_DIR = BASE_DIR / "notebooks"
ESTADO_DIR = NOTEBOOKS_DIR / "data" / "estado"
PATH_ALIASES_EXT = NOTEBOOKS_DIR / "symbol_aliases_ext.json"

print("üìÅ Usando carpeta ESTADO_DIR:", ESTADO_DIR.resolve())


üìÅ Usando carpeta ESTADO_DIR: C:\Users\User\Downloads\Mini reinanse\notebooks\data\estado


In [9]:
# === 02. CARGAR UNIVERSO DE ACTIVOS ===
with open(PATH_ALIASES_EXT, "r", encoding="utf-8") as f:
    mapping_ext = json.load(f)

df_universo = pd.DataFrame.from_dict(mapping_ext, orient="index").reset_index()
df_universo.rename(columns={"index": "alias"}, inplace=True)
df_universo["Resultado"] = "Pendiente"

activos = df_universo.set_index("alias").to_dict(orient="index")


In [10]:
# === 03. CONSOLIDAR ARCHIVOS estado_<symbol>.csv ===
df_list = []
errores_simbolos = []

for alias in activos.keys():
    try:
        file_path = ESTADO_DIR / f"estado_{alias}.csv"
        df_proc = pd.read_csv(file_path, parse_dates=["time"])
        df_proc["symbol"] = alias
        df_list.append(df_proc)
    except Exception as e:
        errores_simbolos.append({"symbol": alias, "motivo": str(e)})
        print(f"‚ùå {alias}: {e}")

# Diagn√≥stico
if errores_simbolos:
    df_errores = pd.DataFrame(errores_simbolos)
    print("üìã Resumen de errores:")
    display(df_errores.groupby("motivo").size().to_frame("conteo"))


‚ùå AAL.US: [Errno 2] No such file or directory: 'C:\\Users\\User\\Downloads\\Mini reinanse\\notebooks\\data\\estado\\estado_AAL.US.csv'
‚ùå AES.US: [Errno 2] No such file or directory: 'C:\\Users\\User\\Downloads\\Mini reinanse\\notebooks\\data\\estado\\estado_AES.US.csv'
‚ùå AVTR.US: [Errno 2] No such file or directory: 'C:\\Users\\User\\Downloads\\Mini reinanse\\notebooks\\data\\estado\\estado_AVTR.US.csv'
‚ùå CC.US: [Errno 2] No such file or directory: 'C:\\Users\\User\\Downloads\\Mini reinanse\\notebooks\\data\\estado\\estado_CC.US.csv'
‚ùå CLF.US: [Errno 2] No such file or directory: 'C:\\Users\\User\\Downloads\\Mini reinanse\\notebooks\\data\\estado\\estado_CLF.US.csv'
‚ùå DXC.US: [Errno 2] No such file or directory: 'C:\\Users\\User\\Downloads\\Mini reinanse\\notebooks\\data\\estado\\estado_DXC.US.csv'
‚ùå EXPI.US: [Errno 2] No such file or directory: 'C:\\Users\\User\\Downloads\\Mini reinanse\\notebooks\\data\\estado\\estado_EXPI.US.csv'
‚ùå F.US: [Errno 2] No such file or dir

Unnamed: 0_level_0,conteo
motivo,Unnamed: 1_level_1
[Errno 2] No such file or directory: 'C:\\Users\\User\\Downloads\\Mini reinanse\\notebooks\\data\\estado\\estado_AAL.US.csv',1
[Errno 2] No such file or directory: 'C:\\Users\\User\\Downloads\\Mini reinanse\\notebooks\\data\\estado\\estado_AES.US.csv',1
[Errno 2] No such file or directory: 'C:\\Users\\User\\Downloads\\Mini reinanse\\notebooks\\data\\estado\\estado_AVTR.US.csv',1
[Errno 2] No such file or directory: 'C:\\Users\\User\\Downloads\\Mini reinanse\\notebooks\\data\\estado\\estado_CC.US.csv',1
[Errno 2] No such file or directory: 'C:\\Users\\User\\Downloads\\Mini reinanse\\notebooks\\data\\estado\\estado_CLF.US.csv',1
[Errno 2] No such file or directory: 'C:\\Users\\User\\Downloads\\Mini reinanse\\notebooks\\data\\estado\\estado_DXC.US.csv',1
[Errno 2] No such file or directory: 'C:\\Users\\User\\Downloads\\Mini reinanse\\notebooks\\data\\estado\\estado_EXPI.US.csv',1
[Errno 2] No such file or directory: 'C:\\Users\\User\\Downloads\\Mini reinanse\\notebooks\\data\\estado\\estado_F.US.csv',1
[Errno 2] No such file or directory: 'C:\\Users\\User\\Downloads\\Mini reinanse\\notebooks\\data\\estado\\estado_HUN.US.csv',1
[Errno 2] No such file or directory: 'C:\\Users\\User\\Downloads\\Mini reinanse\\notebooks\\data\\estado\\estado_LEG.US.csv',1


In [11]:
# === 04. CONSOLIDAR EN df_precios Y AGREGAR M√âTRICAS ===
if not df_list:
    raise RuntimeError("‚ùå No se pudo consolidar df_precios: ning√∫n archivo fue le√≠do.")

df_precios = pd.concat(df_list, ignore_index=True)
df_precios = df_precios.sort_values(["symbol", "time"])
print(f"‚úÖ Consolidado df_precios: {df_precios.shape}")

# √öltima fila por s√≠mbolo
df_metrics = df_precios.groupby("symbol").tail(1).copy()

# Resultado de tendencia
df_resultado = df_universo[["alias", "Resultado"]].rename(columns={"alias": "symbol"}).drop_duplicates()

# Uni√≥n final
df_final = pd.merge(df_metrics, df_resultado, on="symbol", how="inner")


‚úÖ Consolidado df_precios: (509661, 4)


In [12]:
# === 05. CALCULAR FEATURES ADICIONALES (HMM / RISK PARITY) ===

# Asegura columna de precio base
price_col = "equity" if "equity" in df_precios.columns else ("close" if "close" in df_precios.columns else None)
if price_col is None:
    raise RuntimeError("‚ùå No se encontr√≥ la columna de precio ('equity' o 'close') en df_precios.")

# Returns simples y logar√≠tmicos
df_precios["return"] = df_precios.groupby("symbol")[price_col].pct_change()
df_precios["log_return"] = df_precios.groupby("symbol")[price_col].transform(lambda x: np.log(x) - np.log(x.shift(1)))

# Momento 63 (‚âà 3 meses burs√°tiles)
df_precios["mom_63"] = df_precios.groupby("symbol")[price_col].pct_change(63)

# Drawdown a partir de m√°ximo acumulado por s√≠mbolo
grp = df_precios.groupby("symbol")[price_col]
rolling_peak = grp.cummax()
df_precios["drawdown"] = (df_precios[price_col] / rolling_peak) - 1.0

# Validaci√≥n de columnas requeridas
features_hmm = ["return", "log_return", "drawdown", "mom_63"]
faltantes = [col for col in features_hmm if col not in df_precios.columns]
if faltantes:
    raise ValueError(f"‚ùå Faltan columnas necesarias: {faltantes}")

# Agregaci√≥n por s√≠mbolo (toma el √∫ltimo valor disponible de cada feature)
agg_funcs = {col: "last" for col in features_hmm}
df_features_extra = df_precios.groupby("symbol").agg(agg_funcs).reset_index()

# Uni√≥n con tu df_final
df_final = pd.merge(df_final, df_features_extra, on="symbol", how="left")


In [13]:
# === 06. FILTRAR TOP 50 Y EXPORTAR ===
TOP_N = 50

if "score_compuesto" in df_final.columns:
    df_export = df_final.dropna(subset=["score_compuesto"])
    df_export = df_export.sort_values("score_compuesto", ascending=False).head(TOP_N)
else:
    df_export = df_final.head(TOP_N)  # Si no hay score, exportar lo que se tenga

# A√±adir Resultado por defecto si falta
if "Resultado" not in df_export.columns:
    df_export["Resultado"] = "Alcista"

# Renombrar columnas clave si necesario
df_export = df_export.rename(columns={
    "score_compuesto": "score",
    "sharpe_20": "sharpe",
    "symbol": "symbol"
})

# Guardar
DATA_PROCESSED_DIR = NOTEBOOKS_DIR / "data"
DATA_PROCESSED_DIR.mkdir(exist_ok=True)

df_export.to_csv(DATA_PROCESSED_DIR / "resumen_entrenamiento_hmm_top50.csv", index=False)
df_export.to_csv(DATA_PROCESSED_DIR / "top_activos.csv", index=False)
df_final.to_csv(DATA_PROCESSED_DIR / "df_scores.csv", index=False)

print("‚úÖ Archivos exportados correctamente")
display(df_export.head())


‚úÖ Archivos exportados correctamente


Unnamed: 0,time,retornos,equity,symbol,Resultado,return,log_return,drawdown,mom_63
0,2025-11-14,0.001295,146.87,A.US,Pendiente,0.001295,0.001294,-0.075009,0.240037
1,2025-11-14,-0.01678,37.5,AA.US,Pendiente,-0.01678,-0.016923,-0.325297,0.264755
2,2025-11-14,-0.006574,49.87,AAP.US,Pendiente,-0.006574,-0.006595,-0.681199,-0.115623
3,2025-11-14,-0.002675,272.14,AAPL.US,Pendiente,-0.002675,-0.002679,-0.010796,0.177484
4,2025-11-14,0.000775,232.35,ABBV.US,Pendiente,0.000775,0.000775,-0.050354,0.126873


In [14]:
import numpy as np
import pandas as pd
import os

# ================================
# ‚úÖ CONFIGURACI√ìN DE RUTAS
# ================================
BASE_DIR = r"C:\Users\User\Downloads\Mini reinanse"
DATA_DIR = os.path.join(BASE_DIR, "data")
OUTPUT_DIR = os.path.join(DATA_DIR, "processed")
os.makedirs(OUTPUT_DIR, exist_ok=True)

# ================================
# ‚úÖ 1. VALIDACI√ìN INICIAL
# ================================
if df_precios is None or df_precios.empty:
    raise RuntimeError("‚ùå df_precios no est√° cargado o est√° vac√≠o. Verifica el flujo anterior.")

if "symbol" not in df_precios.columns or "time" not in df_precios.columns:
    raise ValueError("‚ùå df_precios debe contener columnas 'symbol' y 'time'.")

df_precios = df_precios.sort_values(["symbol", "time"]).reset_index(drop=True)

# ================================
# ‚úÖ 2. DEFINIR COLUMNA DE PRECIO
# ================================
price_col = None
for col in ["equity", "close", "price"]:
    if col in df_precios.columns:
        price_col = col
        break

if price_col is None:
    raise ValueError("‚ùå No se encontr√≥ columna de precio (equity/close/price).")

# ================================
# ‚úÖ 3. FEATURE ENGINEERING
# ================================

# Retornos simples y logar√≠tmicos
df_precios["return"] = df_precios.groupby("symbol")[price_col].pct_change()
df_precios["log_return"] = df_precios.groupby("symbol")[price_col].transform(lambda x: np.log(x) - np.log(x.shift(1)))
df_precios["cumulative_return"] = df_precios.groupby("symbol")["return"].cumsum()

# Medias m√≥viles con transform (evita MultiIndex)
for window in [20, 50, 200]:
    df_precios[f"MA_{window}"] = df_precios.groupby("symbol")[price_col].transform(lambda x: x.rolling(window).mean())

# Momentum
df_precios["mom_21"] = df_precios.groupby("symbol")[price_col].transform(lambda x: x.pct_change(21))
df_precios["mom_63"] = df_precios.groupby("symbol")[price_col].transform(lambda x: x.pct_change(63))

# Volatilidad (rolling std)
df_precios["vol_20"] = df_precios.groupby("symbol")["return"].transform(lambda x: x.rolling(20).std())
df_precios["vol_63"] = df_precios.groupby("symbol")["return"].transform(lambda x: x.rolling(63).std())

# Drawdown
df_precios["rolling_peak"] = df_precios.groupby("symbol")[price_col].cummax()
df_precios["drawdown"] = (df_precios[price_col] / df_precios["rolling_peak"]) - 1

# ================================
# ‚úÖ 4. LIMPIEZA Y VALIDACI√ìN
# ================================
df_precios.replace([np.inf, -np.inf], np.nan, inplace=True)
df_precios = df_precios.groupby("symbol").apply(lambda x: x.ffill().bfill()).reset_index(drop=True)

# Eliminar s√≠mbolos con pocos datos (<100 registros)
valid_symbols = df_precios["symbol"].value_counts()
valid_symbols = valid_symbols[valid_symbols > 100].index
df_master = df_precios[df_precios["symbol"].isin(valid_symbols)].copy()

# ================================
# ‚úÖ 5. EXPORTAR
# ================================
output_parquet = os.path.join(OUTPUT_DIR, "dataset_master.parquet")
output_csv = os.path.join(OUTPUT_DIR, "dataset_master.csv")

df_master.to_parquet(output_parquet, index=False)
df_master.to_csv(output_csv, index=False)

print("‚úÖ Dataset Maestro generado con √©xito")
print(f"üìÅ Parquet: {output_parquet}")
print(f"üìÅ CSV: {output_csv}")
print("‚úÖ Filas:", df_master.shape[0], "| Columnas:", df_master.shape[1])
print("‚úÖ Symbols:", df_master['symbol'].nunique())

df_master.head()


‚úÖ Dataset Maestro generado con √©xito
üìÅ Parquet: C:\Users\User\Downloads\Mini reinanse\data\processed\dataset_master.parquet
üìÅ CSV: C:\Users\User\Downloads\Mini reinanse\data\processed\dataset_master.csv
‚úÖ Filas: 509661 | Columnas: 16
‚úÖ Symbols: 681


Unnamed: 0,time,retornos,equity,symbol,return,log_return,mom_63,drawdown,cumulative_return,MA_20,MA_50,MA_200,mom_21,vol_20,vol_63,rolling_peak
0,2022-11-18,-0.010611,146.07,A.US,-0.010611,-0.010668,-0.027932,0.0,-0.010611,153.0775,152.6428,135.446,0.012802,0.025984,0.019476,146.07
1,2022-11-21,-0.010611,144.52,A.US,-0.010611,-0.010668,-0.027932,-0.010611,-0.010611,153.0775,152.6428,135.446,0.012802,0.025984,0.019476,146.07
2,2022-11-22,0.08594,156.94,A.US,0.08594,0.082446,-0.027932,0.0,0.075328,153.0775,152.6428,135.446,0.012802,0.025984,0.019476,156.94
3,2022-11-23,-0.010768,155.25,A.US,-0.010768,-0.010827,-0.027932,-0.010768,0.06456,153.0775,152.6428,135.446,0.012802,0.025984,0.019476,156.94
4,2022-11-25,0.009275,156.69,A.US,0.009275,0.009233,-0.027932,-0.001593,0.073835,153.0775,152.6428,135.446,0.012802,0.025984,0.019476,156.94
