In [None]:
import pandas as pd
import numpy as np
from pathlib import Path
from datetime import date
import os
import json
import concurrent.futures
import logging
import time

# === Configuration ===
# Project root: two directory levels above the current working directory.
BASE_PATH = Path("..").resolve().parent
HISTORIC_DIR = BASE_PATH / "data" / "historic"
# Heuristic-signals report for today; the file name embeds the ISO date.
SENALES_FILE = BASE_PATH / "reports" / "senales_heuristicas" / f"senales_heuristicas_{date.today().isoformat()}.csv"
# NOTE(review): unlike the paths above, this one is resolved against the
# working directory rather than BASE_PATH -- confirm that is intended.
CONFIG_PATH = Path("config/json/symbols.json")
OUTPUT_DIR = BASE_PATH / "reports" / "backtesting"
LOG_PATH = BASE_PATH / "logs" / "bt_heuristicas.log"

# Create the output and log folders if they do not exist yet.
OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
LOG_PATH.parent.mkdir(parents=True, exist_ok=True)

# === Local logger (writes to LOG_PATH)
logging.basicConfig(
    filename=LOG_PATH,
    level=logging.INFO,
    format="%(asctime)s - %(levelname)s - %(message)s",
    datefmt="%Y-%m-%d %H:%M:%S"
)

# === Backtest parameters
TAKE_PROFIT = 0.04  # fractional gain threshold (4%)
STOP_LOSS = 0.02    # fractional loss threshold (2%)
MAX_DIAS = 7        # maximum number of history rows a position stays open

# === Read the symbol list
# JSON is UTF-8 by specification, so do not depend on the locale encoding.
with open(CONFIG_PATH, encoding="utf-8") as f:
    simbolos = json.load(f)["simbolos"]

# === Load the signals
df_senales = pd.read_csv(SENALES_FILE)
df_senales["fecha"] = pd.to_datetime(df_senales["fecha"])

# === Funcion principal
def backtest_simbolo(simbolo):
    """Backtest every heuristic signal recorded for one symbol.

    Loads ``<simbolo in lower case>.parquet`` from ``HISTORIC_DIR`` and, for
    each row of the module-level ``df_senales`` that belongs to the symbol,
    opens a position at the close of the history row whose ``fecha`` equals
    the signal date. It then walks forward up to ``MAX_DIAS`` rows and closes
    the position at the first close whose directional move reaches
    ``TAKE_PROFIT`` or drops to ``-STOP_LOSS``; if neither happens it closes
    at the close of row ``MAX_DIAS``. A signal with no TP/SL exit and fewer
    than ``MAX_DIAS`` later rows is dropped.

    Any ``signal`` value other than ``"buy"`` is evaluated as a short.

    Args:
        simbolo: Symbol name as it appears in ``df_senales["simbolo"]``.

    Returns:
        A list with one dict per closed trade (keys ``simbolo``,
        ``estrategia``, ``fecha_entry``, ``fecha_exit``, ``signal``,
        ``entry``, ``exit``, ``retorno``, ``tp_hit``, ``sl_hit``). An empty
        list when the history has no date column. If an exception occurs it
        is logged with its traceback and the trades collected so far are
        returned.
    """
    resultados = []
    try:
        logging.info("Procesando simbolo %s...", simbolo)
        df_hist = pd.read_parquet(HISTORIC_DIR / f"{simbolo.lower()}.parquet").reset_index()

        # Rename the 'datetime' column to 'fecha' when needed.
        if "datetime" in df_hist.columns and "fecha" not in df_hist.columns:
            df_hist = df_hist.rename(columns={"datetime": "fecha"})
            logging.info("%s - columna 'datetime' renombrada a 'fecha'", simbolo)

        if "fecha" not in df_hist.columns:
            logging.error("%s no contiene columna 'fecha' ni 'datetime'", simbolo)
            return []

        df_hist["fecha"] = pd.to_datetime(df_hist["fecha"])
        # The walk-forward below uses positional offsets, so the rows must be
        # in chronological order. The stable sort keeps the first of any
        # duplicated dates first and is a no-op on already ordered data.
        df_hist = df_hist.sort_values("fecha", kind="mergesort").reset_index(drop=True)
        n_filas = len(df_hist)

        df_s = df_senales[df_senales["simbolo"] == simbolo]

        for estrategia in df_s["estrategia"].unique():
            df_e = df_s[df_s["estrategia"] == estrategia]

            for _, row in df_e.iterrows():
                fecha = pd.to_datetime(row["fecha"])
                tipo = row["signal"]
                fila = df_hist[df_hist["fecha"] == fecha]
                if fila.empty:
                    logging.warning(
                        "%s - %s - fecha no encontrada: %s", simbolo, estrategia, fecha.date()
                    )
                    continue

                idx_entry = fila.index[0]
                entry = df_hist.loc[idx_entry, "close"]
                # A missing or non-positive entry price would yield NaN/inf
                # returns and distort the aggregated metrics.
                if pd.isna(entry) or entry <= 0:
                    logging.warning(
                        "%s - %s - precio de entrada invalido (%s) en %s",
                        simbolo, estrategia, entry, fecha.date(),
                    )
                    continue

                # +1 for longs, -1 for shorts: one formula serves both sides.
                direccion = 1 if tipo == "buy" else -1
                exit_price = None
                fecha_salida = None
                tp_hit = False
                sl_hit = False

                ultimo_idx = min(idx_entry + MAX_DIAS, n_filas - 1)
                for idx in range(idx_entry + 1, ultimo_idx + 1):
                    precio = df_hist.loc[idx, "close"]
                    variacion = direccion * (precio - entry) / entry

                    # Take-profit is checked first, as in a same-row tie.
                    if variacion >= TAKE_PROFIT:
                        tp_hit = True
                    elif variacion <= -STOP_LOSS:
                        sl_hit = True
                    else:
                        continue

                    exit_price = precio
                    fecha_salida = df_hist.loc[idx, "fecha"]
                    break

                if exit_price is None:
                    idx_limite = idx_entry + MAX_DIAS
                    if idx_limite >= n_filas:
                        # Not enough history to close on time: drop the trade.
                        continue
                    exit_price = df_hist.loc[idx_limite, "close"]
                    fecha_salida = df_hist.loc[idx_limite, "fecha"]

                retorno = direccion * (exit_price - entry) / entry
                resultados.append({
                    "simbolo": simbolo,
                    "estrategia": estrategia,
                    "fecha_entry": fecha.date(),
                    "fecha_exit": fecha_salida.date(),
                    "signal": tipo,
                    "entry": entry,
                    "exit": exit_price,
                    "retorno": round(retorno, 4),
                    "tp_hit": tp_hit,
                    "sl_hit": sl_hit
                })

        logging.info("%s finalizado. Operaciones: %d", simbolo, len(resultados))

    except Exception as e:
        # Best effort per symbol: keep going, but record the full traceback.
        logging.exception("Error en simbolo %s: %s", simbolo, e)

    return resultados

# === Run
inicio = time.time()
logging.info("=== INICIO DE BACKTESTING ===")

resultados = []
with concurrent.futures.ThreadPoolExecutor(max_workers=8) as executor:
    futuros = [executor.submit(backtest_simbolo, simbolo) for simbolo in simbolos]
    # Collect in submission order so the output CSV has the same row order on
    # every run; all futures must finish anyway, so this costs no extra time.
    for future in futuros:
        resultados.extend(future.result())

# === Save the trades
df_resultados = pd.DataFrame(resultados)
output_file = OUTPUT_DIR / f"bt_heuristicas_{date.today().isoformat()}.csv"
df_resultados.to_csv(output_file, index=False)
print(f"Backtest guardado en: {output_file}")
print(f"Total operaciones: {len(df_resultados)}")


# === Metrics
def _winrate(retornos):
    """Fraction of trades with a strictly positive return."""
    return round((retornos > 0).sum() / len(retornos), 2)


def _sharpe(retornos):
    """Mean return over its standard deviation; 0 when there is no dispersion."""
    desviacion = np.std(retornos)
    return round(np.mean(retornos) / desviacion, 2) if desviacion > 0 else 0


def _profit_factor(retornos):
    """Sum of gains over the absolute sum of losses; inf when nothing was lost."""
    perdidas = retornos[retornos < 0].sum()
    if perdidas == 0:
        return np.inf
    return round(retornos[retornos > 0].sum() / abs(perdidas), 2)


def _max_drawdown(retornos):
    """Deepest peak-to-trough fall of the additive cumulative return (<= 0).

    The running peak starts at 0 (the equity before the first trade), so an
    initial losing streak counts as a drawdown as well.
    """
    acumulado = retornos.cumsum()
    pico = acumulado.cummax().clip(lower=0)
    return round((acumulado - pico).min(), 4)


if not df_resultados.empty:
    resumen = (
        df_resultados
        # The drawdown depends on trade order: make it chronological. The
        # stable sort keeps the original order among same-day entries.
        .sort_values("fecha_entry", kind="mergesort")
        .groupby(["simbolo", "estrategia"])
        .agg(
            total_op=("retorno", "count"),
            winrate=("retorno", _winrate),
            retorno_promedio=("retorno", "mean"),
            retorno_total=("retorno", "sum"),
            sharpe=("retorno", _sharpe),
            profit_factor=("retorno", _profit_factor),
            max_drawdown=("retorno", _max_drawdown)
        )
        .reset_index()
        .sort_values(["simbolo", "estrategia"])
    )
    resumen_file = OUTPUT_DIR / f"resumen_metricas_{date.today().isoformat()}.csv"
    resumen.to_csv(resumen_file, index=False)
    print(f"Resumen guardado en: {resumen_file}")

# === Total duration
duracion = time.time() - inicio
logging.info("Backtesting completado en %.2f segundos.", duracion)
print(f"Duracion total: {duracion:.2f} segundos")


In [None]:
import pandas as pd
import numpy as np
from pathlib import Path
from datetime import date
import os
import json
import concurrent.futures
import logging
import time

# === Configuration ===
# Project root: two directory levels above the current working directory.
BASE_PATH = Path("..").resolve().parent
HISTORIC_DIR = BASE_PATH / "data" / "historic"
# Heuristic-signals report for today; the file name embeds the ISO date.
SENALES_FILE = BASE_PATH / "reports" / "senales_heuristicas" / f"senales_heuristicas_{date.today().isoformat()}.csv"
# NOTE(review): unlike the paths above, this one is resolved against the
# working directory rather than BASE_PATH -- confirm that is intended.
CONFIG_PATH = Path("config/json/symbols.json")
OUTPUT_DIR = BASE_PATH / "reports" / "backtesting"
LOG_PATH = BASE_PATH / "logs" / "bt_heuristicas.log"

# Create the output and log folders if they do not exist yet.
OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
LOG_PATH.parent.mkdir(parents=True, exist_ok=True)

# === Local logger (writes to LOG_PATH)
logging.basicConfig(
    filename=LOG_PATH,
    level=logging.INFO,
    format="%(asctime)s - %(levelname)s - %(message)s",
    datefmt="%Y-%m-%d %H:%M:%S"
)

# === Backtest parameters
TAKE_PROFIT = 0.04  # fractional gain threshold (4%)
STOP_LOSS = 0.02    # fractional loss threshold (2%)
MAX_DIAS = 7        # maximum number of history rows a position stays open

# === Load the symbol list
# JSON is UTF-8 by specification, so do not depend on the locale encoding.
with open(CONFIG_PATH, encoding="utf-8") as f:
    simbolos = json.load(f)["simbolos"]
print(f"Simbolos a procesar: {simbolos}")
logging.info("Simbolos a procesar: %s", simbolos)

# === Load the signals file
df_senales = pd.read_csv(SENALES_FILE)
df_senales["fecha"] = pd.to_datetime(df_senales["fecha"])
print(f"Total de señales cargadas: {len(df_senales)}")
logging.info("Total de señales cargadas: %d", len(df_senales))

# === Backtesting por simbolo
def backtest_simbolo(simbolo):
    """Backtest every heuristic signal recorded for one symbol.

    Loads ``<simbolo in lower case>.parquet`` from ``HISTORIC_DIR`` and, for
    each row of the module-level ``df_senales`` that belongs to the symbol,
    opens a position at the close of the history row whose ``fecha`` equals
    the signal date. It then walks forward up to ``MAX_DIAS`` rows and closes
    the position at the first close whose directional move reaches
    ``TAKE_PROFIT`` or drops to ``-STOP_LOSS``; if neither happens it closes
    at the close of row ``MAX_DIAS``. A signal with no TP/SL exit and fewer
    than ``MAX_DIAS`` later rows is dropped.

    Any ``signal`` value other than ``"buy"`` is evaluated as a short.

    Args:
        simbolo: Symbol name as it appears in ``df_senales["simbolo"]``.

    Returns:
        A list with one dict per closed trade (keys ``simbolo``,
        ``estrategia``, ``fecha_entry``, ``fecha_exit``, ``signal``,
        ``entry``, ``exit``, ``retorno``, ``tp_hit``, ``sl_hit``). An empty
        list when the history has no date column. If an exception occurs it
        is logged with its traceback and the trades collected so far are
        returned.
    """
    resultados = []
    t0 = time.time()
    try:
        logging.info("=== %s: Iniciando backtest ===", simbolo)
        df_hist = pd.read_parquet(HISTORIC_DIR / f"{simbolo.lower()}.parquet").reset_index()

        # Rename the 'datetime' column to 'fecha' when needed.
        if "datetime" in df_hist.columns and "fecha" not in df_hist.columns:
            df_hist = df_hist.rename(columns={"datetime": "fecha"})
            logging.info("%s: 'datetime' renombrado a 'fecha'", simbolo)

        if "fecha" not in df_hist.columns:
            logging.error("%s: no contiene columna 'fecha'", simbolo)
            return []

        df_hist["fecha"] = pd.to_datetime(df_hist["fecha"])
        # The walk-forward below uses positional offsets, so the rows must be
        # in chronological order. The stable sort keeps the first of any
        # duplicated dates first and is a no-op on already ordered data.
        df_hist = df_hist.sort_values("fecha", kind="mergesort").reset_index(drop=True)
        n_filas = len(df_hist)

        df_s = df_senales[df_senales["simbolo"] == simbolo]

        for estrategia in df_s["estrategia"].unique():
            df_e = df_s[df_s["estrategia"] == estrategia]

            for _, row in df_e.iterrows():
                fecha = pd.to_datetime(row["fecha"])
                tipo = row["signal"]
                fila = df_hist[df_hist["fecha"] == fecha]
                if fila.empty:
                    logging.warning(
                        "%s - %s: fecha no encontrada: %s", simbolo, estrategia, fecha.date()
                    )
                    continue

                idx_entry = fila.index[0]
                entry = df_hist.loc[idx_entry, "close"]
                # A missing or non-positive entry price would yield NaN/inf
                # returns and distort the aggregated metrics.
                if pd.isna(entry) or entry <= 0:
                    logging.warning(
                        "%s - %s: precio de entrada invalido (%s) en %s",
                        simbolo, estrategia, entry, fecha.date(),
                    )
                    continue

                # +1 for longs, -1 for shorts: one formula serves both sides.
                direccion = 1 if tipo == "buy" else -1
                exit_price = None
                fecha_salida = None
                tp_hit = False
                sl_hit = False

                ultimo_idx = min(idx_entry + MAX_DIAS, n_filas - 1)
                for idx in range(idx_entry + 1, ultimo_idx + 1):
                    precio = df_hist.loc[idx, "close"]
                    variacion = direccion * (precio - entry) / entry

                    # Take-profit is checked first, as in a same-row tie.
                    if variacion >= TAKE_PROFIT:
                        tp_hit = True
                    elif variacion <= -STOP_LOSS:
                        sl_hit = True
                    else:
                        continue

                    exit_price = precio
                    fecha_salida = df_hist.loc[idx, "fecha"]
                    break

                if exit_price is None:
                    idx_limite = idx_entry + MAX_DIAS
                    if idx_limite >= n_filas:
                        # Not enough history to close on time: drop the trade.
                        continue
                    exit_price = df_hist.loc[idx_limite, "close"]
                    fecha_salida = df_hist.loc[idx_limite, "fecha"]

                retorno = direccion * (exit_price - entry) / entry
                resultados.append({
                    "simbolo": simbolo,
                    "estrategia": estrategia,
                    "fecha_entry": fecha.date(),
                    "fecha_exit": fecha_salida.date(),
                    "signal": tipo,
                    "entry": entry,
                    "exit": exit_price,
                    "retorno": round(retorno, 4),
                    "tp_hit": tp_hit,
                    "sl_hit": sl_hit
                })

        logging.info(
            "%s: Finalizado. Operaciones generadas: %d en %.2fs",
            simbolo, len(resultados), time.time() - t0,
        )

    except Exception as e:
        # Best effort per symbol: keep going, but record the full traceback.
        logging.exception("%s: ERROR: %s", simbolo, e)

    return resultados

# === Run everything
inicio = time.time()
logging.info("=== INICIO GLOBAL BACKTESTING ===")

resultados = []
with concurrent.futures.ThreadPoolExecutor(max_workers=8) as executor:
    futuros = [executor.submit(backtest_simbolo, simbolo) for simbolo in simbolos]
    # Collect in submission order so the output CSV has the same row order on
    # every run; all futures must finish anyway, so this costs no extra time.
    for future in futuros:
        resultados.extend(future.result())

# === Consolidate the trades
df_resultados = pd.DataFrame(resultados)
output_file = OUTPUT_DIR / f"bt_heuristicas_{date.today().isoformat()}.csv"
df_resultados.to_csv(output_file, index=False)
print(f"Backtest guardado en: {output_file}")
print(f"Total operaciones: {len(df_resultados)}")


# === Metrics
def _winrate(retornos):
    """Fraction of trades with a strictly positive return."""
    return round((retornos > 0).sum() / len(retornos), 2)


def _sharpe(retornos):
    """Mean return over its standard deviation; 0 when there is no dispersion."""
    desviacion = np.std(retornos)
    return round(np.mean(retornos) / desviacion, 2) if desviacion > 0 else 0


def _profit_factor(retornos):
    """Sum of gains over the absolute sum of losses; inf when nothing was lost."""
    perdidas = retornos[retornos < 0].sum()
    if perdidas == 0:
        return np.inf
    return round(retornos[retornos > 0].sum() / abs(perdidas), 2)


def _max_drawdown(retornos):
    """Deepest peak-to-trough fall of the additive cumulative return (<= 0).

    The running peak starts at 0 (the equity before the first trade), so an
    initial losing streak counts as a drawdown as well.
    """
    acumulado = retornos.cumsum()
    pico = acumulado.cummax().clip(lower=0)
    return round((acumulado - pico).min(), 4)


if not df_resultados.empty:
    resumen = (
        df_resultados
        # The drawdown depends on trade order: make it chronological. The
        # stable sort keeps the original order among same-day entries.
        .sort_values("fecha_entry", kind="mergesort")
        .groupby(["simbolo", "estrategia"])
        .agg(
            total_op=("retorno", "count"),
            winrate=("retorno", _winrate),
            retorno_promedio=("retorno", "mean"),
            retorno_total=("retorno", "sum"),
            sharpe=("retorno", _sharpe),
            profit_factor=("retorno", _profit_factor),
            max_drawdown=("retorno", _max_drawdown)
        )
        .reset_index()
        .sort_values(["simbolo", "estrategia"])
    )
    resumen_file = OUTPUT_DIR / f"resumen_metricas_{date.today().isoformat()}.csv"
    resumen.to_csv(resumen_file, index=False)
    print(f"Resumen guardado en: {resumen_file}")

# === Total duration
duracion = time.time() - inicio
logging.info("Backtesting COMPLETADO en %.2f segundos.", duracion)
print(f"Duracion total: {duracion:.2f} segundos")


In [None]:
import pandas as pd
import plotly.express as px
from datetime import date
from pathlib import Path

# === Load today's summary-metrics file
resumen_file = (
    Path("..").resolve().parent
    / "reports"
    / "backtesting"
    / f"resumen_metricas_{date.today().isoformat()}.csv"
)
df = pd.read_csv(resumen_file)

# Keep only the symbol/strategy pairs with at least 3 trades
df_filtrado = df.loc[df["total_op"] >= 3].copy()

# === Top 10 by total return (horizontal bars)
top_retorno = df_filtrado.sort_values(by="retorno_total", ascending=False).iloc[:10]
fig1 = px.bar(
    top_retorno,
    orientation="h",
    y="estrategia",
    x="retorno_total",
    color="simbolo",
    labels={"retorno_total": "Retorno total"},
    title="Top 10 estrategias por retorno total",
)
fig1.update_layout(yaxis={"categoryorder": "total ascending"})
fig1.show()

# === Top 10 by win rate (horizontal bars)
top_winrate = df_filtrado.sort_values(by="winrate", ascending=False).iloc[:10]
fig2 = px.bar(
    top_winrate,
    orientation="h",
    y="estrategia",
    x="winrate",
    color="simbolo",
    labels={"winrate": "Winrate"},
    title="Top 10 estrategias por winrate",
)
fig2.update_layout(yaxis={"categoryorder": "total ascending"})
fig2.show()

# === Mean Sharpe per strategy, ten highest
sharpe_prom = (
    df_filtrado.groupby("estrategia")["sharpe"]
    .mean()
    .sort_values(ascending=False)
    .head(10)
    .reset_index()
)
fig3 = px.bar(
    sharpe_prom,
    y="sharpe",
    x="estrategia",
    labels={"sharpe": "Sharpe Ratio"},
    title="Sharpe promedio por estrategia (top 10)",
)
fig3.show()

# === Scatter: mean return against win rate
fig4 = px.scatter(
    df_filtrado,
    y="winrate",
    x="retorno_promedio",
    color="estrategia",
    hover_data=["simbolo", "total_op"],
    labels={"retorno_promedio": "Retorno Promedio", "winrate": "Winrate"},
    title="Retorno promedio vs Winrate",
)
fig4.update_traces(
    marker={"size": 10, "line": {"width": 1, "color": "DarkSlateGrey"}}
)
fig4.show()


In [None]:
import pandas as pd
import plotly.express as px
from datetime import date
from pathlib import Path

# === Load today's trades file
archivo_ops = (
    Path("..").resolve().parent
    / "reports"
    / "backtesting"
    / f"bt_heuristicas_{date.today().isoformat()}.csv"
)
df = pd.read_csv(archivo_ops)
print(f"Operaciones cargadas: {len(df)}")

# === Histogram of per-trade returns
fig1 = px.histogram(
    df,
    x="retorno",
    title="Distribución de retornos por operación",
    nbins=50,
    color_discrete_sequence=["#3b8bba"],
)
fig1.update_layout(bargap=0.1)
fig1.show()

# === Ten strategies with the most trades
estrategias_top = (
    df.groupby("estrategia")["retorno"]
    .count()
    .reset_index(name="operaciones")
    .sort_values(by="operaciones", ascending=False)
    .head(10)
)
fig2 = px.bar(
    estrategias_top,
    y="operaciones",
    x="estrategia",
    color="operaciones",
    title="Top 10 estrategias por cantidad de operaciones",
)
fig2.show()

# === Share of trades (in %) that hit take-profit / stop-loss, per strategy
resumen_hit = df.groupby("estrategia")[["tp_hit", "sl_hit"]].mean().reset_index()
resumen_hit["tp_hit"] *= 100
resumen_hit["sl_hit"] *= 100
fig3 = px.bar(
    resumen_hit.melt(id_vars="estrategia", var_name="tipo", value_name="porcentaje"),
    y="porcentaje",
    x="estrategia",
    color="tipo",
    barmode="group",
    title="Porcentaje de operaciones que alcanzaron Take Profit o Stop Loss",
)
fig3.show()

# === Box plot of returns per strategy
# Trades with |return| >= 0.2 are left out to keep extreme outliers off the plot.
fig4 = px.box(
    df.loc[df["retorno"].abs() < 0.2],
    y="retorno",
    x="estrategia",
    color="estrategia",
    points="outliers",
    title="Distribución de retornos por estrategia",
)
fig4.update_layout(xaxis_tickangle=-45)
fig4.show()

# === Cumulative return per symbol over time
df = df.assign(fecha_entry=pd.to_datetime(df["fecha_entry"])).sort_values("fecha_entry")
df["retorno_acumulado"] = df.groupby("simbolo")["retorno"].cumsum()

fig5 = px.line(
    df,
    y="retorno_acumulado",
    x="fecha_entry",
    color="simbolo",
    title="Retorno acumulado por simbolo en el tiempo",
)
fig5.show()
