In [None]:
import re, ast, json
import pandas as pd
import matplotlib.pyplot as plt
from pathlib import Path
from IPython.display import clear_output

In [None]:
log_path = Path("../logs/celery/worker.log")

# Matches the worker's success line for the delivery task, e.g.
#   Task logistica.procesar_entrega[<task id>] succeeded in 0.42s: {...}
# The runtime group accepts a single well-formed decimal with an optional
# exponent: Python prints very small floats in scientific notation
# (e.g. 9.5e-05), and a loose character class such as [\d.]+ would both miss
# those lines and accept garbage like "1.2.3" that float() cannot convert.
pat = re.compile(
    r"Task\s+logistica\.procesar_entrega\[(?P<task_id>[^\]]+)\]\s+succeeded\s+in\s+"
    r"(?P<secs>(?:\d+\.?\d*|\.\d+)(?:[eE][+-]?\d+)?)s:\s+(?P<payload>\{.*\})"
)


def _parse_payload(payload_txt):
    """Decode the logged return value of a task into a dict.

    Decoding is attempted in this order:
      1. as a Python literal (presumably the repr of the dict the task returned),
      2. as strict JSON, so valid JSON containing apostrophes is not corrupted,
      3. as JSON after replacing single quotes with double quotes (lenient
         fallback for dict-like text that uses true/false/null).

    Args:
        payload_txt: the ``{...}`` text captured from the log line.

    Returns:
        The decoded dict, or None when the text cannot be decoded or decodes
        to something that is not a dict (e.g. a set literal such as ``{1, 2}``).
    """
    try:
        parsed = ast.literal_eval(payload_txt)
    except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError):
        # These are the exceptions literal_eval is documented to raise on
        # malformed input; anything else is a real bug and should surface.
        parsed = None
        for candidate in (payload_txt, payload_txt.replace("'", '"')):
            try:
                parsed = json.loads(candidate)
                break
            except (ValueError, RecursionError):  # JSONDecodeError is a ValueError
                continue
    return parsed if isinstance(parsed, dict) else None


rows = []
unparsed = 0  # success lines whose payload could not be decoded into a dict
with log_path.open("r", encoding="utf-8") as f:
    for line in f:
        m = pat.search(line)
        if not m:
            continue
        payload = _parse_payload(m.group("payload"))
        if payload is None:
            # Best effort: the task id and duration are still valid, so keep
            # the row with empty payload fields, but count it so the loss is
            # reported below instead of being silently hidden.
            unparsed += 1
            payload = {}
        rows.append({
            "entrega_id": payload.get("entrega_id"),
            "task_id": m.group("task_id").strip(),
            "status": payload.get("status"),
            "retry": payload.get("retry_count"),
            "duration_s": float(m.group("secs")),
        })

if unparsed:
    print(f"Aviso: {unparsed} de {len(rows)} tareas exitosas tienen un payload no interpretable; sus campos quedan vacíos.")




In [None]:
# Assemble every parsed success row into one table, ordered by delivery and
# then by retry number, with a fresh 0..N-1 index.
df = (
    pd.DataFrame(rows, columns=["entrega_id", "task_id", "status", "retry", "duration_s"])
    .sort_values(["entrega_id", "retry"])
    .reset_index(drop=True)
)

# Clear this cell's output area; with wait=True the clearing is deferred
# until new output is available to replace it.
clear_output(wait=True)

# Export to the current working directory. mkdir is a no-op for "." but keeps
# the export working if `out` is later pointed at a directory that does not exist.
out = Path(".")
out.mkdir(parents=True, exist_ok=True)
csv_path = out / "worker_durations_simple.csv"
df.to_csv(csv_path, index=False)

In [None]:
# Duration summary for ALL successful tasks found in the log.
# NOTE(review): the status-filtered cells below repeat this code almost line
# for line; consider extracting a shared helper that takes the filtered frame.
df = (
    pd.DataFrame(rows, columns=["entrega_id", "task_id", "status", "retry", "duration_s"])
    .sort_values(["entrega_id", "retry"])
    .reset_index(drop=True)
)
display(df.head(10))

durations = df["duration_s"]
print("Resumen duración (s):")
print(durations.describe(percentiles=[.5, .9, .95, .99]))

fig, axes = plt.subplots(1, 2, figsize=(12, 4))
hist_ax, pct_ax = axes
n = len(df)

# Left panel: distribution of durations; the dashed red line marks the 2 s
# threshold labelled "ASR 2s".
hist_ax.hist(durations, bins=15, edgecolor="black")
hist_ax.axvline(2, color="red", linestyle="--", label="ASR 2s")
hist_ax.set(
    title=f"Histograma de duración (N={n})",
    xlabel="Duración (s)",
    ylabel="Frecuencia",
)
hist_ax.legend()

# Right panel: duration at the p50/p90/p95/p99 percentiles against the same threshold.
percentiles = [50, 90, 95, 99]
values = [durations.quantile(p / 100) for p in percentiles]
pct_ax.plot(percentiles, values, marker="o")
pct_ax.axhline(2, color="red", linestyle="--", label="ASR 2s")
pct_ax.set(
    title=f"Duración por percentil (N={n})",
    xlabel="Percentil",
    ylabel="Duración (s)",
    xticks=percentiles,
)
pct_ax.legend()

fig.tight_layout()
plt.show()

In [None]:
# Duration summary restricted to tasks whose payload status is 'ENTREGADA'.
# NOTE(review): this cell duplicates the unfiltered summary cell except for
# the status filter; a shared plotting helper would remove the copy-paste.
df = (
    pd.DataFrame(rows, columns=["entrega_id", "task_id", "status", "retry", "duration_s"])
    .query("status == 'ENTREGADA'")
    .sort_values(["entrega_id", "retry"])
    .reset_index(drop=True)
)
display(df.head(10))

durations = df["duration_s"]
print("Resumen duración (s):")
print(durations.describe(percentiles=[.5, .9, .95, .99]))

fig, axes = plt.subplots(1, 2, figsize=(12, 4))
hist_ax, pct_ax = axes
n = len(df)

# Left panel: distribution of durations; the dashed red line marks the 2 s
# threshold labelled "ASR 2s".
hist_ax.hist(durations, bins=15, edgecolor="black")
hist_ax.axvline(2, color="red", linestyle="--", label="ASR 2s")
hist_ax.set(
    title=f"Histograma de duración (N={n})",
    xlabel="Duración (s)",
    ylabel="Frecuencia",
)
hist_ax.legend()

# Right panel: duration at the p50/p90/p95/p99 percentiles against the same threshold.
percentiles = [50, 90, 95, 99]
values = [durations.quantile(p / 100) for p in percentiles]
pct_ax.plot(percentiles, values, marker="o")
pct_ax.axhline(2, color="red", linestyle="--", label="ASR 2s")
pct_ax.set(
    title=f"Duración por percentil (N={n})",
    xlabel="Percentil",
    ylabel="Duración (s)",
    xticks=percentiles,
)
pct_ax.legend()

fig.tight_layout()
plt.show()

In [None]:
# Duration summary restricted to tasks whose payload status is
# 'PENDING_SYSTEM_CONFIRMATION'.
# NOTE(review): this cell duplicates the other summary cells except for the
# status filter; a shared plotting helper would remove the copy-paste.
df = (
    pd.DataFrame(rows, columns=["entrega_id", "task_id", "status", "retry", "duration_s"])
    .query("status == 'PENDING_SYSTEM_CONFIRMATION'")
    .sort_values(["entrega_id", "retry"])
    .reset_index(drop=True)
)
display(df.head(10))

durations = df["duration_s"]
print("Resumen duración (s):")
print(durations.describe(percentiles=[.5, .9, .95, .99]))

fig, axes = plt.subplots(1, 2, figsize=(12, 4))
hist_ax, pct_ax = axes
n = len(df)

# Left panel: distribution of durations; the dashed red line marks the 2 s
# threshold labelled "ASR 2s".
hist_ax.hist(durations, bins=15, edgecolor="black")
hist_ax.axvline(2, color="red", linestyle="--", label="ASR 2s")
hist_ax.set(
    title=f"Histograma de duración (N={n})",
    xlabel="Duración (s)",
    ylabel="Frecuencia",
)
hist_ax.legend()

# Right panel: duration at the p50/p90/p95/p99 percentiles against the same threshold.
percentiles = [50, 90, 95, 99]
values = [durations.quantile(p / 100) for p in percentiles]
pct_ax.plot(percentiles, values, marker="o")
pct_ax.axhline(2, color="red", linestyle="--", label="ASR 2s")
pct_ax.set(
    title=f"Duración por percentil (N={n})",
    xlabel="Percentil",
    ylabel="Duración (s)",
    xticks=percentiles,
)
pct_ax.legend()

fig.tight_layout()
plt.show()