# Proyecto A · Urgencias Respiratorias — **Colab Ready** ✅

### Cómo usar (rápido)
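1) Sube `at_urg_respiratorio_semanal.parquet` (o `at_urg_respiratorio_semanal.csv`) a `data/raw/` dentro de la carpeta del proyecto (`/content/Portafolio-AB` en Colab); el diccionario de datos es opcional.  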
2) Ejecuta todo.  
3) Salidas en `data/processed/` y figuras en `figures/`.  
4) En Power BI usa `urgencias_long_tidy.csv` + `calendar_semanal.csv` (join por `anio`+`semana`).

## 0) Setup

In [None]:
from pathlib import Path

# Detect Google Colab by trying to import its runtime package.
try:
    import google.colab  # type: ignore
except Exception:
    IS_COLAB = False
else:
    IS_COLAB = True

# The project root depends on whether the notebook runs inside Colab.
if IS_COLAB:
    BASE = Path("/content/Portafolio-AB")
else:
    BASE = Path("/mnt/data/Portafolio-AB")

RAW = BASE / "data/raw"
PROC = BASE / "data/processed"
FIG = BASE / "figures"

# Create the input/output folders up front; existing folders are left alone.
for folder in (RAW, PROC, FIG):
    folder.mkdir(parents=True, exist_ok=True)

## 1) Carga + limpieza

In [None]:
import pandas as pd, numpy as np
# Candidate input files, tried in order of preference: Parquet first, then CSV.
PARQUET = RAW / "at_urg_respiratorio_semanal.parquet"
CSV = RAW / "at_urg_respiratorio_semanal.csv"
# Path of the optional data dictionary (only defined here, not read).
DICT = RAW / "diccionario-de-datos-urgenciasrespiratoriasporsemana.xlsx"

df = None
# (label, exception) for every file that exists but could not be read, so the
# final error can report the real cause instead of "input not found".
read_errors = []
for label, path, reader in (
    ("Parquet", PARQUET, pd.read_parquet),
    ("CSV", CSV, pd.read_csv),
):
    if not path.exists():
        continue
    try:
        df = reader(path)
    except Exception as e:  # best effort: report and fall back to the next format
        print(f"Error {label}:", e)
        read_errors.append((label, e))
    else:
        print(f"Leído {label}")
        break

if df is None:
    if read_errors:
        # A file was present but unreadable: surface the underlying errors.
        detail = "; ".join(f"{label}: {err}" for label, err in read_errors)
        raise RuntimeError(
            f"No se pudo leer el input en data/raw/ ({detail})."
        ) from read_errors[-1][1]
    raise RuntimeError("No se encontró input en data/raw/.")

# Standardise column names: trimmed, lower-case, with spaces, hyphens and dots
# turned into underscores. Work on a copy so the loaded frame is not mutated.
df = df.copy()
_SEPARATORS = str.maketrans({" ": "_", "-": "_", ".": "_"})
df.columns = [
    str(name).strip().lower().translate(_SEPARATORS)
    for name in df.columns
]
# Persist the cleaned wide table.
df.to_csv(PROC / "urgencias_wide_clean.csv", index=False)

# Derive anio/semana (ISO year and ISO week number) from the first column whose
# name contains "fecha" or "date", if there is one.
date_col = next((c for c in df.columns if any(k in c for k in ["fecha", "date"])), None)
if date_col:
    # Parse into a temporary first: with errors="coerce" an unparseable column
    # becomes all-NaT, and writing that back would wipe the original values and
    # fill anio/semana (possibly already present in the data) with NA.
    parsed = pd.to_datetime(df[date_col], errors="coerce")
    if pd.api.types.is_datetime64_any_dtype(parsed) and parsed.notna().any():
        df[date_col] = parsed
        iso = parsed.dt.isocalendar()  # computed once; has year/week/day columns
        df["anio"] = iso["year"].astype("Int64")
        df["semana"] = iso["week"].astype("Int64")

## 2) Tidy + calendario

In [None]:
import pandas as pd, numpy as np
# Numeric columns are the measures to unpivot; everything else identifies a row.
num_cols = list(df.select_dtypes(include=[np.number]).columns)
id_vars = [col for col in df.columns if col not in num_cols]

# anio/semana are numeric but act as keys: move them from measures to ids.
for key_col in ("anio", "semana"):
    if key_col not in df.columns or key_col in id_vars:
        continue
    id_vars.append(key_col)
    if key_col in num_cols:
        num_cols.remove(key_col)

# Wide -> long: one row per (ids, variable) with its value in "valor".
tidy = pd.melt(
    df,
    id_vars=id_vars,
    value_vars=num_cols,
    var_name="variable",
    value_name="valor",
)
# Force numeric values; anything unparseable or missing becomes 0.0.
valor_numeric = pd.to_numeric(tidy["valor"], errors="coerce")
tidy["valor"] = valor_numeric.fillna(0.0)
tidy.to_csv(PROC / "urgencias_long_tidy.csv", index=False)

from datetime import date
def iso_week_start(y, w, d=1):
    """Return the ISO-format date string for a day of an ISO calendar week.

    Args:
        y: ISO year; anything accepted by ``int()``.
        w: ISO week number; anything accepted by ``int()``.
        d: ISO weekday, 1 (Monday) to 7 (Sunday). Defaults to 1, i.e. the
            first day of the week.

    Returns:
        The date as ``"YYYY-MM-DD"``, or ``None`` when the inputs cannot be
        converted to integers (e.g. missing values) or do not form a valid
        ISO calendar date (e.g. week 53 in a 52-week year).
    """
    try:
        return date.fromisocalendar(int(y), int(w), int(d)).isoformat()
    except (TypeError, ValueError, OverflowError):
        # Only conversion/validation failures map to None; anything else
        # (including KeyboardInterrupt) is allowed to propagate.
        return None
# Weekly calendar: one row per distinct (anio, semana) plus the start date of
# that week as computed by iso_week_start.
if {"anio", "semana"}.issubset(tidy.columns):
    cal = tidy[["anio", "semana"]].drop_duplicates()
    week_starts = [
        iso_week_start(year, week)
        for year, week in zip(cal["anio"], cal["semana"])
    ]
    cal = cal.assign(fecha_inicio_semana=week_starts)
    cal.to_csv(PROC / "calendar_semanal.csv", index=False)

## 3) Figuras rápidas

In [None]:
import matplotlib.pyplot as plt, pandas as pd
# Overall trend: sum of "valor" per (anio, semana), plotted in chronological order.
if all(col in tidy.columns for col in ("anio", "semana")):
    weekly_totals = tidy.groupby(["anio", "semana"])["valor"].sum()
    ts = weekly_totals.reset_index().sort_values(["anio", "semana"])

    fig, ax = plt.subplots(figsize=(9, 4))
    # The x axis is the position of each week in the sorted sequence.
    ax.plot(range(len(ts)), ts["valor"])
    ax.set_title("Tendencia semanal total")
    fig.tight_layout()

    out = FIG / "A_tendencia_semanal_total.png"
    fig.savefig(out, dpi=150)
    print("Guardado:", out)

### Figuras adicionales: Top variables y Heatmap

In [None]:

# Top variables por total acumulado (barras horizontales)
import pandas as pd, matplotlib.pyplot as plt
if {"variable", "valor"}.issubset(tidy.columns):
    # Accumulated total per variable, keeping the ten largest.
    totals = tidy.groupby("variable")["valor"].sum()
    top = totals.sort_values(ascending=False).head(10).reset_index()

    fig, ax = plt.subplots(figsize=(9, 5))
    ax.barh(top["variable"], top["valor"])
    ax.invert_yaxis()  # largest bar at the top
    ax.set_title("Top 10 variables por total acumulado")
    fig.tight_layout()

    out = FIG / "B_top_variables_total.png"
    fig.savefig(out, dpi=150)
    plt.close(fig)
    print("Guardado:", out)


In [None]:

# Heatmap variables × semana (si existen anio y semana)
import pandas as pd, numpy as np, matplotlib.pyplot as plt
if all(col in tidy.columns for col in ("anio", "semana", "variable", "valor")):
    # One row per variable, one column per (anio, semana); empty cells become 0.
    mat = pd.pivot_table(
        tidy,
        index="variable",
        columns=["anio", "semana"],
        values="valor",
        aggfunc="sum",
        fill_value=0,
    )
    arr = mat.values

    # Figure height grows with the number of variables, with a minimum of 4.
    n_rows = arr.shape[0]
    fig, ax = plt.subplots(figsize=(12, max(4, n_rows * 0.2)))
    image = ax.imshow(arr, aspect="auto")
    fig.colorbar(image, ax=ax)
    ax.set_title("Heatmap variables × semanas (totales)")
    ax.set_yticks(range(len(mat.index)))
    ax.set_yticklabels(mat.index)
    ax.set_xticks([])  # no tick marks on the week axis
    fig.tight_layout()

    out = FIG / "C_heatmap_variables_semanas.png"
    fig.savefig(out, dpi=150)
    plt.close(fig)
    print("Guardado:", out)


## 4) Export BI

In [None]:
# Print where each exported table was written.
for label, filename in (
    ("Hechos", "urgencias_long_tidy.csv"),
    ("Calendario", "calendar_semanal.csv"),
    ("Wide", "urgencias_wide_clean.csv"),
):
    print(f"{label}:", PROC / filename)