In [None]:
import os
from pathlib import Path
import numpy as np
import pandas as pd
from scipy.stats import shapiro, kstest, norm, anderson
import matplotlib.pyplot as plt


In [None]:
# Candidate locations of the workbook; the first one that exists on disk wins.
FILE_CANDIDATES = [
    "Taller 2.1.DN.SR.xlsx"]  # path in the current environment

# Pick the first existing candidate, or None when none of them is present.
xlsx_path = next(
    (candidate for candidate in FILE_CANDIDATES if Path(candidate).exists()),
    None,
)

if xlsx_path is None:
    raise FileNotFoundError(
        "No se encontró 'Taller 2.1.DN.SR.xlsx'. Ajusta FILE_CANDIDATES con tu ruta local."
    )

# Load the worksheet that holds the raw observations.
df = pd.read_excel(xlsx_path, sheet_name="Data set")

In [None]:
# STATISTICS (mean μ and sample standard deviation σ) per variable
# ---------------------------------------------
def mu_sigma(series: pd.Series):
    """Return ``(mean, sample standard deviation)`` of the numeric values in ``series``.

    Entries that cannot be parsed as numbers are coerced to NaN and dropped
    before the statistics are computed. The standard deviation uses
    ``ddof=1``. Both results are converted to plain Python floats.
    """
    numeric = pd.to_numeric(series, errors="coerce")
    valid = numeric.dropna()
    mean_value = valid.mean()
    sample_sd = valid.std(ddof=1)
    return float(mean_value), float(sample_sd)

# Estimate (μ, σ) for each variable; the column order matches the unpacking order.
(
    (mu_age, sd_age),
    (mu_pas, sd_pas),
    (mu_col, sd_col),
    (mu_imc, sd_imc),
    (mu_coag, sd_coag),
) = [
    mu_sigma(df[column])
    for column in ("edad_años", "pas_mmHg", "colesterol_mgdl", "imc", "coag_seg")
]

# Emit the whole report with one call; the trailing "" produces the final blank line.
print(
    "=== Parámetros estimados (μ, σ) desde la hoja 'Data set' ===",
    f"Edad (años)        -> μ={mu_age:.4f}, σ={sd_age:.4f}",
    f"PAS (mmHg)         -> μ={mu_pas:.4f}, σ={sd_pas:.4f}",
    f"Colesterol (mg/dL) -> μ={mu_col:.4f}, σ={sd_col:.4f}",
    f"IMC (kg/m²)        -> μ={mu_imc:.4f}, σ={sd_imc:.4f}",
    f"Coagulación (s)    -> μ={mu_coag:.4f}, σ={sd_coag:.4f}",
    "",
    sep="\n",
)

# Printing helper
def fmt(v):
    """Format ``v`` as a fixed-point string with four decimal places."""
    # A named function (rather than a lambda bound to a name) shows up as
    # "fmt" in tracebacks and can carry this docstring.
    return f"{v:.4f}"

In [None]:
# [1] Age: probability that age is below 40 under the fitted normal model.
# Excel: =NORM.DIST(40, μ_age, σ_age, TRUE)
p1 = norm(loc=mu_age, scale=sd_age).cdf(40)
print("[1] P(Edad < 40) =", fmt(p1))

In [None]:
# [2] Age: probability that age lies between 45 and 60 (difference of two CDF values).
# Excel: =NORM.DIST(60, μ_age, σ_age, TRUE) - NORM.DIST(45, μ_age, σ_age, TRUE)
age_model = norm(loc=mu_age, scale=sd_age)
p2 = age_model.cdf(60) - age_model.cdf(45)
print("[2] P(45 ≤ Edad ≤ 60) =", fmt(p2))

In [None]:
# [3] Age: 90th percentile (lower bound of the top 10%).
# Excel: =NORM.INV(0.90, μ_age, σ_age)
# =========================================================
p3 = norm.ppf(0.90, loc=mu_age, scale=sd_age)
print("[3] Edad en P90 =", fmt(p3), "años")


In [None]:
# [4] PAS: probability that PAS exceeds 140 mmHg under the fitted normal model.
# Excel: =1 - NORM.DIST(140, μ_pas, σ_pas, TRUE)
# =========================================================
# norm.sf is the survival function (1 - cdf); computing the upper tail
# directly avoids the cancellation that 1.0 - cdf suffers in the far tail.
p4 = norm.sf(140, loc=mu_pas, scale=sd_pas)
print("[4] P(PAS > 140) =", fmt(p4))


In [None]:
# [5] PAS: value at the 95th percentile of the fitted normal model.
# Excel: =NORM.INV(0.95, μ_pas, σ_pas)
# =========================================================
p5 = norm(loc=mu_pas, scale=sd_pas).ppf(0.95)
print("[5] PAS en P95 =", fmt(p5), "mmHg")


In [None]:
# [6] Cholesterol: probability that cholesterol exceeds 240 mg/dL.
# Excel: =1 - NORM.DIST(240, μ_col, σ_col, TRUE)
# =========================================================
# norm.sf is the survival function (1 - cdf); computing the upper tail
# directly avoids the cancellation that 1.0 - cdf suffers in the far tail.
p6 = norm.sf(240, loc=mu_col, scale=sd_col)
print("[6] P(Colesterol > 240) =", fmt(p6))


In [None]:
# [7] Cholesterol: central 95% interval (2.5th to 97.5th percentile).
# Excel (lower): =NORM.INV(0.025, μ_col, σ_col)
# Excel (upper): =NORM.INV(0.975, μ_col, σ_col)
# =========================================================
# Both bounds come from one vectorised ppf call and are unpacked in order.
col_p025, col_p975 = norm.ppf([0.025, 0.975], loc=mu_col, scale=sd_col)
print("[7] Colesterol central 95% =", f"[{fmt(col_p025)}, {fmt(col_p975)}] mg/dL")

In [None]:
# [8] IMC: probability that IMC exceeds 30 (obesity).
# Excel: =1 - NORM.DIST(30, μ_imc, σ_imc, TRUE)
# =========================================================
# norm.sf is the survival function (1 - cdf); computing the upper tail
# directly avoids the cancellation that 1.0 - cdf suffers in the far tail.
p8 = norm.sf(30, loc=mu_imc, scale=sd_imc)
print("[8] P(IMC > 30) =", fmt(p8))

In [None]:
# [9] IMC: 75th percentile of the fitted normal model.
# Excel: =NORM.INV(0.75, μ_imc, σ_imc)
# =========================================================
p9 = norm(loc=mu_imc, scale=sd_imc).ppf(0.75)
print("[9] IMC en P75 =", fmt(p9), "kg/m²")


In [None]:
# [10] Coagulation time: probability that it is below 10 seconds.
# Excel: =NORM.DIST(10, μ_coag, σ_coag, TRUE)
# =========================================================
p10 = norm.cdf(10, loc=mu_coag, scale=sd_coag)
print("[10] P(Coag < 10 s) =", fmt(p10))

# (Optional) collect the answers to questions [1]-[10] in a single dict,
# keyed by a short descriptive name.
results = dict(
    p_age_lt_40=p1,
    p_age_45_60=p2,
    age_p90=p3,
    p_pas_gt_140=p4,
    pas_p95=p5,
    p_col_gt_240=p6,
    col_p025=col_p025,
    col_p975=col_p975,
    p_imc_gt_30=p8,
    imc_p75=p9,
    p_coag_lt_10=p10,
)

In [None]:
# [11] Central 95% interval for coagulation time.
# Excel: =NORM.INV(0.025, μ_coag, σ_coag) ; =NORM.INV(0.975, μ_coag, σ_coag)
# =========================================================
# Numeric, non-missing coagulation times.
x = pd.to_numeric(df["coag_seg"], errors="coerce").dropna()
# Reuse the shared helper instead of repeating the mean / ddof=1 std logic here.
mu, sd = mu_sigma(x)
low, high = norm.ppf(0.025, loc=mu, scale=sd), norm.ppf(0.975, loc=mu, scale=sd)
print(f"[11] Intervalo 95% coagulación: [{fmt(low)}, {fmt(high)}] segundos")


In [None]:
# [12] Vaccination: observed proportion vs. the expected 80% (two-sided z-test).
# Excel: p_obs = AVERAGE(vacunado)
#        z = (p_obs-0.8)/SQRT(0.8*0.2/n)
# =========================================================
# NOTE(review): assumes 'vacunado' is coded 0/1 so its mean is a proportion — confirm.
vacc = pd.to_numeric(df["vacunado"], errors="coerce").dropna()
n = len(vacc)
if n == 0:
    # Without this guard the standard error below fails with an opaque
    # ZeroDivisionError when no value in the column is numeric.
    raise ValueError(
        "La columna 'vacunado' no contiene valores numéricos; no se puede calcular la proporción."
    )
p_obs = float(vacc.mean())
p0 = 0.80
# Standard error under the null hypothesis p = p0.
se0 = (p0 * (1 - p0) / n) ** 0.5
z = (p_obs - p0) / se0
# norm.sf gives the upper tail directly, avoiding cancellation in 1 - cdf for large |z|.
pval = 2 * norm.sf(abs(z))
print(f"[12] p_obs={fmt(p_obs)}, z={fmt(z)}, p-valor={fmt(pval)} (n={n})")


In [None]:
# [13] Proportion of records attended in the first semester of 2025.
# Excel: =COUNTIFS(YEAR(fecha);2025; MONTH(fecha);"<="&6)/N
# =========================================================
# Unparseable dates become NaT; they fail both comparisons but stay in the denominator.
fechas = pd.to_datetime(df["fecha_atencion"], errors="coerce")
mask = fechas.dt.year.eq(2025) & fechas.dt.month.le(6)
prop = mask.mean()
print(f"[13] Proporción atendidos 1er semestre 2025 = {fmt(prop)}")


In [None]:
# [14] Z value for PAS = 160 and the corresponding upper-tail probability.
# TODO(author): original note says to check this against the reference image and review it.
# Excel: =(160 - μ_pas)/σ_pas
# =========================================================
# Numeric, non-missing PAS readings.
pas = pd.to_numeric(df["pas_mmHg"], errors="coerce").dropna()
# Reuse the shared helper instead of repeating the mean / ddof=1 std logic here.
mu_pas, sd_pas = mu_sigma(pas)
z_val = (160 - mu_pas) / sd_pas
# norm.sf computes the upper tail directly, avoiding cancellation in 1 - cdf.
tail = norm.sf(160, loc=mu_pas, scale=sd_pas)
print(f"[14] z(PAS=160) = {fmt(z_val)} ; P(PAS≥160) = {fmt(tail)}")

In [None]:
# [15] 80th percentile of cholesterol under the fitted normal model.
# Excel: =NORM.INV(0.80, μ_col, σ_col)
# =========================================================
# Numeric, non-missing cholesterol readings.
col = pd.to_numeric(df["colesterol_mgdl"], errors="coerce").dropna()
# Reuse the shared helper instead of repeating the mean / ddof=1 std logic here.
mu_col, sd_col = mu_sigma(col)
cut80 = norm.ppf(0.80, loc=mu_col, scale=sd_col)
print(f"[15] Percentil 80 colesterol = {fmt(cut80)} mg/dL")