<a href="https://colab.research.google.com/github/investigapyrm/sesgoenerrormuestreo/blob/main/sesgomuestreo.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# ────────────── Mount Google Drive ─────────────────────────────────
# The Colab helper is imported under an alias on purpose: further down this
# notebook binds the name `drive` to the Drive API client, and re-running this
# cell with a plain `import drive` would silently replace that client with the
# google.colab module and break the upload helper.
from google.colab import drive as colab_drive
colab_drive.mount('/content/drive')



In [4]:
# ────────────── installation (first run only) ──────────────────────
!pip install --quiet gspread gspread_dataframe google-auth google-api-python-client polars matplotlib scipy


In [7]:

# ────────────── working directory and authentication ───────────────
import os
# Folder on the mounted Drive that becomes the process working directory;
# it is created on first use.
WORKING_DIR = "/content/drive/MyDrive/articuloMuestreo"
os.makedirs(WORKING_DIR, exist_ok=True)
os.chdir(WORKING_DIR)

# Service-account key file, expected to live inside WORKING_DIR.
SERVICE_ACCOUNT_FILE = os.path.join(WORKING_DIR,
    "deep-span-462419-q9-ed811608a142.json"
)
# Key of the spreadsheet opened for the result tables, and id of the Drive
# folder used as parent for uploaded files.
SPREADSHEET_ID  = "1ph9aroyTg5ASJi3NXnbIGhziGuDcE3_F9XwFI-T4FOQ"
DRIVE_FOLDER_ID = "1rs9CKKv88iZLUDy5zCMBQXlNGKTq5bha"

# ────────────── imports generales ──────────────────────────────────
import numpy as np
import polars as pl
import matplotlib.pyplot as plt
import tempfile, os

import gspread
from gspread_dataframe import set_with_dataframe
from google.oauth2.service_account import Credentials
from googleapiclient.discovery import build
from googleapiclient.http import MediaFileUpload
from scipy.stats import t

# ────────────── authentication ─────────────────────────────────────
# OAuth scopes requested for the service account: Sheets access plus the
# per-file Drive scope.
# NOTE(review): `drive.file` is the restricted per-file scope — confirm that
# it is sufficient for creating files inside DRIVE_FOLDER_ID.
scopes = [
    "https://www.googleapis.com/auth/spreadsheets",
    "https://www.googleapis.com/auth/drive.file"
]
creds  = Credentials.from_service_account_file(
    SERVICE_ACCOUNT_FILE, scopes=scopes
)
# Module-level clients shared by the helper functions below:
# `sheets` is the opened spreadsheet, `drive` is the Drive v3 API client.
gc     = gspread.authorize(creds)
sheets = gc.open_by_key(SPREADSHEET_ID)
drive  = build("drive", "v3", credentials=creds, cache_discovery=False)

# ────────────── funciones genéricas ────────────────────────────────
def write_df_to_sheet(df: pl.DataFrame, worksheet_name: str) -> None:
    """Replace the contents of a worksheet with the given DataFrame.

    The worksheet is looked up by title in the module-level ``sheets``
    spreadsheet and created (2000 rows x 26 columns) when it does not exist.
    Its previous contents are cleared before the frame is written with a
    header row and without an index column.

    Args:
        df: Polars frame to publish; it is converted to pandas for writing.
        worksheet_name: Title of the target worksheet.
    """
    try:
        target = sheets.worksheet(worksheet_name)
    except gspread.WorksheetNotFound:
        target = sheets.add_worksheet(title=worksheet_name, rows=2000, cols=26)

    # Wipe stale cells first so a shorter table never leaves old rows behind.
    target.clear()
    set_with_dataframe(
        target,
        df.to_pandas(),
        include_index=False,
        include_column_header=True,
    )

def upload_file_to_drive(local_path: str, remote_name: str, mimetype: str="image/png") -> None:
    """Create a new file in the configured Drive folder from a local file.

    Args:
        local_path: Path of the file to send.
        remote_name: Name the file receives on Drive.
        mimetype: MIME type used both for the upload and the file metadata.
    """
    metadata = {
        "name": remote_name,
        "mimeType": mimetype,
        "parents": [DRIVE_FOLDER_ID],
    }
    payload = MediaFileUpload(local_path, mimetype=mimetype)
    request = drive.files().create(body=metadata, media_body=payload, fields="id")
    request.execute()

def save_plot_to_drive(fig, filename: str) -> None:
    """Render a figure to a temporary PNG and upload it to Drive.

    Args:
        fig: Figure object exposing ``savefig`` (a matplotlib figure in this
            notebook).
        filename: Name the uploaded PNG receives on Drive.

    The temporary file is always deleted, even when rendering or the upload
    raises.
    """
    # Only a unique path is needed: close the handle immediately so that the
    # file is not written to (and later removed) while still open.
    with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as tmp:
        tmp_path = tmp.name
    try:
        fig.savefig(tmp_path, dpi=300, bbox_inches="tight")
        upload_file_to_drive(tmp_path, filename)
    finally:
        # delete=False above means cleanup is our responsibility.
        os.remove(tmp_path)

# ────────────── funciones de simulación ────────────────────────────
def _wald_ci(p_hat: np.ndarray, n: int):
    """Return the (lower, upper) bounds of a Wald interval for a proportion.

    Args:
        p_hat: Estimated proportion(s).
        n: Sample size behind each estimate.

    Returns:
        Tuple ``(lower, upper)`` computed as ``p_hat -/+ 1.96 * SE`` with
        ``SE = sqrt(p_hat * (1 - p_hat) / n)``; bounds are not clipped.
    """
    half_width = 1.96 * np.sqrt(p_hat * (1 - p_hat) / n)
    return p_hat - half_width, p_hat + half_width

def _t_ci(x_bar: np.ndarray, s: np.ndarray, n: int):
    """Return the (lower, upper) bounds of a Student-t interval for a mean.

    Args:
        x_bar: Sample mean(s).
        s: Sample standard deviation(s) matching ``x_bar``.
        n: Sample size; the t quantile uses ``n - 1`` degrees of freedom.

    Returns:
        Tuple ``(lower, upper)`` equal to ``x_bar -/+ t(0.975, n-1) * s / sqrt(n)``.
    """
    margin = t.ppf(0.975, df=n - 1) * (s / np.sqrt(n))
    return x_bar - margin, x_bar + margin

def simulate_proportion(
    p_true: float,
    ns: list[int],
    reps: int=10_000,
    bias_factor: float=2.0,
    seed: int | None = None,
) -> pl.DataFrame:
    """Compare Wald-interval coverage and bias for unbiased vs. biased sampling.

    For every sample size two scenarios are simulated, each with ``reps``
    replicates of ``n`` Bernoulli draws:

    * ``"MAS"``: draws use the true success probability ``p_true``.
    * ``"Conveniencia(b=...)"``: draws use a probability whose odds are
      ``bias_factor`` times the true odds.

    Args:
        p_true: True proportion that the intervals should cover.
        ns: Sample sizes to evaluate.
        reps: Number of simulated samples per scenario and sample size.
        bias_factor: Multiplier applied to the odds of success in the biased
            scenario.
        seed: Optional seed for the random generator; ``None`` (the default)
            keeps the previous behaviour of drawing fresh entropy each call.

    Returns:
        A frame with one row per (method, n) and the columns ``method``,
        ``n``, ``coverage`` (share of intervals containing ``p_true``) and
        ``bias`` (mean estimate minus ``p_true``).
    """
    rng = np.random.default_rng(seed)
    # Odds-scaled probability: p_b / (1 - p_b) == bias_factor * p / (1 - p).
    p_bias = (bias_factor * p_true) / (bias_factor * p_true + (1 - p_true))
    scenarios = (
        ("MAS", p_true),
        (f"Conveniencia(b={bias_factor})", p_bias),
    )

    records = []
    for n in ns:
        for label, p_draw in scenarios:
            draws = rng.binomial(1, p_draw, size=(reps, n))
            p_hat = draws.mean(axis=1)
            lo, hi = _wald_ci(p_hat, n)
            records.append({
                "method": label, "n": n,
                # Coverage and bias are always measured against p_true,
                # also for the biased scenario.
                "coverage": ((lo <= p_true) & (hi >= p_true)).mean(),
                "bias": p_hat.mean() - p_true
            })
    return pl.DataFrame(records)

def simulate_mean(
    mu: float,
    sigma: float,
    ns: list[int],
    reps: int=10_000,
    seed: int | None = None,
) -> pl.DataFrame:
    """Compare t-interval coverage and bias for full vs. truncated sampling.

    For every sample size two scenarios are simulated with ``reps``
    replicates each:

    * ``"MAS"``: samples are drawn directly from ``Normal(mu, sigma)``.
    * ``"Truncado(X>0)"``: samples are drawn with replacement from the
      positive values of a fixed pool of 1,000,000 normal draws.

    Args:
        mu: True mean that the intervals should cover.
        sigma: Standard deviation of the normal population.
        ns: Sample sizes to evaluate.
        reps: Number of simulated samples per scenario and sample size.
        seed: Optional seed for the random generator; ``None`` (the default)
            keeps the previous behaviour of drawing fresh entropy each call.

    Returns:
        A frame with one row per (method, n) and the columns ``method``,
        ``n``, ``coverage`` (share of intervals containing ``mu``) and
        ``bias`` (mean of the sample means minus ``mu``).
    """
    rng = np.random.default_rng(seed)
    records = []

    # Finite pool standing in for the truncated population; it is built once
    # and reused for every sample size.
    # NOTE(review): if no pooled value is positive the index draw below has
    # an empty range and is expected to fail — confirm for extreme mu/sigma.
    x_long = rng.normal(mu, sigma, 1_000_000)
    x_trunc = x_long[x_long > 0]

    for n in ns:
        # Unbiased scenario: direct draws from the population.
        x = rng.normal(mu, sigma, size=(reps, n))
        xb = x.mean(axis=1)
        s = x.std(axis=1, ddof=1)
        lo, hi = _t_ci(xb, s, n)
        records.append({
            "method": "MAS", "n": n,
            "coverage": ((lo <= mu) & (hi >= mu)).mean(),
            "bias": xb.mean() - mu
        })

        # Truncated scenario: resample the positive pool with replacement.
        idx = rng.integers(0, len(x_trunc), size=(reps, n))
        xt = x_trunc[idx]
        xbt = xt.mean(axis=1)
        sb = xt.std(axis=1, ddof=1)
        lot, hit = _t_ci(xbt, sb, n)
        records.append({
            "method": "Truncado(X>0)", "n": n,
            # Still evaluated against the untruncated mean mu.
            "coverage": ((lot <= mu) & (hit >= mu)).mean(),
            "bias": xbt.mean() - mu
        })
    return pl.DataFrame(records)

# ────────────── pipeline completo ─────────────────────────────────
def main():
    """Run both simulations, publish the tables and upload the four figures.

    The proportion and mean simulations are written to the worksheets
    ``"t1"`` and ``"t2"``; for each of them a coverage plot (with a dashed
    reference line at 0.95) and a bias plot are uploaded to Drive as PNG.
    """
    ns      = [30, 60, 120, 240, 480, 960]
    df_prop = simulate_proportion(p_true=0.40, ns=ns, bias_factor=2.0)
    df_mean = simulate_mean(mu=0.0, sigma=1.0, ns=ns)

    write_df_to_sheet(df_prop, "t1")
    write_df_to_sheet(df_mean, "t2")

    def plot_and_upload(df: pl.DataFrame, metric: str, ylabel: str, ref: float|None, filename: str):
        """Plot ``metric`` against ``n`` with one line per method and upload it."""
        fig, ax = plt.subplots()
        try:
            # maintain_order keeps methods in first-appearance order, so each
            # method gets the same colour and legend slot in every figure.
            for m in df["method"].unique(maintain_order=True):
                sub = df.filter(pl.col("method") == m).sort("n")
                ax.plot(sub["n"], sub[metric], marker="o", label=m)
            if ref is not None:
                ax.axhline(ref, ls="--")
            ax.set_xscale("log", base=2)
            ax.set_xlabel("Tamaño muestral (n)")
            ax.set_ylabel(ylabel)
            ax.legend()
            fig.tight_layout()
            save_plot_to_drive(fig, filename)
        finally:
            # Release the figure even when rendering or the upload fails.
            plt.close(fig)

    plot_and_upload(df_prop,   "coverage",   "Cobertura IC 95 %", 0.95, "fig_cobertura_prop.png")
    plot_and_upload(df_prop,   "bias",       "Sesgo de p̂",       None, "fig_sesgo_prop.png")
    plot_and_upload(df_mean,   "coverage",   "Cobertura IC 95 %", 0.95, "fig_cobertura_media.png")
    plot_and_upload(df_mean,   "bias",       "Sesgo de x̄",       None, "fig_sesgo_media.png")

# Run the full pipeline only when this code is executed as the main module,
# not when it is imported.
if __name__ == "__main__":
    main()
