In [1]:
import pandas as pd
from pathlib import Path

# ============================================================
# Tabla celltype × condición con mediana (q25–q75)
# INPUT (WIDE): cell_proportions_Level2final_by_patient.csv
#   cols esperadas: patientID, disease, <celltypes...>
# OUTPUT: Table_median_IQR_Level2final.{html,tsv,docx}
# ============================================================

# --- 1) Input location and output naming ---
# Raw string so the Windows backslashes are taken literally.
csv_path = r"D:\Users\Coni\Documents\TFM_CirrhosIS\summary_tables_final\cell_proportions_Level2final_by_patient.csv"
TABLE_TAG = "Level2final"  # tag embedded in every output file name
out_dir = Path(csv_path).parent  # outputs are written next to the input CSV

# --- 2) Load the WIDE table ---
# Expected columns: patientID, disease, then one column per cell type.
df_wide = pd.read_csv(csv_path)

# Minimal sanity check: the identifier columns used by the reshape must exist.
needed = {"patientID", "disease"}
missing = needed.difference(df_wide.columns)
if missing:
    raise KeyError(f"Faltan columnas en {csv_path}: {missing}")

# --- 3) wide -> long: one row per (patientID, disease, celltype) ---
# Every column that is not an identifier is treated as a cell-type column.
long = pd.melt(
    df_wide,
    id_vars=["patientID", "disease"],
    var_name="celltype",
    value_name="proportion",
)

# Force a numeric dtype; values that cannot be parsed become NaN.
long["proportion"] = pd.to_numeric(long["proportion"], errors="coerce")


# --- 4) Summary statistics per (celltype x disease) ---
def _q25(values):
    """Return the 25th percentile of *values*."""
    return values.quantile(0.25)


def _q75(values):
    """Return the 75th percentile of *values*."""
    return values.quantile(0.75)


_per_group = long.groupby(["celltype", "disease"], as_index=False)["proportion"]
df = _per_group.agg(
    mean_prop="mean",
    median_prop="median",
    q25_prop=_q25,
    q75_prop=_q75,
)

# --- 5) Display format: proportions shown as percentages ---
AS_PERCENT = True  # when true, values are multiplied by 100 before formatting
DECIMALS = 1       # number of digits after the decimal point


def fmt(x):
    """Render one statistic as a fixed-decimal string.

    Missing values (anything ``pd.isna`` reports as missing) yield an empty
    string. Otherwise the value is scaled by 100 when ``AS_PERCENT`` is true
    and formatted with ``DECIMALS`` decimals. Both settings are read from the
    module globals at call time.
    """
    if pd.isna(x):
        return ""
    if AS_PERCENT:
        value = x * 100
    else:
        value = x
    return format(value, f".{DECIMALS}f")

def _median_iqr_label(row):
    """Build the 'median (q25–q75)' cell text for one summary row."""
    median, low, high = (
        fmt(row[name]) for name in ("median_prop", "q25_prop", "q75_prop")
    )
    return f"{median} ({low}–{high})"


# One formatted string per (celltype, disease) row.
df["median_IQR"] = df.apply(_median_iqr_label, axis=1)

# --- 6) Pivot to a celltype x disease table ---
tab = (
    df.pivot(index="celltype", columns="disease", values="median_IQR")
      .reset_index()
)

# Column order: Healthy first, then Cirrhosis (edit the list to taste).
# Any other condition present in the data is appended afterwards, in the
# order the pivot produced it, instead of being silently dropped when its
# label does not match the preferred list exactly.
preferred_conditions = ["Healthy", "Cirrhosis"]
present_conditions = [c for c in tab.columns if c != "celltype"]
col_order = (
    ["celltype"]
    + [c for c in preferred_conditions if c in present_conditions]
    + [c for c in present_conditions if c not in preferred_conditions]
)
tab = tab[col_order].sort_values("celltype")

# --- 7) Export in paste-friendly formats ---
# Both files share the same stem and live next to the input CSV.
_table_stem = f"Table_median_IQR_{TABLE_TAG}"
html_path = out_dir.joinpath(_table_stem + ".html")
tsv_path = out_dir.joinpath(_table_stem + ".tsv")

# The DataFrame index carries no information here, so it is left out.
tab.to_csv(tsv_path, sep="\t", index=False)
tab.to_html(html_path, index=False)

print("OK:")
print(f"HTML: {html_path}")
print(f"TSV : {tsv_path}")

# --- 8) (Optional) DOCX export ---
# Needs the third-party `docx` module (python-docx). Only a missing module
# counts as "skipped"; a failure while writing the file is reported as a
# failure, so a locked or unwritable .docx is not mistaken for a missing
# dependency.
try:
    from docx import Document
except ImportError as e:
    print("DOCX export skipped:", e)
else:
    docx_path = out_dir / f"Table_median_IQR_{TABLE_TAG}.docx"
    doc = Document()
    doc.add_paragraph(
        "Tabla. Proporciones por paciente (mediana [q25–q75]) por subpoblación y condición."
    )

    # Header row: one cell per column of the table.
    table = doc.add_table(rows=1, cols=len(tab.columns))
    hdr = table.rows[0].cells
    for cell, col in zip(hdr, tab.columns):
        cell.text = str(col)

    # Body: one DOCX row per table row, cells filled left to right.
    for record in tab.itertuples(index=False, name=None):
        for cell, value in zip(table.add_row().cells, record):
            cell.text = str(value)

    try:
        doc.save(docx_path)
    except OSError as e:
        # Best effort: e.g. the target file is open in another program.
        print("DOCX export failed:", e)
    else:
        print("DOCX:", docx_path)


OK:
HTML: D:\Users\Coni\Documents\TFM_CirrhosIS\summary_tables_final\Table_median_IQR_Level2final.html
TSV : D:\Users\Coni\Documents\TFM_CirrhosIS\summary_tables_final\Table_median_IQR_Level2final.tsv
DOCX export skipped: No module named 'docx'
