<a href="https://colab.research.google.com/github/ebalderasr/ysi-processor/blob/main/process_ysi.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

**Imports + Upload**

In [1]:
import pandas as pd
import numpy as np
from google.colab import files
import io

print("üìÇ Sube el archivo RAW exportado del YSI (CSV).")
uploaded = files.upload()

# Detectar nombre autom√°ticamente
filename = list(uploaded.keys())[0]
df_raw = pd.read_csv(io.BytesIO(uploaded[filename]))

print("\n‚úî Archivo cargado correctamente:\n")
df_raw.head()


üìÇ Sube el archivo RAW exportado del YSI (CSV).


IndexError: list index out of range

**Procesar replicas ‚Üí Summary**

In [None]:
# Funci√≥n outliers
def detect_outliers_zscore(values, threshold=2.0):
    if len(values) < 3:
        return [False] * len(values)
    mean = np.mean(values)
    std = np.std(values)
    if std == 0:
        return [False] * len(values)
    return [abs((v - mean) / std) > threshold for v in values]

# Procesamiento
df = df_raw.copy()
df.columns = df.columns.str.strip()
df["Conc_clean"] = pd.to_numeric(df["Concentration"], errors="coerce")
df["is_outlier"] = False

for (plate, well, chem), group in df.groupby(["PlateSequenceName", "WellId", "ChemistryId"]):
    idx = group.index
    vals = group["Conc_clean"].tolist()
    flags = detect_outliers_zscore(vals)
    df.loc[idx, "is_outlier"] = flags

summary = (
    df.groupby(["PlateSequenceName", "WellId", "ChemistryId"])
    .agg(
        mean_value=("Conc_clean", "mean"),
        std_value=("Conc_clean", "std"),
        cv_value=("Conc_clean", lambda x: np.std(x) / np.mean(x) if np.mean(x) != 0 else np.nan),
        n_reps=("Conc_clean", "count"),
        outlier_count=("is_outlier", "sum"),
        units=("Units", lambda x: list(set(x))[0]),
    )
    .reset_index()
)

print("üìä Resumen generado:")
display(summary)


**Convertir a formato wide**

In [None]:
wide = summary.pivot_table(
    index=["PlateSequenceName", "WellId"],
    columns="ChemistryId",
    values="mean_value"
).reset_index()

cols = ["PlateSequenceName", "WellId", "Glucose", "Lactate", "Glutamine", "Glutamate"]
wide = wide.reindex(columns=[c for c in cols if c in wide.columns])

print("üìå Datos en formato ancho:")
display(wide)


**Descargar archivos**

In [None]:
# Guardar archivos
summary.to_csv("ysi_summary.csv", index=False)
wide.to_csv("ysi_summary_wide.csv", index=False)

print("üìÅ Archivos generados:")

files.download("ysi_summary.csv")
files.download("ysi_summary_wide.csv")
