In [2]:
# %% EDA PROFESIONAL SOBRE TODOS LOS .parquet "split" (una celda lista para ejecutar)

from __future__ import annotations
import os
import json
import math
import textwrap
from pathlib import Path
from typing import Dict, List, Tuple, Optional

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# --------------------------- CONFIGURACIÓN ---------------------------
PARQUET_DIR = Path("data_drive/_parquet_export/split")  # carpeta de entrada
EDA_ROOT    = Path("data_drive/_eda")                   # carpeta raíz de informes EDA
MAX_BARS_PER_CAT = 10                                   # top categorías a graficar
MAX_NUM_HISTS    = 6                                    # máximo de histogramas por dataset
MAX_CAT_BARS     = 6                                    # máximo de barras de categorías por dataset
MAX_TIME_SERIES  = 2                                    # máximo de series temporales a graficar
SAMPLE_FOR_PLOTS = 250_000                              # muestreo para plots si el DF es muy grande

pd.set_option("display.max_columns", 200)
pd.set_option("display.width", 2000)

EDA_ROOT.mkdir(parents=True, exist_ok=True)

# --------------------------- UTILIDADES ---------------------------

def normalize_colnames(cols: List[str]) -> List[str]:
    out = []
    for c in cols:
        c2 = (
            str(c)
            .strip()
            .replace("\n", " ")
            .replace("\r", " ")
        )
        c2 = "_".join(c2.split())  # colapsa espacios
        out.append(c2)
    # maneja duplicados conservando el orden
    seen = {}
    final = []
    for c in out:
        if c not in seen:
            seen[c] = 0
            final.append(c)
        else:
            seen[c] += 1
            final.append(f"{c}__dup{seen[c]}")
    return final

def maybe_to_numeric(series: pd.Series) -> pd.Series:
    if pd.api.types.is_numeric_dtype(series):
        return series
    # intenta parseo numérico si hay muchos caracteres numéricos
    s = pd.to_numeric(series.astype("string").str.replace(",", ".", regex=False), errors="coerce")
    # convierte si al menos 85% de no-nulos sobreviven
    non_null = series.notna().sum()
    if non_null == 0:
        return series
    good = s.notna().sum() / non_null
    return s if good >= 0.85 else series

def maybe_to_datetime(series: pd.Series) -> pd.Series:
    if pd.api.types.is_datetime64_any_dtype(series):
        return series
    # intento agresivo de parseo de fecha
    s = pd.to_datetime(series, errors="coerce", utc=False, infer_datetime_format=True)
    non_null = series.notna().sum()
    if non_null == 0:
        return series
    good = s.notna().sum() / non_null
    return s if good >= 0.85 else series

def infer_types(df: pd.DataFrame) -> pd.DataFrame:
    df2 = df.copy()
    # strip en strings
    obj_cols = df2.select_dtypes(include=["object", "string"]).columns
    for c in obj_cols:
        df2[c] = df2[c].astype("string").str.strip()
    # numéricos potenciales
    for c in obj_cols:
        df2[c] = maybe_to_numeric(df2[c])
    # fechas potenciales (incluye columnas recién convertidas a string si falló numérico)
    for c in df2.columns:
        if df2[c].dtype == "object" or str(df2[c].dtype) == "string":
            df2[c] = maybe_to_datetime(df2[c])
    return df2

def mem_info(df: pd.DataFrame) -> Tuple[int, str]:
    b = df.memory_usage(deep=True).sum()
    units = ["B","KB","MB","GB","TB"]
    i = 0
    val = float(b)
    while val >= 1024 and i < len(units)-1:
        val /= 1024.0
        i += 1
    return b, f"{val:,.2f} {units[i]}"

def iqr_outlier_stats(s: pd.Series) -> Tuple[int, float, float, float]:
    q1 = s.quantile(0.25)
    q3 = s.quantile(0.75)
    iqr = q3 - q1
    if not np.isfinite(iqr) or iqr == 0:
        return 0, q1, q3, iqr
    low = q1 - 1.5 * iqr
    high = q3 + 1.5 * iqr
    outliers = ((s < low) | (s > high)).sum()
    return int(outliers), low, high, float(iqr)

def safe_fig(path: Path):
    path.parent.mkdir(parents=True, exist_ok=True)
    plt.savefig(path, bbox_inches="tight", dpi=150)
    plt.close()

def plot_histograms(df: pd.DataFrame, name: str, out_dir: Path, max_plots: int = MAX_NUM_HISTS):
    nums = df.select_dtypes(include=np.number).columns.tolist()
    if not nums:
        return []
    cols = nums[:max_plots]
    paths = []
    # posible muestreo
    data = df[cols]
    if len(df) > SAMPLE_FOR_PLOTS:
        data = data.sample(SAMPLE_FOR_PLOTS, random_state=42)
    for c in cols:
        plt.figure()
        data[c].dropna().plot(kind="hist", bins=50, title=f"{name} - Histograma: {c}")
        p = out_dir / f"hist_{c}.png"
        safe_fig(p)
        paths.append(p)
    return paths

def plot_top_categories(df: pd.DataFrame, name: str, out_dir: Path, max_plots: int = MAX_CAT_BARS, k:int = MAX_BARS_PER_CAT):
    cats = df.select_dtypes(include=["object","string","category"]).columns.tolist()
    if not cats:
        return []
    cols = cats[:max_plots]
    paths = []
    for c in cols:
        vc = df[c].value_counts(dropna=False).head(k)
        plt.figure(figsize=(8,4))
        vc.plot(kind="bar", title=f"{name} - Top {k} categorías: {c}", rot=45)
        p = out_dir / f"bar_{c}.png"
        safe_fig(p)
        paths.append(p)
    return paths

def plot_time_series(df: pd.DataFrame, name: str, out_dir: Path, max_plots: int = MAX_TIME_SERIES):
    dt_cols = df.select_dtypes(include=["datetime64[ns]", "datetime64[ns, UTC]"]).columns.tolist()
    paths = []
    if not dt_cols:
        return paths
    cols = dt_cols[:max_plots]
    for c in cols:
        s = df[c].dropna().sort_values()
        if s.empty:
            continue
        # agrega por día
        counts = s.dt.floor("D").value_counts().sort_index()
        plt.figure(figsize=(9,4))
        counts.plot(title=f"{name} - Frecuencia diaria: {c}")
        p = out_dir / f"ts_daily_{c}.png"
        safe_fig(p)
        paths.append(p)
    return paths

def top_correlations(df: pd.DataFrame, thr: float = 0.7, topk: int = 20) -> List[Tuple[str, str, float]]:
    nums = df.select_dtypes(include=np.number)
    if nums.shape[1] < 2:
        return []
    corr = nums.corr(numeric_only=True)
    pairs = []
    cols = corr.columns.tolist()
    for i in range(len(cols)):
        for j in range(i+1, len(cols)):
            v = corr.iloc[i, j]
            if pd.notna(v):
                pairs.append((cols[i], cols[j], float(v)))
    pairs = sorted(pairs, key=lambda x: abs(x[2]), reverse=True)
    if thr is not None:
        pairs = [p for p in pairs if abs(p[2]) >= thr]
    return pairs[:topk]

def df_quick_profile(df: pd.DataFrame) -> Dict:
    n_rows, n_cols = df.shape
    dtypes = df.dtypes.astype(str).to_dict()
    nulls = df.isna().sum().to_dict()
    nulls_pct = {k: (v / n_rows * 100.0) if n_rows else 0.0 for k, v in nulls.items()}
    uniques = df.nunique(dropna=False).to_dict()
    zeros_pct = {}
    for c in df.select_dtypes(include=np.number).columns:
        zeros = (df[c] == 0).sum()
        zeros_pct[c] = (zeros / n_rows * 100.0) if n_rows else 0.0
    mem_bytes, mem_hr = mem_info(df)
    return {
        "shape": {"rows": int(n_rows), "cols": int(n_cols)},
        "memory": {"bytes": int(mem_bytes), "human_readable": mem_hr},
        "dtypes": dtypes,
        "nulls": nulls,
        "nulls_pct": {k: round(v, 3) for k, v in nulls_pct.items()},
        "uniques": uniques,
        "zeros_pct_numeric": {k: round(v, 3) for k, v in zeros_pct.items()},
    }

def describe_numeric_plus(df: pd.DataFrame) -> pd.DataFrame:
    nums = df.select_dtypes(include=np.number)
    if nums.empty:
        return pd.DataFrame()
    desc = nums.describe().T
    desc["skew"] = nums.skew(numeric_only=True)
    desc["kurtosis"] = nums.kurt(numeric_only=True)
    # outliers IQR
    outs = []
    for c in nums.columns:
        s = nums[c].dropna()
        count, low, high, iqr = iqr_outlier_stats(s)
        pct = (count / len(nums)) * 100 if len(nums) else 0.0
        outs.append((c, count, pct, low, high, iqr))
    outs_df = pd.DataFrame(outs, columns=["col","outliers_count","outliers_pct","low_fence","high_fence","iqr"]).set_index("col")
    return desc.join(outs_df, how="left")

def write_markdown_report(
    name: str,
    profile: Dict,
    num_desc: pd.DataFrame,
    corr_pairs: List[Tuple[str,str,float]],
    fig_paths: List[Path],
    out_dir: Path
) -> None:
    md = []
    md.append(f"# EDA - {name}\n")
    md.append("## Resumen")
    md.append(f"- **Filas**: {profile['shape']['rows']:,}")
    md.append(f"- **Columnas**: {profile['shape']['cols']:,}")
    md.append(f"- **Memoria**: {profile['memory']['human_readable']} ({profile['memory']['bytes']:,} bytes)")
    # tipos
    type_counts = pd.Series(profile["dtypes"]).value_counts().to_dict()
    md.append(f"- **Tipos de datos**: {', '.join([f'{k}: {v}' for k,v in type_counts.items()])}")
    # nulos top
    nulls_pct = pd.Series(profile["nulls_pct"]).sort_values(ascending=False).head(10)
    if len(nulls_pct) > 0:
        md.append("\n## Top columnas por % de nulos")
        md.append(nulls_pct.to_frame("nulls_pct").to_markdown())
    # únicos top
    uniques = pd.Series(profile["uniques"]).sort_values(ascending=False).head(10)
    md.append("\n## Top columnas por cardinalidad")
    md.append(uniques.to_frame("unique_values").to_markdown())
    # numéricos
    if not num_desc.empty:
        md.append("\n## Resumen numérico extendido")
        md.append(num_desc.round(4).to_markdown())
    # correlaciones
    if corr_pairs:
        md.append("\n## Correlaciones fuertes (|r| ≥ 0.70)")
        corr_df = pd.DataFrame(corr_pairs, columns=["col1","col2","pearson_r"])
        md.append(corr_df.to_markdown(index=False))
    # figuras
    if fig_paths:
        md.append("\n## Gráficas")
        for p in fig_paths:
            rel = p.name
            md.append(f"![{rel}]({rel})")
    # guarda
    out_dir.mkdir(parents=True, exist_ok=True)
    (out_dir / "report.md").write_text("\n\n".join(md), encoding="utf-8")

def eda_for_dataframe(df: pd.DataFrame, dataset_name: str, base_out_dir: Path) -> Dict:
    """Ejecuta EDA completo, guarda artefactos y devuelve un dict resumen."""
    out_dir = base_out_dir / dataset_name
    out_dir.mkdir(parents=True, exist_ok=True)

    # Perfil básico
    profile = df_quick_profile(df)

    # Descripción numérica extendida
    num_desc = describe_numeric_plus(df)

    # Correlaciones fuertes
    corr_pairs = top_correlations(df, thr=0.70, topk=30)

    # Gráficas
    fig_paths = []
    fig_paths += plot_histograms(df, dataset_name, out_dir)
    fig_paths += plot_top_categories(df, dataset_name, out_dir)
    fig_paths += plot_time_series(df, dataset_name, out_dir)

    # Guarda perfil JSON + CSVs de apoyo
    (out_dir / "profile.json").write_text(json.dumps(profile, indent=2, ensure_ascii=False), encoding="utf-8")
    if not num_desc.empty:
        num_desc.to_csv(out_dir / "numeric_summary.csv", index=True)

    # Reporte Markdown
    write_markdown_report(dataset_name, profile, num_desc, corr_pairs, fig_paths, out_dir)

    # Devuelve resumen ejecutivo
    return {
        "name": dataset_name,
        "shape": profile["shape"],
        "memory_human": profile["memory"]["human_readable"],
        "n_numeric": int(df.select_dtypes(include=np.number).shape[1]),
        "n_categorical": int(df.select_dtypes(include=['object','string','category']).shape[1]),
        "n_datetime": int(df.select_dtypes(include=['datetime64[ns]','datetime64[ns, UTC]']).shape[1]),
        "corr_strong_pairs": corr_pairs[:5],
        "report_path": str((out_dir / "report.md").resolve()),
    }

# --------------------------- PIPELINE PRINCIPAL ---------------------------

# 1) Descubre .parquet
parquets = sorted(PARQUET_DIR.glob("*.parquet"))
if not parquets:
    raise SystemExit(f"No se encontraron .parquet en {PARQUET_DIR.resolve()}")

# 2) Carga y tipificación
dfs: Dict[str, pd.DataFrame] = {}
for p in parquets:
    name = p.stem  # p.ej., "ls1"
    df = pd.read_parquet(p)
    # normaliza nombres de columnas
    df.columns = normalize_colnames(df.columns.tolist())
    # inferencia de tipos
    df = infer_types(df)
    # expone como variable global df_<name> si es un nombre válido
    var_name = f"df_{name.lower()}"
    globals()[var_name] = df
    dfs[name.lower()] = df
    print(f"Cargado {p.name} -> variable: {var_name} | shape={df.shape}")

# 3) EDA por DataFrame
executive_summary = []
for name, df in dfs.items():
    print(f"\n=== EDA: {name} ===")
    # duplicados de filas completas
    dup_count = df.duplicated().sum()
    print(f"Duplicados (filas completas): {dup_count:,}")
    # vista rápida
    display(df.head(10))

    summary = eda_for_dataframe(df, dataset_name=name, base_out_dir=EDA_ROOT)
    executive_summary.append(summary)
    print(f"➡ Informe Markdown: {summary['report_path']}")

# 4) Resumen ejecutivo global
print("\n\n================ RESUMEN EJECUTIVO GLOBAL ================")
summary_df = pd.DataFrame(executive_summary)
display(summary_df)

# 5) Guardado del resumen global
summary_path = (EDA_ROOT / "resumen_global.csv")
summary_df.to_csv(summary_path, index=False)
print(f"Resumen global guardado en: {summary_path.resolve()}")

"""
Salida:
- Variables disponibles en el notebook: df_ls1, df_ls2, ... (según existan .parquet).
- Carpeta con informes por dataset (Markdown + PNG + CSV): data_drive/_eda/<dataset>/
- Resumen ejecutivo consolidado: data_drive/_eda/resumen_global.csv
"""


Cargado ls1.parquet -> variable: df_ls1 | shape=(1058, 4)
Cargado ls2.parquet -> variable: df_ls2 | shape=(902, 4)
Cargado ls3.parquet -> variable: df_ls3 | shape=(1056, 4)
Cargado ls4.parquet -> variable: df_ls4 | shape=(1056, 4)
Cargado ls5.parquet -> variable: df_ls5 | shape=(1058, 4)
Cargado ls6.parquet -> variable: df_ls6 | shape=(15, 4)

=== EDA: ls1 ===
Duplicados (filas completas): 0


  s = pd.to_datetime(series, errors="coerce", utc=False, infer_datetime_format=True)
  s = pd.to_datetime(series, errors="coerce", utc=False, infer_datetime_format=True)
  s = pd.to_datetime(series, errors="coerce", utc=False, infer_datetime_format=True)
  s = pd.to_datetime(series, errors="coerce", utc=False, infer_datetime_format=True)
  s = pd.to_datetime(series, errors="coerce", utc=False, infer_datetime_format=True)
  s = pd.to_datetime(series, errors="coerce", utc=False, infer_datetime_format=True)
  s = pd.to_datetime(series, errors="coerce", utc=False, infer_datetime_format=True)
  s = pd.to_datetime(series, errors="coerce", utc=False, infer_datetime_format=True)
  s = pd.to_datetime(series, errors="coerce", utc=False, infer_datetime_format=True)
  s = pd.to_datetime(series, errors="coerce", utc=False, infer_datetime_format=True)
  s = pd.to_datetime(series, errors="coerce", utc=False, infer_datetime_format=True)
  s = pd.to_datetime(series, errors="coerce", utc=False, infer_da

Unnamed: 0,mac,time,vlx,source_file
0,40:22:D8:F1:E3:70,2025-07-15 12:14:21,317.0,data_drive\LS1\silos_vlx_1.csv
1,40:22:D8:F1:E3:70,2025-07-15 12:44:22,326.0,data_drive\LS1\silos_vlx_1.csv
2,40:22:D8:F1:E3:70,2025-07-15 13:14:24,315.0,data_drive\LS1\silos_vlx_1.csv
3,40:22:D8:F1:E3:70,2025-07-15 13:44:26,316.0,data_drive\LS1\silos_vlx_1.csv
4,40:22:D8:F1:E3:70,2025-07-15 14:14:27,308.0,data_drive\LS1\silos_vlx_1.csv
5,40:22:D8:F1:E3:70,2025-07-15 14:44:29,309.0,data_drive\LS1\silos_vlx_1.csv
6,40:22:D8:F1:E3:70,2025-07-15 15:14:30,309.0,data_drive\LS1\silos_vlx_1.csv
7,40:22:D8:F1:E3:70,2025-07-15 15:44:32,310.0,data_drive\LS1\silos_vlx_1.csv
8,40:22:D8:F1:E3:70,2025-07-15 16:14:33,316.0,data_drive\LS1\silos_vlx_1.csv
9,40:22:D8:F1:E3:70,2025-07-15 16:44:35,316.0,data_drive\LS1\silos_vlx_1.csv


➡ Informe Markdown: D:\ls_feed\data_drive\_eda\ls1\report.md

=== EDA: ls2 ===
Duplicados (filas completas): 0


Unnamed: 0,mac,time,vlx,source_file
0,40:22:D8:F1:E2:CC,2025-07-15 12:14:48,283.0,data_drive\LS2\silos_vlx_0.csv
1,40:22:D8:F1:E2:CC,2025-07-15 12:44:49,283.0,data_drive\LS2\silos_vlx_0.csv
2,40:22:D8:F1:E2:CC,2025-07-15 13:14:51,281.0,data_drive\LS2\silos_vlx_0.csv
3,40:22:D8:F1:E2:CC,2025-07-15 13:44:53,278.0,data_drive\LS2\silos_vlx_0.csv
4,40:22:D8:F1:E2:CC,2025-07-15 14:14:54,279.0,data_drive\LS2\silos_vlx_0.csv
5,40:22:D8:F1:E2:CC,2025-07-15 14:44:56,285.0,data_drive\LS2\silos_vlx_0.csv
6,40:22:D8:F1:E2:CC,2025-07-15 15:14:57,281.0,data_drive\LS2\silos_vlx_0.csv
7,40:22:D8:F1:E2:CC,2025-07-15 15:44:59,277.0,data_drive\LS2\silos_vlx_0.csv
8,40:22:D8:F1:E2:CC,2025-07-15 16:15:00,276.0,data_drive\LS2\silos_vlx_0.csv
9,40:22:D8:F1:E2:CC,2025-07-15 16:45:02,279.0,data_drive\LS2\silos_vlx_0.csv


➡ Informe Markdown: D:\ls_feed\data_drive\_eda\ls2\report.md

=== EDA: ls3 ===
Duplicados (filas completas): 0


Unnamed: 0,mac,time,vlx,source_file
0,40:22:D8:F1:E3:80,2025-07-15 12:37:08,298.0,data_drive\LS3\silos_vlx_0.csv
1,40:22:D8:F1:E3:80,2025-07-15 13:07:10,296.0,data_drive\LS3\silos_vlx_0.csv
2,40:22:D8:F1:E3:80,2025-07-15 13:37:11,297.0,data_drive\LS3\silos_vlx_0.csv
3,40:22:D8:F1:E3:80,2025-07-15 14:07:13,303.0,data_drive\LS3\silos_vlx_0.csv
4,40:22:D8:F1:E3:80,2025-07-15 14:37:14,296.0,data_drive\LS3\silos_vlx_0.csv
5,40:22:D8:F1:E3:80,2025-07-15 15:07:16,296.0,data_drive\LS3\silos_vlx_0.csv
6,40:22:D8:F1:E3:80,2025-07-15 15:37:17,294.0,data_drive\LS3\silos_vlx_0.csv
7,40:22:D8:F1:E3:80,2025-07-15 16:07:19,292.0,data_drive\LS3\silos_vlx_0.csv
8,40:22:D8:F1:E3:80,2025-07-15 16:37:20,286.0,data_drive\LS3\silos_vlx_0.csv
9,40:22:D8:F1:E3:80,2025-07-15 17:07:22,292.0,data_drive\LS3\silos_vlx_0.csv


➡ Informe Markdown: D:\ls_feed\data_drive\_eda\ls3\report.md

=== EDA: ls4 ===
Duplicados (filas completas): 71


Unnamed: 0,mac,time,vlx,source_file
0,B8:D6:1A:60:95:30,2024-06-14 00:01:02,349.0,data_drive\LS4\silos_vlx_0.csv
1,B8:D6:1A:60:95:30,2024-06-14 00:01:00,352.0,data_drive\LS4\silos_vlx_0.csv
2,B8:D6:1A:60:95:30,2024-06-14 00:01:01,354.0,data_drive\LS4\silos_vlx_0.csv
3,B8:D6:1A:60:95:30,2024-06-14 00:01:03,344.0,data_drive\LS4\silos_vlx_0.csv
4,B8:D6:1A:60:95:30,2024-06-14 00:01:00,342.0,data_drive\LS4\silos_vlx_0.csv
5,B8:D6:1A:60:95:30,2024-06-14 00:01:00,346.0,data_drive\LS4\silos_vlx_0.csv
6,B8:D6:1A:60:95:30,2024-06-14 00:01:01,346.0,data_drive\LS4\silos_vlx_0.csv
7,B8:D6:1A:60:95:30,2024-06-14 00:01:03,349.0,data_drive\LS4\silos_vlx_0.csv
8,B8:D6:1A:60:95:30,2024-06-14 00:01:00,345.0,data_drive\LS4\silos_vlx_0.csv
9,B8:D6:1A:60:95:30,2024-06-14 00:01:02,342.0,data_drive\LS4\silos_vlx_0.csv


➡ Informe Markdown: D:\ls_feed\data_drive\_eda\ls4\report.md

=== EDA: ls5 ===
Duplicados (filas completas): 0


Unnamed: 0,mac,time,vlx,source_file
0,B8:D6:1A:60:94:1C,2025-07-15 11:47:13,330.0,data_drive\LS5\silos_vlx_0.csv
1,B8:D6:1A:60:94:1C,2025-07-15 12:17:15,333.0,data_drive\LS5\silos_vlx_0.csv
2,B8:D6:1A:60:94:1C,2025-07-15 12:47:17,339.0,data_drive\LS5\silos_vlx_0.csv
3,B8:D6:1A:60:94:1C,2025-07-15 13:17:18,338.0,data_drive\LS5\silos_vlx_0.csv
4,B8:D6:1A:60:94:1C,2025-07-15 13:47:20,342.0,data_drive\LS5\silos_vlx_0.csv
5,B8:D6:1A:60:94:1C,2025-07-15 14:17:21,335.0,data_drive\LS5\silos_vlx_0.csv
6,B8:D6:1A:60:94:1C,2025-07-15 14:47:23,335.0,data_drive\LS5\silos_vlx_0.csv
7,B8:D6:1A:60:94:1C,2025-07-15 15:17:24,334.0,data_drive\LS5\silos_vlx_0.csv
8,B8:D6:1A:60:94:1C,2025-07-15 15:47:26,331.0,data_drive\LS5\silos_vlx_0.csv
9,B8:D6:1A:60:94:1C,2025-07-15 16:17:27,335.0,data_drive\LS5\silos_vlx_0.csv


➡ Informe Markdown: D:\ls_feed\data_drive\_eda\ls5\report.md

=== EDA: ls6 ===
Duplicados (filas completas): 0


Unnamed: 0,mac,time,vlx,source_file
0,D8:13:2A:D2:36:B4,2025-07-15 11:33:22,374.0,data_drive\LS6\silos_vlx_1.csv
1,D8:13:2A:D2:36:B4,2025-07-30 17:58:37,373.0,data_drive\LS6\silos_vlx_10.csv
2,D8:13:2A:D2:36:B4,2025-07-30 18:08:37,381.0,data_drive\LS6\silos_vlx_11.csv
3,D8:13:2A:D2:36:B4,2025-07-30 18:18:37,391.0,data_drive\LS6\silos_vlx_12.csv
4,D8:13:2A:D2:36:B4,2025-08-06 11:08:39,236.0,data_drive\LS6\silos_vlx_13.csv
5,D8:13:2A:D2:36:B4,2025-08-06 11:20:27,230.0,data_drive\LS6\silos_vlx_14.csv
6,D8:13:2A:D2:36:B4,2025-08-06 11:34:13,229.0,data_drive\LS6\silos_vlx_15.csv
7,D8:13:2A:D2:36:B4,2025-07-15 11:46:09,371.0,data_drive\LS6\silos_vlx_2.csv
8,D8:13:2A:D2:36:B4,2025-07-15 11:58:16,378.0,data_drive\LS6\silos_vlx_3.csv
9,D8:13:2A:D2:36:B4,2025-07-15 12:10:16,379.0,data_drive\LS6\silos_vlx_4.csv


➡ Informe Markdown: D:\ls_feed\data_drive\_eda\ls6\report.md




Unnamed: 0,name,shape,memory_human,n_numeric,n_categorical,n_datetime,corr_strong_pairs,report_path
0,ls1,"{'rows': 1058, 'cols': 4}",167.51 KB,1,2,1,[],D:\ls_feed\data_drive\_eda\ls1\report.md
1,ls2,"{'rows': 902, 'cols': 4}",142.83 KB,1,2,1,[],D:\ls_feed\data_drive\_eda\ls2\report.md
2,ls3,"{'rows': 1056, 'cols': 4}",167.19 KB,1,2,1,[],D:\ls_feed\data_drive\_eda\ls3\report.md
3,ls4,"{'rows': 1056, 'cols': 4}",167.19 KB,1,2,1,[],D:\ls_feed\data_drive\_eda\ls4\report.md
4,ls5,"{'rows': 1058, 'cols': 4}",167.51 KB,1,2,1,[],D:\ls_feed\data_drive\_eda\ls5\report.md
5,ls6,"{'rows': 15, 'cols': 4}",2.51 KB,1,2,1,[],D:\ls_feed\data_drive\_eda\ls6\report.md


Resumen global guardado en: D:\ls_feed\data_drive\_eda\resumen_global.csv


'\nSalida:\n- Variables disponibles en el notebook: df_ls1, df_ls2, ... (según existan .parquet).\n- Carpeta con informes por dataset (Markdown + PNG + CSV): data_drive/_eda/<dataset>/\n- Resumen ejecutivo consolidado: data_drive/_eda/resumen_global.csv\n'