In [None]:
from pathlib import Path
import pandas as pd

# Hard-coded absolute path to the exam folder.
BASE_DIR = Path("/Users/davidmc/Documents/09-Aprendizaje/01-Diplomados/01-ciencia_de_datos/01-módulo/00-Exámen")
# "datos" sub-folder of BASE_DIR; this is the directory handed to the loader.
DATA_DIR = BASE_DIR.joinpath("datos")

def _safe_key(p: Path) -> str:
    """Return a dictionary key of the form ``<stem>__<extension>`` for *p*.

    The extension is lower-cased and written without its leading dot, so two
    files that share a stem but differ in format get distinct keys.
    """
    return f"{p.stem}__{p.suffix.lower().lstrip('.')}"


def load_all_tables(data_dir: Path) -> dict[str, pd.DataFrame]:
    """Load every CSV/Excel file located directly inside *data_dir*.

    Files are selected by extension (``.csv``, ``.xlsx``, ``.xls``), compared
    case-insensitively, and processed in sorted path order.

    Args:
        data_dir: Directory to scan (not recursive).

    Returns:
        A dict mapping ``_safe_key(file)`` to the loaded DataFrame. String
        column labels have surrounding whitespace removed; non-string labels
        are left untouched.

    Raises:
        FileNotFoundError: If no matching file is found in *data_dir*.
    """
    table_suffixes = {".csv", ".xlsx", ".xls"}
    dfs: dict[str, pd.DataFrame] = {}

    # Filter on the lower-cased suffix instead of globbing: glob patterns are
    # case-sensitive on POSIX, so "*.csv" would silently skip "DATA.CSV".
    # A missing or non-directory path yields no candidates and therefore falls
    # through to the FileNotFoundError below, as before.
    candidates = data_dir.iterdir() if data_dir.is_dir() else ()
    files = sorted(
        f for f in candidates
        if f.is_file() and f.suffix.lower() in table_suffixes
    )
    if not files:
        raise FileNotFoundError(f"No encontré .csv/.xlsx/.xls en: {data_dir}")

    for f in files:
        key = _safe_key(f)

        if f.suffix.lower() == ".csv":
            # Try the default UTF-8 first; fall back to latin-1 when decoding
            # fails (common in older datasets).
            try:
                df = pd.read_csv(f)
            except UnicodeDecodeError:
                df = pd.read_csv(f, encoding="latin-1")
        else:
            # Excel: read_excel loads the first sheet by default.
            df = pd.read_excel(f)

        # Normalize column names by trimming surrounding whitespace. Labels
        # that are not strings (e.g. numeric Excel headers) have no .strip()
        # and are kept as they are.
        df.columns = [c.strip() if isinstance(c, str) else c for c in df.columns]
        dfs[key] = df

    return dfs

# Load every table found in the data directory, keyed by "<stem>__<ext>".
dfs = load_all_tables(DATA_DIR)

# Quick overview: one record per table with its shape and column labels.
summary = []
for name, df in dfs.items():
    n_rows, n_cols = df.shape
    summary.append(
        dict(table=name, rows=n_rows, cols=n_cols, columns=[*df.columns])
    )

# Order the overview alphabetically by table key and display it
# (the bare expression on the last line is the notebook cell output).
summary_df = pd.DataFrame(summary).sort_values(by=["table"])
summary_df

Unnamed: 0,table,rows,cols,columns
0,cuisine__csv,916,2,"[placeID, Rcuisine]"
1,hours__csv,2339,3,"[placeID, hours, days]"
2,parking__csv,702,2,"[placeID, parking_lot]"
3,payment_methods__csv,1314,2,"[placeID, Rpayment]"
4,ratings__csv,1161,5,"[userID, placeID, rating, food_rating, service..."
5,restaurants__csv,130,21,"[placeID, latitude, longitude, the_geom_meter,..."
6,restaurants__xlsx,130,21,"[placeID, latitude, longitude, the_geom_meter,..."
7,usercuisine__csv,330,2,"[userID, Rcuisine]"
8,userpayment__csv,177,2,"[userID, Upayment]"
9,users__csv,138,19,"[userID, latitude, longitude, smoker, drink_le..."


Unnamed: 0,placeID,Rcuisine
0,135110,Spanish
1,135109,Italian
2,135107,Latin_American
3,135106,Mexican
4,135105,Fast_Food
...,...,...
911,132005,Seafood
912,132004,Seafood
913,132003,International
914,132002,Seafood
