# In [None]:
import pandas as pd

# ==============================
# 1) LECTOR
# ==============================
def read_csv_clean(path):
    """Read a delimited text file, auto-detecting the separator.

    Tries comma, semicolon and tab (in that order) with ``#`` as the comment
    character, and returns the first parse that yields more than one column.
    A file that genuinely has a single column is therefore rejected.

    Args:
        path: Location of the file, passed straight to ``pandas.read_csv``.

    Returns:
        pandas.DataFrame: The parsed table, with at least two columns.

    Raises:
        ValueError: If the file cannot be opened, or if no candidate
            separator produces more than one column. When a lower-level
            error occurred, it is attached as ``__cause__`` so the real
            reason (missing file, bad encoding, parser error) stays visible.
    """
    last_error = None
    for sep in (",", ";", "\t"):
        try:
            df = pd.read_csv(path, sep=sep, comment="#", engine="python")
        except OSError as err:
            # The file itself is missing/unreadable: trying another separator
            # cannot help, so fail now and keep the original cause.
            raise ValueError(f"No pude leer bien el archivo {path}") from err
        except Exception as err:
            # Best-effort sniffing: remember why this separator failed and
            # move on to the next candidate.
            last_error = err
            continue
        if df.shape[1] > 1:
            return df
    raise ValueError(f"No pude leer bien el archivo {path}") from last_error

# ==============================
# 2) RUTAS
# ==============================
# Input files (relative to the working directory), one per mission.
path_cum = "cumulative_2025.10.04_03.48.39.csv"  # Kepler KOI cumulative table
path_k2p = "k2pandc_2025.10.04_03.48.47.csv"  # K2 planets & candidates
path_toi = "TOI_2025.10.04_03.48.53.csv"  # TESS objects of interest (TOI)

# Load the three raw tables in the same order as the paths above.
kepler, k2, tess = (
    read_csv_clean(source) for source in (path_cum, path_k2p, path_toi)
)

# ==============================
# 3) ESQUEMA
# ==============================
# Unified output schema: column names and their order in the merged table.
target_cols = [
    # --- planet / transit parameters ---
    "periodo_orbital",  # orbital period, days
    "duracion_transito",  # transit duration, hours
    "profundidad",  # transit depth, ppm
    "pl_radio",  # planet radius, Earth radii
    "insolacion",  # insolation, Earth flux
    # --- host-star parameters ---
    "st_radio",  # stellar radius, solar radii
    "st_temperatura",  # stellar effective temperature, K
    "st_gravedad",  # stellar surface gravity, log g (cgs)
    # --- derived planet parameter ---
    "pl_temperatura_eq",  # planet equilibrium temperature, K
    # --- sky position ---
    "RA",  # right ascension, decimal degrees
    "DEC",  # declination, decimal degrees
    # --- bookkeeping ---
    "mission",
    "object_id",
    "label",
]

# Kepler KOI cumulative table -> unified schema (source column: target column).
kepler_map = {
    # planet / transit
    "koi_period": "periodo_orbital",
    "koi_duration": "duracion_transito",
    "koi_depth": "profundidad",
    "koi_prad": "pl_radio",
    "koi_insol": "insolacion",
    # host star
    "koi_srad": "st_radio",
    "koi_steff": "st_temperatura",
    "koi_slogg": "st_gravedad",
    "koi_teq": "pl_temperatura_eq",
    # sky position
    "ra": "RA",
    "dec": "DEC",
    # identifier and disposition
    # NOTE(review): the archive documents this ID as KNNNNN.DD; confirm
    # against the downloaded file.
    "kepoi_name": "object_id",
    "koi_disposition": "label",
}

# K2 planets & candidates table -> unified schema (source column: target column).
k2_map = {
    # planet / transit
    "pl_orbper": "periodo_orbital",
    "pl_trandur": "duracion_transito",
    # Depth is in percent in this table; it must be converted to ppm after
    # standardisation to match the other missions.
    "pl_trandep": "profundidad",
    "pl_rade": "pl_radio",
    "pl_insol": "insolacion",
    # host star
    "st_rad": "st_radio",
    "st_teff": "st_temperatura",
    "st_logg": "st_gravedad",
    "pl_eqt": "pl_temperatura_eq",
    # sky position
    "ra": "RA",
    "dec": "DEC",
    # identifier and disposition
    "pl_name": "object_id",
    "disposition": "label",
}

# TESS TOI table -> unified schema (source column: target column).
tess_map = {
    # planet / transit
    "pl_orbper": "periodo_orbital",
    "pl_trandurh": "duracion_transito",
    "pl_trandep": "profundidad",
    "pl_rade": "pl_radio",
    "pl_insol": "insolacion",
    # host star
    "st_rad": "st_radio",
    "st_teff": "st_temperatura",
    "st_logg": "st_gravedad",
    "pl_eqt": "pl_temperatura_eq",
    # sky position
    "ra": "RA",
    "dec": "DEC",
    # identifier (TOI number, NNNNN.DD) and TFOPWG disposition
    "toi": "object_id",
    "tfopwg_disp": "label",
}

# ==============================
# 4) NORMALIZACIÓN & ESTANDARIZACIÓN
# ==============================
# Unified-schema columns that must hold numeric values.
NUM_COLS = {
    "periodo_orbital",
    "duracion_transito",
    "profundidad",
    "pl_radio",
    "insolacion",
    "st_radio",
    "st_temperatura",
    "st_gravedad",
    "pl_temperatura_eq",
    "RA",
    "DEC",
}

def standardize(df, colmap, mission_name):
    """Project a raw mission table onto the unified schema.

    Args:
        df: Raw table as loaded from the mission's CSV file.
        colmap: Mapping of source column name -> unified column name. Source
            columns missing from ``df`` are skipped.
        mission_name: Value written to the ``mission`` column of every row.

    Returns:
        pandas.DataFrame: A new frame whose columns are exactly
        ``target_cols``, in that order. Columns in ``NUM_COLS`` are coerced
        to numeric (unparseable values become NaN); schema columns with no
        source data are left empty.
    """
    # Only rename what actually exists in this table.
    rename_plan = {
        source: target
        for source, target in colmap.items()
        if source in df.columns
    }
    unified = df[list(rename_plan)].rename(columns=rename_plan).copy()

    # Force numeric dtype on every numeric schema column that is present.
    numeric_present = [col for col in unified.columns if col in NUM_COLS]
    for col in numeric_present:
        unified[col] = pd.to_numeric(unified[col], errors="coerce")

    unified["mission"] = mission_name
    # Guarantee an identifier column even when the source has none.
    if "object_id" not in unified.columns:
        unified["object_id"] = pd.NA

    # Final column set and order come from the schema, not from the source.
    return unified.reindex(columns=target_cols)

# Project each raw table onto the unified schema, tagging its mission.
df_kepler, df_k2, df_tess = (
    standardize(raw, mapping, mission)
    for raw, mapping, mission in (
        (kepler, kepler_map, "Kepler"),
        (k2, k2_map, "K2"),
        (tess, tess_map, "TESS"),
    )
)

# --- Unit conversions ---
# K2 depth is mapped from a percent column (see k2_map); 1 % = 10,000 ppm.
df_k2["profundidad"] = 10000.0 * df_k2["profundidad"]

# ==============================
# 5) CONCATENATE AND SAVE
# ==============================
# Stack the three missions and renumber the rows from zero.
df_final = pd.concat((df_kepler, df_k2, df_tess), ignore_index=True)

print(df_final.head())
print("Filas totales:", df_final.shape[0])

# Write the merged table without the synthetic row index.
df_final.to_csv("exoplanetas_unificado.csv", index=False)
