In [None]:
import pandas as pd

path_cum = "cumulative_2025.09.29_10.27.17.csv"
path_k2p = "k2pandc_2025.09.29_10.31.52.csv"
path_toi = "TOI_2025.09.29_10.31.26.csv"

kepler = pd.read_csv(path_cum)
k2 = pd.read_csv(path_k2p)
tess = pd.read_csv(path_toi)

# Columnas a juntar
dfkepler_cols = [
    "koi_period",
    "koi_duration",
    "koi_depth",
    "koi_prad",
    "koi_insol",
    "koi_impact",
    "koi_srad",
    "koi_steff",
    "koi_slogg",
    "koi_teq",
    "ra",
    "dec"
]
dfk2_cols = [
    "pl_orbper",
    "pl_trandur",
    "pl_trandep",
    "pl_rade",
    "pl_insol",
    "st_rad",
    "st_teff",
    "st_logg",
    "pl_eqt"
    "ra",
    "dec"
]
dftess_cols = [
    "pl_orbper",
    "pl_trandurh",
    "pl_trandep",
    "pl_rade",
    "pl_insol",
    "st_rad",
    "st_teff",
    "st_logg",
    "pl_eqt",
    "ra",
    "dec"
]

# Nuevos nombres
nombres_nuevos = [
    "periodo_orbital",
    "duracion_transito",
    "profundidad", #igual no es importante
    "pl_radio",
    "insolacion", #esta o pl_temperatura_eq
    "st_radio",
    "st_temperatura", 
    "st_gravedad",
    "pl_temperatura_eq",
    "RA",
    "DEC" 
]

dfkepler = kepler[dfkepler_cols]
dfk2 = k2[dfk2_cols]
dftess_sub = test[dftess_cols]

df_concat = pd.concat([dfkepler.reset_index(drop=True), dfk2.reset_index(drop=True), dftess_sub.reset_index(drop=True)], axis=1)

# 3. Ahora agrupamos las columnas correspondientes
columnas_agrupadas = [df1_cols, df2_cols, df3_cols]

# Creamos un nuevo dataframe con las columnas fusionadas
df_final = pd.DataFrame()

for nuevo_nombre, cols in zip(nombres_nuevos, zip(df1_cols, df2_cols, df3_cols)):
    df_final[nuevo_nombre] = pd.concat([df_concat[c] for c in cols], axis=1).bfill(axis=1).iloc[:,0]

print(df_final)


Kepler: (9708, 1)
K2    : (4304, 1)
TESS  : (7793, 1)
   periodo_orbital  duracion_transito  profundidad  pl_radio  insolacion  \
0              NaN                NaN          NaN       NaN         NaN   
1              NaN                NaN          NaN       NaN         NaN   
2              NaN                NaN          NaN       NaN         NaN   
3              NaN                NaN          NaN       NaN         NaN   
4              NaN                NaN          NaN       NaN         NaN   

   st_radio  st_temperatura  st_gravedad  pl_temperatura_eq  RA  DEC  
0       NaN             NaN          NaN                NaN NaN  NaN  
1       NaN             NaN          NaN                NaN NaN  NaN  
2       NaN             NaN          NaN                NaN NaN  NaN  
3       NaN             NaN          NaN                NaN NaN  NaN  
4       NaN             NaN          NaN                NaN NaN  NaN  
Filas totales: 21805
Columnas: ['periodo_orbital', 'duracion_tr

In [14]:
import pandas as pd

# ==============================
# 1) LECTOR SIMPLE
# ==============================
def read_csv_clean(path):
    for sep in [",", ";", "\t"]:
        try:
            df = pd.read_csv(path, sep=sep, comment="#", engine="python")
            if df.shape[1] > 1:  # si encontró varias columnas
                return df
        except Exception:
            continue
    raise ValueError(f"No pude leer bien el archivo {path}")

# ==============================
# 2) RUTAS
# ==============================
path_cum = "cumulative_2025.10.04_03.48.39.csv"
path_k2p = "k2pandc_2025.10.04_03.48.47.csv"
path_toi = "TOI_2025.10.04_03.48.53.csv"

kepler = read_csv_clean(path_cum)
k2     = read_csv_clean(path_k2p)
tess   = read_csv_clean(path_toi)

# ==============================
# 3) ESQUEMA DESTINO
# ==============================
target_cols = [
    "periodo_orbital",
    "duracion_transito",
    "profundidad",
    "pl_radio",
    "insolacion",
    "st_radio",
    "st_temperatura",
    "st_gravedad",
    "pl_temperatura_eq",
    "RA",
    "DEC",
]

kepler_map = {
    "koi_period":  "periodo_orbital",
    "koi_duration":"duracion_transito",
    "koi_depth":   "profundidad",
    "koi_prad":    "pl_radio",
    "koi_insol":   "insolacion",
    "koi_srad":    "st_radio",
    "koi_steff":   "st_temperatura",
    "koi_slogg":   "st_gravedad",
    "koi_teq":     "pl_temperatura_eq",
    "ra":          "RA",
    "dec":         "DEC",
}

k2_map = {
    "pl_orbper":   "periodo_orbital",
    "pl_trandur":  "duracion_transito",
    "pl_trandep":  "profundidad",
    "pl_rade":     "pl_radio",
    "pl_insol":    "insolacion",
    "st_rad":      "st_radio",
    "st_teff":     "st_temperatura",
    "st_logg":     "st_gravedad",
    "pl_eqt":      "pl_temperatura_eq",
    "ra":          "RA",
    "dec":         "DEC",
}

tess_map = {
    "pl_orbper":   "periodo_orbital",
    "pl_trandurh": "duracion_transito",  # horas
    "pl_trandep":  "profundidad",
    "pl_rade":     "pl_radio",
    "pl_insol":    "insolacion",
    "st_rad":      "st_radio",
    "st_teff":     "st_temperatura",
    "st_logg":     "st_gravedad",
    "pl_eqt":      "pl_temperatura_eq",
    "ra":          "RA",
    "dec":         "DEC",
}

# ==============================
# 4) NORMALIZACIÓN
# ==============================
def standardize(df, colmap):
    cols = [c for c in colmap if c in df.columns]
    out = df[cols].rename(columns=colmap)
    return out.reindex(columns=target_cols)

df_kepler = standardize(kepler, kepler_map)
df_k2     = standardize(k2,     k2_map)
df_tess   = standardize(tess,   tess_map)

# convertir TESS de horas a días
df_tess["duracion_transito"] = pd.to_numeric(df_tess["duracion_transito"], errors="coerce") / 24.0

# ==============================
# 5) CONCATENAR
# ==============================
df_final = pd.concat([df_kepler, df_k2, df_tess], ignore_index=True)

print(df_final.head())
print("Filas totales:", len(df_final))

df_final.to_csv("exoplanetas_unificado.csv", index=False)
print("✅ Guardado en exoplanetas_unificado.csv")


   periodo_orbital  duracion_transito  profundidad  pl_radio  insolacion  \
0         9.488036            2.95750        615.8      2.26       93.59   
1        54.418383            4.50700        874.8      2.83        9.11   
2        19.899140            1.78220      10829.0     14.60       39.30   
3         1.736952            2.40641       8079.2     33.46      891.96   
4         2.525592            1.65450        603.3      2.75      926.16   

   st_radio  st_temperatura  st_gravedad  pl_temperatura_eq         RA  \
0     0.927          5455.0        4.467              793.0  291.93423   
1     0.927          5455.0        4.467              443.0  291.93423   
2     0.868          5853.0        4.544              638.0  297.00482   
3     0.791          5805.0        4.564             1395.0  285.53461   
4     1.046          6031.0        4.438             1406.0  288.75488   

         DEC  
0  48.141651  
1  48.141651  
2  48.134129  
3  48.285210  
4  48.226200  
Filas to