In [1]:
import os
import shutil
import subprocess

import pandas as pd

# === 1. Decompress shuttle.trn.Z if shuttle.trn does not exist yet
input_z_path = "shuttle.trn.Z"
# Drop only the trailing ".Z" extension; str.replace would strip every
# ".Z" occurrence in the path, not just the suffix.
output_trn_path = os.path.splitext(input_z_path)[0]

if not os.path.exists(output_trn_path):
    if not os.path.isfile(input_z_path):
        raise FileNotFoundError(
            f"Neither {output_trn_path} nor {input_z_path} was found."
        )

    # ".Z" is the Unix `compress` (LZW) format. The Python standard library
    # has no decoder for it, and a plain byte copy would only store the
    # still-compressed bytes under the .trn name, so delegate to a system
    # tool that understands the format.
    decompress_cmd = None
    for tool_name, tool_flags in (("gzip", ["-dc"]), ("uncompress", ["-c"])):
        tool_path = shutil.which(tool_name)
        if tool_path:
            decompress_cmd = [tool_path, *tool_flags, input_z_path]
            break
    if decompress_cmd is None:
        raise RuntimeError(
            f"Cannot decompress {input_z_path}: neither 'gzip' nor "
            "'uncompress' was found on PATH."
        )

    # Decompress into a temporary name and rename only on success, so a
    # failed run never leaves a partial shuttle.trn that the existence
    # check above would accept on the next execution.
    partial_path = output_trn_path + ".part"
    try:
        with open(partial_path, "wb") as f_out:
            subprocess.run(decompress_cmd, stdout=f_out, check=True)
        os.replace(partial_path, output_trn_path)
    finally:
        if os.path.exists(partial_path):
            os.remove(partial_path)
    print("✅ Archivo shuttle.trn.Z descomprimido.")

# === 2. Load shuttle.trn and shuttle.tst
train_path, test_path = "shuttle.trn", "shuttle.tst"

# Nine generically named feature columns followed by the target column.
column_names = [*(f"feat_{i}" for i in range(9)), "target"]

# Both files are parsed the same way: whitespace-separated, no header row.
# The generator reads the training file first, then the test file.
df_train, df_test = (
    pd.read_csv(split_path, sep=r'\s+', header=None, names=column_names)
    for split_path in (train_path, test_path)
)

print(f"📊 Train shape: {df_train.shape}, Test shape: {df_test.shape}")

# === 3. Stack the train and test rows into one frame with a fresh index
df_total = pd.concat(objs=(df_train, df_test), axis=0, ignore_index=True)
print(f"📦 Dataset final shape: {df_total.shape}")

# === 4. Write the merged frame to CSV, leaving the row index out
output_csv = "shuttle.csv"
df_total.to_csv(path_or_buf=output_csv, index=False)
print(f"✅ CSV final guardado en: {output_csv}")


📊 Train shape: (43500, 10), Test shape: (14500, 10)
📦 Dataset final shape: (58000, 10)
✅ CSV final guardado en: shuttle.csv


In [2]:
from collections import Counter

# Count how many rows carry each target value.
conteo = Counter(df_total["target"])

print("📊 Distribución de clases:")
# Keys are unique, so ordering by key matches ordering the (key, count) pairs.
for clase in sorted(conteo):
    cantidad = conteo[clase]
    print(f"Clase {clase}: {cantidad} muestras")


📊 Distribución de clases:
Clase 1: 45586 muestras
Clase 2: 50 muestras
Clase 3: 171 muestras
Clase 4: 8903 muestras
Clase 5: 3267 muestras
Clase 6: 10 muestras
Clase 7: 13 muestras
