Detección de barras en galaxias - Modelos Alternativos

Proyecto integrador MNA

Integrantes

Jonathan Jesús Marmolejo Hernández - A01795195

Isaid Posadas Oropeza - A01795015

Luis Daniel Ortega Muñoz - A01795197


## Extraer imágenes del archivo .zip

In [3]:
import os
from zipfile import ZipFile
from google.colab import drive

# 1. Mount Google Drive so the project folder is reachable under /content/drive.
drive.mount('/content/drive')

# Archive holding the images and the folder it is unpacked into.
zip_path = "/content/drive/MyDrive/MNA_ProyectoIntegrador/dataset.processed.GRLogDiff.640x640.zip"
extract_path = "/content/drive/MyDrive/MNA_ProyectoIntegrador/Imagenes_preprocesadas_2"

# The mere existence of the target folder is used as the "already extracted"
# marker, so the archive is only unpacked the first time this cell runs.
if os.path.exists(extract_path):
    print(" Imágenes ya extraídas anteriormente.")
else:
    with ZipFile(zip_path, 'r') as zip_ref:
        zip_ref.extractall(path=extract_path)
    print(" Imágenes extraídas del ZIP correctamente.")


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
 Imágenes extraídas del ZIP correctamente.



## Preparación del dataset

In [9]:
import pandas as pd
import os
import shutil
from sklearn.model_selection import train_test_split
from google.colab import drive

# 1. Mount Google Drive so the CSV and the extracted images are reachable.
drive.mount('/content/drive')

# 2. Load the catalogue CSV.
csv_path = "/content/drive/MyDrive/MNA_ProyectoIntegrador/dataset.csv"
df = pd.read_csv(csv_path)

# 3. Keep only rows whose 'Bars' value is exactly 0.0 or above 0.25 (anything
#    in between, and NaN, is dropped) and derive the binary 'has_bar' label.
df = df[(df['Bars'] == 0.0) | (df['Bars'] > 0.25)].copy()
# Vectorized comparison instead of a per-row Python lambda; yields the same
# 0/1 integer labels.
df['has_bar'] = (df['Bars'] > 0.25).astype(int)

# 4. Balance the dataset by undersampling both classes to the size of the
#    smaller one. Reindexing on [0, 1] makes a completely absent class show up
#    as a count of 0 instead of being silently skipped by value_counts().
class_counts = df['has_bar'].value_counts().reindex([0, 1], fill_value=0)
min_count = int(class_counts.min())
if min_count == 0:
    # Fail early with a clear message rather than with an obscure sampling
    # error further down.
    raise ValueError(
        "No se puede balancear el dataset: una de las clases no tiene "
        f"ejemplos ({class_counts.to_dict()})."
    )
df_balanced = pd.concat([
    df[df['has_bar'] == 0].sample(min_count, random_state=42),
    df[df['has_bar'] == 1].sample(min_count, random_state=42)
]).sample(frac=1, random_state=42).reset_index(drop=True)  # shuffle the union

print(f"Dataset balanceado: {df_balanced['has_bar'].value_counts().to_dict()}")

# 5. Stratified 80/20 train/validation split.
train_df, val_df = train_test_split(df_balanced, test_size=0.2, stratify=df_balanced['has_bar'], random_state=42)

# 6. Remove any previous copy of the dataset, both on the Colab disk and on Drive.
base_path = "/content/galaxy_dataset_2"
drive_path = "/content/drive/MyDrive/MNA_ProyectoIntegrador/galaxy_dataset_2"

for path in [base_path, drive_path]:
    if os.path.exists(path):
        shutil.rmtree(path)
        print(f"Carpeta eliminada: {path}")

# 7. Create the images/labels x train/val folder structure on the Colab disk.
os.makedirs(f"{base_path}/images/train", exist_ok=True)
os.makedirs(f"{base_path}/images/val", exist_ok=True)
os.makedirs(f"{base_path}/labels/train", exist_ok=True)
os.makedirs(f"{base_path}/labels/val", exist_ok=True)

# 8. Locate the extracted images: use the first directory (in os.walk order)
#    that directly contains at least one .png file. Rebinding image_src_path
#    inside the loop is safe because os.walk already received the start path.
image_src_path = "/content/drive/MyDrive/MNA_ProyectoIntegrador/Imagenes_preprocesadas_2"
for root, dirs, files in os.walk(image_src_path):
    if any(file.endswith(".png") for file in files):
        image_src_path = root
        break
print(f"Ruta final de imágenes: {image_src_path}")

# 9. Función para copiar imágenes y crear etiquetas
def copiar_y_etiquetar(df, subset, src_dir=None, dst_root=None):
    """Copy the images listed in *df* into a split folder and write their labels.

    For every row, ``<name>.png`` is looked up in the source directory. When it
    exists it is copied to ``<dst_root>/images/<subset>/`` and a label file
    ``<dst_root>/labels/<subset>/<name>.txt`` is written: one line
    ``0 0.5 0.5 1.0 1.0`` when ``has_bar`` equals 1, an empty file otherwise.
    (The line presumably follows the YOLO ``class x_center y_center width
    height`` convention with a box covering the whole image -- confirm against
    the training setup.) Rows whose image is missing are only counted.

    Args:
        df: DataFrame with at least the columns ``name`` and ``has_bar``.
        subset: Name of the split sub-folder, e.g. ``"train"`` or ``"val"``.
        src_dir: Directory holding the source images. Defaults to the
            notebook-level ``image_src_path``.
        dst_root: Root of the output dataset. Defaults to the notebook-level
            ``base_path``.

    Returns:
        None. A summary with the number of copied and missing images is printed.
    """
    # Fall back to the notebook globals so existing two-argument calls keep
    # working unchanged.
    if src_dir is None:
        src_dir = image_src_path
    if dst_root is None:
        dst_root = base_path

    img_dir = os.path.join(dst_root, "images", subset)
    lbl_dir = os.path.join(dst_root, "labels", subset)
    # Make the function usable even if the folder structure was not created yet.
    os.makedirs(img_dir, exist_ok=True)
    os.makedirs(lbl_dir, exist_ok=True)

    copiadas = 0
    faltantes = 0
    # Iterate the two columns directly instead of using iterrows(): iterrows()
    # coerces each row to a single dtype, so an integer 'name' in an otherwise
    # numeric frame would turn into e.g. 123.0 and yield "123.0.png".
    for name, label in zip(df['name'], df['has_bar']):
        img_name = f"{name}.png"
        src = os.path.join(src_dir, img_name)

        if not os.path.exists(src):
            faltantes += 1
            continue

        shutil.copy(src, os.path.join(img_dir, img_name))
        # Negative examples still get a (deliberately empty) label file.
        with open(os.path.join(lbl_dir, f"{name}.txt"), 'w') as f:
            if label == 1:
                f.write("0 0.5 0.5 1.0 1.0\n")
        copiadas += 1
    print(f"Copiadas en {subset}: {copiadas}, faltantes: {faltantes}")

# 10. Populate both splits (train first, then validation).
for split_df, split_name in ((train_df, "train"), (val_df, "val")):
    copiar_y_etiquetar(split_df, split_name)

# 11. Write data.yaml describing the dataset: image folders of both splits,
#     a single class, and its name. The leading and trailing empty entries
#     reproduce the blank first line and the final newline of the file.
yaml_content = "\n".join([
    "",
    f"train: {base_path}/images/train",
    f"val: {base_path}/images/val",
    "nc: 1",
    "names: ['barra']",
    "",
])
with open(f"{base_path}/data.yaml", "w") as f:
    f.write(yaml_content)

# 12. Mirror the finished folder structure to Google Drive, file by file.
for subdir in ("images/train", "images/val", "labels/train", "labels/val"):
    src_folder = os.path.join(base_path, subdir)
    dst_folder = os.path.join(drive_path, subdir)
    os.makedirs(dst_folder, exist_ok=True)
    for file_name in os.listdir(src_folder):
        shutil.copy(
            os.path.join(src_folder, file_name),
            os.path.join(dst_folder, file_name),
        )

# data.yaml lives at the dataset root, so it is copied separately.
shutil.copy(f"{base_path}/data.yaml", f"{drive_path}/data.yaml")

print("Dataset completo copiado correctamente a tu Google Drive.")


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Dataset balanceado: {0: 3343, 1: 3343}
Carpeta eliminada: /content/galaxy_dataset_2
Ruta final de imágenes: /content/drive/MyDrive/MNA_ProyectoIntegrador/Imagenes_preprocesadas_2
Copiadas en train: 5348, faltantes: 0
Copiadas en val: 1338, faltantes: 0
Dataset completo copiado correctamente a tu Google Drive.
