In [2]:
import cv2
import numpy as np
import matplotlib.pyplot as plt
import os
import sys
import random
import pandas as pd # Usaremos pandas solo para mostrar la tabla bonita
import logging
from pathlib import Path

# Add the root path to sys.path. The root is taken to be two directory levels
# above the current working directory, so the result depends on where the
# kernel was started (presumably this is what lets the `src.*` imports
# resolve -- verify against the repository layout).
ROOT_PATH = str(Path.cwd().parent.parent)
if ROOT_PATH not in sys.path:
    sys.path.append(ROOT_PATH)

# Path to the images directory, built under ROOT_PATH.
IMG_DATA_PATH = Path(ROOT_PATH + "/data/raw/images/all")

from src.vision.preprocessor import ImagePreprocessor
from src.vision.segmentator import Segmentator
from src.vision.features import FeatureExtractor

In [6]:
# Set the root logger's level to WARNING.
logging.getLogger().setLevel(logging.WARNING)

def get_random_image(category, base_path="data/raw/images/all"):
    """Pick one random image from a category folder and load it with OpenCV.

    Args:
        category: Name of the sub-folder of ``base_path`` to sample from.
        base_path: Directory that contains one sub-folder per category.

    Returns:
        A ``(path, image)`` tuple, where ``image`` is whatever ``cv2.imread``
        returns for the chosen file, or ``(None, None)`` when the category
        folder does not exist or holds no ``.jpg``/``.jpeg``/``.png`` entries.
    """
    category_dir = os.path.join(base_path, category)
    if not os.path.exists(category_dir):
        return None, None

    image_extensions = ('.jpg', '.jpeg', '.png')
    candidates = []
    for entry in os.listdir(category_dir):
        # Extension match is case-insensitive.
        if entry.lower().endswith(image_extensions):
            candidates.append(entry)
    if len(candidates) == 0:
        return None, None

    chosen_path = os.path.join(category_dir, random.choice(candidates))
    return chosen_path, cv2.imread(chosen_path)

def visualize_FeatureExtractor_pipeline():
    """Print the recommended features of one random sample image per category.

    For each category a random image is obtained through ``get_random_image``
    (reading from ``IMG_DATA_PATH``), passed through
    ``ImagePreprocessor.process`` -> ``Segmentator.process`` ->
    ``FeatureExtractor.extract_features``, and the detected object with the
    largest ``'area'`` is kept. The kept objects are then printed as a table
    restricted to ``'Label'`` plus the features returned by
    ``FeatureExtractor.get_recommended_features``.

    Errors raised while processing a category are reported with ``print`` and
    that category is skipped (best-effort demo). The pandas display options
    used for the table are applied only while printing and are restored
    afterwards.

    Returns:
        None. All results are written to stdout.
    """
    # 1. Pipeline configuration.
    preprocessor = ImagePreprocessor(
        target_size=(600, 800),
        gamma=1.7,
        d_bFilter=5,
        binarization_block_size=31,
        binarization_C=-11,       # author's note: cannot be more than 11
        open_kernel_size=(5, 5),
        close_kernel_size=(9, 9),
        clear_border_margin=5
    )

    segmentator = Segmentator(
        min_area=80,
        merge_distance=20
    )

    extractor = FeatureExtractor()

    target_features = extractor.get_recommended_features()

    categories = ["arandelas", "clavos", "tornillos", "tuercas"]
    base_path = IMG_DATA_PATH

    print(f"{'='*100}")
    print(f"üî¨ FEATURES SELECCIONADAS PARA CLUSTERING")
    print(f"   Variables ({len(target_features)}): {target_features}")
    print(f"{'='*100}\n")

    # One feature dict per category, accumulated for the final table.
    all_data = []

    for category in categories:
        # Fetch a random image; skip the category when nothing could be loaded.
        path, raw_img = get_random_image(category, base_path)
        if raw_img is None:
            continue

        filename = os.path.basename(path)

        try:
            # Vision pipeline: binarize, then segment.
            binary = preprocessor.process(raw_img)
            seg_res = segmentator.process(binary)

            bboxes = seg_res.get("bounding_boxes", [])
            masks = seg_res.get("masks", [])

            if not bboxes:
                print(f"‚ùå {category.upper()}: No se detectaron objetos en {filename}")
                continue

            features_list = extractor.extract_features(bboxes, masks)

            if features_list:
                # Keep the largest object so noise blobs are not reported.
                # `.get('area', 0)` matches visualize_DataPreprocessor_pipeline
                # and avoids dropping the whole category on a missing key.
                main_obj = max(features_list, key=lambda feats: feats.get('area', 0))
                # Attach the ground-truth label for comparison in the table.
                main_obj['Label'] = category.upper()

                all_data.append(main_obj)

        except Exception as e:
            # Deliberate best-effort: report and move on to the next category.
            print(f"‚ùå Error en {category}: {e}")

    # Show the collected features as a table.
    if all_data:
        df = pd.DataFrame(all_data)

        # Only the label plus the recommended features ...
        cols_to_show = ['Label'] + target_features

        # ... and only those that were actually computed.
        final_cols = [c for c in cols_to_show if c in df.columns]
        df_display = df[final_cols]

        # Scope the display options to this print so they do not leak into
        # the rest of the notebook session.
        with pd.option_context(
            'display.max_columns', None,
            'display.width', 1000,
            'display.float_format', lambda x: '%.4f' % x,
        ):
            print(df_display.to_string(index=False))

In [7]:
# Run the feature-extraction demo; its table is printed to stdout.
visualize_FeatureExtractor_pipeline()

üî¨ FEATURES SELECCIONADAS PARA CLUSTERING
   Variables (5): ['aspect_ratio', 'solidity', 'hole_confidence', 'circle_ratio', 'radius_variance']

    Label  aspect_ratio  solidity  hole_confidence  circle_ratio  radius_variance
ARANDELAS        1.0094    0.9906           1.0000        0.9574           0.0190
   CLAVOS       10.9625    0.5119           0.0000        0.0399           0.4678
TORNILLOS        3.1997    0.5945           0.6667        0.1664           0.4813
  TUERCAS        1.1102    0.9756           1.0000        0.8310           0.0484


In [9]:
from src.vision.data_prep import DataPreprocessor

def get_images_batch(base_path="data/raw/images/all", samples_per_class=5):
    """Collect a batch of image files so there are enough samples for statistics.

    For each of the four known categories, the image files found in
    ``base_path/<category>`` are sorted by name and the first
    ``samples_per_class`` are selected. Sorting makes the selection
    reproducible, since ``os.listdir`` returns entries in arbitrary order.

    Args:
        base_path: Directory that contains one sub-folder per category.
        samples_per_class: Maximum number of files taken from each category.

    Returns:
        A list of dicts with keys ``'path'`` (joined file path),
        ``'category'`` (upper-cased category name) and ``'filename'``.
        Categories whose folder is missing contribute nothing.
    """
    batch = []
    categories = ["arandelas", "clavos", "tornillos", "tuercas"]
    # Same extension set as get_random_image (the original omitted '.jpeg').
    image_extensions = ('.jpg', '.jpeg', '.png')

    print(f"üì¶ Recolectando {samples_per_class} im√°genes por categor√≠a...")

    for category in categories:
        dir_path = os.path.join(base_path, category)
        # isdir (not exists) so a stray regular file cannot reach os.listdir.
        if not os.path.isdir(dir_path):
            continue

        files = sorted(
            f for f in os.listdir(dir_path)
            if f.lower().endswith(image_extensions)
        )

        # Take the first N files in name order.
        for f in files[:samples_per_class]:
            batch.append({
                'path': os.path.join(dir_path, f),
                'category': category.upper(),
                'filename': f
            })
    return batch

def visualize_DataPreprocessor_pipeline():
    """Run the vision pipeline on a batch of images and compare raw vs. normalized features.

    Steps:
      1. Build the pipeline objects (``ImagePreprocessor``, ``Segmentator``,
         ``FeatureExtractor``, ``DataPreprocessor``).
      2. For every image returned by ``get_images_batch`` (reading from
         ``IMG_DATA_PATH``), extract features and keep the object with the
         largest ``'area'``, tagged with its category under ``'Label_Real'``.
      3. Call ``DataPreprocessor.fit_transform`` on the collected feature
         dicts with the recommended feature names.
      4. Print, for the first sample of each class, the raw and normalized
         value of every recommended feature.
      5. Print the per-feature mean and standard deviation of the normalized
         matrix.

    Per-image errors are reported with ``print`` and the image is skipped.
    The pandas display options are applied only while printing and restored
    afterwards.

    NOTE(review): the output recorded in this notebook shows identical
    "Crudo" and "Norm." rows, i.e. the normalization appears to have had no
    effect. The cause is not visible from this function (it would be inside
    ``DataPreprocessor``); a sanity warning is printed below when the
    normalized values equal the raw ones.

    Returns:
        None. All results are written to stdout.
    """
    # 1. Instantiate the full pipeline.
    img_prep = ImagePreprocessor(
        target_size=(600, 800),
        gamma=1.7,
        d_bFilter=5,
        binarization_block_size=31,
        binarization_C=-11,
        open_kernel_size=(5, 5),
        close_kernel_size=(9, 9),
        clear_border_margin=5
    )

    segmentator = Segmentator(
        min_area=80,
        merge_distance=20
    )

    extractor = FeatureExtractor()
    data_prep = DataPreprocessor()

    # 2. Collect raw features (one dict per image).
    raw_dataset = []

    batch = get_images_batch(base_path=IMG_DATA_PATH)
    print("‚öôÔ∏è  Ejecutando pipeline de visi√≥n (Preproceso -> Segmentaci√≥n -> Extracci√≥n)...")

    for item in batch:
        try:
            # A. Load; unreadable files are skipped silently.
            raw_img = cv2.imread(item['path'])
            if raw_img is None:
                continue

            # B. Vision: binarize, then segment.
            binary = img_prep.process(raw_img)
            seg_res = segmentator.process(binary)

            # C. Feature extraction.
            features = extractor.extract_features(seg_res['bounding_boxes'], seg_res['masks'])

            if features:
                # Keep the largest object as the main one.
                main_obj = max(features, key=lambda feats: feats.get('area', 0))
                main_obj['Label_Real'] = item['category']  # kept for reference only
                raw_dataset.append(main_obj)

        except Exception as e:
            # Deliberate best-effort: report and continue with the next image.
            print(f"   ‚ö†Ô∏è Error en {item['filename']}: {e}")

    if not raw_dataset:
        print("‚ùå No se extrajeron caracter√≠sticas.")
        return

    # 3. Normalization (DataPreprocessor).
    print(f"\nüßÆ Entrenando DataPreprocessor con {len(raw_dataset)} muestras...")

    target_features = extractor.get_recommended_features()

    # Fit + transform; the result is used below as a 2-D matrix with one
    # column per entry of target_features.
    X_normalized = data_prep.fit_transform(raw_dataset, target_features=target_features)

    # 4. Before/after comparison.
    print(f"\n{'='*100}")
    print(f"üìä COMPARATIVA: CRUDO vs NORMALIZADO (Z-SCORE)")
    print(f"{'='*100}")

    df_raw = pd.DataFrame(raw_dataset)
    df_norm = pd.DataFrame(X_normalized, columns=target_features)

    # Attach the label to the normalized frame (aligned on the row index).
    df_norm.insert(0, 'Label', df_raw['Label_Real'])

    unique_labels = df_norm['Label'].unique()

    # Scope the display options to this function's prints so they do not
    # leak into the rest of the notebook session.
    with pd.option_context(
        'display.max_columns', None,
        'display.width', 1000,
        'display.float_format', lambda x: '%.4f' % x,
    ):
        # One representative example per class, to keep the output short.
        for label in unique_labels:
            indices = df_norm.index[df_norm['Label'] == label].tolist()
            if not indices:
                continue

            idx = indices[0]  # first sample of this class

            print(f"\nüîπ EJEMPLO: {label}")
            print("-" * 100)

            # Look the two rows up once instead of once per feature.
            raw_row = df_raw.iloc[idx]
            norm_row = df_norm.iloc[idx]
            comparison = {
                feat: [raw_row[feat], norm_row[feat]]
                for feat in target_features
            }

            df_comp = pd.DataFrame(comparison, index=["Crudo", "Norm."])
            print(df_comp)

        # Sanity check: a normalized matrix equal to the raw values means
        # the scaling step did nothing. Skipped when the values cannot be
        # compared numerically.
        try:
            raw_matrix = df_raw[target_features].to_numpy(dtype=float)
            norm_matrix = df_norm[target_features].to_numpy(dtype=float)
        except (KeyError, TypeError, ValueError):
            raw_matrix = norm_matrix = None
        if (
            raw_matrix is not None
            and raw_matrix.shape == norm_matrix.shape
            and np.allclose(raw_matrix, norm_matrix, equal_nan=True)
        ):
            print("\nADVERTENCIA: los valores normalizados son identicos a los crudos; "
                  "revisar DataPreprocessor.fit_transform.")

        # 5. Statistical validation of the output matrix.
        print(f"\n{'='*100}")
        print("‚úÖ VALIDACI√ìN ESTAD√çSTICA DE LA MATRIZ DE SALIDA")
        print(f"   (Esperado: Media ~ 0.0 | Desviaci√≥n Std ~ 1.0)")
        print("-" * 100)

        means = np.mean(X_normalized, axis=0)
        stds = np.std(X_normalized, axis=0)

        stats_df = pd.DataFrame([means, stds], columns=target_features, index=["Media Final", "Std Final"])
        print(stats_df)

# Run the normalization demo only when this code executes as the main module.
if __name__ == "__main__":
    visualize_DataPreprocessor_pipeline()

üì¶ Recolectando 5 im√°genes por categor√≠a...
‚öôÔ∏è  Ejecutando pipeline de visi√≥n (Preproceso -> Segmentaci√≥n -> Extracci√≥n)...

üßÆ Entrenando DataPreprocessor con 20 muestras...

üìä COMPARATIVA: CRUDO vs NORMALIZADO (Z-SCORE)

üîπ EJEMPLO: ARANDELAS
----------------------------------------------------------------------------------------------------
       aspect_ratio  solidity  hole_confidence  circle_ratio  radius_variance
Crudo        1.0154    0.9894           1.0000        0.9623           0.0189
Norm.        1.0154    0.9894           1.0000        0.9623           0.0189

üîπ EJEMPLO: CLAVOS
----------------------------------------------------------------------------------------------------
       aspect_ratio  solidity  hole_confidence  circle_ratio  radius_variance
Crudo        8.2201    0.4136           0.0000        0.0426           0.5629
Norm.        8.2201    0.4136           0.0000        0.0426           0.5629

üîπ EJEMPLO: TORNILLOS
--------------------