In [1]:
import cv2
import numpy as np
import matplotlib.pyplot as plt
import os
import sys
import random
import pandas as pd # Usaremos pandas solo para mostrar la tabla bonita
import logging

from src.agent.ImagePreprocessor import ImagePreprocessor
from src.agent.Segmentator import Segmentator
from src.agent.FeatureExtractor import FeatureExtractor
from src.agent.ContourManager import ContourManager # Solo para asegurar que se carga

In [None]:
# Raise the root logger threshold to WARNING so lower-severity records are dropped.
# NOTE(review): presumably meant to keep INFO/DEBUG logs from the src.agent modules
# out of the notebook output — confirm those modules log through the root logger.
logging.getLogger().setLevel(logging.WARNING)

def get_random_image(category: str, base_path: str = "data/raw/images/all"):
    """Pick one random image file from a category folder and load it with OpenCV.

    Args:
        category: Name of the sub-folder of ``base_path`` to sample from.
        base_path: Root folder that contains one sub-folder per category.

    Returns:
        A ``(path, image)`` tuple. ``(None, None)`` is returned when the
        category folder does not exist, is not a directory, or contains no
        ``.jpg``/``.jpeg``/``.png`` file (extension match is case-insensitive).
        Otherwise ``path`` is the chosen file and ``image`` is whatever
        ``cv2.imread`` returns for it, so callers must still check ``image``
        for ``None`` in case the file could not be decoded.
    """
    dir_path = os.path.join(base_path, category)
    # isdir rather than exists: a regular file that happens to carry the
    # category's name would otherwise make os.listdir raise.
    if not os.path.isdir(dir_path):
        return None, None

    image_extensions = ('.jpg', '.jpeg', '.png')
    files = [f for f in os.listdir(dir_path) if f.lower().endswith(image_extensions)]
    if not files:
        return None, None

    # Build the path once and reuse it for both the return value and the read.
    image_path = os.path.join(dir_path, random.choice(files))
    return image_path, cv2.imread(image_path)

def visualize_FeatureExtractor_pipeline() -> None:
    """Print the recommended clustering features for one random image per category.

    For each of the categories ``arandelas``, ``clavos``, ``tornillos`` and
    ``tuercas`` a random image is loaded with ``get_random_image``, passed
    through ``ImagePreprocessor.process`` and ``Segmentator.process``, and the
    resulting bounding boxes and masks are handed to
    ``FeatureExtractor.extract_features``. The feature record with the largest
    ``area`` is kept as that category's representative and tagged with the
    upper-cased category name under the ``Label`` key.

    All representatives are then printed as one table restricted to ``Label``
    plus the names returned by ``extractor.get_recommended_features()``,
    followed by a short validation checklist.

    A category is skipped with a message when no image could be loaded or when
    segmentation yields no bounding boxes; it is skipped silently when
    ``extract_features`` returns nothing. An exception raised while processing
    one category is printed and does not abort the remaining categories.

    Returns:
        None. All results are written to stdout.
    """
    # 1. Pipeline configuration (the best-performing settings so far).
    preprocessor = ImagePreprocessor(
        target_size = (600,800),
        gamma = 1.7,
        d_bFilter = 5,
        binarization_block_size = 31,
        binarization_C = -11,       # Original note: "cannot be more than 11" — NOTE(review): value is -11, confirm whether the limit refers to magnitude
        open_kernel_size = (5, 5),
        close_kernel_size = (9, 9),
        clear_border_margin = 5
    )

    segmentator = Segmentator(
        min_area = 80,
        merge_distance = 20
    )

    extractor = FeatureExtractor()

    target_features = extractor.get_recommended_features()

    categories = ["arandelas", "clavos", "tornillos", "tuercas"]
    base_path = "data/raw/images/all"

    print(f"{'='*100}")
    print("🔬 FEATURES SELECCIONADAS PARA CLUSTERING")
    print(f"   Variables ({len(target_features)}): {target_features}")
    print(f"{'='*100}\n")

    # One representative feature record per category, used for the final table.
    all_data = []

    for category in categories:
        # Load a random sample image for this category.
        path, raw_img = get_random_image(category, base_path)
        if raw_img is None:
            # Missing folder, no image files, or an undecodable file: report it
            # instead of silently dropping the category from the table.
            print(f"⚠️ {category.upper()}: No se pudo cargar ninguna imagen desde {os.path.join(base_path, category)}")
            continue

        filename = os.path.basename(path)

        try:
            # Preprocess and segment.
            binary = preprocessor.process(raw_img)
            seg_res = segmentator.process(binary)

            bboxes = seg_res.get("bounding_boxes", [])
            masks = seg_res.get("masks", [])

            if not bboxes:
                print(f"❌ {category.upper()}: No se detectaron objetos en {filename}")
                continue

            # Extract features for every detected object.
            features_list = extractor.extract_features(bboxes, masks)

            if features_list:
                # Keep the largest object by area so that small noise blobs are
                # not mistaken for the part; max() returns the first maximal
                # element, the same pick as a stable descending sort.
                main_obj = max(features_list, key=lambda feats: feats['area'])
                # Attach the ground-truth label for comparison in the table.
                main_obj['Label'] = category.upper()

                all_data.append(main_obj)

        except Exception as e:
            # Best-effort demo: report the failure and move on to the next category.
            print(f"❌ Error en {category}: {e}")

    # Render the collected records as a table (DataFrame).
    if all_data:
        df = pd.DataFrame(all_data)

        # Show only the label plus the recommended features.
        cols_to_show = ['Label', *target_features]

        # Drop any recommended feature that was not actually computed.
        final_cols = [c for c in cols_to_show if c in df.columns]
        df_display = df[final_cols]

        # Scope the display options to this print so the notebook's global
        # pandas configuration is left untouched.
        with pd.option_context(
            'display.max_columns', None,
            'display.width', 1000,
            'display.float_format', lambda x: '%.4f' % x,
        ):
            print(df_display.to_string(index=False))

        print("\n" + "="*100)
        print("💡 CHECKLIST DE VALIDACIÓN:")
        print("   1. [Tuerca vs Arandela]: ¿Difieren circle_ratio y radius_variance?")
        print("   2. [Con Agujero vs Sin]: ¿hole_confidence es 1.0 vs 0.0?")
        print("   3. [Tornillo vs Clavo]:  ¿aspect_ratio y solidity los separan?")
        print("="*100)

In [3]:
# Run the demo; it prints its results to stdout and returns nothing.
visualize_FeatureExtractor_pipeline()

🔬 ANÁLISIS DE CARACTERÍSTICAS POR CATEGORÍA

Label_Real      area  perimeter  aspect_ratio  solidity  circularity  hole_confidence  num_vertices  circle_ratio  radius_variance     hu1
 ARANDELAS 5231.5000   271.1787        1.0263    0.9895       0.8940           1.0000        8.0000        0.9598           0.0151  0.7979
    CLAVOS 6587.5000  1014.1778        8.2645    0.4583       0.0805           0.0000        4.0000        0.0445           0.5360 -0.4080
 TORNILLOS 3299.5000   414.6001        3.8622    0.6695       0.2412           0.0000        5.0000        0.1546           0.5112  0.1223
   TUERCAS 5682.0000   291.9066        1.0812    0.9799       0.8380           1.0000        6.0000        0.8339           0.0472  0.7951

💡 GUÍA DE INTERPRETACIÓN RÁPIDA:
   - Tuerca vs Arandela:
     * Radius Variance: Arandela < 0.02 | Tuerca > 0.05
     * Circle Ratio:    Arandela > 0.90 | Tuerca ~ 0.82
     * Num Vertices:    Arandela > 8    | Tuerca ~ 6
   - Clavo vs Tornillo:
