In [3]:
import pandas as pd
import os
import cv2
import numpy as np
import pydicom
import pandas as pd
from tqdm import tqdm
from skimage.feature import local_binary_pattern
from skimage.feature import graycomatrix, graycoprops

In [4]:
# Load the original label table; the file is semicolon-separated.
df = pd.read_csv('labels.csv', sep=';')
# Working copy without the rows whose type is 'FB', renumbered from 0.
labels = df.loc[df['type'] != 'FB'].copy().reset_index(drop=True)

In [5]:
# Persist the current `labels` table to 'labels_oneclass.csv' without the index column.
# No `sep` is passed, so pandas' default separator is used for this file.
labels.to_csv('labels_oneclass.csv', index=False)

In [7]:

def dcm_a_png(dcm_path, png_path, window_min=-1000, window_max=400):
    """Convert one DICOM file to an 8-bit grayscale PNG using an intensity window.

    The pixel data is rescaled with the dataset's RescaleSlope / RescaleIntercept
    (when those attributes are present), clipped to [window_min, window_max],
    mapped linearly to 0-255 and written to `png_path`.

    Args:
        dcm_path: Path of the DICOM file to read.
        png_path: Destination path of the PNG; its parent folder is created if needed.
        window_min: Lower bound of the intensity window.
        window_max: Upper bound of the intensity window; must be > window_min.

    Returns:
        None. This is best-effort: any failure is reported with `print` and
        never raised, so a batch conversion keeps going.
    """
    try:
        if window_max <= window_min:
            # Avoids a division by zero (or an inverted image) further down.
            raise ValueError(
                f"ventana inválida: window_min={window_min}, window_max={window_max}"
            )

        ds = pydicom.dcmread(dcm_path)
        img = ds.pixel_array.astype(np.float32)

        # `pixel_array` holds the stored values. The window bounds are only
        # meaningful after the modality rescale has been applied.
        # NOTE(review): the defaults look like Hounsfield units for CT - confirm.
        slope = float(getattr(ds, 'RescaleSlope', 1.0))
        intercept = float(getattr(ds, 'RescaleIntercept', 0.0))
        img = img * slope + intercept

        img = np.clip(img, window_min, window_max)
        img = ((img - window_min) / (window_max - window_min)) * 255.0
        img = img.astype(np.uint8)

        # os.makedirs('') raises, so only create a folder when the path has one.
        out_dir = os.path.dirname(png_path)
        if out_dir:
            os.makedirs(out_dir, exist_ok=True)

        # cv2.imwrite signals failure through its return value, not an exception.
        if not cv2.imwrite(png_path, img):
            print(f"Error procesando {dcm_path}: no se pudo escribir {png_path}")
    except Exception as e:
        print(f"Error procesando {dcm_path}: {e}")

def convertir_a_png_condicional(df, input_dir='Experiments', output_dir='output_png',
                                window_min=-1000, window_max=400):
    """Convert the DICOM slices referenced by a label table into PNG files.

    For every row of `df` the folder `<input_dir>/<uuid>` is looked up and:
      * type 'TB' or 'TM': every '.dcm' file in the folder is converted;
      * type 'FM': only '<slice>.dcm' is converted;
      * any other type: the row is ignored.
    PNGs are written to `<output_dir>/<uuid>/` with the source file's base name.

    Args:
        df: DataFrame with the columns 'type', 'uuid' and 'slice'.
        input_dir: Root folder holding one sub-folder per uuid.
        output_dir: Root folder where the PNGs are written.
        window_min: Lower intensity bound forwarded to `dcm_a_png`.
        window_max: Upper intensity bound forwarded to `dcm_a_png`.

    Returns:
        None. Missing folders or slices are reported with `print` and skipped.
    """
    # uuids whose whole folder has already been converted. Several TB/TM rows
    # can share a uuid, and converting again would only rewrite the same PNGs.
    pacientes_completos = set()

    for _, row in tqdm(df.iterrows(), total=df.shape[0], desc="Procesando imágenes"):
        tipo = row['type']
        uuid = str(row['uuid'])
        slice_num = row['slice']

        folder = os.path.join(input_dir, uuid)

        # isdir (not exists): a regular file with that name would break os.listdir.
        if not os.path.isdir(folder):
            print(f"Carpeta no encontrada: {folder}")
            continue

        if tipo in ('TB', 'TM'):
            if uuid in pacientes_completos:
                continue
            pacientes_completos.add(uuid)

            # Convert every .dcm file of this patient.
            for file in os.listdir(folder):
                if file.endswith('.dcm'):
                    dcm_path = os.path.join(folder, file)
                    png_name = os.path.splitext(file)[0] + '.png'
                    png_path = os.path.join(output_dir, uuid, png_name)
                    dcm_a_png(dcm_path, png_path, window_min, window_max)

        elif tipo == 'FM':
            # Convert only the slice named in the row.
            dcm_path = os.path.join(folder, f"{slice_num}.dcm")
            png_path = os.path.join(output_dir, uuid, f"{slice_num}.png")
            if os.path.exists(dcm_path):
                dcm_a_png(dcm_path, png_path, window_min, window_max)
            else:
                print(f"Slice no encontrado: {dcm_path}")

# Run the conversion for the rows in `labels`: DICOMs are read from 'Experiments' and
# PNGs are written under 'Experiments-OneClass', with the function's default window.
convertir_a_png_condicional(labels, input_dir='Experiments', output_dir='Experiments-OneClass')

Procesando imágenes: 100%|██████████| 92/92 [05:08<00:00,  3.35s/it] 


In [14]:

import torch
import torch.nn as nn
import torchvision.models as models
import torchvision.transforms as transforms

def extraer_features_glcm(img):
    """Compute GLCM texture descriptors for a grayscale image.

    A symmetric, normalized co-occurrence matrix with 256 levels is built at
    distance 1 for the angles 0, pi/4, pi/2 and 3*pi/4. Contrast, homogeneity,
    energy, correlation and a base-2 entropy are then taken from it.

    Args:
        img: 2-D grayscale image. Assumed to hold integer values below 256,
            to fit `levels=256` - TODO confirm with callers.

    Returns:
        1-D array: the flattened contrast, homogeneity, energy, correlation
        and entropy values concatenated in that order.
    """
    offsets = [1]
    orientaciones = [0, np.pi/4, np.pi/2, 3*np.pi/4]

    matriz = graycomatrix(
        img,
        distances=offsets,
        angles=orientaciones,
        levels=256,
        symmetric=True,
        normed=True,
    )

    # Properties provided by graycoprops, in the order they appear in the output.
    bloques = [
        graycoprops(matriz, propiedad).flatten()
        for propiedad in ('contrast', 'homogeneity', 'energy', 'correlation')
    ]

    # Entropy is computed by hand; the small constant keeps log2 finite on
    # empty cells of the matrix.
    entropia = -np.sum(matriz * np.log2(matriz + 1e-10), axis=(0, 1)).flatten()
    bloques.append(entropia)

    return np.concatenate(bloques)

def extraer_features_lbp(img):
    """Return the normalized histogram of 'uniform' LBP codes of an image.

    Uses radius 1 and 8 neighbours. The histogram is built with
    `n_points + 3` bin edges, i.e. `n_points + 2` = 10 bins.

    Args:
        img: 2-D grayscale image.

    Returns:
        1-D float array with one entry per bin, divided by the total count.
    """
    radio = 1
    vecinos = 8 * radio

    codigos = local_binary_pattern(img, vecinos, radio, 'uniform')

    bordes = np.arange(0, vecinos + 3)
    conteos, _ = np.histogram(codigos.ravel(),
                              bins=bordes,
                              range=(0, vecinos + 2))
    conteos = conteos.astype("float")

    # The small constant guards the division if the histogram is empty.
    return conteos / (conteos.sum() + 1e-6)

# ResNet-18 set up as a feature extractor: the final fully connected layer is
# replaced by an identity, so the model returns the features that feed it.
# `pretrained=True` is deprecated in newer torchvision releases in favour of
# `weights=`. The explicit enum is used when available and the legacy call is
# kept as a fallback for older installs.
if hasattr(models, "ResNet18_Weights"):
    resnet = models.resnet18(weights=models.ResNet18_Weights.IMAGENET1K_V1)
else:
    resnet = models.resnet18(pretrained=True)
resnet.fc = nn.Identity()
resnet.eval()  # inference mode: fixes batch-norm statistics

# Input pipeline: array -> PIL image -> resize -> tensor -> per-channel
# normalization with the mean/std listed below.
preprocess = transforms.Compose([
    transforms.ToPILImage(),
    transforms.Resize(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

def extraer_features_resnet(img):
    """Return the flattened output of the module-level `resnet` for one image.

    Args:
        img: Image array. A 2-D (grayscale) array is expanded to three
            channels first; any other shape is passed on unchanged.

    Returns:
        1-D NumPy array with the model output for this image.
    """
    es_gris = len(img.shape) == 2
    rgb = cv2.cvtColor(img, cv2.COLOR_GRAY2RGB) if es_gris else img

    # Add a leading batch dimension of size 1.
    lote = preprocess(rgb).unsqueeze(0)

    # No gradients are needed for feature extraction.
    with torch.no_grad():
        salida = resnet(lote)
        vector = salida.numpy().flatten()

    return vector





In [None]:



# ---------------- Configuration ---------------- #
carpeta_png = 'Experiments-OneClass/'
labels = pd.read_csv('labels_oneclass.csv')
# Binary target: 1 for rows of type 'FM', 0 for every other type.
labels['label'] = labels['type'].apply(lambda x: 1 if x == 'FM' else 0)

# (uuid, slice) -> label lookup table.
dict_labels = {}
for idx, row in labels.iterrows():
    key = (str(row['uuid']), int(row['slice']))
    dict_labels[key] = row['label']

# One row per image: [uuid, slice, label, feature values...]
lista_features = []

# Feature column names. Filled once from the first processed image, so the
# final DataFrame can still be built when no image is found.
columnas = []

# ---------------- Walk over every image ---------------- #
for uuid in tqdm(os.listdir(carpeta_png)):
    uuid_path = os.path.join(carpeta_png, uuid)
    if not os.path.isdir(uuid_path):
        continue

    for file in os.listdir(uuid_path):
        if not file.endswith(".png"):
            continue

        ruta_imagen = os.path.join(uuid_path, file)

        # The slice number is the file name without its extension. A stray
        # non-numeric PNG is skipped instead of aborting the whole run.
        try:
            slice_num = int(file.replace(".png", ""))
        except ValueError:
            print(f"Nombre de archivo no numérico, se omite: {ruta_imagen}")
            continue

        img = cv2.imread(ruta_imagen, cv2.IMREAD_GRAYSCALE)

        if img is None:
            print(f"Error cargando {ruta_imagen}")
            continue

        # Images not listed in the label table default to label 0.
        label = dict_labels.get((uuid, slice_num), 0)

        # Extract features: LBP histogram first, then GLCM descriptors.
        features_lbp = extraer_features_lbp(img)
        features_glcm = extraer_features_glcm(img)
        features_concatenados = np.concatenate([features_lbp, features_glcm])

        if not columnas:
            columnas = (
                [f'lbp_hist_{i}' for i in range(features_lbp.shape[0])]
                + [f'glcm_feat_{i}' for i in range(features_glcm.shape[0])]
            )

        lista_features.append([uuid, slice_num, label] + features_concatenados.tolist())

# ---------------- Final DataFrame ---------------- #
columnas_finales = ['uuid', 'slice', 'label'] + columnas
df_features = pd.DataFrame(lista_features, columns=columnas_finales)

# The slice number is not kept in the saved table.
df_features = df_features.drop(columns=['slice'])

# Reorder: feature columns first, then 'uuid' and 'label' at the end.
columnas_ordenadas = [col for col in df_features.columns if col not in ['uuid', 'label']] + ['uuid', 'label']
df_features = df_features[columnas_ordenadas]

print(f"✅ DataFrame final con shape: {df_features.shape}")

df_features.to_pickle("data_oneclass.pkl")

100%|██████████| 64/64 [15:07<00:00, 14.19s/it]


✅ DataFrame final con shape: (6783, 32)


In [15]:
# ---------------- Load labels ---------------- #
carpeta_png = 'Experiments-OneClass/'
labels = pd.read_csv('labels_oneclass.csv')
# Binary target: 1 for rows of type 'FM', 0 for every other type.
labels['label'] = labels['type'].apply(lambda x: 1 if x == 'FM' else 0)

# (uuid, slice) -> label lookup table.
dict_labels = {(str(row['uuid']), int(row['slice'])): row['label'] for idx, row in labels.iterrows()}

# One row per image: [uuid, slice, label, feature values...]
lista_features = []

# ---------------- Walk over every image ---------------- #
print("🔧 Procesando imágenes...")

for uuid in tqdm(os.listdir(carpeta_png), desc="Pacientes"):
    uuid_path = os.path.join(carpeta_png, uuid)
    if not os.path.isdir(uuid_path):
        continue

    archivos = [f for f in os.listdir(uuid_path) if f.endswith(".png")]

    for file in tqdm(archivos, desc=f"Procesando {uuid}", leave=False):
        ruta_imagen = os.path.join(uuid_path, file)

        # The slice number is the file name without its extension. A stray
        # non-numeric PNG is skipped instead of aborting the whole run.
        try:
            slice_num = int(file.replace(".png", ""))
        except ValueError:
            print(f"⚠️ Nombre de archivo no numérico, se omite: {ruta_imagen}")
            continue

        img = cv2.imread(ruta_imagen, cv2.IMREAD_GRAYSCALE)

        if img is None:
            print(f"⚠️ Error cargando {ruta_imagen}")
            continue

        # Images not listed in the label table default to label 0.
        label = dict_labels.get((uuid, slice_num), 0)

        features_resnet = extraer_features_resnet(img)
        lista_features.append([uuid, slice_num, label] + features_resnet.tolist())

# ---------------- Final DataFrame ---------------- #
# Take the feature width from the data. 512 is only the fallback used when no
# image was processed, so the empty table keeps the previous column layout.
n_resnet = len(lista_features[0]) - 3 if lista_features else 512
columnas_finales = ['uuid', 'slice', 'label'] + [f'resnet_feat_{i}' for i in range(n_resnet)]
df_features = pd.DataFrame(lista_features, columns=columnas_finales)

# The slice number is not kept in the saved table.
df_features = df_features.drop(columns=['slice'])

# Reorder: feature columns first, then 'uuid' and 'label' at the end.
columnas_ordenadas = [col for col in df_features.columns if col not in ['uuid', 'label']] + ['uuid', 'label']
df_features = df_features[columnas_ordenadas]

print(f"✅ DataFrame final con shape: {df_features.shape}")

df_features.to_pickle("data_oneclass_resnet.pkl")

🔧 Procesando imágenes...


Pacientes: 100%|██████████| 64/64 [07:05<00:00,  6.64s/it]


✅ DataFrame final con shape: (6783, 514)


In [13]:
# Load the LBP + GLCM feature table from the file its extraction cell wrote,
# instead of reading the in-memory `df_features`. That name is also assigned by
# the ResNet extraction cell, so in a top-to-bottom run it would hold ResNet
# features at this point and the 'data_oneclass_*' files would get the wrong data.
# The pickle is produced by this notebook; do not point this at untrusted files.
df_clasico = pd.read_pickle("data_oneclass.pkl")

X = np.array(df_clasico.drop(columns=['uuid', 'label']))
y = np.array(df_clasico['label'])
# Fixed-width string array rather than an object array, so the .npz files can
# be read back with np.load without allow_pickle=True.
groups = np.asarray(df_clasico['uuid'].tolist(), dtype=str)

# Full dataset
np.savez_compressed("data_oneclass_completo.npz", X=X, y=y, groups=groups)
df_clasico.to_pickle("data_oneclass_completo.pkl")

# Only the rows labelled 0 ("normales")
mask_normales = y == 0
X_normales = X[mask_normales]
y_normales = y[mask_normales]
groups_normales = groups[mask_normales]

np.savez_compressed("data_oneclass_normales.npz", X=X_normales, y=y_normales, groups=groups_normales)

# Only the rows labelled 1 ("anormales")
mask_anormales = y == 1
X_anormales = X[mask_anormales]
y_anormales = y[mask_anormales]
groups_anormales = groups[mask_anormales]

np.savez_compressed("data_oneclass_anormales.npz", X=X_anormales, y=y_anormales, groups=groups_anormales)

print("✅ Datasets guardados exitosamente.")


✅ Datasets guardados exitosamente.


In [16]:
# Load the ResNet feature table from the file its extraction cell wrote, so
# this cell does not depend on which cell last assigned `df_features`.
# The pickle is produced by this notebook; do not point this at untrusted files.
df_resnet = pd.read_pickle("data_oneclass_resnet.pkl")

X = np.array(df_resnet.drop(columns=['uuid', 'label']))
y = np.array(df_resnet['label'])
# Fixed-width string array rather than an object array, so the .npz files can
# be read back with np.load without allow_pickle=True.
groups = np.asarray(df_resnet['uuid'].tolist(), dtype=str)

# Full dataset
np.savez_compressed("data_oneclass_resnet_completo.npz", X=X, y=y, groups=groups)
df_resnet.to_pickle("data_oneclass_resnet_completo.pkl")

# Only the rows labelled 0 ("normales")
mask_normales = y == 0
X_normales = X[mask_normales]
y_normales = y[mask_normales]
groups_normales = groups[mask_normales]

np.savez_compressed("data_oneclass_resnet_normales.npz", X=X_normales, y=y_normales, groups=groups_normales)

# Only the rows labelled 1 ("anormales")
mask_anormales = y == 1
X_anormales = X[mask_anormales]
y_anormales = y[mask_anormales]
groups_anormales = groups[mask_anormales]

np.savez_compressed("data_oneclass_resnet_anormales.npz", X=X_anormales, y=y_anormales, groups=groups_anormales)

print("✅ Datasets guardados exitosamente.")


✅ Datasets guardados exitosamente.
