In [19]:
import pandas as pd

# Cargar dataset original
df = pd.read_csv('labels.csv', sep=';')
labels = df.copy()

Tomo solo 1 slice aleatorio por paciente

In [35]:
# --- 1. Filtrar FB, FM, TM y tomar 1 slice aleatorio por paciente ---
fb_df = labels[labels['type'] == 'FB'].groupby('uuid').sample(1, random_state=42)
fm_df = labels[labels['type'] == 'FM'].groupby('uuid').sample(1, random_state=42)
tm_df = labels[labels['type'] == 'TM'].groupby('uuid').sample(1, random_state=42)

In [36]:
import os

# --- 2. Procesar los TB con slices central -20, central, central +20 ---
tb_df = labels[labels['type'] == 'TB'].drop_duplicates(subset='uuid')

# Suponiendo que tienes carpetas por uuid y que cada una contiene los slices numerados
tb_final = []

for _, row in tb_df.iterrows():
    uuid = row['uuid']
    
    # Ruta a la carpeta de slices de ese paciente (ajusta según tu estructura)
    carpeta = os.path.join('Experiments/', str(uuid))
    
    # Listar los archivos (asegúrate que sean solo los png de slices)
    slices = sorted([int(f.split('.')[0]) for f in os.listdir(carpeta) if f.endswith('.dcm')])
    
    if len(slices) == 0:
        continue  # Por seguridad

    central = slices[len(slices) // 2]
    
    # Slices a elegir: central-30, central, central+30 (con control de bordes)
    candidatos = set(slices)
    
    for s in [central - 30, central, central + 30]:
        if s in candidatos:
            tb_final.append({
                'type': 'TB',
                'uuid': uuid,
                'slice': s,
                'x': 0,  # Si no necesitas x e y en TB pon 0
                'y': 0
            })

# Convertir a DataFrame
tb_df_final = pd.DataFrame(tb_final)

# --- 3. Concatenar todo ---
labels_final = pd.concat([fb_df, fm_df, tm_df, tb_df_final], ignore_index=True)

In [37]:
print(labels_final['type'].value_counts())
print(f"Total final: {labels_final.shape[0]} muestras")

type
TB    45
FB    35
FM    35
TM    14
Name: count, dtype: int64
Total final: 129 muestras


In [39]:
# Crear la columna 'label': 1 si FB o FM, 0 si TB o TM
labels_final['label'] = labels_final['type'].map(lambda x: 1 if x in ['FB', 'FM'] else 0)

# Eliminar columnas 'x' y 'y'
labels_final = labels_final.drop(columns=['x', 'y'])

# Revisar que quedó bien
print(labels_final.head())
print(labels_final['label'].value_counts())


  type  uuid  slice  label
0   FB  1009     76      1
1   FB  1067    186      1
2   FB  1219     57      1
3   FB  1251    188      1
4   FB  1280    100      1
label
1    70
0    59
Name: count, dtype: int64


In [43]:
print(labels_final)
labels_final.to_csv('labels_final.csv', index=False)

    type  uuid  slice  label
0     FB  1009     76      1
1     FB  1067    186      1
2     FB  1219     57      1
3     FB  1251    188      1
4     FB  1280    100      1
..   ...   ...    ...    ...
124   TB  6080    139      0
125   TB  6080    169      0
126   TB  6644     51      0
127   TB  6644     81      0
128   TB  6644    111      0

[129 rows x 4 columns]


In [41]:
import os
import cv2
import numpy as np
import pydicom
import pandas as pd

def dcm_a_png(dcm_path, png_path, window_min=-1000, window_max=400):
    try:
        ds = pydicom.dcmread(dcm_path)
        img = ds.pixel_array.astype(np.float32)
        img = np.clip(img, window_min, window_max)
        img = ((img - window_min) / (window_max - window_min)) * 255.0
        img = img.astype(np.uint8)
        os.makedirs(os.path.dirname(png_path), exist_ok=True)
        cv2.imwrite(png_path, img)
    except Exception as e:
        print(f"Error procesando {dcm_path}: {e}")

def convertir_df_dcm_a_png(df, input_dir='Experiments', output_dir='output_png',
                           window_min=-1000, window_max=400):
    for idx, row in df.iterrows():
        uuid = row['uuid']
        slice_num = row['slice']
        dcm_file = os.path.join(input_dir, f"{uuid}/{slice_num}.dcm")
        png_file = os.path.join(output_dir, f"{uuid}/{slice_num}.png")
        if os.path.exists(dcm_file):
            dcm_a_png(dcm_file, png_file, window_min, window_max)
        else:
            print(f"No encontrado: {dcm_file}")


In [42]:
convertir_df_dcm_a_png(labels_final, input_dir='Experiments', output_dir='Experiments-png')

GLCM + LBP

In [54]:
import cv2
import numpy as np
from skimage.feature import local_binary_pattern
from skimage.feature import graycomatrix, graycoprops
from skimage.color import rgb2gray

# ---------------- Configuración ---------------- #

# Configuración de LBP
radius = 1
n_points = 8 * radius
method = 'uniform'  # Da un histograma de 59 bins con 8 vecinos

# Configuración de GLCM
distances = [1]
angles = [0, np.pi/4, np.pi/2, 3*np.pi/4]

# ---------------- Función de extracción ---------------- #

def extraer_features(img):
    """
    Extrae características GLCM + LBP sobre toda la imagen.
    Retorna: features_glcm, features_lbp, features_concatenados
    """
    
    if len(img.shape) == 3:
        img = rgb2gray(img)
    
    img = (img * 255).astype(np.uint8)  # Asegurar escala de 0-255 si es float

    # ------ GLCM Features ------ #
    glcm = graycomatrix(img, 
                        distances=distances, 
                        angles=angles, 
                        levels=256, 
                        symmetric=True, 
                        normed=True)
    
    contraste = graycoprops(glcm, 'contrast').flatten()
    homogeneidad = graycoprops(glcm, 'homogeneity').flatten()
    energia = graycoprops(glcm, 'energy').flatten()
    correlacion = graycoprops(glcm, 'correlation').flatten()
    entropia = -np.sum(glcm * np.log2(glcm + 1e-10), axis=(0, 1)).flatten()

    features_glcm = np.concatenate([contraste, homogeneidad, energia, correlacion, entropia])

    # ------ LBP Features ------ #
    lbp = local_binary_pattern(img, n_points, radius, method)
    hist, _ = np.histogram(lbp.ravel(), 
                           bins=np.arange(0, n_points + 3),
                           range=(0, n_points + 2))
    hist = hist.astype("float")
    hist /= (hist.sum() + 1e-6)  # Normalización

    features_lbp = hist

    # ------ Vector final ------ #
    features = np.concatenate([features_glcm, features_lbp])

    return features_glcm, features_lbp, features


Crear la matriz

In [57]:
import os
import cv2
import pandas as pd
from tqdm import tqdm  # Barra de progreso

# ---------------- Configuración ---------------- #
carpeta_png = 'Experiments-png/'  # Carpeta donde están los PNGs
labels = pd.read_csv('labels_final.csv')  # Tu dataframe con columnas uuid, slice, label

# Lista para guardar resultados
lista_features = []

# ---------------- Recorrido ---------------- #
for idx, row in tqdm(labels.iterrows(), total=labels.shape[0]):
    uuid = row['uuid']
    slice_num = row['slice']
    etiqueta = row['label']

    ruta_imagen = os.path.join(carpeta_png, f"{uuid}/{slice_num}.png")
    
    if not os.path.exists(ruta_imagen):
        print(f"Advertencia: No se encontró {ruta_imagen}")
        continue

    img = cv2.imread(ruta_imagen, cv2.IMREAD_GRAYSCALE)
    if img is None:
        print(f"Error cargando {ruta_imagen}")
        continue

    features_glcm, features_lbp, features_concatenados = extraer_features(img)
    
    # Guardar en lista con uuid, slice y label
    lista_features.append([uuid, slice_num, etiqueta] + features_concatenados.tolist())

# ---------------- DataFrame final ---------------- #

# Solo necesitas definir columnas una vez, conociendo el tamaño de los vectores
num_glcm = features_glcm.shape[0]
num_lbp = features_lbp.shape[0]

columnas_glcm = [f'glcm_feat_{i}' for i in range(num_glcm)]
columnas_lbp = [f'lbp_hist_{i}' for i in range(num_lbp)]
columnas = ['uuid', 'slice', 'label'] + columnas_glcm + columnas_lbp

df_features = pd.DataFrame(lista_features, columns=columnas)

# Eliminar columna slice
df_features = df_features.drop(columns=['slice'])

# Reordenar columnas
columnas = [col for col in df_features.columns if col not in ['uuid', 'label']] + ['uuid', 'label']

df_features = df_features[columnas]

print(df_features.head())



100%|██████████| 129/129 [00:10<00:00, 12.32it/s]

   glcm_feat_0  glcm_feat_1  glcm_feat_2  glcm_feat_3  glcm_feat_4  \
0  2987.199020  3228.768092  2054.245012  3304.448715     0.501893   
1  1762.941154  2341.829382  1740.008164  2346.661586     0.544249   
2  2499.043756  3024.646581  2426.883172  3254.103753     0.533793   
3  7780.496201  8272.937167  7005.812867  8576.090043     0.222883   
4  3358.162128  4028.171101  4295.801477  4770.171062     0.567026   

   glcm_feat_5  glcm_feat_6  glcm_feat_7  glcm_feat_8  glcm_feat_9  ...  \
0     0.437208     0.485942     0.442947     0.236925     0.234076  ...   
1     0.498127     0.535225     0.500700     0.360304     0.355159  ...   
2     0.468999     0.501406     0.466401     0.282675     0.277729  ...   
3     0.198182     0.221299     0.199584     0.112676     0.108759  ...   
4     0.532282     0.546979     0.522355     0.404752     0.398929  ...   

   lbp_hist_2  lbp_hist_3  lbp_hist_4  lbp_hist_5  lbp_hist_6  lbp_hist_7  \
0    0.030975    0.081432    0.155048    0.100048  




In [58]:
import numpy as np

# Separar features, labels y uuids
X = df_features.drop(columns=['uuid', 'label']).values
y = df_features['label'].values
uuids = df_features['uuid'].values

# Guardar en formato comprimido
np.savez_compressed("df-deepfake.npz", X=X, y=y, uuids=uuids)

print("Archivo df-deepfake.npz guardado correctamente.")


Archivo df-deepfake.npz guardado correctamente.


In [84]:
import torch
import torchvision.models as models
import torchvision.transforms as transforms
from PIL import Image
import numpy as np
import os


# ---------------- Configuración ---------------- #

# Cargar ResNet preentrenada
resnet = models.resnet18(pretrained=True)  # Puedes usar resnet34, resnet50, etc.
resnet.eval()  # Modo evaluación

# Quitar la capa final (clasificación) para quedarte solo con los features
feature_extractor = torch.nn.Sequential(*list(resnet.children())[:-1])

# Transformaciones necesarias para la imagen
transform = transforms.Compose([
    transforms.Resize((224, 224)),  # Las redes esperan 224x224
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],  # ImagenNet stats
                         std=[0.229, 0.224, 0.225])
])

# ---------------- Función para extraer features ---------------- #

def extraer_features_resnet(ruta_img):
    img = Image.open(ruta_img).convert('RGB')  # Convertir a RGB (aunque sea CT, se duplican canales)
    img = transform(img).unsqueeze(0)  # Añadir batch dimension
    with torch.no_grad():
        features = feature_extractor(img).squeeze().numpy()  # shape: (512,)
    return features






In [85]:
import torch
import torchvision.models as models
import torchvision.transforms as transforms
from PIL import Image
import numpy as np
import os



# Función de extracción
def extraer_features_resnet(ruta_img):
    img = Image.open(ruta_img).convert('RGB')
    img = transform(img).unsqueeze(0)
    with torch.no_grad():
        features = feature_extractor(img).squeeze().numpy()
    return features

# Leer CSV
labels = pd.read_csv("labels_final.csv")  # Asegúrate que se llama así

# Listas para guardar
features_lista = []
labels_lista = []
groups_lista = []

carpeta_png = "Experiments-png/"

# Recorrido basado en el CSV
for idx, row in labels.iterrows():
    uuid = str(row['uuid'])
    slice_num = str(row['slice'])
    label = row['label']

    ruta_img = os.path.join(carpeta_png, uuid, f"{slice_num}.png")

    if not os.path.exists(ruta_img):
        print(f"⚠ No se encontró {ruta_img}")
        continue

    features = extraer_features_resnet(ruta_img)
    features_lista.append(features)
    labels_lista.append(label)
    groups_lista.append(uuid)

# Convertir a arrays
X = np.array(features_lista)
y = np.array(labels_lista)
groups = np.array(groups_lista)

print(f"✅ Shape final X: {X.shape}")
print(f"✅ Shape final y: {y.shape}")
print(f"✅ Shape final groups: {groups.shape}")

✅ Shape final X: (129, 512)
✅ Shape final y: (129,)
✅ Shape final groups: (129,)


In [88]:
# Guardar como archivo comprimido
np.savez_compressed("features_labels_resnet.npz", X=X, y=y, groups = groups)

print("✅ Archivo 'features_labels_resnet.npz' guardado exitosamente.")

✅ Archivo 'features_labels_resnet.npz' guardado exitosamente.
