In [73]:
import pandas as pd
import os
import glob
import random
import cv2
import numpy as np

def create_labels_TB(df, n, seed=None):
    for idx, row in df.loc[df['type'] == 'TB'].iterrows():
        folder = os.path.join('Experiments', str(row.uuid))
        files = glob.glob(os.path.join(folder, '*.dcm'))
        id = list(map(lambda x: int(os.path.basename(x).split('.')[0]), files))
        if seed is not None:
            random.seed(seed)
        df.loc[idx, 'slice'] = random.randint(n, max(id)-n)

def add_labels(df, n):
    temp = df.copy()
    for i in range(n):
        temp1 = df.copy()
        temp2 = df.copy()
        temp1['slice'] = temp1['slice']-(1+i)
        temp2['slice'] = temp2['slice']+(1+i)
        temp = pd.concat([temp, temp1, temp2], ignore_index=True)
    return temp

def expandir_tb_con_vecinos(df, n_vecinos=1):
    df_result = df.copy()
    tb = df[df['type'] == 'TB'].copy()
    nuevos = []

    for i in range(1, n_vecinos + 1):
        temp1 = tb.copy()
        temp2 = tb.copy()
        temp1['slice'] -= i
        temp2['slice'] += i
        nuevos.append(temp1)
        nuevos.append(temp2)

    df_result = pd.concat([df_result] + nuevos, ignore_index=True)
    return df_result


In [74]:
# Cargar dataset original
labels = pd.read_csv('labels.csv', sep=';')

# Etiquetar las clases: 1 para 'F', 0 para 'T'
labels['tag'] = labels['type'].apply(lambda x: 1 if x.startswith('F') else (0 if x.startswith('T') else ''))

# Elegir n para crear slices válidos en TB
n = 7
print("Antes de asignar slices TB:\n", labels[labels['type'] == 'TB'].head())
create_labels_TB(labels, n, seed=42)
print("Después de asignar slices TB:\n", labels[labels['type'] == 'TB'].head())
print("Tamaño original:", labels.shape)

# Expandir todos los datos con n vecinos
labels = add_labels(labels, n)

# Balancear agregando más vecinos a TB (no duplica arbitrariamente)
labels = expandir_tb_con_vecinos(labels, n_vecinos=1)

print("Tamaño final con vecinos TB:", labels.shape)
print(labels['tag'].value_counts())

Antes de asignar slices TB:
     type  uuid  slice  x  y  tag
113   TB  1531      0  0  0    0
114   TB  1563      0  0  0    0
115   TB  1610      0  0  0    0
116   TB  1610      0  0  0    0
117   TB  1632      0  0  0    0
Después de asignar slices TB:
     type  uuid  slice  x  y  tag
113   TB  1531     88  0  0    0
114   TB  1563    334  0  0    0
115   TB  1610     88  0  0    0
116   TB  1610     88  0  0    0
117   TB  1632    170  0  0    0
Tamaño original: (164, 6)
Tamaño final con vecinos TB: (3090, 6)
tag
1    1695
0    1395
Name: count, dtype: int64


In [75]:

print(labels['type'].value_counts())

type
FB    1080
TB     945
FM     615
TM     450
Name: count, dtype: int64


In [76]:
import os
import cv2
import numpy as np
import pydicom
import pandas as pd

def dcm_a_png(dcm_path, png_path, window_min=-1000, window_max=400):
    try:
        ds = pydicom.dcmread(dcm_path)
        img = ds.pixel_array.astype(np.float32)
        img = np.clip(img, window_min, window_max)
        img = ((img - window_min) / (window_max - window_min)) * 255.0
        img = img.astype(np.uint8)
        os.makedirs(os.path.dirname(png_path), exist_ok=True)
        cv2.imwrite(png_path, img)
    except Exception as e:
        print(f"Error procesando {dcm_path}: {e}")

def convertir_df_dcm_a_png(df, input_dir='Experiments', output_dir='output_png',
                           window_min=-1000, window_max=400):
    for idx, row in df.iterrows():
        uuid = row['uuid']
        slice_num = row['slice']
        dcm_file = os.path.join(input_dir, f"{uuid}/{slice_num}.dcm")
        png_file = os.path.join(output_dir, f"{uuid}/{slice_num}.png")
        if os.path.exists(dcm_file):
            dcm_a_png(dcm_file, png_file, window_min, window_max)
        else:
            print(f"No encontrado: {dcm_file}")


In [77]:
convertir_df_dcm_a_png(labels, input_dir='Experiments', output_dir='Experiments-png')


Features GLCM

In [78]:
from skimage.feature import graycomatrix, graycoprops
from skimage.io import imread
from skimage.transform import resize

def extraer_features_glcm(path_imagen, tamano=(128, 128), distancias=[1], angulos=[0]):
    try:
        img = imread(path_imagen, as_gray=True)
        img = resize(img, tamano, anti_aliasing=True)
        

        # Convertir a entero (niveles de gris)
        img = (img * 255).astype(np.uint8)
        # Calcular GLCM
        glcm = graycomatrix(img, 
                            distances=distancias,
                            angles=angulos,
                            levels=256,
                            symmetric=True,
                            normed=True)
        
        # Extraer características
        props = ['contrast', 'dissimilarity', 'homogeneity', 'ASM', 'correlation']
        features = [graycoprops(glcm, prop).ravel() for prop in props]
        return np.concatenate(features)
    except Exception as e:
        print(f"Error en {path_imagen}: {e}")
        return None


In [79]:
def crear_matriz_glcm(df, path_base='Experiments-png', tamano=(128, 128), distancias=[1], angulos=[0]):
    from tqdm import tqdm  # Para ver progreso
    X = []
    y = []
    fallos = 0

    for _, row in tqdm(df.iterrows(), total=len(df)):
        uuid = row['uuid']
        slice_num = row['slice']
        tag = row['tag']
        path_img = os.path.join(path_base, f"{uuid}/{slice_num}.png")

        if os.path.exists(path_img):
            features = extraer_features_glcm(path_img, tamano, distancias, angulos)
            if features is not None:
                X.append(features)
                y.append(tag)
            else:
                fallos += 1
        else:
            print(f"Imagen no encontrada: {path_img}")
            fallos += 1

    print(f"Total fallos: {fallos}")
    return np.array(X), np.array(y)


In [80]:
X, y = crear_matriz_glcm(labels,
                         tamano=(128, 128),
                         distancias=[1, 2],
                         angulos=[0, np.pi/4, np.pi/2, 3*np.pi/4])

np.savez_compressed("df-deepfake.npz", X=X, y=y)


100%|██████████| 3090/3090 [03:08<00:00, 16.38it/s]

Total fallos: 0





In [83]:
import random
i = random.randint(0, len(X) - 1)
print("Índice seleccionado:", i)
features_originales = X[i]
print("Features desde matriz X:", features_originales)
uuid = labels.iloc[i]['uuid']
slice_num = labels.iloc[i]['slice']
tag = labels.iloc[i]['tag']
print(f"Imagen: {uuid}/{slice_num}.png - Etiqueta: {tag}")
ruta_img = f"Experiments-png/{uuid}/{slice_num}.png"
features_verificadas = extraer_features_glcm(
    ruta_img,
    distancias=[1, 2],
    angulos=[0, np.pi/4, np.pi/2, 3*np.pi/4]
)
print("Features recalculadas desde imagen:", features_verificadas)

print("¿Iguales?")
print("Originales:", features_originales)
print("Verificadas:", features_verificadas)


Índice seleccionado: 1126
Features desde matriz X: [4.70142224e+02 1.21064294e+03 8.30377953e+02 1.09262775e+03
 1.33932819e+03 1.21064294e+03 2.20881306e+03 1.09262775e+03
 8.82160433e+00 1.48670097e+01 1.21425935e+01 1.41088102e+01
 1.55425967e+01 1.48670097e+01 2.11690848e+01 1.41088102e+01
 4.55737456e-01 4.01870322e-01 4.22957098e-01 4.08477111e-01
 3.99469355e-01 4.01870322e-01 3.68728735e-01 4.08477111e-01
 7.49311687e-02 6.98365837e-02 7.25057620e-02 7.09101211e-02
 6.99474155e-02 6.98365837e-02 6.54008923e-02 7.09101211e-02
 9.69526870e-01 9.21534456e-01 9.46091447e-01 9.29183752e-01
 9.13318652e-01 9.21534456e-01 8.56840504e-01 9.29183752e-01]
Imagen: 2131/214.png - Etiqueta: 0
Features recalculadas desde imagen: [4.70142224e+02 1.21064294e+03 8.30377953e+02 1.09262775e+03
 1.33932819e+03 1.21064294e+03 2.20881306e+03 1.09262775e+03
 8.82160433e+00 1.48670097e+01 1.21425935e+01 1.41088102e+01
 1.55425967e+01 1.48670097e+01 2.11690848e+01 1.41088102e+01
 4.55737456e-01 4.01870