In [1]:
import pandas as pd
import os
import glob
import random
import cv2
import numpy as np

In [2]:


def create_labels_TB(df, n, seed=None):
    for idx, row in df.loc[df['type'] == 'TB'].iterrows():
        folder = os.path.join('Experiments', str(row.uuid))
        files = glob.glob(os.path.join(folder, '*.dcm'))
        id = list(map(lambda x: int(os.path.basename(x).split('.')[0]), files))
        if seed is not None:
            random.seed(seed)
        df.loc[idx, 'slice'] = random.randint(n, max(id)-n)

def add_labels(df, n):
    temp = df.copy()
    for i in range(n):
        temp1 = df.copy()
        temp2 = df.copy()
        temp1['slice'] = temp1['slice']-(1+i)
        temp2['slice'] = temp2['slice']+(1+i)
        temp = pd.concat([temp, temp1, temp2], ignore_index=True)
    return temp

def expandir_tb_con_vecinos(df, n_vecinos=1):
    df_result = df.copy()
    tb = df[df['type'] == 'TB'].copy()
    nuevos = []

    for i in range(1, n_vecinos + 1):
        temp1 = tb.copy()
        temp2 = tb.copy()
        temp1['slice'] -= i
        temp2['slice'] += i
        nuevos.append(temp1)
        nuevos.append(temp2)

    df_result = pd.concat([df_result] + nuevos, ignore_index=True)
    return df_result

In [3]:
def add_labels(df, n=1, st=1):
    temp = df.copy()
    temp['generated'] = 0
    for i in range(n):
        temp1 = df.copy()
        temp2 = df.copy()
        temp1['slice'] = temp1['slice']-(1+i*st)
        temp1['generated'] = 1
        temp2['slice'] = temp2['slice']+(1+i*st)
        temp2['generated'] = 1
        temp = pd.concat([temp, temp1, temp2], ignore_index=True)
    return temp

In [4]:
# Cargar dataset original
labels = pd.read_csv('labels.csv', sep=';')

# Etiquetar las clases: 1 para 'F', 0 para 'T'
labels['tag'] = labels['type'].apply(lambda x: 1 if x.startswith('F') else (0 if x.startswith('T') else ''))

# Elegir n para crear slices válidos en TB
n = 1
print("Antes de asignar slices TB:\n", labels[labels['type'] == 'TB'].head())
create_labels_TB(labels, n, seed=42)
print("Después de asignar slices TB:\n", labels[labels['type'] == 'TB'].head())
print("Tamaño original:", labels.shape)

# Expandir todos los datos con n vecinos
labels = add_labels(labels, n, st=2)

# Balancear agregando más vecinos a TB (no duplica arbitrariamente)
#labels = expandir_tb_con_vecinos(labels, n_vecinos=1)

print("Tamaño final con vecinos TB:", labels.shape)
print(labels['tag'].value_counts())

Antes de asignar slices TB:
     type  uuid  slice  x  y  tag
113   TB  1531      0  0  0    0
114   TB  1563      0  0  0    0
115   TB  1610      0  0  0    0
116   TB  1610      0  0  0    0
117   TB  1632      0  0  0    0
Después de asignar slices TB:
     type  uuid  slice  x  y  tag
113   TB  1531     82  0  0    0
114   TB  1563    328  0  0    0
115   TB  1610     29  0  0    0
116   TB  1610     29  0  0    0
117   TB  1632    164  0  0    0
Tamaño original: (164, 6)
Tamaño final con vecinos TB: (492, 7)
tag
1    339
0    153
Name: count, dtype: int64


In [5]:
labels.to_csv('labels_temp.csv', index=False)

In [11]:
import os
import cv2
import numpy as np
import pydicom
import pandas as pd

def transfor_image(img, seed=None):
    np.random.seed(seed)
    dgreges = np.random.randint(-7, 7)
    scale = np.random.uniform(0.9, 1.0)
    center = (img.shape[1] // 2, img.shape[0] // 2)
    M = cv2.getRotationMatrix2D(center, dgreges, scale)
    img = cv2.warpAffine(img, M, (img.shape[1], img.shape[0]), flags=cv2.INTER_LINEAR)

    std = img.std()
    gauss = np.random.normal(0, std/100, img.shape).astype(np.float32)
    beta = np.random.randint(-10, 10, img.shape).astype(np.float32)
    
    img = img + gauss
    img = img + beta
    
    img = np.clip(img, 0, 255).astype(np.uint8)
    return img

def dcm_a_png(dcm_path, png_path, window_min=-1000, window_max=400, transform=False, seed=None):
    try:
        ds = pydicom.dcmread(dcm_path)
        img = ds.pixel_array.astype(np.float32)
        img = np.clip(img, window_min, window_max)
        img = ((img - window_min) / (window_max - window_min)) * 255.0
        img = img.astype(np.uint8)
        os.makedirs(os.path.dirname(png_path), exist_ok=True)
        if transform:
            img = transfor_image(img, seed=42)  # Aplicar transformación
        cv2.imwrite(png_path, img)
    except Exception as e:
        print(f"Error procesando {dcm_path}: {e}")

def convertir_df_dcm_a_png(df, input_dir='Experiments', output_dir='output_png',
                           window_min=-1000, window_max=400):
    for idx, row in df.iterrows():
        uuid = row['uuid']
        slice_num = row['slice']
        transform = True if row['generated'] == 1 else False
        dcm_file = os.path.join(input_dir, f"{uuid}/{slice_num}.dcm")
        png_file = os.path.join(output_dir, f"{uuid}/{slice_num}.png")
        if os.path.exists(dcm_file):
            dcm_a_png(dcm_file, png_file, window_min, window_max, transform=transform, seed=42)
        else:
            print(f"No encontrado: {dcm_file}")



In [12]:
convertir_df_dcm_a_png(labels, input_dir='Experiments', output_dir='Experiments-png')