In [1]:
from google.colab import drive

# Mount Google Drive so the dataset folders are reachable from the Colab runtime.
mount_point = '/content/drive'
drive.mount(mount_point)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [3]:
import os
import nibabel as nib
import numpy as np
from sklearn.decomposition import PCA
from tqdm import tqdm
import joblib

# =============================
# GENERAL SETTINGS
# =============================
# Source dataset root and the root that receives the PCA-reduced copy.
input_base = "/content/drive/MyDrive/TCC/1.Data/nnU-net_format/subset_50/nnUNet_raw_data_base/Dataset001_MyBraTS"
output_base = "/content/drive/MyDrive/TCC/1.Data/nnU-net_format/subset_50_PCA/nnUNet_raw_data_base/Dataset001_MyBraTS"

# Per-case channel file suffixes, in order: T1, T1ce, T2, FLAIR.
modalities = [
    "_0000.nii.gz",
    "_0001.nii.gz",
    "_0002.nii.gz",
    "_0003.nii.gz",
]

n_components = 3        # number of principal components kept as output channels
sample_fraction = 0.01  # fraction of each case's selected voxels used to fit the PCA

# Make sure both image output folders exist before anything is written.
for split in ("imagesTr", "imagesTs"):
    os.makedirs(os.path.join(output_base, split), exist_ok=True)

# =============================
# 1) COLLECT VOXEL SAMPLES FOR THE GLOBAL PCA
# =============================
print("üîç Coletando amostras para PCA global...")

train_dir = os.path.join(input_base, "imagesTr")
# A case id is the file name with its trailing "_XXXX.nii.gz" channel suffix removed.
patient_ids = sorted({"_".join(f.split("_")[:-1]) for f in os.listdir(train_dir) if f.endswith(".nii.gz")})

# Seeded generator: without it the sampled voxels (and therefore the PCA fitted
# on them) change on every run, so the saved model could not be reproduced.
rng = np.random.default_rng(42)

samples = []

for pid in tqdm(patient_ids, desc="Amostrando pacientes"):
    # Load every modality of this case and stack them as the last axis.
    imgs = np.stack(
        [nib.load(os.path.join(train_dir, pid + mod)).get_fdata() for mod in modalities],
        axis=-1,
    )

    # Keep only voxels with intensity > 0 in at least one channel.
    mask = np.any(imgs > 0, axis=-1)
    voxels = imgs[mask]

    # Number of voxels to draw from this case.
    n_vox = int(len(voxels) * sample_fraction)
    if n_vox == 0:
        # Nothing to sample (e.g. an all-zero volume). Skipping also avoids
        # computing mean/std of an empty array, which would yield NaNs.
        continue

    # Per-channel z-score, computed over the selected voxels of this case only.
    voxels = (voxels - voxels.mean(axis=0)) / (voxels.std(axis=0) + 1e-8)

    # Draw a random subset without replacement.
    idx = rng.choice(len(voxels), size=n_vox, replace=False)
    samples.append(voxels[idx])

if not samples:
    # np.concatenate would fail here anyway; raise the same exception type with
    # a message that points at the actual cause.
    raise ValueError(f"No voxels could be sampled from {train_dir}")

# Merge the per-case samples into a single (n_samples, n_modalities) matrix.
samples = np.concatenate(samples, axis=0)
print(f"Total de voxels amostrados: {samples.shape[0]:,}")

# =============================
# 2) FIT THE GLOBAL PCA
# =============================
print("\n‚öôÔ∏è Ajustando PCA global...")
# The randomized SVD solver is stochastic; fixing random_state makes the fitted
# components (and the model file written below) reproducible across runs.
pca = PCA(n_components=n_components, svd_solver="randomized", random_state=42)
pca.fit(samples)

# Persist the fitted model next to the converted dataset for later reuse.
joblib.dump(pca, os.path.join(output_base, "pca_model.joblib"))
print("‚úÖ PCA ajustado e salvo!")

# =============================
# 3) HELPER: APPLY THE PCA AND SAVE THE NEW IMAGES
# =============================
def apply_pca_and_save(input_dir, output_dir):
    """Project every case in ``input_dir`` with the global PCA and save it.

    For each case id found in ``input_dir`` (file name minus the trailing
    ``_XXXX.nii.gz`` suffix), all files listed in the module-level
    ``modalities`` are loaded, stacked, z-scored per channel and passed through
    the module-level fitted ``pca``. Each of the ``n_components`` outputs is
    written to ``output_dir`` as ``{case}_{i:04d}.nii.gz``, reusing the affine
    of the first modality.

    The z-score statistics are computed over foreground voxels only (intensity
    > 0 in at least one channel). This matches the normalisation used when the
    PCA samples were collected; whole-volume statistics would put the inputs of
    ``pca.transform`` on a different scale from the data the PCA was fitted on.
    Background voxels are still transformed, using those same statistics.

    Args:
        input_dir: Folder containing the per-modality ``.nii.gz`` files.
        output_dir: Folder receiving the PCA channels; created if missing.
    """
    os.makedirs(output_dir, exist_ok=True)
    files = sorted({"_".join(f.split("_")[:-1]) for f in os.listdir(input_dir) if f.endswith(".nii.gz")})

    for pid in tqdm(files, desc=f"Processando {os.path.basename(input_dir)}"):
        volumes = []
        affine = None
        for mod in modalities:
            img = nib.load(os.path.join(input_dir, pid + mod))
            if affine is None:
                # Keep the first modality's affine so the file is not loaded
                # a second time just to read it.
                affine = img.affine
            volumes.append(img.get_fdata())
        imgs = np.stack(volumes, axis=-1)

        H, W, D, C = imgs.shape
        X = imgs.reshape(-1, C)

        # Per-channel z-score with foreground-only statistics (same rule as the fit).
        foreground = np.any(X > 0, axis=-1)
        # Fall back to all voxels when a volume has no positive voxel, so the
        # statistics are never taken over an empty array.
        reference = X[foreground] if foreground.any() else X
        X = (X - reference.mean(axis=0)) / (reference.std(axis=0) + 1e-8)

        # Project onto the principal components and restore the spatial layout.
        X_pca = pca.transform(X).reshape(H, W, D, n_components)

        # Store as float32 (not float16): smaller files than the float64 that
        # get_fdata() yields by default.
        X_pca = X_pca.astype(np.float32)

        # Write each component as its own channel file.
        for i in range(n_components):
            out_path = os.path.join(output_dir, f"{pid}_{i:04d}.nii.gz")
            nib.save(nib.Nifti1Image(X_pca[..., i], affine), out_path)

# =============================
# 4) APPLY THE GLOBAL PCA TO THE IMAGES
# =============================
# Convert the training split first, then the test split, mirroring folder names.
for split in ("imagesTr", "imagesTs"):
    apply_pca_and_save(
        os.path.join(input_base, split),
        os.path.join(output_base, split),
    )

print("\nüéâ Finalizado! As imagens PCA foram salvas em:")
print(output_base)

üîç Coletando amostras para PCA global...


Amostrando pacientes: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 40/40 [01:09<00:00,  1.73s/it]


Total de voxels amostrados: 621,452

‚öôÔ∏è Ajustando PCA global...
‚úÖ PCA ajustado e salvo!


Processando imagesTr: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 40/40 [03:16<00:00,  4.90s/it]
Processando imagesTs: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 10/10 [01:29<00:00,  8.97s/it]


üéâ Finalizado! As imagens PCA foram salvas em:
/content/drive/MyDrive/TCC/1.Data/nnU-net_format/subset_50_PCA/nnUNet_raw_data_base/Dataset001_MyBraTS





In [4]:
import os
import shutil
from tqdm import tqdm

# Dataset roots: labels are copied unchanged from the original set to the PCA set.
src_base = "/content/drive/MyDrive/TCC/1.Data/nnU-net_format/subset_50/nnUNet_raw_data_base/Dataset001_MyBraTS"
dst_base = "/content/drive/MyDrive/TCC/1.Data/nnU-net_format/subset_50_PCA/nnUNet_raw_data_base/Dataset001_MyBraTS"

# Mirror each label folder into the destination dataset.
for subset in ("labelsTr", "labelsTs"):
    src_dir = os.path.join(src_base, subset)
    dst_dir = os.path.join(dst_base, subset)
    # The destination folder is created even when the source folder is missing.
    os.makedirs(dst_dir, exist_ok=True)

    if os.path.exists(src_dir):
        files = [name for name in os.listdir(src_dir) if name.endswith(".nii.gz")]
        print(f"üì¶ Copiando {len(files)} arquivos de {subset}...")

        # copy2 also carries over file metadata such as timestamps.
        for label_name in tqdm(files, desc=f"Copiando {subset}"):
            shutil.copy2(
                os.path.join(src_dir, label_name),
                os.path.join(dst_dir, label_name),
            )
    else:
        print(f"‚ö†Ô∏è Pasta {src_dir} n√£o encontrada, pulando...")

print("\n‚úÖ Labels copiados com sucesso!")
print(f"As labels agora est√£o em:\n{os.path.join(dst_base, 'labelsTr')}\n{os.path.join(dst_base, 'labelsTs')}")

üì¶ Copiando 40 arquivos de labelsTr...


Copiando labelsTr: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 40/40 [00:16<00:00,  2.39it/s]


üì¶ Copiando 10 arquivos de labelsTs...


Copiando labelsTs: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 10/10 [00:04<00:00,  2.48it/s]


‚úÖ Labels copiados com sucesso!
As labels agora est√£o em:
/content/drive/MyDrive/TCC/1.Data/nnU-net_format/subset_50_PCA/nnUNet_raw_data_base/Dataset001_MyBraTS/labelsTr
/content/drive/MyDrive/TCC/1.Data/nnU-net_format/subset_50_PCA/nnUNet_raw_data_base/Dataset001_MyBraTS/labelsTs



