# You Have to run DataDownload.ipynb First!

In [1]:
import pandas as pd
import h5py
import numpy as np
import nibabel as nib
from tqdm import tqdm
from pathlib import Path
import os
import shutil
import os
import zipfile

In [2]:
csv_path = '/root/Data/BraTS2020_training_data/content/data/meta_data.csv'
target_path = '/root/Data/BraTS2020_training_data/BraTS2020_converted/'

folders_to_create = ['image', 'label']

# 각 폴더 생성
for folder in folders_to_create:
    folder_path = os.path.join(target_path, folder)
    if not os.path.exists(folder_path):
        os.makedirs(folder_path)
        print(f'Created folder: {folder_path}')
    else:
        print(f'Folder already exists: {folder_path}')

# CSV 파일 로드
df = pd.read_csv(csv_path)

Folder already exists: /root/Data/BraTS2020_training_data//BraTS2020_converted/image
Folder already exists: /root/Data/BraTS2020_training_data//BraTS2020_converted/label


In [3]:
# Group the dataframe by volume
grouped = df.groupby('volume')

# Iterate through each volume group
for volume, group in tqdm(grouped, desc="Processing volumes"):
    if os.path.exists(f'{target_path}image/volume_{volume}_image.nii'):
        continue
    
    image_slices = []
    mask_slices = []

    # Read image and mask slices
    for _, row in group.iterrows():
        with h5py.File("/root/Data/BraTS2020_training_data/"+ row['slice_path'], 'r') as file:
            image_slices.append(file['image'][:])
            mask_slices.append(file['mask'][:])

    # Convert lists to numpy arrays and stack along the correct axis
    image_data = np.stack(image_slices, axis=-1).transpose(0, 1, 3, 2)  # Stack along the last axis
    mask_data = np.stack(mask_slices, axis=-1).transpose(0, 1, 3, 2)   # Stack along the last axis

    # Save as NIfTI files
    image_nii = nib.Nifti1Image(image_data, np.eye(4))
    mask_nii = nib.Nifti1Image(mask_data, np.eye(4))

    nib.save(image_nii, f'{target_path}image/volume_{volume}_image.nii')
    nib.save(mask_nii, f'{target_path}label/volume_{volume}_mask.nii')

Processing volumes:   0%|          | 0/369 [00:00<?, ?it/s]

Processing volumes: 100%|██████████| 369/369 [12:15<00:00,  1.99s/it]


In [4]:
def compress_directory(src, dest_zip):
    # Compress the source directory into a zip file
    shutil.make_archive(dest_zip, 'zip', src)

In [5]:
def copy_file(src, dest):
    # Copy the zip file to the destination directory
    shutil.copy(src, dest)

In [6]:
compressed_file = '/root/Data/BraTS2020_training_data/compressed_file'  # Temporary location for compressed file
compressed_file_zip = compressed_file + '.zip'
drive_path = '/content/drive/MyDrive/DeepLearningData/'

In [8]:
# Compress the source directory
!7z a -tzip {compressed_file_zip} {target_path}


7-Zip [64] 16.02 : Copyright (c) 1999-2016 Igor Pavlov : 2016-05-21
p7zip Version 16.02 (locale=en_US.UTF-8,Utf16=on,HugeFiles=on,64 bits,2 CPUs Intel(R) Xeon(R) CPU @ 2.20GHz (406F0),ASM,AES-NI)

Scanning the drive:
  0M Scan  /root/Data/BraTS2020_training_data/                                              3 folders, 737 files, 115055568224 bytes (108 GiB)

Creating archive: /root/Data/BraTS2020_training_data/compressed_file.zip

Items to compress: 740

      0% 2 + BraTS2020_converted/image/volume_101_image.ni                                                        0% 2 + BraTS2020_converted/image/volume_102_image.ni                                                        0% 3 + BraTS2020_converted/image/volume_102_image.ni                                                        0% 3 + BraTS2020_converted/image/volume_103_image.ni                                                        0% 4 + BraTS2020_converted/image/volume_103_image.ni                                                  

In [10]:
copy_file(compressed_file_zip, drive_path)

# Check Result

In [12]:
%cd {drive_path}

/content/drive/MyDrive/DeepLearningData


In [13]:
!ls

brats2020-training-data.zip  compressed_file.zip


In [15]:
%cd {target_path}

/root/Data/BraTS2020_training_data/BraTS2020_converted


In [16]:
!ls

image  label


In [17]:
len(os.listdir(os.path.join(target_path, "image")))

369

In [18]:
len(os.listdir(os.path.join(target_path, "label")))

368