In [None]:
# Copyright 2025 Claudio Giovannoni, Carlo Metta, Anna Monreale,
# Salvatore Rinzivillo, Andrea Berti, Sara Colantonio, and
# Francesca Pratesi
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Dataset Creation and Labeling Process

In [None]:
import os
import sys
import pandas as pd
import tqdm
from PIL import Image

from google.colab import drive
drive.mount('/content/drive')

BASE_DIR = '/content/drive/MyDrive/ABELE_prostate/claudio/'
sys.path.append(os.path.join(BASE_DIR, 'code'))

from crop_data_utils import file_checker
from dataset_creation_utils import dataset_creation

In [None]:
csv_dir = os.path.join(BASE_DIR, 'black_box', 'data', 'CSV', 'dataframes', 'val')
traindf_path = os.path.join(csv_dir, 'traindf.csv')
valdf_path = os.path.join(csv_dir, 'valdf.csv')
testdf_path = os.path.join(csv_dir, 'testdf.csv')

output_dir = os.path.join(BASE_DIR, 'black_box', 'data', 'dataset')
destination_path_train = os.path.join(output_dir, 'val', 'train')
destination_path_valid = os.path.join(output_dir, 'val', 'valid')
destination_path_test = os.path.join(output_dir, 'val', 'test')

In [5]:
# load train and test mapping csv
traindf = pd.read_csv(traindf_path)
valdf = pd.read_csv(valdf_path)
testdf = pd.read_csv(testdf_path)

print(len(traindf)+len(valdf)+len(testdf))
print(traindf.shape)
print(valdf.shape)
print(testdf.shape)
# checking class distribution in the dfs
dataframes = [traindf, testdf, valdf]
for i, df in enumerate(dataframes):
    label_counts = df['label'].value_counts().sort_index()
    total_samples = len(df)
    percentage_per_class = (label_counts / total_samples) * 100

    result_df = pd.DataFrame({
        'Count': label_counts,
        'Percentage': percentage_per_class
    }).sort_index()

    print(f"\nClass balance for dataframe {i + 1}:\n")
    print(result_df)
    print("\n" + "=" * 40 + "\n")

21222
(16971, 4)
(2125, 4)
(2126, 4)

Class balance for dataframe 1:

   Count  Percentage
0  12257   72.223204
1   4714   27.776796



Class balance for dataframe 2:

   Count  Percentage
0   1519    71.44873
1    607    28.55127



Class balance for dataframe 3:

   Count  Percentage
0   1512   71.152941
1    613   28.847059




## create different dataset for each modality

In [None]:
df_path_adc = os.path.join(output_dir, 'adc')

destination_path_train_adc = os.path.join(df_path_adc, 'train')
destination_path_valid_adc = os.path.join(df_path_adc, 'valid')
destination_path_test_adc = os.path.join(df_path_adc, 'test')

traindf_adc_path = os.path.join(df_path_adc,'traindf_adc.csv')
valdf_adc_path = os.path.join(df_path_adc,'valdf_adc.csv')
testdf_adc_path = os.path.join(df_path_adc,'testdf_adc.csv')


df_path_t2w = os.path.join(output_dir, 't2w')

destination_path_train_t2w = os.path.join(df_path_t2w, 'train')
destination_path_valid_t2w = os.path.join(df_path_t2w, 'valid')
destination_path_test_t2w = os.path.join(df_path_t2w, 'test')

traindf_t2w_path = os.path.join(df_path_t2w,'traindf_t2w.csv')
valdf_t2w_path = os.path.join(df_path_t2w,'valdf_t2w.csv')
testdf_t2w_path = os.path.join(df_path_t2w,'testdf_t2w.csv')


df_path_hbv = os.path.join(output_dir, 'hbv')

destination_path_train_hbv = os.path.join(df_path_hbv, 'train')
destination_path_valid_hbv = os.path.join(df_path_hbv, 'valid')
destination_path_test_hbv = os.path.join(df_path_hbv, 'test')

traindf_hbv_path = os.path.join(df_path_hbv,'traindf_hbv.csv')
valdf_hbv_path = os.path.join(df_path_hbv,'valdf_hbv.csv')
testdf_hbv_path = os.path.join(df_path_hbv,'testdf_hbv.csv')

In [None]:
# checking class distribution in the dfs
traindf_adc=pd.read_csv(traindf_adc_path)
valdf_adc=pd.read_csv(valdf_adc_path)
testdf_adc=pd.read_csv(testdf_adc_path)

traindf_t2w=pd.read_csv(traindf_t2w_path)
valdf_t2w=pd.read_csv(valdf_t2w_path)
testdf_t2w=pd.read_csv(testdf_t2w_path)

traindf_hbv=pd.read_csv(traindf_hbv_path)
valdf_hbv=pd.read_csv(valdf_hbv_path)
testdf_hbv=pd.read_csv(testdf_hbv_path)

traindf_adc.name = 'traindf_adc'
valdf_adc.name = 'valdf_adc'
testdf_adc.name = 'testdf_adc'

traindf_t2w.name = 'traindf_t2w'
valdf_t2w.name = 'valdf_t2w'
testdf_t2w.name = 'testdf_t2w'

traindf_hbv.name = 'traindf_hbv'
valdf_hbv.name = 'valdf_hbv'
testdf_hbv.name = 'testdf_hbv'

print(traindf_adc.name,traindf_adc.shape)
print(valdf_adc.name,valdf_adc.shape)
print(testdf_adc.name,testdf_adc.shape)

print(traindf_t2w.name,traindf_t2w.shape)
print(valdf_t2w.name,valdf_t2w.shape)
print(testdf_t2w.name,testdf_t2w.shape)

print(traindf_hbv.name,traindf_hbv.shape)
print(valdf_hbv.name,valdf_hbv.shape)
print(testdf_hbv.name,testdf_hbv.shape)

dataframes = [traindf_adc, testdf_adc, valdf_adc,
              traindf_t2w, testdf_t2w, valdf_t2w,
              traindf_hbv, testdf_hbv, valdf_hbv]

for df in dataframes:
    label_counts = df['label'].value_counts().sort_index()
    total_samples = len(df)

    percentage_per_class = (label_counts / total_samples) * 100

    print(f"\nClass balance for {df.name}:\n")
    print(result_df)
    print("\n" + "=" * 40 + "\n")

traindf_adc (5201, 4)
valdf_adc (1121, 4)
testdf_adc (1109, 4)
traindf_t2w (5201, 4)
valdf_t2w (1121, 4)
testdf_t2w (1109, 4)

Class balance for traindf_adc:

   Count  Percentage
0   1594   71.097235
1    648   28.902765



Class balance for testdf_adc:

   Count  Percentage
0   1594   71.097235
1    648   28.902765



Class balance for valdf_adc:

   Count  Percentage
0   1594   71.097235
1    648   28.902765



Class balance for traindf_t2w:

   Count  Percentage
0   1594   71.097235
1    648   28.902765



Class balance for testdf_t2w:

   Count  Percentage
0   1594   71.097235
1    648   28.902765



Class balance for valdf_t2w:

   Count  Percentage
0   1594   71.097235
1    648   28.902765




# Binary Dataset Creation (Stacked)



## Training Set

In [7]:
dataset_creation(traindf, destination_path_train)

Processed 50 images out of 16971.
Processed 100 images out of 16971.
Processed 150 images out of 16971.
Processed 200 images out of 16971.
Processed 250 images out of 16971.
Processed 300 images out of 16971.
Processed 350 images out of 16971.
Processed 400 images out of 16971.
Processed 450 images out of 16971.
Processed 500 images out of 16971.
Processed 550 images out of 16971.
Processed 600 images out of 16971.
Processed 650 images out of 16971.
Processed 700 images out of 16971.
Processed 750 images out of 16971.
Processed 800 images out of 16971.
Processed 850 images out of 16971.
Processed 900 images out of 16971.
Processed 950 images out of 16971.
Processed 1000 images out of 16971.
Processed 1050 images out of 16971.
Processed 1100 images out of 16971.
Processed 1150 images out of 16971.
Processed 1200 images out of 16971.
Processed 1250 images out of 16971.
Processed 1300 images out of 16971.
Processed 1350 images out of 16971.
Processed 1400 images out of 16971.
Processed 14

## Validation Set

In [None]:
dataset_creation(valdf, destination_path_valid)

Processed 50 images out of 2125.
Processed 100 images out of 2125.
Processed 150 images out of 2125.
Processed 200 images out of 2125.
Processed 250 images out of 2125.
Processed 300 images out of 2125.
Processed 350 images out of 2125.
Processed 400 images out of 2125.
Processed 450 images out of 2125.
Processed 500 images out of 2125.
Processed 550 images out of 2125.
Processed 600 images out of 2125.
Processed 650 images out of 2125.
Processed 700 images out of 2125.
Processed 750 images out of 2125.
Processed 800 images out of 2125.
Processed 850 images out of 2125.
Processed 900 images out of 2125.
Processed 950 images out of 2125.
Processed 1000 images out of 2125.
Processed 1050 images out of 2125.
Processed 1100 images out of 2125.
Processed 1150 images out of 2125.
Processed 1200 images out of 2125.
Processed 1250 images out of 2125.
Processed 1300 images out of 2125.
Processed 1350 images out of 2125.
Processed 1400 images out of 2125.
Processed 1450 images out of 2125.
Proce

## Test set

In [None]:
dataset_creation(testdf, destination_path_test)

Processed 50 images out of 2126.
Processed 100 images out of 2126.
Processed 150 images out of 2126.
Processed 200 images out of 2126.
Processed 250 images out of 2126.
Processed 300 images out of 2126.
Processed 350 images out of 2126.
Processed 400 images out of 2126.
Processed 450 images out of 2126.
Processed 500 images out of 2126.
Processed 550 images out of 2126.
Processed 600 images out of 2126.
Processed 650 images out of 2126.
Processed 700 images out of 2126.
Processed 750 images out of 2126.
Processed 800 images out of 2126.
Processed 850 images out of 2126.
Processed 900 images out of 2126.
Processed 950 images out of 2126.
Processed 1000 images out of 2126.
Processed 1050 images out of 2126.
Processed 1100 images out of 2126.
Processed 1150 images out of 2126.
Processed 1200 images out of 2126.
Processed 1250 images out of 2126.
Processed 1300 images out of 2126.
Processed 1350 images out of 2126.
Processed 1400 images out of 2126.
Processed 1450 images out of 2126.
Proce

In [None]:
output_path = os.path.join(output_dir, 'val', 'test')

total_size = 0
num_patient_folders = 0
total_files = 0

for dirpath, dirnames, filenames in tqdm(os.walk(output_path)):
    for dirname in dirnames:
        if dirname.isdigit():
            num_patient_folders += 1
            folder_path = os.path.join(dirpath, dirname)
            num_files_in_folder = len([f for f in os.listdir(folder_path) if os.path.isfile(os.path.join(folder_path, f))])
            total_files += num_files_in_folder

    for filename in filenames:
        filepath = os.path.join(dirpath, filename)
        total_size += os.path.getsize(filepath)

average_files_per_folder = total_files / num_patient_folders if num_patient_folders > 0 else 0

print(f'Total number of parent folders: {num_patient_folders}')
print(f'Total number of files in all folders: {total_files}')
print(f'Average number of files in a folder: {average_files_per_folder:.2f}')
print(f'Total size of {output_path}: {total_size / (1024 ** 3):.2f} Gigabytes')

0it [00:00, ?it/s]

Total number of parent folders: 0
Total number of files in all folders: 0
Average number of files in a folder: 0.00
Total size of /content/drive/MyDrive/ABELE_prostate/claudio/black_box/data/dataset/test/: 0.00 Gigabytes





In [8]:
file_checker(destination_path_train)

3it [00:19,  6.36s/it]


Total number of patient folders: 2
Total number of patient files: 16971
Total size of n: 0.08 Gigabytes





2

# Binary Dataset Creation (Separated)


## training set (adc)

In [None]:
total_images = len(traindf_adc)
step_size = 500
processed_images = 0
progress_updates = 0

# Create the class folders if they don't exist
not_clinically_relevant_folder = os.path.join(destination_path_train_adc, '0')
clinically_relevant_folder = os.path.join(destination_path_train_adc, '1')

if not os.path.exists(not_clinically_relevant_folder):
    os.makedirs(not_clinically_relevant_folder)

if not os.path.exists(clinically_relevant_folder):
    os.makedirs(clinically_relevant_folder)

# Iterate over each row in the CSV file
for _, row in traindf_adc.iterrows():
    image_path = row['image_path']
    label = row['label']
    # Get the image filename
    image_filename = os.path.basename(image_path)

    # Append the class label to the image filename before the file extension
    filename_parts = os.path.splitext(image_filename)
    image_filename_with_class = f'{filename_parts[0]}_class{label}{filename_parts[1]}'

    # Construct the new image path in the destination folder
    destination_path = os.path.join(clinically_relevant_folder if label == 1 else not_clinically_relevant_folder, image_filename_with_class)
    image = Image.open(image_path).convert('L')
    # Save the converted image to the destination folder
    image.save(destination_path)
    processed_images += 1

    if processed_images >= progress_updates * step_size:
        print(f'Processed {processed_images} images out of {total_images}.')
        progress_updates += 1

print('train ADC Dataset creation completed.')

Processed 1 images out of 5201.
Processed 500 images out of 5201.
Processed 1000 images out of 5201.
Processed 1500 images out of 5201.
Processed 2000 images out of 5201.
Processed 2500 images out of 5201.
Processed 3000 images out of 5201.
Processed 3500 images out of 5201.
Processed 4000 images out of 5201.
Processed 4500 images out of 5201.
Processed 5000 images out of 5201.
train ADC Dataset creation completed.


## validation set (adc)

In [None]:
total_images = len(valdf_adc)
step_size = 250
processed_images = 0
progress_updates = 0

# Create the class folders if they don't exist
not_clinically_relevant_folder = os.path.join(destination_path_valid_adc, '0')
clinically_relevant_folder = os.path.join(destination_path_valid_adc, '1')

if not os.path.exists(not_clinically_relevant_folder):
    os.makedirs(not_clinically_relevant_folder)

if not os.path.exists(clinically_relevant_folder):
    os.makedirs(clinically_relevant_folder)

# Iterate over each row in the CSV file
for _, row in valdf_adc.iterrows():
    image_path = row['image_path']
    label = row['label']
    # Get the image filename
    image_filename = os.path.basename(image_path)

    # Append the class label to the image filename before the file extension
    filename_parts = os.path.splitext(image_filename)
    image_filename_with_class = f'{filename_parts[0]}_class{label}{filename_parts[1]}'

    # Construct the new image path in the destination folder
    destination_path = os.path.join(clinically_relevant_folder if label == 1 else not_clinically_relevant_folder, image_filename_with_class)
    image = Image.open(image_path).convert('L')
    # Save the converted image to the destination folder
    image.save(destination_path)
    processed_images += 1

    if processed_images >= progress_updates * step_size:
        print(f'Processed {processed_images} images out of {total_images}.')
        progress_updates += 1

print('validation ADC Dataset creation completed.')

Processed 1 images out of 1121.
Processed 250 images out of 1121.
Processed 500 images out of 1121.
Processed 750 images out of 1121.
Processed 1000 images out of 1121.
validation ADC Dataset creation completed.


## test set (adc)

In [None]:
total_images = len(testdf_adc)
step_size = 250
processed_images = 0
progress_updates = 0

# Create the class folders if they don't exist
not_clinically_relevant_folder = os.path.join(destination_path_test_adc, '0')
clinically_relevant_folder = os.path.join(destination_path_test_adc, '1')

if not os.path.exists(not_clinically_relevant_folder):
    os.makedirs(not_clinically_relevant_folder)

if not os.path.exists(clinically_relevant_folder):
    os.makedirs(clinically_relevant_folder)

# Iterate over each row in the CSV file
for _, row in testdf_adc.iterrows():
    image_path = row['image_path']
    label = row['label']
    # Get the image filename
    image_filename = os.path.basename(image_path)

    # Append the class label to the image filename before the file extension
    filename_parts = os.path.splitext(image_filename)
    image_filename_with_class = f'{filename_parts[0]}_class{label}{filename_parts[1]}'

    # Construct the new image path in the destination folder
    destination_path = os.path.join(clinically_relevant_folder if label == 1 else not_clinically_relevant_folder, image_filename_with_class)
    image = Image.open(image_path).convert('L')
    # Save the converted image to the destination folder
    image.save(destination_path)
    processed_images += 1

    if processed_images >= progress_updates * step_size:
        print(f'Processed {processed_images} images out of {total_images}.')
        progress_updates += 1

print('test ADC Dataset creation completed.')

Processed 1 images out of 1109.
Processed 250 images out of 1109.
Processed 500 images out of 1109.
Processed 750 images out of 1109.
Processed 1000 images out of 1109.
validation ADC Dataset creation completed.


In [None]:
output_csv_path = os.path.join(BASE_DIR, 'black_box', 'data ', 'CSV', 'prostate_centered' ,'80x80')
output_csv_path_adc = os.path.join(output_csv_path, 'adc')
total_size = 0
num_patient_folders = 0
total_files = 0

for root, dirs, files in tqdm(os.walk(output_csv_path_adc)):
    for dir_ in dirs:
        patient_folder = os.path.join(root, dir_)
        for class_folder in os.listdir(patient_folder):
            class_folder_path = os.path.join(patient_folder, class_folder)
            if os.path.isdir(class_folder_path):
                num_files_in_folder = len([f for f in os.listdir(class_folder_path) if os.path.isfile(os.path.join(class_folder_path, f))])
                total_files += num_files_in_folder
                num_patient_folders += 1

    for file_ in files:
        file_path = os.path.join(root, file_)
        total_size += os.path.getsize(file_path)

average_files_per_folder = total_files / num_patient_folders if num_patient_folders > 0 else 0

print(f'Total number of patient folders: {num_patient_folders}')
print(f'Total number of files in all folders: {total_files}')
print(f'Average number of files per patient folder: {average_files_per_folder:.2f}')
print(f'Total size of {output_csv_path_adc}: {total_size / (1024 ** 3):.2f} Gigabytes')

10it [00:12,  1.28s/it]

Total number of patient folders: 6
Total number of files in all folders: 7249
Average number of files per patient folder: 1208.17
Total size of /content/drive/MyDrive/ABELE_prostate/claudio/black_box/data/CSV/prostate_centered/80x80/adc/: 0.03 Gigabytes





## training set (t2w)

In [None]:
total_images = len(traindf_t2w)
step_size = 500
processed_images = 0
progress_updates = 0

# Create the class folders if they don't exist
not_clinically_relevant_folder = os.path.join(destination_path_train_t2w, '0')
clinically_relevant_folder = os.path.join(destination_path_train_t2w, '1')

if not os.path.exists(not_clinically_relevant_folder):
    os.makedirs(not_clinically_relevant_folder)

if not os.path.exists(clinically_relevant_folder):
    os.makedirs(clinically_relevant_folder)

# Iterate over each row in the CSV file
for _, row in traindf_t2w.iterrows():
    image_path = row['image_path']
    label = row['label']
    # Get the image filename
    image_filename = os.path.basename(image_path)

    # Append the class label to the image filename before the file extension
    filename_parts = os.path.splitext(image_filename)
    image_filename_with_class = f'{filename_parts[0]}_class{label}{filename_parts[1]}'

    # Construct the new image path in the destination folder
    destination_path = os.path.join(clinically_relevant_folder if label == 1 else not_clinically_relevant_folder, image_filename_with_class)
    image = Image.open(image_path).convert('L')
    # Save the converted image to the destination folder
    image.save(destination_path)
    processed_images += 1

    if processed_images >= progress_updates * step_size:
        print(f'Processed {processed_images} images out of {total_images}.')
        progress_updates += 1

print('train T2W Dataset creation completed.')

Processed 1 images out of 5201.
Processed 500 images out of 5201.
Processed 1000 images out of 5201.
Processed 1500 images out of 5201.
Processed 2000 images out of 5201.
Processed 2500 images out of 5201.
Processed 3000 images out of 5201.
Processed 3500 images out of 5201.
Processed 4000 images out of 5201.
Processed 4500 images out of 5201.
Processed 5000 images out of 5201.
train T2W Dataset creation completed.


## validation set (t2w)

In [None]:
total_images = len(valdf_t2w)
step_size = 250
processed_images = 0
progress_updates = 0

# Create the class folders if they don't exist
not_clinically_relevant_folder = os.path.join(destination_path_valid_t2w, '0')
clinically_relevant_folder = os.path.join(destination_path_valid_t2w, '1')

if not os.path.exists(not_clinically_relevant_folder):
    os.makedirs(not_clinically_relevant_folder)

if not os.path.exists(clinically_relevant_folder):
    os.makedirs(clinically_relevant_folder)

# Iterate over each row in the CSV file
for _, row in valdf_t2w.iterrows():
    image_path = row['image_path']
    label = row['label']
    # Get the image filename
    image_filename = os.path.basename(image_path)

    # Append the class label to the image filename before the file extension
    filename_parts = os.path.splitext(image_filename)
    image_filename_with_class = f'{filename_parts[0]}_class{label}{filename_parts[1]}'

    # Construct the new image path in the destination folder
    destination_path = os.path.join(clinically_relevant_folder if label == 1 else not_clinically_relevant_folder, image_filename_with_class)
    image = Image.open(image_path).convert('L')
    # Save the converted image to the destination folder
    image.save(destination_path)
    processed_images += 1

    if processed_images >= progress_updates * step_size:
        print(f'Processed {processed_images} images out of {total_images}.')
        progress_updates += 1

print('validation T2W Dataset creation completed.')

Processed 1 images out of 1121.
Processed 250 images out of 1121.
Processed 500 images out of 1121.
Processed 750 images out of 1121.
Processed 1000 images out of 1121.
validation T2W Dataset creation completed.


## test set (t2w)

In [None]:
total_images = len(testdf_t2w)
step_size = 250
processed_images = 0
progress_updates = 0

# Create the class folders if they don't exist
not_clinically_relevant_folder = os.path.join(destination_path_test_t2w, '0')
clinically_relevant_folder = os.path.join(destination_path_test_t2w, '1')

if not os.path.exists(not_clinically_relevant_folder):
    os.makedirs(not_clinically_relevant_folder)

if not os.path.exists(clinically_relevant_folder):
    os.makedirs(clinically_relevant_folder)

# Iterate over each row in the CSV file
for _, row in testdf_t2w.iterrows():
    image_path = row['image_path']
    label = row['label']
    # Get the image filename
    image_filename = os.path.basename(image_path)

    # Append the class label to the image filename before the file extension
    filename_parts = os.path.splitext(image_filename)
    image_filename_with_class = f'{filename_parts[0]}_class{label}{filename_parts[1]}'

    # Construct the new image path in the destination folder
    destination_path = os.path.join(clinically_relevant_folder if label == 1 else not_clinically_relevant_folder, image_filename_with_class)
    image = Image.open(image_path).convert('L')
    # Save the converted image to the destination folder
    image.save(destination_path)
    processed_images += 1

    if processed_images >= progress_updates * step_size:
        print(f'Processed {processed_images} images out of {total_images}.')
        progress_updates += 1

print('test T2W Dataset creation completed.')

Processed 1 images out of 1109.
Processed 250 images out of 1109.
Processed 500 images out of 1109.
Processed 750 images out of 1109.
Processed 1000 images out of 1109.
test T2W Dataset creation completed.


In [None]:
output_csv_path = os.path.join(BASE_DIR, 'black_box', 'data ', 'CSV', 'prostate_centered' ,'80x80')
output_csv_path_t2w = os.path.join(output_csv_path, 't2w')

total_size = 0
num_patient_folders = 0
total_files = 0

for root, dirs, files in tqdm(os.walk(output_csv_path_t2w)):
    for dir_ in dirs:
        patient_folder = os.path.join(root, dir_)
        for class_folder in os.listdir(patient_folder):
            class_folder_path = os.path.join(patient_folder, class_folder)
            if os.path.isdir(class_folder_path):
                num_files_in_folder = len([f for f in os.listdir(class_folder_path) if os.path.isfile(os.path.join(class_folder_path, f))])
                total_files += num_files_in_folder
                num_patient_folders += 1

    for file_ in files:
        file_path = os.path.join(root, file_)
        total_size += os.path.getsize(file_path)

average_files_per_folder = total_files / num_patient_folders if num_patient_folders > 0 else 0

print(f'Total number of patient folders: {num_patient_folders}')
print(f'Total number of files in all folders: {total_files}')
print(f'Average number of files per patient folder: {average_files_per_folder:.2f}')
print(f'Total size of {output_csv_path_t2w}: {total_size / (1024 ** 3):.2f} Gigabytes')

10it [00:12,  1.29s/it]

Total number of patient folders: 6
Total number of files in all folders: 7249
Average number of files per patient folder: 1208.17
Total size of /content/drive/MyDrive/ABELE_prostate/claudio/black_box/data/CSV/prostate_centered/80x80/t2w/: 0.03 Gigabytes



