Copy cropped subjects and their atlas labels to a new folder

In [3]:
import os, shutil, csv

# Input locations and output root.
img_path = '/mnt/sda1/Repos/a-eye/Data/SHIP_dataset/non_labeled_dataset_nifti_cropped' # e.g. <img_path>/2022160100001_cropped.nii.gz
seg_path = '/mnt/sda1/Repos/a-eye/Data/SHIP_dataset/non_labeled_dataset_nifti_reg_2' # e.g. <seg_path>/2022160100001/labels.nii.gz
out_path = '/home/jaimebarranco/Desktop/score_atlas/'

num_images = 50
# Subject names are taken from the sub-directories of seg_path, in sorted order
subfolders = sorted(f.name for f in os.scandir(seg_path) if f.is_dir())

# Fail before touching the disk: indexing past the end of `subfolders` would
# otherwise raise an IndexError half-way through the copy, leaving a partial
# output folder without its mapping.csv.
if num_images > len(subfolders):
    raise ValueError(
        f'num_images={num_images} but only {len(subfolders)} subject folders found in {seg_path}'
    )

name_subject_original = list()
name_subject_simplified = list()

for i, subject in enumerate(subfolders[:num_images], start=1):

    # Simplified, 1-based, zero-padded identifier (sub_001, sub_002, ...)
    simplified = f'sub_{i:03d}'

    # Create output dir if not exists
    out_dir = os.path.join(out_path, simplified)
    os.makedirs(out_dir, exist_ok=True)

    # Image and segmentation paths
    img = f'{img_path}/{subject}_cropped.nii.gz'
    seg = f'{seg_path}/{subject}/labels.nii.gz'

    # csv to map original name with the simplified one
    name_subject_original.append(subject)
    name_subject_simplified.append(simplified)

    # Copy image and segmentation to output folder
    shutil.copy2(img, os.path.join(out_dir, f'sub_{i:03d}.nii.gz'))
    shutil.copy2(seg, os.path.join(out_dir, f'seg_{i:03d}.nii.gz'))

# os.path.join works whether or not out_path ends with a separator
os.makedirs(out_path, exist_ok=True)
with open(os.path.join(out_path, 'mapping.csv'), 'w', newline='') as csvfile:
    writer = csv.writer(csvfile)
    writer.writerow(['Subject', 'Simplified'])
    # One row per copied subject: original folder name -> simplified name
    writer.writerows(zip(name_subject_original, name_subject_simplified))

Copy entire subjects without labels to a folder
(for new manual annotation)

- 50% males, 50% females (approx.) --> select 35 of each with good quality
- good quality --> axial length between 21 and 25 mm (inclusive)

In [72]:
import os, shutil, csv, random, zipfile
import pandas as pd

# SETTINGS
# Name of the output folder that receives the copied subjects
folder_name = 'samples_good_quality'
# How many subjects to draw from each sex group (males and females)
num_subjects_to_copy = 35
# Inclusive axial-length bounds that define "good quality"
min_al, max_al = 21, 25
# Truthy -> only keep subjects whose axial length lies within [min_al, max_al]
good_quality = 1

# FUNCTIONS
def delete_files_in_folder(folder):
    """Delete every entry inside ``folder`` while keeping ``folder`` itself.

    Regular files and symbolic links are unlinked; real sub-directories are
    removed recursively. A failure on a single entry is reported with
    ``print`` and does not stop the remaining entries from being processed.
    Errors raised while listing ``folder`` are not caught.

    Args:
        folder: Path of the directory to empty.
    """
    for item in os.listdir(folder):
        item_path = os.path.join(folder, item)
        try:
            # Test for links first: os.path.isdir() follows symlinks and
            # shutil.rmtree() refuses to run on a symlink, so a link pointing
            # to a directory would otherwise never be removed.
            if os.path.islink(item_path) or os.path.isfile(item_path):
                os.unlink(item_path)
            elif os.path.isdir(item_path):
                shutil.rmtree(item_path)
        except Exception as e:
            print(f"Failed to delete {item_path}. Reason: {e}", 'error')

def zip_folder(source, destination):
    """Pack every file found under ``source`` into a deflate-compressed zip.

    Entry names are computed relative to the parent of ``source``, so each
    entry is stored under the name of the ``source`` folder. Only files are
    written; directories that contain no files produce no entry.

    Args:
        source: Directory whose files are archived (walked recursively).
        destination: Path of the zip file to create (opened in ``'w'`` mode).
    """
    parent_dir = os.path.join(source, '..')
    with zipfile.ZipFile(destination, mode='w', compression=zipfile.ZIP_DEFLATED) as archive:
        for current_dir, _subdirs, filenames in os.walk(source):
            for filename in filenames:
                file_path = os.path.join(current_dir, filename)
                archive.write(file_path, os.path.relpath(file_path, parent_dir))

# 1. SELECT SUBJECTS
# CSV paths
csv_metadata = '/mnt/sda1/Repos/a-eye/Output/metadata/sub_metadata.csv'
csv_al = '/mnt/sda1/Repos/a-eye/Output/atlas/axial_length/axial_length_reg_v10_grad_th100.csv' # axial length
# Seed for the random draw so that re-running the cell selects the same subjects
# (set to None to get a different draw on every run)
random_seed = 42
# Pandas read csv
pd_metadata = pd.read_csv(csv_metadata)
pd_al = pd.read_csv(csv_al)
# Dataframe: all metadata columns followed by every axial-length column except the first.
# NOTE(review): concat(axis=1) pairs rows by index, so both CSVs are assumed to
# list the subjects in the same order - confirm.
df = pd.concat([pd_metadata, pd_al.iloc[:, 1:]], axis=1, verify_integrity=True)
# Group by sex (males and females). A scalar key is used instead of a
# one-element list so that get_group("M") / get_group("F") keep working on
# recent pandas versions, which expect a tuple key for list-based grouping.
sex_group = df.groupby("Sex", dropna=True)


def _eligible_subjects(sex):
    """Return the rows of one sex group that can be sampled.

    Rows containing any missing value are dropped; when ``good_quality`` is
    truthy, only rows with ``min_al <= axial_length <= max_al`` are kept.
    """
    group = sex_group.get_group(sex).dropna()
    if good_quality:
        group = group.query(f'{min_al} <= axial_length <= {max_al}') # for good quality
    return group


# Male group
male_group = _eligible_subjects("M")
# Female group
female_group = _eligible_subjects("F")
# Report a too-small group explicitly before sampling
for group_label, group in (("male", male_group), ("female", female_group)):
    if len(group) < num_subjects_to_copy:
        raise ValueError(
            f'Only {len(group)} eligible {group_label} subjects available, '
            f'{num_subjects_to_copy} requested'
        )
# Select subjects randomly from each group and add them to a list
samples_male = male_group.sample(n=num_subjects_to_copy, random_state=random_seed)['Subject'].tolist()
samples_female = female_group.sample(n=num_subjects_to_copy, random_state=random_seed)['Subject'].tolist()
# Concatenate both lists
samples = sorted(samples_male + samples_female)
# print(samples)

# 2. COPY SUBJECTS
# Paths
img_path = '/mnt/sda1/Repos/a-eye/Data/SHIP_dataset/non_labeled_dataset_nifti'
out_path = f'/home/jaimebarranco/Desktop/{folder_name}'
if not os.path.exists(out_path):
    os.makedirs(out_path)
else:
    # Start from an empty folder so leftovers of a previous draw are not zipped
    delete_files_in_folder(out_path)
# Copy loop
for subject in samples:
    # Images are stored as <img_path>/<subject>/<subject>.nii.gz
    img = f'{img_path}/{subject}/{subject}.nii.gz'
    # Copy image to output folder
    shutil.copy2(img, out_path)

# 3. ZIP FOLDER
zip_folder(out_path, f'{out_path}.zip')

Exclude the subjects chosen as samples from the non-labeled dataset (builds the list of remaining entries)

In [66]:
import os

path = '/mnt/sda1/Repos/a-eye/Data/SHIP_dataset/non_labeled_dataset_nifti'
files = os.listdir(path)

# `samples` comes from the subject-selection cell and must already be defined.
# Compare whole names against the sampled IDs: the previous substring test
# against str(samples) also excluded any entry whose name happened to be a
# fragment of a sampled ID, and rebuilt that string for every entry.
sampled_names = {str(subject) for subject in samples}
new_files = sorted(name for name in files if name not in sampled_names)

Sub-dataframe with the metadata of the selected samples (saved to CSV)

In [7]:
import os, shutil, csv, random, zipfile
import pandas as pd

filename = '/home/jaimebarranco/Desktop/samples_good_quality.csv' # output csv

# CSV paths
subs = '/home/jaimebarranco/Desktop/samples_good_quality' # input folder
csv_metadata = '/mnt/sda1/Repos/a-eye/Output/metadata/sub_metadata.csv' # table of metadata
csv_al = '/mnt/sda1/Repos/a-eye/Output/atlas/axial_length/axial_length_reg_v10_grad_th100.csv' # axial length

# Subject IDs = names of the folder entries up to the first dot, converted to int.
# Entries whose name is not an integer (e.g. hidden files) are skipped and
# reported instead of aborting the cell with a ValueError.
files = []
ignored_entries = []
for entry in os.listdir(subs):
    try:
        files.append(int(entry.split('.')[0]))
    except ValueError:
        ignored_entries.append(entry)
if ignored_entries:
    print(f'Ignored non-subject entries in {subs}: {ignored_entries}')

# Pandas read csv
pd_metadata = pd.read_csv(csv_metadata)
pd_al = pd.read_csv(csv_al)

# Dataframe: all metadata columns followed by every axial-length column except the first.
# NOTE(review): concat(axis=1) pairs rows by index, so both CSVs are assumed to
# list the subjects in the same order - confirm.
df = pd.concat([pd_metadata, pd_al.iloc[:, 1:]], axis=1, verify_integrity=True)

# subdataframe from df where Subjects are in files
df_samples = df[df['Subject'].isin(files)].reset_index(drop=True)

# df_samples to .csv
df_samples.to_csv(filename, index=False)