# Managing subjects

## Cropped atlas subjects

Copy the cropped subjects and their atlas labels to a new folder.

In [None]:
import os, shutil, csv

# Cropped images are stored flat as '<subject>_cropped.nii.gz'; the labels of
# each subject are stored as '<subject>/labels.nii.gz'.
img_path = '/mnt/sda1/Repos/a-eye/Data/SHIP_dataset/non_labeled_dataset_nifti_cropped' #2022160100001_cropped.nii.gz'
seg_path = '/mnt/sda1/Repos/a-eye/Data/SHIP_dataset/non_labeled_dataset_nifti_reg_2' #/2022160100001/labels.nii.gz'
out_path = '/home/jaimebarranco/Desktop/score_atlas/'

num_images = 50
subfolders = sorted([f.name for f in os.scandir(seg_path) if f.is_dir()])

# Fail before copying anything instead of stopping half-way through the loop.
if len(subfolders) < num_images:
    raise IndexError(
        f'{num_images} subjects requested but only {len(subfolders)} found in {seg_path}'
    )

# Mapping between the original subject name and the simplified one (sub_001, ...)
name_subject_original = subfolders[:num_images]
name_subject_simplified = [f'sub_{i+1:03d}' for i in range(num_images)]

# Make sure the root exists even when no subject folder gets created
os.makedirs(out_path, exist_ok=True)

for i, (subject, simplified) in enumerate(zip(name_subject_original, name_subject_simplified), start=1):

    # Create output dir if not exists
    out_dir = os.path.join(out_path, simplified)
    os.makedirs(out_dir, exist_ok=True)

    # Image and segmentation paths
    img = f'{img_path}/{subject}_cropped.nii.gz'
    seg = f'{seg_path}/{subject}/labels.nii.gz'

    # Copy image and segmentation to output folder
    shutil.copy2(img, os.path.join(out_dir, f'{simplified}.nii.gz'))
    shutil.copy2(seg, os.path.join(out_dir, f'seg_{i:03d}.nii.gz'))

# csv to map original name with the simplified one.
# os.path.join keeps the path correct whether or not out_path ends with '/'.
with open(os.path.join(out_path, 'mapping.csv'), 'w', newline='') as csvfile:
    writer = csv.writer(csvfile)
    writer.writerow(['Subject', 'Simplified'])
    writer.writerows(zip(name_subject_original, name_subject_simplified))

## Entire atlas subjects

### Copy entire subjects without labels to a folder
(for new manual annotation)

- 50% males, 50% females (approx.) --> select 35 good-quality subjects from each group
- good quality --> axial length between 21 and 25 mm (inclusive)

In [None]:
import os, shutil, csv, random, zipfile
import pandas as pd

# VARIABLES
num_subjects_to_copy = 35 # number of subjects sampled from each group (males and females)
folder_name = 'samples_good_quality' # output folder on the Desktop; the zip gets the same name
good_quality = 1 # truthy -> keep only subjects whose axial length is within [min_al, max_al]
min_al = 21 # min axial length (inclusive)
max_al = 25 # max axial length (inclusive)

# FUNCTIONS
def delete_files_in_folder(folder):
    """Delete every entry inside ``folder`` while keeping ``folder`` itself.

    Regular files and symbolic links are unlinked; real directories are
    removed recursively. Deletion is best-effort: a failure on one entry is
    reported with ``print`` and the remaining entries are still processed.

    Args:
        folder: Path of the directory to empty. It must exist, otherwise
            ``os.listdir`` raises.
    """
    for item in os.listdir(folder):
        item_path = os.path.join(folder, item)
        try:
            # Check for links first: os.path.isdir() follows symlinks and
            # shutil.rmtree() refuses to operate on a symlink, so a link to a
            # directory must be unlinked (its target is left untouched).
            if os.path.islink(item_path) or os.path.isfile(item_path):
                os.unlink(item_path)
            elif os.path.isdir(item_path):
                shutil.rmtree(item_path)
        except OSError as e:
            # The second positional argument is printed literally after the message.
            print(f"Failed to delete {item_path}. Reason: {e}", 'error')

def zip_folder(source, destination):
    """Compress all files found under ``source`` into a zip at ``destination``.

    Archive names are computed relative to the parent of ``source``, so the
    zipped folder itself is the top-level entry of the archive. Only files
    are written; directories that contain no files do not appear.
    """
    # Reference point for archive names: one level above the zipped folder
    anchor = os.path.join(source, '..')
    with zipfile.ZipFile(destination, 'w', zipfile.ZIP_DEFLATED) as archive:
        for current_dir, _subdirs, filenames in os.walk(source):
            for filename in filenames:
                full_path = os.path.join(current_dir, filename)
                archive.write(full_path, os.path.relpath(full_path, anchor))

# 1. SELECT SUBJECTS
# CSV paths
csv_metadata = '/mnt/sda1/Repos/a-eye/Output/metadata/sub_metadata.csv'
csv_al = '/mnt/sda1/Repos/a-eye/Output/atlas/axial_length/axial_length_reg_v10_grad_th100.csv' # axial length
# Pandas read csv
pd_metadata = pd.read_csv(csv_metadata)
pd_al = pd.read_csv(csv_al)
# Dataframe: the axial-length columns (all but the first one) are appended
# side by side, i.e. rows are matched by index label.
# NOTE(review): this assumes both CSVs list the subjects in the same order - confirm.
df = pd.concat([pd_metadata, pd_al.iloc[:, 1:]], axis=1, verify_integrity=True)
# Group by sex (males and females). The key is a plain string rather than a
# one-element list so that get_group() can be called with a scalar ("M"/"F").
sex_group = df.groupby("Sex", dropna=True)
# Male and female groups, without rows that have missing values
male_group = sex_group.get_group("M").dropna()
female_group = sex_group.get_group("F").dropna()
if good_quality:
    # Keep only subjects whose axial length is within [min_al, max_al]
    al_filter = f'{min_al} <= axial_length <= {max_al}'
    male_group = male_group.query(al_filter)
    female_group = female_group.query(al_filter)
# Select subjects randomly from each group and add them to a list
# (sample() raises ValueError if a group has fewer than num_subjects_to_copy rows)
samples_male = male_group.sample(n=num_subjects_to_copy)['Subject'].tolist()
samples_female = female_group.sample(n=num_subjects_to_copy)['Subject'].tolist()
# Concatenate both lists
samples = sorted(samples_male + samples_female)
# print(samples)

# 2. COPY SUBJECTS
# Paths
img_path = '/mnt/sda1/Repos/a-eye/Data/SHIP_dataset/non_labeled_dataset_nifti'
out_path = f'/home/jaimebarranco/Desktop/{folder_name}'
# Start from an empty output folder: create it, or wipe what a previous run left
if not os.path.exists(out_path):
    os.makedirs(out_path)
else:
    delete_files_in_folder(out_path)
# Copy loop: each image is stored as <img_path>/<subject>/<subject>.nii.gz
for subject in samples:
    shutil.copy2(f'{img_path}/{subject}/{subject}.nii.gz', out_path)

# 3. ZIP FOLDER
zip_folder(out_path, f'{out_path}.zip')

### Removing chosen subjects for samples from non-labeled dataset

In [None]:
import os

path = '/mnt/sda1/Repos/a-eye/Data/SHIP_dataset/non_labeled_dataset_nifti'
files = os.listdir(path)

# Compare whole names against the chosen subjects. A substring test against
# str(samples) would also discard any entry whose name merely appears inside
# one of the IDs (or inside the list's textual representation).
chosen = {str(subject) for subject in samples}
new_files = sorted(subject for subject in files if subject not in chosen)

### Subdataframe meeting condition

In [None]:
import os, shutil, csv, random, zipfile
import pandas as pd

csv_out = '/home/jaimebarranco/Desktop/samples_v2.csv' # output csv

# CSV paths
subs = '/home/jaimebarranco/Desktop/samples_v2/' # input folder
csv_metadata = '/mnt/sda1/Repos/a-eye/Output/metadata/sub_metadata.csv' # table of metadata
csv_al = '/mnt/sda1/Repos/a-eye/Output/axial_length/atlas/old/axial_length_reg_v10_grad_th100.csv' # axial length

# Subject IDs as integers: visible entries of the input folder, with everything
# from the first '.' onwards removed
files = []
for entry in sorted(os.listdir(subs)):
    if entry.startswith('.'):
        continue
    files.append(int(os.path.basename(entry).split('.')[0]))

# Load both tables
pd_metadata = pd.read_csv(csv_metadata)
pd_al = pd.read_csv(csv_al)

# Metadata plus the axial-length columns (all but the first), side by side
df = pd.concat([pd_metadata, pd_al.iloc[:, 1:]], axis=1, verify_integrity=True)

# Keep only the rows whose Subject is one of the IDs found in the folder
in_folder = df['Subject'].isin(files)
df_samples = df[in_folder].reset_index(drop=True)

# Write the selection to disk without the index column
df_samples.to_csv(csv_out, index=False)

### Copy corresponding dicom folders to a new folder

In [None]:
import os, shutil, csv, random, zipfile
import pandas as pd

def unzip_file(source, destination):
    """Extract every member of the zip archive ``source`` into ``destination``.

    Args:
        source: Path of the zip file to read.
        destination: Folder the members are extracted into.

    Raises:
        zipfile.BadZipFile: If ``source`` is not a valid zip archive.
    """
    # The context manager closes the archive even if extraction fails
    with zipfile.ZipFile(source) as zip_file:
        zip_file.extractall(destination)

def copy_folder(source, destination):
    """Copy the contents of ``source`` into ``destination``.

    Sub-directories are copied recursively, files with a ``.zip`` extension
    (case-insensitive) are extracted into ``destination`` instead of being
    copied, and every other file is copied with its metadata.

    Args:
        source: Existing folder whose entries are copied.
        destination: Target folder; created, including missing parents, when
            it does not exist yet.

    Raises:
        FileExistsError: From ``shutil.copytree`` when a sub-directory is
            already present in ``destination``.
    """
    os.makedirs(destination, exist_ok=True)
    for item in os.listdir(source):
        s = os.path.join(source, item)
        d = os.path.join(destination, item)
        if os.path.isdir(s):
            shutil.copytree(s, d)
        # Look at the real extension: splitting the path on '.' would also
        # treat an extension-less file called 'zip' as an archive.
        elif os.path.splitext(s)[1].lower() == '.zip':
            unzip_file(s, destination)
        else:
            shutil.copy2(s, d)

def copy_file(source, destination):
    """Copy the file ``source`` to ``destination``.

    When ``destination`` does not exist it is created as a folder, including
    any missing parent folders, and the file is copied into it. When it
    already exists it is passed to ``shutil.copy`` unchanged.

    Args:
        source: Path of the file to copy.
        destination: Target folder (or existing target path).
    """
    if not os.path.exists(destination):
        # makedirs, unlike mkdir, also works when the parent folder is missing
        os.makedirs(destination)
    shutil.copy(source, destination)

def zip_folder(source, destination):
    """Compress all files found under ``source`` into a zip at ``destination``.

    Archive names are computed relative to the parent of ``source``, so the
    zipped folder itself is the top-level entry of the archive. Only files
    are written; directories that contain no files do not appear.
    """
    # Reference point for archive names: one level above the zipped folder
    parent = os.path.join(source, '..')
    with zipfile.ZipFile(destination, 'w', zipfile.ZIP_DEFLATED) as archive:
        for folder, _subfolders, names in os.walk(source):
            for name in names:
                file_path = os.path.join(folder, name)
                archive.write(file_path, os.path.relpath(file_path, parent))


# zip_folder(out_path, f'{out_path}.zip')

# paths
subs = '/home/jaimebarranco/Desktop/samples_v3/' # folder containing the subjects in .nii.gz (70)
dcm_folder = '/mnt/sda1/Repos/a-eye/Data/SHIP_dataset/non_labeled_dataset' # dicom folder (1200)
subs_dcm = '/home/jaimebarranco/Desktop/samples_v3_bids/sourcedata/' # output folder
if not os.path.exists(subs_dcm):
    os.makedirs(subs_dcm)

# Subject IDs as integers: visible entries of the input folder, with everything
# from the first '.' onwards removed
files = []
for entry in sorted(os.listdir(subs)):
    if entry.startswith('.'):
        continue
    files.append(int(os.path.basename(entry).split('.')[0]))

# Copy the dicom folder of every selected subject into the output folder
for subject in files:
    copy_folder(f'{dcm_folder}/{subject}', f'{subs_dcm}/{subject}')

# zip folder
# zip_folder(subs_dcm, '/home/jaimebarranco/Desktop/samples_v2_dcm.zip')

### Copy nifti subjects with .json

In [None]:
import os, shutil, csv, random, zipfile
import pandas as pd

def unzip_file(source, destination):
    """Extract every member of the zip archive ``source`` into ``destination``.

    Args:
        source: Path of the zip file to read.
        destination: Folder the members are extracted into.

    Raises:
        zipfile.BadZipFile: If ``source`` is not a valid zip archive.
    """
    # The context manager closes the archive even if extraction fails
    with zipfile.ZipFile(source) as zip_file:
        zip_file.extractall(destination)

def copy_folder(source, destination):
    """Copy the contents of ``source`` into ``destination``.

    Sub-directories are copied recursively, files with a ``.zip`` extension
    (case-insensitive) are extracted into ``destination`` instead of being
    copied, and every other file is copied with its metadata.

    Args:
        source: Existing folder whose entries are copied.
        destination: Target folder; created, including missing parents, when
            it does not exist yet.

    Raises:
        FileExistsError: From ``shutil.copytree`` when a sub-directory is
            already present in ``destination``.
    """
    os.makedirs(destination, exist_ok=True)
    for item in os.listdir(source):
        s = os.path.join(source, item)
        d = os.path.join(destination, item)
        if os.path.isdir(s):
            shutil.copytree(s, d)
        # Look at the real extension: splitting the path on '.' would also
        # treat an extension-less file called 'zip' as an archive.
        elif os.path.splitext(s)[1].lower() == '.zip':
            unzip_file(s, destination)
        else:
            shutil.copy2(s, d)

def copy_file(source, destination):
    """Copy the file ``source`` to ``destination``.

    When ``destination`` does not exist it is created as a folder, including
    any missing parent folders, and the file is copied into it. When it
    already exists it is passed to ``shutil.copy`` unchanged.

    Args:
        source: Path of the file to copy.
        destination: Target folder (or existing target path).
    """
    if not os.path.exists(destination):
        # makedirs, unlike mkdir, also works when the parent folder is missing
        os.makedirs(destination)
    shutil.copy(source, destination)

# paths
subs = '/home/jaimebarranco/Downloads/samples_v3' # folder containing the subjects in .nii.gz (70)
input_path = '/mnt/sda1/Repos/a-eye/Data/SHIP_dataset/non_labeled_dataset_nifti/'
output_path = '/home/jaimebarranco/Desktop/samples_v3_bids/'

# Subject IDs as integers: visible entries of the input folder, with everything
# from the first '.' onwards removed
files = []
for entry in sorted(os.listdir(subs)):
    if entry.startswith('.'):
        continue
    files.append(int(os.path.basename(entry).split('.')[0]))


In [None]:

# copy loop: subject k (1-based, in list order) goes to <output_path>sub-00k/anat
for i, subject in enumerate(files):
    bids_id = f'sub-{i+1:03}'
    out_dir = f'{output_path}{bids_id}/anat'
    # make output folder
    if not os.path.exists(out_dir):
        os.makedirs(out_dir)
    extensions = ('json', 'nii.gz')
    # copy both files first ...
    for ext in extensions:
        copy_file(f'{input_path}/{subject}/{subject}.{ext}', f'{out_dir}')
    # ... then rename them to <bids_id>_T1w.<ext>
    for ext in extensions:
        os.rename(f'{out_dir}/{subject}.{ext}', f'{out_dir}/{bids_id}_T1w.{ext}')
    # remove files
    # os.remove(f'{out_dir}/sub-{i+1:03}.json')
    # os.remove(f'{out_dir}/sub-{i+1:03}.nii.gz')

    # i+=1
    # if i==1:
    #     break

## Cropped manual images

In [None]:
import os, shutil

# paths
folder1 = "/home/jaimebarranco/Desktop/new_manual_annotations/images"
folder2 = "/mnt/sda1/Repos/a-eye/Data/SHIP_dataset/non_labeled_dataset_nifti_cropped"
output_folder = "/home/jaimebarranco/Desktop/new_manual_annotations/images/images_cropped"

# shutil.copy2 does not create missing folders: without this, the first copy
# would create a plain file named like the output folder.
os.makedirs(output_folder, exist_ok=True)

# for each .nii.gz starting with '2' in folder1, copy the corresponding
# cropped .nii.gz from folder2 to output_folder
for item in os.listdir(folder1):
    if not item.startswith('2'):
        continue
    # insert "_cropped" before ".nii.gz"
    item = item.split('.')[0] + '_cropped.nii.gz'
    print(item)
    # output name: the part of the name before the first '_', plus '.nii.gz'
    item_new = item.split('.')[0].split('_')[0] + '.nii.gz'
    # copy straight to the final name (no intermediate "_cropped" file)
    shutil.copy2(f'{folder2}/{item}', f'{output_folder}/{item_new}')

## Images from one folder to another

In [None]:
import os, shutil

# paths
folder1 = "/home/jaimebarranco/Desktop/new_manual_annotations/segmentations/manual"
folder2 = "/home/jaimebarranco/Desktop/new_manual_annotations/images"
output_folder = "/home/jaimebarranco/Desktop/new_manual_annotations/segmentations/manual_74"

# shutil.copy2 does not create missing folders: without this, every copy
# would overwrite one plain file named like the output folder.
os.makedirs(output_folder, exist_ok=True)

for item in sorted(os.listdir(folder1)):
    print(item)
    # copy each item into the output folder, keeping its name
    shutil.copy2(f'{folder1}/{item}', output_folder)

## Rest of the manually annotated images and their corresponding labels

In [6]:
import os, shutil

# paths
input_folder = "/mnt/sda1/Repos/a-eye/a-eye_preprocessing/ANTs/a123"
aux_folder = "/mnt/sda1/Repos/a-eye/Data/SHIP_dataset/labeled_dataset" # for the subject names
output_folder_images = "/home/jaimebarranco/Desktop/new_manual_annotations/images/images_74/prueba"
output_folder_segmentations = "/home/jaimebarranco/Desktop/new_manual_annotations/segmentations/manual_74/prueba"

# list of subjects already in the output folder
list_subjects_aux = ['sub-13','sub-14','sub-21','sub-34']
list_subjects_aux2 = ['0000814997','0000814999','0000815173','0000815262']

# subjects' names
subjects_names = sorted([name for name in os.listdir(aux_folder) if not name.startswith('.')])
# remove from subjects_names the subjects in list_subjects_aux2
subjects_names = [sub for sub in subjects_names if sub not in list_subjects_aux2]

# input subjects still to be copied, in sorted order
pending_subjects = [sub for sub in sorted(os.listdir(input_folder)) if sub not in list_subjects_aux]

# The n-th pending subject takes the n-th remaining name, so there must be
# enough names. Fail before copying anything rather than half-way through.
if len(pending_subjects) > len(subjects_names):
    raise IndexError(
        f'{len(pending_subjects)} subjects to copy but only {len(subjects_names)} names available'
    )

# shutil.copy2 does not create missing folders
os.makedirs(output_folder_images, exist_ok=True)
os.makedirs(output_folder_segmentations, exist_ok=True)

for sub, new_name in zip(pending_subjects, subjects_names):
    # copy the image straight to its new name
    shutil.copy2(f'{input_folder}/{sub}/input/{sub}_T1.nii.gz', f'{output_folder_images}/{new_name}.nii.gz')

    # copy the segmentation straight to its new name
    shutil.copy2(f'{input_folder}/{sub}/input/{sub}_labels.nii.gz', f'{output_folder_segmentations}/{new_name}.nii.gz')