In [1]:
import h5py
import pandas as pd
import numpy as np
import cv2
import os
import matplotlib.pyplot as plt
from collections import Counter
from sklearn.model_selection import train_test_split

In [None]:
# Root directory of the ADNI data. The trailing slash matters: other cells
# build locations by plain string concatenation with `path`.
path = '/yourpath/data/ADNI/'

# Load the metadata table.
# For ADNI 3T, use `ADNI1_Baseline_3T_7_07_2022.csv` instead.
metadata_csv = path + 'ADNI1_Screening_1.5T_7_02_2022.csv'
demo_data = pd.read_csv(metadata_csv)
demo_data

In [None]:
# Drop every row whose Group is 'MCI'.
# `.copy()` makes the result an independent frame: later cells add columns to
# `demo_data`, and assigning into a boolean-mask selection without copying
# triggers pandas' SettingWithCopyWarning.
demo_data = demo_data[demo_data['Group'] != 'MCI'].copy()

In [None]:
# Binary target: 1 where Group equals 'AD', 0 for every other group.
labels = [int(group == 'AD') for group in demo_data['Group'].tolist()]
demo_data['label'] = labels

In [None]:
# flatten directories

import os
import itertools
import shutil


def move(destination):
    """Flatten a directory tree by moving all nested files into its root.

    Every file located in any subdirectory of `destination` (at any depth) is
    moved directly into `destination`. Files already at the top level are left
    alone, and the emptied subdirectories are not removed.

    Parameters
    ----------
    destination : str
        Directory whose subdirectories are flattened into it.
    """
    walker = os.walk(destination)
    # The first entry yielded by os.walk is `destination` itself; skip it so
    # only files that live in subdirectories are collected.
    next(walker, None)

    # Gather the complete list before moving anything, so the tree is not
    # modified while it is still being walked.
    nested_files = [
        os.path.join(folder, name)
        for folder, _subfolders, names in walker
        for name in names
    ]

    for source in nested_files:
        shutil.move(source, destination)

# Flatten the 'images-bk' tree under `path` so that all of its files sit
# directly in that directory.
# NOTE(review): the rename cells operate on 'images-all' and 'images', not
# 'images-bk' -- confirm how the files get from here to those directories.
flatten_root = path + 'images-bk'
move(flatten_root)

In [None]:
# rename file: subject_id __ image_id
# The new name is built by fixed-width slicing of the existing file name:
# characters 5..14 become the subject id and the last 10 characters
# (extension included) become the image id.
# NOTE(review): this is not idempotent -- running the cell again slices the
# already-renamed names. It also assumes every file name has this layout.
image_dir = os.path.join(path, 'images-all')
images = os.listdir(path + 'images-all')
for image in images:
    new_name = image[5: 15] + '__' + image[-10:]
    os.rename(
        os.path.join(image_dir, image),
        os.path.join(image_dir, new_name),
    )



In [None]:
# rename file: subject_id __ image_id
# Normalise the image-id part of each name in 'images': take the last 10
# characters of the file name and prepend 'I' when they do not already start
# with it. The part before the first '__' is kept as the subject id.
image_dir = os.path.join(path, 'images')
images = os.listdir(path + 'images')
for image in images:
    tail = image[-10:]
    image_id = tail if tail[0] == 'I' else 'I' + tail
    subject_part = image.split('__')[0]
    new_name = subject_part + '__' + image_id
    os.rename(
        os.path.join(image_dir, image),
        os.path.join(image_dir, new_name),
    )

In [None]:
def addpath(row):
    """Build the image file name for one metadata row.

    Parameters
    ----------
    row : mapping or pandas.Series
        Must provide the keys 'Subject' and 'Image Data ID'.

    Returns
    -------
    str
        '<Subject>__<Image Data ID>.nii', with both values converted via str().
    """
    subject = str(row['Subject'])
    image_id = str(row['Image Data ID'])
    return subject + '__' + image_id + '.nii'

# Row-wise: compute the image file name for every metadata row and store it
# in the "Path" column.
path_column = demo_data.apply(addpath, axis=1)
demo_data["Path"] = path_column
demo_data

In [None]:
# Derive two age-group columns from `Age` (truncated to whole years).
#
#   Age_multi  : 0 = 54 and under, 1 = 55-65, 2 = 66-75, 3 = 76-85, 4 = 86 and over
#   Age_binary : 0 = 75 and under, 1 = 76 and over
#
# Each age is binned exactly once from the raw value, so bin codes (0-4) are
# never mistaken for ages and the result does not depend on statement order.
# Negative (invalid) ages fall into bin 0 of both columns.
age_years = demo_data['Age'].values.astype('int')

# Lower edges of bins 1..4; np.digitize returns i with edges[i-1] <= age < edges[i].
age_multi_edges = [55, 66, 76, 86]
demo_data['Age_multi'] = np.digitize(age_years, age_multi_edges)

demo_data['Age_binary'] = (age_years > 75).astype('int')
demo_data

In [None]:
def split_712(all_meta, patient_ids):
    """Partition metadata rows into train / validation / test sets by subject.

    The subject ids are first split 70/30 (random_state=5); the 30% remainder
    is then split 34/66 (random_state=6) into validation and test, which is
    roughly 70/10/20 overall. Rows of `all_meta` are then selected by their
    `Subject` value, so all rows of a subject land in the same partition
    (provided `patient_ids` contains no duplicates).

    Parameters
    ----------
    all_meta : pandas.DataFrame
        Metadata table with a `Subject` column.
    patient_ids : numpy.ndarray
        Subject ids to split. The split results must support `.astype`, so a
        plain list is not accepted.

    Returns
    -------
    tuple of pandas.DataFrame
        (train_meta, val_meta, test_meta)
    """
    train_ids, holdout_ids = train_test_split(patient_ids, test_size=0.3, random_state=5)
    val_ids, test_ids = train_test_split(holdout_ids, test_size=0.66, random_state=6)

    def rows_for(ids):
        # Ids are compared as strings against the Subject column.
        return all_meta[all_meta.Subject.isin(ids.astype('str'))]

    return rows_for(train_ids), rows_for(val_ids), rows_for(test_ids)

# Split on the distinct subject ids. Despite the `sub_` prefix, the three
# results are metadata DataFrames (rows of `demo_data`), not lists of subjects.
subject_ids = np.unique(demo_data['Subject'])
sub_train, sub_val, sub_test = split_712(demo_data, subject_ids)

In [None]:
# Write each partition to its own CSV (the DataFrame index is written too).
# NOTE(review): these locations are not derived from `path`
# ('/yourpath/data/ADNI/') -- confirm the intended output directory.
for split_frame, csv_path in (
    (sub_train, '/yourpath/ADNI/split/new_train.csv'),
    (sub_val, '/yourpath/ADNI/split/new_val.csv'),
    (sub_test, '/yourpath/ADNI/split/new_test.csv'),
):
    split_frame.to_csv(csv_path)