In [1]:
import numpy as np
import tensorflow as tf
import pickle
import os
import cv2
import imgaug.augmenters as iaa

In [2]:
# Set CUDA_VISIBLE_DEVICES to "-1" so that no GPU is exposed to CUDA and the
# preprocessing below runs on the CPU.
# NOTE(review): tensorflow is already imported by the time this runs —
# confirm the setting still takes effect in this environment.
os.environ["CUDA_VISIBLE_DEVICES"] = "-1"

In [3]:
def load_data(data_path):
    """Load and return the object stored in a pickle file.

    Args:
        data_path: Path of the pickle file to read.

    Returns:
        The unpickled object.

    NOTE(review): ``pickle.load`` can execute arbitrary code while
    deserialising; only call this on files from a trusted source.
    """
    # A context manager closes the file handle deterministically, even when
    # unpickling raises (the bare ``open(...)`` used before leaked it).
    with open(data_path, 'rb') as f:
        data = pickle.load(f)

    return data

def save_data(output_path, mydata):
    """Pickle ``mydata`` into the file at ``output_path``.

    The file is opened in binary write mode, so existing content is replaced.
    """
    with open(output_path, 'wb') as sink:
        pickle.dump(mydata, sink)

def count_max_len(mylist):
    """Return the length of the longest element of ``mylist``.

    Returns 0 when ``mylist`` is empty.
    """
    return max((len(item) for item in mylist), default=0)

def set_augmenter():
    """Build the imgaug pipeline used for image augmentation.

    Returns:
        An ``iaa.Sequential`` made of a horizontal flip, a random crop and an
        affine transform, in that order.
    """
    # Horizontal flip with probability 0.5.
    flip = iaa.Fliplr(0.5)
    # Random crops.
    crop = iaa.Crop(percent=(0, 0.1))
    # Affine transform: scale/zoom, translate/move, rotate and shear.
    affine = iaa.Affine(
        scale={"x": (0.95, 1.05), "y": (0.95, 1.05)},
        translate_percent={"x": (-0.05, 0.05), "y": (-0.05, 0.05)},
        rotate=(-5, 5),
        shear=(-2, 2),
    )

    return iaa.Sequential([flip, crop, affine])

def parse_image(filename, resizing, scale, augmenter=None):
    """Read a JPEG file and return it preprocessed as a ``uint8`` tensor.

    Steps, in order: decode the JPEG, optionally augment it, subtract a
    Gaussian-blurred copy of the image, resize to a square, keep the central
    fraction and cast to ``uint8``.

    Args:
        filename: Path of the JPEG file to read.
        resizing: Side length the image is resized to before cropping.
        scale: Central fraction handed to ``tf.image.central_crop``.
        augmenter: Optional callable invoked as ``augmenter(image=...)`` on the
            decoded NumPy image (for example the result of ``set_augmenter``).
            ``None`` disables augmentation.

    Returns:
        The processed image as a ``tf.uint8`` tensor.
    """
    image = tf.io.read_file(filename)
    image = tf.io.decode_jpeg(image)
    # cv2 and imgaug operate on NumPy arrays, so leave TensorFlow here.
    image = image.numpy()

    # Identity check: ``!= None`` would dispatch to the augmenter's own
    # ``__ne__`` and can misbehave for objects that overload comparison.
    if augmenter is not None:
        image = augmenter(image=image)

    # 4 * image - 4 * blurred(image) + 128, with the blur sigma set to 10
    # (a (0, 0) kernel size lets OpenCV derive the kernel from sigma).
    blurred = cv2.GaussianBlur(image, (0, 0), 10)
    image = cv2.addWeighted(image, 4, blurred, -4, 128)
    image = tf.image.resize(image, (resizing, resizing)) # 256 * 256
    image = tf.image.central_crop(image, scale)
    image = tf.cast(image, tf.uint8)

    return image

In [4]:
# Load the pickled "timedelta2" data dictionary.
longitudinal_sequential_prediction_timedelta2_data_dict = load_data("/home/jl5307/current_research/AMD_prediction/img_data/data_dictionary/longitudinal_sequential_prediction_timedelta2_data_dict.pkl")

In [10]:
# Load the pickled "timedelta5" data dictionary (the one handed to the
# build_* calls in this notebook).
longitudinal_sequential_prediction_timedelta5_data_dict = load_data("/home/jl5307/current_research/AMD_prediction/img_data/data_dictionary/longitudinal_sequential_prediction_timedelta5_data_dict.pkl")

In [5]:
def _build_padded_split(data_root_path, eye_list, label_list, resizing, scale, channel, augmenter=None):
    """Load one split into zero-padded image and label arrays.

    Args:
        data_root_path: Prefix prepended to every image path (string
            concatenation, so it must end with a path separator).
        eye_list: One sequence of image file names per example.
        label_list: One sequence of labels per example, aligned with
            ``eye_list``.
        resizing, scale: Forwarded to ``parse_image``; the stored images have
            side ``int(resizing * scale)``.
        channel: Size of the last (channel) axis of the image array.
        augmenter: Forwarded to ``parse_image``; ``None`` disables it.

    Returns:
        ``(eye_array, label_array)``; both are padded with zeros up to the
        longest label sequence of the split.
    """
    max_len = count_max_len(label_list)
    side = int(resizing * scale)

    # np.zeros uses its default dtype here, as the original code did.
    eye_array = np.zeros(shape=(len(eye_list), max_len, side, side, channel))
    label_array = np.zeros(shape=(len(label_list), max_len))

    progbar = tf.keras.utils.Progbar(len(eye_list))

    for list_idx, (eyes, labels) in enumerate(zip(eye_list, label_list)):

        for eye_idx, eye in enumerate(eyes):
            # The image lives in a sub-directory named after the first
            # space-separated token of its file name.
            eye_filename = data_root_path + eye.split(" ")[0] + "/" + eye
            eye_img = parse_image(eye_filename, resizing=resizing, scale=scale, augmenter=augmenter)
            eye_array[list_idx, eye_idx, :, :, :] = eye_img

        label_array[list_idx, :len(labels)] = labels

        progbar.add(1)

    return eye_array, label_array


def build_numpy_data_set(data_root_path, output_path, longitudinal_sequential_prediction_data_dict, resizing, scale, channel, augment=True):
    """Convert the train/validation/test splits to padded ``.npy`` files.

    For each split, every image is run through ``parse_image`` and stored in
    an array of shape ``(examples, max_len, side, side, channel)`` with
    ``side = int(resizing * scale)``. Labels go into an
    ``(examples, max_len)`` array. Unused positions stay zero. The arrays are
    written to ``<split>_eye.npy`` and ``<split>_label.npy`` in
    ``output_path``.

    Args:
        data_root_path: Prefix prepended to every image path; must end with a
            path separator.
        output_path: Directory receiving the ``.npy`` files.
        longitudinal_sequential_prediction_data_dict: Dict with the keys
            ``"train_set"``, ``"validation_set"`` and ``"test_set"``, each
            mapping to a dict with ``"eye_list"`` and ``"label_list"``.
        resizing: Side length images are resized to before cropping.
        scale: Central-crop fraction applied after resizing.
        channel: Size of the channel axis of the image arrays.
        augment: When true, training images are augmented with
            ``set_augmenter()``; validation and test images never are.
    """
    augmenter = set_augmenter() if augment else None

    data_dict = longitudinal_sequential_prediction_data_dict
    # Resolve all three splits up front so a missing key fails before any
    # image is processed. The unused "per_length_test_set" lookup was dropped:
    # it only added a KeyError for dicts without that key.
    # Tuple layout: (file-name prefix, name used in messages, split, augmenter)
    split_plan = (
        ("train", "training", data_dict["train_set"], augmenter),
        ("validation", "validation", data_dict["validation_set"], None),
        ("test", "test", data_dict["test_set"], None),
    )

    for prefix, title, split, split_augmenter in split_plan:
        print("processing {} set...".format(title))
        eye_array, label_array = _build_padded_split(
            data_root_path,
            split["eye_list"],
            split["label_list"],
            resizing,
            scale,
            channel,
            augmenter=split_augmenter,
        )

        print("saving {} data...".format(title))
        np.save(os.path.join(output_path, "{}_eye.npy".format(prefix)), eye_array)
        np.save(os.path.join(output_path, "{}_label.npy".format(prefix)), label_array)

        # for saving memory: only one split's arrays are alive at a time
        del eye_array
        del label_array

In [6]:
def build_train_tf_data_set(data_root_path, output_path, longitudinal_sequential_prediction_data_dict, resizing, scale, channel, divide, augment=True):
    """Build the training set in ``divide`` chunks and save each chunk.

    The training examples are cut into ``divide`` consecutive slices of
    ``ceil(n / divide)`` examples; trailing slices can be shorter or empty.
    For slice ``i`` the padded arrays are saved as ``train_eye_split{i}.npy``
    and ``train_label_split{i}.npy``. The matching ``tf.data`` dataset is
    saved under ``train_dataset_tf_split{i}``. The element spec of the last
    dataset is pickled to ``train_dataset_tf_element_spec.pkl``.

    Args:
        data_root_path: Prefix prepended to every image path; must end with a
            path separator.
        output_path: Directory receiving all outputs.
        longitudinal_sequential_prediction_data_dict: Dict whose
            ``"train_set"`` entry holds ``"eye_list"`` and ``"label_list"``.
        resizing: Side length images are resized to before cropping.
        scale: Central-crop fraction applied after resizing.
        channel: Size of the channel axis of the image arrays.
        divide: Number of chunks; must be a positive integer.
        augment: When true, images are augmented with ``set_augmenter()``.

    Raises:
        ValueError: If ``divide`` is smaller than 1.
    """
    # Without this guard a non-positive ``divide`` failed later with an
    # unrelated error (division by zero, or an unbound name after the loop).
    if divide < 1:
        raise ValueError("divide must be a positive integer, got {!r}".format(divide))

    train_eye_list = longitudinal_sequential_prediction_data_dict["train_set"]["eye_list"]
    train_label_list = longitudinal_sequential_prediction_data_dict["train_set"]["label_list"]

    # Every chunk is padded to the longest sequence of the whole training set
    # so all chunks share one element spec.
    max_len_train_set = count_max_len(train_label_list)
    side = int(resizing * scale)

    division_size = int(np.ceil(len(train_eye_list) / divide))

    # The augmenter does not depend on the chunk, so build it once.
    augmenter = set_augmenter() if augment else None

    for i in range(divide):

        start, stop = i * division_size, (i + 1) * division_size
        this_split_train_eye_list = train_eye_list[start:stop]
        this_split_train_label_list = train_label_list[start:stop]
        this_split_train_eye = np.zeros(shape=(len(this_split_train_eye_list), max_len_train_set, side, side, channel))
        this_split_train_label = np.zeros(shape=(len(this_split_train_label_list), max_len_train_set))

        print("processing training data of {}th split ...".format(i+1))
        progbar = tf.keras.utils.Progbar(len(this_split_train_eye_list))

        for list_idx, (eyes, labels) in enumerate(zip(this_split_train_eye_list, this_split_train_label_list)):

            for eye_idx, eye in enumerate(eyes):
                # The image lives in a sub-directory named after the first
                # space-separated token of its file name.
                eye_filename = data_root_path + eye.split(" ")[0] + "/" + eye
                eye_img = parse_image(eye_filename, resizing=resizing, scale=scale, augmenter=augmenter)
                this_split_train_eye[list_idx, eye_idx, :, :, :] = eye_img

            this_split_train_label[list_idx, :len(labels)] = labels
            progbar.add(1)

        print("saving training data of {}th split...".format(i+1))
        this_split_train_eye_path = os.path.join(output_path, "train_eye_split{}.npy".format(i))
        this_split_train_label_path = os.path.join(output_path, "train_label_split{}.npy".format(i))
        np.save(this_split_train_eye_path, this_split_train_eye)
        np.save(this_split_train_label_path, this_split_train_label)

        this_split_train_dataset_tf = tf.data.Dataset.from_tensor_slices((this_split_train_eye, this_split_train_label))
        this_split_train_dataset_tf_path = os.path.join(output_path, "train_dataset_tf_split{}".format(i))
        tf.data.experimental.save(this_split_train_dataset_tf, this_split_train_dataset_tf_path)

    # The element spec is taken from the last chunk's dataset.
    train_dataset_tf_element_spec = this_split_train_dataset_tf.element_spec
    train_dataset_tf_element_spec_path = os.path.join(output_path, "train_dataset_tf_element_spec.pkl")
    save_data(train_dataset_tf_element_spec_path, train_dataset_tf_element_spec)

In [7]:
def build_validation_tf_data_set(data_root_path, output_path, longitudinal_sequential_prediction_data_dict, resizing, scale, channel):
    """Build the validation set and save it as ``.npy`` and ``tf.data`` files.

    Images are processed with ``parse_image`` (no augmentation) and stored in
    zero-padded arrays. The arrays are written to ``validation_eye.npy`` and
    ``validation_label.npy``, and the dataset built from them to
    ``validation_dataset_tf``. Its element spec is pickled to
    ``validation_dataset_tf_element_spec.pkl``. All outputs go to
    ``output_path``.
    """
    eye_sequences = longitudinal_sequential_prediction_data_dict["validation_set"]["eye_list"]
    label_sequences = longitudinal_sequential_prediction_data_dict["validation_set"]["label_list"]

    # Pad every example up to the longest label sequence.
    longest = count_max_len(label_sequences)
    side = int(resizing*scale)

    eye_array = np.zeros(shape=(len(eye_sequences), longest, side, side, channel))
    label_array = np.zeros(shape=(len(label_sequences), longest))

    print("processing validation set...")
    progbar = tf.keras.utils.Progbar(len(eye_sequences))

    for row, (eyes, labels) in enumerate(zip(eye_sequences, label_sequences)):

        for step, eye in enumerate(eyes):
            # Sub-directory = first space-separated token of the file name.
            subdir = eye.split(" ")[0]
            eye_array[row, step, :, :, :] = parse_image(
                data_root_path + subdir + "/" + eye,
                resizing=resizing,
                scale=scale,
                augmenter=None,
            )

        label_array[row, :len(labels)] = labels

        progbar.add(1)

    print("saving validation data...")
    np.save(os.path.join(output_path, "validation_eye.npy"), eye_array)
    np.save(os.path.join(output_path, "validation_label.npy"), label_array)

    dataset = tf.data.Dataset.from_tensor_slices((eye_array, label_array))
    element_spec = dataset.element_spec
    tf.data.experimental.save(dataset, os.path.join(output_path, "validation_dataset_tf"))
    save_data(os.path.join(output_path, "validation_dataset_tf_element_spec.pkl"), element_spec)

In [8]:
def build_test_tf_data_set(data_root_path, output_path, longitudinal_sequential_prediction_data_dict, resizing, scale, channel):
    """Build the test set and save it as ``.npy`` and ``tf.data`` files.

    Images are processed with ``parse_image`` (no augmentation) and stored in
    zero-padded arrays. The arrays are written to ``test_eye.npy`` and
    ``test_label.npy``, and the dataset built from them to
    ``test_dataset_tf``. Its element spec is pickled to
    ``test_dataset_tf_element_spec.pkl``. All outputs go to ``output_path``.
    """
    eye_sequences = longitudinal_sequential_prediction_data_dict["test_set"]["eye_list"]
    label_sequences = longitudinal_sequential_prediction_data_dict["test_set"]["label_list"]

    # Pad every example up to the longest label sequence.
    longest = count_max_len(label_sequences)
    side = int(resizing*scale)

    eye_array = np.zeros(shape=(len(eye_sequences), longest, side, side, channel))
    label_array = np.zeros(shape=(len(label_sequences), longest))

    print("processing test set...")
    progbar = tf.keras.utils.Progbar(len(eye_sequences))

    for row, (eyes, labels) in enumerate(zip(eye_sequences, label_sequences)):

        for step, eye in enumerate(eyes):
            # Sub-directory = first space-separated token of the file name.
            subdir = eye.split(" ")[0]
            eye_array[row, step, :, :, :] = parse_image(
                data_root_path + subdir + "/" + eye,
                resizing=resizing,
                scale=scale,
                augmenter=None,
            )

        label_array[row, :len(labels)] = labels

        progbar.add(1)

    print("saving test data...")
    np.save(os.path.join(output_path, "test_eye.npy"), eye_array)
    np.save(os.path.join(output_path, "test_label.npy"), label_array)

    dataset = tf.data.Dataset.from_tensor_slices((eye_array, label_array))
    element_spec = dataset.element_spec
    tf.data.experimental.save(dataset, os.path.join(output_path, "test_dataset_tf"))
    save_data(os.path.join(output_path, "test_dataset_tf_element_spec.pkl"), element_spec)

In [25]:
def build_tf_per_length_data_set(data_root_path, output_path, longitudinal_sequential_prediction_data_dict, resizing, scale, channel):
    """Save one ``tf.data`` test dataset per sequence length.

    ``longitudinal_sequential_prediction_data_dict["per_length_test_set"]``
    maps a length to a dict with ``"eye_list"`` and ``"label_list"``. The
    length is used directly as the time dimension of the arrays, so it must be
    an integer. For each length the dataset is saved under
    ``length_{length}_test_dataset_tf``. Its element spec is pickled to
    ``length_{length}_test_dataset_element_spec.pkl``. Both go to
    ``output_path``.

    Args:
        data_root_path: Prefix prepended to every image path; must end with a
            path separator.
        output_path: Directory receiving all outputs.
        longitudinal_sequential_prediction_data_dict: Dict holding the
            ``"per_length_test_set"`` entry described above.
        resizing: Side length images are resized to before cropping.
        scale: Central-crop fraction applied after resizing.
        channel: Size of the channel axis of the image arrays.
    """
    per_length_test_dict = longitudinal_sequential_prediction_data_dict["per_length_test_set"]
    side = int(resizing * scale)

    print("processing per length test set...")

    for length, length_dict in per_length_test_dict.items():

        print("processing length {}".format(length))

        this_length_test_eye_list = length_dict["eye_list"]
        this_length_test_label_list = length_dict["label_list"]

        this_length_test_eye = np.zeros(shape=(len(this_length_test_eye_list), length, side, side, channel))
        this_length_test_label = np.zeros(shape=(len(this_length_test_label_list), length))

        for list_idx, (eyes, labels) in enumerate(zip(this_length_test_eye_list, this_length_test_label_list)):

            for eye_idx, eye in enumerate(eyes):
                # The image lives in a sub-directory named after the first
                # space-separated token of its file name.
                eye_filename = data_root_path + eye.split(" ")[0] + "/" + eye
                eye_img = parse_image(eye_filename, resizing=resizing, scale=scale, augmenter=None)
                this_length_test_eye[list_idx, eye_idx, :, :, :] = eye_img

            this_length_test_label[list_idx, :len(labels)] = labels

        print("saving length {} test data...".format(length))
        this_length_test_dataset_tf = tf.data.Dataset.from_tensor_slices((this_length_test_eye, this_length_test_label))
        file_path = os.path.join(output_path, "length_{}_test_dataset_tf".format(length))
        tf.data.experimental.save(this_length_test_dataset_tf, file_path)
        this_length_test_dataset_elem_spec = this_length_test_dataset_tf.element_spec
        this_length_test_dataset_elem_spec_path = os.path.join(output_path, "length_{}_test_dataset_element_spec.pkl".format(length))
        save_data(this_length_test_dataset_elem_spec_path, this_length_test_dataset_elem_spec)

In [23]:
# Build the timedelta5 training data in 4 splits without augmentation
# (resize to 256, central crop 0.875, 3 channels).
# NOTE: the recorded run of this cell was interrupted (KeyboardInterrupt).
build_train_tf_data_set("/home/jl5307/current_research/AMD_prediction/img_data/img_files/", 
                             "/home/jl5307/current_research/AMD_prediction/img_data/numpy_data/", 
                             longitudinal_sequential_prediction_timedelta5_data_dict, 256, 0.875, 3, divide=4, augment=False)

processing training data of 1th split ...
   8/1093 [..............................] - ETA: 5:47

KeyboardInterrupt: 

In [12]:
# Build and save the timedelta5 validation data
# (resize to 256, central crop 0.875, 3 channels).
build_validation_tf_data_set("/home/jl5307/current_research/AMD_prediction/img_data/img_files/", 
                             "/home/jl5307/current_research/AMD_prediction/img_data/numpy_data/", 
                             longitudinal_sequential_prediction_timedelta5_data_dict, 256, 0.875, 3)

processing validation set...
saving validation data...


In [20]:
# Build and save the timedelta5 test data
# (resize to 256, central crop 0.875, 3 channels).
build_test_tf_data_set("/home/jl5307/current_research/AMD_prediction/img_data/img_files/", 
                             "/home/jl5307/current_research/AMD_prediction/img_data/numpy_data/", 
                             longitudinal_sequential_prediction_timedelta5_data_dict, 256, 0.875, 3)

processing test set...
saving test data...


In [26]:
# Build and save one timedelta5 test dataset per sequence length
# (resize to 256, central crop 0.875, 3 channels).
build_tf_per_length_data_set("/home/jl5307/current_research/AMD_prediction/img_data/img_files/",
                              "/home/jl5307/current_research/AMD_prediction/img_data/numpy_data/", longitudinal_sequential_prediction_timedelta5_data_dict, 256, 0.875, 3)

processing per length test set...
processing length 1
saving length 1 test data...
processing length 2
saving length 2 test data...
processing length 3
saving length 3 test data...
processing length 4
saving length 4 test data...
processing length 5
saving length 5 test data...
processing length 6
saving length 6 test data...
processing length 7
saving length 7 test data...
processing length 8
saving length 8 test data...
processing length 9
saving length 9 test data...
processing length 10
saving length 10 test data...
processing length 11
saving length 11 test data...
processing length 12
saving length 12 test data...


In [6]:
# Reload the saved timedelta2 training image array from disk.
train_eye = np.load("/home/jl5307/current_research/AMD_prediction/img_data/numpy_data/longitudinal_sequential_prediction_timedelta2/train_eye.npy")

In [7]:
# Reload the saved timedelta2 training label array from disk.
train_label = np.load("/home/jl5307/current_research/AMD_prediction/img_data/numpy_data/longitudinal_sequential_prediction_timedelta2/train_label.npy")

In [9]:
# Wrap the training arrays in a tf.data dataset of (image, label) pairs.
dataset = tf.data.Dataset.from_tensor_slices((train_eye, train_label))

In [4]:
# Reload the saved timedelta2 validation image array from disk.
validation_eye = np.load("/home/jl5307/current_research/AMD_prediction/img_data/numpy_data/longitudinal_sequential_prediction_timedelta2/validation_eye.npy")

In [5]:
# Reload the saved timedelta2 validation label array from disk.
validation_label = np.load("/home/jl5307/current_research/AMD_prediction/img_data/numpy_data/longitudinal_sequential_prediction_timedelta2/validation_label.npy")

In [6]:
# Wrap the validation arrays in a tf.data dataset of (image, label) pairs.
validation_dataset = tf.data.Dataset.from_tensor_slices((validation_eye, validation_label))

In [7]:
# Number of examples in the validation dataset.
len(validation_dataset)

1160

In [16]:
# Reload the saved timedelta2 test image array from disk.
test_eye = np.load("/home/jl5307/current_research/AMD_prediction/img_data/numpy_data/longitudinal_sequential_prediction_timedelta2/test_eye.npy")

In [17]:
# Reload the saved timedelta2 test label array from disk.
test_label = np.load("/home/jl5307/current_research/AMD_prediction/img_data/numpy_data/longitudinal_sequential_prediction_timedelta2/test_label.npy")

In [20]:
# Wrap the test arrays in a tf.data dataset of (image, label) pairs.
test_dataset = tf.data.Dataset.from_tensor_slices((test_eye, test_label))

In [22]:
# Keep the dataset's element spec so it can be pickled next to the dataset.
test_dataset_element_spec = test_dataset.element_spec

In [23]:
# Pickle the test dataset's element spec.
save_data("/home/jl5307/current_research/AMD_prediction/img_data/numpy_data/test_dataset_element_spec.pkl", test_dataset_element_spec)

In [24]:
# Write the test dataset to disk in tf.data's saved-dataset format.
tf.data.experimental.save(test_dataset, "/home/jl5307/current_research/AMD_prediction/img_data/numpy_data/test_dataset_tf")

In [4]:
# Load the pickled per-length test set dictionary.
# NOTE(review): no cell shown here writes per_length_test_set_dict.pkl —
# confirm where this file comes from.
td = load_data("/home/jl5307/current_research/AMD_prediction/img_data/numpy_data/per_length_test_set_dict.pkl")

In [8]:
# Take the "eye" entry stored under key 4 — presumably the images of the
# length-4 test sequences; TODO confirm against the code that wrote the file.
ts = td[4]["eye"]

In [20]:
# Reload the validation image array from the dataset_npy/validation_dataset_npy
# location (rebinds validation_eye).
validation_eye = np.load("/home/jl5307/current_research/AMD_prediction/img_data/numpy_data/longitudinal_sequential_prediction_timedelta2/dataset_npy/validation_dataset_npy/validation_eye.npy")

In [23]:
# Reload the validation label array from the dataset_npy/validation_dataset_npy
# location (rebinds validation_label).
validation_label = np.load("/home/jl5307/current_research/AMD_prediction/img_data/numpy_data/longitudinal_sequential_prediction_timedelta2/dataset_npy/validation_dataset_npy/validation_label.npy")

In [31]:
# Inspect the zero-padded label row of the example at index 15.
validation_label[15,:]

array([0., 1., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0.])

In [3]:
# Scratch check: the integers 1 through 11.
list(range(1, 12))

[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]