In [1]:
import numpy as np
import pickle
import os
import pandas as pd

In [2]:
# Set CUDA_VISIBLE_DEVICES to "-1" for this process; by CUDA convention this
# hides all GPUs from libraries initialised afterwards.
os.environ.update({"CUDA_VISIBLE_DEVICES": "-1"})

In [3]:
def load_data(data_path):
    """Load and return the object stored in a pickle file.

    Args:
        data_path: Path of the pickle file to read.

    Returns:
        The unpickled Python object.

    Note:
        ``pickle.load`` can execute arbitrary code while deserialising, so
        only call this on files from a trusted source.
    """
    # The context manager closes the file handle even if unpickling fails.
    with open(data_path, 'rb') as pickle_file:
        data = pickle.load(pickle_file)

    return data

In [4]:
# Load the pickled dictionary holding the train/validation/test patient split.
splitted_patient_dict = load_data(
    data_path="/home/jl5307/current_research/AMD_prediction/img_data/data_dictionary/splitted_patient_dict.pkl"
)

In [88]:
def build_splitted_patient_verification_data_dict(splitted_patient_dict):
    """Flatten the patient-level split into per-eye records.

    Args:
        splitted_patient_dict: Mapping with "train_set", "validation_set" and
            "test_set" entries. Each entry maps a patient id to a dict with
            "re" and "le" sub-dicts, which hold "<side>_year", "<side>_img",
            "<side>_severe_score" and "<side>_late_amd".

    Returns:
        A dict with the same three split keys. Each split holds the parallel
        lists "pid_list", "year_list", "eye_list", "label_list" and
        "score_list", with one entry per eye. The right eye of a patient
        comes first, then the left eye; ids are suffixed "_re" / "_le".

    Raises:
        ValueError: If an eye's image list and late-AMD label list differ in
            length.
    """
    output_fields = ("pid_list", "year_list", "eye_list", "label_list", "score_list")

    def flatten_split(patients):
        # One parallel list per output field, filled eye by eye.
        columns = {field: [] for field in output_fields}

        for patient_id, record in patients.items():
            for side in ("re", "le"):
                visit_years = np.array(record[side][side + "_year"])
                images = record[side][side + "_img"]
                severity_scores = record[side][side + "_severe_score"]
                late_amd_flags = np.array(record[side][side + "_late_amd"])

                if len(images) != len(late_amd_flags):
                    raise ValueError("the length of img_list and label_list must be the same")

                columns["pid_list"].append(patient_id + "_" + side)
                columns["year_list"].append(list(visit_years))
                columns["eye_list"].append(images)
                columns["label_list"].append(late_amd_flags)
                columns["score_list"].append(severity_scores)

        return columns

    # Look up all three splits first so a missing key fails before any work is done.
    train_patients = splitted_patient_dict["train_set"]
    validation_patients = splitted_patient_dict["validation_set"]
    test_patients = splitted_patient_dict["test_set"]

    return {
        "train_set": flatten_split(train_patients),
        "validation_set": flatten_split(validation_patients),
        "test_set": flatten_split(test_patients),
    }

In [92]:
# Flatten every split of the patient dictionary into per-eye parallel lists.
splitted_patient_verification_data_dict = build_splitted_patient_verification_data_dict(
    splitted_patient_dict=splitted_patient_dict
)

In [25]:
def _label_eye_sequence(eye_record, side, timedelta, remove_recurrent):
    """Derive the (years, images, labels) input sequence for a single eye.

    Args:
        eye_record: Dict holding "<side>_year", "<side>_img" and
            "<side>_late_amd" for one eye.
        side: Key prefix of the eye, "re" or "le".
        timedelta: Width of the look-ahead window, in the units of the year
            values.
        remove_recurrent: When True, drop every visit after the first visit
            whose late-AMD label equals 1.

    Returns:
        ``(years, images, labels)`` where the last visit has been removed from
        ``years`` and ``images`` (it has no future to predict), or ``None``
        when the eye is excluded: no visits, follow-up span shorter than
        ``timedelta``, or no labelled visit.

    Raises:
        ValueError: If the image list and the late-AMD label list differ in
            length.
    """
    years = np.array(eye_record[side + "_year"])
    images = eye_record[side + "_img"]
    late_amd = np.array(eye_record[side + "_late_amd"])

    if len(images) != len(late_amd):
        raise ValueError("the length of img_list and label_list must be the same")

    if remove_recurrent:
        onset_positions = np.where(late_amd == 1)[0]
        if len(onset_positions) > 0:
            # Keep visits up to and including the first late-AMD visit.
            keep = onset_positions[0] + 1
            years = years[:keep]
            images = images[:keep]
            late_amd = late_amd[:keep]

    # An eye without visits cannot be labelled; an eye followed for less than
    # `timedelta` is excluded as before.
    if len(years) == 0 or not (years[-1] - years[0] >= timedelta):
        return None

    labels = []
    for idx, year in enumerate(years[:-1]):
        # Visits strictly after this one and no later than `timedelta` ahead.
        in_window = (years > year) & (years <= year + timedelta)
        if in_window.any():
            labels.append(int(np.max(late_amd[in_window])))
        else:
            # No visit falls inside the window: fall back to the next visit.
            labels.append(int(late_amd[idx + 1]))

    if not labels:
        return None

    assert len(images[:-1]) == len(labels), "length of the label list and img list must be the same"
    return list(years[:-1]), images[:-1], labels


def _build_sequential_split(patients, timedelta, remove_recurrent):
    """Label every eye of one split and collect the results as parallel lists."""
    split = {"pid_list": [], "year_list": [], "eye_list": [], "label_list": []}

    for pid, value in patients.items():
        for side in ("re", "le"):
            sequence = _label_eye_sequence(value[side], side, timedelta, remove_recurrent)
            if sequence is None:
                continue
            years, images, labels = sequence
            split["pid_list"].append(pid + "_" + side)
            split["year_list"].append(years)
            split["eye_list"].append(images)
            split["label_list"].append(labels)

    return split


def build_longitudinal_sequential_prediction_verification_data_dict(splitted_patient_dict, timedelta, remove_recurrent=True, generate_per_len_test_set=True):
    """Build per-eye sequences labelled with late AMD within a look-ahead window.

    For every visit of an eye except the last, the label is the maximum
    late-AMD flag among later visits that fall within ``timedelta`` of the
    visit. If no visit falls in that window, the label of the next visit is
    used. Eyes whose follow-up span is shorter than ``timedelta`` (or that
    have no visits) are left out.

    Args:
        splitted_patient_dict: Mapping with "train_set", "validation_set" and
            "test_set" entries. Each maps a patient id to a dict with "re"
            and "le" sub-dicts, which hold "<side>_year", "<side>_img" and
            "<side>_late_amd".
        timedelta: Width of the look-ahead window, in the units of the year
            values.
        remove_recurrent: When True, truncate each eye after its first
            late-AMD visit.
        generate_per_len_test_set: When True, also build
            "per_length_test_set", which maps each prefix length ``k`` to the
            first ``k`` visits of every test eye that has at least ``k``
            labelled visits.

    Returns:
        A dict with "train_set", "validation_set" and "test_set", each holding
        the parallel lists "pid_list", "year_list", "eye_list" and
        "label_list". "per_length_test_set" is present only when
        ``generate_per_len_test_set`` is True.

    Raises:
        ValueError: If an eye's image list and late-AMD label list differ in
            length.
    """
    # Look up all three splits first so a missing key fails before any work is done.
    train_set = splitted_patient_dict["train_set"]
    validation_set = splitted_patient_dict["validation_set"]
    test_set = splitted_patient_dict["test_set"]

    result = {
        "train_set": _build_sequential_split(train_set, timedelta, remove_recurrent),
        "validation_set": _build_sequential_split(validation_set, timedelta, remove_recurrent),
        "test_set": _build_sequential_split(test_set, timedelta, remove_recurrent),
    }

    if generate_per_len_test_set:
        test_eye_dict = result["test_set"]
        max_len = max((len(eye_img_list) for eye_img_list in test_eye_dict["eye_list"]), default=0)

        per_len_test_eye_dict = {
            length: {"pid_list": [], "year_list": [], "eye_list": [], "label_list": []}
            for length in range(1, max_len + 1)
        }

        for pid, years, eyes, labels in zip(
            test_eye_dict["pid_list"],
            test_eye_dict["year_list"],
            test_eye_dict["eye_list"],
            test_eye_dict["label_list"],
        ):
            for idx, _ in enumerate(zip(years, eyes, labels)):
                bucket = per_len_test_eye_dict[idx + 1]
                bucket["pid_list"].append(pid)
                bucket["year_list"].append(years[:(idx + 1)])
                bucket["eye_list"].append(eyes[:(idx + 1)])
                bucket["label_list"].append(labels[:(idx + 1)])

        result["per_length_test_set"] = per_len_test_eye_dict

    return result

In [41]:
# Sequential-prediction data with a look-ahead window of 2 (same units as the
# year values), truncating each eye after its first late-AMD visit.
longitudinal_sequential_prediction_timedelta2_verification_data_dict = build_longitudinal_sequential_prediction_verification_data_dict(
    splitted_patient_dict,
    timedelta=2,
    remove_recurrent=True,
)

In [125]:
def sequential_prediction_verification_data_dict_to_csv(output_path, verification_data_dict):
    """Write the sequential-prediction verification data to CSV files.

    Creates "train_set_df.csv", "validation_set_df.csv" and "test_set_df.csv"
    in ``output_path``, one row per eye, indexed by "pid" with the columns
    "year", "eye" and "label". When ``verification_data_dict`` holds a
    non-empty "per_length_test_set", "per_length_test_set_df.csv" is written
    as well, with an extra "length" column.

    Args:
        output_path: Existing directory that receives the CSV files.
        verification_data_dict: Dict with "train_set", "validation_set" and
            "test_set", each holding the parallel lists "pid_list",
            "year_list", "eye_list" and "label_list". It may also hold
            "per_length_test_set", mapping a length to a dict of the same
            four lists.

    Returns:
        None.
    """
    split_names = ("train_set", "validation_set", "test_set")

    # Build every frame before writing so malformed input fails before any
    # file is created.
    split_frames = {}
    for split_name in split_names:
        split = verification_data_dict[split_name]
        rows = {
            pid: [years, eyes, labels]
            for pid, years, eyes, labels in zip(
                split["pid_list"], split["year_list"], split["eye_list"], split["label_list"]
            )
        }
        frame = pd.DataFrame.from_dict(rows, orient='index', columns=['year', 'eye', 'label'])
        # Name the index before writing so every CSV gets a "pid" header.
        frame.index.name = "pid"
        split_frames[split_name] = frame

    for split_name, frame in split_frames.items():
        frame.to_csv(os.path.join(output_path, split_name + "_df.csv"), index=True)

    per_len_test_eye_dict = verification_data_dict.get("per_length_test_set")
    if not per_len_test_eye_dict:
        # Nothing to export when the per-length set is missing or empty.
        return

    per_length_frames = []
    for length, this_length_test_eye_dict in per_len_test_eye_dict.items():
        rows = {
            pid: [length, years, eyes, labels]
            for pid, years, eyes, labels in zip(
                this_length_test_eye_dict["pid_list"],
                this_length_test_eye_dict["year_list"],
                this_length_test_eye_dict["eye_list"],
                this_length_test_eye_dict["label_list"],
            )
        }
        per_length_frames.append(
            pd.DataFrame.from_dict(rows, orient='index', columns=['length', 'year', 'eye', 'label'])
        )

    # Concatenate once instead of growing the frame inside the loop.
    per_length_test_df = pd.concat(per_length_frames)
    per_length_test_df.index.name = "pid"
    per_length_test_df.to_csv(os.path.join(output_path, "per_length_test_set_df.csv"), index=True)

In [150]:
def splitted_patient_verification_data_dict_to_csv(output_path, verification_data_dict):
    """Write the per-eye split verification data to CSV files.

    Creates "train_set_df.csv", "validation_set_df.csv" and "test_set_df.csv"
    in ``output_path``, one row per eye, indexed by "pid" with the columns
    "year", "eye", "label" and "score".

    Args:
        output_path: Existing directory that receives the CSV files.
        verification_data_dict: Dict with "train_set", "validation_set" and
            "test_set", each holding the parallel lists "pid_list",
            "year_list", "eye_list", "label_list" and "score_list".

    Returns:
        None.
    """
    split_names = ("train_set", "validation_set", "test_set")

    # Build every frame before writing so malformed input fails before any
    # file is created.
    split_frames = {}
    for split_name in split_names:
        split = verification_data_dict[split_name]
        rows = {
            pid: [years, eyes, labels, scores]
            for pid, years, eyes, labels, scores in zip(
                split["pid_list"],
                split["year_list"],
                split["eye_list"],
                split["label_list"],
                split["score_list"],
            )
        }
        frame = pd.DataFrame.from_dict(rows, orient='index', columns=['year', 'eye', 'label', 'score'])
        # Name the index before writing so every CSV gets a "pid" header.
        frame.index.name = "pid"
        split_frames[split_name] = frame

    for split_name, frame in split_frames.items():
        frame.to_csv(os.path.join(output_path, split_name + "_df.csv"), index=True)

In [120]:
# Export the timedelta=2 sequential-prediction data. This notebook defines the
# exporter as sequential_prediction_verification_data_dict_to_csv; it writes
# the per-length test set whenever the dictionary contains one.
sequential_prediction_verification_data_dict_to_csv(
    "/home/jl5307/current_research/AMD_prediction/img_data/verification_data/sequential_prediction_timedelta2/",
    longitudinal_sequential_prediction_timedelta2_verification_data_dict,
)

In [151]:
# Export the per-eye split data (train / validation / test) as CSV files.
splitted_patient_verification_data_dict_to_csv(
    "/home/jl5307/current_research/AMD_prediction/img_data/verification_data/splitted_patient/",
    splitted_patient_verification_data_dict,
)

In [152]:
# Read the exported training CSV back and display it as a sanity check.
pd.read_csv(
    filepath_or_buffer="/home/jl5307/current_research/AMD_prediction/img_data/verification_data/splitted_patient/train_set_df.csv"
)

Unnamed: 0,pid,year,eye,label,score
0,G273_re,"[0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.5, 9.0, ...","['56516 QUA F2 RE LS.jpg', '56516 04 F2 RE LS....",[0 0 0 0 0 0 0 0 0 0 0 0],"[4.0, 5.0, 5.0, 6.0, 4.0, 4.0, 4.0, 8.0, 5.0, ..."
1,G273_le,"[0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.5, 9.0, ...","['56516 QUA F2 LE LS.jpg', '56516 04 F2 LE LS....",[0 0 0 0 0 0 0 0 0 0 0 0],"[4.0, 5.0, 4.0, 5.0, 2.0, 8.0, 4.0, 8.0, 8.0, ..."
2,5460_re,"[0.0, 2.0, 3.0, 4.0, 5.0, 7.0, 11.0]","['54545 QUA F2 RE LS.jpg', '54545 04 F2 RE LS....",[0 0 0 0 0 0 0],"[6.0, 6.0, 6.0, 6.0, 7.0, 6.0, 6.0]"
3,5460_le,"[0.0, 2.0, 3.0, 4.0, 5.0, 7.0, 11.0]","['54545 QUA F2 LE LS.jpg', '54545 04 F2 LE LS....",[0 0 0 0 0 0 0],"[6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0]"
4,G485_re,"[0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, ...","['58404 QUA F2 RE LS.jpg', '58404 04 F2 RE LS....",[0 0 0 0 0 0 0 0 0 0 0],"[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 2.0, 1.0, 1.0, ..."
...,...,...,...,...,...
6035,4520_le,"[0.0, 2.0, 3.0, 4.0, 5.0, 6.0]","['58753 QUA F2 LE LS.jpg', '58753 04 F2 LE LS....",[0 0 0 0 0 1],"[2.0, 5.0, 5.0, 6.0, 7.0, 11.0]"
6036,3912_re,"[0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.5, 9.0, ...","['56163 QUA F2 RE LS.jpg', '56163 04 F2 RE LS....",[0 0 0 0 0 0 0 0 0 0 0 0],"[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, ..."
6037,3912_le,"[0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.5, 9.0, ...","['56163 QUA F2 LE LS.jpg', '56163 04 F2 LE LS....",[0 0 0 0 0 0 0 0 0 0 0 0],"[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, ..."
6038,3537_re,"[0.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, ...","['61695 QUA F2 RE LS.jpg', '61695 04 F2 RE LS....",[0 0 0 0 0 0 0 0 0 0],"[1.0, 1.0, 1.0, 2.0, 1.0, 2.0, 1.0, 1.0, 1.0, ..."


In [117]:
# Read the per-length test CSV back for inspection.
# NOTE(review): this path points at verification_data/ directly, whereas the
# export cell writes into verification_data/sequential_prediction_timedelta2/.
# Confirm which file is intended.
d2 = pd.read_csv(
    filepath_or_buffer="/home/jl5307/current_research/AMD_prediction/img_data/verification_data/per_length_test_set_df.csv"
)

In [118]:
# Display the DataFrame bound to `d2` (per-length test set read from CSV).
d2

Unnamed: 0,pid,length,year,eye,label
0,G183_re,1,[0.0],['54654 QUA F2 RE LS.jpg'],[0]
1,G183_le,1,[0.0],['54654 QUA F2 LE LS.jpg'],[0]
2,4693_re,1,[0.0],['58574 QUA F2 RE LS.jpg'],[0]
3,1866_re,1,[0.0],['59659 QUA F2 RE LS.jpg'],[0]
4,1866_le,1,[0.0],['59659 QUA F2 LE LS.jpg'],[1]
...,...,...,...,...,...
7424,4341_le,12,"[0.0, 2.0, 3.0, 4.0, 4.5, 5.0, 6.0, 7.0, 7.5, ...","['52539 QUA F2 LE LS.jpg', '52539 04 F2 LE LS....","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]"
7425,2255_re,12,"[0.0, 0.5, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, ...","['57115 QUA F2 RE LS.jpg', '57115 01 F2 RE LS....","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]"
7426,2255_le,12,"[0.0, 0.5, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, ...","['57115 QUA F2 LE LS.jpg', '57115 01 F2 LE LS....","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]"
7427,3250_re,12,"[0.0, 2.0, 3.0, 3.5, 4.0, 5.0, 6.0, 7.0, 7.5, ...","['57132 QUA F2 RE LS.jpg', '57132 04 F2 RE LS....","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]"
