# ODIR dataset building steps
To facilitate future method comparisons, we provide the detailed steps used to construct the ODIR dataset in our TMI paper.

### First, the following 16 images are removed from the training set.
The background of these images is quite different from that of the rest. They are fundus images uploaded from the hospital.

- 2174_right.jpg
- 2175_left.jpg
- 2176_left.jpg
- 2177_left.jpg
- 2177_right.jpg
- 2178_right.jpg
- 2179_left.jpg
- 2179_right.jpg
- 2180_left.jpg
- 2180_right.jpg
- 2181_left.jpg
- 2181_right.jpg
- 2182_left.jpg
- 2182_right.jpg
- 2957_left.jpg
- 2957_right.jpg

### Second, we need to modify the following Excel files provided by the ODIR dataset:
- training annotation (English).xlsx
- off-site test annotation (English).xlsx
- on-site test annotation (English).xlsx

Specifically, we used global substitution to unify the diagnostic keywords for the same disease according to Table II in our paper.

> For example, the diagnostic keywords "Mild nonproliferative retinopathy", "Moderate nonproliferative retinopathy", "Severe nonproliferative retinopathy", "Proliferative diabetic retinopathy", "Severe proliferative diabetic retinopathy", and "Diabetic retinopathy" are all replaced with "Diabetic retinopathy".

Moreover, we treat all suspected diseases or abnormalities as diagnosed diseases or abnormalities, so every occurrence of "suspected " is replaced with "" (the empty string).

For convenience in the following steps, we have uploaded the final Excel files to https://github.com/Xyporz/CISSL-GANs/tree/main/ODIR.

### Third, run the following code step by step.
Remember to change the paths to match your own file locations.

In [1]:
import pandas as pd # used to read the annotation spreadsheet (.xlsx)
import os # used for directory / path handling
import shutil  # used to move files

# Directory setup for the training split (edit these paths for your machine).
# Picture_Current_Path: folder the images are moved out of.
Picture_Current_Path = "F:/Data/ODIR_Fromscratch/OIA-ODIR/Training Set/Images/"
# NOTE(review): Current_Path is not read by any of the cells shown here.
Current_Path ="F:/Data/OIA-ODIR/ODIR/"
# Despite the name, CSV_Path points at an Excel workbook read with read_excel.
CSV_Path = "F:/Data/ODIR_Fromscratch/OIA-ODIR/Training Set/Annotation/training annotation (English).xlsx"

# Train_Path: root folder the images are moved into (one sub-folder per category).
Train_Path = "F:/Data/ODIR_Fromscratch/OIA-ODIR/ODIR/Train/"

# Open and read the annotation spreadsheet.
# NOTE: the name `list` shadows the Python builtin; the later cells refer to
# this DataFrame by that name, so it is kept as-is.
list = pd.read_excel(CSV_Path)

In [2]:
# Display the loaded annotation table (the notebook output follows).
list

Unnamed: 0,ID,Patient Age,Patient Sex,Left-Fundus,Right-Fundus,Left-Diagnostic Keywords,Right-Diagnostic Keywords,N,D,G,C,A,H,M,O
0,0,69,Female,0_left.jpg,0_right.jpg,cataract,normal fundus,0,0,0,1,0,0,0,0
1,1,57,Male,1_left.jpg,1_right.jpg,normal fundus,normal fundus,1,0,0,0,0,0,0,0
2,2,42,Male,2_left.jpg,2_right.jpg,laser spot，diabetic retinopathy,diabetic retinopathy,0,1,0,0,0,0,0,1
3,3,66,Male,3_left.jpg,3_right.jpg,normal fundus,branch retinal artery occlusion,0,0,0,0,0,0,0,1
4,4,53,Male,4_left.jpg,4_right.jpg,macular epiretinal membrane,diabetic retinopathy,0,1,0,0,0,0,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3495,4686,63,Male,4686_left.jpg,4686_right.jpg,diabetic retinopathy,diabetic retinopathy,0,1,0,0,0,0,0,0
3496,4688,42,Male,4688_left.jpg,4688_right.jpg,diabetic retinopathy,diabetic retinopathy,0,1,0,0,0,0,0,0
3497,4689,54,Male,4689_left.jpg,4689_right.jpg,diabetic retinopathy,normal fundus,0,1,0,0,0,0,0,0
3498,4690,57,Male,4690_left.jpg,4690_right.jpg,diabetic retinopathy,diabetic retinopathy,0,1,0,0,0,0,0,0


In [3]:
# Disease categories kept for the dataset. In the cells below each string is
# compared for exact equality against the per-eye "Diagnostic Keywords"
# columns and is also used as the name of the destination sub-folder.
col = [ "normal fundus",
        "diabetic retinopathy",
        "glaucoma",
        "cataract",
        "age-related macular degeneration",
        "hypertensive retinopathy",
        "myopia retinopathy" ]

In [4]:
# Move the left-eye training images into per-category folders.
#
# For every category in `col`, select the rows whose left-eye diagnostic
# keywords are exactly that category, then move each listed image from
# Picture_Current_Path to Train_Path/<category>/<code>_<file name>.
# The numeric prefix encodes eye side and patient sex:
#   0 = left eye / male, 1 = left eye / female.
# Images that are not found on disk are reported by printing their file name.
left_codes = {'Male': 0, 'Female': 1}
for i in col:
    listnew=list[list["Left-Diagnostic Keywords"]==i]
    target_dir = Train_Path+i
    for name, patient_sex in zip(listnew["Left-Fundus"].tolist(),
                                 listnew["Patient Sex"].tolist()):
        code = left_codes.get(patient_sex)
        if code is None:
            # Previously an unrecognised value silently reused the code of the
            # preceding row (or raised NameError on the very first row).
            print('skipped (unknown sex %r): %s' % (patient_sex, name))
            continue
        if os.path.exists(Picture_Current_Path+name):
            # shutil.move does not create missing parent folders, so make sure
            # the category folder exists before the first move into it.
            os.makedirs(target_dir, exist_ok=True)
            shutil.move(Picture_Current_Path+name, target_dir+'/'+str(code)+'_'+name)
        else:
            print(name)

2957_left.jpg
2175_left.jpg
2176_left.jpg
2179_left.jpg
2180_left.jpg
2181_left.jpg
2182_left.jpg


In [5]:
# Move the right-eye training images into per-category folders.
#
# For every category in `col`, select the rows whose right-eye diagnostic
# keywords are exactly that category, then move each listed image from
# Picture_Current_Path to Train_Path/<category>/<code>_<file name>.
# The numeric prefix encodes eye side and patient sex:
#   2 = right eye / male, 3 = right eye / female.
# Images that are not found on disk are reported by printing their file name.
right_codes = {'Male': 2, 'Female': 3}
for i in col:
    listnew=list[list["Right-Diagnostic Keywords"]==i]
    target_dir = Train_Path+i
    for name, patient_sex in zip(listnew["Right-Fundus"].tolist(),
                                 listnew["Patient Sex"].tolist()):
        code = right_codes.get(patient_sex)
        if code is None:
            # Previously an unrecognised value silently reused the code of the
            # preceding row (or raised NameError on the very first row).
            print('skipped (unknown sex %r): %s' % (patient_sex, name))
            continue
        if os.path.exists(Picture_Current_Path+name):
            # shutil.move does not create missing parent folders, so make sure
            # the category folder exists before the first move into it.
            os.makedirs(target_dir, exist_ok=True)
            shutil.move(Picture_Current_Path+name, target_dir+'/'+str(code)+'_'+name)
        else:
            print(name)

2957_right.jpg
2174_right.jpg
2177_right.jpg
2178_right.jpg
2179_right.jpg
2180_right.jpg
2181_right.jpg
2182_right.jpg


In [11]:
# Directory setup for the off-site test split (edit these paths for your machine).
Picture_Current_Path = "F:/Data/ODIR_Fromscratch/OIA-ODIR/Off-site Test Set/Images/"
# NOTE(review): Current_Path is not read by any of the cells shown here.
Current_Path ="F:/Data/OIA-ODIR/ODIR/"
CSV_Path = "F:/Data/ODIR_Fromscratch/OIA-ODIR/Off-site Test Set/Annotation/off-site test annotation (English).xlsx"

# The variable keeps the name Train_Path, but it now points at the Val folder:
# the off-site test images are moved into ODIR/Val/.
Train_Path = "F:/Data/ODIR_Fromscratch/OIA-ODIR/ODIR/Val/"

# Open and read the annotation spreadsheet (rebinds `list` to the off-site table).
list = pd.read_excel(CSV_Path)

In [12]:
def _move_eye_images(keyword_column, file_column, codes):
    """Move the images of one eye side into per-category folders.

    For every category in ``col``, rows of the annotation table ``list`` whose
    ``keyword_column`` is exactly that category are selected, and each image
    named in ``file_column`` is moved from ``Picture_Current_Path`` to
    ``Train_Path/<category>/<code>_<file name>``.

    keyword_column -- name of the per-eye diagnostic-keywords column.
    file_column    -- name of the per-eye image file-name column.
    codes          -- mapping from the "Patient Sex" value to the numeric
                      file-name prefix for this eye side.

    Images that are not found on disk are reported by printing their name.
    """
    for category in col:
        rows = list[list[keyword_column]==category]
        target_dir = Train_Path+category
        for name, patient_sex in zip(rows[file_column].tolist(),
                                     rows["Patient Sex"].tolist()):
            code = codes.get(patient_sex)
            if code is None:
                # Previously an unrecognised value silently reused the code of
                # the preceding row (or raised NameError on the first row).
                print('skipped (unknown sex %r): %s' % (patient_sex, name))
                continue
            if os.path.exists(Picture_Current_Path+name):
                # shutil.move does not create missing parent folders.
                os.makedirs(target_dir, exist_ok=True)
                shutil.move(Picture_Current_Path+name, target_dir+'/'+str(code)+'_'+name)
            else:
                print(name)


# Prefix codes: 0 = left/male, 1 = left/female, 2 = right/male, 3 = right/female.
# Left eyes are processed first, then right eyes, as in the training split.
_move_eye_images("Left-Diagnostic Keywords", "Left-Fundus", {'Male': 0, 'Female': 1})

_move_eye_images("Right-Diagnostic Keywords", "Right-Fundus", {'Male': 2, 'Female': 3})

In [13]:
# Directory setup for the on-site test split (edit these paths for your machine).
Picture_Current_Path = "F:/Data/ODIR_Fromscratch/OIA-ODIR/On-site Test Set/Images/"
# NOTE(review): Current_Path is not read by any of the cells shown here.
Current_Path ="F:/Data/OIA-ODIR/ODIR/"
CSV_Path = "F:/Data/ODIR_Fromscratch/OIA-ODIR/On-site Test Set/Annotation/on-site test annotation (English).xlsx"

# The variable keeps the name Train_Path, but it now points at the Test folder:
# the on-site test images are moved into ODIR/Test/.
Train_Path = "F:/Data/ODIR_Fromscratch/OIA-ODIR/ODIR/Test/"

# Open and read the annotation spreadsheet (rebinds `list` to the on-site table).
list = pd.read_excel(CSV_Path)

In [14]:
def _move_eye_images(keyword_column, file_column, codes):
    """Move the images of one eye side into per-category folders.

    For every category in ``col``, rows of the annotation table ``list`` whose
    ``keyword_column`` is exactly that category are selected, and each image
    named in ``file_column`` is moved from ``Picture_Current_Path`` to
    ``Train_Path/<category>/<code>_<file name>``.

    keyword_column -- name of the per-eye diagnostic-keywords column.
    file_column    -- name of the per-eye image file-name column.
    codes          -- mapping from the "Patient Sex" value to the numeric
                      file-name prefix for this eye side.

    Images that are not found on disk are reported by printing their name.
    """
    for category in col:
        rows = list[list[keyword_column]==category]
        target_dir = Train_Path+category
        for name, patient_sex in zip(rows[file_column].tolist(),
                                     rows["Patient Sex"].tolist()):
            code = codes.get(patient_sex)
            if code is None:
                # Previously an unrecognised value silently reused the code of
                # the preceding row (or raised NameError on the first row).
                print('skipped (unknown sex %r): %s' % (patient_sex, name))
                continue
            if os.path.exists(Picture_Current_Path+name):
                # shutil.move does not create missing parent folders.
                os.makedirs(target_dir, exist_ok=True)
                shutil.move(Picture_Current_Path+name, target_dir+'/'+str(code)+'_'+name)
            else:
                print(name)


# Prefix codes: 0 = left/male, 1 = left/female, 2 = right/male, 3 = right/female.
# Left eyes are processed first, then right eyes, as in the training split.
_move_eye_images("Left-Diagnostic Keywords", "Left-Fundus", {'Male': 0, 'Female': 1})

_move_eye_images("Right-Diagnostic Keywords", "Right-Fundus", {'Male': 2, 'Female': 3})