In [7]:
from pathlib import Path
import pandas as pd
from fundus_fovea_od_localization import DEFAULT_CSV_PATH

### Combine ADAM, REFUGE and IDRID datasets
- Note: Adam and Refuge offer optic disc masks instead of coordinates. Use the respective notebooks to extract the coordinates before using this notebook.

Set the `data_root` to the folder containing the extracted ADAM, REFUGE and IDRID datasets.

In [8]:
data_root = Path("../../fovea_detection/")

Now, just run the below code to combine the datasets. The combined metadata will be saved in the default path where the model will look for it.

ADAM

In [9]:
## Read in ADAM labels
path_to_adam_fovea_labels = data_root / 'ADAM' / 'DF-Annotation-Training400' / 'Training400' / 'Fovea_location.xlsx'
path_to_adam_od_labels = data_root / 'ADAM_OD_location.xlsx'

adam_fovea_labels_df = pd.read_excel(path_to_adam_fovea_labels, index_col='ID')
adam_od_labels_df = pd.read_excel(path_to_adam_od_labels)

# Exclude wrong fovea locations
adam_fovea_labels_df = adam_fovea_labels_df[(adam_fovea_labels_df[['Fovea_X', 'Fovea_Y']] != 0).all(axis=1)]

# Merge both dataframes
adam_labels =  pd.merge(adam_fovea_labels_df, adam_od_labels_df, on='imgName')

# Add path to images
def add_imgpath(row):
    image_name = row.imgName
    adam_data_type = 'AMD' if image_name.startswith('A') else 'Non-AMD'
    return data_root / 'ADAM' / 'AMD-Training400' / 'Training400' / adam_data_type / image_name

# Add path to file
adam_labels["path"] = adam_labels.apply(add_imgpath, axis=1)


REFUGE

In [10]:
## Read in REFUGE labels
path_to_refuge_fovea_labels_train = data_root / 'REFUGE' / 'Annotation-Training400' / 'Annotation-Training400' / 'Fovea_location.xlsx'
path_to_refuge_fovea_labels_val = data_root / 'REFUGE' / 'REFUGE-Validation400-GT' / 'Fovea_locations.xlsx'
path_to_refuge_od_labels = data_root / 'REFUGE_OD_location.xlsx'

refuge_fovea_labels_train_df = pd.read_excel(path_to_refuge_fovea_labels_train, index_col='ID')
refuge_fovea_labels_val_df = pd.read_excel(path_to_refuge_fovea_labels_val, index_col='ID')
refuge_fovea_labels_df = pd.concat([refuge_fovea_labels_train_df, refuge_fovea_labels_val_df])
refuge_od_labels_df = pd.read_excel(path_to_refuge_od_labels)

# Exclude wrong fovea locations
refuge_fovea_labels_df = refuge_fovea_labels_df[(refuge_fovea_labels_df[['Fovea_X', 'Fovea_Y']] != 0).all(axis=1)]

# Merge both dataframes
refuge_labels = pd.merge(refuge_fovea_labels_df, refuge_od_labels_df, on='ImgName')
refuge_labels.rename(columns={'img_path': 'path', "ImgName": "imgName"}, inplace=True)

IDRID

In [11]:
## Read in IDRID labels
path_to_idrid_labels = data_root / 'IDRID'/ '2. Groundtruths'
idrid_fovea_labels_df_1 = pd.read_csv(path_to_idrid_labels / '2. Fovea Center Location' / 'IDRiD_Fovea_Center_Training Set_Markups.csv')
idrid_fovea_labels_df_1["Image No"] += "_train"
idrid_fovea_labels_df_2 = pd.read_csv(path_to_idrid_labels / '2. Fovea Center Location' / 'IDRiD_Fovea_Center_Testing Set_Markups.csv')
idrid_fovea_labels_df_2["Image No"] += "_test"
idrid_od_labels_df_1 = pd.read_csv(path_to_idrid_labels / '1. Optic Disc Center Location' / 'a. IDRiD_OD_Center_Training Set_Markups.csv')
idrid_od_labels_df_1["Image No"] += "_train"
idrid_od_labels_df_2 = pd.read_csv(path_to_idrid_labels / '1. Optic Disc Center Location' / 'b. IDRiD_OD_Center_Testing Set_Markups.csv')
idrid_od_labels_df_2["Image No"] += "_test"

idrid_fovea_labels_df = pd.concat([idrid_fovea_labels_df_1, idrid_fovea_labels_df_2])
idrid_fovea_labels_df = idrid_fovea_labels_df.rename(columns={'Image No': 'imgName', 'X- Coordinate': 'Fovea_X', 'Y - Coordinate': 'Fovea_Y'})

idrid_od_labels_df = pd.concat([idrid_od_labels_df_1, idrid_od_labels_df_2])
idrid_od_labels_df = idrid_od_labels_df.rename(columns={'Image No': 'imgName', 'X- Coordinate': 'OD_X', 'Y - Coordinate': 'OD_Y'})

# Merge both dataframes
idrid_labels =  pd.merge(idrid_fovea_labels_df, idrid_od_labels_df, on='imgName')

# Get rid of completely empty rows
idrid_labels = idrid_labels.dropna(how='all')
# and cols
idrid_labels = idrid_labels.dropna(axis=1, how='all')

# Add path to images
def add_imgpath(row):
    image_name = row.imgName
    subfolder = 'a. Training Set' if image_name.endswith('train') else 'b. Testing Set'
    p = data_root / 'IDRID' / '1. Original Images' / subfolder / str(image_name+'.jpg')
    return p.__str__()

idrid_labels["path"] = idrid_labels.apply(add_imgpath, axis=1)

# Remove _train / _test suffixes from image paths
idrid_labels['path'] = idrid_labels['path'].str.replace('_train', '').str.replace('_test', '')

Merge all datasets

In [12]:
# Merge all datasets
labels = pd.concat([adam_labels, idrid_labels, refuge_labels])

# Harmonize column names
labels = labels.rename(columns={'imgName': 'image_name', 'Fovea_X': 'fovea_x', 'Fovea_Y': 'fovea_y', 'OD_X': 'od_x', 'OD_Y': 'od_y', 'path': 'image_path'})
del labels["Glaucoma Label"]
del labels["mask_path"]
labels

Unnamed: 0,image_name,fovea_x,fovea_y,od_x,od_y,image_path
0,A0001.jpg,1182.264278,1022.018842,278.688347,875.891829,../../fovea_detection/ADAM/AMD-Training400/Tra...
1,A0002.jpg,967.754046,1016.946655,162.407838,921.396775,../../fovea_detection/ADAM/AMD-Training400/Tra...
2,A0003.jpg,1220.206714,989.944033,537.757341,1004.166171,../../fovea_detection/ADAM/AMD-Training400/Tra...
3,A0004.jpg,1141.140888,1000.594955,334.903248,790.988073,../../fovea_detection/ADAM/AMD-Training400/Tra...
4,A0005.jpg,1127.371832,1071.109440,285.900172,889.936381,../../fovea_detection/ADAM/AMD-Training400/Tra...
...,...,...,...,...,...,...
795,V0396.jpg,1144.060000,835.200000,564.292598,802.953846,../../fovea_detection/REFUGE/REFUGE-Validation...
796,V0397.jpg,1155.050000,835.160000,476.217435,760.242040,../../fovea_detection/REFUGE/REFUGE-Validation...
797,V0398.jpg,1130.760000,865.070000,510.703242,826.372096,../../fovea_detection/REFUGE/REFUGE-Validation...
798,V0399.jpg,1032.000000,999.500000,430.232140,816.701711,../../fovea_detection/REFUGE/REFUGE-Validation...


Save to file

In [15]:
labels.to_csv(DEFAULT_CSV_PATH, index=False)