## Data organisation example - IDRiD

In [1]:
import os
import shutil
import pandas as pd
from sklearn.model_selection import train_test_split

### Split val set from train data
- Download the dataset from the [official website](https://ieee-dataport.org/open-access/indian-diabetic-retinopathy-image-dataset-idrid).
- Images can be preprocessed if necessary, with any processing tool such as [AutoMorph](https://github.com/rmaphoh/AutoMorph).

In [4]:
# Load the training-label table; later cells read its 'Image name' and
# 'Retinopathy grade' columns.
train_labels_csv = 'resources/retfound_resources/B. Disease Grading/2. Groundtruths/a. IDRiD_Disease Grading_Training Labels.csv'
list_ = pd.read_csv(train_labels_csv)

In [5]:
# Select the image names belonging to each retinopathy grade (0-4).
# The grade column is looked up once and reused for every mask.
train_grade = list_['Retinopathy grade']
noDR = list_.loc[train_grade == 0, 'Image name']
mildDR = list_.loc[train_grade == 1, 'Image name']
moderateDR = list_.loc[train_grade == 2, 'Image name']
severeDR = list_.loc[train_grade == 3, 'Image name']
proDR = list_.loc[train_grade == 4, 'Image name']

In [6]:
# Split every grade separately into train / val parts, holding out 20% of
# each grade for validation. The fixed random_state makes the split
# repeatable across runs.
split_options = {'test_size': 0.2, 'random_state': 1}
noDR_train, noDR_val = train_test_split(noDR, **split_options)
mildDR_train, mildDR_val = train_test_split(mildDR, **split_options)
moderateDR_train, moderateDR_val = train_test_split(moderateDR, **split_options)
severeDR_train, severeDR_val = train_test_split(severeDR, **split_options)
proDR_train, proDR_val = train_test_split(proDR, **split_options)

In [15]:
def create_dir(directory):
    """Create ``directory`` (including missing parents) if it does not exist.

    Calling this on a directory that already exists is a no-op, so the
    notebook cell can be re-run safely.

    Args:
        directory: Path of the directory to create.

    Raises:
        FileExistsError: If ``directory`` exists but is not a directory
            (for example, a regular file with the same name).
    """
    # exist_ok=True avoids the check-then-create race of a separate
    # os.path.exists() test and still fails loudly when the path is
    # occupied by a non-directory.
    os.makedirs(directory, exist_ok=True)

In [16]:
# Populate ./train/<class>/ with the training part of each grade.
# Each entry pairs a destination folder with the image names that go in it.
train_source = './resources/retfound_resources/B. Disease Grading/1. Original Images/a. Training Set/{}.jpg'
train_groups = [
    ('./train/a_noDR/', noDR_train),
    ('./train/b_mildDR/', mildDR_train),
    ('./train/c_moderateDR/', moderateDR_train),
    ('./train/d_severeDR/', severeDR_train),
    ('./train/e_proDR/', proDR_train),
]

# Make every class folder first, then copy the images class by class.
for class_dir, _ in train_groups:
    create_dir(class_dir)

# NOTE(review): shutil.copy duplicates the file contents unchanged, so the
# copies keep the source encoding even though they are renamed from .jpg to
# .png -- confirm downstream loaders do not rely on the extension.
for class_dir, image_names in train_groups:
    for image_name in image_names:
        shutil.copy(train_source.format(image_name), class_dir + '{}.png'.format(image_name))

In [17]:
# Populate ./val/<class>/ with the validation part of each grade.
# The validation images are read from the "a. Training Set" folder, the same
# folder the training copies are read from.
val_source = './resources/retfound_resources/B. Disease Grading/1. Original Images/a. Training Set/{}.jpg'
val_groups = [
    ('./val/a_noDR/', noDR_val),
    ('./val/b_mildDR/', mildDR_val),
    ('./val/c_moderateDR/', moderateDR_val),
    ('./val/d_severeDR/', severeDR_val),
    ('./val/e_proDR/', proDR_val),
]

# Make every class folder first, then copy the images class by class.
for class_dir, _ in val_groups:
    create_dir(class_dir)

# NOTE(review): shutil.copy duplicates the file contents unchanged, so the
# copies keep the source encoding even though they are renamed from .jpg to
# .png -- confirm downstream loaders do not rely on the extension.
for class_dir, image_names in val_groups:
    for image_name in image_names:
        shutil.copy(val_source.format(image_name), class_dir + '{}.png'.format(image_name))

### Organise test set

In [18]:
# Load the testing-label table; later cells read its 'Image name' and
# 'Retinopathy grade' columns.
test_labels_csv = 'resources/retfound_resources/B. Disease Grading/2. Groundtruths/b. IDRiD_Disease Grading_Testing Labels.csv'
list_test = pd.read_csv(test_labels_csv)

In [19]:
# Select the test image names belonging to each retinopathy grade (0-4).
# The grade column is looked up once and reused for every mask.
test_grade = list_test['Retinopathy grade']
noDR_test = list_test.loc[test_grade == 0, 'Image name']
mildDR_test = list_test.loc[test_grade == 1, 'Image name']
moderateDR_test = list_test.loc[test_grade == 2, 'Image name']
severeDR_test = list_test.loc[test_grade == 3, 'Image name']
proDR_test = list_test.loc[test_grade == 4, 'Image name']

In [20]:
# Populate ./test/<class>/ from the "b. Testing Set" folder.
# Each entry pairs a destination folder with the image names that go in it.
test_source = 'resources/retfound_resources/B. Disease Grading/1. Original Images/b. Testing Set/{}.jpg'
test_groups = [
    ('./test/a_noDR/', noDR_test),
    ('./test/b_mildDR/', mildDR_test),
    ('./test/c_moderateDR/', moderateDR_test),
    ('./test/d_severeDR/', severeDR_test),
    ('./test/e_proDR/', proDR_test),
]

# Make every class folder first, then copy the images class by class.
for class_dir, _ in test_groups:
    create_dir(class_dir)

# NOTE(review): shutil.copy duplicates the file contents unchanged, so the
# copies keep the source encoding even though they are renamed from .jpg to
# .png -- confirm downstream loaders do not rely on the extension.
for class_dir, image_names in test_groups:
    for image_name in image_names:
        shutil.copy(test_source.format(image_name), class_dir + '{}.png'.format(image_name))