# Eyesense

Ocular Disease Intelligent Recognition (ODIR) is a structured ophthalmic database of 5,000 patients with age, color fundus photographs of the left and right eyes, and doctors' diagnostic keywords.

This dataset is meant to represent a "real-life" set of patient information collected by Shanggong Medical Technology Co., Ltd. from different hospitals/medical centers in China. In these institutions, fundus images are captured by various cameras on the market, such as Canon, Zeiss and Kowa, resulting in varied image resolutions.
Annotations were labeled by trained human readers with quality control management. They classify each patient into eight labels:

- Normal (N)
- Diabetes (D)
- Glaucoma (G)
- Cataract (C)
- Age related Macular Degeneration (A)
- Hypertension (H)
- Pathological Myopia (M)
- Other diseases/abnormalities (O)

In [3]:
import glob
import os
import pathlib
import random
from shutil import copy, move

import pandas as pd

In [17]:
# Load the ODIR metadata table. The path is relative to the notebook's working directory.
# Later cells use the `filename` and `labels` columns to sort images into class folders.
df = pd.read_csv("raw_data/full_df.csv")
# Show the first rows as a quick look at the columns.
df.head()

Unnamed: 0,ID,Patient Age,Patient Sex,Left-Fundus,Right-Fundus,Left-Diagnostic Keywords,Right-Diagnostic Keywords,N,D,G,C,A,H,M,O,filepath,labels,target,filename
0,0,69,Female,0_left.jpg,0_right.jpg,cataract,normal fundus,0,0,0,1,0,0,0,0,../input/ocular-disease-recognition-odir5k/ODI...,['N'],"[1, 0, 0, 0, 0, 0, 0, 0]",0_right.jpg
1,1,57,Male,1_left.jpg,1_right.jpg,normal fundus,normal fundus,1,0,0,0,0,0,0,0,../input/ocular-disease-recognition-odir5k/ODI...,['N'],"[1, 0, 0, 0, 0, 0, 0, 0]",1_right.jpg
2,2,42,Male,2_left.jpg,2_right.jpg,laser spot，moderate non proliferative retinopathy,moderate non proliferative retinopathy,0,1,0,0,0,0,0,1,../input/ocular-disease-recognition-odir5k/ODI...,['D'],"[0, 1, 0, 0, 0, 0, 0, 0]",2_right.jpg
3,4,53,Male,4_left.jpg,4_right.jpg,macular epiretinal membrane,mild nonproliferative retinopathy,0,1,0,0,0,0,0,1,../input/ocular-disease-recognition-odir5k/ODI...,['D'],"[0, 1, 0, 0, 0, 0, 0, 0]",4_right.jpg
4,5,50,Female,5_left.jpg,5_right.jpg,moderate non proliferative retinopathy,moderate non proliferative retinopathy,0,1,0,0,0,0,0,0,../input/ocular-disease-recognition-odir5k/ODI...,['D'],"[0, 1, 0, 0, 0, 0, 0, 0]",5_right.jpg


# Image categorization

In [6]:
# Folder containing the raw training images that are copied into the class folders below.
# NOTE(review): this relative path starts with '../raw_data/' while the CSV is read from
# 'raw_data/' -- confirm both resolve from the same working directory.
dir_path = '../raw_data/raw_data/ODIR-5K/ODIR-5K/Training Images/'   #According to my computer.
# Names of all entries in that folder (not referenced by the other cells visible here).
img_list = os.listdir(dir_path)

We're not going to use the images categorized as 'Other diseases/abnormalities' (O).

In [7]:
# Create one training sub-directory per diagnostic class ('Others' is left out on purpose).
# exist_ok=True makes the cell idempotent and also repairs a partially created tree:
# guarding on 'data_train/normal' alone would leave any other missing folder uncreated,
# and shutil.copy would then write a single *file* with that name instead of copying
# images into a directory.
# NOTE: 'diabets' (sic) is kept as spelled because other cells use this folder name.
for class_dir in ('normal', 'diabets', 'glaucoma', 'cataract',
                  'degeneration', 'hypertension', 'myopia'):
    os.makedirs(os.path.join('data_train', class_dir), exist_ok=True)

In [8]:
# Create one test sub-directory per diagnostic class ('Others' is left out on purpose).
# exist_ok=True makes the cell idempotent and also repairs a partially created tree:
# guarding on 'data_test/normal' alone would leave any other missing folder uncreated,
# and shutil.move would then rename an image to that path instead of moving it into
# a directory.
# NOTE: 'diabets' (sic) is kept as spelled because other cells use this folder name.
for class_dir in ('normal', 'diabets', 'glaucoma', 'cataract',
                  'degeneration', 'hypertension', 'myopia'):
    os.makedirs(os.path.join('data_test', class_dir), exist_ok=True)

In [9]:
# Populate the per-class training folders from the raw image directory.
# A non-empty 'data_train/normal' is taken as the sign that the copy already ran,
# so re-running the cell does not copy everything a second time.
if os.listdir('data_train/normal'):
    print("The directory 'data_train/normal' is not empty")
    print(f"\nProbably the files from {dir_path} were already copied into 'data_train/normal'.")
else:
    # Value of the `labels` column -> destination folder, listed in copy order.
    label_to_folder = {
        "['N']": 'data_train/normal',
        "['D']": 'data_train/diabets',
        "['G']": 'data_train/glaucoma',
        "['C']": 'data_train/cataract',
        "['A']": 'data_train/degeneration',
        "['H']": 'data_train/hypertension',
        "['M']": 'data_train/myopia',
    }
    for label, folder in label_to_folder.items():
        # Rows with any other label (e.g. "['O']") are never selected, so they are skipped.
        for file in df.filename[df.labels == label]:
            copy(os.path.join(dir_path, file), folder)
   

In [10]:
# Hold out a fixed fraction of every class as a test set by moving images
# from data_train/<class> to data_test/<class>.
source_paths = ['data_train/normal', 'data_train/diabets', 'data_train/glaucoma', 'data_train/cataract',
                'data_train/degeneration', 'data_train/hypertension',
                'data_train/myopia']

TEST_FRACTION = 0.1   # share of each class folder moved to the test set
SPLIT_SEED = 42       # fixed so a run from a clean state always yields the same split

# A non-empty 'data_test/normal' is taken as the sign that the split already ran.
if len(os.listdir('data_test/normal')) == 0:
    rng = random.Random(SPLIT_SEED)
    for source in source_paths:
        dest = source.replace('data_train', 'data_test')
        # os.listdir returns entries in arbitrary order; sorting makes the seeded draw deterministic.
        candidates = sorted(os.listdir(source))
        n_files = int(TEST_FRACTION * len(candidates))
        for file in rng.sample(candidates, n_files):
            move(os.path.join(source, file), dest)
else:
    print("The directory 'data_test/normal' is not empty")
    print("\nProbably the test files were already moved from 'data_train' into 'data_test'.")

In [11]:
# Root folders of the two splits as Path objects.
data_dir_train, data_dir_test = map(pathlib.Path, ('data_train', 'data_test'))

In [12]:
# Count the .jpg files sitting exactly one level below each split root
# (i.e. inside the per-class folders).
train_length = sum(1 for _ in data_dir_train.glob('*/*.jpg'))
test_length = sum(1 for _ in data_dir_test.glob('*/*.jpg'))

print(f"Train: {train_length}")
print(f"Test:  {test_length}")

Train: 5119
Test:  565


In [18]:
# Number of rows whose label is 'O'; these are the images left out of both splits.
int(df.labels.eq("['O']").sum())

708

In [19]:
# Sanity check: every row of df that is not labelled 'O' should be present as
# exactly one image in either data_train or data_test.
assert (test_length + train_length) == (len(df) - len(df[df.labels == "['O']"])), (
    f"train ({train_length}) + test ({test_length}) images do not match "
    f"the number of non-'O' rows in df (len(df) = {len(df)})"
)