Importing libraries and read annotation file.

In [1]:
import pandas as pd
import os
import glob
import random
import shutil

BATCH_SIZE = 32
CLASS_NAMES = ['N', 'D', 'G', 'C', 'A', 'H', 'M', 'O']

training_images_file = 'data/ODIR-5K/Training_Images'
labels_file = 'data/ODIR-5K/data.xlsx'

diseases = {'abnormal pigment': 'O', 'age-related macular degeneration': 'A', 'anterior segment image': 'DELETE', 'arteriosclerosis': 'O', 'asteroid hyalosis': 'O', 'atrophic change': 'O', 'atrophy': 'O', 'branch retinal artery occlusion': 'O', 'branch retinal vein occlusion': 'O', 'cataract': 'C', 'central retinal artery occlusion': 'O', 'central retinal vein occlusion': 'O', 'central serous chorioretinopathy': 'O', 'chorioretinal atrophy': 'O', 'chorioretinal atrophy with pigmentation proliferation': 'O', 'choroidal nevus': 'NaN', 'congenital choroidal coloboma': 'O', 'depigmentation of the retinal pigment epithelium': 'O', 'diabetic retinopathy': 'D', 'diffuse chorioretinal atrophy': 'O', 'diffuse retinal atrophy': 'O', 'drusen': 'O', 'dry age-related macular degeneration': 'A', 'epiretinal membrane': 'O', 'epiretinal membrane over the macula': 'O', 'fundus laser photocoagulation spots': 'O', 'glaucoma': 'G', 'glial remnants anterior to the optic disc': 'O', 'hypertensive retinopathy': 'H', 'hypertensive retinopathy,diabetic retinopathy': 'D', 'idiopathic choroidal neovascularization': 'O', 'image offset': 'DELETE', 'intraretinal hemorrhage': 'O', 'intraretinal microvascular abnormality': 'O', 'laser spot': 'O', 'lens dust': 'DELETE', 'low image quality': 'DELETE', 'low image quality,maculopathy': 'DELETE', 'macular coloboma': 'O', 'macular epiretinal membrane': 'O', 'macular hole': 'O', 'macular pigmentation disorder': 'NaN', 'maculopathy': 'O', 'mild nonproliferative retinopathy': 'D', 'moderate non proliferative retinopathy': 'D', 'morning glory syndrome': 'O', 'myelinated nerve fibers': 'O', 'myopia retinopathy': 'M', 'myopic maculopathy': 'M', 'myopic retinopathy': 'M', 'no fundus image': 'DELETE', 'normal fundus': 'N', 'old branch retinal vein occlusion': 'O', 'old central retinal vein occlusion': 'O', 'old chorioretinopathy': 'O', 'old choroiditis': 'O', 'optic disc edema': 'O', 'optic discitis': 'O', 'optic disk epiretinal membrane': 'O', 'optic disk photographically invisible': 'DELETE', 'optic nerve atrophy': 'O', 'oval yellow-white atrophy': 'O', 'pathological myopia': 'M', 'peripapillary atrophy': 'O', 'pigment epithelium proliferation': 'O', 'pigmentation disorder': 'O', 'post laser photocoagulation': 'O', 'post retinal laser surgery': 'O', 'proliferative diabetic retinopathy': 'D', 'punctate inner choroidopathy': 'O', 'refractive media opacity': 'O', 'retina fold': 'O', 'retinal artery macroaneurysm': 'O', 'retinal detachment': 'O', 'retinal pigment epithelial hypertrophy': 'O', 'retinal pigment epithelium atrophy': 'O', 'retinal pigmentation': 'O', 'retinal vascular sheathing': 'O', 'retinitis pigmentosa': 'O', 'retinochoroidal coloboma': 'O', 'rhegmatogenous retinal detachment': 'O', 'severe nonproliferative retinopathy': 'D', 'severe proliferative diabetic retinopathy': 'D', 'silicone oil eye': 'O', 'spotted membranous change': 'O', 'suspected abnormal color of  optic disc': 'O', 'suspected cataract': 'C', 'suspected diabetic retinopathy': 'D', 'suspected glaucoma': 'G', 'suspected macular epimacular membrane': 'O', 'suspected microvascular anomalies': 'O', 'suspected moderate non proliferative retinopathy': 'D', 'suspected retinal vascular sheathing': 'O', 'suspected retinitis pigmentosa': 'O', 'suspicious diabetic retinopathy': 'D', 'tessellated fundus': 'O', 'vascular loops': 'O', 'vessel tortuosity': 'O', 'vitreous degeneration': 'O', 'vitreous opacity': 'O', 'wedge white line change': 'O', 'wedge-shaped change': 'O', 'wet age-related macular degeneration': 'A', 'white vessel': 'O'}

#load labels
labels = pd.read_excel(labels_file, index_col=0) 
# print(labels.head())

Labeling Images by renaming them, removing irrelevant images.

In [16]:
file_path = 'data/ODIR-5K/Training_Images'
all_paths = []
# store all filepath into an array 
for element in glob.glob(file_path+"/*.jpg"):
  element = element.replace('\\','/')
  all_paths.append(element)
paths = []  # renamed paths

# adding label information to image names
for u_id in labels.index:
  labelL = ""
  labelR = ""
  diagnosticL = labels['Left-Diagnostic Keywords'][u_id]
  diagnosticR = labels['Right-Diagnostic Keywords'][u_id]
  diagnosticL = diagnosticL.split(",")
  diagnosticR = diagnosticR.split(",")

  if len(diagnosticL) == 1:
    labelL = diseases[diagnosticL[0].strip()]
  else:
      labelL = "DELETE"

  if len(diagnosticR) == 1:
    labelR = diseases[diagnosticR[0].strip()]
  else:
      labelR = "DELETE"

  # change filename
  if file_path+"/"+str(labels['Left-Fundus'][u_id]) in all_paths:
    filename = str(labels['Left-Fundus'][u_id]).split(".")
    os.rename(file_path+"/"+str(labels['Left-Fundus'][u_id]), file_path+"/"+filename[0]+"-"+labelL+".jpg")
    paths.append(file_path+"/"+str(labels['Left-Fundus'][u_id])+"/"+labelL)

  if file_path+"/"+str(labels['Right-Fundus'][u_id]) in all_paths:
    filename = str(labels['Right-Fundus'][u_id]).split(".")
    os.rename(file_path+"/"+str(labels['Right-Fundus'][u_id]), file_path+"/"+filename[0]+"-"+labelR+".jpg")
    paths.append(file_path+"/"+str(labels['Right-Fundus'][u_id])+"/"+labelR)

# categorize images and deleting irrelevant images

for element in glob.glob("data/ODIR-5K/Training_Images/*.jpg"):
    img_name = element.split("\\")[-1]
    img_label = img_name.split("-")
    if "DELETE" in img_label[-1]:
      os.remove(element)
    else: 
      fold_path = training_images_file + '/' + img_label[-1][0]
      if not os.path.exists(fold_path):
        os.mkdir(fold_path)
      shutil.move(element.replace('\\','/'), fold_path+'/'+img_name)
     

data/ODIR-5K/Training_Images\0_left-C.jpg
0_left-C.jpg
data/ODIR-5K/Training_Images\0_leftBF-C.jpg
0_leftBF-C.jpg
data/ODIR-5K/Training_Images\0_leftHF-C.jpg
0_leftHF-C.jpg
data/ODIR-5K/Training_Images\0_leftRB-C.jpg
0_leftRB-C.jpg
data/ODIR-5K/Training_Images\0_leftRR-C.jpg
0_leftRR-C.jpg
data/ODIR-5K/Training_Images\0_leftVF-C.jpg
0_leftVF-C.jpg
data/ODIR-5K/Training_Images\0_leftVFRR-C.jpg
0_leftVFRR-C.jpg
data/ODIR-5K/Training_Images\0_leftVFZM-C.jpg
0_leftVFZM-C.jpg
data/ODIR-5K/Training_Images\0_leftZM-C.jpg
0_leftZM-C.jpg
data/ODIR-5K/Training_Images\0_right-N.jpg
0_right-N.jpg
data/ODIR-5K/Training_Images\1006_right-O.jpg
1006_right-O.jpg
data/ODIR-5K/Training_Images\1006_rightHF-O.jpg
1006_rightHF-O.jpg
data/ODIR-5K/Training_Images\1006_rightVF-O.jpg
1006_rightVF-O.jpg
data/ODIR-5K/Training_Images\1008_left-N.jpg
1008_left-N.jpg
data/ODIR-5K/Training_Images\1008_right-O.jpg
1008_right-O.jpg
data/ODIR-5K/Training_Images\1008_rightHF-O.jpg
1008_rightHF-O.jpg
data/ODIR-5K/Trainin

Image Enhancement and Resizing. Creation of Testing Set by Random Selection


In [23]:
import cv2 as cv

def loadAndCropCenterResizeCV2(img, newSize):
    width, height, _ = img.shape
    if width == height:
        return cv.resize(img, newSize)
    length = min(width, height)
    left = (width - length) // 2
    top = (height - length) // 2
    right = (width + length) // 2
    bottom = (height + length) // 2
    return cv.resize(img[left:right, top:bottom, :], newSize)

def clahe_resize(impath):
  img = cv.imread(impath)
  #clahe = cv.createCLAHE(clipLimit=5.0, tileGridSize=(8,8))
  #L, a, b = cv.split(cv.cvtColor(img, cv.COLOR_BGR2Lab))
  #cl1 = clahe.apply(L)
  #eq_image = cv.cvtColor(cv.merge([cl1, a, b]), cv.COLOR_Lab2BGR)
  eq_image = loadAndCropCenterResizeCV2(img, (250, 250))
  
  cv.imwrite(impath,eq_image)

In [30]:
#images enhancing and resizing  
all_paths = []
for element in glob.glob("data/ODIR-5K/Training_Images/*.jpg"):
  element = element.replace('\\','/')
  all_paths.append(element)
  clahe_resize(element)

# creating Testing set
num_to_select = 1800                
list_of_random_items = random.sample(all_paths, num_to_select)
for element in list_of_random_items:
  p = element.split("/")
  os.replace(element, "data/ODIR-5K/Testing_Images/"+p[-1])

In [31]:
def show_class_distribution():
  N = 0
  D=0
  G=0
  C=0
  A=0
  H=0
  M=0
  O = 0
  for element in glob.glob("data/ODIR-5K/Training_Images/*.jpg"):
    element = element.replace('\\','/')
    img_name = element.split("/")[-1]
    img_label = img_name.split("-")
    if img_label[-1] == "N.jpg":
      N += 1
    elif img_label[-1] == "D.jpg":
      D +=1
    elif img_label[-1] == "G.jpg":
      G +=1
    elif img_label[-1] == "C.jpg":
      C +=1
    elif img_label[-1] == "A.jpg":
      A +=1
    elif img_label[-1] == "H.jpg":
      H +=1
    elif img_label[-1] == "M.jpg":
      M +=1
    elif img_label[-1] == "O.jpg":
      O +=1
  print(N, D, G, C, A, H, M, O)
show_class_distribution()

1986 962 152 180 160 71 163 542


OCULAR_DISEASE_AUGMENTATION

In [44]:
import cv2 as cv
import numpy as np
import matplotlib.pyplot as plt
import random
from PIL import Image, ImageEnhance

def cv2_clipped_zoom(img, zoom_factor):
    height, width = img.shape[:2] # It's also the final desired shape
    new_height, new_width = int(height * zoom_factor), int(width * zoom_factor)
    y1, x1 = max(0, new_height - height) // 2, max(0, new_width - width) // 2
    y2, x2 = y1 + height, x1 + width
    bbox = np.array([y1,x1,y2,x2])
    # Map back to original image coordinates
    bbox = (bbox / zoom_factor).astype(np.int)
    y1, x1, y2, x2 = bbox
    cropped_img = img[y1:y2, x1:x2]

    # Handle padding when downscaling
    resize_height, resize_width = min(new_height, height), min(new_width, width)
    pad_height1, pad_width1 = (height - resize_height) // 2, (width - resize_width) //2
    pad_height2, pad_width2 = (height - resize_height) - pad_height1, (width - resize_width) - pad_width1
    pad_spec = [(pad_height1, pad_height2), (pad_width1, pad_width2)] + [(0,0)] * (img.ndim - 2)

    result = cv.resize(cropped_img, (resize_width, resize_height))
    result = np.pad(result, pad_spec, mode='constant')
    assert result.shape[0] == height and result.shape[1] == width
    return result
def aumgent_9(impath):
  img = cv.imread(impath,1)
  im1 = cv.flip(img, 0) #Vertical Flip
  im2 = cv.flip(img, 1) #Horizontal Flip
  im3 = cv.flip(img, -1) #Flip Both
  im4 = cv2_clipped_zoom(img, random.uniform(0.9,1.2))
  im5 = cv2_clipped_zoom(im1, random.uniform(0.9,1.2))
  file_name = impath.split('/')
  new_path = file_name[-1].split('-') 
  cv.imwrite("data/ODIR-5K/Training_Images/"+new_path[0]+"VF-"+new_path[1],im1)
  cv.imwrite("data/ODIR-5K/Training_Images/"+new_path[0]+"HF-"+new_path[1],im2)
  cv.imwrite("data/ODIR-5K/Training_Images/"+new_path[0]+"BF-"+new_path[1],im3)
  cv.imwrite("data/ODIR-5K/Training_Images/"+new_path[0]+"ZM-"+new_path[1],im4)
  cv.imwrite("data/ODIR-5K/Training_Images/"+new_path[0]+"VFZM-"+new_path[1],im5)
  #image random rotating and brightening using PIL
  img_org = Image.open(impath)
  enhancer = ImageEnhance.Brightness(img_org)
  im6 = enhancer.enhance(random.uniform(1.0,1.3))
  im7 = img_org.rotate(random.uniform(0,10))
  img_ver = Image.open("data/ODIR-5K/Training_Images/"+new_path[0]+"VF-"+new_path[1])
  im8 = img_ver.rotate(random.uniform(0,10))
  im6.save("data/ODIR-5K/Training_Images/"+new_path[0]+"RB-"+new_path[1])
  im7.save("data/ODIR-5K/Training_Images/"+new_path[0]+"RR-"+new_path[1])
  im8.save("data/ODIR-5K/Training_Images/"+new_path[0]+"VFRR-"+new_path[1])

#for H class that has the smallest set
def aumgent_20(impath):
  img = cv.imread(impath,1)
  im1 = cv.flip(img, 0) #Vertical Flip
  im2 = cv.flip(img, 1) #Horizontal Flip
  im3 = cv.flip(img, -1) #Both Flip
  im4 = cv2_clipped_zoom(img, random.uniform(0.9,1.2))
  im5 = cv2_clipped_zoom(im1, random.uniform(0.9,1.2))
  im6 = cv2_clipped_zoom(im2, random.uniform(0.9,1.2))
  im7 = cv2_clipped_zoom(im3, random.uniform(0.9,1.2))
  file_name = impath.split('/')
  new_path = file_name[-1].split('-') 
  cv.imwrite("data/ODIR-5K/Training_Images/"+new_path[0]+"VF-"+new_path[1],im1)
  cv.imwrite("data/ODIR-5K/Training_Images/"+new_path[0]+"HF-"+new_path[1],im2)
  cv.imwrite("data/ODIR-5K/Training_Images/"+new_path[0]+"BF-"+new_path[1],im3)
  cv.imwrite("data/ODIR-5K/Training_Images/"+new_path[0]+"ZM-"+new_path[1],im4)
  cv.imwrite("data/ODIR-5K/Training_Images/"+new_path[0]+"VFZM-"+new_path[1],im5)
  cv.imwrite("data/ODIR-5K/Training_Images/"+new_path[0]+"HFZM-"+new_path[1],im6)
  cv.imwrite("data/ODIR-5K/Training_Images/"+new_path[0]+"BFZM-"+new_path[1],im7)
  #image random rotating and brightening using PIL
  img_org = Image.open(impath)
  enhancer = ImageEnhance.Brightness(img_org)
  im7 = enhancer.enhance(random.uniform(0.9,1.2))
  im8 = img_org.rotate(random.uniform(0,10))
  img_ver = Image.open("data/ODIR-5K/Training_Images/"+new_path[0]+"VF-"+new_path[1])
  im9 = img_ver.rotate(random.uniform(0,10))
  im10 = ImageEnhance.Brightness(im9).enhance(random.uniform(0.9,1.2))
  im11 =  ImageEnhance.Brightness(img_ver).enhance(random.uniform(0.9,1.2))
  img_hor = Image.open("data/ODIR-5K/Training_Images/"+new_path[0]+"HF-"+new_path[1])
  im12 = img_hor.rotate(random.uniform(0,10))
  im13 = ImageEnhance.Brightness(im12).enhance(random.uniform(0.9,1.2))
  im14 = ImageEnhance.Brightness(img_hor).enhance(random.uniform(0.9,1.2))
  im_bfl =  Image.open("data/ODIR-5K/Training_Images/"+new_path[0]+"BF-"+new_path[1])
  im15 = im_bfl.rotate(random.uniform(0,10))
  im16 = ImageEnhance.Brightness(im15).enhance(random.uniform(0.9,1.2))
  im17 = ImageEnhance.Brightness(im_bfl).enhance(random.uniform(0.9,1.2))
  im_zom =  Image.open("data/ODIR-5K/Training_Images/"+new_path[0]+"ZM-"+new_path[1])
  im18 = im_zom.rotate(random.uniform(0,10))
  im19 = ImageEnhance.Brightness(im18).enhance(random.uniform(0.9,1.2))
  im20 = ImageEnhance.Brightness(im_zom).enhance(random.uniform(0.9,1.2))
  
  im7.save("data/ODIR-5K/Training_Images/"+new_path[0]+"RB-"+new_path[1])
  im8.save("data/ODIR-5K/Training_Images/"+new_path[0]+"RR-"+new_path[1])
  im9.save("data/ODIR-5K/Training_Images/"+new_path[0]+"VFRR-"+new_path[1])
  im10.save("data/ODIR-5K/Training_Images/"+new_path[0]+"VFRRRB-"+new_path[1])
  im11.save("data/ODIR-5K/Training_Images/"+new_path[0]+"VFRB-"+new_path[1])
  im12.save("data/ODIR-5K/Training_Images/"+new_path[0]+"HFRR-"+new_path[1])
  im13.save("data/ODIR-5K/Training_Images/"+new_path[0]+"HFRRRB-"+new_path[1])
  im14.save("data/ODIR-5K/Training_Images/"+new_path[0]+"HFRB-"+new_path[1])
  im15.save("data/ODIR-5K/Training_Images/"+new_path[0]+"BFRR-"+new_path[1])
  im16.save("data/ODIR-5K/Training_Images/"+new_path[0]+"BFRRRB-"+new_path[1])
  im17.save("data/ODIR-5K/Training_Images/"+new_path[0]+"BFRB-"+new_path[1])
  im18.save("data/ODIR-5K/Training_Images/"+new_path[0]+"ZMRR-"+new_path[1])
  im19.save("data/ODIR-5K/Training_Images/"+new_path[0]+"ZMRRRB-"+new_path[1])
  im20.save("data/ODIR-5K/Training_Images/"+new_path[0]+"ZMRB-"+new_path[1])

def aumgent_2(impath):
  img = cv.imread(impath,0)
  im1 = cv.flip(img, 0) #Vertical Flip
  im2 = cv.flip(img, 1) #Horizontal Flip
  file_name = impath.split('/')
  new_path = file_name[-1].split('-') 
  cv.imwrite("data/ODIR-5K/Training_Images/"+new_path[0]+"VF-"+new_path[1],im1)
  cv.imwrite("data/ODIR-5K/Training_Images/"+new_path[0]+"HF-"+new_path[1],im2)

def aumgent_1(impath):
  img = cv.imread(impath,0)
  im1 = cv.flip(img, 0) #Vertical Flip
  file_name = impath.split('/')
  new_path = file_name[-1].split('-') 
  cv.imwrite("data/ODIR-5K/Training_Images/"+new_path[0]+"VF-"+new_path[1],im1)

In [46]:
labels = ["G.jpg", "C.jpg", "A.jpg","M.jpg"]
labels2 = ["D.jpg"]
labels3 = ["O.jpg"]

for element in glob.glob("data/ODIR-5K/Training_Images/*.jpg"):
  element = element.replace('\\','/')
  file_name = element.split('/')
  new_path = file_name[-1].split('-')
  if new_path[-1] =="H.jpg":
    aumgent_20(element)
  else:
    for l in labels:
      if new_path[-1] == l:
        aumgent_9(element)
    for l in labels2:
      if new_path[-1] == l:
        aumgent_1(element)
    for l in labels3:
      if new_path[-1] == l:
        aumgent_2(element)

['data/ODIR-5K/Training_Images\\0_left-C.jpg', 'data/ODIR-5K/Training_Images\\0_right-N.jpg', 'data/ODIR-5K/Training_Images\\1006_right-O.jpg', 'data/ODIR-5K/Training_Images\\1008_left-N.jpg', 'data/ODIR-5K/Training_Images\\1008_right-O.jpg', 'data/ODIR-5K/Training_Images\\100_left-O.jpg', 'data/ODIR-5K/Training_Images\\1010_left-N.jpg', 'data/ODIR-5K/Training_Images\\1011_left-N.jpg', 'data/ODIR-5K/Training_Images\\1011_right-O.jpg', 'data/ODIR-5K/Training_Images\\1012_left-O.jpg', 'data/ODIR-5K/Training_Images\\1012_right-M.jpg', 'data/ODIR-5K/Training_Images\\1013_left-N.jpg', 'data/ODIR-5K/Training_Images\\1014_left-O.jpg', 'data/ODIR-5K/Training_Images\\1014_right-N.jpg', 'data/ODIR-5K/Training_Images\\1015_left-O.jpg', 'data/ODIR-5K/Training_Images\\1015_right-O.jpg', 'data/ODIR-5K/Training_Images\\1016_left-N.jpg', 'data/ODIR-5K/Training_Images\\1016_right-O.jpg', 'data/ODIR-5K/Training_Images\\1018_right-N.jpg', 'data/ODIR-5K/Training_Images\\101_right-O.jpg', 'data/ODIR-5K/Tra

Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  bbox = (bbox / zoom_factor).astype(np.int)


In [47]:
show_class_distribution()

1986 1924 1368 1620 1440 1562 1467 1626


In [3]:
# Categorize images and deleting irrelevant images
for element in glob.glob("data/ODIR-5K/Testing_Images/*.jpg"):
    img_name = element.split("\\")[-1]
    img_label = img_name.split("-")
    if "DELETE" in img_label[-1]:
      os.remove(element)
    else: 
      fold_path = 'data/ODIR-5K/Testing_Images/' + img_label[-1][0]
      if not os.path.exists(fold_path):
        os.mkdir(fold_path)
      shutil.move(element.replace('\\','/'), fold_path+'/'+img_name)
     