In [1]:
from google.colab import drive
drive.mount('/content/gdrive')

Mounted at /content/gdrive


In [2]:
import cv2
import csv
import os
import re
import math 
import numpy as np
import pandas as pd
import imagesize
from ipywidgets import interact
from pathlib import Path

from matplotlib import pyplot as plt 
plt.style.use("ggplot")
%matplotlib inline

from tqdm import tqdm_notebook, tnrange, tqdm
from itertools import chain
from skimage.io import imread, imshow, concatenate_images
from skimage.transform import resize
from skimage.morphology import label
from sklearn.model_selection import train_test_split

import tensorflow as tf

In [None]:
path_drishti = r"/content/gdrive/Shareddrives/Tugas Akhir/Dataset/DRISHTI"

path_refugetrain = r"/content/gdrive/Shareddrives/Tugas Akhir/Dataset/REFUGE/Training"
path_refugeval = r"/content/gdrive/Shareddrives/Tugas Akhir/Dataset/REFUGE/Validation"
path_refugetest = r"/content/gdrive/Shareddrives/Tugas Akhir/Dataset/REFUGE/Test"

dataset_path = r"/content/gdrive/Shareddrives/Tugas Akhir/Dataset"

# Fungsi-fungsi

In [None]:
def saving_image(img, path):
  if os.path.isfile(path):
    os.remove(path)
    cv2.imwrite(path, img)
  else:
    cv2.imwrite(path, img)

In [None]:
def figure(images, title, coloumn = 3, size = 20):
    plt.rcParams["figure.figsize"] = (size,size) #memperbesar gambar

    if (len(images) < 3):
      col = len(images)
    else:
      col = coloumn*1

    row = math.ceil(len(images)/col)

    for a in range(len(images)):
      plt.subplot(row,col, a+1), plt.imshow(images[a], cmap = 'gray')
      plt.title(title[a])
      plt.xticks([]), plt.yticks([])
    plt.show()

In [None]:
def ekstrakROI(centroid, s, img):

  h, w = img.shape[:2]
 
  y0, y1, x0, x1 = rectfromcenter(centroid, s, h, w)

  #cropping ROI from source image
  ROI = img[y0:y1, x0:x1]
  koordinat = (y0, y1, x0, x1)

  return ROI, koordinat

In [None]:
def rectfromcenter(center, s, h, w):
  x, y = center
  x0 = round(x - 0.5*s)
  x1 = round(x + 0.5*s)
  y0 = round(y - 0.5*s)
  y1 = round(y + 0.5*s)
  dx = (x1 - x0) - s
  dy = (y1 - y0) - s
  # penanganan kasus out of image
  if (x0 < 0):
    x1 = x1 + (-x0)
    x0 = 0
    x1 -= dx
  elif (x1 > w-1):
    x0 = x0 - (x1-(w-1))
    x1 = w-1
    x0 += dx
  else:
    x0 += dx

  if (y0 < 0):
    y1 = y1 + (-y0)
    y0 = 0
    y1 -= dy
  elif (y1 > h-1):
    y0 = y0 - (y1-(h-1))
    y1 = (h-1)
    y0 += dy
  else:
    y0 += dy

  return y0, y1, x0, x1

In [None]:
def downscale(img):
  size = [256, 256]
  img = tf.image.resize(img, size, method=tf.image.ResizeMethod.BICUBIC, preserve_aspect_ratio=True)
  return img

def upscale(img):
  size = (550, 550)
  img = tf.image.resize(img, size, method=tf.image.ResizeMethod.BICUBIC, preserve_aspect_ratio=True)

  return img

In [None]:
# fungsi untuk mengkonversi groundtruth REFUGE
def refuGT(path):
  import numpy as np
  import cv2

  mask = cv2.imread(path, 0)
  h, w = mask.shape

  #inisialisasi bound
  OD_mask = np.zeros([h, w], np.uint8)
  OC_mask = np.zeros([h, w], np.uint8)
  OD_mask[mask<=192] = 255
  OC_mask[mask<=64] = 255

  return OD_mask, OC_mask

In [None]:
# fungsi untuk mengkonversi groundtruth DRISHTI-GS
def drishtiGT(path, shape):

  file = open(path, 'r')
  coordinates = file.readlines()
  w, h = shape

  # Ekstrak koordinat dari file gt drishti
  for i in range(len(coordinates)):
    y, x = coordinates[i].split(" ")
    coordinates[i] = (int(x), int(y))

  coordinates = np.array(coordinates)
  file.close()

  # inisiasi citra mask ground truth

  mask = np.zeros([h, w], np.uint8)
  mask[coordinates[:, 1],coordinates[:, 0]] = 255
  color = 255
  mask_contour, hierarchy = cv2.findContours(mask, 
  cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)
  mask = np.zeros([h, w], np.uint8)
  # menggambar boundary mask dari data coordinate gt drishti

  cv2.drawContours(mask, mask_contour, -1, color, cv2.FILLED)

  return mask

In [None]:
def CDRcalc(OD_mask, OC_mask):
  c_OD, _ = cv2.findContours(OD_mask, cv2.RETR_TREE, cv2.CHAIN_APPROX_NONE)
  c_OD = cv2.approxPolyDP(c_OD[0], 3, True)
  c_OC, _ = cv2.findContours(OC_mask, cv2.RETR_TREE, cv2.CHAIN_APPROX_NONE)
  c_OC = cv2.approxPolyDP(c_OC[0], 3, True)
  _, _, ver_OD, hor_OD = cv2.boundingRect(c_OD)
  _, _, ver_OC, hor_OC = cv2.boundingRect(c_OC)

  VCDR = ver_OC/ ver_OD # vertical CDR
  HCDR = hor_OC/ ver_OD # Horizontal CDR
  ACDR = np.sum(OC_mask == 255)/ np.sum(OD_mask == 255) # Area CDR

  return VCDR, HCDR, ACDR

In [None]:
def extractMaskCenter(OD_Mask):
  count = (OD_Mask==255).sum()
  # calculate moments for each contour
  x_center, y_center = np.argwhere(OD_Mask==255).sum(0)/count

  return (y_center, x_center)

In [None]:
def ekstrak_groundtruth(path_OD, path_OC, shape, dataset):
  from PIL import Image
  from tqdm.notebook import tqdm
  from matplotlib import pyplot as plt
  import re
  import os
  iterates = len(path_OD)
  path_OD_png = []
  path_OC_png = []
  VCDR = []
  HCDR = []
  ACDR = []
  center = []

  for i in tqdm(range(0, iterates), desc = "Number of GT that had been processed : "):
    old_file_OD = os.path.basename(path_OD[i])
    old_file_OC = os.path.basename(path_OC[i])   
    if (dataset[i].lower() == "drishti"):  
      OD_mask_gt = drishtiGT(path_OD[i], shape[i])
      OC_mask_gt = drishtiGT(path_OC[i], shape[i])
      
    elif re.search(r'refuge', dataset[i].lower()):
      OD_mask_gt, OC_mask_gt = refuGT(path_OD[i])
      C = extractMaskCenter(OD_mask_gt)
      center.append(C)

    V, H, A = CDRcalc(OD_mask_gt, OC_mask_gt)
    VCDR.append(V)
    HCDR.append(H)
    ACDR.append(A)
    
    new_file_OD = old_file_OD[:old_file_OD.index('.')] + '_OD_img.png'
    new_file_OC = old_file_OC[:old_file_OC.index('.')] + '_OC_img.png'
    path_OD_png.append(path_OD[i].replace(old_file_OD, new_file_OD))
    path_OC_png.append(path_OC[i].replace(old_file_OC, new_file_OC))
    
    OD_mask_gt = Image.fromarray(OD_mask_gt)
    OC_mask_gt = Image.fromarray(OC_mask_gt)


    if not os.path.isfile(path_OD_png[-1]):
      OD_mask_gt.save(path_OD_png[-1])
    else:
      os.remove(path_OD_png[-1])
      OD_mask_gt.save(path_OD_png[-1])

    if not os.path.isfile(path_OC_png[-1]):
      OC_mask_gt.save(path_OC_png[-1])
    
    else:
      os.remove(path_OC_png[-1])
      OC_mask_gt.save(path_OC_png[-1])
    
  return path_OD_png, path_OC_png, VCDR, HCDR, ACDR, center

# Create Dataframe

## Functions

In [None]:
def loadRefugeTest(path):
  '''
  Load dataset into a dataframe
  input
    path    : string, path to dataset

  output
    df      : pandas Dataframe, dataframe from dataset
              path to image | file_name | disc_center | VCDR Value | HCDR Value | ACDR Value | tag | dataset | path to OD GT | path to OC GT | path to OD mask | path to OC mask 
  '''
  file_name = []
  img_path = []
  path_OD_gt = []
  shape = []
  
  # Checking every jpg, bmp, and xlxs file
  for file_path, subdirs, files in os.walk(path):
    files.sort()
    for name in files:
      # If file were jpg, get path, file name, and shape of original image
      if re.search(r'.jpg', name):
        file_name.append(name)
        img_path.append(os.path.join(file_path, name))
        shape.append(imagesize.get(os.path.join(file_path, name)))

      # If file were bmp, get path of the ground truth
      elif re.search(r'.bmp', name):
        path_OD_gt.append(os.path.join(file_path, name))
      
      # If file were xlsx, get tag
      elif re.search(r'.xlsx', name):
        df = pd.read_excel(os.path.join(file_path, name),usecols="C")
        tag = df["Glaucoma Label"].values.tolist()
  
  path_OC_gt = path_OD_gt
  dataset = ['refuge test'] * len(file_name)

  path_OD_mask, path_OC_mask, VCDR, HCDR, ACDR, center =  ekstrak_groundtruth(path_OD_gt, path_OC_gt, shape, dataset)
  
  df = pd.DataFrame({"path":img_path,'file_name':file_name, "disc_center" : center,
                   "tag":tag, "dataset":dataset,"shape":shape, "VCDR": VCDR, "HCDR": HCDR, "ACDR": ACDR,
                   "path_OD_png":path_OD_mask, "path_OC_png":path_OC_mask})
  tag_names = ['Normal', 'Glaucoma']
  tag = [int(x) for x in tag]
  df['tag'] = pd.Categorical.from_codes(tag, tag_names) 

  return df

In [None]:
def loadRefugeVal(path):
  '''
  Load dataset into a dataframe
  input
    path    : string, path to dataset

  output
    df      : pandas Dataframe, dataframe from dataset
              path to image | file_name | disc_center | VCDR Value | HCDR Value | ACDR Value | tag | dataset | path to OD GT | path to OC GT | path to OD mask | path to OC mask 
  '''
  file_name = []
  img_path = []
  path_OD_gt = []
  shape = []
  
  # Checking every jpg, bmp, and xlxs file
  for file_path, subdirs, files in os.walk(path):
    files.sort()
    for name in files:
      # If file were jpg, get path, file name, and shape of original image
      if re.search(r'.jpg', name):
        file_name.append(name)
        img_path.append(os.path.join(file_path, name))
        shape.append(imagesize.get(os.path.join(file_path, name)))

      # If file were bmp, get path of the ground truth
      elif re.search(r'.bmp', name):
        path_OD_gt.append(os.path.join(file_path, name))
      
      # If file were xlsx, get tag
      elif re.search(r'.xlsx', name):
        df = pd.read_excel(os.path.join(file_path, name),usecols="C")
        tag = df["Glaucoma Label"].values.tolist()
  
  path_OC_gt = path_OD_gt
  dataset = ['refuge val'] * len(file_name)

  path_OD_mask, path_OC_mask, VCDR, HCDR, ACDR, center =  ekstrak_groundtruth(path_OD_gt, path_OC_gt, shape, dataset)
  
  df = pd.DataFrame({"path":img_path,'file_name':file_name, "disc_center" : center,
                   "tag":tag, "dataset":dataset,"shape":shape, "VCDR": VCDR, "HCDR": HCDR, "ACDR": ACDR,
                   "path_OD_png":path_OD_mask, "path_OC_png":path_OC_mask})
  tag_names = ['Normal', 'Glaucoma']
  df['tag'] = pd.Categorical.from_codes(tag, tag_names) 

  return df

In [None]:
def loadRefugeTrain(path):
  '''
  Load dataset into a dataframe
  input
    path    : string, path to dataset

  output
    df      : pandas Dataframe, dataframe from dataset
              path to image | file_name | disc_center | VCDR Value | HCDR Value | ACDR Value | tag | dataset | path to OD GT | path to OC GT | path to OD mask | path to OC mask 
  '''
  file_name = []
  img_path = []
  path_OD_gt = []
  shape = []
  tag = []
  
  # Checking every jpg, bmp, and xlxs file
  for file_path, subdirs, files in os.walk(path):
    files.sort()
    for name in files:
      # If file were jpg, get path, file name, and shape of original image
      if re.search(r'.jpg', name):
        if name[0] == "n":
          tag.append(0)
        else:
          tag.append(1)
        file_name.append(name)
        img_path.append(os.path.join(file_path, name))
        shape.append(imagesize.get(os.path.join(file_path, name)))

      # If file were bmp, get path of the ground truth
      elif re.search(r'.bmp', name):
        path_OD_gt.append(os.path.join(file_path, name))
  
  path_OC_gt = path_OD_gt
  dataset = ['refuge train'] * len(file_name)

  path_OD_mask, path_OC_mask, VCDR, HCDR, ACDR, center =  ekstrak_groundtruth(path_OD_gt, path_OC_gt, shape, dataset)
  
  df = pd.DataFrame({"path":img_path,'file_name':file_name, "disc_center" : center,
                   "tag":tag, "dataset":dataset,"shape":shape, "VCDR": VCDR, "HCDR": HCDR, "ACDR": ACDR,
                   "path_OD_png":path_OD_mask, "path_OC_png":path_OC_mask})
  tag_names = ['Normal', 'Glaucoma']
  df['tag'] = pd.Categorical.from_codes(tag, tag_names) 

  return df

In [None]:
def loadDrishti(path):
  '''
  Load dataset into a dataframe
  input
    path    : string, path to dataset

  output
    df      : pandas Dataframe, dataframe from dataset
              path to image | file_name | disc_center | VCDR Value | HCDR Value | ACDR Value | tag | dataset | path to OD GT | path to OC GT | path to OD mask | path to OC mask 
  '''
  file_name = []
  img_path = []
  path_OD_gt = []
  path_OC_gt = []
  shape = []
  tag = []
  center = []
  dataset = []
  path_center = []

  for file_path, subdirs, files in os.walk(os.path.join(path, "Images")):
    files.sort()
    for name in files:
      if re.search(r'.png', name):
        file_name.append(name)
        img_path.append(os.path.join(file_path, name))
        shape.append(imagesize.get(os.path.join(file_path, name)))
  
  for file_path, subdirs, files in os.walk(os.path.join(path, "GT")):
    files.sort()
    for name in files:
      if re.search(r'ODAvgBoundary.txt', name):
        path_OD_gt.append(os.path.join(file_path, name))
      elif re.search(r'CupAvgBoundary.txt', name):
        path_OC_gt.append(os.path.join(file_path, name))
      elif re.search(r'diskCenter.txt', name):
        path_center.append(os.path.join(file_path, name))
  
  path_center.sort()
  path_OD_gt.sort()
  path_OC_gt.sort()

  for name in path_center:
    file = open(name, 'r')
    d = file.readlines()
    y, x = d[0].split(' ')
    center.append((int(x), int(y)))
    file.close()
  

  df_tag = pd.read_excel(os.path.join(path_drishti, "Drishti-GS1_diagnosis.xlsx"), usecols = "A, H")
  filenames = []
  for name in df_tag["Drishti-GS File"]:
    temp = name[:(len(name)-1)] + ".png"
    filenames.append(temp)
    
  for name in filenames:
    if name in file_name:
      tag.append(df_tag["Total"][filenames.index(name)])

  dataset = ['drishti'] * len(file_name)
  path_OD_mask, path_OC_mask, VCDR, HCDR, ACDR, _ =  ekstrak_groundtruth(path_OD_gt, path_OC_gt, shape, dataset)
  df = pd.DataFrame({"path":img_path,'file_name':file_name, "disc_center" : center,
                   "tag":tag, "dataset":dataset,"shape":shape, "VCDR": VCDR, "HCDR": HCDR, "ACDR": ACDR,
                   "path_OD_png":path_OD_mask, "path_OC_png":path_OC_mask})
  df.loc[df['tag'] == "Glaucomatous", 'tag'] = 'Glaucoma'

  return df

## Run

In [None]:
df_drishti = loadDrishti(path_drishti)

Number of GT that had been processed :   0%|          | 0/50 [00:00<?, ?it/s]

In [None]:
df_refugeval = loadRefugeVal(path_refugeval)

Number of GT that had been processed :   0%|          | 0/400 [00:00<?, ?it/s]

In [None]:
df_refugetest = loadRefugeTest(path_refugetest)

Number of GT that had been processed :   0%|          | 0/400 [00:00<?, ?it/s]

In [None]:
df_refuge = loadRefugeTrain(path_refugetrain)

Number of GT that had been processed :   0%|          | 0/400 [00:00<?, ?it/s]

In [None]:
# Save json
json_refugetest_path = os.path.join(dataset_path, 'df_refugetest.json')
json_refugeval_path = os.path.join(dataset_path, 'df_refugeval.json')
json_refuge_path = os.path.join(dataset_path, 'df_refugetrain.json')
json_drishti_path = os.path.join(dataset_path, 'df_drishti.json')

if os.path.isfile(json_refugetest_path):
  os.remove(json_refugetest_path)
  df_refugetest.to_json(json_refugetest_path)
else:
  df_refugetest.to_json(json_refugetest_path)

if os.path.isfile(json_refugeval_path):
  os.remove(json_refugeval_path)
  df_refugeval.to_json(json_refugeval_path)
else:
  df_refugeval.to_json(json_refugeval_path)

if os.path.isfile(json_refuge_path):
  os.remove(json_refuge_path)
  df_refuge.to_json(json_refuge_path)
else:
  df_refuge.to_json(json_refuge_path)

if os.path.isfile(json_drishti_path):
  os.remove(json_drishti_path)
  df_drishti.to_json(json_drishti_path)
else:
  df_drishti.to_json(json_drishti_path)

# Deprecated

In [None]:
def loadRefugeTestUp(path):
  '''
  Load dataset into a dataframe
  input
    path    : string, path to dataset

  output
    df      : pandas Dataframe, dataframe from dataset
              path to image | file_name | disc_center | VCDR Value | HCDR Value | ACDR Value | tag | dataset | path to OD GT | path to OC GT | path to OD mask | path to OC mask 
  '''
  file_name = []
  img_path = []
  path_OD_gt = []
  shape = []
  
  # Checking every jpg, bmp, and xlxs file
  for file_path, subdirs, files in os.walk(path):
    files.sort()
    for name in files:
      # If file were jpg, get path, file name, and shape of original image
      if re.search(r'upscaled.jpg', name):
        file_name.append(name)
        img_path.append(os.path.join(file_path, name))
        shape.append(imagesize.get(os.path.join(file_path, name)))

      # If file were bmp, get path of the ground truth
      elif re.search(r'upscaled.bmp', name):
        path_OD_gt.append(os.path.join(file_path, name))
      
      # If file were xlsx, get tag
      elif re.search(r'.xlsx', name):
        df = pd.read_excel(os.path.join(file_path, name),usecols="C")
        tag = df["Glaucoma Label"].values.tolist()
  
  path_OC_gt = path_OD_gt
  dataset = ['refuge test up'] * len(file_name)
  print(len(file_name))

  path_OD_mask, path_OC_mask, VCDR, HCDR, ACDR, center =  (path_OD_gt, path_OC_gt, shape, dataset)
  
  df = pd.DataFrame({"path":img_path,'file_name':file_name, "disc_center" : center,
                   "tag":tag, "dataset":dataset,"shape":shape, "VCDR": VCDR, "HCDR": HCDR, "ACDR": ACDR,
                   "path_OD_png":path_OD_mask, "path_OC_png":path_OC_mask})
  tag_names = ['Normal', 'Glaucoma']
  df['tag'] = pd.Categorical.from_codes(tag, tag_names) 

  return df

# Upscale Image

In [None]:
img_path = []
gt_path = []

for file_path, subdirs, files in os.walk(path_refuge_test):
    files.sort()
    for name in files:
      # If file were jpg, get path, file name, and shape of original image
      if re.search(r'.jpg', name):
        img_path.append(os.path.join(file_path, name))
      if re.search(r'.bmp', name):
        gt_path.append(os.path.join(file_path, name))

In [None]:
print(img_path[len(img_path)-1])

/content/gdrive/My Drive/Tugas Akhir/Dataset/REFUGE/Test/REFUGE-Validation400/V0400.jpg


In [None]:
from PIL import Image

for i in tqdm_notebook(range(0, len(img_path))):  
  # set the base width of the result
  basewidth = 2124
  img = Image.open(img_path[i])
  # determining the height ratio
  wpercent = (basewidth/float(img.size[0]))
  hsize = int((float(img.size[1])*float(wpercent)))
  # resize image and save
  img = img.resize((basewidth,hsize), Image.ANTIALIAS)
  path = img_path[i][:img_path[i].index('.')] + '_upscaled.jpg'
  if os.path.isfile(path):
    os.remove(path)
    img.save(path) 
  else:
    img.save(path) 

Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  This is separate from the ipykernel package so we can avoid doing imports until


  0%|          | 0/400 [00:00<?, ?it/s]

In [None]:
for i in tqdm_notebook(range(0, len(gt_path))):  
  # set the base width of the result
  basewidth = 2124
  img = Image.open(gt_path[i])
  # determining the height ratio
  wpercent = (basewidth/float(img.size[0]))
  hsize = int((float(img.size[1])*float(wpercent)))
  # resize image and save
  img = img.resize((basewidth,hsize), Image.ANTIALIAS)
  path = gt_path[i][:gt_path[i].index('.')] + '_upscaled.bmp'
  if os.path.isfile(path):
    os.remove(path)
    img.save(path) 
  else:
    img.save(path) 

Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  """Entry point for launching an IPython kernel.


  0%|          | 0/400 [00:00<?, ?it/s]

NameError: ignored

In [None]:
img[1045][734]

141