In [20]:
import torch
import glob
import torchvision
from torch import nn
from torch.utils.data import DataLoader
from torchvision.utils import save_image
from torchvision import datasets
from torchvision.transforms import ToTensor, Lambda, Compose
import matplotlib.pyplot as plt
import torchvision.transforms as transforms 
from PIL import Image, ImageOps

import numpy as np
import pandas as pd
import os
import cv2
#from tqdm import tqdm

In [2]:
#DATA_PATH = '/kaggle/input/ocular-disease-recognition-odir5k/ODIR-5K/ODIR-5K/data.xlsx'
#IMG_DIR = '/kaggle/input/ocular-disease-recognition-odir5k/ODIR-5K/ODIR-5K/Training Images/'

DATA_PATH = '../archive/ODIR-5K/ODIR-5K/data.xlsx'
#IMG_DIR = 'ODIR-5K/ODIR-5K/TRAINING/'
IMG_DIR = '../archive/preprocessed_images'

In [3]:
data = pd.read_excel(DATA_PATH)

diagnostic_keyphrases = {'N': ['normal fundus'], 
 'D': ['nonproliferative retinopathy',
  'non proliferative retinopathy','mild nonproliferative retinopathy',
  'proliferative retinopathy'],
 'G': ['glaucoma'],
 'C': ['cataract'],
 'A': ['age-related macular degeneration'],
 'H': ['hypertensive'],
 'M': ['myopi'],
 'O': ['macular epiretinal membrane',
  'epiretinal membrane',
  'drusen',
  'myelinated nerve fibers',
  'vitreous degeneration',
  'refractive media opacity',
  'spotted membranous change',
  'tessellated fundus',
  'maculopathy',
  'chorioretinal atrophy',
  'branch retinal vein occlusion',
  'retinal pigmentation',
  'white vessel',
  'post retinal laser surgery',
  'epiretinal membrane over the macula',
  'retinitis pigmentosa',
  'central retinal vein occlusion',
  'optic disc edema',
  'post laser photocoagulation',
  'retinochoroidal coloboma',
  'atrophic change',
  'optic nerve atrophy',
  'old branch retinal vein occlusion',
  'depigmentation of the retinal pigment epithelium',
  'chorioretinal atrophy with pigmentation proliferation',
  'central retinal artery occlusion',
  'old chorioretinopathy',
  'pigment epithelium proliferation',
  'retina fold',
  'abnormal pigment ',
  'idiopathic choroidal neovascularization',
  'branch retinal artery occlusion',
  'vessel tortuosity',
  'pigmentation disorder',
  'rhegmatogenous retinal detachment',
  'macular hole',
  'morning glory syndrome',
  'atrophy',
  'laser spot',
  'arteriosclerosis',
  'asteroid hyalosis',
  'congenital choroidal coloboma',
  'macular coloboma',
  'optic discitis',
  'oval yellow-white atrophy',
  'wedge-shaped change',
  'wedge white line change',
  'retinal artery macroaneurysm',
  'retinal vascular sheathing',
  'suspected abnormal color of  optic disc',
  'suspected retinal vascular sheathing',
  'suspected retinitis pigmentosa',
  'silicone oil eye']}

LeftText=[]
for i, row in data.iterrows():
    text=row['Left-Diagnostic Keywords']
    Listecle=[]
    for cle, valeur in diagnostic_keyphrases.items():
        valeur = diagnostic_keyphrases.get (cle)
        for keyword in valeur:
            if keyword in text:
                Listecle.append (cle)
           
    Listecle=list(set(Listecle))
    LeftText.append (Listecle)

data['Left Text']= LeftText


RightText=[]
for i, row in data.iterrows():
    text=row['Right-Diagnostic Keywords']
    Listecle=[]
    for cle, valeur in diagnostic_keyphrases.items():
        valeur = diagnostic_keyphrases.get (cle)
        for keyword in valeur:
            if keyword in text:
                Listecle.append (cle)
           
    Listecle=list(set(Listecle))
    RightText.append (Listecle)

data['Right Text']= RightText
key_columns = ['ID', 'Patient Age','Patient Sex','Left-Fundus','Right-Fundus','N','D','G','C','A','H','M','O','Left Text','Right Text'] 
data=data [key_columns]
data.head()

df=data
df.to_csv('full_df.csv', mode='a', header=key_columns, index=False, encoding='utf-8')


In [4]:
IMG_SIZE = 512

In [5]:
## implementation du pseudo-code de Ben Graham

def scaleRadius (img, scale):
    toto=int(img.shape[0]/2)
    x=img[toto,:,:].sum (1)
    r=(x>x.mean()/10).sum()/2
    s=scale * 1.0/r
    return cv2.resize (img,(0,0), fx=s , fy=s )

def preprocess_image(file_name):
    scale =310
    # lecture de l'image
    image = cv2.imread(os.path.join(IMG_DIR, file_name))
 
    #redimensionnement de l'image à un rayon donné
    image=scaleRadius (image, scale)
    
    #on soustrait la couleur moyenne pour la mapper sur 50% de gris de façon à mieux faire ressortir les constrastes
    image=cv2.addWeighted (image ,4,cv2.GaussianBlur (image,(0,0),scale/30),-4 ,128)
    
    #on enleve 10% des bordures
    b=np.zeros(image.shape)

    cv2.circle(b,(int(image.shape[1]/2), int(image.shape[0]/2)),int(scale * 0.9),(1,1,1),-1,8,0)
    image=image*b+128*(1-b)
    
    cv2.imwrite(str(scale)+"_"+file_name ,image)

   
    return image

def preprocess_patient(patient_id):
    left_eye_file = str(patient_id) + '_left.jpg'
    right_eye_file = str(patient_id) + '_right.jpg'
    # combine image vertical/horizontal
    image = cv2.hconcat([preprocess_image(left_eye_file), preprocess_image(right_eye_file)]) 

    return image

#main_df = pd.read_csv('full_df.csv')
#patient_id = main_df.iloc[13]['ID']
#image = preprocess_patient(patient_id)
image = preprocess_patient(13)  

In [6]:
# deuxieme implementation pour reduction size, normalisation, et enlever les bordures

def crop(image): 
    # Remove vertical black borders (the image must be already normalized)
    sums = image.sum(axis=0)
    sums = sums.sum(axis=1)
    filter_arr = []
    for s in sums:
        if s == 0:
            filter_arr.append(False)
        else:
            filter_arr.append(True)
    image = image[:, filter_arr]
    
    # Crop to a square shape
    h = image.shape[0]
    w = image.shape[1]    
    
    if h < w:
        x = (w - h)//2
        image = image[:, x:x+h, :]        
    elif h > w:
        x = (h - w)//2
        image = image[x:x+w, :, :]           
    else:
        pass
    
    return image


def Xpreprocess_image(file_name):

    # lecture de l'image
    image = cv2.imread(os.path.join(IMG_DIR, file_name))
 

    norm_img = np.zeros(image.shape)
    # normalisation 0 ou 1
    norm_img = cv2.normalize(image,  norm_img, 0, 255, cv2.NORM_MINMAX)
    
    # enleve les bordures
    image = crop(norm_img)

    # redimension de l'image pour avoir meme dimension entre toutes les images (à cause image resolution differentes)
    # et conversion de la couleur de l'image car par defaut cv2 lit image en couleur bleue
    # dans le resize, le ration = 1 par defaut car hauteur pixel = largeur pixel
    image = cv2.resize(cv2.cvtColor(image, cv2.COLOR_BGR2RGB), (IMG_SIZE, IMG_SIZE))

    ##norm_img = np.zeros(image.shape)
    # normalisation 0 ou 1
    ##norm_img = cv2.normalize(image,  norm_img, 0, 255, cv2.NORM_MINMAX)
    
    # enleve les bordures
    ##image = crop(norm_img)
      
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    cv2.imwrite(str("X_")+"_"+file_name ,image)
      
    return image

def Xpreprocess_patient(patient_id):
    left_eye_file = str(patient_id) + '_left.jpg'
    right_eye_file = str(patient_id) + '_right.jpg'
    # combine image vertical/horizontal
    image = cv2.hconcat([Xpreprocess_image(left_eye_file), Xpreprocess_image(right_eye_file)]) 

    return image

#patient_id = main_df.iloc[13]['ID']
#image = Xpreprocess_patient(patient_id)    

image = Xpreprocess_patient(13)  
    

In [7]:
## GENERIC --

def preprocess_image_crop(image): 
    # Remove vertical black borders (the image must be already normalized)
    sums = image.sum(axis=0)
    sums = sums.sum(axis=1)
    filter_arr = []
    for s in sums:
        if s == 0:
            filter_arr.append(False)
        else:
            filter_arr.append(True)
    image = image[:, filter_arr]
    
    # Crop to a square shape
    h = image.shape[0]
    w = image.shape[1]    
    
    if h < w:
        x = (w - h)//2
        image = image[:, x:x+h, :]        
    elif h > w:
        x = (h - w)//2
        image = image[x:x+w, :, :]           
    else:
        pass
    
    return image

def preprocess_image_Resize(image):

    norm_img = np.zeros(image.shape)
    # normalisation 0 ou 1
    norm_img = cv2.normalize(image,  norm_img, 0, 255, cv2.NORM_MINMAX)
    
    # enleve les bordures
    image = crop(norm_img)

    # redimension de l'image pour avoir meme dimension entre toutes les images (à cause image resolution differentes)
    # et conversion de la couleur de l'image car par defaut cv2 lit image en couleur bleue
    # dans le resize, le ration = 1 par defaut car hauteur pixel = largeur pixel
    image = cv2.resize(cv2.cvtColor(image, cv2.COLOR_BGR2RGB), (IMG_SIZE, IMG_SIZE))

    ##norm_img = np.zeros(image.shape)
    ##norm_img = cv2.normalize(image,  norm_img, 0, 255, cv2.NORM_MINMAX)
    
      
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    cv2.imwrite(str("X_")+"_"+file_name ,image)
      
    return image

def scaleRadius (img, scale):
    toto=int(img.shape[0]/2)
    x=img[toto,:,:].sum (1)
    r=(x>x.mean()/10).sum()/2
    s=scale * 1.0/r
    return cv2.resize (img,(0,0), fx=s , fy=s )

def preprocess_image_Ben (image):
    
    scale =320

    norm_img = np.zeros(image.shape)
    # normalisation 0 ou 1
    norm_img = cv2.normalize(image,  norm_img, 0, 255, cv2.NORM_MINMAX)
    
    # enleve les bordures
    # #image = preprocess_image_crop(norm_img)

    #redimensionnement de l'image à un rayon donné
    image=scaleRadius (image, scale)
    
    #on soustrait la couleur moyenne pour la mapper sur 50% de gris de façon à mieux faire ressortir les constrastes
    image=cv2.addWeighted (image ,4,cv2.GaussianBlur (image,(0,0),scale/30),-4 ,128)
   
   
    #on enleve 10% des bordures
    b=np.zeros(image.shape)

    cv2.circle(b,(int(image.shape[1]/2), int(image.shape[0]/2)),int(scale * 0.95),(1,1,1),-1,8,0)
    image=image*b+128*(1-b)
    
    
    return image




In [29]:
def to_grayscale(color):
    #suppression des bordures
    # passage en noir et blanc
    return ImageOps.grayscale(croped)

In [9]:
file_name = str(13) + '_left.jpg'
# lecture de l'image
image = cv2.imread(os.path.join(IMG_DIR, file_name))
#image=preprocess_image_Ben(image)
#image=preprocess_image_crop (image)
image=to_grayscale(image)
cv2.imwrite(str(800)+"_"+file_name ,image)


typezzzzz <class 'numpy.ndarray'>


True

In [32]:
class Dataset(torch.utils.data.Dataset):
    'Characterizes a dataset for PyTorch'
    def __init__(self, data_dir,transform):
        'Initialization'

        """
        to_drop = ['ID', 'Patient Age', 'Patient Sex', 'Left-Fundus', 'Right-Fundus',
           'Left-Diagnostic Keywords', 'Right-Diagnostic Keywords', 'N', 'D', 'G',
           'C', 'A', 'H', 'M', 'O', 'filepath', 'target']
        data = data.drop(columns = to_drop)
        """

        data = pd.read_csv('full_df.csv')

        my_dir = data_dir+'preprocessed_images/'
        print(my_dir)
        my_list = glob.glob(os.path.join(my_dir,'*.jpg'))
        
        right_name_list = data['Right-Fundus'].to_numpy()
        left_name_list = data['Left-Fundus'].to_numpy()
        filename_list = np.concatenate((right_name_list, left_name_list), axis=None)
        right_labels_list = data['Right Text'].to_numpy()
        left_labels_list = data['Left Text'].to_numpy()
        labels_list = np.concatenate((right_labels_list, left_labels_list), axis=None)

        self.labels = labels_list
        self.list_IDs = filename_list
        self.transform = transform
        print(self.list_IDs[1:10])

    def __len__(self):
        'Denotes the total number of samples'
        return len(self.labels)

    def __getitem__(self, index):
        """
        'Generates one sample of data'
        print("ICI")
        # Select sample
        ID = self.list_IDs[index]
        # Load data and get label
        X = torch.load('data/preprocessed_images/' + ID)
        y = self.labels[ID]
        return X, y
        """
        ID = self.list_IDs[index]
        img_path = '../archive/preprocessed_images/' + ID
        img = Image.open(img_path)
        print (img_path)
        img_transformed = self.transform(img)

        labels_unique = np.unique(self.labels)
        label = self.labels[index]
        label = np.where(labels_unique == label)[0][0]
        #print(self.label_list[idx], label)

        return img_transformed, label


#my_transforms = transforms.Compose ([
#    transforms.RandomRotation (degrees=45),
#    transforms.RandomHorizontalFlip(p=0.5),
#    transforms.ToTensor(),
#    transforms.Normalize (mean=[0.485,0.456,0.406],
#                          std=[0.229,0.224,0.225])
#])

my_transforms = transforms.Compose ([
    to_grayscale,
    transforms.ToTensor(),
    transforms.Normalize (mean=0.5,
                          std=0.229)
])

dataset=Dataset(data_dir='', transform=my_transforms)

img_num=0
for img, label in dataset:
    img_num +=1
img_num=img_num-1
save_image (img, tutu.jpg)

preprocessed_images/
['1_right.jpg' '2_right.jpg' '3_right.jpg' '4_right.jpg' '5_right.jpg'
 '6_right.jpg' '7_right.jpg' '8_right.jpg' '9_right.jpg']
../archive/preprocessed_images/0_right.jpg


AttributeError: 'JpegImageFile' object has no attribute 'sum'

In [31]:
my_transforms = transforms.Compose ([

    transforms.ToTensor(),
    transforms.Normalize (mean=[0.485,0.456,0.406],
                          std=[0.229,0.224,0.225])
])

dataset=Dataset(data_dir='', transform=my_transforms)
#trainset, testset = torch.utils.data.random_split(dataset, 0.8)

#for i, row in data.iterrows():
#    LeftImage=row['Left-Fundus']
#    RightImage=row['Right-Fundus']

preprocessed_images/
['1_right.jpg' '2_right.jpg' '3_right.jpg' '4_right.jpg' '5_right.jpg'
 '6_right.jpg' '7_right.jpg' '8_right.jpg' '9_right.jpg']
