# Organiser les fichiers en sous repertoires par classes

## Import

In [1]:
import pandas as pd
import numpy as np
import shutil
import os
from keras_preprocessing.image import ImageDataGenerator
import cv2

## Constantes

In [2]:
PATH_SRC = '/data/workspace_files/Ocular_dataset/ODIR_TrainingDataSet_Preprocess'             # repertoire source contenant les images
PATH_DST = '/data/workspace_files/Ocular_dataset/ODIR_TrainingDataSet_Organisation/'           # repertoire de destination contenant les images organisés par repertoire (classe)
PATH_DST_AUG = '/data/workspace_files/Ocular_dataset/ODIR_TrainingDataSet_Augmented/'          # repertoire contenant les images avec les classes reequibrés
PATH_DST_LIM = '/data/workspace_files/Ocular_dataset/ODIR_TrainingDataSet_Limited/'            # repertoire contenant les images mais en nombre limite (classe toujours equilibés)
FILE_NAME_FEATURES = 'X'                            # Nom du fichier de sauvegarde des données d'entrainement : features
FILE_NAME_TARGET = 'y'                             # Nom du fichier de sauvegarde des données d'entrainement : target
classes_columns = ['N', 'D', 'G', 'C', 'A', 'H', 'M', 'O']

# Lecture fichier de donnée d'annotation preprocess

In [3]:
df = pd.read_csv('/data/workspace_files/Ocular_dataset/ODIR_EYE_preprocess.csv', index_col=0)

# Organisation des images

Creation du dossier de classement

In [4]:
if ~os.path.exists(PATH_DST):
    os.makedirs(PATH_DST)

Copie des fichiers image en organisant les images par repertoire (classe)

In [7]:
for classe in classes_columns:
    dst = PATH_DST+classe+'/'
    if ~os.path.exists(dst):
        os.makedirs(dst, exist_ok=True)
    lst= df[df.Diag==classe].Fundus.tolist()
    print(f'Copy class {classe} in progress')
    for file in lst:
        src = os.path.join(PATH_SRC,file)
        shutil.copy(src, dst)

Copy class N in progress
Copy class D in progress
Copy class G in progress
Copy class C in progress
Copy class A in progress
Copy class H in progress
Copy class M in progress
Copy class O in progress


# Equilibrage des classes

Creation des images supplementaires afin de reequilibrer les classes

In [4]:
if ~os.path.exists(PATH_DST_AUG):
    os.makedirs(PATH_DST_AUG)

In [5]:

datagen = ImageDataGenerator(
    data_format='channels_last',
    rotation_range=40,
    height_shift_range=0.3,
    width_shift_range=0.2,
    zoom_range=0.2,
    brightness_range=[0.4,0.9],
    #horizontal_flip=True,
    #vertical_flip=True,
    fill_mode='constant')


Creation d'images supplementaires pour chaque classe

In [6]:

# copie de toutes les images dans un repertoire
print(f'Nombre d individu pour la classe N : 2816')
for classe in classes_columns:

    if ~os.path.exists(PATH_DST_AUG+'/'+classe):
        os.makedirs(PATH_DST_AUG+'/'+classe, exist_ok=True)
    list_file = os.listdir(PATH_DST+'/'+classe)
    n = len(list_file)
    k = round(2816/n)
    print(f'classe : {classe}  initial quantity image:  {n}   Ratio: {k}  => Final quantity image : {n*k}')

    for file in list_file:
        file_image = PATH_DST+'/'+classe+'/'+file
        #print('file_image : ' , file_image)

        image = cv2.imread(file_image, cv2.IMREAD_COLOR)
        #image = cv2.resize(image, (224,224))
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        image = np.expand_dims(image,0)
        datagen.fit(image)
        for x, val in zip(datagen.flow(image,
                                       save_to_dir=os.path.join(PATH_DST_AUG,classe),
                                       save_prefix=file[:-4],
                                       save_format='png'),
                                        range(k-1)) :
            x_tmp = x
            val_tmp = val

Nombre d individu pour la classe N : 2816
classe : N  initial quantity image:  2816   Ratio: 1  => Final quantity image : 2816
classe : D  initial quantity image:  1524   Ratio: 2  => Final quantity image : 3048
classe : G  initial quantity image:  242   Ratio: 12  => Final quantity image : 2904
classe : C  initial quantity image:  252   Ratio: 11  => Final quantity image : 2772
classe : A  initial quantity image:  241   Ratio: 12  => Final quantity image : 2892
classe : H  initial quantity image:  110   Ratio: 26  => Final quantity image : 2860
classe : M  initial quantity image:  244   Ratio: 12  => Final quantity image : 2928
classe : O  initial quantity image:  648   Ratio: 4  => Final quantity image : 2592


Copie restreinte (100 images par classe)
Objectif :  tester differentes structure de reseau sur un jeu de donnée reduit

In [11]:
if ~os.path.exists(PATH_DST_LIM):
    os.makedirs(PATH_DST_LIM)

In [12]:

# copie de 100 images dans un repertoire pour chaque classe
for classe in classes_columns:
    print(classe)

    if ~os.path.exists(PATH_DST_LIM+'/'+classe):
        os.makedirs(PATH_DST_LIM+'/'+classe)
    list_file = os.listdir(PATH_DST+'/'+classe)[0:99]
    n = len(list_file)
    k=1
    for file in list_file:
        file_image = PATH_DST+'/'+classe+'/'+file
        print('file_image : ' , file_image)

        image = cv2.imread(file_image, cv2.IMREAD_COLOR)
        #image = cv2.resize(image, (224,224))
        #image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        cv2.imwrite(os.path.join(os.path.join(PATH_DST_LIM,classe), file), image)

N
file_image :  /data/workspace_files/Ocular_dataset/ODIR_TrainingDataSet_Organisation//N/4689_right.jpg
file_image :  /data/workspace_files/Ocular_dataset/ODIR_TrainingDataSet_Organisation//N/4683_left.jpg
file_image :  /data/workspace_files/Ocular_dataset/ODIR_TrainingDataSet_Organisation//N/4672_left.jpg
file_image :  /data/workspace_files/Ocular_dataset/ODIR_TrainingDataSet_Organisation//N/4671_right.jpg
file_image :  /data/workspace_files/Ocular_dataset/ODIR_TrainingDataSet_Organisation//N/4660_right.jpg
file_image :  /data/workspace_files/Ocular_dataset/ODIR_TrainingDataSet_Organisation//N/4644_right.jpg
file_image :  /data/workspace_files/Ocular_dataset/ODIR_TrainingDataSet_Organisation//N/4642_right.jpg
file_image :  /data/workspace_files/Ocular_dataset/ODIR_TrainingDataSet_Organisation//N/4641_left.jpg
file_image :  /data/workspace_files/Ocular_dataset/ODIR_TrainingDataSet_Organisation//N/4639_left.jpg
file_image :  /data/workspace_files/Ocular_dataset/ODIR_TrainingDataSet_Org

# Export images vers fichier numpy

## Lecture des images et des targets

In [7]:

list_classe = os.listdir(PATH_DST_AUG)
df_a = pd.DataFrame(columns=['file_name', 'image', 'diag'])
list_all_images = []
train = []
labels = []
for classe in list_classe:
    path = os.path.join(PATH_DST_AUG, classe)
    list_images = [f for f in os.listdir(path) if os.path.isfile(os.path.join(path, f))]
    print(f'{classe} = {len(list_images)}')
    for image_name in list_images:
        image = cv2.imread(os.path.join(path, image_name))
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        #ajout des données dans un dataframe
        df_a.loc[len(df_a.index)] = [image_name, image, classe]

        train.append(image)
        labels.append(classe)
        list_all_images.append(image_name)

O = 2591
M = 2927
H = 2857
A = 2890
C = 2770
G = 2901
D = 3048
N = 2816


## stratification des classes

In [8]:
size = 22795
stratified = df_a.groupby('diag', group_keys=False)\
                        .apply(lambda x: x.sample(int(np.rint(size*len(x)/len(df_a)))))\
                        .sample(frac=1).reset_index(drop=True)

In [9]:
X = np.array(list(stratified.image))
y = np.array(list(stratified.diag))
files = np.array(list(stratified.file_name))

## Sauvegarde des tableaux de données

In [10]:
#from sklearn.model_selection import train_test_split
#X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 123)
# remplaver le split par du slicing afin de garder le lien avec le nom des fichiers
limit_train = int(size * 0.8)

X_train = X[:limit_train]
X_test = X[limit_train:]

files_train = files[:limit_train]
files_test = files[limit_train:]

y_train = y[:limit_train]
y_test = y[limit_train:]


X_train_1, X_train_2, y_train_1, y_train_2 = train_test_split(X_train, y_train, test_size = 0.4, random_state = 123)

In [11]:
PATH_NPY = '/data/workspace_files/Ocular_dataset/'
np.save(PATH_NPY+FILE_NAME_FEATURES+'_train', X_train)
np.save(PATH_NPY+FILE_NAME_FEATURES+'_test', X_test)
np.save(PATH_NPY+FILE_NAME_TARGET+'_train', y_train)
np.save(PATH_NPY+FILE_NAME_TARGET+'_test', y_test)
np.save(PATH_NPY+'name_file_train', files_train)
np.save(PATH_NPY+'name_file_test', files_test)