In [126]:
import csv
import operator
import os
import xml.etree.cElementTree as ET
from collections import Counter
from math import log, log1p
from pathlib import Path
from shutil import copyfile

import cv2
import numpy as np
import pandas as pd
from keras import applications
from keras.callbacks import ModelCheckpoint
from keras.layers import Flatten, Dense, Dropout, GlobalAveragePooling2D
from keras.layers.convolutional import Convolution2D, MaxPooling2D, ZeroPadding2D
from keras.models import Sequential, Model
from keras.optimizers import SGD
from keras.preprocessing.image import ImageDataGenerator, array_to_img, img_to_array, load_img
from keras.utils import to_categorical
from PIL import Image
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from tensorflow.python.client import device_lib

# Pillow reads this switch from the PIL.ImageFile module; assigning it on
# PIL.Image only creates an unused attribute and truncated files still fail.
from PIL import ImageFile
ImageFile.LOAD_TRUNCATED_IMAGES = True

# Initialisation - Batch Image Test Train Validation

In [2]:
# --- Run configuration -------------------------------------------------
data_path = './Data/'
batch_size = 10
epochs = 1
nbClasses = 5
img_rows = 224
img_cols = 224
img_channel = 3

# Path of the folder holding the images to process
path = "./Data/Test"
# Path of the .csv file containing the information
info = "./Data/test.csv"
# Keep the n_first classes with the highest probabilities
n_first = 4

# --- Image generators --------------------------------------------------
# Training images are multiplied by 1/255 and randomly sheared, zoomed and
# horizontally flipped; validation and test images are only multiplied by 1/255.
train_datagen = ImageDataGenerator(rescale=1./255, shear_range=0.2, zoom_range=0.2, horizontal_flip=True)
validation_datagen = ImageDataGenerator(rescale=1./255)
test_datagen = ImageDataGenerator(rescale=1./255)

# Every split is read with the same target size and batch size.
flow_options = dict(target_size=(img_rows, img_cols), batch_size=batch_size)

train_generator = train_datagen.flow_from_directory(data_path + 'Training/', **flow_options)
validation_generator = validation_datagen.flow_from_directory(data_path + 'Validation/', **flow_options)
test_generator = test_datagen.flow_from_directory(data_path + 'Test/', **flow_options)

Found 299 images belonging to 5 classes.
Found 129 images belonging to 5 classes.
Found 132 images belonging to 5 classes.


# Modèle VGG16

In [3]:
# Convolutional base: VGG16 with ImageNet weights, without its top classifier.
base_model = applications.VGG16(
    weights='imagenet',
    include_top=False,
    input_shape=(img_rows, img_cols, img_channel))

# Freeze every layer of the base so that only the new head is trainable.
for layer in base_model.layers:
    layer.trainable = False

# Classification head stacked on the flattened output of the base.
add_model = Sequential([
    Flatten(input_shape=base_model.output_shape[1:]),
    Dense(128, activation='relu'),
    Dense(nbClasses, activation='softmax'),
])

model_vgg16 = Model(inputs=base_model.input, outputs=add_model(base_model.output))
model_vgg16.compile(
    loss='categorical_crossentropy',
    optimizer='rmsprop',
    metrics=['accuracy'])

In [4]:
# Checkpoint callback: writes the model under data_path, keeping only the best
# one according to the monitored 'val_acc' value.
vgg16_checkpoint = ModelCheckpoint(
    data_path + 'VGG16-transferlearning.model',
    monitor='val_acc',
    save_best_only=True)

# One epoch consumes as many full batches as the training set contains.
model_vgg16.fit_generator(
    train_generator,
    steps_per_epoch=train_generator.n // batch_size,
    epochs=epochs,
    validation_data=validation_generator,
    callbacks=[vgg16_checkpoint])

Epoch 1/1


<keras.callbacks.History at 0x28834369a58>

# Modèle VGG19

In [5]:
# Convolutional base: VGG19 with ImageNet weights, without its top classifier.
base_model = applications.VGG19(
    weights='imagenet',
    include_top=False,
    input_shape=(img_rows, img_cols, img_channel))

# Freeze every layer of the base so that only the new head is trainable.
for layer in base_model.layers:
    layer.trainable = False

# Classification head: two 256-unit dense layers (tanh, then relu) followed by
# the softmax output over the classes.
add_model = Sequential([
    Flatten(input_shape=base_model.output_shape[1:]),
    Dense(256, activation='tanh'),
    Dense(256, activation='relu'),
    Dense(nbClasses, activation='softmax'),
])

model_vgg19 = Model(inputs=base_model.input, outputs=add_model(base_model.output))
model_vgg19.compile(
    loss='categorical_crossentropy',
    optimizer='rmsprop',
    metrics=['accuracy'])

In [6]:
# Checkpoint callback: writes the model under data_path, keeping only the best
# one according to the monitored 'val_acc' value.
vgg19_checkpoint = ModelCheckpoint(
    data_path + 'VGG19-transferlearning.model',
    monitor='val_acc',
    save_best_only=True)

# One epoch consumes as many full batches as the training set contains.
model_vgg19.fit_generator(
    train_generator,
    steps_per_epoch=train_generator.n // batch_size,
    epochs=epochs,
    validation_data=validation_generator,
    callbacks=[vgg19_checkpoint])

Epoch 1/1


<keras.callbacks.History at 0x28830ea1a58>

# Evaluation des méthodes

In [47]:
# Invert the generator's label -> index mapping so that a position in the
# model output can be translated back into its class label.
classes_dico = {index: label for label, index in train_generator.class_indices.items()}

def prediction(path, model, classes, n_first=None):
    """Classify one image file and return its best-scoring classes.

    The image is prepared the same way the generators of this notebook feed
    the models: RGB channel order and pixel values multiplied by 1/255,
    without any axis swap.

    Parameters
    ----------
    path : str
        Path of the image file to classify.
    model :
        Object exposing ``predict(batch)``; the first row of its result is
        used as the per-class scores.
    classes : mapping
        Maps an output index to its class label.
    n_first : int or None
        Number of best classes to keep; ``None`` keeps all of them.

    Returns
    -------
    list of (label, score) tuples, highest score first.

    Raises
    ------
    IOError
        If OpenCV cannot read the file.
    """
    image_bgr = cv2.imread(path)
    if image_bgr is None:
        # cv2.imread does not raise on a missing/unreadable file, it returns None.
        raise IOError("cannot read image: " + path)

    # 224x224 is the size the notebook's models are built for.
    image_bgr = cv2.resize(image_bgr, (224, 224))
    # OpenCV decodes to BGR; the training generators supplied RGB images.
    image_rgb = cv2.cvtColor(image_bgr, cv2.COLOR_BGR2RGB)
    # Same scaling as ImageDataGenerator(rescale=1./255), then add the batch axis.
    batch = np.expand_dims(image_rgb.astype(np.float32) / 255.0, axis=0)

    scores = model.predict(batch)[0]
    ranked = sorted(
        ((classes[i], scores[i]) for i in range(len(scores))),
        key=operator.itemgetter(1),
        reverse=True)
    # Slicing with None keeps the complete ranking.
    return ranked[:n_first]

def parcours(path, model , classes, top_n=None):
    """Run ``prediction`` on every image found in the sub-folders of ``path``.

    Parameters
    ----------
    path : str
        Folder containing one sub-folder per class; entries of ``path`` that
        are not folders are ignored.
    model, classes :
        Forwarded unchanged to ``prediction``.
    top_n : int or None
        Number of best classes kept per image. ``None`` (the default) uses
        the notebook-level ``n_first`` setting.

    Returns
    -------
    dict mapping the image file name without its extension to the ranking
    returned by ``prediction``. Images whose prediction fails are reported on
    standard output and left out of the result.
    """
    pred = dict()
    # Sorted so that the traversal order does not depend on the file system.
    for _dir in sorted(os.listdir(path)):
        class_dir = path+"/"+_dir
        if not os.path.isdir(class_dir):
            # Stray files next to the class folders would make listdir fail.
            continue
        for elt in sorted(os.listdir(class_dir)):
            image_path = class_dir+"/"+elt
            # splitext leaves names without an extension intact.
            image_name = os.path.splitext(elt)[0]
            try:
                if top_n is None:
                    top_n = n_first
                pred[image_name] = prediction(image_path, model, classes, top_n)
            except Exception as err:
                # Best effort: report the failing image with its cause and carry on.
                print ("erreur avec l'image "+image_path+" : "+str(err))
    return pred

# Rank the classes of every image under `path` with each of the two models.
vgg16_dico = parcours(path, model_vgg16 , classes_dico)
vgg19_dico = parcours(path, model_vgg19 , classes_dico)

erreur avec l'image ./Data/Test/331705/1656989.jpg
erreur avec l'image ./Data/Test/331705/1656989.jpg


In [1]:
def agrege(dico,tab):
    """Add the rank-weighted votes of one ranking to a running tally.

    Each entry of ``tab`` contributes ``-1 / ((rank + 1) * log(p))`` to its
    class, where ``rank`` is the 0-based position in ``tab`` and ``p`` the
    entry's probability.

    Parameters
    ----------
    dico : dict
        Tally mapping a class to its accumulated weight; updated in place.
    tab : sequence
        Entries whose item 0 is the class and item 1 its probability.

    Returns
    -------
    The same ``dico`` object, updated.
    """
    for rang in range(len(tab)):
        classes = tab[rang][0]
        score = tab[rang][1]
        if score == 0:
            # log(0) is undefined: use a tiny positive probability instead.
            proba = 1e-32
        elif score == 1:
            # log(1) == 0 would divide by zero below.
            # NOTE(review): 0.9 makes an exact 1.0 weigh less than e.g. 0.999;
            # confirm this is the intended behaviour.
            proba = 0.9
        else:
            proba = score
        val = -1.0/((rang+1.0)*log(proba))
        dico[classes] = dico.get(classes, 0.0) + val
    return dico

def id_to_name (path):
    """
    Build a dictionary mapping each ``ClassId`` of a CSV file to the string
    ``"<Family>-<Genus>-<Species>"`` taken from the same row.

    The first row of the file is the header and must contain the columns
    ``ClassId``, ``Species``, ``Genus`` and ``Family`` (in any order). Blank
    lines are skipped. When a ``ClassId`` appears several times, the last row
    wins. An empty file yields an empty dictionary.

    Raises ``ValueError`` if one of the required columns is missing from the
    header.
    """
    dico = dict()
    # newline='' lets the csv module handle line endings itself, as its
    # documentation requires.
    with open(path, 'r', newline='') as csvfile:
        reader = csv.reader(csvfile, delimiter=',')
        header = next(reader, None)
        if header is None:
            return dico
        index = header.index("ClassId")
        Species = header.index("Species")
        Genus = header.index("Genus")
        Family = header.index("Family")
        for row in reader:
            if not row:
                # csv.reader yields an empty list for a blank line.
                continue
            dico[row[index]] = row[Family]+"-"+row[Genus]+"-"+row[Species]
    return dico

def scrutin(dico, *pred):
    """Combine the rankings of several predictors into one decision per image.

    Parameters
    ----------
    dico : mapping
        Maps a class to the name reported for it in the result.
    *pred : dict
        One dictionary per predictor, mapping an image to its ranking (the
        sequence accepted by ``agrege``).

    Returns
    -------
    dict mapping each image of the first predictor to ``dico[winner]``, where
    ``winner`` is the class with the largest accumulated weight. The result
    is empty when fewer than two predictors are given. Images with no ranked
    class at all are left out.

    Raises
    ------
    KeyError
        If the winning class is not a key of ``dico``.
    """
    pred = list(pred)
    vote = dict()
    if len(pred) < 2:
        return vote
    first, others = pred[0], pred[1:]
    for image, ranking in first.items():
        tally = agrege(dict(), ranking)
        for other in others:
            # parcours() leaves out images whose prediction failed, so a later
            # predictor may not know this image; it then simply abstains.
            if image in other:
                tally = agrege(tally, other[image])
        if not tally:
            # Nothing was ranked for this image: no winner to report.
            continue
        # max() returns the first of equal maxima, like the head of a stable
        # descending sort.
        winner = max(tally.items(), key=operator.itemgetter(1))[0]
        vote[image] = dico[winner]
    return vote
        

In [199]:
# Load the ClassId -> name table, then let the two models vote on every image.
dico = id_to_name(info)
decision = scrutin(dico,vgg16_dico,vgg19_dico)

In [200]:
# One "<image> : <name>" line per decided image.
for image_id, species in decision.items():
    print ("{} : {}".format(image_id, species))

1445002 : Zygophyllaceae Larrea divaricata Cav.
1445036 : Zygophyllaceae Larrea divaricata Cav.
1445058 : Zygophyllaceae Larrea divaricata Cav.
1445066 : Zygophyllaceae Larrea divaricata Cav.
1445078 : Zygophyllaceae Larrea divaricata Cav.
1445105 : Zygophyllaceae Larrea divaricata Cav.
1445136 : Zygophyllaceae Larrea divaricata Cav.
1445154 : Zygophyllaceae Larrea divaricata Cav.
1445155 : Zygophyllaceae Larrea divaricata Cav.
1445160 : Zygophyllaceae Larrea divaricata Cav.
1445161 : Zygophyllaceae Larrea divaricata Cav.
1445179 : Zygophyllaceae Larrea divaricata Cav.
1445183 : Zygophyllaceae Larrea divaricata Cav.
1445184 : Zygophyllaceae Larrea divaricata Cav.
1445186 : Zygophyllaceae Larrea divaricata Cav.
1445199 : Zygophyllaceae Larrea divaricata Cav.
1445203 : Zygophyllaceae Larrea divaricata Cav.
1445209 : Zygophyllaceae Larrea divaricata Cav.
1445210 : Zygophyllaceae Larrea divaricata Cav.
1445211 : Zygophyllaceae Larrea divaricata Cav.
1445212 : Zygophyllaceae Larrea divarica