# Predict a dog breed

In [None]:
import os
import random
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

import cv2
import shutil
import pickle
import datetime
import scipy as sp
from scipy import linalg
from skimage import img_as_ubyte
from xml.etree import cElementTree as ElementTree
import tensorflow as tf
from tensorflow.keras.layers import concatenate
from tensorflow.keras.layers import Conv2D, SeparableConv2D
from tensorflow.keras.layers import Flatten, Dense, Activation
from tensorflow.keras.layers import BatchNormalization, Dropout
from tensorflow.keras.layers import MaxPooling2D, AveragePooling2D
from tensorflow.keras.layers import GlobalMaxPooling2D, GlobalAveragePooling2D
from tensorflow.python.client import device_lib
from tensorflow.keras import backend as K
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.applications.vgg19 import VGG19
from tensorflow.keras.applications.resnet50 import ResNet50
from tensorflow.keras.applications.xception import Xception
from tensorflow.keras.applications.inception_v3 import InceptionV3
from tensorflow.keras.applications.inception_resnet_v2 import InceptionResNetV2
from tensorflow.keras.applications import vgg19, resnet50, inception_resnet_v2, xception, inception_v3
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from tensorflow.keras.callbacks import TensorBoard, ReduceLROnPlateau, EarlyStopping

from tensorflow.keras.models import Sequential, Model
from tensorflow.keras import losses
from tensorflow.keras.optimizers import Adam, Adagrad
from tensorflow.keras import regularizers
from sklearn.model_selection import GridSearchCV
import keras

from tensorflow.keras.wrappers.scikit_learn import KerasClassifier
from sklearn.model_selection import RandomizedSearchCV, KFold
from sklearn.metrics import make_scorer, accuracy_score


In [None]:
class XmlListConfig(list):
    """List built from the children of an ElementTree element.

    Each child is converted according to its own content:

    * a child that has sub-elements becomes an ``XmlDictConfig`` when it has
      a single sub-element or its first two sub-elements carry different
      tags, otherwise a nested ``XmlListConfig``;
    * a child without sub-elements contributes its stripped text, and is
      skipped when that text is missing or blank.

    Parameters
    ----------
    aList : iterable of xml.etree Element
        Typically an element whose children all share one tag.
    """

    def __init__(self, aList):
        for element in aList:
            # len(element) is the number of sub-elements. It is used instead
            # of the element's truth value, whose use is deprecated.
            if len(element):
                # treat like dict
                if len(element) == 1 or element[0].tag != element[1].tag:
                    self.append(XmlDictConfig(element))
                # treat like list (first two sub-elements share a tag)
                else:
                    self.append(XmlListConfig(element))
            elif element.text:
                text = element.text.strip()
                if text:
                    self.append(text)

class XmlDictConfig(dict):
    """Dictionary view of an ElementTree element.

    The element's XML attributes are copied in first, then every child is
    stored under its tag:

    * a child with sub-elements becomes a nested ``XmlDictConfig`` (single
      sub-element, or first two sub-elements with different tags) or a
      ``{tag: XmlListConfig}`` dict (first two sub-elements share a tag);
      the child's own attributes are merged into that value;
    * a child without sub-elements but with attributes becomes a dict of
      those attributes (its text is not kept);
    * any other child maps to its raw ``text`` (possibly ``None``).

    Children that repeat a tag overwrite each other, so only the last one
    is retained.

    Parameters
    ----------
    parent_element : xml.etree Element
        Element to convert.
    """

    def __init__(self, parent_element):
        if parent_element.items():
            self.update(dict(parent_element.items()))
        for element in parent_element:
            # len(element) is the number of sub-elements. It is used instead
            # of the element's truth value, whose use is deprecated.
            if len(element):
                if len(element) == 1 or element[0].tag != element[1].tag:
                    aDict = XmlDictConfig(element)
                else:
                    aDict = {element[0].tag: XmlListConfig(element)}
                if element.items():
                    aDict.update(dict(element.items()))
                self.update({element.tag: aDict})
            elif element.items():
                self.update({element.tag: dict(element.items())})
            else:
                self.update({element.tag: element.text})

In [None]:
# Mount Google Drive inside the Colab VM at /content/drive; the zipped dataset
# is read from there and the trained model is written back to it.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
!unzip '/content/drive/MyDrive/Project6/images.zip'
!unzip '/content/drive/MyDrive/Project6/annotation.zip'

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
  inflating: annotation/Annotation/n02108089-boxer/n02108089_1560  
  inflating: annotation/Annotation/n02108089-boxer/n02108089_15702  
  inflating: annotation/Annotation/n02108089-boxer/n02108089_1571  
  inflating: annotation/Annotation/n02108089-boxer/n02108089_1575  
  inflating: annotation/Annotation/n02108089-boxer/n02108089_1619  
  inflating: annotation/Annotation/n02108089-boxer/n02108089_1626  
  inflating: annotation/Annotation/n02108089-boxer/n02108089_1654  
  inflating: annotation/Annotation/n02108089-boxer/n02108089_1672  
  inflating: annotation/Annotation/n02108089-boxer/n02108089_1675  
  inflating: annotation/Annotation/n02108089-boxer/n02108089_1690  
  inflating: annotation/Annotation/n02108089-boxer/n02108089_1748  
  inflating: annotation/Annotation/n02108089-boxer/n02108089_1757  
  inflating: annotation/Annotation/n02108089-boxer/n02108089_1775  
  inflating: annotation/Annotation/n02108089-boxer

In [None]:
# Locations on the Colab VM, all derived from the root data folder.
root_data = '/content/'
root_images = root_data + 'images/Images'              # extracted images
root_annotation = root_data + 'annotation/Annotation'  # extracted annotation xml files
dir_train = root_data + 'train'
dir_test = root_data + 'test'
dir_val = root_data + 'validation'

# Locations on the mounted Google Drive.
dir_model = root_data + 'drive/MyDrive/Project6/Model/'
savedfiles = root_data + 'drive/MyDrive/Project6/SavedFiles2/'

In [None]:
# Paths of every directory found below root_annotation. os.walk yields the
# root itself first, so that first entry is dropped.
directory_annotation_names = [
    dirpath for dirpath, _subdirs, _files in os.walk(root_annotation)
][1:]

In [None]:
# Build parallel lists describing every image that has an annotation file.
filenames = []   # image file name, with the .jpg extension
categories = []  # label taken from the annotation's object/name field
director = []    # name of the directory holding the image
widths = []      # width as written in the image's own annotation (text)
heights = []     # height as written in the image's own annotation (text)
count = 0        # annotations for which no matching .jpg exists
for dir_an_name in directory_annotation_names:
    directory = os.path.basename(dir_an_name)
    filenames_dir = os.listdir(dir_an_name)
    if not filenames_dir:
        # Nothing to read in an empty annotation directory.
        continue

    # The label is read once per directory, from its first annotation file,
    # and shared by every image of that directory.
    with open(os.path.join(dir_an_name, filenames_dir[0]), "r") as xml_file:
        xmldict = XmlDictConfig(ElementTree.XML(xml_file.read()))
    category = xmldict['object']['name']

    for filename_dir in filenames_dir:
        image_path = os.path.join(root_images, directory, filename_dir + ".jpg")
        if not os.path.isfile(image_path):
            count += 1
            continue

        # The size is specific to each image, so it must come from the
        # image's own annotation rather than from the first file of the
        # directory.
        with open(os.path.join(dir_an_name, filename_dir), "r") as xml_file:
            size = XmlDictConfig(ElementTree.XML(xml_file.read()))['size']

        director.append(directory)
        filenames.append(filename_dir + ".jpg")
        categories.append(category)
        widths.append(size['width'])
        heights.append(size['height'])

In [None]:
# One row per image, assembled from the parallel lists filled while scanning
# the annotation directories.
data = pd.DataFrame(
    dict(
        filename=filenames,
        category=categories,
        widths=widths,
        heights=heights,
        directory=director,
    )
)

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split

def split_stratified_into_train_val_test(df_input, stratify_colname='y',
                                         frac_train=0.6, frac_val=0.15, frac_test=0.25,
                                         random_state=None):
    '''
    Splits a Pandas dataframe into three subsets (train, val, and test)
    following fractional ratios provided by the user, where each subset is
    stratified by the values in a specific column (that is, each subset has
    the same relative frequency of the values in the column). It performs this
    splitting by running train_test_split() twice.

    Parameters
    ----------
    df_input : Pandas dataframe
        Input dataframe to be split.
    stratify_colname : str
        The name of the column that will be used for stratification. Usually
        this column would be for the label.
    frac_train : float
    frac_val   : float
    frac_test  : float
        The ratios with which the dataframe will be split into train, val, and
        test data. The values should be expressed as float fractions and should
        sum to 1.0 (checked with a small floating-point tolerance).
    random_state : int, None, or RandomStateInstance
        Value to be passed to train_test_split().

    Returns
    -------
    df_train, df_val, df_test :
        Dataframes containing the three splits.

    Raises
    ------
    ValueError
        If the fractions do not add up to 1.0, or if stratify_colname is not
        a column of df_input.
    '''
    import math  # local import so the function does not rely on another cell

    # Compare with a tolerance: an exact test rejects valid inputs such as
    # 0.7 + 0.2 + 0.1, which evaluates to 0.9999999999999999.
    if not math.isclose(frac_train + frac_val + frac_test, 1.0,
                        rel_tol=1e-9, abs_tol=1e-9):
        raise ValueError('fractions %f, %f, %f do not add up to 1.0' % \
                         (frac_train, frac_val, frac_test))

    if stratify_colname not in df_input.columns:
        raise ValueError('%s is not a column in the dataframe' % (stratify_colname))

    X = df_input # Contains all columns.
    y = df_input[[stratify_colname]] # Dataframe of just the column on which to stratify.

    # Split original dataframe into train and temp dataframes.
    df_train, df_temp, y_train, y_temp = train_test_split(X,
                                                          y,
                                                          stratify=y,
                                                          test_size=(1.0 - frac_train),
                                                          random_state=random_state)

    # Split the temp dataframe into val and test dataframes. The test share is
    # expressed relative to the temp dataframe, not to the original one.
    relative_frac_test = frac_test / (frac_val + frac_test)
    df_val, df_test, y_val, y_test = train_test_split(df_temp,
                                                      y_temp,
                                                      stratify=y_temp,
                                                      test_size=relative_frac_test,
                                                      random_state=random_state)

    # Every input row must land in exactly one of the three splits.
    assert len(df_input) == len(df_train) + len(df_val) + len(df_test)

    return df_train, df_val, df_test

In [None]:
# Stratify on the breed label: 60 % train, 15 % validation, 25 % test.
# The fixed random_state makes the split repeatable.
train_df, validate_df, test_df = split_stratified_into_train_val_test(
    data,
    stratify_colname='category',
    frac_train=0.6,
    frac_val=0.15,
    frac_test=0.25,
    random_state=1,
)

In [None]:
# Create empty train, test and validation directories. A directory left over
# from a previous run is deleted first so that no stale image remains.
for _split_dir in (dir_train, dir_test, dir_val):
    if os.path.isdir(_split_dir):
        shutil.rmtree(_split_dir)
    os.mkdir(_split_dir)

In [None]:
def _copy_split_images(split_df, destination):
    """Copy the images listed in ``split_df`` into ``destination``.

    Parameters
    ----------
    split_df : Pandas dataframe
        Rows with a 'directory' and a 'filename' column; the source path of
        an image is root_images/<directory>/<filename>.
    destination : str
        Directory receiving the copies.

    Returns
    -------
    list of str
        File names whose source file does not exist (nothing is copied for
        them).
    """
    missing = []
    for _, row in split_df.iterrows():
        path = root_images + "/" + row['directory'] + "/" + row['filename']
        if os.path.isfile(path):
            shutil.copy(path, destination)
        else:
            missing.append(row['filename'])
    return missing


# Copy the files of each split into its own directory, keeping track of the
# file names that could not be found.
file_error_train = _copy_split_images(train_df, dir_train)
file_error_val = _copy_split_images(validate_df, dir_val)
file_error_test = _copy_split_images(test_df, dir_test)

In [None]:
# Halve the learning rate (factor=0.5) whenever val_accuracy has gone
# 3 epochs without improving, never going below 1e-6.
learning_rate_reduction = ReduceLROnPlateau(
    monitor='val_accuracy',
    factor=0.5,
    patience=3,
    min_lr=0.000001,
    verbose=1,
)

In [None]:
# Stop training once the validation loss (val_loss) has gone 5 epochs
# without improving.
earlystop = EarlyStopping(monitor='val_loss', patience=5)

In [None]:
# RMSprop optimizer handed to model.compile(); 0.01 is only the initial
# learning rate, since the ReduceLROnPlateau callback may lower it.
optimiseur = tf.keras.optimizers.RMSprop(learning_rate=0.01)

In [None]:
# Size passed as target_size to the data generators: every image they load
# is resized to it.
IMAGE_SIZE = (224, 224)

In [None]:
# InceptionV3's own input preprocessing, applied by all three generators.
preprocessing_image = tf.keras.applications.inception_v3.preprocess_input

# Random augmentation, used for the training images only.
_train_augmentation = dict(
    rotation_range=40,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    channel_shift_range=10,
    horizontal_flip=True,
    brightness_range=[0.5, 1.5],
    fill_mode='nearest',
)
train_datagen = ImageDataGenerator(preprocessing_function=preprocessing_image,
                                   **_train_augmentation)

# Validation and test images are preprocessed but never augmented.
validation_datagen = ImageDataGenerator(preprocessing_function=preprocessing_image)
test_datagen = ImageDataGenerator(preprocessing_function=preprocessing_image)

In [None]:
# Batches of 128 augmented training images with one-hot ("categorical")
# labels, read from dir_train using the file names listed in train_df.
train_generator = train_datagen.flow_from_dataframe(
    dataframe=train_df,
    directory=dir_train,
    x_col='filename',
    y_col='category',
    class_mode='categorical',
    target_size=IMAGE_SIZE,
    batch_size=128,
)

Found 12348 validated image filenames belonging to 120 classes.


In [None]:
# Mapping built by the training generator from each class label to its
# integer index; used at the end to turn a predicted index back into a label.
lab_dic = train_generator.class_indices

In [36]:
# Number of distinct classes found in the training split.
len(train_generator.class_indices)

120

In [None]:
# Batches of 128 validation images (preprocessing only), read from dir_val
# using the file names listed in validate_df.
validation_generator = validation_datagen.flow_from_dataframe(
    dataframe=validate_df,
    directory=dir_val,
    x_col="filename",
    y_col="category",
    class_mode="categorical",
    target_size=IMAGE_SIZE,
    batch_size=128,
)

Found 3087 validated image filenames belonging to 120 classes.


In [None]:
# Batches of 128 test images (preprocessing only), read from dir_test using
# the file names listed in test_df.
# NOTE(review): shuffle is left at its default; if predictions are ever
# compared with test_generator.classes, confirm whether shuffle=False is needed.
test_generator = test_datagen.flow_from_dataframe(
    dataframe=test_df,
    directory=dir_test,
    x_col="filename",
    y_col="category",
    class_mode="categorical",
    target_size=IMAGE_SIZE,
    batch_size=128,
)

Found 5145 validated image filenames belonging to 120 classes.


In [None]:
# Pre-trained InceptionV3 convolutional base (ImageNet weights, no top classifier)
InceptionV3Model = tf.keras.applications.InceptionV3(include_top=False, weights='imagenet')

# Freeze the base so that only the new classification head is trained
for layer in InceptionV3Model.layers:
    layer.trainable = False

# New head: global average pooling, two ReLU dense layers, 120-way softmax
x = GlobalAveragePooling2D()(InceptionV3Model.output)
for _units in (1024, 512):
    x = Dense(_units, activation='relu')(x)
output = Dense(120, activation='softmax')(x)

# Full model: same inputs as the base, new softmax output
model = Model(inputs=InceptionV3Model.inputs, outputs=output)

model.compile(
    optimizer=optimiseur,
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/inception_v3/inception_v3_weights_tf_dim_ordering_tf_kernels_notop.h5


In [23]:
# Train for at most 20 epochs; the callbacks may lower the learning rate or
# stop early. 128 is the generators' batch size, and the floor division means
# a trailing partial batch is not counted in the steps of an epoch.
history = model.fit(
    train_generator,
    validation_data=validation_generator,
    epochs=20,
    steps_per_epoch=len(train_df) // 128,
    validation_steps=len(validate_df) // 128,
    callbacks=[learning_rate_reduction, earlystop],
)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20

Epoch 00008: ReduceLROnPlateau reducing learning rate to 0.004999999888241291.
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20


In [24]:
# Persist the result under dir_model: first the weights alone, then the
# complete model in a single file.
model.save_weights(dir_model + 'model_weights.h5')
model.save(dir_model + 'model.h5')

In [25]:
# Reload the complete model from the file written by model.save(); the
# prediction cells below use this reloaded instance.
model = tf.keras.models.load_model(dir_model + 'model.h5')

In [26]:
# Pick a sample image: the second entry listed in the image folder that
# corresponds to the first annotation directory.
directory = directory_annotation_names[0].rsplit('/', 1)[-1]
path_first_directory = "/".join((root_images, directory))
img = os.listdir(path_first_directory)[1]
img_path = path_first_directory + "/" + img
img_path

'/content/images/Images/n02097209-standard_schnauzer/n02097209_3543.jpg'

In [27]:
# Load the image resized to 224x224 and convert it to an array
img = img_to_array(load_img(img_path, target_size=(224, 224)))
# Add a leading batch axis (a single sample with 3 channels), then apply the
# InceptionV3 pixel preprocessing
img = inception_v3.preprocess_input(np.expand_dims(img, axis=0))

In [None]:
# Predict the class scores of the single preprocessed image; the result has
# one row per input image and one column per class.
pred = model.predict(img)

In [29]:
# Display the raw softmax output (one score per class).
pred

array([[1.59075442e-09, 6.64250352e-14, 2.61893301e-23, 1.50800531e-12,
        0.00000000e+00, 2.53131240e-08, 2.80343172e-15, 2.63288091e-35,
        6.94612102e-34, 4.00270045e-30, 8.72670964e-04, 1.80895914e-14,
        6.35160177e-05, 1.56111014e-07, 0.00000000e+00, 2.70850246e-26,
        2.67433584e-25, 2.84898092e-15, 2.15701785e-08, 1.23338171e-18,
        4.47381716e-34, 2.69818318e-31, 1.29485237e-34, 0.00000000e+00,
        1.03029841e-23, 9.37443048e-08, 2.05947540e-14, 1.00591575e-26,
        1.02094773e-23, 7.07043018e-12, 9.69086359e-34, 0.00000000e+00,
        3.07865837e-25, 4.58280567e-29, 8.55057171e-12, 6.31132732e-28,
        3.77846154e-05, 4.16427994e-14, 2.77661590e-27, 1.94345394e-05,
        5.84394406e-23, 4.75945001e-14, 3.50627929e-11, 1.59372466e-05,
        6.59274118e-16, 1.70379473e-16, 3.53275659e-10, 1.06448162e-04,
        9.17533362e-06, 1.30214960e-06, 2.06710897e-19, 9.21086596e-10,
        5.77547149e-28, 3.63222148e-20, 2.27455382e-22, 1.116342

In [32]:
# Index of the highest score in each row of pred; an array holding one index
# per predicted image (a single one here).
val_pred = np.argmax(pred, axis=1)

In [44]:
# lab_dic maps label -> index; invert it to look the label up by index.
# The first predicted index is converted to a plain int explicitly, because
# searching a list for a NumPy array only works when the array holds exactly
# one element.
_index_to_label = {index: label for label, index in lab_dic.items()}
race = _index_to_label[int(np.ravel(val_pred)[0])]

In [41]:
# Score of the predicted class for the first image, expressed as a percentage.
proba = pred[0][val_pred] *100

In [42]:
# Display the percentage computed for the predicted class.
proba

array([47.0887], dtype=float32)

In [45]:
# Report the predicted breed and its probability (the message is in French:
# "The dog's breed is: ... with a probability of ...").
# The value is converted to a plain float first: formatting a one-element
# array with %.2f relies on an implicit array-to-scalar conversion that NumPy
# has deprecated.
print('La race du chien est: %s avec une probabilité de  (%.2f%%)' % (race, float(np.ravel(proba)[0])))

La race du chien est: miniature_schnauzer avec une probabilité de  (47.09%)
