# **Import**
---

In [8]:
import numpy as np 
import os
import shutil
import math
import cv2
import tensorflow as tf
import json 
import matplotlib.pyplot as plt
import pandas as pd
import random as rd
import pickle as pk

from sklearn.decomposition import PCA
from sklearn import preprocessing
from time import time
from tensorflow import keras
from keras.preprocessing import image
from keras.models import Model, load_model
from keras import backend as K
from skimage.feature import greycomatrix, greycoprops, local_binary_pattern
from keras.applications.vgg16 import VGG16, preprocess_input #224*224
from keras.applications.xception import Xception, preprocess_input, decode_predictions #299*299
from keras.applications.mobilenet import MobileNet, preprocess_input, decode_predictions #224*224
from tqdm import tqdm
from keras_preprocessing.image import load_img, img_to_array
# Report the library versions this notebook was executed with.
print(f"Tensorflow version: {tf.__version__}")
print(f"Keras version: {tf.keras.__version__}")

Tensorflow version: 2.9.2
Keras version: 2.9.0


# **Database download**
---

In [None]:
# Download the dataset archive into the current working directory, extract it
# there and delete the archive afterwards. All three commands use the same
# relative path so the cell does not depend on the notebook running from /content.
!wget https://github.com/sidimahmoudi/facenet_tf2/releases/download/AI_MIR_CLOUD/MIR_DATASETS_B.zip
!unzip -q MIR_DATASETS_B.zip
!rm MIR_DATASETS_B.zip

# **Database preprocessing**
---

In [3]:
# Flatten the dataset: copy every file found two directory levels down
# (MIR_DATASETS_B/<animal>/<type_animal>/<file>) into MIR_DATASETS_B itself.
dataset_root = 'MIR_DATASETS_B'
for animal in os.listdir(dataset_root):
    animal_dir = os.path.join(dataset_root, animal)
    # After a first run the root also holds the copied images; skipping plain
    # files keeps the cell re-runnable instead of failing in os.listdir.
    if not os.path.isdir(animal_dir):
        continue
    for type_animal in os.listdir(animal_dir):
        type_dir = os.path.join(animal_dir, type_animal)
        if not os.path.isdir(type_dir):
            continue
        for file_name in os.listdir(type_dir):
            # copy2 keeps the file name and metadata; the originals stay in place.
            shutil.copy2(os.path.join(type_dir, file_name), dataset_root)

# **Indexation Deep Learning**
---

In [5]:
def dl_predict(filename, model, size, preprocess = None):
    """Run one image through `model` and return the first row of its output.

    Args:
        filename: path of the image to load.
        model: Keras model used for prediction.
        size: (height, width) the image is resized to while loading.
        preprocess: optional callable applied to the (1, H, W, C) batch before
            prediction. When omitted it is chosen from `model.name`.

    Returns:
        NumPy array holding the model output for that single image.
    """
    if preprocess is None:
        # The notebook imports `preprocess_input` from three applications under
        # the same name, so the global one is whichever was imported last and
        # was applied to every network. Keras documents a different input
        # normalisation for VGG16 than for Xception/MobileNet, so pick the
        # function that belongs to the model actually being used.
        model_name = getattr(model, 'name', '') or ''
        if model_name.startswith('vgg16'):
            from keras.applications.vgg16 import preprocess_input as preprocess
        elif model_name.startswith('xception'):
            from keras.applications.xception import preprocess_input as preprocess
        elif model_name.startswith('mobilenet'):
            from keras.applications.mobilenet import preprocess_input as preprocess
        else:
            # Unknown model: keep the previous behaviour (module-level function).
            preprocess = preprocess_input

    img = img_to_array(load_img(filename, target_size = size))
    # Add the leading batch axis the model expects.
    batch = img.reshape((1, img.shape[0], img.shape[1], img.shape[2]))
    batch = preprocess(batch)
    # verbose = 0: callers already show a tqdm bar; a Keras progress line per
    # image only floods the cell output.
    feature = model.predict(batch, verbose = 0)
    return np.array(feature[0])

def generate(generate_func, source, output, last_layer = False):
    """Run a feature generator, persist its result and report the elapsed time.

    Args:
        generate_func: callable invoked as generate_func(source, last_layer).
        source: forwarded unchanged to `generate_func`.
        output: file name handed to save() together with the generated features.
        last_layer: forwarded unchanged to `generate_func`.

    Returns:
        Wall-clock seconds spent generating and saving, rounded to 3 decimals.
    """
    started_at = time()
    # The features are handed straight to save() so no reference outlives the call.
    save(generate_func(source, last_layer), output)
    print(f'[INFO] Indexation : {generate_func.__name__} --> Done')
    elapsed = time() - started_at
    return round(elapsed, 3)

def save(data, file_name):
    """Serialise `data` as JSON into `file_name`, overwriting any existing file.

    JsonCustomEncoder is used so NumPy values inside `data` can be written.
    """
    with open(file_name, 'w') as json_file:
        json.dump(
            data,
            json_file,
            cls = JsonCustomEncoder,
        )

class JsonCustomEncoder(json.JSONEncoder):
    """JSON encoder that also accepts NumPy arrays and NumPy scalars.

    Arrays become (nested) lists and scalars become the matching Python value.
    Anything else is left to the standard encoder, which raises TypeError for
    unsupported objects.
    """
    def default(self, obj):
        # np.generic is the common base of all NumPy scalar types, so np.bool_
        # is covered as well as the numeric types np.number already matched.
        if isinstance(obj, (np.ndarray, np.generic)):
            return obj.tolist()
        return super().default(obj)

def VGG16_init(last_layer):
    """Build a VGG16 network with ImageNet weights for 224x224 RGB inputs.

    `last_layer` is forwarded as `include_top`. Global average pooling is
    requested; per the Keras documentation linked below, `pooling` only takes
    effect when `include_top` is False.
    """
    # https://keras.io/api/applications/vgg/
    options = dict(
        include_top = last_layer,
        weights = 'imagenet',
        input_shape = (224, 224, 3),
        pooling = 'avg',
    )
    return VGG16(**options)

def Xception_init(last_layer):
    """Build an Xception network with ImageNet weights for 299x299 RGB inputs.

    `last_layer` is forwarded as `include_top`. Global average pooling is
    requested; per the Keras documentation linked below, `pooling` only takes
    effect when `include_top` is False.
    """
    # https://keras.io/api/applications/xception/
    options = dict(
        include_top = last_layer,
        weights = 'imagenet',
        input_shape = (299, 299, 3),
        pooling = 'avg',
    )
    return Xception(**options)

def MobileNet_init(last_layer):
    """Build a MobileNet network with ImageNet weights for 224x224 RGB inputs.

    `last_layer` is forwarded as `include_top`. Global average pooling is
    requested; per the Keras documentation linked below, `pooling` only takes
    effect when `include_top` is False.
    """
    # https://keras.io/api/applications/mobilenet/
    options = dict(
        include_top = last_layer,
        weights = 'imagenet',
        input_shape = (224, 224, 3),
        pooling = 'avg',
    )
    return MobileNet(**options)

def generateVGG16(folder, last_layer):
    """Compute a VGG16 feature vector for every '.jpg' file directly in `folder`.

    Args:
        folder: directory that is listed (non-recursively) for images.
        last_layer: forwarded to VGG16_init, i.e. the model's `include_top`.

    Returns:
        List of single-entry dicts {image_name: feature}, where image_name is
        the file name without its extension.
    """
    model = VGG16_init(last_layer)
    data = list()
    for path in tqdm(os.listdir(folder)):
        # Test the actual extension: a substring check also accepted names
        # such as 'x.jpg.bak'.
        if not path.endswith('.jpg'):
            continue
        feature = dl_predict(os.path.join(folder, path), model, (224, 224))
        # splitext copes with dots inside the stem, where unpacking
        # path.split(".") into two names raised ValueError.
        num_image = os.path.splitext(path)[0]
        data.append({num_image : feature})
    return data

def generateXception(folder, last_layer):
    """Compute an Xception feature vector for every '.jpg' file directly in `folder`.

    Args:
        folder: directory that is listed (non-recursively) for images.
        last_layer: forwarded to Xception_init, i.e. the model's `include_top`.

    Returns:
        List of single-entry dicts {image_name: feature}, where image_name is
        the file name without its extension.
    """
    model = Xception_init(last_layer)
    data = list()
    for path in tqdm(os.listdir(folder)):
        # Test the actual extension: a substring check also accepted names
        # such as 'x.jpg.bak'.
        if not path.endswith('.jpg'):
            continue
        feature = dl_predict(os.path.join(folder, path), model, (299, 299))
        # splitext copes with dots inside the stem, where unpacking
        # path.split(".") into two names raised ValueError.
        num_image = os.path.splitext(path)[0]
        data.append({num_image : feature})
    return data

def generateMobileNet(folder, last_layer):
    """Compute a MobileNet feature vector for every '.jpg' file directly in `folder`.

    Args:
        folder: directory that is listed (non-recursively) for images.
        last_layer: forwarded to MobileNet_init, i.e. the model's `include_top`.

    Returns:
        List of single-entry dicts {image_name: feature}, where image_name is
        the file name without its extension.
    """
    model = MobileNet_init(last_layer)
    data = list()
    for path in tqdm(os.listdir(folder)):
        # Test the actual extension: a substring check also accepted names
        # such as 'x.jpg.bak'.
        if not path.endswith('.jpg'):
            continue
        feature = dl_predict(os.path.join(folder, path), model, (224, 224))
        # splitext copes with dots inside the stem, where unpacking
        # path.split(".") into two names raised ValueError.
        num_image = os.path.splitext(path)[0]
        data.append({num_image : feature})
    return data

In [None]:
# Index the dataset with each network; generate() returns the seconds each run took.
time_VGG16 = generate(generateVGG16, 'MIR_DATASETS_B/', 'VGG16_false.json', last_layer = False)
time_Xcpetion = generate(generateXception, 'MIR_DATASETS_B/', 'XCEPTION_false.json', last_layer = False)
time_MobileNet = generate(generateMobileNet, 'MIR_DATASETS_B/', 'MOBILENET_false.json', last_layer = False)
time_Xcpetion_true = generate(generateXception, 'MIR_DATASETS_B/', 'XCEPTION_true.json', last_layer = True)

# Timings are stored truncated to whole seconds.
t = {
    'vgg16_false' : int(time_VGG16),
    'xception_false' : int(time_Xcpetion),
    'xception_true' : int(time_Xcpetion_true),
    'mobilenet' : int(time_MobileNet),
}
with open('dl_time.json', 'w') as timing_file:
    json.dump(t, timing_file, cls = JsonCustomEncoder)

# **Indexation PCA**


---



In [34]:
def _jpg_paths(folder):
    """Yield the path of every entry of `folder` whose name contains '.jpg'."""
    for entry in tqdm(os.listdir(folder)):
        # Substring filter kept on purpose: pca_generate rebuilds the image
        # names with this very same test and both lists must line up.
        if '.jpg' in entry:
            yield os.path.join(folder, entry)


def _from_image(per_image):
    """Turn a function of a decoded BGR image into a function of a file path."""
    def extract(path):
        img = cv2.imread(path)
        if img is None:
            # cv2.imread reports failure by returning None rather than raising.
            raise IOError(f'Could not read image: {path}')
        return per_image(img)
    return extract


def _bgr_histogram(img):
    """Flattened 256-bin histograms of the B, G and R channels, in that order."""
    channels = [cv2.calcHist([img], [index], None, [256], [0, 256]) for index in range(3)]
    return np.concatenate(channels, axis=None)


def _hsv_histogram(img):
    """Flattened histograms of H (180 bins), S and V (256 bins each)."""
    hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
    hist_h = cv2.calcHist([hsv], [0], None, [180], [0, 180])
    hist_s = cv2.calcHist([hsv], [1], None, [256], [0, 256])
    hist_v = cv2.calcHist([hsv], [2], None, [256], [0, 256])
    return np.concatenate((hist_h, hist_s, hist_v), axis=None)


def _glcm_properties(img):
    """Six co-occurrence matrix properties of the grey image, concatenated."""
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    glcm = greycomatrix(gray, distances=[1, -1], angles=[0, np.pi/4, np.pi/2, 3*np.pi/4], normed=True)
    properties = ('contrast', 'dissimilarity', 'homogeneity', 'energy', 'correlation', 'ASM')
    return np.concatenate([greycoprops(glcm, prop).ravel() for prop in properties])


def _lbp_histogram(img, points = 8, radius = 1, method = 'default', sub_size = (70, 70)):
    """Per-cell LBP histograms of the 350x350 grey image, concatenated."""
    gray = cv2.resize(cv2.cvtColor(img, cv2.COLOR_BGR2GRAY), (350, 350))
    lbp = local_binary_pattern(gray, points, radius, method)
    n_bins = int(2 ** points)
    cell_hists = []
    for row in range(lbp.shape[0] // sub_size[0]):
        for col in range(lbp.shape[1] // sub_size[1]):
            cell = lbp[row * sub_size[0]:(row + 1) * sub_size[0],
                       col * sub_size[1]:(col + 1) * sub_size[1]]
            counts, _ = np.histogram(cell.ravel(), bins = n_bins, range = (0, n_bins))
            cell_hists.append(counts)
    # One concatenation instead of growing the vector cell by cell; cast to
    # float64 because the incremental version started from an empty float list.
    return np.concatenate(cell_hists, axis=None).astype(np.float64)


def _sift_descriptor():
    """Return a per-image SIFT extractor working on a copy scaled to 30%."""
    sift = cv2.SIFT_create()
    def per_image(img):
        # img.shape is (rows, cols, channels) whereas cv2.resize takes
        # (width, height); unpacking them the other way round distorted the
        # aspect ratio of every non-square image.
        height, width = img.shape[:2]
        small = cv2.resize(img, (int(width * 0.3), int(height * 0.3)))
        _, descriptors = sift.detectAndCompute(small, None)
        return descriptors
    return per_image


def _orb_descriptor():
    """Return a per-image ORB extractor."""
    orb = cv2.ORB_create()
    def per_image(img):
        _, descriptors = orb.detectAndCompute(img, None)
        return descriptors
    return per_image


def _hog_descriptor():
    """Return a per-image HOG extractor for 350x350 grey images."""
    win_size = (350, 350)
    block_size = (50, 50)
    block_stride = (25, 25)
    cell_size = (25, 25)
    n_bins = 9
    hog = cv2.HOGDescriptor(win_size, block_size, block_stride, cell_size, n_bins)
    def per_image(img):
        gray = cv2.resize(cv2.cvtColor(img, cv2.COLOR_BGR2GRAY), win_size)
        return hog.compute(gray)
    return per_image


def _cnn_extractor(algo_choice, last_layer):
    """Return a per-path extractor backed by a pretrained Keras network."""
    # The notebook imports `preprocess_input` three times under one name, so
    # the module-level function is MobileNet's; import the one matching the
    # selected network instead.
    if algo_choice == 'VGG16':
        from keras.applications.vgg16 import preprocess_input as preprocess
        build, size = VGG16, (224, 224)
    elif algo_choice == 'XCEPTION':
        from keras.applications.xception import preprocess_input as preprocess
        build, size = Xception, (299, 299)
    else:
        from keras.applications.mobilenet import preprocess_input as preprocess
        build, size = MobileNet, (224, 224)
    model = build(include_top = last_layer, weights = 'imagenet', input_shape = size + (3,), pooling = 'avg')

    def extract(path):
        img = img_to_array(load_img(path, target_size = size))
        batch = preprocess(img.reshape((1,) + img.shape))
        # verbose = 0: tqdm already reports progress for the whole folder.
        return np.array(model.predict(batch, verbose = 0)[0])
    return extract


def extractReqFeatures(folder, algo_choice, last_layer = False):
    """Extract one feature descriptor per '.jpg' entry of `folder`.

    Args:
        folder: directory listed (non-recursively) for images.
        algo_choice: one of 'BGR', 'HSV', 'SIFT', 'ORB', 'GLCM', 'LBP', 'HOG',
            'VGG16', 'XCEPTION', 'MOBILENET'.
        last_layer: `include_top` flag of the network; only used by the three
            deep-learning choices.

    Returns:
        List with one descriptor per image, in os.listdir order. SIFT/ORB
        entries are whatever detectAndCompute returned for that image.

    Raises:
        ValueError: if `algo_choice` is not a supported name (this used to
            return None silently).
        IOError: if OpenCV cannot read one of the images.
    """
    per_image = {
        'BGR': _bgr_histogram,
        'HSV': _hsv_histogram,
        'GLCM': _glcm_properties,
        'LBP': _lbp_histogram,
    }
    # These need a detector object, created once rather than once per image.
    detector_factories = {
        'SIFT': _sift_descriptor,
        'ORB': _orb_descriptor,
        'HOG': _hog_descriptor,
    }
    networks = ('VGG16', 'XCEPTION', 'MOBILENET')

    if algo_choice in per_image:
        extract = _from_image(per_image[algo_choice])
    elif algo_choice in detector_factories:
        extract = _from_image(detector_factories[algo_choice]())
    elif algo_choice in networks:
        extract = _cnn_extractor(algo_choice, last_layer)
    else:
        supported = sorted(list(per_image) + list(detector_factories) + list(networks))
        raise ValueError(f'Unknown algo_choice {algo_choice!r}; expected one of {supported}')

    return [extract(path) for path in _jpg_paths(folder)]

def pca_generate(folder, algo, last_layer, file_output):
    """Extract features, project them with a fitted PCA and write the results.

    Three files are written, all prefixed with `file_output`:
      * '.pkl'       - the fitted PCA model (pickle),
      * '.json'      - list of {image_name: projected_feature} entries,
      * '_time.json' - whole seconds spent on extraction, PCA and JSON dump.

    Args:
        folder: directory holding the '.jpg' images.
        algo: descriptor name forwarded to extractReqFeatures.
        last_layer: forwarded to extractReqFeatures.
        file_output: path prefix of the generated files.

    Raises:
        ValueError: if the number of image names differs from the number of
            extracted features.
    """
    start_feature = time()
    data = extractReqFeatures(folder, algo, last_layer)
    end_feature = time()

    # NOTE(review): names come from a second os.listdir call using the same
    # '.jpg' filter as extractReqFeatures; pairing them with the features
    # assumes both listings return the entries in the same order.
    name = [os.path.splitext(path)[0] for path in os.listdir(folder) if '.jpg' in path]
    if len(name) != len(data):
        # zip() below would otherwise drop the surplus silently.
        raise ValueError(f'{len(name)} image names but {len(data)} feature vectors for {folder}')

    start_pca = time()
    pca = PCA()
    # The model has to be fitted before it can transform anything; it is also
    # pickled only after fitting so the saved file is usable on query images.
    pca.fit(data)
    with open(file_output + '.pkl', 'wb') as f:
        pk.dump(pca, f)
    pca_data = pca.transform(data)
    end_pca = time()

    start_json = time()
    data_json = [{pca_name : pca_feature} for pca_feature, pca_name in zip(pca_data, name)]
    with open(file_output + '.json', 'w') as f:
        json.dump(data_json, f, cls = JsonCustomEncoder)
    end_json = time()

    with open(file_output + '_time.json', 'w') as f:
        t = {
            'extract_feature' : int(end_feature - start_feature),
            'pca' : int(end_pca - start_pca),
            'json dump' : int(end_json - start_json)
        }
        json.dump(t, f, cls = JsonCustomEncoder)

# NOTE(review): this class is also defined in the "Indexation Deep Learning"
# section; this later definition is the one in effect once this cell has run.
class JsonCustomEncoder(json.JSONEncoder):
    """JSON encoder that also accepts NumPy arrays and NumPy scalars.

    Arrays become (nested) lists and scalars become the matching Python value.
    Anything else is left to the standard encoder, which raises TypeError for
    unsupported objects.
    """
    def default(self, obj):
        # np.generic is the common base of all NumPy scalar types, so np.bool_
        # is covered as well as the numeric types np.number already matched.
        if isinstance(obj, (np.ndarray, np.generic)):
            return obj.tolist()
        return super().default(obj)

In [None]:
# (descriptor, include_top flag, output prefix) of each PCA index to build, in run order.
pca_runs = [
    ('XCEPTION', False, 'XCEPTION_false_pca'),
    ('XCEPTION', True, 'XCEPTION_true_pca'),
    ('MOBILENET', False, 'MOBILENET_false_pca'),
    ('VGG16', False, 'VGG16_false_pca'),
]
for pca_algo, pca_last_layer, pca_output in pca_runs:
    pca_generate('MIR_DATASETS_B/', pca_algo, pca_last_layer, pca_output)