In [2]:
import os
import random
from datetime import datetime, timedelta
import numpy as np

import tensorflow as tf
import tensorflow_hub as hub
from skimage import transform

from PIL import Image

from imgbeddings import imgbeddings

import csv

In [3]:
# File names skipped while scanning the data folders (macOS Finder metadata).
files_to_remove = ['.DS_Store']
# Number of image paths handed to the embedding model per call.
batch_size = 64

# Project root = parent of the current working directory. os.path.join is used
# instead of manual string concatenation so that a root-level cwd does not
# produce a doubled leading separator before normalisation.
root_folder = os.path.normpath(os.path.join(os.getcwd(), os.pardir))
# Location of the mock production images, relative to the project root.
data_folder_path = '0_data/mock_production_log'
data_folder = os.path.join(root_folder, data_folder_path)

## Get files from each folder (test, train & validation)

In [4]:
# Column order shared by the header line and every data row of the CSV.
header = [
    'name',
    'url',
    'predicted_label',
    'score',
    'prediction_ts',
    'vector',
]

# Embedding extractor, constructed with no arguments (library defaults).
ibed = imgbeddings()



### Load the model used to log prediction scores

In [5]:
# Saved Keras model, read from the ./models/ directory relative to the
# current working directory.
model_loaded = tf.keras.models.load_model('./models/')

# Label names; get_predicted_class_and_score indexes this list with the
# argmax position of the model output, so the order matters.
classes = [
    'angular_leaf_spot',
    'bean_rust',
    'healthy',
]

def process(filename):
   """Load an image file and shape it as a one-image batch.

   The image is decoded as 3-channel RGB, cast to float32, resized to
   224 x 224 x 3 and given a leading batch axis.

   Args:
      filename: Path (or file object) accepted by ``PIL.Image.open``.

   Returns:
      A numpy array of shape (1, 224, 224, 3).
   """
   # The context manager releases the underlying file handle as soon as the
   # pixels are decoded. convert('RGB') guarantees exactly three channels, so
   # grayscale, palette, RGBA or CMYK inputs are not fed to a resize that
   # would otherwise interpolate across (or fail on) the channel axis.
   with Image.open(filename) as image_file:
      rgb_image = image_file.convert('RGB')

   np_image = np.array(rgb_image).astype('float32')
   # NOTE(review): pixel values are not explicitly rescaled here; confirm
   # this matches the preprocessing the model was trained with.
   np_image = transform.resize(np_image, (224, 224, 3))
   np_image = np.expand_dims(np_image, axis=0)
   return np_image

def get_predicted_class_and_score(url):
   """Classify one image and report the winning label with its score.

   Args:
      url: Path of the image file; it is passed straight to ``process``.

   Returns:
      A two-element list ``[class_name, score]`` where ``class_name`` comes
      from ``classes`` and ``score`` is the model output at the argmax
      position of the first (only) batch row.
   """
   image_batch = process(url)
   model_output = model_loaded.predict(image_batch)

   # argmax over the whole output; with a single-image batch this is the
   # position of the highest-scoring class.
   best_index = np.argmax(model_output)
   best_label = classes[best_index]
   best_score = model_output[0][best_index]

   return [best_label, best_score]

2022-10-07 14:51:13.535271: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


### Create embeddings for test dataset

In [15]:
def run_scandir_with_exclusion(dir, exclude):
    """Recursively collect sub-directory and file paths below ``dir``.

    Args:
        dir: Directory to scan.
        exclude: Collection of file *names* (not paths) to leave out of the
            returned file list. Directories are never excluded.

    Returns:
        A ``(subfolders, files)`` tuple of path lists. Direct children of
        ``dir`` come first, followed by the contents of each sub-directory
        in turn. Entries within a directory are ordered by name so repeated
        runs yield the same sequence regardless of the OS listing order.
    """
    subfolders, files = [], []

    # `with` closes the directory handle even if an error interrupts the scan.
    with os.scandir(dir) as entries:
        for entry in sorted(entries, key=lambda item: item.name):
            if entry.is_dir():
                subfolders.append(entry.path)
            elif entry.is_file() and entry.name not in exclude:
                files.append(entry.path)

    # Iterate over a snapshot: `subfolders` grows while descendants are added.
    for subfolder in list(subfolders):
        nested_subfolders, nested_files = run_scandir_with_exclusion(subfolder, exclude)
        subfolders.extend(nested_subfolders)
        files.extend(nested_files)
    return subfolders, files

In [6]:
# Folder for the generated embedding CSV files, relative to the project root.
embeddings_folder = '0_data/embeddings'
# Same folder joined onto root_folder; used as the CSV output directory.
embeddings_folder_path = os.path.join(root_folder, embeddings_folder)

In [8]:
# Run-wide accumulators filled by write_to_csv: one embedding and one
# predicted label per processed image, kept in matching order.
embeddings_for_all_images, labels_for_all_images = [], []

def write_to_csv(files, writer, prediction_ts):
    """Embed and classify every image in ``files`` and write one CSV row each.

    Images are embedded in batches of ``batch_size`` and classified one at a
    time. Each row follows the column order name, url, predicted_label,
    score, prediction_ts, vector. The embedding and predicted label of every
    image are also appended to the module-level ``embeddings_for_all_images``
    and ``labels_for_all_images`` lists.

    Args:
        files: Sequence of image file paths (must support ``len`` and slicing).
        writer: Object exposing ``writerow``, e.g. a ``csv.writer``.
        prediction_ts: Timestamp value stored unchanged in every row.
    """
    # Step through the list in strides of batch_size. The earlier
    # `range(file_count - 1)` loop ran zero times for a single-file input
    # (dropping that file) and otherwise iterated over many empty slices.
    for start in range(0, len(files), batch_size):
        batch_files = files[start:start + batch_size]
        embeddings = ibed.to_embeddings(batch_files)

        for index, path in enumerate(batch_files):
            predicted_label, score = get_predicted_class_and_score(path)
            embedding = embeddings[index]

            embeddings_for_all_images.append(embedding)
            labels_for_all_images.append(predicted_label)

            # write the data
            writer.writerow([
                os.path.basename(path),
                path,
                predicted_label,
                score,
                prediction_ts,
                embedding,
            ])

### Create embeddings for production (mock) dataset

In [10]:
# One POSIX timestamp (float seconds) taken from the local clock; it is
# passed to write_to_csv as the prediction_ts for every row.
now_ts = datetime.now().timestamp()

In [None]:
# Destination CSV for the mock production embeddings.
prod_file_path = os.path.join(embeddings_folder_path, 'beanleaf_production.csv')
# Every image below data_folder, minus the excluded housekeeping files.
prod_subfolders, prod_files = run_scandir_with_exclusion(data_folder, files_to_remove)

# open(..., 'w') does not create missing parent directories, so make sure the
# output folder exists before writing (no-op when it is already there).
os.makedirs(embeddings_folder_path, exist_ok=True)

with open(prod_file_path, 'w', encoding='UTF8', newline='') as f:
    writer = csv.writer(f)
    # write the header
    writer.writerow(header)
    write_to_csv(prod_files, writer, now_ts)