## Imports

In [41]:
import tensorflow as tf
import tensorflow_hub as hub
import pandas as pd
import numpy as np
import os.path
from pathlib import Path
import matplotlib.pyplot as plt
import numpy as np
from tqdm import tqdm
import cv2
import os
import sys
from os.path import exists
from scipy import spatial

## Settings

In [42]:
# Print NumPy arrays in full rather than eliding the middle with "...".
np.set_printoptions(threshold=sys.maxsize)

# Working directory of this notebook: the per-type output directories and the
# feature cache are created underneath it.
base_dir = '/home/drevital/obstacles_classification_datasets/base_no_obstacle_dataset'

# Directory whose images are compared against the reference images.
search_dir = '/home/drevital/obstacles_classification_datasets/rgb_6_balanced/train/no_obstacle'

# Reference ("source") images live in <base_dir>/src.
src_dir = os.path.join(base_dir, 'src')

# CSV cache holding the feature vector of every image in `search_dir`.
features_path = os.path.join(base_dir, 'features.csv')

## Read & Prepare Source Images

In [43]:
# Load every reference image found in `src_dir` and turn it into a model-ready
# tensor. Each entry of `src_ims` is a dict with:
#   'type' - the file name up to the first '.' and then up to the first '_'
#   'im'   - the image padded/resized to 224x224 with a leading batch axis
# The listing is sorted so the order of `src_ims` is the same on every run.
src_imnames = sorted(os.listdir(src_dir))
src_ims = []
for src_imname in src_imnames:
    im = tf.io.read_file(os.path.join(src_dir, src_imname))
    im = tf.io.decode_jpeg(im, channels=3)
    im = tf.image.resize_with_pad(im, 224, 224)
    # Add the batch axis -> shape (1, 224, 224, 3), float32.
    # NOTE(review): resize_with_pad appears to return float32 already, in which
    # case convert_image_dtype does NOT rescale 0..255 to 0..1 here - confirm
    # the value range the hub module expects. The compared images are
    # preprocessed the same way, so any change must be made in both places.
    im = tf.image.convert_image_dtype(im, tf.float32)[tf.newaxis, ...]
    imtype = src_imname.split('.')[0].split('_')[0]
    src_ims.append({'type': imtype, 'im': im})

## Load the MobileNet Module

In [44]:
# TF-Hub handle of the MobileNet V2 feature-vector module; `module` is the
# loaded callable used below to turn an image batch into a feature vector.
module_handle = (
    'https://tfhub.dev/google/imagenet/mobilenet_v2_140_224/feature_vector/4'
)
module = hub.load(module_handle)

## Calculate the feature vectors of the source images

In [45]:
# Attach a feature vector to every reference image: run its tensor through the
# module and drop the size-1 axes from the result.
for src_im in src_ims:
    src_im['fset'] = np.squeeze(module(src_im['im']))

## Prepare compared images for feature calculation and similarity search

In [46]:
# File names of all images that will be compared against the reference images.
fnames = os.listdir(search_dir)

# Empty containers initialised up front.
sim_names, sim_scores = [], []
feature_vecs = dict()

## Calculate and store the feature vectors of all images in the compared directory

In [47]:
# Build the on-disk feature cache for every image in `search_dir`.
# The extraction pass is skipped entirely when `features_path` already exists,
# so delete that file to force a re-computation.
feature_vecs = {}

if not exists(features_path):
    records = []
    for i, fname in enumerate(tqdm(fnames)):
        impath = os.path.join(search_dir, fname)
        im = tf.io.read_file(impath)
        im = tf.io.decode_jpeg(im, channels=3)
        im = tf.image.resize_with_pad(im, 224, 224)
        # Add the batch axis -> shape (1, 224, 224, 3), float32.
        # NOTE(review): resize_with_pad appears to return float32 already, in
        # which case convert_image_dtype does NOT rescale 0..255 to 0..1 here -
        # confirm the value range the hub module expects. The source images are
        # preprocessed the same way, so any change must be made in both places
        # (and this cache regenerated).
        im = tf.image.convert_image_dtype(im, tf.float32)[tf.newaxis, ...]
        f_set = np.squeeze(module(im))
        feature_vecs[i] = {'path': impath, 'features': f_set}

        # Serialise the vector explicitly as "[v0 v1 ...]" instead of relying on
        # str(ndarray): that depends on the global NumPy print options and would
        # write an elided "..." vector if the threshold were not raised.
        # 9 significant digits are enough to round-trip a float32 value.
        records.append({
            'path': impath,
            'features': '[' + ' '.join(f'{v:.9g}' for v in f_set) + ']',
        })

    # Naming the columns keeps the CSV header valid even for an empty directory.
    features_df = pd.DataFrame(records, columns=['path', 'features'])
    features_df.to_csv(features_path, sep=',')

## Generate directories per image type (if they don't exist)

In [48]:
# Make sure <base_dir>/<type> exists for every type seen among the reference
# images; directories that already exist are left untouched.
for type_name in sorted({entry['type'] for entry in src_ims}):
    type_path = os.path.join(base_dir, type_name)
    Path(type_path).mkdir(parents=True, exist_ok=True)

## Sort images to directories according to nearest type

In [49]:
# Assign every image in `search_dir` to the reference type whose feature vector
# is most similar (cosine similarity) and write the image into
# <base_dir>/<type>/<fname>.
features_df = pd.read_csv(features_path, delimiter=',')

# Index the cached feature strings by path once, so each lookup inside the loop
# is a dict access instead of a scan over the whole frame.
features_by_path = dict(zip(features_df['path'], features_df['features']))

if not src_ims:
    raise ValueError(f'No source images were loaded from {src_dir}')

for fname in tqdm(fnames):
    impath = os.path.join(search_dir, fname)
    if impath not in features_by_path:
        raise KeyError(
            f'No cached features for {impath}: {features_path} looks stale; '
            'delete it and re-run the feature calculation cell.'
        )
    # Cached vectors are stored as a bracketed, whitespace-separated string.
    fset = [float(item) for item in features_by_path[impath][1:-1].split()]

    # Start below every attainable cosine similarity (range [-1, 1]) so that the
    # best type is still picked when no similarity is positive; starting at 0.0
    # with an empty type would send such images into `base_dir` itself.
    max_similarity = float('-inf')
    max_type = None
    for src_im in src_ims:
        similarity = 1 - spatial.distance.cosine(src_im['fset'], fset)
        if similarity > max_similarity:
            max_similarity = similarity
            max_type = src_im['type']

    if max_type is None:
        # Every comparison was NaN (e.g. a degenerate feature vector).
        print(f'Skipping {impath}: no comparable source image')
        continue

    # Read the original image, write it to per-type directory
    im = cv2.imread(impath)
    if im is None:
        raise OSError(f'Could not read image {impath}')
    outdir = os.path.join(base_dir, max_type)
    cv2.imwrite(os.path.join(outdir, fname), im)

100%|██████████| 9548/9548 [01:31<00:00, 104.47it/s]


In [50]:
# Ad-hoc sum of two counts, displayed as the cell output.
# NOTE(review): presumably the image counts of two per-type output
# directories - confirm what these numbers are, or drop this scratch cell.
2823+1920

4743