In [1]:
!pip install keras
!pip install tensorflow

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [2]:
# Colab only: mount Google Drive at /content/drive/. Later cells use paths
# under ./drive/MyDrive/ for the images and the pickled index/PCA files.
from google.colab import drive
drive.mount('/content/drive/')
# Optional sanity check of the mounted image folder (left disabled):
#import os
#os.listdir("./drive/MyDrive/img/")

Mounted at /content/drive/


In [6]:
import keras
from keras.applications.imagenet_utils import preprocess_input
from keras.models import Model
import pandas as pd
import numpy as np
from sklearn.decomposition import PCA
from scipy.spatial import distance

from typing import Tuple, List
from PIL import Image
from pathlib import Path
import gc
import pickle

class KerasImageClassifier:
    """Image similarity search built on VGG16 ``fc2`` activations.

    Each image is passed through a VGG16 network (ImageNet weights) and the
    output of its ``fc2`` layer is used as the feature vector. The vectors of
    a whole image folder are reduced with PCA and stored, together with the
    file names, in a pickled index. Queries are answered by cosine distance
    in the PCA space.

    Attributes:
        model: The full VGG16 model.
        feature_extractor: Model mapping the VGG16 input to the ``fc2`` output.
        index_file: Path of the pickled index (``[names, pca_features]``).
        pca_file: Path of the pickled fitted PCA.
        index: Loaded/created index, or ``None`` when not available.
        pca: Loaded/fitted PCA, or ``None`` when not available.
    """

    # Upper bound for the number of PCA components kept per image.
    MAX_PCA_COMPONENTS = 300

    model = None
    feature_extractor = None
    index_file = ""
    pca_file = ""
    index = None
    pca = None

    def __init__(self,
                 index_file: str = "./data/index_tf.pickle",
                 pca_file: str = "./data/pca_tf.pickle"):
        """Load the network and, when present, a previously saved index and PCA.

        Args:
            index_file: Where the pickled index is read from / written to.
            pca_file: Where the pickled PCA is read from / written to.
        """
        self.index_file = index_file
        self.pca_file = pca_file
        print("Loading model...")
        self.model = keras.applications.VGG16(weights="imagenet", include_top=True)
        self.feature_extractor = Model(inputs=self.model.input,
                                       outputs=self.model.get_layer("fc2").output)
        print("Loading index...")
        self.index = self.__load_index(self.index_file)
        print("Loading PCA...")
        self.pca = self.__load_pca(self.pca_file)

    def __load_image(self, image_path: str) -> "Tuple[Image.Image, np.ndarray]":
        """Load an image resized to the model's input size.

        Returns:
            The loaded image and the preprocessed array with a leading
            batch axis of length 1.
        """
        img = keras.utils.load_img(image_path, target_size=self.model.input_shape[1:3])
        x = keras.utils.img_to_array(img)
        x = np.expand_dims(x, axis=0)
        x = preprocess_input(x)
        return img, x

    def __extract_features(self, image_path: str) -> np.ndarray:
        """Return the ``fc2`` activations for one image (batch axis kept)."""
        _, x = self.__load_image(image_path)
        # verbose=0: predict() is called once per image, so the default
        # progress bar would flood the cell output.
        return self.feature_extractor.predict(x, verbose=0)

    def __train_pca(self, features: np.ndarray) -> "PCA":
        """Fit a PCA on ``features``, pickle it to ``self.pca_file`` and return it."""
        features = np.asarray(features)
        # PCA cannot keep more components than samples or feature dimensions.
        n = min(self.MAX_PCA_COMPONENTS, *features.shape)
        pca = PCA(n_components=n)
        pca.fit(features)
        Path(self.pca_file).parent.mkdir(parents=True, exist_ok=True)
        with open(self.pca_file, "wb") as f:
            pickle.dump(pca, f)
        return pca

    def create_index(self, image_repo: str) -> None:
        """Build the index for every image file directly inside ``image_repo``.

        Extracts features for each file, fits the PCA, stores the result in
        ``self.pca`` / ``self.index`` and pickles both to disk.

        Args:
            image_repo: Directory containing the images (not searched
                recursively).

        Raises:
            ValueError: If the directory contains no readable image.
        """
        # Regular files only, in sorted order so the index is reproducible.
        images = sorted(p for p in Path(image_repo).iterdir() if p.is_file())
        if not images:
            raise ValueError(f"No image files found in {image_repo}.")

        features = []
        names = []
        for image_path in images:
            print(f"Processing {image_path.name}, Length of index {len(features)}")
            try:
                feat = self.__extract_features(str(image_path))[0]
            except OSError as err:
                # Unreadable / non-image files should not abort a long run.
                # NOTE(review): assumes the image loader reports such files
                # as OSError (or a subclass) - confirm for the installed Pillow.
                print(f"Skipping {image_path.name}: {err}")
                continue
            # names and features are appended together, so names[i]
            # always belongs to features[i].
            features.append(feat)
            names.append(image_path.name)

        if not features:
            raise ValueError(f"No readable images found in {image_repo}.")

        print("Extracting PCA features (might take some minutes)...")
        feature_matrix = np.asarray(features)
        self.pca = self.__train_pca(feature_matrix)
        pca_features = self.pca.transform(feature_matrix)
        self.index = [names, pca_features]

        # Make sure the target folder exists so the expensive result is not lost.
        Path(self.index_file).parent.mkdir(parents=True, exist_ok=True)
        with open(self.index_file, "wb") as f:
            print(f"Saving index to {self.index_file}...")
            pickle.dump(self.index, f, protocol=pickle.HIGHEST_PROTOCOL)
        gc.collect()  # release the raw feature list held during indexing

    @staticmethod
    def __load_pickle(path: str, label: str):
        """Unpickle ``path``; print a hint and return ``None`` if it is missing."""
        if not Path(path).exists():
            print(f"{label} file {path} not found. Please use create_index().")
            return None
        # SECURITY: pickle.load executes arbitrary code from the file.
        # Only load index/PCA files that this notebook created itself.
        with open(path, "rb") as f:
            return pickle.load(f)

    def __load_index(self, index_file: str) -> List:
        """Load the pickled index, or return ``None`` if the file does not exist."""
        return self.__load_pickle(index_file, "Index")

    def __load_pca(self, pca_file: str) -> "PCA":
        """Load the pickled PCA, or return ``None`` if the file does not exist."""
        return self.__load_pickle(pca_file, "PCA")

    def find_similar_images(self, image_path: str, top_k: int = 10) -> List[Tuple[str, float]]:
        """Return the ``top_k`` indexed images closest to ``image_path``.

        Args:
            image_path: Path of the query image.
            top_k: Maximum number of matches to return.

        Returns:
            ``(name, cosine distance)`` pairs, closest match first.

        Raises:
            RuntimeError: If no index/PCA has been loaded or created yet.
        """
        if self.index is None or self.pca is None:
            raise RuntimeError("No index/PCA available. Please use create_index() first.")

        names, index_features = self.index[0], self.index[1]
        if len(names) == 0:
            return []

        query = np.asarray(self.pca.transform(self.__extract_features(image_path)))[:1]
        # One vectorised call instead of a Python loop over the whole index.
        distances = distance.cdist(query, np.asarray(index_features), metric="cosine")[0]
        # Stable sort keeps index order among equally distant images.
        closest = np.argsort(distances, kind="stable")[:top_k]
        return [(names[i], float(distances[i])) for i in closest]

In [7]:
# Build the classifier; the index and PCA pickles live on the mounted Drive.
clf = KerasImageClassifier(
    index_file="./drive/MyDrive/data/index_tf.pickle",
    pca_file="./drive/MyDrive/data/pca_tf.pickle",
)

Loading model...
Loading index...
Index file ./drive/MyDrive/data/index_tf.pickle not found. Please use create_index().
Loading PCA...
PCA file ./drive/MyDrive/data/pca_tf.pickle not found. Please use create_index().


In [None]:
# Building the index runs the network over every image in the folder, so only
# do it when no saved index/PCA could be loaded (keeps Restart & Run All cheap).
# Delete the pickle files to force a rebuild.
if clf.index is None or clf.pca is None:
    clf.create_index("./drive/MyDrive/imagenet-mini-trained")

In [9]:
# Image check: query the index with one sample picture and print the
# (name, cosine distance) pairs returned, closest match first.
uploaded_img = "./drive/MyDrive/img/dog_input.jpg"
similar_images = clf.find_similar_images(uploaded_img)
print(similar_images)

[(PosixPath('drive/MyDrive/imagenet-mini-trained/ILSVRC2012_val_00033790.JPEG'), 0.2504112871569514), (PosixPath('drive/MyDrive/imagenet-mini-trained/ILSVRC2012_val_00043013.JPEG'), 0.3036423586912681), (PosixPath('drive/MyDrive/imagenet-mini-trained/ILSVRC2012_val_00031601.JPEG'), 0.32418452831057876), (PosixPath('drive/MyDrive/imagenet-mini-trained/ILSVRC2012_val_00042884.JPEG'), 0.3252865252022161), (PosixPath('drive/MyDrive/imagenet-mini-trained/ILSVRC2012_val_00043880.JPEG'), 0.32783019923445234), (PosixPath('drive/MyDrive/imagenet-mini-trained/ILSVRC2012_val_00024691.JPEG'), 0.33175445627625233), (PosixPath('drive/MyDrive/imagenet-mini-trained/ILSVRC2012_val_00041137.JPEG'), 0.3432763795454401), (PosixPath('drive/MyDrive/imagenet-mini-trained/ILSVRC2012_val_00041746.JPEG'), 0.35435708765549034), (PosixPath('drive/MyDrive/imagenet-mini-trained/ILSVRC2012_val_00036408.JPEG'), 0.3590501972279404), (PosixPath('drive/MyDrive/imagenet-mini-trained/ILSVRC2012_val_00024470.JPEG'), 0.3636