## Import packages

In [1]:
import os
import json
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow import keras
from annoy import AnnoyIndex

In [2]:
# Display the TensorFlow version in use, so the saved output records it
tf.__version__

'2.4.1'

## Paths & Config

In [3]:
# --- Input artifacts ---
# CSV that is read into the `data` frame
RAW_DATA = "./data/raw_data.csv"
# Saved Keras model loaded as the feature encoder
MODEL_PATH = "models/classifier.h5"

# --- Output artifacts ---
# Cached feature matrix (NumPy .npy)
FEATURES_PATH = "./models/features.npy"
# JSON file holding the feature dimension and the item-id -> filename mapping
MAPPING = "./models/id_to_path.json"
# Serialized Annoy index
ANNOY_PATH = "./models/model.ann"

# --- Image loading ---
# Size the images are resized to by the data generator
WIDTH, HEIGHT = 150, 150
# Number of images per batch yielded by the data generator
BATCH_SIZE = 16

# --- Annoy ---
# Number of trees passed to AnnoyIndex.build
TREES = 10

## Prepare data & Extract features

In [4]:
# The first CSV column is the row index written when the file was saved.
# Reading it as the index avoids a spurious "Unnamed: 0" data column.
data = pd.read_csv(RAW_DATA, index_col=0)
data.head()

Unnamed: 0,image,image_path,subject,mode
0,subject04.centerlight.jpg,./data/yalefaces/subject04.centerlight.jpg,subject04,centerlight
1,subject09.centerlight.jpg,./data/yalefaces/subject09.centerlight.jpg,subject09,centerlight
2,subject04.sleepy.jpg,./data/yalefaces/subject04.sleepy.jpg,subject04,sleepy
3,subject09.sad.jpg,./data/yalefaces/subject09.sad.jpg,subject09,sad
4,subject06.normal.jpg,./data/yalefaces/subject06.normal.jpg,subject06,normal


In [5]:
# Reuse the cache only when BOTH artifacts exist. The feature matrix is useless
# without the id -> path mapping, and checking only the features file would
# never regenerate a missing mapping (e.g. after an interrupted run).
if os.path.exists(FEATURES_PATH) and os.path.exists(MAPPING):
    # Load features
    features = np.load(FEATURES_PATH)
else:
    # Restore model
    encoder = keras.models.load_model(MODEL_PATH)
    # Remove classifier top layer so predict() returns the output of the
    # layer that preceded it
    encoder.pop()
    # Create generator (pixel values scaled to [0, 1])
    generator = keras.preprocessing.image.ImageDataGenerator(
        rescale=1./255
    )
    datagen = generator.flow_from_dataframe(
        dataframe=data,
        x_col="image_path",
        y_col="subject",
        # Keras expects target_size as (height, width)
        target_size=(HEIGHT, WIDTH),
        batch_size=BATCH_SIZE,
        # Keep the input order so row i of `features` matches datagen.filenames[i]
        shuffle=False
    )
    # Extract features
    features = encoder.predict(datagen)
    # The mapping below is only valid if there is exactly one feature row per file
    assert len(features) == len(datagen.filenames), "features/filenames length mismatch"
    # Save features
    np.save(FEATURES_PATH, features)
    # Map filenames; ensure_ascii=False writes non-ASCII characters verbatim,
    # so the file encoding is pinned to UTF-8 instead of the locale default
    with open(MAPPING, "w", encoding="utf-8") as f:
        id_to_path = dict(enumerate(datagen.filenames))
        json.dump({"feature_dim": features.shape[-1], "id_to_path": id_to_path}, f, indent=4, ensure_ascii=False)

## Modeling with ANNOY

In [6]:
# Create an index using the 'angular' metric, sized to the last feature dimension
t = AnnoyIndex(features.shape[-1], 'angular')
# Register every feature row under its positional id
for i, vector in enumerate(features):
    t.add_item(i, vector)
# Build the index with TREES trees
t.build(TREES)
# Persist the index to disk (last expression, so the result is displayed)
t.save(ANNOY_PATH)

True