## Load/import packages

In [1]:
import json
import cv2
import scipy.sparse
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras import Sequential, layers
from tensorflow.keras.preprocessing import image_dataset_from_directory
from tensorflow.keras.preprocessing.image import ImageDataGenerator


from os import listdir, mkdir
from os.path import splitext
from os.path import join
from skimage import io, color

%matplotlib inline

# Query the GPUs visible to TensorFlow once and report them
gpu_devices = tf.config.experimental.list_physical_devices("GPU")
print(gpu_devices)

# Enable memory growth on every GPU so memory is allocated on demand
for device in gpu_devices:
    tf.config.experimental.set_memory_growth(device, True)

# Record the library versions this notebook was run with
print()
print(f"Tensorflow Version: {tf.__version__}")
print(f"Numpy Version: {np.__version__}")
print(f"OpenCV Version: {cv2.__version__}")
print(f"Matplotlib Version: {matplotlib.__version__}")
print(f"Keras Version: {tf.keras.__version__}")

[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]

Tensorflow Version: 2.4.0-rc0
Numpy Version: 1.19.2
OpenCV Version: 4.4.0
Matplotlib Version: 3.3.1
Keras Version: 2.4.0


# Initiate Base CNN
For feature extraction, the pre-trained **VGG19** network is used with ImageNet weights. The input shape is set to (112, 112, 3). The top (classification) layers are not included because we only want to extract features.

In [2]:
from tensorflow.keras.applications.vgg19 import preprocess_input

# VGG19 with ImageNet weights and without its top layers,
# configured for 112x112 three-channel inputs
base_VGG19 = tf.keras.applications.VGG19(
    weights="imagenet",
    include_top=False,
    input_shape=(112, 112, 3),
)

In [3]:
# Print the layer-by-layer architecture of the VGG19 base (built with include_top=False)
base_VGG19.summary()

Model: "vgg19"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(None, 112, 112, 3)]     0         
_________________________________________________________________
block1_conv1 (Conv2D)        (None, 112, 112, 64)      1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, 112, 112, 64)      36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, 56, 56, 64)        0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, 56, 56, 128)       73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, 56, 56, 128)       147584    
_________________________________________________________________
block2_pool (MaxPooling2D)   (None, 28, 28, 128)       0     

# Initiate Feature Extraction model

In [4]:
def build_FE_model():
    """Build the feature-extraction model: VGG19 base followed by a Flatten layer.

    The Flatten layer turns the base network's output into a single feature
    vector per image.

    Side effect: the module-level ``base_VGG19`` is marked as non-trainable.

    Returns:
        A ``Sequential`` model consisting of ``base_VGG19`` and a layer named
        "Flatten".
    """
    base_VGG19.trainable = False
    return Sequential([base_VGG19, layers.Flatten(name="Flatten")])


FE_model = build_FE_model()
FE_model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
vgg19 (Functional)           (None, 3, 3, 512)         20024384  
_________________________________________________________________
Flatten (Flatten)            (None, 4608)              0         
Total params: 20,024,384
Trainable params: 0
Non-trainable params: 20,024,384
_________________________________________________________________


# Extract all Features

In [5]:
# Root folder holding one sub-folder per split ("train", "val").
# NOTE(review): this is an absolute, machine-specific path; adjust it to your setup.
AW2_sets_dir = r"D:\Aff-Wild2 Dataset\Aff-wild2\Sets_per_class"

# Both split directories are derived from the same root so they cannot drift apart
# (the train path previously pointed at a misspelled "Sets_pscipylass" folder).
AW2_train_dir = join(AW2_sets_dir, "train")
AW2_val_dir = join(AW2_sets_dir, "val")

In [239]:
# ImageDataGenerator whose preprocessing function is the VGG19 preprocess_input
train_datagen = ImageDataGenerator(preprocessing_function=preprocess_input)

# Read the training images from the per-class folders, unshuffled
train_generator = train_datagen.flow_from_directory(
    directory=AW2_train_dir,
    batch_size=512,
    shuffle=False,
    target_size=(112, 112),
    color_mode="rgb",
    class_mode="categorical",
)

# Report basic facts about the generator
print(f"Amount of images in training set: {train_generator.n}")
print(f"Batch size of generator: {train_generator.batch_size}")
print(f"Current batch index: {train_generator.batch_index}")
print(train_generator.class_indices)
print(
    f"All classes in list: {train_generator.classes}, size: {len(train_generator.classes)}"
)

Found 107413 images belonging to 7 classes.
Amount of images in training set: 107413
Batch size of generator: 512
Current batch index: 0
{'0_neutral': 0, '1_anger': 1, '2_disgust': 2, '3_fear': 3, '4_happiness': 4, '5_sadness': 5, '6_surprise': 6}
All classes in list: [0 0 0 ... 6 6 6], size: 107413


In [6]:
def feature_extractor(model, data_dir, batch_size=128):
    """Run every image under ``data_dir`` through ``model`` and collect the outputs.

    Images are read with ``flow_from_directory`` (unshuffled, resized to
    112x112 RGB, ``class_mode="categorical"``), preprocessed with the VGG19
    ``preprocess_input`` function and passed to ``model.predict`` one batch at
    a time. All batches are processed, including a final batch that is smaller
    than ``batch_size``.

    Args:
        model: Object with a ``predict(batch, verbose=0)`` method whose result
            can be converted to a ``scipy.sparse.coo_matrix``.
        data_dir: Directory passed to ``flow_from_directory``.
        batch_size: Number of images per batch.

    Returns:
        Tuple ``(features_arr, labels_arr)``: the per-batch predictions stacked
        row-wise into one SciPy sparse matrix, and the per-batch labels stacked
        row-wise into one NumPy array.

    Raises:
        ValueError: If no images are found in ``data_dir``.
    """
    # ImageDataGenerator with the preprocessing function set to preprocess_input for the VGG19 model
    datagen = ImageDataGenerator(preprocessing_function=preprocess_input)
    generator = datagen.flow_from_directory(
        directory=data_dir,
        target_size=(112, 112),
        color_mode="rgb",
        class_mode="categorical",
        batch_size=batch_size,
        shuffle=False,
    )

    if generator.n == 0:
        raise ValueError(f"No images found in {data_dir!r}")

    # Reset the batch index so iteration starts at the first batch
    generator.reset()

    # len(generator) counts the final partial batch too; the previous
    # n // batch_size stop condition silently dropped those trailing images.
    n_batches = len(generator)

    # Per-batch results, stacked after the loop
    features = []
    labels = []

    # The generator loops forever, so bound it with range(); range comes first in
    # zip() so no extra batch is pulled from the generator once the range is exhausted.
    for count, (batch, label) in zip(range(1, n_batches + 1), generator):
        pred = model.predict(batch, verbose=0)
        # Store each batch of predictions as a sparse COO matrix
        features.append(scipy.sparse.coo_matrix(pred))
        labels.append(label)
        print(f"{count}/{n_batches}")

    # Stack the per-batch features into one SciPy sparse matrix
    features_arr = scipy.sparse.vstack(features)

    # Stack the per-batch labels into one NumPy array
    labels_arr = np.vstack(labels)
    print("Finished with extraction")
    return features_arr, labels_arr

In [249]:
# pred = FE_model.predict(train_generator, steps=STEP, verbose=1)

In [None]:
# Extract features and labels for the training directory (default batch_size of feature_extractor)
train_features, train_labels = feature_extractor(FE_model, AW2_train_dir)

In [None]:
# Extract features and labels for the validation directory (default batch_size of feature_extractor)
val_features, val_labels = feature_extractor(FE_model, AW2_val_dir)

In [42]:
# Make sure the output directory exists; the save calls below fail if it is missing
try:
    mkdir("data")
except FileExistsError:
    pass

# Save the sparse feature matrices in SciPy's .npz format
scipy.sparse.save_npz("data/train_features.npz", train_features)
scipy.sparse.save_npz("data/val_features.npz", val_features)

# Save the label arrays with NumPy (np.save appends the ".npy" extension)
np.save("data/train_labels", train_labels)
np.save("data/val_labels", val_labels)

In [None]:
from skimage.util import img_as_uint, img_as_float

# In the cells shown, `batch` is only a local variable of feature_extractor, so
# fetch a batch of preprocessed images from the training generator instead.
batch, _ = next(train_generator)

# VGG19's preprocess_input converts RGB to BGR and subtracts these per-channel
# ImageNet means; both steps are undone below so the images display correctly.
VGG_BGR_MEAN = np.array([103.939, 116.779, 123.68])

# The generator's batch size is 512; preview only the first few images
for item in batch[:8]:
    item = item + VGG_BGR_MEAN  # add the channel means back
    item = item[..., ::-1]  # BGR -> RGB
    item = np.clip(item, 0, 255).astype(np.uint8)
    plt.imshow(item)
    plt.show()