# Extracting coarse-grained classifiers from large CNN models.

The following example shows how to extract coarse-grained classifiers from a fine-grained classifier pretrained on ImageNet.

Import all the necessary libraries:

In [1]:
import numpy as np
import tensorflow as tf
from sklearn.metrics import accuracy_score, confusion_matrix
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.applications.mobilenet_v2 import preprocess_input

Create the Sorted Class Similarity Matrix (SCSM), which is used to find the classes most similar to the chosen ones based on the weights of the original model:

In [2]:
def create_SCSM(model):
    """ Function returns a NumPy array with Sorted Class Similarity Matrix (SCSM)
    Each row of this table stores a 'class similarity landscape' for a
    particular class. Cosine similarity between the last layer's weight
    vectors of two classes is used as the similarity measure.
    The first element of each row in SCSM is always this particular
    class; this is enforced explicitly, so it also holds for a class whose
    weight vector is all zeros or is duplicated by another class.
    The remaining classes follow in a descending order of similarity
    (from the most to the least similar ones). Classes with exactly equal
    similarity are ordered by ascending class index.

    Parameters
    ----------
    model - keras pre-trained model with fine-grained classes; the first
    array returned by model.layers[-1].get_weights() is read as a
    2-D kernel whose columns are the per-class weight vectors

    Returns
    -------
    Sorted Class Similarity Matrix (SCSM) - a NumPy array of class indexes
    with one row per class
    """
    # Read the kernel once and transpose it so that every row is one class.
    class_vectors = np.asarray(model.layers[-1].get_weights()[0]).T

    # Cosine similarity = dot product of the L2-normalised class vectors.
    norms = np.linalg.norm(class_vectors, axis=1, keepdims=True)
    # An all-zero vector has no direction: divide it by 1 so it stays zero
    # (similarity 0 to every class) instead of turning into NaN.
    norms[norms == 0] = 1
    unit_vectors = class_vectors / norms
    CSM = unit_vectors @ unit_vectors.T

    # Pin every class to the front of its own row, independently of rounding
    # errors, duplicated weight vectors or zero vectors.
    np.fill_diagonal(CSM, np.inf)

    # sort in a descending order and return indexes of classes;
    # the stable sort makes the order of tied classes deterministic
    SCSM = np.argsort(-CSM, axis=1, kind="stable")
    return SCSM

Load an example model from Keras (MobileNetV2):

In [3]:
# Load MobileNetV2 together with its ImageNet weights and its original
# 1000-class softmax classification head (the fine-grained classifier).
model = MobileNetV2(
    # what to load
    weights="imagenet",
    include_top=True,
    # classification head
    classes=1000,
    classifier_activation="softmax",
    # input left at the library defaults
    input_shape=None,
    input_tensor=None,
    # NOTE(review): Keras documents `pooling` as a feature-extraction option
    # for include_top=False - confirm it has no effect here.
    pooling='avg',
)

An example of the SCSM for MobileNetV2 is presented below. We can see here that:
* for class 0 (tench - a fish) the most similar classes are 391 (coho - a fish) and 389 (barracouta - also a fish)
* for class 999 (a toilet tissue) the most similar classes are 700 (a paper towel) and 861 (a toilet seat)

In [4]:
# Display the SCSM of the loaded model: row i starts with class i and then
# lists the remaining class indexes from the most to the least similar one.
create_SCSM(model)

array([[  0, 391, 389, ..., 310, 840, 459],
       [  1, 392, 393, ..., 764, 138, 573],
       [  2,   3, 147, ..., 671,  18, 655],
       ...,
       [997, 947, 994, ..., 514, 442, 459],
       [998, 987, 994, ..., 880, 981, 245],
       [999, 700, 861, ..., 538,  27, 935]], dtype=int64)

Read a dataset (we use image_dataset_from_directory from tf.keras.preprocessing to create a generator). In the example below, we use our own very small dataset, but to gather the results for the purpose of our paper, we used Kaggle Dogs vs. Cats dataset (see https://www.kaggle.com/c/dogs-vs-cats) - it has the same format as our tiny example.

In [9]:
# set a correct image size for a network (MobileNetV2)
image_size = (224, 224)
batch_size = 32

# One sub-directory of "PetImages" per class; labels are one-hot encoded.
# NOTE(review): the label index of each class follows the order in which
# Keras enumerates the sub-directories - confirm that it matches the order
# of init_classes used for the coarse-grained model.
test_ds = tf.keras.preprocessing.image_dataset_from_directory(
    "PetImages",
    seed=1337,
    image_size=image_size,
    batch_size=batch_size,
    label_mode='categorical',
    shuffle=False
)
# Apply the MobileNetV2 input preprocessing to the images only.
normalized_ds = test_ds.map(lambda x, y: (preprocess_input(x), y))

# here, we read the labels for testing (correct only when shuffle is False);
# the labels are taken from test_ds because the mapping above leaves them
# untouched, so the images do not have to be preprocessed just to read them
labels = np.concatenate([y for _, y in test_ds], axis=0)

Found 20 files belonging to 2 classes.


In [6]:
def create_coarse_grained_model(model, init_classes, k_most_similar_classes):
    """ Function takes a pre-trained fine-grained CNN model as an argument and returns
    a coarse-grained model built using the base pre-trained model. The new coarse-grained classes
    are placed in the same order as the corresponding initialization classes in init_classes list.

    For every initialization class, the weights and the bias of the new class are the
    mean of the last-layer weights and biases of the first k_most_similar_classes
    entries of that class' SCSM row. The new softmax Dense layer is attached to the
    output of the second-to-last layer of the base model and reuses the base model's input.

    Parameters
    ----------
    model - keras pre-trained model with fine-grained classes
    init_classes - list of initialization class indexes
    k_most_similar_classes - number of classes taken into consideration
    while computing the weights for a particular coarse-grained class

    Returns
    -------
    new_dense_coarse - resulting keras model with coarse-grained classes

    Raises
    ------
    ValueError - if init_classes is empty or k_most_similar_classes is lower than 1
    (both cases would otherwise produce an empty or NaN-filled classifier)
    """
    if len(init_classes) == 0:
        raise ValueError("init_classes must contain at least one class index")
    if k_most_similar_classes < 1:
        raise ValueError("k_most_similar_classes must be at least 1")

    SCSM = create_SCSM(model)

    # Read the fine-grained classifier parameters once instead of on every
    # loop iteration: kernel columns and bias entries are indexed by class.
    fine_grained_params = model.layers[-1].get_weights()
    fine_kernel = fine_grained_params[0]
    fine_bias = fine_grained_params[1]

    coarse_grained_weights = []
    coarse_grained_bias = []

    for init_class in init_classes:
        # leading entries of the SCSM row of this initialization class
        similar_classes = SCSM[init_class][:k_most_similar_classes]
        coarse_grained_weights.append(np.mean(fine_kernel[:, similar_classes], axis=1))
        coarse_grained_bias.append(np.mean(fine_bias[similar_classes], axis=0))

    # one column per coarse-grained class, in the order of init_classes
    new_weights = np.stack(coarse_grained_weights, axis=1)
    new_biases = np.array(coarse_grained_bias)

    # Replace the fine-grained head with a new softmax head on top of the
    # output of the second-to-last layer of the base model.
    coarse_head = tf.keras.layers.Dense(len(init_classes), activation='softmax')
    coarse_output = coarse_head(model.layers[-2].output)
    new_dense_coarse = tf.keras.models.Model(inputs=model.input, outputs=coarse_output)
    coarse_head.set_weights([new_weights, new_biases])

    return new_dense_coarse

Setting the parameters values:
* k_most_similar_classes - number of the most similar classes to the init classes taken into consideration in the process of weight and bias matrices computation - we take 3 classes in the example below
* `init_classes` - a list of initialization classes for a coarse-grained classifier. We take one cat breed (Persian cat - index 283) and one dog breed (Eskimo dog, husky - index 248). See https://gist.github.com/yrevar/942d3a0ac09ec9e5eb3a for a list of ImageNet labels.

In [7]:
# Parameters of the coarse-grained classifier.
# param: number of most similar classes averaged for every coarse-grained class
k_most_similar_classes = 3
# param: one initialization class index per coarse-grained class
init_classes = [283, 248]

coarse_grained_model = create_coarse_grained_model(
    model=model,
    init_classes=init_classes,
    k_most_similar_classes=k_most_similar_classes,
)

We can now test our model:

In [8]:
# accuracy_score and confusion_matrix are imported in the import cell at the
# top of the notebook.
predictions = coarse_grained_model.predict(normalized_ds)

# labels are one-hot encoded, predictions are per-class scores:
# reduce both to class indexes
true_y = np.argmax(labels, axis=1)
predicted_y = np.argmax(predictions, axis=1)
assert len(true_y) == len(predicted_y), "labels and predictions must cover the same samples"

print('Accuracy')
print(accuracy_score(true_y, predicted_y))
print('Confusion matrix')
# normalize='true' scales every row (true class) of the matrix to sum to 1
print(confusion_matrix(true_y, predicted_y, normalize='true'))

Accuracy
0.85
Confusion matrix
[[0.8 0.2]
 [0.1 0.9]]
