In [1]:
import os
from os import listdir
from pickle import dump

from tensorflow.keras.applications.vgg16 import VGG16, preprocess_input
from tensorflow.keras.models import Model
from tensorflow.keras.preprocessing.image import load_img, img_to_array


In [3]:
# code referenced from https://medium.com/swlh/automatic-image-captioning-using-deep-learning-5e899c127387

# extract features from each photo in the directory
def extract_features(directory_to_extract, feature_dump_location=None, verbose=False):
    """Featurize every photo in a directory with a truncated VGG16 network.

    The VGG16 model is re-wired so that its second-to-last layer is the
    output, and each photo is pushed through it one at a time.

    Args:
        directory_to_extract: Path of the directory holding the photos. A
            trailing path separator is tolerated. Entries that are not
            regular files (e.g. sub-directories) are skipped; every regular
            file is handed to ``load_img``.
        feature_dump_location: Optional path. When truthy, the resulting
            dict is pickled to this file.
        verbose: When true, print each file name once it has been processed.

    Returns:
        dict mapping an image id (the file name without its final extension)
        to the array returned by ``model.predict`` for that photo.
    """
    # load the model and make its second-to-last layer the output, so that
    # predict() yields the feature representation of the image
    base_model = VGG16()
    model = Model(inputs=base_model.inputs, outputs=base_model.layers[-2].output)

    # listdir() order is arbitrary; sort so the dict's insertion order and the
    # progress output are reproducible between runs. Non-file entries are
    # dropped up front because load_img cannot open them.
    photo_names = [
        name
        for name in sorted(listdir(directory_to_extract))
        if os.path.isfile(os.path.join(directory_to_extract, name))
    ]

    features = dict()
    for count, name in enumerate(photo_names, start=1):
        filename = os.path.join(directory_to_extract, name)

        # squash to the 224x224 size requested from load_img, then convert
        # the pixels to an array
        image = img_to_array(load_img(filename, target_size=(224, 224)))

        # prepend a batch axis of length 1 for the model
        image = image.reshape((1, *image.shape))

        # prepare the image for the VGG model
        image = preprocess_input(image)

        # splitext strips only the final extension, so 'a.v2.jpg' and
        # 'a.jpg' no longer collapse onto the same key
        image_id = os.path.splitext(name)[0]
        features[image_id] = model.predict(image, verbose=0)

        if verbose:
            print(name)

        if count % 500 == 0:
            print(f'{count} photos featurized')

    print(f'Extracted Features: {len(features)}')

    if feature_dump_location:
        # context manager guarantees the handle is closed even if dump() raises
        with open(feature_dump_location, 'wb') as dump_file:
            dump(features, dump_file)

    return features

In [35]:
# test_features = extract_features('../data/test_10_images/', 'test_10_images_features.pkl')

Extracted Features: 10


In [36]:
# extract_features('../data/flickr_8k/Images/', '../data/flickr_8k/8k_features.pkl', verbose=False)

500 photos featurized
1000 photos featurized
1500 photos featurized
2000 photos featurized
2500 photos featurized
3000 photos featurized
3500 photos featurized
4000 photos featurized
4500 photos featurized
5000 photos featurized
5500 photos featurized
6000 photos featurized
6500 photos featurized
7000 photos featurized
7500 photos featurized
8000 photos featurized
Extracted Features: 8091


{'1000268201_693b08cb0e': array([[2.507647, 0.      , 0.      , ..., 0.      , 0.      , 0.      ]],
       dtype=float32),
 '1001773457_577c3a7d70': array([[0.        , 0.        , 0.49410754, ..., 0.        , 0.        ,
         0.        ]], dtype=float32),
 '1002674143_1b742ab4b8': array([[1.4937092, 0.       , 0.5356834, ..., 2.3154132, 3.7418406,
         0.       ]], dtype=float32),
 '1003163366_44323f5815': array([[0., 0., 0., ..., 0., 0., 0.]], dtype=float32),
 '1007129816_e794419615': array([[0.        , 0.09227666, 0.        , ..., 0.        , 0.        ,
         0.06528968]], dtype=float32),
 '1007320043_627395c3d8': array([[0.       , 0.       , 0.       , ..., 0.       , 3.3386393,
         0.       ]], dtype=float32),
 '1009434119_febe49276a': array([[2.0962927, 2.1193194, 3.5624332, ..., 0.642639 , 2.7146518,
         0.       ]], dtype=float32),
 '1012212859_01547e3f17': array([[0.        , 0.        , 0.98737115, ..., 0.        , 1.4932511 ,
         0.8612848 ]], d

In [6]:
# features_30k = extract_features('../data/flickr_30k/flickr30k_images/', '../data/flickr_30k/30k_features.pkl', verbose=False)

500 photos featurized
1000 photos featurized
1500 photos featurized
2000 photos featurized
2500 photos featurized
3000 photos featurized
3500 photos featurized
4000 photos featurized
4500 photos featurized
5000 photos featurized
5500 photos featurized
6000 photos featurized
6500 photos featurized
7000 photos featurized
7500 photos featurized
8000 photos featurized
8500 photos featurized
9000 photos featurized
9500 photos featurized
10000 photos featurized
10500 photos featurized
11000 photos featurized
11500 photos featurized
12000 photos featurized
12500 photos featurized
13000 photos featurized
13500 photos featurized
14000 photos featurized
14500 photos featurized
15000 photos featurized
15500 photos featurized
16000 photos featurized
16500 photos featurized
17000 photos featurized
17500 photos featurized
18000 photos featurized
18500 photos featurized
19000 photos featurized
19500 photos featurized
20000 photos featurized
20500 photos featurized
21000 photos featurized
21500 photos