# Import Modules

In [5]:
import os
import pickle
import re

import numpy as np
from tqdm.notebook import tqdm

from tensorflow.keras.applications.vgg16 import VGG16, preprocess_input
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Model
from tensorflow.keras.utils import to_categorical, plot_model
from tensorflow.keras.layers import Input, Dense, LSTM, Embedding, Dropout, add

## Extract image features

In [6]:
# Load VGG16 with its default constructor arguments.
base_model = VGG16()
# Restructure: keep the same inputs but stop at the second-to-last layer, so
# `model` outputs that layer's activations instead of the final layer's output.
model = Model(inputs=base_model.inputs, outputs=base_model.layers[-2].output)
# summary() prints the table itself and returns None, so it is called directly;
# wrapping it in print() would add a stray "None" line after the table.
model.summary()

Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 224, 224, 3)]     0         
                                                                 
 block1_conv1 (Conv2D)       (None, 224, 224, 64)      1792      
                                                                 
 block1_conv2 (Conv2D)       (None, 224, 224, 64)      36928     
                                                                 
 block1_pool (MaxPooling2D)  (None, 112, 112, 64)      0         
                                                                 
 block2_conv1 (Conv2D)       (None, 112, 112, 128)     73856     
                                                                 
 block2_conv2 (Conv2D)       (None, 112, 112, 128)     147584    
                                                                 
 block2_pool (MaxPooling2D)  (None, 56, 56, 128)       0     

In [8]:
# Extract features from images: source folder and the dict that collects the results
directory = 'Images'
features = dict()

In [3]:

for img_name in tqdm(os.listdir(directory)):
    # build the path of the current file inside `directory`
    img_path = f"{directory}/{img_name}"
    # read the file from disk, resized to 224x224, and turn it into a numpy array
    image = img_to_array(load_img(img_path, target_size=(224, 224)))
    # prepend a batch axis of size 1 so the model receives a batch of one image
    image = np.expand_dims(image, axis=0)
    # apply the VGG16 input preprocessing
    image = preprocess_input(image)
    # run the model silently on this single image
    feature = model.predict(image, verbose=0)
    # the image id is the file name up to its first '.'
    image_id = img_name.split('.')[0]
    # remember the feature under that id
    features[image_id] = feature

  0%|          | 0/8091 [00:00<?, ?it/s]

In [9]:
# Store the features in a pickle file.
# The `with` block closes the file handle as soon as the dump finishes, so the
# data is flushed to disk before any later cell reads features.pkl back.
with open('features.pkl', 'wb') as f:
    pickle.dump(features, f)

In [10]:
# Read the stored features back from features.pkl.
# NOTE: pickle.load can execute arbitrary code; only load a file you produced yourself.
with open('features.pkl', 'rb') as pkl_file:
    features = pickle.load(pkl_file)

## Load the Captions Data

In [2]:
# Read captions.txt into one string, discarding its first line
# (presumably a header row -- confirm against the data file).
with open('captions.txt', 'r') as captions_file:
    next(captions_file)
    captions_doc = captions_file.read()

In [11]:
# Build a dict: image id -> list of every caption string found for that image
mapping = {}
for line in tqdm(captions_doc.split('\n')):
    # ignore lines shorter than two characters (e.g. blank lines)
    if len(line) < 2:
        continue
    # the first comma-separated field is the image file name, the rest is the caption
    image_file, *caption_parts = line.split(',')
    # image id = file name up to its first '.'
    image_id = image_file.split('.')[0]
    # the caption fields are glued back together with no separator,
    # so commas that were inside the caption text are dropped
    caption = ''.join(caption_parts)
    # setdefault creates the list the first time an image id is seen
    mapping.setdefault(image_id, []).append(caption)

  0%|          | 0/40456 [00:00<?, ?it/s]

## Preprocess Text Data

In [12]:
def clean(mapping):
    """Normalize every caption stored in ``mapping``, in place.

    Each caption is lower-cased, stripped of every character that is neither
    a letter nor whitespace, reduced to its words longer than one character,
    and wrapped as ``'<start> ... <end>'`` with single spaces between tokens.

    Args:
        mapping: dict whose values are lists of caption strings. The lists
            are modified in place; the keys are not used.

    Returns:
        Nothing; the cleaned captions are written back into the lists.
    """
    for captions in mapping.values():
        for i in range(len(captions)):
            # pick one caption at a time and convert it to lowercase
            caption = captions[i].lower()
            # Remove digits, punctuation and other special characters.
            # str.replace() treats its first argument as literal text, so a
            # regex pattern needs re.sub(); whitespace is kept so that word
            # boundaries survive.
            caption = re.sub(r'[^A-Za-z\s]', '', caption)
            # split() discards runs of whitespace, so re-joining the words with
            # single spaces also removes additional spaces
            words = [word for word in caption.split() if len(word) > 1]
            # Add start and end tags; every token (tags included) is separated
            # by exactly one space, and the caption text appears only once
            caption = ' '.join(['<start>', *words, '<end>'])
            captions[i] = caption