In [5]:
# Mount Google Drive into the Colab filesystem so the dataset and the saved
# artifacts referenced by the later cells are reachable under /content/drive.
from google.colab import drive

DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)


Mounted at /content/drive


In [6]:
import os

# Locations of the image folder and the captions file on the mounted Drive.
BASE_DIR = '/content/drive/MyDrive/Flickr8k/Flickr8k_Dataset'
IMAGE_DIR = f'{BASE_DIR}/Images'
CAPTION_FILE = f'{BASE_DIR}/captions.txt'

# Sanity check: print one line per path telling whether it exists
# (image folder first, captions file second).
print(os.path.exists(IMAGE_DIR), os.path.exists(CAPTION_FILE), sep='\n')


True
True


In [7]:
import os
import numpy as np
import pickle
import tensorflow as tf

from tensorflow.keras.applications.vgg16 import VGG16, preprocess_input
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from tensorflow.keras.models import Model


In [8]:
# Build the full VGG16 network with its default arguments, then wrap it in a
# new Model whose output is the second-to-last layer, i.e. everything except
# the final layer. `vgg` is the feature extractor used by the cells below.
full_vgg16 = VGG16()
penultimate_output = full_vgg16.layers[-2].output
vgg = Model(inputs=full_vgg16.inputs, outputs=penultimate_output)

print("VGG16 loaded successfully")

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/vgg16/vgg16_weights_tf_dim_ordering_tf_kernels.h5
[1m553467096/553467096[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 0us/step
VGG16 loaded successfully


In [9]:
def extract_features(directory):
    """Extract one feature array per image file found in ``directory``.

    Each image is loaded at 224x224, converted to an array, given a leading
    batch axis, passed through ``preprocess_input`` and then through the
    module-level ``vgg`` model.

    Only regular files with a common image extension are processed, in
    sorted order. Other directory entries (sub-folders, ``.DS_Store``, text
    files, ...) are skipped instead of crashing ``load_img`` part-way through
    a long extraction run.

    Args:
        directory: Path of the folder containing the image files.

    Returns:
        dict mapping each image file name (including its extension) to the
        array returned by ``vgg.predict`` for that image. Empty if the folder
        holds no image files.
    """
    image_extensions = ('.jpg', '.jpeg', '.png', '.bmp', '.gif')

    # os.listdir returns every entry in arbitrary order; keep only real image
    # files and sort them so runs are reproducible and progress is meaningful.
    image_list = sorted(
        name
        for name in os.listdir(directory)
        if name.lower().endswith(image_extensions)
        and os.path.isfile(os.path.join(directory, name))
    )
    total_images = len(image_list)

    features = {}
    for i, img_name in enumerate(image_list):
        img_path = os.path.join(directory, img_name)

        image = load_img(img_path, target_size=(224, 224))
        image = img_to_array(image)
        image = np.expand_dims(image, axis=0)  # add the batch axis predict() expects
        image = preprocess_input(image)

        features[img_name] = vgg.predict(image, verbose=0)

        # Report every 500 images and once more on the final image.
        if (i + 1) % 500 == 0 or (i + 1) == total_images:
            print(f"Processed {i + 1}/{total_images} images")

    return features



In [10]:
# Run the extractor over the whole image folder. The model is invoked once per
# image, so this is the slow cell of the notebook.
image_features = extract_features(directory=IMAGE_DIR)
feature_count = len(image_features)
print("Total extracted image features:", feature_count)


Processed 500/8091 images
Processed 1000/8091 images
Processed 1500/8091 images
Processed 2000/8091 images
Processed 2500/8091 images
Processed 3000/8091 images
Processed 3500/8091 images
Processed 4000/8091 images
Processed 4500/8091 images
Processed 5000/8091 images
Processed 5500/8091 images
Processed 6000/8091 images
Processed 6500/8091 images
Processed 7000/8091 images
Processed 7500/8091 images
Processed 8000/8091 images
Processed 8091/8091 images
Total extracted image features: 8091


In [11]:
# Persist the extracted features to Drive as a pickle so they can be reloaded
# without re-running the extraction.
FEATURES_PATH = '/content/drive/MyDrive/Flickr8k/image_features_vgg16.pkl'

with open(FEATURES_PATH, mode='wb') as features_file:
    pickle.dump(image_features, features_file)

print("Image features saved at:", FEATURES_PATH)


Image features saved at: /content/drive/MyDrive/Flickr8k/image_features_vgg16.pkl


In [12]:
# Read the pickled features back from Drive (round-trip check of the file
# written at FEATURES_PATH) and report how many entries were loaded.
with open(FEATURES_PATH, mode='rb') as features_file:
    image_features = pickle.load(features_file)

print("Loaded image features:", len(image_features))


Loaded image features: 8091


In [13]:
# Save the truncated VGG16 feature-extractor model to Drive in HDF5 format.
VGG_MODEL_PATH = '/content/drive/MyDrive/Flickr8k/vgg16_model.h5'

vgg.save(filepath=VGG_MODEL_PATH)

print(f"VGG16 model saved at: {VGG_MODEL_PATH}")



VGG16 model saved at: /content/drive/MyDrive/Flickr8k/vgg16_model.h5


In [14]:
# NOTE(review): this cell is a copy of the previous one and used to rewrite the
# same large model file a second time. Write it only when it is not already on
# Drive so that re-running the notebook does not repeat the upload; delete the
# file first if the model definition changes and must be re-saved.
VGG_MODEL_PATH = '/content/drive/MyDrive/Flickr8k/vgg16_model.h5'
if not os.path.exists(VGG_MODEL_PATH):
    vgg.save(VGG_MODEL_PATH)
print(f"VGG16 model saved at: {VGG_MODEL_PATH}")



VGG16 model saved at: /content/drive/MyDrive/Flickr8k/vgg16_model.h5
