In [1]:
import json
import os
import pickle
import re

import numpy as np
from tqdm import tqdm

In [2]:
import tensorflow as tf
from tensorflow.keras.applications.vgg16 import VGG16, preprocess_input
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from tensorflow.keras.models import Model

In [3]:
# --- 1. Setup Paths and Load Data ---
# Progress banner marking the start of the path-setup / data-loading stage.
print("Step 1: Setting up paths and loading data...")

Step 1: Setting up paths and loading data...


In [4]:
# Default dataset locations, relative to the directory the notebook is run from.
# Nothing is checked on disk here; these are only the initial guesses.
annotations_file = 'dataset/annotations/captions_train2017.json'  # captions JSON
image_dir = 'dataset/train2017'  # directory holding the training images

In [5]:
# If the annotations are not reachable from the working directory, retry one
# level up. Both paths are shifted together so they keep pointing at the same
# dataset root.
if not os.path.exists(annotations_file):
    print("Dataset not found in current directory. Assuming execution from 'image_caption' subdirectory.")
    annotations_file, image_dir = (
        os.path.join('..', relative_path)
        for relative_path in (annotations_file, image_dir)
    )

Dataset not found in current directory. Assuming execution from 'image_caption' subdirectory.


In [6]:
# Stop early, with the resolved absolute path in the message, when the
# annotations file still cannot be found.
if not os.path.exists(annotations_file):
    resolved_path = os.path.abspath(annotations_file)
    raise FileNotFoundError(
        f"Error: The annotations file was not found at the expected path: {resolved_path}. "
        "Please make sure you are running this from the project root or the 'image_caption' directory."
    )

In [7]:
# Load the caption annotations JSON into memory as `data`.
print(f"Loading annotations from {os.path.abspath(annotations_file)}...")
# JSON text is UTF-8; without an explicit encoding open() uses the platform
# locale (e.g. cp1252 on Windows), which can mis-decode or reject non-ASCII
# characters in captions.
with open(annotations_file, 'r', encoding='utf-8') as f:
    data = json.load(f)

Loading annotations from d:\Zidio_development\dataset\annotations\captions_train2017.json...


In [8]:
# Group captions by image: each key is the image ID converted to a string and
# each value is the list of that image's captions, in annotation order.
mapping = {}
for annot in tqdm(data['annotations'], desc="Loading Captions"):
    mapping.setdefault(str(annot['image_id']), []).append(annot['caption'])

Loading Captions: 100%|██████████| 591753/591753 [00:01<00:00, 485561.73it/s]


In [9]:
# --- 2. Preprocess Text Data ---
# Progress banner; the leading "\n" leaves a blank line before it in the output.
print("\nStep 2: Preprocessing text data...")
def preprocess_captions(mapping):
    """Normalise every caption in ``mapping`` in place.

    Each caption is lower-cased, stripped of every character that is not an
    ASCII letter or whitespace (punctuation, digits, symbols), reduced to its
    words of more than one character, and wrapped in the ``startseq`` /
    ``endseq`` sequence markers.

    The cleaning uses ``re.sub``; ``str.replace`` treats its first argument as
    a literal substring, so a regex pattern passed to it never matches.

    The function is idempotent: markers left by a previous run are dropped
    before the caption is wrapped again, so re-running the notebook cell does
    not produce ``startseq startseq ...``.

    Args:
        mapping: dict whose values are lists of caption strings. The lists
            are modified in place.

    Returns:
        None.
    """
    for captions in mapping.values():
        for i, raw_caption in enumerate(captions):
            text = re.sub(r'[^a-z\s]', '', raw_caption.lower())
            # split() collapses any run of whitespace, so no separate
            # whitespace-normalisation pass is needed.
            words = [word for word in text.split() if len(word) > 1]
            # Drop markers added by an earlier call so the result is stable.
            if words and words[0] == 'startseq':
                words = words[1:]
            if words and words[-1] == 'endseq':
                words = words[:-1]
            captions[i] = ' '.join(['startseq', *words, 'endseq'])
# Clean all captions; `mapping` is modified in place (no new dict is created).
preprocess_captions(mapping)


Step 2: Preprocessing text data...


In [10]:
# Persist the cleaned caption mapping to the working directory so later
# stages can load it instead of re-parsing the annotations.
with open('processed_captions_mapping.pkl', 'wb') as mapping_file:
    pickle.dump(mapping, mapping_file)
print("Saved processed captions mapping")

Saved processed captions mapping


In [11]:
# --- 3. Extract Image Features with VGG16 ---
print("\nStep 3: Extracting image features with VGG16...")
# Build the default VGG16, then re-wire it so the model's output is taken from
# the second-to-last layer rather than the final layer.
# NOTE(review): presumably this is the 4096-unit `fc2` layer of the stock
# architecture; confirm with vgg_model.summary().
full_vgg = VGG16()
penultimate_output = full_vgg.layers[-2].output
vgg_model = Model(inputs=full_vgg.inputs, outputs=penultimate_output)


Step 3: Extracting image features with VGG16...


In [12]:
# Accumulator for the extracted features, keyed by image ID string.
features = {}
# os.listdir returns entries in arbitrary order; sorting makes the slice below
# select the same images on every run and on every platform.
image_files = sorted(os.listdir(image_dir))
image_files = image_files[:20000]  # Process first 20000 images

In [13]:
# Report how many files were selected (may be fewer than the slice limit).
print(f"Processing {len(image_files)} images...")

Processing 20000 images...


In [14]:
# Run every selected file through the truncated VGG16 and store one feature
# array per image in `features`, keyed by the image ID as a string.
# NOTE(review): this issues one predict() call per image; feeding batches
# would likely cut the runtime substantially. Worth benchmarking.
for img_name in tqdm(image_files, desc="Extracting features"):
    img_path = os.path.join(image_dir, img_name)
    try:
        image = load_img(img_path, target_size=(224, 224))
        image = img_to_array(image)
        # Prepend a batch axis of size 1.
        image = image.reshape((1, *image.shape))
        image = preprocess_input(image)
        feature = vgg_model.predict(image, verbose=0)

        # Extract image ID from filename (assuming COCO format, e.g., '000000397133.jpg').
        # Stripping the zero padding makes the key match str(image_id) as used
        # for the caption mapping; `or '0'` keeps an all-zero stem from
        # collapsing to the empty string.
        image_id = img_name.split('.')[0].lstrip('0') or '0'
        features[image_id] = feature
    except Exception as e:
        # Deliberate best-effort: report the unreadable file and move on to
        # the next one rather than aborting a long-running extraction.
        print(f"Error processing {img_name}: {e}")

Extracting features: 100%|██████████| 20000/20000 [54:59<00:00,  6.06it/s] 


In [15]:
# Save the features after processing all images.
# The output path is named once so the file written and the file reported in
# the confirmation message cannot drift apart.
features_path = 'vgg16_features_20000.pkl'
print(f"\nSaving features for {len(features)} images...")
with open(features_path, 'wb') as f:
    pickle.dump(features, f)
print(f"Successfully saved VGG16 features to '{features_path}'")


Saving features for 20000 images...
Successfully saved VGG16 features to 'vgg16_features_20000.pkl'


In [16]:
# Keep a pickled copy of the annotations exactly as loaded from the JSON file,
# for use by later stages.
with open('original_annotations_data.pkl', 'wb') as annotations_out:
    pickle.dump(data, annotations_out)
print("Saved original annotations data")

Saved original annotations data


In [17]:
# Final summary: list every artifact this notebook writes, not only the
# feature file, so the reader knows what to expect in the working directory.
print("\nFeature extraction completed successfully!")
print("Files created:")
print("- processed_captions_mapping.pkl: cleaned captions keyed by image ID")
print("- vgg16_features_20000.pkl: VGG16 features for images")
print("- original_annotations_data.pkl: annotations data as loaded from the JSON file")


Feature extraction completed successfully!
Files created:
- vgg16_features_20000.pkl: VGG16 features for images
