In [None]:
import os
import json
import collections
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt

from image_captioning.constants import PROJECT_PATH

# Download data

Downloading the COCO 2014 caption annotations and training images can take some time, as it's ~13GB of data.

In [None]:
# Directory under the project root that holds the downloaded dataset; the
# annotation file and image folder paths below are built relative to it.
data_folder = os.path.join(PROJECT_PATH, 'data')

In [None]:
# Download caption annotation files (skipped when they are already on disk).
annotation_file = os.path.join(data_folder, 'annotations/captions_train2014.json')

# Guard on the annotation file itself rather than on `data_folder`: the folder
# can exist without the annotations (e.g. created by hand or by another
# download), in which case the download must still run.
if not os.path.exists(annotation_file):
    annotation_zip = tf.keras.utils.get_file(
        fname='captions.zip',
        origin='http://images.cocodataset.org/annotations/annotations_trainval2014.zip',
        cache_subdir=data_folder,
        extract=True
    )
    # The archive is not needed once its contents have been extracted.
    os.remove(annotation_zip)

In [None]:
# Download image files (skipped when they are already on disk).
# `image_folder` has to be defined before the guard: it is the path we check,
# and referencing it before assignment raises NameError on a fresh kernel.
image_folder = os.path.join(data_folder, 'train2014')

# Guard on `image_folder`, not `data_folder`: the annotation download already
# creates `data_folder`, which would otherwise cause this download to be skipped.
if not os.path.exists(image_folder):
    image_zip = tf.keras.utils.get_file(
        fname='train2014.zip',
        origin='http://images.cocodataset.org/zips/train2014.zip',
        # Extract into `data_folder`; assumes the archive unpacks to a top-level
        # `train2014/` directory so the images end up in `image_folder`.
        cache_subdir=data_folder,
        extract=True
    )
    # The archive is not needed once its contents have been extracted.
    os.remove(image_zip)

# Load data

In [None]:
# Parse the caption annotation JSON into memory.
# JSON text is UTF-8; pass the encoding explicitly so decoding does not depend
# on the platform's locale default (which is not UTF-8 on every system).
with open(annotation_file, 'r', encoding='utf-8') as f:
    annotations = json.load(f)

In [None]:
# Map every image file path to the list of captions annotated for that image.
# Each caption is wrapped in <start>/<end> marker tokens.
image_path_to_caption = collections.defaultdict(list)
for annotation in annotations['annotations']:
    caption = f"<start> {annotation['caption']} <end>"
    # Image file names embed the image ID zero-padded to 12 digits.
    image_file_name = 'COCO_train2014_{:012d}.jpg'.format(annotation['image_id'])
    image_path = os.path.join(image_folder, image_file_name)
    image_path_to_caption[image_path].append(caption)

In [None]:
# All distinct image paths, in the order they were first seen in the annotations.
image_paths = list(image_path_to_caption)

In [None]:
# Display one randomly picked image, using all of its captions as the title.
fig, ax = plt.subplots(figsize=(8, 8))

random_image_path = np.random.choice(image_paths)
title_text = "\n".join(image_path_to_caption[random_image_path])
ax.set_title(title_text, fontsize=14)

image_pixels = plt.imread(random_image_path)
# Trailing semicolon suppresses the AxesImage repr in the cell output.
ax.imshow(image_pixels);