In [36]:
import numpy as np 
import pandas as pd 
import os 
import pickle
import matplotlib.pyplot as plt
from PIL import Image
import skimage
from keras.preprocessing import image
from keras.applications import VGG16
from keras.models import Model
from keras import backend

In [37]:
def load_data(filename):
    """Load image ids and their captions from ``./Dataset/<filename>.json``.

    The file is parsed with ``pd.read_json``; every column label of the
    resulting frame is treated as an image id and the column itself as the
    captions belonging to that image. A short size summary is printed.

    Args:
        filename: Base name of the JSON file, without the ``.json`` suffix.

    Returns:
        A tuple ``(image_id_list, caption_list)``. ``image_id_list`` is the
        frame's column index and ``caption_list[i]`` is the pandas Series
        stored under ``image_id_list[i]``.
    """
    data = pd.read_json('./Dataset/'+filename+'.json')
    image_id_list = data.columns
    caption_list = [data[image_id] for image_id in image_id_list]

    # An empty file has no first entry to measure; report 0 rather than
    # failing with IndexError on caption_list[0].
    captions_per_image = len(caption_list[0]) if caption_list else 0
    print("Data Size - ", len(image_id_list), "\nCaptions Per Image - ", captions_per_image)

    return image_id_list, caption_list

In [38]:
# Read ./Dataset/train_data.json into the training image ids and their captions.
image_list_train, caption_list_train = load_data('train_data')

Data Size -  118287 
Captions Per Image -  5


In [39]:
# Read ./Dataset/val_data.json into the validation image ids and their captions.
image_list_val, caption_list_val = load_data('val_data')

Data Size -  5000 
Captions Per Image -  5


In [40]:
# Build VGG16 including its top (fully connected) layers, initialised with
# ImageNet weights; summary() prints the layer table shown below.
vgg16_model = VGG16(include_top = True, weights = 'imagenet')
vgg16_model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_2 (InputLayer)         (None, 224, 224, 3)       0         
_________________________________________________________________
block1_conv1 (Conv2D)        (None, 224, 224, 64)      1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, 224, 224, 64)      36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, 112, 112, 64)      0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, 112, 112, 128)     73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, 112, 112, 128)     147584    
_________________________________________________________________
block2_pool (MaxPooling2D)   (None, 56, 56, 128)       0         
__________

In [41]:
# Look up the layer named 'fc2'; its output is used as the image feature
# vector by the cells below. The bare name on the last line displays the layer.
dense_output = vgg16_model.get_layer('fc2')
dense_output

<keras.layers.core.Dense at 0x7f2d5e2b5978>

In [42]:
# Feature extractor: same input as VGG16, but the output is taken at fc2.
custom_vgg16 = Model(inputs = vgg16_model.input, outputs = dense_output.output)

In [43]:
def load_image(path, size = None):
    """Read an image file and return it as a 3-channel float array.

    The image is always converted to RGB first, so grayscale, palette and
    RGBA/CMYK files all come back with shape ``(height, width, 3)`` and can be
    written into a fixed 3-channel batch buffer.

    Args:
        path: Location of the image file.
        size: Optional ``(height, width)`` to resize to. ``None`` keeps the
            image's own resolution.

    Returns:
        A numpy array of shape ``(height, width, 3)`` whose pixel values have
        been divided by 255.
    """
    # Imported locally because a bare ``import skimage`` is not guaranteed to
    # load the ``transform`` submodule on every scikit-image release.
    from skimage.transform import resize

    # The context manager releases the file handle; convert() decodes the
    # pixels into an independent in-memory image that outlives the ``with``.
    with Image.open(path) as img_file:
        img = img_file.convert('RGB')

    if size is not None:
        img = image.img_to_array(img)
        img = resize(img, size)

    img = np.array(img)
    img = img / 255.0
    return img

In [44]:
def show_image(idx, train, image_id_list, caption_list):
    """Print the captions of one image and display the image with matplotlib.

    Args:
        idx: Position of the image in ``image_id_list`` / ``caption_list``.
        train: When truthy the image is read from ``<cwd>/Dataset/val_2017/``,
            otherwise from ``<cwd>/Dataset/Test_Image/``.
        image_id_list: Sequence of image ids; ``image_id_list[idx]`` names the
            file (``<id>.jpg``).
        caption_list: Sequence aligned with ``image_id_list``;
            ``caption_list[idx]`` is iterated and each caption printed.
    """
    cwd = os.getcwd()

    # NOTE(review): the ``train`` branch reads val_2017 while process_train
    # reads train_2017 -- confirm which directory is intended here.
    if train:
        dir_p = cwd+'/Dataset/val_2017/'
    else:
        dir_p = cwd+'/Dataset/Test_Image/'

    # Both branches index the same argument lists (the non-train branch
    # previously referenced an undefined name).
    filename = image_id_list[idx]
    captions = caption_list[idx]

    path = os.path.join(dir_p, str(filename)+'.jpg')
    if not os.path.exists(path):
        # process_images names files with the id zero-padded to 12 digits;
        # fall back to that spelling when the plain id is not on disk.
        padded_path = os.path.join(dir_p, str(filename).zfill(12)+'.jpg')
        if os.path.exists(padded_path):
            path = padded_path

    for caption in captions:
        print(caption)

    img = load_image(path)
    plt.imshow(img)
    plt.show()

In [73]:
def process_images(data_dir, image_id_list, batch_size):
    """Run every listed image through ``custom_vgg16`` and collect the outputs.

    Images are loaded in batches with ``load_image`` (resized to the spatial
    size of ``vgg16_model``'s input) and passed to ``custom_vgg16.predict``.
    The function relies on the module-level ``vgg16_model``, ``dense_output``
    and ``custom_vgg16`` objects.

    Args:
        data_dir: Directory holding the image files. Each file is expected to
            be named as the image id zero-padded to 12 digits plus ``.jpg``.
        image_id_list: Sliceable sequence of image ids.
        batch_size: Number of images sent to the model per ``predict`` call.

    Returns:
        A ``float16`` numpy array of shape ``(len(image_id_list), width)``,
        where ``width`` is the second dimension of ``dense_output.output``.
        Row ``i`` holds the model output for ``image_id_list[i]``.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1 (the batching loop
            could never advance).
    """
    if batch_size < 1:
        raise ValueError("batch_size must be a positive integer, got %r" % (batch_size,))

    print(data_dir)

    num_of_images = len(image_id_list)
    image_size = backend.int_shape(vgg16_model.input)[1:3]
    dense_output_size = backend.int_shape(dense_output.output)[1]

    # Reusable input buffer; only the first current_batch_size rows are fed
    # to the model on the (possibly shorter) final batch.
    image_shape = (batch_size,) + image_size + (3,)
    print(image_shape)
    image_batch = np.zeros(shape = image_shape, dtype = np.float16)

    dense_val_shape = (num_of_images, dense_output_size)
    dense_values = np.zeros(shape = dense_val_shape, dtype = np.float16)

    for start_index in range(0, num_of_images, batch_size):
        # "\r" keeps the progress counter on a single output line.
        print(start_index, end = "\r")
        end_index = min(start_index + batch_size, num_of_images)
        current_batch_size = end_index - start_index

        for i, image_id in enumerate(image_id_list[start_index:end_index]):
            file_name = str(image_id).zfill(12) + '.jpg'
            path = os.path.join(data_dir, file_name)
            image_batch[i] = load_image(path, size = image_size)

        dense_values[start_index:end_index] = custom_vgg16.predict(image_batch[0:current_batch_size])

    return dense_values

In [74]:
def cache(cache_path, func, *args, **kwargs):
    """Return the object pickled at ``cache_path``, computing it on a miss.

    If ``cache_path`` exists it is unpickled and returned without calling
    ``func``. Otherwise ``func(*args, **kwargs)`` is evaluated, its result is
    pickled to ``cache_path`` and returned.

    Args:
        cache_path: File used to store / retrieve the pickled result.
        func: Callable that produces the value when no cache file exists.
        *args: Positional arguments forwarded to ``func``.
        **kwargs: Keyword arguments forwarded to ``func``.

    Returns:
        The cached or freshly computed object.
    """
    if os.path.exists(cache_path):
        # NOTE: pickle.load can execute arbitrary code -- only point this at
        # cache files produced by this notebook.
        with open(cache_path, 'rb') as file:
            obj = pickle.load(file)
        print('Data Loaded from Cache File: ', cache_path)
        return obj

    # Make sure the destination exists *before* the (possibly very slow)
    # computation, so its result cannot be lost to a failing open().
    cache_dir = os.path.dirname(cache_path)
    if cache_dir:
        os.makedirs(cache_dir, exist_ok = True)

    # Keyword arguments must be unpacked with ** so they reach func by name.
    obj = func(*args, **kwargs)
    with open(cache_path, 'wb') as file:
        pickle.dump(obj, file)
    return obj

In [75]:
def process_train():
    """Return the model features for the training images, using a disk cache.

    Images are read from ``<cwd>/Dataset/train_2017/`` and the result is
    cached in ``<cwd>/Cache/dense_values_train.pkl`` through ``cache``.

    Returns:
        Whatever ``process_images`` produces for ``image_list_train`` (or the
        previously pickled copy of it).
    """
    pwd = os.getcwd()
    train_path = pwd + '/Dataset/train_2017/'
    cache_dir = pwd + '/Cache'
    cache_path = cache_dir + '/dense_values_train.pkl'

    # Ensure the cache directory exists before the long-running extraction.
    os.makedirs(cache_dir, exist_ok = True)

    # process_images requires a batch size; 32 matches process_val. The
    # arguments are forwarded positionally, in process_images' parameter order.
    dense_values = cache(cache_path,
                         process_images,
                         train_path,
                         image_list_train,
                         32)
    return dense_values

In [76]:
def process_val():
    """Return the model features for the validation images, using a disk cache.

    If ``<cwd>/Cache/dense_values_val.pkl`` exists it is unpickled and
    returned. Otherwise the images in ``<cwd>/Dataset/val_2017/`` are run
    through ``process_images`` with a batch size of 32 and the result is
    pickled to that file.

    Returns:
        The cached or freshly computed result of ``process_images`` for
        ``image_list_val``.
    """
    pwd = os.getcwd()
    val_path = pwd + '/Dataset/val_2017/'
    cache_dir = pwd + '/Cache'
    cache_path = cache_dir + '/dense_values_val.pkl'

    if os.path.exists(cache_path):
        # NOTE: pickle.load can execute arbitrary code -- only load cache
        # files produced by this notebook.
        with open(cache_path, 'rb') as file:
            return pickle.load(file)

    # Create the cache directory up front so the extraction result cannot be
    # lost to a failing open() afterwards.
    os.makedirs(cache_dir, exist_ok = True)

    obj = process_images(val_path, image_list_val, 32)
    with open(cache_path, 'wb') as file:
        pickle.dump(obj, file)

    return obj

In [77]:
# Compute the validation features (or load them from Cache/dense_values_val.pkl)
# and report the shape of the resulting array.
dense_values_val = process_val()
print("Shape - ", dense_values_val.shape)

/home/kshitij/Desktop/Sem2/SMAI/Project/captioning-images/Dataset/val_2017/
(32, 224, 224, 3)
Shape -  (5000, 4096)
