# Captioning evaluator

## Import libraries

In [None]:
# Basic libraries
import json
import pickle

import cv2
import matplotlib.pyplot as plt
import numpy as np

# Keras basic libraries
from keras import backend
from keras import Model
from keras.applications import inception_resnet_v2
from keras.models import load_model
from keras.preprocessing.sequence import pad_sequences
from keras.preprocessing.text import Tokenizer

## Define Input Parameters

In [None]:
# Image size, as (width, height), that every picture is resized to
INPUT_WIDTH, INPUT_HEIGHT = 299, 299
INPUT_SIZE = (INPUT_WIDTH, INPUT_HEIGHT)

## Loading Model

In [None]:
# Length every token sequence is padded to; also bounds the number of
# decoding steps in the search functions.
MAX_SEQ_LEN = 50

# INPUT_WIDTH / INPUT_HEIGHT / INPUT_SIZE are defined once in the parameters
# cell; they are deliberately not re-declared here so the two copies cannot
# drift apart.

# model_fe turns a preprocessed image into features; model predicts the next
# word from [features, padded token sequence].
model_fe = load_model('model_inception_resnet_v2.h5')
model = load_model('model_captioning_attention.h5')

# SECURITY: pickle.load can execute arbitrary code embedded in the file.
# Only load a tokenizer file that comes from a trusted source.
with open('/gdrive/My Drive/Final Project/tokenizer.pkl', 'rb') as f:
    tokenizer = pickle.load(f)

## Define Greedy and Beam Search Functions

In [None]:
# Greedy search
def greedySearch(photo):
    """Generate a caption by always taking the most probable next word.

    Decoding starts from '<start>' and runs for at most MAX_SEQ_LEN steps,
    stopping early when the model emits '<end>' or an index that has no
    entry in ``tokenizer.index_word``.

    Args:
        photo: Image features, passed unchanged as the first input of
            ``model.predict``.

    Returns:
        The caption as a space-separated string without the '<start>' and
        '<end>' markers.
    """
    words = ['<start>']
    for _ in range(MAX_SEQ_LEN):
        sequence = [tokenizer.word_index[w] for w in words if w in tokenizer.word_index]
        # NOTE(review): this uses the pad_sequences default padding side,
        # while beam_search_predictions passes padding='post' -- confirm
        # which one matches how the captioning model was trained.
        sequence = pad_sequences([sequence], maxlen=MAX_SEQ_LEN)
        yhat = model.predict([photo, sequence], verbose=0)
        # .get() avoids a KeyError when the arg-max is an index without a
        # word (e.g. the padding index).
        word = tokenizer.index_word.get(int(np.argmax(yhat)))
        if word is None or word == '<end>':
            break
        words.append(word)
    # Only the leading '<start>' is stripped: '<end>' is never appended, so
    # a caption that reaches the step limit keeps its last real word.
    return ' '.join(words[1:])

# Beam search
def beam_search_predictions(image, beam_index = 3):
    """Generate a caption with beam search.

    At every step each unfinished beam is extended with its ``beam_index``
    most probable next tokens, and only the ``beam_index`` best-scoring
    beams are kept. A beam is scored by the sum of the log-probabilities of
    its tokens, so the score corresponds to the likelihood of the whole
    sequence. Beams that have produced '<end>' are carried forward
    unchanged instead of being extended further.

    Args:
        image: Image features, passed unchanged as the first input of
            ``model.predict``.
        beam_index: Number of beams kept after every step (beam width).

    Returns:
        The best-scoring caption as a space-separated string without the
        '<start>' and '<end>' markers.

    Raises:
        ValueError: If ``beam_index`` is smaller than 1.
    """
    if beam_index < 1:
        # A width of 0 would make the [-beam_index:] slices select everything.
        raise ValueError('beam_index must be a positive integer')

    end_token = tokenizer.word_index.get('<end>')
    # Each beam is [token ids, cumulative log-probability].
    beams = [[[tokenizer.word_index['<start>']], 0.0]]

    # The sequence already holds '<start>', so at most MAX_SEQ_LEN - 1 tokens
    # can be added before it reaches MAX_SEQ_LEN.
    for _ in range(MAX_SEQ_LEN - 1):
        candidates = []
        for tokens, score in beams:
            if tokens[-1] == end_token:
                # Finished caption: keep it as-is and skip the model call.
                candidates.append([tokens, score])
                continue

            sequence = pad_sequences([tokens], maxlen=MAX_SEQ_LEN, padding='post')
            # assumes the model outputs a probability distribution over the
            # vocabulary (softmax) -- TODO confirm
            preds = model.predict([image, sequence], verbose=0)[0]

            for w in np.argsort(preds)[-beam_index:]:
                # Clip before the log so a zero probability gives a very low
                # score instead of -inf.
                step_score = float(np.log(max(float(preds[w]), 1e-12)))
                candidates.append([tokens + [int(w)], score + step_score])

        # Ascending sort: the best beam ends up last.
        candidates.sort(key=lambda beam: beam[1])
        beams = candidates[-beam_index:]

        if all(tokens[-1] == end_token for tokens, _ in beams):
            break

    best_tokens = beams[-1][0]

    final_caption = []
    # Skip the leading '<start>' token and stop at the first '<end>'.
    for token in best_tokens[1:]:
        word = tokenizer.index_word.get(token)
        if word == '<end>':
            break
        if word is not None:
            # Indices without a word (e.g. the padding index) are dropped.
            final_caption.append(word)

    return ' '.join(final_caption)

## Labeling Images

In [None]:
# Load the list of image records; each record provides a 'file_name'
with open('image_captioning_info.json', 'r') as f:
    json_image_info = json.load(f)

# One caption per image and decoding strategy, in the order of json_image_info
caption_greedy = []
caption_beam_3 = []
caption_beam_5 = []
caption_beam_7 = []

for image_dict in json_image_info:
    raw_image = cv2.imread(image_dict['file_name'])
    if raw_image is None:
        # cv2.imread reports a missing/unreadable file by returning None;
        # fail with a clear message instead of an obscure resize error.
        raise FileNotFoundError('Could not read image: %s' % image_dict['file_name'])

    # NOTE(review): cv2.imread returns channels in BGR order -- confirm the
    # feature extractor was fed BGR images during training, otherwise convert
    # to RGB here.
    img = inception_resnet_v2.preprocess_input(cv2.resize(raw_image, INPUT_SIZE))

    # predict() works on batches, so add a leading batch axis. The features
    # are extracted once and shared by all four decoders.
    features = model_fe.predict(np.expand_dims(img, axis=0))

    caption_greedy.append(greedySearch(features))
    caption_beam_3.append(beam_search_predictions(features))
    caption_beam_5.append(beam_search_predictions(features, 5))
    caption_beam_7.append(beam_search_predictions(features, 7))

## Saving Labels

In [None]:
import pickle

# Write every caption list to its own pickle file, in a fixed order
for filename, captions in (
    ('captions_greedy.pkl', caption_greedy),
    ('captions_beam_3.pkl', caption_beam_3),
    ('captions_beam_5.pkl', caption_beam_5),
    ('captions_beam_7.pkl', caption_beam_7),
):
    with open(filename, 'wb') as f:
        pickle.dump(captions, f)