In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import keras
import json
import pickle
from keras.applications.vgg16 import VGG16
from keras.applications.resnet50 import ResNet50, preprocess_input, decode_predictions
from keras.preprocessing import image
from keras.models import Model, load_model
from keras.preprocessing.sequence import pad_sequences
from keras.utils import to_categorical
from keras.layers import Input, Dense, Dropout, Embedding, LSTM
from keras.layers.merge import add

In [19]:
# Load the saved Keras captioning model from disk; predict_caption() below
# calls model.predict([photo, sequence]) on it.
model = load_model('model_weights/model_9.h5')

In [4]:
# Build ResNet50 with ImageNet weights for 224x224x3 inputs; it is only used
# as the base for the truncated feature extractor (model_resnet).
model_temp = ResNet50(weights="imagenet", input_shape=(224,224,3))

In [5]:
# Feature extractor: same input as ResNet50, but the output is taken from the
# second-to-last layer (layers[-2]) instead of the final layer.
model_resnet = Model(model_temp.input, model_temp.layers[-2].output)

In [6]:
def preprocess_image(img):
    """Load an image file and turn it into a single-item batch for ResNet50.

    Args:
        img: Path of the image file, passed to ``image.load_img``.

    Returns:
        The result of ``preprocess_input`` applied to the image array after
        it was loaded at 224x224 and given a leading batch axis.
    """
    # Load at the 224x224 size and convert straight to an array.
    pixels = image.img_to_array(image.load_img(img, target_size=(224, 224)))
    # The network expects a batch, so prepend a batch axis of length 1.
    batch = np.expand_dims(pixels, axis=0)
    return preprocess_input(batch)

In [12]:
def encode_image(img):
    """Encode an image file into a feature vector using ``model_resnet``.

    Args:
        img: Path of the image file, forwarded to ``preprocess_image``.

    Returns:
        The ``model_resnet`` prediction reshaped to ``(1, n_features)``,
        where ``n_features`` is the size of the prediction's second axis.
    """
    features = model_resnet.predict(preprocess_image(img))
    n_features = features.shape[1]
    # Explicit (1, n_features) shape: this raises if the prediction holds
    # more than one row rather than silently flattening it.
    return features.reshape(1, n_features)

In [13]:
# Encode the sample image; `enc` is later passed to predict_caption().
enc = encode_image("dog.jpg")

In [14]:
# Display the encoded feature vector for the sample image.
enc

array([[9.7723722e-01, 6.0778087e-01, 4.2118469e-01, ..., 3.9965197e-01,
        5.3189451e-01, 5.5896991e-04]], dtype=float32)

In [15]:
# Check the feature vector's shape (one row per image).
enc.shape

(1, 2048)

In [21]:
# Load the word -> index mapping that predict_caption() uses to turn caption
# words into integer ids.
# NOTE: pickle.load can execute arbitrary code; only unpickle trusted files.
with open("./storage/word_to_idx.pkl",'rb') as w2i:
    word_to_idx = pickle.load(w2i)
    

In [22]:
# Load the index -> word mapping that predict_caption() uses to turn the
# model's predicted index back into a word.
# NOTE: pickle.load can execute arbitrary code; only unpickle trusted files.
with open("./storage/idx_to_word.pkl",'rb') as i2w:
    idx_to_word = pickle.load(i2w)

In [23]:
def predict_caption(photo, max_len=35):
    """Generate a caption for an encoded image by greedy decoding.

    Starting from ``"startseq"``, the caption generated so far is converted
    to word indices, padded to ``max_len`` and passed to ``model`` together
    with ``photo``. The highest-scoring index is mapped back to a word and
    appended. Decoding stops when ``"endseq"`` is predicted or after
    ``max_len`` steps.

    Args:
        photo: Encoded image passed as the first model input (the notebook
            passes the output of ``encode_image``).
        max_len: Both the padded sequence length fed to the model and the
            maximum number of decoding steps. It must match the sequence
            length the loaded model expects; defaults to 35.

    Returns:
        The generated words joined by single spaces, without the
        ``"startseq"``/``"endseq"`` markers. If the step limit is reached
        before ``"endseq"`` is predicted, every generated word is kept.

    Raises:
        KeyError: If the predicted index is missing from ``idx_to_word``.
    """
    start_token = "startseq"
    end_token = "endseq"

    # Keep the caption as a token list rather than re-splitting a growing
    # string on every step.
    tokens = [start_token]

    for _ in range(max_len):
        # Tokens missing from the vocabulary are skipped, not rejected.
        sequence = [word_to_idx[w] for w in tokens if w in word_to_idx]
        sequence = pad_sequences([sequence], maxlen=max_len, padding='post')

        ypred = model.predict([photo, sequence])
        word = idx_to_word[int(ypred.argmax())]

        # Stop without storing the end marker, so the final slice removes
        # only the start marker and never a real word.
        if word == end_token:
            break
        tokens.append(word)

    return ' '.join(tokens[1:])

In [24]:
# Generate and display the caption for the encoded sample image.
predict_caption(enc)

'dog is running through the grass'