In [13]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [14]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import keras
import re
import nltk
from nltk.corpus import stopwords
import string
import json
from time import time
import pickle
from keras.applications.vgg16 import VGG16, preprocess_input, decode_predictions
from keras.applications import ResNet50
from keras.preprocessing import image
from keras.models import Model, load_model
from tensorflow.keras.utils import pad_sequences
from keras.utils import to_categorical
from keras.layers import Input, Dense, Dropout, Embedding, LSTM
from keras.layers import add
from keras.optimizers import Adam

# If you need to compile the model, extract the learning rate and pass it explicitly
# with open("/content/drive/MyDrive/image_captioning_flask/model_9.h5", "rb") as f:
#     model_data = pickle.load(f)  # Assuming the model was saved using pickle
#     lr = model_data['optimizer_config']['lr']  # Extract the learning rate

# Compile the model with the extracted learning rate
# optimizer = Adam(learning_rate=lr)  # Use the extracted learning rate
# model.compile(optimizer=optimizer, ...)  # ... specify other compilation parameters

In [15]:
# Load the model, handling potential legacy format
model = load_model("/content/drive/MyDrive/image_captioning_flask/model_9 (1).h5", compile=False)

In [16]:
model_temp=ResNet50(weights="imagenet",input_shape=(224,224,3))

In [17]:
model_resnet=Model(model_temp.input,model_temp.layers[-2].output)

In [18]:
import tensorflow as tf

# ... (rest of your code)

def preprocess_img(img):
    img = tf.keras.utils.load_img(img, target_size=(224, 224)) # Use tf.keras.utils.load_img
    img = tf.keras.utils.img_to_array(img) # Use tf.keras.utils.img_to_array
    img = np.expand_dims(img, axis=0)
    img = tf.keras.applications.resnet50.preprocess_input(img) # Assuming you're using ResNet50
    return img

In [19]:
def encode_image(img):
    img=preprocess_img(img)
    feature_vector=model_resnet.predict(img)
    feature_vector=feature_vector.reshape(1,feature_vector.shape[1])
    return feature_vector

In [20]:
with open("/content/drive/MyDrive/image_captioning_flask/word_to_idx.pkl", "rb") as w2i:
    word_to_idx = pickle.load(w2i)

with open("/content/drive/MyDrive/image_captioning_flask/idx_to_word.pkl", "rb") as i2w:
    idx_to_word = pickle.load(i2w)

In [21]:
max_len=35
def predict_caption(photo):

    in_text = "startseq"
    for i in range(max_len):
        sequence = [word_to_idx[w] for w in in_text.split() if w in word_to_idx]
        sequence = pad_sequences([sequence],maxlen=max_len,padding='post')

        ypred = model.predict([photo,sequence])
        ypred = ypred.argmax() #WOrd with max prob always - Greedy Sampling
        word = idx_to_word[ypred]
        in_text += (' ' + word)

        if word == "endseq":
            break

    final_caption = in_text.split()[1:-1]
    final_caption = ' '.join(final_caption)
    return final_caption

In [22]:
enc=encode_image("/content/drive/MyDrive/image_captioning_flask/dog.jpeg")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 4s/step


In [23]:
enc.shape

(1, 2048)

In [24]:
predict_caption(enc)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 96ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 114ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 159ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 129ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 98ms/step


'dog runs through the grass'

In [25]:
def caption_this_image(image):

    enc=encode_image(image)
    caption=predict_caption(enc)
    return caption
