In [1]:
import os
import pickle
import numpy as np
from tqdm.notebook import tqdm

from tensorflow.keras.applications.efficientnet_v2 import EfficientNetV2B0, preprocess_input
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Model
from tensorflow.keras.utils import to_categorical, plot_model
from tensorflow.keras.layers import Input, Dense, Embedding, GRU, Concatenate, Reshape, Dropout, add
import pandas as pd
import re
import nltk
import pickle
from nltk.stem import WordNetLemmatizer
from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
from tensorflow.keras import layers

In [2]:
BASE_DIR = 'Dataset'
WORKING_DIR = ''

In [3]:
with open(os.path.join(BASE_DIR, 'captions.txt'), 'r') as f:
    next(f)
    captions_doc = f.read()

In [4]:
# Create mapping of image to captions
mapping = {}

In [5]:
# Process lines
for line in tqdm(captions_doc.split('\n')):
    tokens = line.split(',')
    if len(line) < 2:
        continue
    image_id, caption = tokens[0], tokens[1:]
    image_id = image_id.split('.')[0]
    caption = " ".join(caption)

    if image_id not in mapping:
        mapping[image_id] = []

    mapping[image_id].append(caption)

  0%|          | 0/40456 [00:00<?, ?it/s]

In [6]:
# Define the clean function
def clean(mapping):
    for key, captions in mapping.items():
        for i in range(len(captions)):
            caption = captions[i].lower()
            caption = re.sub(r'[^a-zA-Z]', ' ', caption)
            caption = re.sub(r'\s+', ' ', caption)
            caption = 'startseq ' + " ".join([word for word in caption.split() if len(word) > 1]) + ' endseq'
            captions[i] = caption

In [7]:
# Preprocess the text
clean(mapping)

In [8]:
# Collect all captions
all_captions = [caption for key in mapping for caption in mapping[key]]

In [9]:
# Tokenize the text
tokenizer = Tokenizer()
tokenizer.fit_on_texts(all_captions)
vocab_size = len(tokenizer.word_index) + 1

In [10]:
# Get the maximum length of the caption
max_length = max(len(caption.split()) for caption in all_captions)

In [11]:
# Get the list of image IDs
image_ids = list(mapping.keys())

In [12]:
BASE_DIR = 'CustomImages'
WORKING_DIR = ''

In [13]:
# Load EfficientNetV2B0 model
base_model = EfficientNetV2B0(weights='imagenet', include_top=False, input_shape=(224, 224, 3))
for layer in base_model.layers:
    layer.trainable = False

In [14]:
from tensorflow.keras.models import Sequential
from tensorflow.keras import models
# Create a sequential model
model = models.Sequential()
model.add(base_model)
model.add(layers.GlobalAveragePooling2D())

In [15]:
# Extract features from image
features = {}
directory = BASE_DIR

In [16]:
for img_name in tqdm(os.listdir(directory)):
    # Load the image from file
    img_path = os.path.join(directory, img_name)
    image = load_img(img_path, target_size=(224, 224))
    image = img_to_array(image)
    image = image.reshape((1, image.shape[0], image.shape[1], image.shape[2]))
    image = preprocess_input(image)

    # Extract features
    feature = model.predict(image, verbose=0)

    # Get image ID
    image_id = img_name.split('.')[0]

    # Store feature
    features[image_id] = feature

  0%|          | 0/10 [00:00<?, ?it/s]

In [17]:
# Store features in pickle
pickle.dump(features, open(os.path.join(WORKING_DIR, 'features.pkl'), 'wb'))

In [18]:
# Load features from pickle
with open(os.path.join(WORKING_DIR, 'features.pkl'), 'rb') as f:
    features = pickle.load(f)

In [19]:
import tensorflow as tf

# Load the model
model = tf.keras.models.load_model('best_model2.h5')

In [20]:
# Function to convert index to word
def idx_to_word(integer, tokenizer):
    for word, index in tokenizer.word_index.items():
        if index == integer:
            return word
    return None

In [21]:
# Generate caption for an image
def predict_caption(model, image, tokenizer, max_length):
    in_text = 'startseq'

    for i in range(max_length):
        sequence = tokenizer.texts_to_sequences([in_text])[0]
        sequence = pad_sequences([sequence], maxlen=max_length)
        yhat = model.predict([image, sequence], verbose=0)
        yhat = np.argmax(yhat)
        word = idx_to_word(yhat, tokenizer)
        if word is None:
            break
        in_text += " " + word
        if word == 'endseq':
            break
    in_text = in_text.replace("startseq", "").replace("endseq", "")
    return in_text

In [22]:
from PIL import Image
import matplotlib.pyplot as plt

def generate_caption(image_name):
    # Load the image
    image_id = image_name.split('.')[0]
    img_path = os.path.join(BASE_DIR, image_name)
    image = Image.open(img_path)

    # Predict the caption
    y_pred = predict_caption(model, features[image_id], tokenizer, max_length)

    # Display the image
    plt.imshow(image)
    plt.axis("off")
    plt.show()
    print('Predicted Caption: ', y_pred, "\n")

In [23]:
generate_caption("sampleimg1.jpg")
generate_caption("sampleimg3.jpeg")
generate_caption("sampleimg5.jpg")
generate_caption("sampleimg7.jpeg")
generate_caption("sampleimg8.jpeg")
generate_caption("sampleimg9.jpeg")
generate_caption("sampleimg11.jpeg")
generate_caption("sampleimg12.jpeg")
generate_caption("sampleimg13.jpeg")
generate_caption("sampleimg14.jpeg")
generate_caption("sampleimg15.jpeg")
generate_caption("sampleimg16.jpeg")

ValueError: in user code:

    File "C:\Users\mayur\AppData\Roaming\Python\Python38\site-packages\keras\src\engine\training.py", line 2341, in predict_function  *
        return step_function(self, iterator)
    File "C:\Users\mayur\AppData\Roaming\Python\Python38\site-packages\keras\src\engine\training.py", line 2327, in step_function  **
        outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "C:\Users\mayur\AppData\Roaming\Python\Python38\site-packages\keras\src\engine\training.py", line 2315, in run_step  **
        outputs = model.predict_step(data)
    File "C:\Users\mayur\AppData\Roaming\Python\Python38\site-packages\keras\src\engine\training.py", line 2283, in predict_step
        return self(x, training=False)
    File "C:\Users\mayur\AppData\Roaming\Python\Python38\site-packages\keras\src\utils\traceback_utils.py", line 70, in error_handler
        raise e.with_traceback(filtered_tb) from None
    File "C:\Users\mayur\AppData\Roaming\Python\Python38\site-packages\keras\src\engine\input_spec.py", line 298, in assert_input_compatibility
        raise ValueError(

    ValueError: Input 1 of layer "model" is incompatible with the layer: expected shape=(None, 74), found shape=(None, 35)
