In [15]:
import os
import cv2
import random
import numpy as np
from keras.applications.inception_v3 import InceptionV3,preprocess_input
from keras.models import Model,Sequential,load_model
import re
import string
import tensorflow as tf
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.image import load_img,img_to_array
from keras.preprocessing.sequence import pad_sequences
import pandas as pd
import pickle
from tkinter import *
from tkinter.filedialog import askopenfile
from PIL import Image, ImageTk


In [20]:
# Image feature extractor restored from disk; predict_feature() runs images through it.
features_encoder = load_model('features_encoder.h5')



In [21]:
# Load the caption tokenizer from the saved file.
# NOTE: pickle.load can execute arbitrary code while unpickling; only load
# files that come from a trusted source.
# The `with` block closes the handle even if unpickling raises.
with open('tokenizer.pkl','rb') as file:
    tokenizer=pickle.load(file)

# Load the maximum caption length from the saved file.
with open('max_length.pkl','rb') as file:
    max_length=pickle.load(file)


In [22]:
# Caption generator restored from disk; predict_caption() queries it one word at a time.
image_caption_model = load_model('model1.h5')

In [23]:
def predict_feature(img_path):
    """Compute the encoder feature vector for the image stored at ``img_path``.

    The image is read with OpenCV, resized to 224x224, passed through
    ``preprocess_input`` (imported from ``keras.applications.inception_v3``)
    and fed to the module-level ``features_encoder`` model.

    Args:
        img_path: Path of the image file to read.

    Returns:
        The encoder output reshaped to ``(-1, 2048)`` as a TensorFlow tensor.

    Raises:
        ValueError: If the file cannot be read or decoded as an image.
    """
    image = cv2.imread(img_path)
    # cv2.imread signals failure by returning None instead of raising; fail
    # here with a clear message rather than with an opaque error in resize().
    if image is None:
        raise ValueError(f"Could not read image file: {img_path!r}")
    # NOTE(review): cv2.imread yields BGR channel order and no conversion is
    # done here — confirm this matches how the encoder's training data was fed.
    # NOTE(review): 224x224 is assumed to be the input size features_encoder
    # expects — confirm against the saved model.
    image = cv2.resize(image, (224, 224))
    image = img_to_array(image)
    # Add the leading batch dimension expected by predict().
    image = image.reshape((1, 224, 224, 3))
    image = preprocess_input(image)
    feature = features_encoder.predict(image, verbose=0)
    # Flatten whatever leading dimensions the encoder returns into rows of 2048.
    return tf.reshape(feature, (-1, 2048))


In [24]:
def idx_to_word(integer,tokenizer):
    """Return the word that ``tokenizer.word_index`` maps to ``integer``.

    The mapping is scanned in iteration order and the first word whose index
    equals ``integer`` is returned; ``None`` is returned when nothing matches.
    """
    candidates = (
        token
        for token, position in tokenizer.word_index.items()
        if position == integer
    )
    return next(candidates, None)


def predict_caption(model, feature, tokenizer, max_length):
    """Generate a caption for an image feature by greedy word-by-word decoding.

    Starting from the text ``"startsen"``, the current text is converted to a
    padded index sequence, the model is queried with ``[feature, sequence]``
    and the word with the highest score is appended. Decoding stops after
    ``max_length`` steps, when the predicted index has no word in the
    tokenizer, or once ``'endsen'`` has been appended.

    Args:
        model: Model whose ``predict`` accepts ``[feature, sequence]``.
        feature: Image feature passed unchanged to the model on every step.
        tokenizer: Object providing ``texts_to_sequences`` and ``word_index``.
        max_length: Padding length and maximum number of decoding steps.

    Returns:
        The generated text, including the leading ``"startsen"`` marker and,
        if it was produced, the trailing ``'endsen'`` marker.
    """
    in_text = "startsen"
    for _ in range(max_length):
        sequence = tokenizer.texts_to_sequences([in_text])[0]
        sequence = pad_sequences([sequence], max_length)

        # verbose=0 keeps predict() from printing a progress bar on every
        # decoding step, matching how predict_feature() calls the encoder.
        y_pred = model.predict([feature, sequence], verbose=0)
        y_pred = np.argmax(y_pred)

        word = idx_to_word(y_pred, tokenizer)

        # An index with no matching word cannot extend the caption.
        if word is None:
            break

        in_text += " " + word

        if word == 'endsen':
            break

    return in_text

In [25]:
# Main application window.
root = Tk()
root.geometry('1024x1024+250+100')
root.title('Image Captioning Generator')
root.configure(background='gray')

# Container frame that holds every widget of the window.
frame = Frame(root, bg='gray')
def open_image():
    """Ask the user for an image file and show its generated caption.

    The caption cell (row 3, column 1 of ``frame``) is first reset to a blank
    placeholder. If the user picks a file, its feature vector is computed with
    ``predict_feature`` and the caption produced by ``predict_caption`` is
    shown in the same cell. Cancelling the dialog leaves the cell blank.
    """
    # Destroy widgets left in the caption cell by earlier selections so that
    # labels do not pile up on top of each other with every click.
    for stale_widget in frame.grid_slaves(row=3, column=1):
        stale_widget.destroy()

    empty_lbl=Label(frame,text=200*" "
              ,height='8',bg='gray',fg='white',font='25')
    empty_lbl.grid(column=1,row=3,padx=10, pady=10)

    img_file = askopenfile(mode='r', filetypes=[('Image Files', '*.jpg *.png *.jpeg')])
    if not img_file:
        # Dialog was cancelled.
        return

    # askopenfile returns an open file object; only its path is needed, so
    # release the handle immediately instead of leaking it.
    img_path = str(img_file.name)
    img_file.close()

    # NOTE: the selected image itself is not displayed, only its caption.
    feature=predict_feature(img_path)
    cap=predict_caption(image_caption_model,feature,tokenizer,max_length)
    cap_lbl=Label(frame,text=cap
          ,height='3',bg='black',fg='white',font='25')
    cap_lbl.grid(column=1,row=3,padx=10, pady=10)

        
# Heading shown at the top of the frame.
lbl = Label(
    frame,
    text='Display the image and its caption',
    height='3',
    bg='gray',
    fg='black',
    font='25',
)
lbl.grid(row=1, column=1, padx=10, pady=10)

# Button that opens the file dialog and triggers captioning via open_image().
browse = Button(
    frame,
    text="Select image",
    command=open_image,
    width='25',
    height='3',
    font='30',
    bg='white',
)
browse.grid(row=2, column=1, padx=10, pady=10)

# Show the frame and hand control to the Tk event loop (blocks until the window closes).
frame.pack()
root.mainloop()

