In [2]:
import numpy as np
from numpy import argmax
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.applications.vgg19 import VGG19
from tensorflow.keras.applications.vgg19 import preprocess_input
from tensorflow.keras.models import Model
from tensorflow.keras.models import load_model
from pickle import load
from tensorflow.keras.preprocessing.image import load_img
from tensorflow.keras.preprocessing.image import img_to_array
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import LSTM
from tensorflow.keras.layers import Flatten
from tensorflow.keras.layers import Dropout
import cv2
from tensorflow.keras.preprocessing.text import Tokenizer
from nltk.translate.bleu_score import corpus_bleu

In [18]:
image_path="D:\\Syllabus\\Syllabus_7_th_sem\\ML\\Project\\Flicker8 dataset\\Flicker8k_Dataset"
desc_path= "D:\\Syllabus\\Syllabus_7_th_sem\\ML\\Project\\Flicker8 dataset\\Flickr8k_text\\Flickr8k.token.txt"
train_img= "D:\\Syllabus\\Syllabus_7_th_sem\\ML\\Project\\Flicker8 dataset\\Flickr8k_text\\Flickr_8k.trainImages.txt"
val_img=   "D:\\Syllabus\\Syllabus_7_th_sem\\ML\\Project\\Flicker8 dataset\\Flickr8k_text\\Flickr_8k.devImages.txt"
features_file = "C:\\Users\\Abhishek Sharma\\FLicker Abhishek\\Flicker_model\\VGG19\\features_VGG19.pkl"############33
test_img_path='D:\\Syllabus\\Syllabus_7_th_sem\\ML\\Project\\Flicker8 dataset\\Flicker8k_Dataset\\1000268201_693b08cb0e.jpg'

In [28]:
# load doc into memory
def load_doc(filename):
	# open the file as read only
	file = open(filename, 'r')
	# read all text
	text = file.read()
	# close the file
	file.close()
	return text

# save descriptions to file, one per line
def save_descriptions(descriptions, filename):
	lines = list()
	for key, desc_list in descriptions.items():
		for desc in desc_list:
			lines.append(key + ' ' + desc)
	data = '\n'.join(lines)
	file = open(filename, 'w')
	file.write(data)
	file.close()

In [4]:
# load a pre-defined list of photo identifiers
def load_set(filename):
	doc = load_doc(filename)
	dataset = list()
	# process line by line
	for line in doc.split('\n'):
		# skip empty lines
		if len(line) < 1:
			continue
		# get the image identifier
		identifier = line.split('.')[0]
		dataset.append(identifier)
	return set(dataset)

In [22]:
def load_descriptions(doc):
	mapping = dict()
	# process lines
	for line in doc.split('\n'):
		# split line by white space
		tokens = line.split()
		if len(line) < 2:
			continue
		# take the first token as the image id, the rest as the description
		image_id, image_desc = tokens[0], tokens[1:]
		# remove filename from image id
		image_id = image_id.split('.')[0]
		# convert description tokens back to string
		image_desc = ' '.join(image_desc)
		# create the list if needed
		if image_id not in mapping:
			mapping[image_id] = list()
		# store description
		mapping[image_id].append(image_desc)
	return mapping


def clean_descriptions(descriptions):
	# prepare translation table for removing punctuation
	table = str.maketrans('', '', string.punctuation)
	for key, desc_list in descriptions.items():
		for i in range(len(desc_list)):
			desc = desc_list[i]
			# tokenize
			desc = desc.split()
			# convert to lower case
			desc = [word.lower() for word in desc]
			# remove punctuation from each token
			desc = [w.translate(table) for w in desc]
			# remove hanging 's' and 'a'
			desc = [word for word in desc if len(word)>1]
			# remove tokens with numbers in them
			desc = [word for word in desc if word.isalpha()]
			# store as string
			desc_list[i] =  ' '.join(desc)

In [5]:
# load clean descriptions into memory
def load_clean_descriptions(filename, dataset):
	# load document
	doc = load_doc(filename)
	descriptions = dict()
	for line in doc.split('\n'):
		# split line by white space
		tokens = line.split()
		# split id from description
		image_id, image_desc = tokens[0], tokens[1:]
		# skip images not in the set
		if image_id in dataset:
			# create list
			if image_id not in descriptions:
				descriptions[image_id] = list()
			# wrap description in tokens
			desc = 'startseq ' + ' '.join(image_desc) + ' endseq'
			# store
			descriptions[image_id].append(desc)
	return descriptions

In [6]:
# load photo features
def load_photo_features(filename, dataset):
	# load all features
	all_features = load(open(filename, 'rb'))
	# filter features
	features = {k: all_features[k] for k in dataset}
	return features

In [7]:
# covert a dictionary of clean descriptions to a list of descriptions
def to_lines(descriptions):
	all_desc = list()
	for key in descriptions.keys():
		[all_desc.append(d) for d in descriptions[key]]
	return all_desc

In [8]:
# fit a tokenizer given caption descriptions
def create_tokenizer(descriptions):
	lines = to_lines(descriptions)
	tokenizer = Tokenizer()
	tokenizer.fit_on_texts(lines)
	return tokenizer

In [9]:
# calculate the length of the description with the most words
def max_length(descriptions):
	lines = to_lines(descriptions)
	return max(len(d.split()) for d in lines)
 
# map an integer to a word
def word_for_id(integer, tokenizer):
	for word, index in tokenizer.word_index.items():
		if index == integer:
			return word
	return None
 

In [10]:
# generate a description for an image
def generate_desc(model, tokenizer, photo, max_length):
	# seed the generation process
	in_text = 'startseq'
	# iterate over the whole length of the sequence
	for i in range(max_length):
		# integer encode input sequence
		sequence = tokenizer.texts_to_sequences([in_text])[0]
		# pad input
		sequence = pad_sequences([sequence], maxlen=max_length)
		# predict next word
		yhat = model.predict([photo,sequence], verbose=0)
		# convert probability to integer
		yhat = argmax(yhat)
		# map integer to word
		word = word_for_id(yhat, tokenizer)
		# stop if we cannot map the word
		if word is None:
			break
		# append as input for generating the next word
		in_text += ' ' + word
		# stop if we predict the end of the sequence
		if word == 'endseq':
			break
	return in_text

In [15]:
# evaluate the skill of the model
def evaluate_model(model, descriptions, photos, tokenizer, max_length):
    actual, predicted = list(), list()
    # step over the whole set
    for key, desc_list in descriptions.items():
        # generate description
        yhat = generate_desc(model, tokenizer, photos[key], max_length)
        # store actual and predicted
        references = [d.split() for d in desc_list]
        actual.append(references)
        predicted.append(yhat.split())
    # calculate BLEU score
    bleu1=corpus_bleu(actual, predicted, weights=(1.0, 0, 0, 0))
    bleu2=corpus_bleu(actual, predicted, weights=(0.5, 0.5, 0, 0))
    bleu3=corpus_bleu(actual, predicted, weights=(0.3, 0.3, 0.3, 0))
    bleu4=corpus_bleu(actual, predicted, weights=(0.25, 0.25, 0.25, 0.25))
    return [bleu1,bleu2,bleu3,bleu4]

In [25]:
# convert the loaded descriptions into a vocabulary of words
def to_vocabulary(descriptions):
	# build a list of all description strings
	all_desc = set()
	for key in descriptions.keys():
		[all_desc.update(d.split()) for d in descriptions[key]]
	return all_desc

In [29]:
import string
filename = desc_path
# load descriptions
doc = load_doc(filename)
# parse descriptions
descriptions = load_descriptions(doc)
print('Loaded: %d ' % len(descriptions))
# clean descriptions
clean_descriptions(descriptions)
# summarize vocabulary
vocabulary = to_vocabulary(descriptions)
print('Vocabulary Size: %d' % len(vocabulary))
# save to file
save_descriptions(descriptions, 'descriptions.txt')

Loaded: 8092 
Vocabulary Size: 8763


In [30]:
train_img= "D:\\Syllabus\\Syllabus_7_th_sem\\ML\\Project\\Flicker8 dataset\\Flickr8k_text\\Flickr_8k.trainImages.txt"
train = load_set(train_img)
print('Train_Dataset: %d' % len(train))
# descriptions
train_descriptions = load_clean_descriptions('descriptions.txt', train)
print('Descriptions: train=%d' % len(train_descriptions))
tokenizer = create_tokenizer(train_descriptions)
train_descriptions = load_clean_descriptions('descriptions.txt', train)
print('Descriptions: train=%d' % len(train_descriptions))
vocab_size = len(tokenizer.word_index) + 1
print('Vocabulary Size: %d' % vocab_size)
# determine the maximum sequence length
max_length = max_length(train_descriptions)
print('Description Length: %d' % max_length)

Train_Dataset: 6000
Descriptions: train=6000
Descriptions: train=6000
Vocabulary Size: 7579
Description Length: 34


In [31]:
model1 = 'densenet121.h5'
model2 = 'resnet.h5'
model3 = 'vgg16.h5'
model4 = 'vgg19.h5'
model5 = 'xception.h5'
models = [model1, model2, model3,model4,model5]

feature1 = 'features_densenet121.pkl'
feature2 = 'features_resnet.pkl'
feature3 = 'features_vgg16.pkl'
feature4 = 'features_vgg19.pkl'
feature5 = 'features_xception.pkl'
features = [feature1,feature2,feature3,feature4,feature5]

In [32]:
val_img="D:\\Syllabus\\Syllabus_7_th_sem\\ML\\Project\\Flicker8 dataset\\Flickr8k_text\\Flickr_8k.devImages.txt"
test = load_set(val_img)
print('Dataset: %d' % len(test))

# descriptions
test_descriptions = load_clean_descriptions('descriptions.txt', test)
print('Descriptions: test=%d' % len(test_descriptions))


Dataset: 1000
Descriptions: test=1000


In [35]:
bleu_list = []

for model,feature in zip(models,features):
    test_features = load_photo_features(feature, test)
    print('Photos: test=%d' % len(test_features))
    model_loaded = load_model(model)
    test_features = load_photo_features(feature, test)
    bleu_list.append(evaluate_model(model_loaded, test_descriptions, test_features, tokenizer, max_length))

Photos: test=1000
Photos: test=1000
Photos: test=1000
Photos: test=1000
Photos: test=1000


ResourceExhaustedError:  OOM when allocating tensor with shape[7579,256] and type float on /job:localhost/replica:0/task:0/device:CPU:0 by allocator cpu
	 [[node model/embedding/embedding_lookup/Read/ReadVariableOp (defined at C:\ProgramData\Anaconda3\lib\site-packages\tensorflow_core\python\framework\ops.py:1751) ]]
Hint: If you want to see a list of allocated tensors when OOM happens, add report_tensor_allocations_upon_oom to RunOptions for current allocation info.
 [Op:__inference_distributed_function_2515252]

Function call stack:
distributed_function


In [36]:
print(bleu_list)

[[0.5207624843386433, 0.27158511833689014, 0.18515682058523408, 0.0864172430213351], [0.5206860886054129, 0.27322213542085155, 0.19053817738113785, 0.08738316731583029], [0.516220880069025, 0.25652912961899055, 0.1697913491107352, 0.0732023499501428], [0.46295989393437187, 0.2279281196067943, 0.1515955778261573, 0.06459748813595953]]


In [39]:
result=np.max(bleu_list,axis=0)

In [40]:
print('BLEU-1: %f' % result[0])
print('BLEU-2: %f' % result[1])
print('BLEU-3: %f' % result[2])
print('BLEU-4: %f' % result[3])

BLEU-1: 0.520762
BLEU-2: 0.273222
BLEU-3: 0.190538
BLEU-4: 0.087383
