In [1]:
import pickle 
import numpy as np
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences
from keras.models import load_model
from nltk.translate.bleu_score import corpus_bleu

Using TensorFlow backend.


In [2]:
def load_doc(file_name):
    with open(file_name,'r') as f:
        text = f.read()
    return text


In [4]:
temp = load_doc('data_use/Flickr8k_text/Flickr8k.token_small.txt')
print(temp)
del temp

1000268201_693b08cb0e.jpg#1	A girl going into a wooden building .
1000268201_693b08cb0e.jpg#2	A little girl climbing into a wooden playhouse .
1000268201_693b08cb0e.jpg#3	A little girl climbing the stairs to her playhouse .
1000268201_693b08cb0e.jpg#4	A little girl in a pink dress going into a wooden cabin .
1001773457_577c3a7d70.jpg#4	Two dogs on pavement moving toward each other .
1002674143_1b742ab4b8.jpg#0	A little girl covered in paint sits in front of a painted rainbow with her hands in a bowl .


## function to load all the image identifiers
### we return a set ==>> to ensure there is no duplicate

In [3]:
def load_set_of_images(file_name):
    doc = load_doc(file_name)
    images_list = list()
    for line in doc.split('\n'):
        if len(line) < 1: #skip empty lines
            continue
        identifiers = line.split('.')[0]
        images_list.append(identifiers)
    return set(images_list)

## prepare a dictionary which contain image:caption
 ## descriptions = {'3242718240_3358f2d6e6': sentence1. sentence2. sentence3.}
 

In [4]:
def load_clean_descriptions(file_name, images_list):
    doc = load_doc(file_name)
    descriptions = dict()
    for line in doc.split('\n'):
        tokens = line.split()
        image_id, image_description = tokens[0],tokens[1:]
        for image_id in images_list:
            if image_id not in descriptions:
                descriptions[image_id] = list()
            # break sentences by the dot and add ssss and eeee and join them
            desc = 'ssss ' + ' '.join(image_description) + ' eeee'
            descriptions[image_id].append(desc)
    return descriptions

## Load image features in dict

In [5]:
def load_photo_features(file_name,dataset):
    all_features = pickle.load(open(file_name,'rb'))
    features_dict = dict()
    for image_id in dataset:
        features_dict[image_id] = all_features[image_id]
    return features_dict

In [6]:
def collect_all_sentences(descriptions):
    all_descriptions = list()
    for key in descriptions.keys():
        for desc in descriptions[key]:
            all_descriptions.append(desc)
    return all_descriptions

In [7]:
def create_tokenizer(descriptions):
    all_sentences = collect_all_sentences(descriptions)
    tokenizer = Tokenizer()
    tokenizer.fit_on_texts(all_sentences)
    return tokenizer

In [8]:
def max_length(descriptions):
    all_lines = collect_all_sentences(descriptions)
    return np.max(len(line.split()) for line in all_lines)

## tokenizer.word_index

### return a uniquely assigned integer to words

In [9]:
def find_word_for_integer(integer,tokenizer):
    for word,word_integer in tokenizer.word_index.items():
        if index == integer:
            return word
    return None

# Didn't like this method===>> FIND BETTER

In [10]:
# create sequences of images, input sequences and output words for an image
def create_sequences(tokenizer, max_length, desc_list, photo):
	X1, X2, y = list(), list(), list()
	# walk through each description for the image
	for desc in desc_list:
		# encode the sequence
		seq = tokenizer.texts_to_sequences([desc])[0]
		# split one sequence into multiple X,y pairs
		for i in range(1, len(seq)):
			# split into input and output pair
			in_seq, out_seq = seq[:i], seq[i]
			# pad input sequence
			in_seq = pad_sequences([in_seq], maxlen=max_length)[0]
			# encode output sequence
			out_seq = to_categorical([out_seq], num_classes=vocab_size)[0]
			# store
			X1.append(photo)
			X2.append(in_seq)
			y.append(out_seq)
	return array(X1), array(X2), array(y)

# BETTER APPROACH ==> TimeDistributed layer

In [None]:
# model = Sequential()
# model.add(Embedding(vocabulary, hidden_size, input_length=num_steps))
# model.add(LSTM(hidden_size, return_sequences=True))
# model.add(LSTM(hidden_size, return_sequences=True))
# if use_dropout:
#     model.add(Dropout(0.5))
# model.add(TimeDistributed(Dense(vocabulary)))
# model.add(Activation('softmax'))

In [12]:
# define the captioning model
def define_model(vocab_size, max_length):
    # feature extractor model
    inputs1 = Input(shape=(1000,))
#     fe1 = Dropout(0.5)(inputs1)
    fe2 = Dense(64, activation='relu')(fe1) #256
    # sequence model
    inputs2 = Input(shape=(max_length,))
    se1 = Embedding(vocab_size, 64, mask_zero=True)(inputs2)#256
#     se2 = Dropout(0.5)(se1)
    se3 = LSTM(64)(se2)#256
    # decoder model
    decoder1 = add([fe2, se3])
    decoder2 = Dense(64, activation='relu')(decoder1)#256
    outputs = Dense(vocab_size, activation='softmax')(decoder2)
    # tie it together [image, seq] [word]
    model = Model(inputs=[inputs1, inputs2], outputs=outputs)
    # compile model
    model.compile(loss='categorical_crossentropy', optimizer='adam')
    # summarize model
    model.summary()
#     plot_model(model, to_file='model.png', show_shapes=True)
    return model

In [11]:
def data_generator(photos,descriptions,tokenizer,max_length):
    while 1:
        for key,desc_list in descriptions.items():
            photo = photos[key]
            in_img, in_seq, out_seq = create_sequences(tokenizer, max_length, desc_list, photo)
            yield [[in_img,in_seq], out_seq]
            

In [13]:
import sys

# These are the usual ipython objects, including this one you are creating
ipython_vars = ['In', 'Out', 'exit', 'quit', 'get_ipython', 'ipython_vars']

# Get a sorted list of the objects and their sizes
sorted([(x, sys.getsizeof(globals().get(x))) for x in dir() if not x.startswith('_') and x not in sys.modules and x not in ipython_vars], key=lambda x: x[1], reverse=True)

[('Tokenizer', 1056),
 ('collect_all_sentences', 136),
 ('corpus_bleu', 136),
 ('create_sequences', 136),
 ('create_tokenizer', 136),
 ('data_generator', 136),
 ('define_model', 136),
 ('find_word_for_integer', 136),
 ('load_clean_descriptions', 136),
 ('load_doc', 136),
 ('load_model', 136),
 ('load_photo_features', 136),
 ('load_set_of_images', 136),
 ('max_length', 136),
 ('pad_sequences', 136),
 ('np', 80)]

In [14]:
# load training dataset (6K)
filename = 'data_use/Flickr8k_text/Flickr_8k.trainImages.txt'
train = load_doc(filename)
print('Dataset: %d' % len(train))


Dataset: 154678


In [15]:
train = train[:10000]
len(train)

10000

In [19]:
import os
os.listdir()

['y.npy',
 'model_preparation.ipynb',
 '21_Machine_Translation.ipynb',
 'new_small_descriptions.pkl',
 '.ipynb_checkpoints',
 'X1.npy',
 'resnet50_complete.h5',
 'descriptions.txt',
 'attention_idea.ipynb',
 'prepare_image_feature.ipynb',
 'prepare_text_data.ipynb',
 'prepare_dataset.py',
 'prepare_dataset.py~',
 'resnet50_weights.h5',
 '22_Image_Captioning.ipynb',
 'final_model_progressive_loading.ipynb',
 'loading_testing.ipynb',
 'data_use',
 'Y.npy',
 'progressive_loading_captioning.ipynb',
 'X2.npy',
 'features.pkl',
 'data',
 'final_image_captioning_working.ipynb',
 'resnet50_testing_layers.ipynb',
 'image_captioning.ipynb']

In [21]:
file_name = 'new_small_descriptions.pkl'
desc = pickle.load(open(file_name,'rb'))

In [22]:
desc

{'3242718240_3358f2d6e6': 'the six children are laughing',
 '1355450069_c0675b0706': 'there is rock climber holding on and climbing to the top of very large rock',
 '724702877_f2a938766b': 'small dog catches tennis ball in its mouth indoors',
 '3067500667_0fce8f28d4': 'player hits the ball at cricket game',
 '1452361926_6d8c535e32': 'the child sits in toy car and drinks from sippy cup',
 '1525153022_06c48dbe52': 'the dog runs through the water',
 '282960970_574aa1ba49': 'this person is sitting in saddle',
 '119534510_d52b3781a3': 'person sitting on concrete landing next to body of water',
 '241347460_81d5d62bf6': 'the football player in red kicks off the ball',
 '3540416981_4e74f08cbb': 'the white dog is running in the green grass',
 '2333816000_7105d0ffac': 'the man climbing the rock is wearing blue helmet',
 '2533642917_a5eace85e6': 'little boy kneedeep in water',
 '521658170_a837af87e9': 'three dogs trying to get stick from their owner',
 '3537201804_ce07aff237': 'two kids sit in th

In [35]:
desc_list = []
for key, value in desc.items():
    print(key,value)
#     temp = key,' '.join(value)
#     temp = key,value
    desc_list.append(key + ' ' + value)

3242718240_3358f2d6e6 the six children are laughing
1355450069_c0675b0706 there is rock climber holding on and climbing to the top of very large rock
724702877_f2a938766b small dog catches tennis ball in its mouth indoors
3067500667_0fce8f28d4 player hits the ball at cricket game
1452361926_6d8c535e32 the child sits in toy car and drinks from sippy cup
1525153022_06c48dbe52 the dog runs through the water
282960970_574aa1ba49 this person is sitting in saddle
119534510_d52b3781a3 person sitting on concrete landing next to body of water
241347460_81d5d62bf6 the football player in red kicks off the ball
3540416981_4e74f08cbb the white dog is running in the green grass
2333816000_7105d0ffac the man climbing the rock is wearing blue helmet
2533642917_a5eace85e6 little boy kneedeep in water
521658170_a837af87e9 three dogs trying to get stick from their owner
3537201804_ce07aff237 two kids sit in the middle of playground toy that spins around them
2170187328_65c2f11891 there is kid jumping off

In [36]:
desc_list

['3242718240_3358f2d6e6 the six children are laughing',
 '1355450069_c0675b0706 there is rock climber holding on and climbing to the top of very large rock',
 '724702877_f2a938766b small dog catches tennis ball in its mouth indoors',
 '3067500667_0fce8f28d4 player hits the ball at cricket game',
 '1452361926_6d8c535e32 the child sits in toy car and drinks from sippy cup',
 '1525153022_06c48dbe52 the dog runs through the water',
 '282960970_574aa1ba49 this person is sitting in saddle',
 '119534510_d52b3781a3 person sitting on concrete landing next to body of water',
 '241347460_81d5d62bf6 the football player in red kicks off the ball',
 '3540416981_4e74f08cbb the white dog is running in the green grass',
 '2333816000_7105d0ffac the man climbing the rock is wearing blue helmet',
 '2533642917_a5eace85e6 little boy kneedeep in water',
 '521658170_a837af87e9 three dogs trying to get stick from their owner',
 '3537201804_ce07aff237 two kids sit in the middle of playground toy that spins arou

In [14]:
# descriptions
# file_name = 'descriptions.txt'
images_list = train
doc = load_doc(file_name)


In [15]:
temp = doc[:100000-17]

In [30]:
del doc

In [33]:
# temp
'''
2495394666_2ef6c37519 dog swimming in body of water\n2495394666_2ef6c37519 the brown dog is swimming in the water\n
2561849813_ff9caa52ac baby sits on an empty beach under big blue sky\n2561849813_ff9caa52ac toddler sits on sandy beach\n
2561849813_ff9caa52ac baby sitting at beach\n2561849813_ff9caa52ac the baby sits on the sand in the front of the scene 
while the ocean is in the distance\n2561849813_ff9caa52ac the baby sits on the sandy beach\n1784309115_0ad6791146 girl 
in the bathing suit talking to guy\n1784309115_0ad6791146 woman in bikini is talking to regularly dressed man on bridge\n
'''

In [16]:
doc = temp
del temp

In [17]:
doc

'3242718240_3358f2d6e6 group of children dressed in costume pose for picture\n3242718240_3358f2d6e6 group of children stand against building and smile\n3242718240_3358f2d6e6 group of six children stands smiling and laughing in front of wall\n3242718240_3358f2d6e6 group of young children standing in row leaning against wall\n3242718240_3358f2d6e6 the six children are laughing\n1355450069_c0675b0706 man is climbing up wall\n1355450069_c0675b0706 man wearing no shirt climbs the side of large rock\n1355450069_c0675b0706 shirtless man climbing up the rock wall with rope safety lines\n1355450069_c0675b0706 man without shirt is climbing rock\n1355450069_c0675b0706 there is rock climber holding on and climbing to the top of very large rock\n724702877_f2a938766b dog has jumped in the air to catch tennis ball\n724702877_f2a938766b small brown dog jumping up to catch tennis ball\n724702877_f2a938766b small brown dog jumps and catches tennis ball\n724702877_f2a938766b small dog jumping with ball i

In [23]:
descriptions = dict()
count = 0
for line in doc.split('\n'):
#     if count == 20:
#         break
#     print(line)
    tokens = line.split()
#     print(tokens)
    
    image_id, image_description = tokens[0],tokens[1:]
#     print(image_id)
#     print(image_description)
    count += 1
    descriptions[image_id] = image_description
#     for image_id in images_list:
#         if image_id not in descriptions:
#             descriptions[image_id] = list()
#         # break sentences by the dot and add ssss and eeee and join them
#         desc = 'ssss ' + ' '.join(image_description) + ' eeee'
#         descriptions[image_id].append(desc)

IndexError: list index out of range

In [26]:
collect_keys = []
for key in descriptions.keys():
    collect_keys.append(key)

In [29]:
len(collect_keys),count

(279, 1393)

In [None]:
>>> keylist = data[0].keys()
>>> mydata = dict((k,', '.join(set(map(lambda d: d[k], data)))) for k in keylist)

In [50]:
key_list = descriptions.keys()
dict((key,', '.join((map(lambda d: d['key'], descriptions)))) for key in key_list)

TypeError: string indices must be integers

In [58]:
count = 0
new_description = dict()
for key,value in descriptions.items():
    print(key,' '.join(value))
    new_description[key] = ' '.join(value)
    count += 1

3242718240_3358f2d6e6 the six children are laughing
1355450069_c0675b0706 there is rock climber holding on and climbing to the top of very large rock
724702877_f2a938766b small dog catches tennis ball in its mouth indoors
3067500667_0fce8f28d4 player hits the ball at cricket game
1452361926_6d8c535e32 the child sits in toy car and drinks from sippy cup
1525153022_06c48dbe52 the dog runs through the water
282960970_574aa1ba49 this person is sitting in saddle
119534510_d52b3781a3 person sitting on concrete landing next to body of water
241347460_81d5d62bf6 the football player in red kicks off the ball
3540416981_4e74f08cbb the white dog is running in the green grass
2333816000_7105d0ffac the man climbing the rock is wearing blue helmet
2533642917_a5eace85e6 little boy kneedeep in water
521658170_a837af87e9 three dogs trying to get stick from their owner
3537201804_ce07aff237 two kids sit in the middle of playground toy that spins around them
2170187328_65c2f11891 there is kid jumping off

In [70]:
new_description['3242718240_3358f2d6e6']

'the six children are laughing'

In [None]:
pickle.dump(new_description,open('new_small_descriptions.pkl'))

In [59]:
count

279

In [69]:
np.unique(np.asarray(list(new_description.keys()))).shape

(279,)

In [39]:
# key_list
'''ict_keys(['3242718240_3358f2d6e6', '1355450069_c0675b0706', '724702877_f2a938766b', '3067500667_0fce8f28d4', '1452361926_6d8c535e32', '1525153022_06c48dbe52', '282960970_574aa1ba49', '119534510_d52b3781a3', '241347460_81d5d62bf6', '3540416981_4e74f08cbb', '2333816000_7105d0ffac', '2533642917_a5eace85e6', '521658170_a837af87e9', '3537201804_ce07aff237', '2170187328_65c2f11891', '2861100960_457ceda7fa', '3498240367_cbd8c6efbf', '1288909046_d2b2b62607', '3275704430_a75828048f', '3315353266_70f0bbb1c3', '418667611_b9995000f4', '241347204_007d83e252', '3204686006_88f04547b9', '3016178284_ec50a09e8c', '3364796213_b8948913b5', '857914283_270d7d1c87', '3614542901_29877fc342', '2866696346_4dcccbd3a5', '2098646162_e3b3bbf14c', '3256603992_67312b5a36', '109738763_90541ef30d', '3546474710_903c3c9fd3', '544257613_d9a1fea3f7', '2149982207_5345633bbf', '3321063116_4e5deeac83', '2533414541_362bf043bb', '3356938707_d95ba97430', '3215117062_6e07a86352', '3271385712_ffd34f2de5', '186890605_ddff5b694e', '2268729848_d418451226', '2053777548_108e54c826', '3461049169_e068ae4f25', '2918880895_e61f74f2f0','''

In [35]:
' '.join(descriptions['1355450069_c0675b0706'])

'there is rock climber holding on and climbing to the top of very large rock'

In [36]:
new_desc = dict()
for key in collect_keys:
    add
    if key not in new_desc.keys():
        new_desc[key] = list()
    else:
        new_desc[key].append(add_desc)

{'3242718240_3358f2d6e6': ['the', 'six', 'children', 'are', 'laughing'],
 '1355450069_c0675b0706': ['there',
  'is',
  'rock',
  'climber',
  'holding',
  'on',
  'and',
  'climbing',
  'to',
  'the',
  'top',
  'of',
  'very',
  'large',
  'rock'],
 '724702877_f2a938766b': ['small',
  'dog',
  'catches',
  'tennis',
  'ball',
  'in',
  'its',
  'mouth',
  'indoors'],
 '3067500667_0fce8f28d4': ['player',
  'hits',
  'the',
  'ball',
  'at',
  'cricket',
  'game'],
 '1452361926_6d8c535e32': ['the',
  'child',
  'sits',
  'in',
  'toy',
  'car',
  'and',
  'drinks',
  'from',
  'sippy',
  'cup'],
 '1525153022_06c48dbe52': ['the', 'dog', 'runs', 'through', 'the', 'water'],
 '282960970_574aa1ba49': ['this', 'person', 'is', 'sitting', 'in', 'saddle'],
 '119534510_d52b3781a3': ['person',
  'sitting',
  'on',
  'concrete',
  'landing',
  'next',
  'to',
  'body',
  'of',
  'water'],
 '241347460_81d5d62bf6': ['the',
  'football',
  'player',
  'in',
  'red',
  'kicks',
  'off',
  'the',
  'bal

In [41]:
data = dict()
data = {key1:'a', key2:'b', key1:'c'}

NameError: name 'key1' is not defined

In [50]:
del image_identifiers

In [51]:
# descriptions = dict()
image_identifiers = []
for line in doc.split('\n'):
    tokens = line.split()
    image_id,_ = tokens[0], tokens[1:]
    image_identifiers.append(image_id)

In [52]:
len(image_identifiers)

40460

In [57]:
temp = image_identifiers
image_ids_list = np.unique(np.asarray(image_identifiers))

In [63]:
del temp,image_identifiers
image_identifiers = image_ids_list
del image_ids_list

In [64]:
image_identifiers.shape

(8092,)

In [69]:
doc = doc[:1000000-17]
doc

'3242718240_3358f2d6e6 group of children dressed in costume pose for picture\n3242718240_3358f2d6e6 group of children stand against building and smile\n3242718240_3358f2d6e6 group of six children stands smiling and laughing in front of wall\n3242718240_3358f2d6e6 group of young children standing in row leaning against wall\n3242718240_3358f2d6e6 the six children are laughing\n1355450069_c0675b0706 man is climbing up wall\n1355450069_c0675b0706 man wearing no shirt climbs the side of large rock\n1355450069_c0675b0706 shirtless man climbing up the rock wall with rope safety lines\n1355450069_c0675b0706 man without shirt is climbing rock\n1355450069_c0675b0706 there is rock climber holding on and climbing to the top of very large rock\n724702877_f2a938766b dog has jumped in the air to catch tennis ball\n724702877_f2a938766b small brown dog jumping up to catch tennis ball\n724702877_f2a938766b small brown dog jumps and catches tennis ball\n724702877_f2a938766b small dog jumping with ball i

In [None]:
descriptions = dict()
for line in doc.split('\n'):
#     print(line)
    tokens = line.split()
#     print(tokens)
#     break
    image_id, image_description = tokens[0],tokens[1:]
    for image_id in image_identifiers:
        if image_id not in descriptions:
            descriptions[image_id] = list()
        # break sentences by the dot and add ssss and eeee and join them
        desc = 'ssss ' + ' '.join(image_description) + ' eeee'
        descriptions[image_id].append(desc)

In [43]:
descriptions = dict()
for line in doc.split('\n'):
#     print(line)
    tokens = line.split()
#     print(tokens)
#     break
    image_id, image_description = tokens[0],tokens[1:]
    for image_id in images_list:
        if image_id not in descriptions:
            descriptions[image_id] = list()
        # break sentences by the dot and add ssss and eeee and join them
        desc = 'ssss ' + ' '.join(image_description) + ' eeee'
        descriptions[image_id].append(desc)

IndexError: list index out of range

In [45]:
del descriptions

In [39]:
len(descriptions['2']), descriptions['2']

(1048929,
 ['ssss group of children dressed in costume pose for picture eeee',
  'ssss group of children dressed in costume pose for picture eeee',
  'ssss group of children dressed in costume pose for picture eeee',
  'ssss group of children dressed in costume pose for picture eeee',
  'ssss group of children dressed in costume pose for picture eeee',
  'ssss group of children dressed in costume pose for picture eeee',
  'ssss group of children dressed in costume pose for picture eeee',
  'ssss group of children dressed in costume pose for picture eeee',
  'ssss group of children dressed in costume pose for picture eeee',
  'ssss group of children dressed in costume pose for picture eeee',
  'ssss group of children dressed in costume pose for picture eeee',
  'ssss group of children dressed in costume pose for picture eeee',
  'ssss group of children dressed in costume pose for picture eeee',
  'ssss group of children dressed in costume pose for picture eeee',
  'ssss group of childre

In [None]:
# descriptions
train_descriptions = load_clean_descriptions('descriptions.txt', train)
print('Descriptions: train=%d' % len(train_descriptions))


In [None]:
# photo features
train_features = load_photo_features('features.pkl', train)
print('Photos: train=%d' % len(train_features))
# prepare tokenizer
tokenizer = create_tokenizer(train_descriptions)
vocab_size = len(tokenizer.word_index) + 1
print('Vocabulary Size: %d' % vocab_size)
# determine the maximum sequence length
max_length = max_length(train_descriptions)
print('Description Length: %d' % max_length)
 
# define the model
model = define_model(vocab_size, max_length)
# train the model, run epochs manually and save after each epoch
epochs = 20
steps = len(train_descriptions)
for i in range(epochs):
	# create the data generator
	generator = data_generator(train_descriptions, train_features, tokenizer, max_length)
	# fit for one epoch
	model.fit_generator(generator, epochs=1, steps_per_epoch=steps, verbose=1)
	# save model
	model.save('model_' + str(i) + '.h5')
