In [65]:
import os
import pickle
import string
import random
import itertools
import numpy as np
from collections import Counter
from keras.utils import to_categorical
from keras.preprocessing import sequence

In [16]:
def modify_text(total_text_dict, line) :
    """Parse one line of the caption token file and store its cleaned caption.

    The line is split on tabs: the first field names the image, the second
    field is the caption. The image key is the part of the first field that
    precedes its first '.'. The caption is stripped of punctuation and
    newlines, lower-cased, split on whitespace, and reduced to purely
    alphabetic tokens.

    Parameters
    ----------
    total_text_dict : dict
        Maps image key -> list of token lists. Updated in place: the new
        token list is appended to the entry for the image key, which is
        created when missing.
    line : str
        One raw line of the token file.

    Returns
    -------
    None
        Lines without a tab separator (for example blank lines) are ignored.
    """
    fields = line.split('\t')
    if len(fields) < 2 :
        # No caption field on this line, so there is nothing to record.
        return
    image_id = fields[0].split('.')[0]
    # The whole caption field is cleaned. It is not cut at the first '.',
    # because that would drop every word after a period inside the caption;
    # the translate() call below removes periods anyway.
    strip_table = str.maketrans('', '', string.punctuation + '\n')
    caption = fields[1].translate(strip_table).lower()
    words = [w for w in caption.split() if w.isalpha()] #removing numeric or alphanumeric data
    total_text_dict.setdefault(image_id, []).append(words)

In [17]:
def create_vocabulary(corpus, min_count=10, seed=None) :
    """Build a shuffled word -> frequency dictionary of the frequent words.

    Parameters
    ----------
    corpus : iterable
        The words to count (any hashable items accepted by ``Counter``).
    min_count : int, optional
        A word is kept only when it occurs at least this many times.
        Defaults to 10.
    seed : optional
        When given, the key order is shuffled with a private
        ``random.Random(seed)`` so the order is reproducible. When None
        (the default) the module-level ``random.shuffle`` is used.

    Returns
    -------
    dict
        Kept words mapped to their counts, inserted in shuffled order.
    """
    counts = Counter(corpus)
    kept_words = [word for word, count in counts.items() if count >= min_count]
    # Callers enumerate this dictionary to assign word indices, so the
    # insertion order matters; a seed makes that numbering repeatable.
    shuffler = random if seed is None else random.Random(seed)
    shuffler.shuffle(kept_words)
    return {word : counts[word] for word in kept_words}

In [18]:
def add_start_end(text_dict) :
    """Wrap every caption with the marker tokens 'start' and 'end', in place.

    Parameters
    ----------
    text_dict : dict
        Maps a key to a list of captions, each caption being a list of
        tokens. Every caption list is mutated: 'start' is inserted at the
        front and 'end' is appended.

    Returns
    -------
    None

    Notes
    -----
    The markers are added unconditionally, so calling this twice on the
    same dictionary wraps each caption twice.
    """
    for caption_list in text_dict.values() :
        # Walk whatever captions exist; no fixed count of five is assumed,
        # so an image with fewer captions no longer raises IndexError.
        for caption in caption_list :
            caption.insert(0, 'start')
            caption.append('end')

In [19]:
# Directory holding the Flickr8k text files. The default is the original
# location; set the FLICKR8K_TEXT_DIR environment variable to use another one.
text_data_dir = os.environ.get('FLICKR8K_TEXT_DIR', 'C:\\Anaconda\\envs\\deeplearning\\projects\\ImageCaptioning\\text')
train_dir = os.path.join(text_data_dir, 'Flickr_8k.trainImages.txt')
dev_dir = os.path.join(text_data_dir, 'Flickr_8k.devImages.txt')
text_file = os.path.join(text_data_dir, 'Flickr8k.token.txt')
total_text_dict = {}
train_data_name = []
text_dict = {}
uncommon_names = []

# Collect the image names of the train split followed by the dev split,
# keeping only the part of each line before its first '.'.
with open(train_dir) as f1, open(dev_dir) as f2 :
    for split_file in (f1, f2) :
        for name_line in split_file :
            train_data_name.append(name_line.split('.')[0])

# Parse every caption line into total_text_dict (image key -> token lists).
with open(text_file) as file :
    for line in file :
        modify_text(total_text_dict, line)

# Names that have captions but are in neither split. A set is used for the
# membership test so this pass is linear instead of quadratic.
wanted_names = set(train_data_name)
uncommon_names = [name for name in total_text_dict if name not in wanted_names]

for name in uncommon_names :
    del total_text_dict[name]

# text_dict is the same dictionary object as total_text_dict, not a copy.
text_dict = total_text_dict

In [20]:
# Wrap every caption with the lowercase marker tokens 'start' and 'end'.
# This mutates text_dict in place, so running this cell a second time on the
# same text_dict adds the markers again.
add_start_end(text_dict)

# captions: every caption (token list) of every image, flattened.
# corpus:   every token of every caption, in the same order.
corpus = []
captions = []
# The loop variables deliberately avoid the name `id`, which at notebook
# scope would shadow the builtin for every later cell.
for image_captions in text_dict.values() :
    for caption in image_captions :
        captions.append(caption)
        corpus.extend(caption)

In [21]:
vocabulary = create_vocabulary(corpus)
# Number the vocabulary words 0, 1, 2, ... in the dictionary's iteration order.
# NOTE(review): index 0 belongs to a real word here, while pad_sequences pads
# with 0 by default - confirm the model can tell that word from padding.
word_index = {word : position for position, word in enumerate(vocabulary)} # word -> index
index_word = {position : word for position, word in enumerate(vocabulary)} # index -> word
num_classes = len(vocabulary)
print("Number of unique words in the vocabulary :",num_classes)

Number of unique words in the vocabulary : 1799


In [22]:
# Token count of the longest caption; zero-length captions are left out of
# the comparison.
maxlen = max(len(caption) for caption in captions if len(caption) > 0)
print("Maximum length of caption is : ", maxlen)

Maximum length of caption is :  37


In [23]:
# Load the pickled object stored in 'transfer_train_data'.
# Caution: pickle.load can execute arbitrary code, so only open trusted files.
with open('transfer_train_data', "rb") as feature_file:
    transferred_features = pickle.load(feature_file)

# Remove the entries whose key is listed in uncommon_names, when present.
for name in uncommon_names :
    transferred_features.pop(name, None)

In [14]:
# Write the word -> index mapping to the file 'vocabulary' with pickle.
with open('vocabulary', 'wb') as vocab_file :
    pickle.dump(word_index, vocab_file)

In [209]:
def data_generator(text_dict, word_index, batch_size, transferred_features, maxlen, num_classes) :
    """Endlessly yield training batches for next-word prediction.

    Every caption is encoded with ``word_index`` (words missing from it are
    skipped) and expanded into one sample per prefix: the first ``i`` indices,
    left-padded to ``maxlen`` by ``pad_sequences``, are the input and index
    ``i`` one-hot encoded over ``num_classes`` is the target. Each sample is
    paired with the feature of the image the caption belongs to.

    Parameters
    ----------
    text_dict : dict
        Maps image key -> list of captions (lists of words).
    word_index : dict
        Maps word -> integer index.
    batch_size : int
        Number of images (not samples) whose samples make up one batch.
    transferred_features : mapping
        Maps image key -> image feature; looked up with the keys of
        ``text_dict``.
    maxlen : int
        Length every partial caption is padded to.
    num_classes : int
        Width of the one-hot target vectors.

    Yields
    ------
    tuple
        ``([images, sentences], next_words)`` as numpy arrays with one row
        per sample, so the three arrays have the same length.

    Raises
    ------
    ValueError
        If ``batch_size`` is smaller than 1, or if no key of ``text_dict``
        is present in ``transferred_features`` (the generator could never
        yield in either case).
    """
    if batch_size < 1 :
        raise ValueError("batch_size must be at least 1")
    image, sentence, next_word = [], [], []
    num_images = 0
    while True :
        matched_images = 0
        # Features are looked up by the caption's own key. Zipping the two
        # dictionaries would pair entries by position, which mismatches
        # images and captions whenever the dictionaries differ in order.
        for image_id, description in text_dict.items() :
            if image_id not in transferred_features :
                continue
            matched_images += 1
            feature = transferred_features[image_id]
            for caption in description :
                encoded = [word_index[word] for word in caption if word in word_index]
                for i in range(1, len(encoded)) :
                    input_temp, output_temp = encoded[:i], encoded[i]
                    input_temp = sequence.pad_sequences([input_temp], maxlen = maxlen)[0]
                    word_temp = to_categorical([output_temp], num_classes = num_classes)[0]
                    # One image feature per sample keeps the three lists aligned.
                    image.append(feature)
                    sentence.append(input_temp)
                    next_word.append(word_temp)
            num_images += 1
            if num_images == batch_size :
                yield ([np.array(image), np.array(sentence)], np.array(next_word))
                image, sentence, next_word = [], [], []
                num_images = 0
        if matched_images == 0 :
            # A pass that found no usable image would otherwise loop forever.
            raise ValueError("no key of text_dict is present in transferred_features")

In [210]:
# Create the batch generator with a batch size of 1.
generator = data_generator(
    text_dict = text_dict,
    word_index = word_index,
    batch_size = 1,
    transferred_features = transferred_features,
    maxlen = maxlen,
    num_classes = num_classes,
)