In [1]:
import string
from os import listdir
from pickle import load

from numpy import argmax
from numpy import array
from numpy import zeros
from nltk.translate.bleu_score import corpus_bleu
from pandas import DataFrame

In [2]:
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences
from keras.utils import to_categorical
from keras.preprocessing.image import load_img
from keras.preprocessing.image import img_to_array
from keras.applications.vgg16 import preprocess_input
from keras.applications.vgg16 import VGG16
from keras.utils import plot_model
from keras.models import Model
from keras.layers import Input
from keras.layers import Dense
from keras.layers import Flatten
from keras.layers import LSTM
from keras.layers import RepeatVector
from keras.layers import TimeDistributed
from keras.layers import Embedding
from keras.layers.merge import concatenate
from keras.layers.pooling import GlobalMaxPooling2D

ModuleNotFoundError: No module named 'keras'

In [1]:
#loading file in memory
def load_file(file_name):
    """Read a text file and return its whole content as a single string.

    Args:
        file_name: Path of the file to read.

    Returns:
        The complete text of the file.

    Raises:
        OSError: If the file cannot be opened (for example, it does not exist).
    """
    # the context manager closes the handle even when read() raises
    with open(file_name, 'r') as file:
        return file.read()

#loading a pre-defined image features(if required)
def load_data(filename):
    """Load the identifiers listed, one per line, in a text file.

    Each kept line is cut at its first '.', so an entry such as
    'photo_1.jpg' contributes the identifier 'photo_1'.

    Args:
        filename: Path of the listing file (read through load_file).

    Returns:
        A list of identifiers in file order; duplicates are not removed.
    """
    text = load_file(filename)
    dataset = list()
    for line in text.split('\n'):
        # discard small ones: blank or near-blank lines carry no identifier.
        # The comparison must be "<=", testing with ">" skips every real entry
        # and keeps only the blanks.
        if len(line) <= 2:
            continue
        # keep the text before the first dot (drops the file extension)
        image_id = line.split('.')[0]
        dataset.append(image_id)
    return dataset

#remove punctuations from captions
def clean_captions(caption):
    """Normalise every caption of a dict in place.

    Each caption is split on whitespace, lower-cased, stripped of ASCII
    punctuation, and purged of tokens that are one character or shorter;
    the remaining words are joined back with single spaces.

    Args:
        caption: dict mapping a key to a caption string.

    Returns:
        The same dict object that was passed in, with cleaned values.
    """
    # table for removing punctuation: maps every punctuation character to None
    table = str.maketrans('', '', string.punctuation)
    for key, caps in caption.items():
        # tokenize the data
        words = caps.split()
        # lower case
        words = [word.lower() for word in words]
        # remove punctuation
        words = [word.translate(table) for word in words]
        # drop stray single letters and tokens emptied by the punctuation pass
        words = [word for word in words if len(word) > 1]
        # overwriting an existing key does not resize the dict, so it is safe
        # to do while iterating over items()
        caption[key] = ' '.join(words)
    return caption

#load photo features
def load_photo_features(filename, dataset):
    """Load pickled features and keep only the entries named in dataset.

    Args:
        filename: Path of a pickle file holding a mapping of key -> feature.
        dataset: Iterable of keys to keep.

    Returns:
        A dict with one entry per key of dataset.

    Raises:
        KeyError: If a key of dataset is missing from the pickled mapping.
    """
    # NOTE: unpickling can execute arbitrary code; only load trusted files.
    # The context manager makes sure the handle is closed after loading.
    with open(filename, 'rb') as handle:
        all_features = load(handle)
    # load features which are part of our dataset
    return {k: all_features[k] for k in dataset}

#test_train split
def train_test_split(dataset, n_train=500, n_test=300):
    """Split identifiers into train and test sets by sorted position.

    The identifiers are sorted first so that the split is deterministic
    regardless of the order in which they were read.

    Args:
        dataset: Iterable of sortable identifiers.
        n_train: Number of leading sorted items placed in the train set.
        n_test: Number of items immediately after them placed in the test set.

    Returns:
        A (train, test) tuple of sets. With the defaults this is the first
        500 sorted items and the following 300; items beyond that are unused.
    """
    ordered = sorted(dataset)
    test_end = n_train + n_test
    return set(ordered[:n_train]), set(ordered[n_train:test_end])

#fit tokenizer
def create_tokenizer(captions):
    """Return a Tokenizer fitted on every caption text.

    Args:
        captions: dict whose values are the caption strings to fit on.

    Returns:
        The fitted Tokenizer instance.
    """
    fitted = Tokenizer()
    # the vocabulary is learned from the dict values only; keys are ignored
    fitted.fit_on_texts(list(captions.values()))
    return fitted

#create time sequences
def create_seq(tokenizer, caps, image, max_length):
    """Expand one caption into next-word training samples.

    For a caption encoded as w0..wn, sample i (1 <= i <= n) pairs the prefix
    w0..w(i-1) with the target word wi, so a caption of fewer than two known
    words yields no samples.

    Args:
        tokenizer: Fitted tokenizer providing word_index and
            texts_to_sequences.
        caps: Caption text for a single image.
        image: Feature value for that image; repeated once per sample.
        max_length: Length every input prefix is padded to.

    Returns:
        A list [Ximages, Xseq, y] of three equally long lists: the repeated
        image feature, the padded integer prefixes, and the one-hot targets.
    """
    Ximages, Xseq, y = list(), list(), list()
    # +1 because index 0 is not assigned to any word (used for padding)
    vocab_size = len(tokenizer.word_index) + 1
    # encoding captions to integers
    seq_num = tokenizer.texts_to_sequences([caps])[0]
    # splitting sentences for input
    for i in range(1, len(seq_num)):
        in_seq, out_seq = seq_num[:i], seq_num[i]
        # padding to max length
        in_seq = pad_sequences([in_seq], maxlen=max_length)[0]
        # one-hot encoding
        out_seq = to_categorical([out_seq], num_classes=vocab_size)[0]
        # save
        Ximages.append(image)
        Xseq.append(in_seq)
        y.append(out_seq)
    return [Ximages, Xseq, y]

#word2vec embedding
def word2vec_embedding(tokenizer, vocab_size, max_length):
    """Build a frozen Embedding layer initialised from pickled word vectors.

    Words of the tokenizer vocabulary that are missing from the pickled
    embedding keep an all-zero row.

    Args:
        tokenizer: Fitted tokenizer; its word_index supplies the row indices.
        vocab_size: Number of rows of the weight matrix.
        max_length: Input sequence length passed to the layer.

    Returns:
        A non-trainable Embedding layer with mask_zero enabled.
    """
    # NOTE: unpickling can execute arbitrary code; only load a trusted file.
    # load
    with open('word2vec_embedding.pkl', 'rb') as handle:
        embedding = load(handle)
    # assumes the stored vectors are 100-dimensional -- TODO confirm
    dim = 100
    trainable = False
    # create a weight matrix for words in training captions
    weights = zeros((vocab_size, dim))

    for word, i in tokenizer.word_index.items():
        if word not in embedding:
            continue
        weights[i] = embedding[word]
    layer = Embedding(vocab_size, dim, weights=[weights], input_length=max_length, trainable=trainable, mask_zero=True)
    return layer
        
#defining the working model
def define_model(vocab_size, max_length):
    """Build and compile the two-input caption model.

    One branch pools a (7, 7, 512) feature map, projects it to 128 units and
    repeats it max_length times; the other embeds the word sequence and runs
    it through an LSTM and a per-timestep Dense layer. Both are concatenated
    and decoded by an LSTM followed by Dense layers ending in a softmax over
    the vocabulary.

    NOTE: the embedding layer is built from the module-level `tokenizer`
    variable, which must be defined before this function is called.

    Side effects: prints the model summary and writes the architecture
    diagram to 'plot.png'.

    Args:
        vocab_size: Size of the output softmax and of the embedding matrix.
        max_length: Length of the word-sequence input.

    Returns:
        The compiled Model.
    """
    #feature extractor
    input1 = Input(shape=(7, 7, 512))
    fe1 = GlobalMaxPooling2D()(input1)
    fe2 = Dense(128, activation='relu')(fe1)
    fe3 = RepeatVector(max_length)(fe2)
    #embedding
    input2 = Input(shape=(max_length,))
    emb2 = word2vec_embedding(tokenizer, vocab_size, max_length)(input2)
    emb3 = LSTM(256, return_sequences=True)(emb2)
    emb4 = TimeDistributed(Dense(128, activation='relu'))(emb3)
    #merge
    merge = concatenate([fe3, emb4])
    #decoder part
    lm2 = LSTM(256)(merge)
    lm3 = Dense(500, activation = 'relu')(lm2)
    out = Dense(vocab_size, activation = 'softmax')(lm3)
    #summary
    model = Model(inputs=[input1, input2], outputs=out)
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    # summary() prints by itself and returns None, so it is not wrapped in print()
    model.summary()
    plot_model(model, show_shapes=True, to_file='plot.png')
    return model

#data generator for the model
def data_generator(descriptions, features, tokenizer, max_length, n_step):
    """Endlessly yield training batches built from n_step images at a time.

    Args:
        descriptions: dict mapping image id -> caption text.
        features: dict mapping image id -> feature; element [0] of each value
            is used as the image input.
        tokenizer: Fitted tokenizer forwarded to create_seq.
        max_length: Padded length of the word-sequence input.
        n_step: Number of images whose samples are merged into one batch.

    Yields:
        ([images, sequences], targets) tuples of numpy arrays, one per group
        of n_step images; the last group of a pass may be smaller.

    Raises:
        ValueError: If descriptions is empty (the loop would otherwise spin
            forever without yielding).
    """
    #loop to train
    while 1 :
        keys = list(descriptions.keys())
        if not keys:
            raise ValueError('descriptions is empty; no batches can be generated')
        for i in range(0, len(keys), n_step):
            Ximages, Xseq, y = list(), list(), list()
            for j in range(i, min(len(keys), i+n_step)):
                image_id = keys[j]
                #get feature of image
                # [0] takes the first element of the stored feature --
                # presumably dropping a leading batch axis; verify against
                # how the features file was produced
                image = features[image_id][0]
                #captions
                caps = descriptions[image_id]
                #generate in-out
                in_img, in_seq, out_word = create_seq(tokenizer, caps, image, max_length)
                for k in range(len(in_img)):
                    Ximages.append(in_img[k])
                    Xseq.append(in_seq[k])
                    y.append(out_word[k])
            #yield the data to the model once the whole group is collected,
            #as an (inputs, targets) pair matching the model's two inputs
            yield [array(Ximages), array(Xseq)], array(y)

#map integer to word
def int_to_word(integer, tokenizer):
    """Look up the word whose tokenizer index equals integer.

    Args:
        integer: Index to search for.
        tokenizer: Object exposing a word_index mapping of word -> index.

    Returns:
        The first word (in word_index iteration order) mapped to that index,
        or None when no word has it.
    """
    matches = (
        candidate
        for candidate, position in tokenizer.word_index.items()
        if position == integer
    )
    return next(matches, None)


#generate image description
def generate_caps(model, tokenizer, image, max_length):
    """Greedily generate a caption for one image, one word per step.

    Starting from '<start>', the text produced so far is encoded, padded and
    fed to the model together with the image; the highest-scoring word is
    appended. Generation stops after max_length words, when the predicted
    index has no word, or when the word '<end>' is produced.

    NOTE(review): with a Tokenizer built with Keras' default filters, '<' and
    '>' are stripped from the fitted texts, so word_index cannot contain
    '<end>' and the early stop would never trigger; confirm which start/end
    markers the training captions actually use.

    Args:
        model: Trained model taking [image, padded_sequence] inputs.
        tokenizer: Fitted tokenizer used for encoding and decoding.
        image: Image feature passed as the first model input -- presumably
            already carrying a batch axis; TODO confirm.
        max_length: Maximum number of words to generate and the padded
            input length.

    Returns:
        The generated text, including the leading '<start>' marker.
    """
    #first input word
    in_word = '<start>'
    #prediction
    for i in range(max_length):
        #encode input string to integer
        seq = tokenizer.texts_to_sequences([in_word])[0]
        #pad input string to match the length
        pad_seq = pad_sequences([seq], maxlen = max_length)
        #predict next word
        probs = model.predict([image, pad_seq], verbose=0)
        #get corresponding integer
        y_hat = argmax(probs)
        #map integer to word
        word = int_to_word(y_hat, tokenizer)
        #fail-safe: the predicted index is not in the vocabulary
        if word is None:
            break
        in_word += ' ' + word

        if word == '<end>':
            break
    return in_word

In [2]:
#load saved image data
# TODO: set this to the text file that lists the image file names; with the
# blank placeholder open() fails with FileNotFoundError
filename = ' '
dataset = load_data(filename)
#test-train split
train , test = train_test_split(dataset)


def load_captions(filename, dataset):
    """Read captions for the given image ids and clean them.

    NOTE(review): assumes every line of the file is
    '<image file name> <caption words...>' -- confirm against the real
    captions file. The image id is the text before the first '.', matching
    how load_data derives ids. Only the first caption found for an id is kept.

    Args:
        filename: Path of the captions text file.
        dataset: Collection of image ids to keep.

    Returns:
        dict mapping image id -> cleaned caption text.
    """
    captions = dict()
    for line in load_file(filename).split('\n'):
        tokens = line.split()
        # need at least an id and one caption word
        if len(tokens) < 2:
            continue
        image_id = tokens[0].split('.')[0]
        if image_id in dataset and image_id not in captions:
            captions[image_id] = ' '.join(tokens[1:])
    return clean_captions(captions)


#load previously saved captions
train_captions = load_captions('captions.txt', train)
test_captions = load_captions('captions.txt', test)
#load saved image features
train_features = load_photo_features('features.pkl', train)
test_features = load_photo_features('features.pkl', test)
# prepare tokenizer (fitted on the training captions only)
tokenizer = create_tokenizer(train_captions)
vocab_size = len(tokenizer.word_index) + 1
# determine the maximum sequence length
max_length = max(len(s.split()) for s in train_captions.values())
print('Description Length: %d' % max_length)

FileNotFoundError: [Errno 2] No such file or directory: ' '

In [None]:
# define
verbose = 2
n_epochs = 50
n_photos_per_update = 2
n_batches_per_epoch = len(train) // n_photos_per_update
n_repeats = 3
# base name of the results CSV written at the end -- TODO choose a meaningful
# name for this experiment
model_name = 'caption_model'


def evaluate_model(model, captions, features, tokenizer, max_length):
    """Score generated captions against the reference captions with BLEU.

    Added because the notebook calls evaluate_model without defining it.
    For every image a caption is generated with generate_caps and compared,
    word by word, with its single reference caption.

    Args:
        model: Trained caption model.
        captions: dict mapping image id -> reference caption text.
        features: dict mapping image id -> image feature passed to
            generate_caps.
        tokenizer: Fitted tokenizer.
        max_length: Maximum caption length.

    Returns:
        The corpus-level BLEU score returned by corpus_bleu.
    """
    actual, predicted = list(), list()
    for key, caps in captions.items():
        y_hat = generate_caps(model, tokenizer, features[key], max_length)
        # corpus_bleu expects a list of reference token lists per hypothesis
        actual.append([caps.split()])
        predicted.append(y_hat.split())
    return corpus_bleu(actual, predicted)


#run experiment
train_results, test_results = list(), list()
for i in range(n_repeats):
    #define model (a fresh one per repeat so runs are independent)
    model = define_model(vocab_size, max_length)
    #fit model
    model.fit_generator(data_generator(train_captions, train_features, tokenizer, max_length, n_photos_per_update), steps_per_epoch=n_batches_per_epoch, epochs=n_epochs, verbose=verbose)
    # evaluate model on training data
    train_score = evaluate_model(model, train_captions, train_features, tokenizer, max_length)
    # evaluate model on test data
    test_score = evaluate_model(model, test_captions, test_features, tokenizer, max_length)
    # store
    train_results.append(train_score)
    test_results.append(test_score)
    print('>%d: train=%f test=%f' % ((i+1), train_score, test_score))
# save results to file
df = DataFrame()
df['train'] = train_results
df['test'] = test_results
print(df.describe())
df.to_csv(model_name+'.csv', index=False)