In [20]:
from keras.models import Sequential
from keras.layers.core import TimeDistributedDense, Activation
from keras.layers.recurrent import LSTM
from keras.optimizers import RMSprop
import numpy as np
import random


In [168]:
# Toy corpus: one caption, already split into word tokens.
captions = ["a little baby plays croquet".split()]
print(captions[0])

# Distinct words of the caption; set() removes duplicates and leaves the
# resulting order unspecified.
wordList = list(set(captions[0]))
# The start marker takes index 0 and the end marker takes the last index.
wordList = ['#START#'] + wordList + ["#END#"]

# Two-way lookup tables between a word and its one-hot position.
wordtoix = dict(zip(wordList, range(len(wordList))))
ixtoword = dict(enumerate(wordList))

['a', 'little', 'baby', 'plays', 'croquet']


In [179]:
print('Building Training Data')
caption = captions[0]
n_steps = len(caption) + 1
# Both tensors are (number of sequences, number of time steps, vocabulary size).
ins = np.zeros((1, n_steps, len(wordList)))
gts = np.zeros_like(ins)

print(n_steps)
# The input sequence opens with the start marker.
ins[0, 0, wordtoix['#START#']] = 1
print(0)
for t, word in enumerate(caption):
    word_ix = wordtoix[word]
    # Target at step t is this word; the same word is the input at step t+1.
    gts[0, t, word_ix] = 1
    ins[0, t + 1, word_ix] = 1
    print(t + 1)
# After the loop, t+1 is the last time step; its target is the end marker.
print(t + 1)
gts[:, t + 1, wordtoix['#END#']] = 1

Building Training Data
6
0
1
2
3
4
5
5


In [180]:
INP_DIM = len(wordList)
# Width shared by the per-step projection and the LSTM state.
HIDDEN_DIM = 64
print("Building Model")
model = Sequential()
# Project each one-hot time step from vocabulary size down to HIDDEN_DIM.
model.add(TimeDistributedDense(input_dim=INP_DIM, output_dim=HIDDEN_DIM))
# return_sequences=True keeps one output per time step instead of only the last.
model.add(LSTM(input_dim=HIDDEN_DIM, output_dim=HIDDEN_DIM, forget_bias_init='one', return_sequences=True))
# This layer consumes the LSTM output, so its input width is HIDDEN_DIM
# (the original declared INP_DIM here, which does not match the layer below it).
model.add(TimeDistributedDense(input_dim=HIDDEN_DIM, output_dim=INP_DIM))
# Softmax turns each time step's scores into a distribution over the vocabulary.
model.add(Activation('softmax'))

Building Model


In [181]:
# RMSprop optimizer with learning rate 0.03; the loss is categorical
# cross-entropy against the one-hot targets.
rms = RMSprop(lr=0.03)
model.compile(optimizer=rms, loss='categorical_crossentropy')


In [182]:
# Fit on the single toy sequence: 150 epochs, one sequence per batch, progress shown.
model.fit(ins, gts, nb_epoch=150, batch_size=1, verbose=1)

Epoch 1/150
Epoch 2/150
Epoch 3/150
Epoch 4/150
Epoch 5/150
Epoch 6/150
Epoch 7/150
Epoch 8/150
Epoch 9/150
Epoch 10/150
Epoch 11/150
Epoch 12/150
Epoch 13/150
Epoch 14/150
Epoch 15/150
Epoch 16/150
Epoch 17/150
Epoch 18/150
Epoch 19/150
Epoch 20/150
Epoch 21/150
Epoch 22/150
Epoch 23/150
Epoch 24/150
Epoch 25/150
Epoch 26/150
Epoch 27/150
Epoch 28/150
Epoch 29/150
Epoch 30/150
Epoch 31/150
Epoch 32/150
Epoch 33/150
Epoch 34/150
Epoch 35/150
Epoch 36/150
Epoch 37/150
Epoch 38/150
Epoch 39/150
Epoch 40/150
Epoch 41/150
Epoch 42/150
Epoch 43/150
Epoch 44/150
Epoch 45/150
Epoch 46/150
Epoch 47/150
Epoch 48/150
Epoch 49/150
Epoch 50/150
Epoch 51/150
Epoch 52/150
Epoch 53/150
Epoch 54/150
Epoch 55/150
Epoch 56/150
Epoch 57/150
Epoch 58/150
Epoch 59/150
Epoch 60/150
Epoch 61/150
Epoch 62/150
Epoch 63/150
Epoch 64/150
Epoch 65/150
Epoch 66/150
Epoch 67/150
Epoch 68/150
Epoch 69/150
Epoch 70/150
Epoch 71/150
Epoch 72/150
Epoch 73/150
Epoch 74/150
Epoch 75/150
Epoch 76/150
Epoch 77/150
Epoch 78

<keras.callbacks.History at 0x252ec3a50>

In [183]:
print("INPUT")
print(ins[0][1].shape)

# Walk the time axis (axis 0 of ins[0]) and decode each one-hot row back to
# its word. The original bounded the loop with shape[1]-1, i.e. the vocabulary
# size minus one, which only matched the number of time steps by coincidence.
for x in range(ins[0].shape[0]):
    word = ixtoword[np.argmax(ins[0, x])]
    print(word)


INPUT
(7,)
#START#
a
little
baby
plays
croquet


In [185]:
# Greedy decoding for the toy model. A separate array is used so the training
# tensor `ins` is not overwritten (re-running the fit cell afterwards would
# otherwise train on the decoding buffer).
testins = np.zeros((1, len(captions[0]) + 1, len(wordList)))
testins[0, 0, wordtoix['#START#']] = 1
# Number of time steps is axis 1 of the 3-D input (the original used the
# vocabulary axis minus one, which was equal only by coincidence).
n_steps = testins.shape[1]
for i in range(n_steps):
    predictions = model.predict(testins, batch_size=1, verbose=1)[0]
    index = np.argmax(predictions[i])
    print(ixtoword[index])
    # Feed the predicted word back in as the next step's input, mirroring how
    # the training inputs were built (input at t+1 is the word at t).
    if i + 1 < n_steps:
        testins[0, i + 1, index] = 1
    

a
little
baby
plays
croquet
#END#


In [41]:
import pickle

# NOTE(review): pickle.load can execute arbitrary code embedded in the file;
# only load this from a trusted source.
# Pickle data is binary, so open with 'rb'; the with-block closes the handle,
# which the original bare open() never did.
with open('flickr8k_Entire_dataset_with_cnn_features.pkl', 'rb') as dataset_file:
    dataset = pickle.load(dataset_file)


{u'sentids': [0, 1, 2, 3, 4], 'cnn features': array([  8.41589998e-09,   3.18888098e-08,   5.94773581e-08,
         1.34680349e-07,   5.59009152e-07,   5.02435995e-08,
         2.87913409e-08,   3.16422643e-07,   4.71011759e-07,
         1.63704699e-05,   5.04545845e-08,   3.01402210e-08,
         1.73545772e-07,   2.87365314e-08,   3.70973687e-08,
         3.02698652e-07,   3.40937760e-07,   7.39699644e-08,
         8.50642834e-07,   1.02383908e-07,   8.51808082e-08,
         4.70065800e-07,   3.04309833e-06,   1.10937754e-06,
         1.28562112e-06,   6.56475976e-08,   3.00554461e-07,
         5.61098602e-07,   2.35904185e-08,   1.75602523e-07,
         9.52802814e-09,   3.71192392e-08,   8.47269490e-08,
         1.25647404e-07,   1.49475881e-07,   5.19190202e-09,
         4.43526780e-08,   6.13872331e-09,   3.86867532e-06,
         5.82997984e-07,   2.10374651e-07,   3.08866106e-07,
         3.79565961e-07,   2.09374690e-07,   4.47989748e-07,
         2.64900677e-07,   1.89438595e-

In [291]:
from collections import Counter
def word_processing(dataset):
    """Build the vocabulary and index lookups for a list of caption items.

    Only the first entry of each item's 'sentences' list is read; its 'tokens'
    are collected. The vocabulary is '#START#', then every distinct token, then
    '#END#' and '#NULL#'. No unknown-word token is added.

    Returns a tuple (vocab, word_to_index, index_to_word).
    """
    token_counts = Counter()
    for entry in dataset:
        first_caption = entry['sentences'][0]
        token_counts.update(first_caption['tokens'])

    vocab = ['#START#'] + list(token_counts.keys()) + ['#END#', '#NULL#']

    word_to_index = dict((token, position) for position, token in enumerate(vocab))
    index_to_word = dict(enumerate(vocab))
    return vocab, word_to_index, index_to_word


N_Samples = 100
MAX_SENTENCE_LENGTH = 35
data_set = dataset[:N_Samples]
print(MAX_SENTENCE_LENGTH)
vocab, word_to_index, index_to_word = word_processing(data_set)

# Look the special tokens up in THIS vocabulary. The original used the toy
# dictionary `wordtoix` for '#START#'/'#END#', so the end-of-sentence target
# was written to an unrelated word's index.
start_ix = word_to_index['#START#']
end_ix = word_to_index['#END#']
null_ix = word_to_index['#NULL#']

# Every sequence is laid out over MAX_SENTENCE_LENGTH-1 time steps:
#   ins: #START# w0 ... w(n-1) #END#  #NULL# ...
#   gts: w0 ... w(n-1)  #END#  #NULL# #NULL# ...
n_steps = MAX_SENTENCE_LENGTH - 1
# Size by the actual slice so a dataset shorter than N_Samples does not leave
# all-zero rows in the tensors.
ins = np.zeros((len(data_set), n_steps, len(vocab)))
gts = np.zeros_like(ins)
for ind, dataset_val in enumerate(data_set):
    # Only the first caption of each item is used.
    tokens = dataset_val['sentences'][0]['tokens']
    print(tokens)
    assert len(tokens) <= MAX_SENTENCE_LENGTH - 2
    n_tokens = len(tokens)

    ins[ind, 0, start_ix] = 1
    for t, word in enumerate(tokens):
        # the ground truth at time t is the next word
        gts[ind, t, word_to_index[word]] = 1
        # the input at time t+1 is the previous ground truth
        ins[ind, t + 1, word_to_index[word]] = 1

    # Ground truth ends with the end token. n_tokens is used rather than the
    # leaked loop variable, which is stale or undefined for an empty caption.
    gts[ind, n_tokens, end_ix] = 1

    # After the caption: the input gets one #END# and then #NULL# padding,
    # while the target is #NULL# for every remaining step.
    if n_tokens + 1 < n_steps:
        ins[ind, n_tokens + 1, end_ix] = 1
    ins[ind, n_tokens + 2:, null_ix] = 1
    gts[ind, n_tokens + 1:, null_ix] = 1
        #print time_step

35
[u'a', u'black', u'dog', u'is', u'running', u'after', u'a', u'white', u'dog', u'in', u'the', u'snow']
[u'a', u'little', u'baby', u'plays', u'croquet']
[u'a', u'brown', u'dog', u'in', u'the', u'snow', u'has', u'something', u'hot', u'pink', u'in', u'its', u'mouth']
[u'a', u'brown', u'dog', u'is', u'running', u'along', u'a', u'beach']
[u'a', u'black', u'and', u'white', u'dog', u'with', u'a', u'red', u'frisbee', u'standing', u'on', u'a', u'sandy', u'beach']
[u'a', u'cyclist', u'wearing', u'a', u'red', u'helmet', u'is', u'riding', u'on', u'the', u'pavement']
[u'a', u'man', u'dressed', u'in', u'a', u'purple', u'shirt', u'and', u'red', u'bandanna', u'smiles', u'at', u'the', u'people', u'watching', u'him']
[u'a', u'boy', u'wearing', u'a', u'red', u't', u'shirt', u'is', u'running', u'through', u'woodland']
[u'a', u'girl', u'in', u'a', u'white', u'dress']
[u'a', u'skier', u'in', u'a', u'yellow', u'jacket', u'is', u'airborne', u'above', u'the', u'mountains']
[u'a', u'photographer', u'looks', u

In [292]:
print("INPUT")
# Decode every input sequence back to text, one line per sample; padding
# tokens are printed as well.
for sample in ins:
    words = []
    for step in range(MAX_SENTENCE_LENGTH - 1):
        # The position of the largest entry in the row is the word's index.
        words.append(index_to_word[np.argmax(sample[step])])
    print(" ".join(words))

INPUT
#START# a black dog is running after a white dog in the snow #END# #NULL# #NULL# #NULL# #NULL# #NULL# #NULL# #NULL# #NULL# #NULL# #NULL# #NULL# #NULL# #NULL# #NULL# #NULL# #NULL# #NULL# #NULL# #NULL# #NULL#
#START# a little baby plays croquet #END# #NULL# #NULL# #NULL# #NULL# #NULL# #NULL# #NULL# #NULL# #NULL# #NULL# #NULL# #NULL# #NULL# #NULL# #NULL# #NULL# #NULL# #NULL# #NULL# #NULL# #NULL# #NULL# #NULL# #NULL# #NULL# #NULL# #NULL#
#START# a brown dog in the snow has something hot pink in its mouth #END# #NULL# #NULL# #NULL# #NULL# #NULL# #NULL# #NULL# #NULL# #NULL# #NULL# #NULL# #NULL# #NULL# #NULL# #NULL# #NULL# #NULL# #NULL# #NULL#
#START# a brown dog is running along a beach #END# #NULL# #NULL# #NULL# #NULL# #NULL# #NULL# #NULL# #NULL# #NULL# #NULL# #NULL# #NULL# #NULL# #NULL# #NULL# #NULL# #NULL# #NULL# #NULL# #NULL# #NULL# #NULL# #NULL# #NULL#
#START# a black and white dog with a red frisbee standing on a sandy beach #END# #NULL# #NULL# #NULL# #NULL# #NULL# #NULL# #NULL# 

In [309]:
INP_DIM = len(vocab)
# Width shared by the per-step projection and the LSTM state.
HIDDEN_DIM = 64
print("Building Model")
model = Sequential()
# Project each one-hot time step from vocabulary size down to HIDDEN_DIM.
model.add(TimeDistributedDense(input_dim=INP_DIM, output_dim=HIDDEN_DIM))
# return_sequences=True keeps one output per time step instead of only the last.
model.add(LSTM(input_dim=HIDDEN_DIM, output_dim=HIDDEN_DIM, forget_bias_init='one', return_sequences=True))
# This layer consumes the LSTM output, so its input width is HIDDEN_DIM
# (the original declared INP_DIM here, which does not match the layer below it).
model.add(TimeDistributedDense(input_dim=HIDDEN_DIM, output_dim=INP_DIM))
# Softmax turns each time step's scores into a distribution over the vocabulary.
model.add(Activation('softmax'))
rms = RMSprop(lr=0.03)
model.compile(loss='categorical_crossentropy', optimizer=rms)
# 500 silent epochs with one sequence per batch; this is the slow step of the notebook.
model.fit(ins, gts, batch_size=1, nb_epoch=500, verbose=0)

Building Model


KeyboardInterrupt: 

In [None]:
# Greedy decoding: start from '#START#' and repeatedly take the most likely
# next word until '#END#' is produced or the time steps run out.
testins = np.zeros((1, MAX_SENTENCE_LENGTH - 1, len(vocab)))
# Use this vocabulary's lookup; the original read the toy dictionary `wordtoix`.
testins[0, 0, word_to_index['#START#']] = 1
sent = []
n_steps = testins.shape[1]
for i in range(n_steps):
    predictions = model.predict(testins, batch_size=1, verbose=0)[0]
    index = np.argmax(predictions[i])
    sent.append(index_to_word[index])
    if index_to_word[index] == "#END#":
        break
    # Feed the predicted word back in as the next step's input, mirroring how
    # the training inputs were built. Without this, every step after the first
    # sees an all-zero input.
    if i + 1 < n_steps:
        testins[0, i + 1, index] = 1
print(" ".join(sent))
        
    

In [296]:
# Show the whole vocabulary list; position in the list is the one-hot index.
print(vocab)

['#START#', u'motocross', u'pointing', u'splashing', u'yellow', u'four', u'protest', u'skiing', u'go', u'chair', u'children', u'to', u'bike', u'under', u'fences', u'hats', u'brown', u'woman', u'sitting', u'very', u'baton', u'animals', u'bubbles', u'fireplace', u'school', u'large', u'race', u'guy', u'woven', u'round', u'chew', u'paperback', u'bicycle', u'fence', u'dooorway', u'boots', u'tires', u'street', u'drinks', u'blue', u'plays', u'stands', u'stool', u'liberty', u'uniform', u'waiting', u'above', u'racing', u'body', u'men', u'water', u'busy', u'baseball', u'path', u'along', u'cyclist', u'boy', u'colored', u'smoke', u'climber', u'family', u'buses', u'straw', u'cheerleader', u'standing', u'from', u'takes', u'sweater', u'two', u'next', u'few', u'camera', u'calm', u'6', u'wades', u'trail', u'carrying', u'stick', u'baby', u'hold', u'women', u'word', u'room', u'car', u'ride', u'nine', u'shelton', u'high', u'something', u'tan', u'dress', u'pink', u'sit', u'skirt', u'holds', u'hot', u'beach