In [1]:
from collections import OrderedDict

import numpy
import numpy as np
import theano
import theano.tensor as tensor
from theano.sandbox.rng_mrg import MRG_RandomStreams as RandomStreams

In [3]:
def get_dataset(name):
    """Return the first two elements stored under ``name`` in ``datasets``.

    NOTE(review): ``datasets`` is a module-level mapping that is not defined
    in the visible part of this notebook -- confirm it exists before calling.
    """
    entry = datasets[name]
    return entry[0], entry[1]

# make prefix-appended name
def _p(pp, name):
    return '%s_%s' % (pp, name)

# initialize Theano shared variables according to the initial parameters
def init_tparams(params):
    """Wrap every initial parameter value in a Theano shared variable.

    Parameters
    ----------
    params : mapping of name -> initial value

    Returns
    -------
    OrderedDict mapping each name to ``theano.shared(value, name=name)``,
    in the iteration order of ``params``.
    """
    tparams = OrderedDict()
    # items() works on both Python 2 and 3 (iteritems() is Python-2-only).
    for kk, pp in params.items():
        tparams[kk] = theano.shared(pp, name=kk)
    return tparams

# load parameters
def load_params(path, params):
    """Overwrite the values of ``params`` with those stored in an archive.

    Parameters
    ----------
    path : file name passed to ``numpy.load``
    params : dict of name -> value; mutated in place

    Returns
    -------
    The same ``params`` object, with every value replaced by the entry of
    the same name from the archive. Archive entries whose name is not a key
    of ``params`` are ignored.

    Raises
    ------
    Warning
        If a key of ``params`` is missing from the archive. Keys visited
        before the missing one have already been overwritten at that point.
    """
    pp = numpy.load(path)
    # Snapshot the keys: works on Python 2 and 3 (iteritems() is Python-2-only)
    # and avoids iterating the dict while assigning into it.
    for kk in list(params):
        if kk not in pp:
            raise Warning('%s is not in the archive' % kk)
        params[kk] = pp[kk]

    return params

# some utilities
def ortho_weight(ndim):
    r"""
    Random orthogonal weights.

    Draws an (ndim, ndim) standard-normal matrix W, takes its SVD
    (W = U \Sigma V) and returns U cast to float32. U is square and
    orthogonal, so its rows (and columns) are orthonormal.

    Used by norm_weight (below) when a square matrix is requested.
    """
    W = numpy.random.randn(ndim, ndim)
    # Only the left singular vectors are needed.
    u = numpy.linalg.svd(W)[0]
    return u.astype('float32')

def norm_weight(nin,nout=None, scale=0.01, ortho=True):
    """
    Build a float32 weight matrix of shape (nin, nout).

    ``nout`` defaults to ``nin``. A square matrix with ``ortho`` set is
    produced by ortho_weight; otherwise the entries are standard-normal
    samples multiplied by ``scale``.
    """
    nout = nin if nout is None else nout
    use_ortho = nout == nin and ortho
    weights = ortho_weight(nin) if use_ortho else scale * numpy.random.randn(nin, nout)
    return weights.astype('float32')

# some useful shorthands
def tanh(x):
    """Shorthand for ``tensor.tanh`` applied to ``x``."""
    activated = tensor.tanh(x)
    return activated

def rectifier(x):
    """Rectified linear unit: ``tensor.maximum(0., x)``."""
    floor = 0.
    return tensor.maximum(floor, x)

def linear(x):
    """Identity activation: hand the argument back untouched."""
    return x


"""
Neural network layer definitions.

The life-cycle of each of these layers is as follows
    1) The param_init of the layer is called, which creates
    the weights of the network.
    2) The fprop is called which builds that part of the Theano graph
    using the weights created in step 1). This automatically links
    these variables to the graph.

Each prefix is used like a key and should be unique
to avoid naming conflicts when building the graph.
"""
# layers: 'name': ('parameter initializer', 'fprop')
# Values are the *names* of module-level functions; get_layer resolves them.
layers = dict(
    ff=('param_init_fflayer', 'fflayer'),
    lstm=('param_init_lstm', 'lstm_layer'),
    lstm_cond=('param_init_lstm_cond', 'lstm_cond_layer'),
)

def get_layer(name):
    """Look up a layer by name and return ``(param_init_fn, fprop_fn)``.

    Raises KeyError when ``name`` is not registered in ``layers``.
    """
    spec = layers[name]
    # NOTE(review): eval() turns the registered function names into objects;
    # this is only safe while ``layers`` holds trusted, hard-coded strings.
    initializer = eval(spec[0])
    fprop = eval(spec[1])
    return (initializer, fprop)


# feedforward layer: affine transformation + point-wise nonlinearity
def param_init_fflayer(options, params, prefix='ff', nin=None, nout=None):
    """Create the weight matrix and bias of a feedforward layer.

    ``nin`` / ``nout`` fall back to ``options['dim_proj']`` when omitted.
    Adds ``<prefix>_W`` (from norm_weight, scale 0.01) and ``<prefix>_b``
    (float32 zeros of length ``nout``) to ``params`` and returns ``params``.
    """
    nin = options['dim_proj'] if nin is None else nin
    nout = options['dim_proj'] if nout is None else nout

    weights = norm_weight(nin, nout, scale=0.01)
    params[_p(prefix, 'W')] = weights

    bias = numpy.zeros((nout,)).astype('float32')
    params[_p(prefix, 'b')] = bias

    return params

def fflayer(tparams, state_below, options, prefix='rconv', activ='lambda x: tensor.tanh(x)', **kwargs):
    """Feedforward layer fprop: ``activ(state_below . W + b)``.

    ``activ`` is a string that is eval()'d into a callable. ``W`` and ``b``
    are read from ``tparams`` under ``<prefix>_W`` / ``<prefix>_b``.
    ``options`` and extra keyword arguments are accepted but not used.

    NOTE(review): the default prefix here is 'rconv' while
    param_init_fflayer defaults to 'ff' -- callers presumably always pass
    ``prefix`` explicitly; confirm.
    """
    nonlinearity = eval(activ)
    weights = tparams[_p(prefix, 'W')]
    bias = tparams[_p(prefix, 'b')]
    pre_activation = tensor.dot(state_below, weights) + bias
    return nonlinearity(pre_activation)


In [2]:
import flickr30k

In [3]:
# Load the data splits and the word dictionary through the project-local flickr30k module.
# NOTE(review): later cells index train[0] as (caption, image_index) pairs and
# train[1] as one feature vector per image -- confirm against flickr30k.load_data.
train, valid, test, worddict = flickr30k.load_data()

... loading data
... loaded train
... loaded test
... loaded dev


In [62]:
# Build a 1000-image subset: one caption plus one 14x14x512 annotation map per image.
annotation_vectors = []

# Stride between the captions used for consecutive images in train[0].
# NOTE(review): presumably each image has 5 consecutive captions -- confirm.
CAPTIONS_PER_IMAGE = 5
# Captions longer than this are dropped; must stay <= MAX_SENTENCE_LENGTH - 2,
# which the one-hot encoding cell asserts.
MAX_CAPTION_TOKENS = 43

dataset = train[1][:1000]
sent_index = 0
Total_dataset = []
DS = []
for ind, val in enumerate(dataset):
    print(ind)
    # Derive the caption index from the image index. The previous running
    # counter was not advanced when an image was skipped, which left every
    # later image paired with the wrong caption (and tripped the assert).
    sent_index = ind * CAPTIONS_PER_IMAGE
    sentence = train[0][sent_index]
    # Each train[0] entry is (caption, image_index); check the alignment.
    assert sentence[1] == ind

    tokens = sentence[0].split()
    if len(tokens) > MAX_CAPTION_TOKENS:
        continue
    print("selected")

    val = val.reshape((14, 14, 512))
    DS.append(val)
    dict_Val = {"id": ind, "sentence": sentence, "annotation_vector": val}
    Total_dataset.append(dict_Val)
DS = np.array(DS)

0
selected
1
selected
2
selected
3
selected
4
selected
5
selected
6
selected
7
selected
8
selected
9
selected
10
selected
11
selected
12
selected
13
selected
14
selected
15
selected
16
selected
17
selected
18
selected
19
selected
20
selected
21
selected
22
selected
23
selected
24
selected
25
selected
26
selected
27
selected
28
selected
29
selected
30
selected
31
selected
32
selected
33
selected
34
selected
35
selected
36
selected
37
selected
38
selected
39
selected
40
selected
41
selected
42
selected
43
selected
44
selected
45
selected
46
selected
47
selected
48
selected
49
selected
50
selected
51
selected
52
selected
53
selected
54
selected
55
selected
56
selected
57
selected
58
selected
59
selected
60
selected
61
selected
62
selected
63
selected
64
selected
65
selected
66
selected
67
selected
68
selected
69
selected
70
selected
71
selected
72
selected
73
selected
74
selected
75
selected
76
selected
77
selected
78
selected
79
selected
80
selected
81
selected
82
selected
83
selected
84

In [63]:
from collections import Counter
def word_processing(dataset):
    """Build the vocabulary and lookup tables for a list of samples.

    Each item of ``dataset`` must provide ``item['sentence'][0]``, a
    whitespace-separated caption string.

    Returns ``(vocab, word_to_index, index_to_word)`` where ``vocab`` is
    ``'#START#'``, then every distinct caption token, then ``'#END#'`` and
    ``'#NULL#'``; the two dicts map between words and their positions in
    ``vocab``.
    """
    word_counts = Counter()
    for entry in dataset:
        word_counts.update(entry['sentence'][0].split())

    vocab = ['#START#'] + list(word_counts) + ['#END#', '#NULL#']

    word_to_index = dict((word, idx) for idx, word in enumerate(vocab))
    index_to_word = dict(enumerate(vocab))
    return vocab, word_to_index, index_to_word




In [64]:
# Build the vocabulary and both word<->index lookup tables from the selected samples.
vocab, word_to_index, index_to_word = word_processing(Total_dataset)

In [65]:
# Number of samples that survived the caption-length filter.
# Function-call form of print works on both Python 2 and 3.
print(len(Total_dataset))

1000


In [66]:
# One-hot encode every selected caption as an (input, ground-truth) sequence pair.
MAX_SENTENCE_LENGTH = 45
print(MAX_SENTENCE_LENGTH)

# Size the arrays from the samples actually selected; a hard-coded count leaves
# all-zero rows whenever the caption-length filter drops an image.
N_Samples = len(Total_dataset)
N_STEPS = MAX_SENTENCE_LENGTH - 1
START_IX = word_to_index['#START#']
END_IX = word_to_index['#END#']
NULL_IX = word_to_index['#NULL#']

# NOTE(review): np.zeros defaults to float64; with a large vocabulary these two
# arrays are big -- consider a smaller dtype if downstream code allows it.
ins = np.zeros((N_Samples, N_STEPS, len(vocab)))
gts = np.zeros_like(ins)
for ind, dataset_val in enumerate(Total_dataset):
    caption = dataset_val['sentence'][0]
    print(caption)
    tokens = caption.split()
    n_tokens = len(tokens)
    assert n_tokens <= MAX_SENTENCE_LENGTH - 2

    ins[ind, 0, START_IX] = 1
    for t, word in enumerate(tokens):
        word_ix = word_to_index[word]
        # the ground truth at time t is the next word
        gts[ind, t, word_ix] = 1
        # the input at time t+1 is the previous ground truth
        ins[ind, t + 1, word_ix] = 1

    # Ground truth ends with the end token. Using n_tokens instead of the
    # loop variable keeps this correct for an empty caption, where the loop
    # never runs and `t` would be undefined or stale.
    gts[ind, n_tokens, END_IX] = 1

    # After the end token: the input sees #END# once, then #NULL# padding;
    # the ground truth is #NULL# for every remaining step.
    if n_tokens + 1 < N_STEPS:
        ins[ind, n_tokens + 1, END_IX] = 1
    ins[ind, n_tokens + 2:, NULL_IX] = 1
    gts[ind, n_tokens + 1:, NULL_IX] = 1


45
Two young guys with shaggy hair look at their hands while hanging out in the yard
Several men in hard hats are operating a giant pulley system
A child in a pink dress is climbing up a set of stairs in an entry way
Someone in a blue shirt and hat is standing on stair and leaning against a window
Two men one in a gray shirt one in a black shirt standing near a stove
Two people in the photo are playing the guitar and the other is poking at him
A man sits in a chair while holding a large stuffed animal of a lion
A girl is on rollerskates talking on her cellphone standing in a parking lot
An asian man wearing a black suit stands near a darkhaired woman and a brownhaired woman
Two men in Germany jumping over a rail at the same time without shirts
Five ballet dancers caught mid jump in a dancing studio with sunlight coming through a window
Three young men and a young woman wearing sneakers are leaping in midair at the top of a flight of concrete stairs
A black dog and a white dog with brow

In [None]:
# Bundle the samples, annotation maps, one-hot encodings and vocabulary tables
# into one dict (pickled in the next cell).
dictionary = {
    'TotalDataset': Total_dataset,
    'AnnotationVectors': DS,
    'INS': ins,
    'GTS': gts,
    'vocab': vocab,
    'word_to_index': word_to_index,
    'index_to_word': index_to_word,
}

In [None]:
import pickle

# Pickle needs a binary file handle ('wb'); text mode corrupts or rejects the
# byte stream. The context manager closes the file even if dump() fails.
# (The original line also had an unbalanced closing parenthesis.)
with open("flickr30kann_1000.pkl", 'wb') as pkl_file:
    pickle.dump(dictionary, pkl_file, protocol=pickle.HIGHEST_PROTOCOL)

In [None]:
# Sanity check: decode each one-hot input sequence back into words.
print("INPUT")
n_steps = MAX_SENTENCE_LENGTH - 1
for sample in ins:
    # argmax over the vocabulary axis recovers the word index at each step
    decoded = [index_to_word[np.argmax(sample[step])] for step in range(n_steps)]
    print(" ".join(decoded))