# Import

In [1]:
import os
import numpy as np
import tensorflow as tf
import random
import re
import json
from gensim.models.word2vec import Word2Vec

# Load graph

In [2]:
# Location of the frozen Inception graph definition (binary protobuf).
graph_pb_path = '../../../data/inception-2015-12-05/classify_image_graph_def.pb'

# Pull the serialized GraphDef bytes off disk; the handle is released as soon
# as the bytes are in memory.
with tf.gfile.FastGFile(graph_pb_path, 'rb') as pb_file:
    serialized_graph = pb_file.read()

# Deserialize the protobuf and import its nodes into the default graph.
graph_def = tf.GraphDef()
graph_def.ParseFromString(serialized_graph)
# name='' imports the nodes without a name prefix, so tensors can later be
# looked up by their original names.
_ = tf.import_graph_def(graph_def, name='')

# Handle on the default graph, which now contains the imported nodes.
g = tf.get_default_graph()

# Insert own output layer

In [3]:
# Attach a trainable linear layer on top of the imported graph's
# 'pool_3/_reshape:0' tensor, mapping it to a 300-dimensional output.
with tf.name_scope('own'):
    # Target vector, fed at run time.
    y = tf.placeholder("float")
    # Feature tensor of the imported graph that the new layer reads from.
    x = g.get_tensor_by_name('pool_3/_reshape:0')
    # Weight matrix: (size of x's last dimension) x 300.
    w = tf.Variable(tf.random_normal(
        [int(x.get_shape()[-1]),300], stddev=0.1), name='weights')
    b = tf.Variable(tf.random_normal([1,300]), name='bias')
    y_pred = tf.add(tf.matmul(x, w), b, name='y_pred')
    # Sum of squared differences between target and prediction.
    cost = tf.reduce_sum(tf.pow(y-y_pred, 2),name='cost')
    optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.1).minimize(cost)
    # Class prediction (not useful for us)
    softmax_tensor = g.get_tensor_by_name('softmax:0')

# Write the graph to ./sums so it can be visualized in TensorBoard.
sum_writer = tf.summary.FileWriter('./sums', g)
# FileWriter buffers events and only flushes periodically; flush now so the
# graph is on disk immediately (and is not lost if the kernel is killed).
sum_writer.flush()

# Read data

In [4]:
# Images: directory holding the image files and the list of its entries.
image_path = '../../../data/val2014/'
image_list = os.listdir(image_path)
# Class labels: one entry per line of the label file. readlines() keeps the
# trailing newline on each label. The file is opened in a `with` block so the
# handle is closed once the lines have been read.
with open('../../../data/imagenet_comp_graph_label_strings.txt', 'r') as labels_file:
    labels = labels_file.readlines()

### Read captions (builds `own_dict` in memory for now; it should eventually be loaded from a pickle)

In [5]:
def sum_caption(caption):
    """Sum the word vectors of all tokens of `caption` known to `w2v`.

    The caption is split on every run of characters that is not a letter
    (including the listed accented letters), the apostrophe ’ or a hyphen.
    Each resulting token is looked up in the module-level `w2v` mapping;
    tokens that raise KeyError (unknown words, empty strings produced by the
    split) are skipped.

    Args:
        caption: The caption text.

    Returns:
        A tuple of 300 numbers: the element-wise sum of the vectors found.
        All zeros if no token is known. The length 300 is hard-coded, so the
        vectors in `w2v` must have that length for the addition to work.
    """
    # Raw string: "\-" in a normal string literal is an invalid escape sequence.
    # The resulting regex is unchanged.
    tokens = re.split(
        r"[^a-zåàâäæçéèêëîïôöœßùûüÿA-ZÅÀÂÄÆÇÉÈÊËÎÏÔÖŒÙÛÜŸ’\-]+", caption)
    # Float accumulator so the result has the same dtype whether or not any
    # token was found.
    caption_vector = np.zeros(300)
    for token in tokens:
        try:
            caption_vector = caption_vector + w2v[token]
        except KeyError:
            # Token not in the vocabulary: contributes nothing to the sum.
            pass
    return tuple(caption_vector)

# Load the word2vec vectors from the binary file, then let gensim precompute
# its normalized vectors in place (replace=True).
w2v = Word2Vec.load_word2vec_format(
    '../../../data/GoogleNews-vectors-negative300.bin',
    binary=True,
)
w2v.init_sims(replace=True)

# Parse the caption annotations JSON.
train_path = "../../../data/annotations/captions_val2014.json"
with open(train_path, 'r') as train_file:
    train_dict = json.load(train_file)

# own_dict maps image id -> {"url", "file_name", "captions", "vectors"}.
# "captions" and "vectors" are parallel lists that only exist for images that
# have at least one annotation.
own_dict = {}
for im in train_dict["images"]:
    own_dict[im["id"]] = {
        "url": im["flickr_url"],
        "file_name": im["file_name"],
    }
for cap in train_dict["annotations"]:
    entry = own_dict[cap["image_id"]]
    # setdefault creates the list on the first annotation of an image and
    # reuses it afterwards.
    entry.setdefault("captions", []).append(cap["caption"])
    entry.setdefault("vectors", []).append(sum_caption(cap["caption"]))

# Run model

In [6]:
# For the first 10 images: evaluate the cost of the new layer against one
# randomly chosen caption vector and print the class predicted by the
# imported graph's softmax.
with tf.Session() as sess:
    # Only the two variables of the new layer need initializing.
    sess.run([w.initializer, b.initializer])
    for im in image_list[:10]:
        # Read the raw image bytes; the `with` block closes the file handle.
        with tf.gfile.FastGFile(image_path + im, 'rb') as image_file:
            image = image_file.read()
        # Parse the numeric id from the *current* file name (the digits that
        # follow an underscore). int() drops the zero padding without cutting
        # the id at interior zeros.
        image_id = int(re.findall('_([0-9]+)', im)[0])
        # Choose one of the image's captions at random
        vectors = own_dict[image_id]['vectors']
        r = random.randrange(len(vectors))
        y_temp = np.asarray(vectors[r]).reshape((1,300))
        # Calculate cost and class probabilities in a single run so the
        # network is evaluated once per image instead of twice.
        cost_val, softmax_out = sess.run(
            [cost, softmax_tensor],
            {'DecodeJpeg/contents:0': image, y: y_temp})
        print(cost_val)
        # Predict class
        softmax_out = np.squeeze(softmax_out)
        class_pred = np.argmax(softmax_out)
        print(labels[class_pred])

1873.42
Airedale

1142.48
moped

1658.32
jinrikisha

1961.72
desk

1420.88
leopard

1387.81
home theater

1170.09
bulbul

1866.77
china cabinet

1665.9
ballplayer

1493.6
hot pot

