In [85]:
import json
import os
import pandas as pd
import numpy as np
import cPickle as pickle
import hickle
from collections import Counter
from nltk.corpus import wordnet as wn
import urllib
import tarfile
from PIL import Image
from core.vggnet import Vgg19
import tensorflow as tf
from scipy import ndimage

# Constants

In [83]:
caption_file = 'data/annotations/captions_train2014.json'
image_dir = 'image/train2014_resized'
max_length = 15
word_count_threshold = 1
vgg_model_path = './data/imagenet-vgg-verydeep-19.mat'
batch_size = 1


# Functions

In [3]:
def _process_caption_data(caption_file, image_dir, max_length):
    with open(caption_file) as f:
        caption_data = json.load(f)

    # id_to_filename is a dictionary such as {image_id: filename]} 
    id_to_filename = {image['id']: image['file_name'] for image in caption_data['images']}

    # data is a list of dictionary which contains 'captions', 'file_name' and 'image_id' as key.
    data = []
    for annotation in caption_data['annotations']:
        image_id = annotation['image_id']
        annotation['file_name'] = os.path.join(image_dir, id_to_filename[image_id])
        data += [annotation]

    # convert to pandas dataframe (for later visualization or debugging)
    caption_data = pd.DataFrame.from_dict(data)
    del caption_data['id']
    caption_data.sort_values(by='image_id', inplace=True)
    caption_data = caption_data.reset_index(drop=True)

    del_idx = []
    for i, caption in enumerate(caption_data['caption']):
        caption = caption.replace('.', '').replace(',', '').replace("'", "").replace('"', '')
        caption = caption.replace('&', 'and').replace('(', '').replace(")", "").replace('-', ' ')
        caption = " ".join(caption.split())  # replace multiple spaces

        caption_data.set_value(i, 'caption', caption.lower())
        if len(caption.split(" ")) > max_length:
            del_idx.append(i)

    # delete captions if size is larger than max_length
    print "The number of captions before deletion: %d" % len(caption_data)
    caption_data = caption_data.drop(caption_data.index[del_idx])
    caption_data = caption_data.reset_index(drop=True)
    print "The number of captions after deletion: %d" % len(caption_data)
    return caption_data


In [4]:
def _build_vocab(annotations, threshold=1):
    counter = Counter()
    max_len = 0
    for i, caption in enumerate(annotations['caption']):
        words = caption.split(' ') # caption contrains only lower-case words
        for w in words:
            counter[w] +=1
        
        if len(caption.split(" ")) > max_len:
            max_len = len(caption.split(" "))

    vocab = [word for word in counter if counter[word] >= threshold]
    print ('Filtered %d words to %d words with word count threshold %d.' % (len(counter), len(vocab), threshold))

    word_to_idx = {u'<NULL>': 0, u'<START>': 1, u'<END>': 2}
    idx = 3
    for word in vocab:
        word_to_idx[word] = idx
        idx += 1
    print "Max length of caption: ", max_len
    return word_to_idx

In [5]:
def load_pickle(path):
    with open(path, 'rb') as f:
        file = pickle.load(f)
        print ('Loaded %s..' %path)
        return file  

def save_pickle(data, path):
    with open(path, 'wb') as f:
        pickle.dump(data, f, pickle.HIGHEST_PROTOCOL)
        print ('Saved %s..' %path)

In [6]:
train_dataset = _process_caption_data(caption_file=caption_file,
                                      image_dir=image_dir,
                                      max_length=max_length)
                                      

The number of captions before deletion: 414113
The number of captions after deletion: 399998


In [7]:
word_to_idx = _build_vocab(annotations=train_dataset, threshold=word_count_threshold)
            

Filtered 23107 words to 23107 words with word count threshold 1.
Max length of caption:  15


In [8]:
wn.morphy('caned')

u'cane'

In [16]:
ss = wn.synsets('hats')

In [17]:
ss[0].offset()

3497657

In [25]:
wnid = ss[0].pos() + str(ss[0].offset()).zfill(8)
print wnid

n03497657


In [62]:
pre_url = 'http://www.image-net.org/download/synset?wnid='
post_url = '&username=intuinno&accesskey=6be8155ee3d56b5120241b3bda13412d3cc0cd42&release=latest&src=stanford'
testfile = urllib.URLopener()
testfile.retrieve(pre_url+wnid+post_url, wnid+'.tar')

(u'n03497657.tar', <httplib.HTTPMessage instance at 0x11c40a878>)

In [63]:
cur_dir = os.getcwd()
original_dir = './data/imagenet/%s/original/'%wnid
resized_dir = './data/imagenet/%s/resized/'%wnid

if not os.path.exists(wnid):
    os.makedirs(original_dir)
    os.rename(wnid+'.tar', original_dir + 'data.tar' )
    


In [64]:
os.chdir(original_dir)
print os.getcwd()
tar = tarfile.open('data.tar')
tar.extractall()
tar.close()
os.remove('data.tar')
os.chdir(cur_dir)

/Users/intuinno/codegit/capviz/experiment/data/imagenet/n03497657/original


In [74]:
def resize_image(image):
    width, height = image.size
    if width > height:
        left = (width - height) / 2
        right = width - left
        top = 0
        bottom = height
    else:
        top = (height - width) /2
        bottom = height - top
        left = 0
        right = width 
    image = image.crop((left, top, right, bottom))
    image = image.resize([224,224], Image.ANTIALIAS)
    return image

In [75]:
if not os.path.exists(resized_dir):
    os.makedirs(resized_dir)
print 'Start resizing %s images.' %wnid
image_files = os.listdir(original_dir)
for i, image_file in enumerate(image_files):
#     from IPython.core.debugger import Tracer; Tracer()() 
    with open(os.path.join(original_dir, image_file),'r+b') as f:
        image = Image.open(f)
        image = resize_image(image)
        image.save(os.path.join(resized_dir, image_file), image.format)
        if i % 100 == 0:
            print 'Resized images: %d/%d' %(i, len(image_files))
            


In [80]:
vggnet = Vgg19(vgg_model_path)
vggnet.build()

In [None]:
with tf.Session() as sess:
    tf.initialize_all_variables().run()
    n_examples = len(image_files)
    all_feats = np.ndarray([n_examples, 196,512], dtype=np.float32)
    
    for start, end in zip(range(0, n_examples, batch_size),
                          range(batch_size, n_examples+batch_size, batch_size)):
        image_batch_file = image_files[start:end]
        image_batch = np.array(map(lambda x: ndimage.imread(os.path.join(resized_dir, x), mode='RGB'), image_batch_file)).astype(np.float32)
        feats = sess.run(vggnet.features, feed_dict={vggnet.images: image_batch})
        all_feats[start:end, :] = feats
        print ("Processed %d %s features" %(end, wnid))
        
        

Instructions for updating:
Use `tf.global_variables_initializer` instead.


In [81]:
save_path = './data/%s.hkl' %wnid
hickle.dump(all_featus, save_path)
print "Saved %s.." % save_path

[u'n03497657_10001.JPEG',
 u'n03497657_10008.JPEG',
 u'n03497657_10010.JPEG',
 u'n03497657_10034.JPEG',
 u'n03497657_10072.JPEG',
 u'n03497657_10073.JPEG',
 u'n03497657_10079.JPEG',
 u'n03497657_10084.JPEG',
 u'n03497657_10089.JPEG',
 u'n03497657_10094.JPEG',
 u'n03497657_10105.JPEG',
 u'n03497657_10142.JPEG',
 u'n03497657_10149.JPEG',
 u'n03497657_10170.JPEG',
 u'n03497657_10172.JPEG',
 u'n03497657_10191.JPEG',
 u'n03497657_10193.JPEG',
 u'n03497657_10222.JPEG',
 u'n03497657_10226.JPEG',
 u'n03497657_10233.JPEG',
 u'n03497657_10248.JPEG',
 u'n03497657_10332.JPEG',
 u'n03497657_10334.JPEG',
 u'n03497657_10356.JPEG',
 u'n03497657_10388.JPEG',
 u'n03497657_104.JPEG',
 u'n03497657_10417.JPEG',
 u'n03497657_10420.JPEG',
 u'n03497657_10434.JPEG',
 u'n03497657_10443.JPEG',
 u'n03497657_10463.JPEG',
 u'n03497657_10464.JPEG',
 u'n03497657_10471.JPEG',
 u'n03497657_10474.JPEG',
 u'n03497657_10486.JPEG',
 u'n03497657_10499.JPEG',
 u'n03497657_10500.JPEG',
 u'n03497657_10522.JPEG',
 u'n03497657_1