In [1]:
import os
import sys
import tensorflow as tf
from tensorflow import gfile
from tensorflow import logging
import pprint
import numpy as np
import pickle 

In [2]:
# All paths below are relative to the working directory and use Windows-style
# backslash separators.

# Graph definition (.pb) file that a later cell parses and imports.
model_file = r".\deep_learn\image2text\classify_image_graph_def.pb"
# Image description file; parsed by parse_token_file() below.
input_description_file = r".\deep_learn\image2text\results_20130124.token"
# Directory holding the source images.
# NOTE(review): `inout_img_dir` looks like a typo for `input_img_dir`; the name
# is left unchanged because other cells reference it.
inout_img_dir = r".\deep_learn\image2text\flickr30k-images"
# Directory that receives the pickled feature batches.
output_folder = r".\deep_learn\image2text\download_inception_v3_features"

# Number of image names grouped into one batch / one output file.
batch_size = 1000
# Create the output directory if it does not exist yet.
if not gfile.Exists(output_folder):
    gfile.MakeDirs(output_folder)

def parse_token_file(token_file):
    """Parses image description file.

    Each non-blank line is expected to have the form
    ``<image name>#<caption index><TAB><description>``.

    Args:
        token_file: Path of the description (token) file to read.

    Returns:
        A dict mapping each image name to the list of its descriptions, in
        the order they appear in the file.

    Raises:
        ValueError: If a non-blank line has no tab separator, or if the part
            before the tab contains no '#'.
    """
    img_name_to_tokens = {}
    with gfile.GFile(token_file, 'r') as f:
        for line in f:
            line = line.strip('\r\n')
            # Blank lines (e.g. a trailing newline at end of file) carry no
            # caption; skip them instead of failing on the unpack below.
            if not line.strip():
                continue
            # Split on the first tab only, so a tab inside the description
            # does not break parsing.
            img_id, description = line.split('\t', 1)
            # Split on the last '#', so an image name that itself contains
            # '#' is kept intact.
            img_name, _ = img_id.rsplit('#', 1)
            img_name_to_tokens.setdefault(img_name, []).append(description)
    return img_name_to_tokens

# Parse the description file once; later cells iterate over `all_img_names`.
img_name_to_tokens = parse_token_file(input_description_file)
all_img_names = img_name_to_tokens.keys()

# Quick sanity checks: total image count, the first ten image names, and the
# descriptions recorded for one known image.
logging.info("num of all images: {}".format(len(img_name_to_tokens)))
pprint.pprint(list(all_img_names)[:10])
pprint.pprint(img_name_to_tokens['2778832101.jpg'])

INFO:tensorflow:num of all images: 31783
['1000092795.jpg',
 '10002456.jpg',
 '1000268201.jpg',
 '1000344755.jpg',
 '1000366164.jpg',
 '1000523639.jpg',
 '1000919630.jpg',
 '10010052.jpg',
 '1001465944.jpg',
 '1001545525.jpg']
['A man in jeans is reclining on a green metal bench along a busy sidewalk and '
 'crowded street .',
 'A white male with a blue sweater and gray pants laying on a sidewalk bench .',
 'A man in a blue shirt and gray pants is sleeping on a sidewalk bench .',
 'A person is sleeping on a bench , next to cars .',
 'A man sleeping on a bench in a city area .']


In [3]:
def load_paretrained_inception_v3(model_file):
    """Loads a serialized graph definition and imports it into the default graph.

    The file is read in binary mode, parsed into a ``tf.GraphDef`` and imported
    with an empty name prefix, so tensors keep their original names.

    Args:
        model_file: Path of the serialized GraphDef (.pb) file.

    Returns:
        None. The effect is the graph import itself.
    """
    # gfile.FastGFile is deprecated; TensorFlow's own deprecation notice says
    # to use tf.gfile.GFile, which is a drop-in replacement here.
    with gfile.GFile(model_file, 'rb') as f:
        graph_def = tf.GraphDef()
        graph_def.ParseFromString(f.read())
    # name='' avoids the default "import/" prefix on the imported node names.
    tf.import_graph_def(graph_def, name='')
# Import the graph from `model_file` now, so later cells can look up its
# tensors by name.
load_paretrained_inception_v3(model_file)

Instructions for updating:
Use tf.gfile.GFile.


In [4]:
# Split the 30K+ images into 30+ small files: one pickle per batch of
# `batch_size` image names.

# Materialize the image names once so every batch slices the same list instead
# of rebuilding it on each iteration.
img_names = list(all_img_names)
# Ceiling division: a final partial batch still gets its own file.
num_batches = (len(img_names) + batch_size - 1) // batch_size

with tf.Session() as sess:
    # Output tensor whose value is stored as the feature for each image
    # (presumably the pooled layer just before the classifier - confirm
    # against the graph).
    second_to_last_tensor = sess.graph.get_tensor_by_name("pool_3:0")
    for i in range(num_batches):
        candidate_img_names = img_names[i*batch_size: (i+1)*batch_size]
        # Only names whose image was actually processed are recorded, so that
        # batch_img_names[k] always corresponds to batch_features[k].
        batch_img_names = []
        batch_features = []
        for img_name in candidate_img_names:
            img_path = os.path.join(inout_img_dir, img_name)
            if not gfile.Exists(img_path):
                continue
            # Read the raw image bytes; the context manager closes the handle
            # (GFile replaces the deprecated FastGFile).
            with gfile.GFile(img_path, "rb") as img_file:
                img_data = img_file.read()
            # Run the image through Inception v3 to turn it into a feature array.
            feature_vector = sess.run(second_to_last_tensor,
                                      feed_dict={
                                          "DecodeJpeg/contents:0": img_data
                                      })
            batch_img_names.append(img_name)
            batch_features.append(feature_vector)
        if not batch_features:
            # np.vstack raises on an empty list; there is nothing to write.
            logging.warning(
                "no images found for batch {}, skipping".format(i))
            continue
        batch_features = np.vstack(batch_features)
        output_filename = os.path.join(
            output_folder,
            "image_features-%d.pickle" % i
        )
        logging.info("writing to file {} ".format(output_filename))
        # pickle emits bytes, so the file must be opened in binary mode.
        with open(output_filename, 'wb') as f:
            pickle.dump((batch_img_names, batch_features), f)

INFO:tensorflow:batch_features [array([[[[0.07258371, 0.16919097, 0.385402  , ..., 0.35254976,
          0.19607334, 0.8022364 ]]]], dtype=float32), array([[[[0.74933356, 0.3874613 , 0.32349512, ..., 0.13553889,
          0.2788835 , 0.4688607 ]]]], dtype=float32), array([[[[1.2867635e-01, 5.1112711e-01, 8.5588771e-01, ...,
          5.3659611e-04, 6.7435759e-01, 1.1618248e+00]]]], dtype=float32), array([[[[0.04187643, 0.12668768, 0.15434048, ..., 0.27246815,
          0.19978775, 0.24478933]]]], dtype=float32), array([[[[0.35677707, 0.25361267, 0.49839154, ..., 0.852923  ,
          1.1099057 , 0.62715316]]]], dtype=float32), array([[[[0.75412536, 0.22323968, 0.799543  , ..., 0.8221463 ,
          0.37914318, 0.55921865]]]], dtype=float32), array([[[[0.18437901, 0.32009038, 0.1681987 , ..., 0.18510836,
          0.59114325, 0.04933892]]]], dtype=float32), array([[[[0.23420906, 0.12689789, 0.23353007, ..., 0.6677589 ,
          0.43836594, 0.03714041]]]], dtype=float32), array([[[[0.17

In [5]:
# Sample image used for manual checks. The image directory is named
# "flickr30k-images" (hyphen), matching `inout_img_dir` and the directory
# listing printed below; the previous "flickr30k_images" path did not exist.
test_img_path = r'.\deep_learn\image2text\flickr30k-images\1000092795.jpg'

# List the working folder to confirm which files and directories are present.
print(gfile.ListDirectory(r'.\deep_learn\image2text'))
print('hello world')



['1d3c28a024d75158fad11de2a9069148.rar', 'classify_image_graph_def.pb', 'download_inception_v3_features', 'download_inception_v3_features_backup', 'flickr30k-images', 'flickr30k-images.tar', 'flickr30k_test.py', 'imagenet_slim_labels.txt', 'inception_v3.ckpt', 'inception_v3_2016_08_28.tar.gz', 'inception_v3_2016_08_28_frozen.pb', 'local_run', 'results_20130124.token', 'vocab.txt']
hello world
