fix for TIKA-2306 contributed by kranthigv
KranthiGV committed Mar 26, 2017
1 parent 7ce58d6 commit 236db96
Showing 1 changed file with 128 additions and 116 deletions.
@@ -44,6 +44,12 @@
from six.moves import urllib
import tensorflow as tf

from datasets import imagenet, dataset_utils
from nets import inception
from preprocessing import inception_preprocessing

slim = tf.contrib.slim

FLAGS = tf.app.flags.FLAGS

# classify_image_graph_def.pb:
@@ -63,84 +69,60 @@
"""Display this many predictions.""")

# pylint: disable=line-too-long
DATA_URL = 'http://download.tensorflow.org/models/image/imagenet/inception-2015-12-05.tgz'
DATA_URL = 'http://download.tensorflow.org/models/inception_v4_2016_09_09.tar.gz'
# pylint: enable=line-too-long

def create_readable_names_for_imagenet_labels():
  """Create a dict mapping label id to human-readable string.

  Returns:
    labels_to_names: dictionary where keys are integers from 1 to 1000
    and values are human-readable names.

  We retrieve a synset file, which contains a list of valid synset labels used
  by the ILSVRC competition. There is one synset per line, e.g.:
    # n01440764
    # n01443537
  We also retrieve a synset_to_human_file, which contains a mapping from
  synsets to human-readable names for every synset in ImageNet. These are
  stored in TSV format, e.g.:
    # n02119247 black fox
    # n02119359 silver fox
  We assign each synset (in alphabetical order) an integer, starting from 1
  (since 0 is reserved for the background class).

  Code is based on
  https://github.com/tensorflow/models/blob/master/inception/inception/data/build_imagenet_data.py#L463
  """

class NodeLookup(object):
  """Converts integer node IDs to human-readable labels."""

  def __init__(self,
               label_lookup_path=None,
               uid_lookup_path=None):
    if not label_lookup_path:
      label_lookup_path = os.path.join(
          FLAGS.model_dir, 'imagenet_2012_challenge_label_map_proto.pbtxt')
    if not uid_lookup_path:
      uid_lookup_path = os.path.join(
          FLAGS.model_dir, 'imagenet_synset_to_human_label_map.txt')
    self.node_lookup = self.load(label_lookup_path, uid_lookup_path)

  def load(self, label_lookup_path, uid_lookup_path):
    """Loads a human-readable English name for each softmax node.

    Args:
      label_lookup_path: string UID to integer node ID.
      uid_lookup_path: string UID to human-readable string.

    Returns:
      dict from integer node ID to human-readable string.
    """
    if not tf.gfile.Exists(uid_lookup_path):
      tf.logging.fatal('File does not exist %s', uid_lookup_path)
    if not tf.gfile.Exists(label_lookup_path):
      tf.logging.fatal('File does not exist %s', label_lookup_path)

    # Loads mapping from string UID to human-readable string.
    proto_as_ascii_lines = tf.gfile.GFile(uid_lookup_path).readlines()
    uid_to_human = {}
    p = re.compile(r'[n\d]*[ \S,]*')
    for line in proto_as_ascii_lines:
      parsed_items = p.findall(line)
      uid = parsed_items[0]
      human_string = parsed_items[2]
      uid_to_human[uid] = human_string

    # Loads mapping from string UID to integer node ID.
    node_id_to_uid = {}
    proto_as_ascii = tf.gfile.GFile(label_lookup_path).readlines()
    for line in proto_as_ascii:
      if line.startswith('  target_class:'):
        target_class = int(line.split(': ')[1])
      if line.startswith('  target_class_string:'):
        target_class_string = line.split(': ')[1]
        node_id_to_uid[target_class] = target_class_string[1:-2]

    # Loads the final mapping of integer node ID to human-readable string.
    node_id_to_name = {}
    for key, val in node_id_to_uid.items():
      if val not in uid_to_human:
        tf.logging.fatal('Failed to locate: %s', val)
      name = uid_to_human[val]
      node_id_to_name[key] = name

    return node_id_to_name

  def id_to_string(self, node_id):
    if node_id not in self.node_lookup:
      return ''
    return self.node_lookup[node_id]


def create_graph():
  """Creates a graph from saved GraphDef file and returns a saver."""
  # Creates graph from saved graph_def.pb.
  with tf.gfile.FastGFile(os.path.join(
      FLAGS.model_dir, 'classify_image_graph_def.pb'), 'rb') as f:
    graph_def = tf.GraphDef()
    graph_def.ParseFromString(f.read())
    _ = tf.import_graph_def(graph_def, name='')
  # pylint: disable=line-too-long

  dest_directory = FLAGS.model_dir

  synset_list = [s.strip() for s in open(os.path.join(dest_directory, 'imagenet_lsvrc_2015_synsets.txt')).readlines()]
  num_synsets_in_ilsvrc = len(synset_list)
  assert num_synsets_in_ilsvrc == 1000

  synset_to_human_list = open(os.path.join(dest_directory, 'imagenet_metadata.txt')).readlines()
  num_synsets_in_all_imagenet = len(synset_to_human_list)
  assert num_synsets_in_all_imagenet == 21842

  synset_to_human = {}
  for s in synset_to_human_list:
    parts = s.strip().split('\t')
    assert len(parts) == 2
    synset = parts[0]
    human = parts[1]
    synset_to_human[synset] = human

  label_index = 1
  labels_to_names = {0: 'background'}
  for synset in synset_list:
    name = synset_to_human[synset]
    labels_to_names[label_index] = name
    label_index += 1

  return labels_to_names
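
As a quick sanity check of the mapping this new function builds, a minimal sketch (not part of the commit; it assumes maybe_download_and_extract below has already fetched the two metadata files into FLAGS.model_dir):

names = create_readable_names_for_imagenet_labels()
assert len(names) == 1001          # 1000 ILSVRC classes plus the background class
assert names[0] == 'background'    # label 0 is reserved
print(names[1])                    # human-readable name of the alphabetically first synset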

def run_inference_on_image(image):
"""Runs inference on an image.
@@ -151,60 +133,90 @@ def run_inference_on_image(image):
  Returns:
    Nothing
  """
  dest_directory = FLAGS.model_dir

  image_size = inception.inception_v4.default_image_size

  if not tf.gfile.Exists(image):
    tf.logging.fatal('File does not exist %s', image)
  image_data = tf.gfile.FastGFile(image, 'rb').read()

  # Creates graph from saved GraphDef.
  create_graph()

  with tf.Session() as sess:
    # Some useful tensors:
    # 'softmax:0': A tensor containing the normalized prediction across
    #   1000 labels.
    # 'pool_3:0': A tensor containing the next-to-last layer containing 2048
    #   float description of the image.
    # 'DecodeJpeg/contents:0': A tensor containing a string providing JPEG
    #   encoding of the image.
    # Runs the softmax tensor by feeding the image_data as input to the graph.
    softmax_tensor = sess.graph.get_tensor_by_name('softmax:0')
    predictions = sess.run(softmax_tensor,
                           {'DecodeJpeg/contents:0': image_data})
    predictions = np.squeeze(predictions)

    # Creates node ID --> English string lookup.
    node_lookup = NodeLookup()

    top_k = predictions.argsort()[-FLAGS.num_top_predictions:][::-1]
    for node_id in top_k:
      human_string = node_lookup.id_to_string(node_id)
      score = predictions[node_id]
      print('%s (score = %.5f)' % (human_string, score))

  image_string = tf.gfile.FastGFile(image, 'rb').read()

  with tf.Graph().as_default():
    image = tf.image.decode_jpeg(image_string, channels=3)
    processed_image = inception_preprocessing.preprocess_image(image, image_size, image_size, is_training=False)
    processed_images = tf.expand_dims(processed_image, 0)

    # Create the model, use the default arg scope to configure the batch norm parameters.
    with slim.arg_scope(inception.inception_v4_arg_scope()):
      logits, _ = inception.inception_v4(processed_images, num_classes=1001, is_training=False)
    probabilities = tf.nn.softmax(logits)

    init_fn = slim.assign_from_checkpoint_fn(
        os.path.join(dest_directory, 'inception_v4.ckpt'),
        slim.get_model_variables('InceptionV4'))

    with tf.Session() as sess:
      init_fn(sess)
      np_image, probabilities = sess.run([image, probabilities])
      probabilities = probabilities[0, 0:]
      sorted_inds = [i[0] for i in sorted(enumerate(-probabilities), key=lambda x: x[1])]

    names = create_readable_names_for_imagenet_labels()
    top_k = FLAGS.num_top_predictions
    for i in range(top_k):
      index = sorted_inds[i]
      print('%s (score = %.5f)' % (names[index], probabilities[index]))
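
The sorted_inds expression above ranks class indices by descending probability. An equivalent, arguably clearer formulation (a sketch, not part of the commit; np is already imported in this script, and ties may come out in a different order):

sorted_inds = list(np.argsort(-probabilities))  # class indices, most probable first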


def util_download(url, dest_directory):
  filename = url.split('/')[-1]
  filepath = os.path.join(dest_directory, filename)

  def _progress(count, block_size, total_size):
    sys.stdout.write('\r>> Downloading %s %.1f%%' % (
        filename, float(count * block_size) / float(total_size) * 100.0))
    sys.stdout.flush()

  filepath, _ = urllib.request.urlretrieve(url, filepath, _progress)
  print()
  statinfo = os.stat(filepath)
  print('Successfully downloaded', filename, statinfo.st_size, 'bytes.')

def util_download_tar(url, dest_directory):
  filename = url.split('/')[-1]
  filepath = os.path.join(dest_directory, filename)

  def _progress(count, block_size, total_size):
    sys.stdout.write('\r>> Downloading %s %.1f%%' % (
        filename, float(count * block_size) / float(total_size) * 100.0))
    sys.stdout.flush()

  filepath, _ = urllib.request.urlretrieve(url, filepath, _progress)
  print()
  statinfo = os.stat(filepath)
  print('Successfully downloaded', filename, statinfo.st_size, 'bytes.')
  tarfile.open(filepath, 'r:gz').extractall(dest_directory)
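
util_download_tar duplicates the whole body of util_download and differs only in the final extraction step. A possible consolidation, sketched here rather than part of the commit:

def util_download_tar(url, dest_directory):
  # Reuse the plain downloader, then unpack the fetched .tar.gz archive.
  util_download(url, dest_directory)
  filepath = os.path.join(dest_directory, url.split('/')[-1])
  tarfile.open(filepath, 'r:gz').extractall(dest_directory)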


def maybe_download_and_extract():
  """Download and extract model tar file."""
  dest_directory = FLAGS.model_dir
  if not os.path.exists(dest_directory):
    os.makedirs(dest_directory)
  filename = DATA_URL.split('/')[-1]
  filepath = os.path.join(dest_directory, filename)
  if not os.path.exists(filepath):
    def _progress(count, block_size, total_size):
      sys.stdout.write('\r>> Downloading %s %.1f%%' % (
          filename, float(count * block_size) / float(total_size) * 100.0))
      sys.stdout.flush()
    filepath, _ = urllib.request.urlretrieve(DATA_URL, filepath, _progress)
    print()
    statinfo = os.stat(filepath)
    print('Successfully downloaded', filename, statinfo.st_size, 'bytes.')
  tarfile.open(filepath, 'r:gz').extractall(dest_directory)
  if not tf.gfile.Exists(dest_directory):
    tf.gfile.MakeDirs(dest_directory)
  if not tf.gfile.Exists(os.path.join(dest_directory, 'inception_v4.ckpt')):
    util_download_tar(DATA_URL, dest_directory)
  # pylint: disable=line-too-long
  if not tf.gfile.Exists(os.path.join(dest_directory, 'imagenet_lsvrc_2015_synsets.txt')):
    util_download('https://raw.githubusercontent.com/tensorflow/models/master/inception/inception/data/imagenet_lsvrc_2015_synsets.txt', dest_directory)
  if not tf.gfile.Exists(os.path.join(dest_directory, 'imagenet_metadata.txt')):
    util_download('https://raw.githubusercontent.com/tensorflow/models/master/inception/inception/data/imagenet_metadata.txt', dest_directory)
  # pylint: enable=line-too-long



def main(_):
  maybe_download_and_extract()
  image = (FLAGS.image_file if FLAGS.image_file else
           os.path.join(FLAGS.model_dir, 'cropped_panda.jpg'))
           os.path.join(FLAGS.model_dir, 'lion.jpg'))
  run_inference_on_image(image)
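
For reference, a plausible invocation of the updated script, assuming it is saved as classify_image.py (the file name is not shown in this diff) and using the --model_dir and --image_file flags referenced above:

python classify_image.py --model_dir=/tmp/imagenet --image_file=/path/to/lion.jpg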


