fix for TIKA-2306 contributed by kranthigv
KranthiGV committed Mar 26, 2017
1 parent 7ce58d6 commit 236db96
Showing 1 changed file with 128 additions and 116 deletions.
@@ -44,6 +44,12 @@
from six.moves import urllib
import tensorflow as tf

from datasets import imagenet, dataset_utils
from nets import inception
from preprocessing import inception_preprocessing

slim = tf.contrib.slim

FLAGS = tf.app.flags.FLAGS

# classify_image_graph_def.pb:
@@ -63,84 +69,60 @@
"""Display this many predictions.""")

# pylint: disable=line-too-long
DATA_URL = 'http://download.tensorflow.org/models/image/imagenet/inception-2015-12-05.tgz'
DATA_URL = 'http://download.tensorflow.org/models/inception_v4_2016_09_09.tar.gz'
# pylint: enable=line-too-long

def create_readable_names_for_imagenet_labels():
  """Create a dict mapping label id to human-readable string.

  Returns:
    labels_to_names: dictionary where keys are integers from 1 to 1000
    and values are human-readable names.

  We retrieve a synset file, which contains a list of valid synset labels used
  by the ILSVRC competition. There is one synset per line, e.g.:
    # n01440764
    # n01443537
  We also retrieve a synset_to_human_file, which contains a mapping from
  synsets to human-readable names for every synset in ImageNet. These are
  stored in TSV format, e.g.:
    # n02119247 black fox
    # n02119359 silver fox
  We assign each synset (in alphabetical order) an integer, starting from 1
  (since 0 is reserved for the background class).

  Code is based on
  https://github.com/tensorflow/models/blob/master/inception/inception/data/build_imagenet_data.py#L463
  """

class NodeLookup(object):
  """Converts integer node IDs to human-readable labels."""

  def __init__(self,
               label_lookup_path=None,
               uid_lookup_path=None):
    if not label_lookup_path:
      label_lookup_path = os.path.join(
          FLAGS.model_dir, 'imagenet_2012_challenge_label_map_proto.pbtxt')
    if not uid_lookup_path:
      uid_lookup_path = os.path.join(
          FLAGS.model_dir, 'imagenet_synset_to_human_label_map.txt')
    self.node_lookup = self.load(label_lookup_path, uid_lookup_path)

  def load(self, label_lookup_path, uid_lookup_path):
    """Loads a human-readable English name for each softmax node.

    Args:
      label_lookup_path: string UID to integer node ID.
      uid_lookup_path: string UID to human-readable string.

    Returns:
      dict from integer node ID to human-readable string.
    """
    if not tf.gfile.Exists(uid_lookup_path):
      tf.logging.fatal('File does not exist %s', uid_lookup_path)
    if not tf.gfile.Exists(label_lookup_path):
      tf.logging.fatal('File does not exist %s', label_lookup_path)

    # Loads mapping from string UID to human-readable string.
    proto_as_ascii_lines = tf.gfile.GFile(uid_lookup_path).readlines()
    uid_to_human = {}
    p = re.compile(r'[n\d]*[ \S,]*')
    for line in proto_as_ascii_lines:
      parsed_items = p.findall(line)
      uid = parsed_items[0]
      human_string = parsed_items[2]
      uid_to_human[uid] = human_string

    # Loads mapping from string UID to integer node ID.
    node_id_to_uid = {}
    proto_as_ascii = tf.gfile.GFile(label_lookup_path).readlines()
    for line in proto_as_ascii:
      if line.startswith('  target_class:'):
        target_class = int(line.split(': ')[1])
      if line.startswith('  target_class_string:'):
        target_class_string = line.split(': ')[1]
        node_id_to_uid[target_class] = target_class_string[1:-2]

    # Loads the final mapping of integer node ID to human-readable string.
    node_id_to_name = {}
    for key, val in node_id_to_uid.items():
      if val not in uid_to_human:
        tf.logging.fatal('Failed to locate: %s', val)
      name = uid_to_human[val]
      node_id_to_name[key] = name

    return node_id_to_name

  def id_to_string(self, node_id):
    if node_id not in self.node_lookup:
      return ''
    return self.node_lookup[node_id]


def create_graph():
  """Creates a graph from saved GraphDef file and returns a saver."""
  # Creates graph from saved graph_def.pb.
  with tf.gfile.FastGFile(os.path.join(
      FLAGS.model_dir, 'classify_image_graph_def.pb'), 'rb') as f:
    graph_def = tf.GraphDef()
    graph_def.ParseFromString(f.read())
    _ = tf.import_graph_def(graph_def, name='')
  # pylint: disable=line-too-long

  dest_directory = FLAGS.model_dir

  synset_list = [s.strip() for s in open(os.path.join(dest_directory, 'imagenet_lsvrc_2015_synsets.txt')).readlines()]
  num_synsets_in_ilsvrc = len(synset_list)
  assert num_synsets_in_ilsvrc == 1000

  synset_to_human_list = open(os.path.join(dest_directory, 'imagenet_metadata.txt')).readlines()
  num_synsets_in_all_imagenet = len(synset_to_human_list)
  assert num_synsets_in_all_imagenet == 21842

  synset_to_human = {}
  for s in synset_to_human_list:
    parts = s.strip().split('\t')
    assert len(parts) == 2
    synset = parts[0]
    human = parts[1]
    synset_to_human[synset] = human

  label_index = 1
  labels_to_names = {0: 'background'}
  for synset in synset_list:
    name = synset_to_human[synset]
    labels_to_names[label_index] = name
    label_index += 1

  return labels_to_names
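
As a quick sanity check of the mapping this new function builds, a minimal sketch (not part of the commit; it assumes maybe_download_and_extract below has already fetched the two metadata files into FLAGS.model_dir):

names = create_readable_names_for_imagenet_labels()
assert len(names) == 1001          # 1000 ILSVRC classes plus the background class
assert names[0] == 'background'    # label 0 is reserved
print(names[1])                    # human-readable name of the alphabetically first synset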

def run_inference_on_image(image):
"""Runs inference on an image.
@@ -151,60 +133,90 @@ def run_inference_on_image(image):
  Returns:
    Nothing
  """
  dest_directory = FLAGS.model_dir

  image_size = inception.inception_v4.default_image_size

  if not tf.gfile.Exists(image):
    tf.logging.fatal('File does not exist %s', image)
  image_data = tf.gfile.FastGFile(image, 'rb').read()

  # Creates graph from saved GraphDef.
  create_graph()

  with tf.Session() as sess:
    # Some useful tensors:
    # 'softmax:0': A tensor containing the normalized prediction across
    #   1000 labels.
    # 'pool_3:0': A tensor containing the next-to-last layer containing 2048
    #   float description of the image.
    # 'DecodeJpeg/contents:0': A tensor containing a string providing JPEG
    #   encoding of the image.
    # Runs the softmax tensor by feeding the image_data as input to the graph.
    softmax_tensor = sess.graph.get_tensor_by_name('softmax:0')
    predictions = sess.run(softmax_tensor,
                           {'DecodeJpeg/contents:0': image_data})
    predictions = np.squeeze(predictions)

    # Creates node ID --> English string lookup.
    node_lookup = NodeLookup()

    top_k = predictions.argsort()[-FLAGS.num_top_predictions:][::-1]
    for node_id in top_k:
      human_string = node_lookup.id_to_string(node_id)
      score = predictions[node_id]
      print('%s (score = %.5f)' % (human_string, score))

  image_string = tf.gfile.FastGFile(image, 'rb').read()

  with tf.Graph().as_default():
    image = tf.image.decode_jpeg(image_string, channels=3)
    processed_image = inception_preprocessing.preprocess_image(image, image_size, image_size, is_training=False)
    processed_images = tf.expand_dims(processed_image, 0)

    # Create the model, use the default arg scope to configure the batch norm parameters.
    with slim.arg_scope(inception.inception_v4_arg_scope()):
      logits, _ = inception.inception_v4(processed_images, num_classes=1001, is_training=False)
    probabilities = tf.nn.softmax(logits)

    init_fn = slim.assign_from_checkpoint_fn(
        os.path.join(dest_directory, 'inception_v4.ckpt'),
        slim.get_model_variables('InceptionV4'))

    with tf.Session() as sess:
      init_fn(sess)
      np_image, probabilities = sess.run([image, probabilities])
      probabilities = probabilities[0, 0:]
      sorted_inds = [i[0] for i in sorted(enumerate(-probabilities), key=lambda x: x[1])]

    names = create_readable_names_for_imagenet_labels()
    top_k = FLAGS.num_top_predictions
    for i in range(top_k):
      index = sorted_inds[i]
      print('%s (score = %.5f)' % (names[index], probabilities[index]))
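
The sorted_inds expression above ranks class indices by descending probability. An equivalent, arguably clearer formulation (a sketch, not part of the commit; np is already imported in this script, and ties may come out in a different order):

sorted_inds = list(np.argsort(-probabilities))  # class indices, most probable first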


def util_download(url, dest_directory):
  filename = url.split('/')[-1]
  filepath = os.path.join(dest_directory, filename)

  def _progress(count, block_size, total_size):
    sys.stdout.write('\r>> Downloading %s %.1f%%' % (
        filename, float(count * block_size) / float(total_size) * 100.0))
    sys.stdout.flush()

  filepath, _ = urllib.request.urlretrieve(url, filepath, _progress)
  print()
  statinfo = os.stat(filepath)
  print('Successfully downloaded', filename, statinfo.st_size, 'bytes.')

def util_download_tar(url, dest_directory):
  filename = url.split('/')[-1]
  filepath = os.path.join(dest_directory, filename)

  def _progress(count, block_size, total_size):
    sys.stdout.write('\r>> Downloading %s %.1f%%' % (
        filename, float(count * block_size) / float(total_size) * 100.0))
    sys.stdout.flush()

  filepath, _ = urllib.request.urlretrieve(url, filepath, _progress)
  print()
  statinfo = os.stat(filepath)
  print('Successfully downloaded', filename, statinfo.st_size, 'bytes.')
  tarfile.open(filepath, 'r:gz').extractall(dest_directory)
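
util_download_tar duplicates the whole body of util_download and differs only in the final extraction step. A possible consolidation, sketched here rather than part of the commit:

def util_download_tar(url, dest_directory):
  # Reuse the plain downloader, then unpack the fetched .tar.gz archive.
  util_download(url, dest_directory)
  filepath = os.path.join(dest_directory, url.split('/')[-1])
  tarfile.open(filepath, 'r:gz').extractall(dest_directory)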


def maybe_download_and_extract():
  """Download and extract model tar file."""
  dest_directory = FLAGS.model_dir
  if not os.path.exists(dest_directory):
    os.makedirs(dest_directory)
  filename = DATA_URL.split('/')[-1]
  filepath = os.path.join(dest_directory, filename)
  if not os.path.exists(filepath):
    def _progress(count, block_size, total_size):
      sys.stdout.write('\r>> Downloading %s %.1f%%' % (
          filename, float(count * block_size) / float(total_size) * 100.0))
      sys.stdout.flush()
    filepath, _ = urllib.request.urlretrieve(DATA_URL, filepath, _progress)
    print()
    statinfo = os.stat(filepath)
    print('Successfully downloaded', filename, statinfo.st_size, 'bytes.')
  tarfile.open(filepath, 'r:gz').extractall(dest_directory)
  if not tf.gfile.Exists(dest_directory):
    tf.gfile.MakeDirs(dest_directory)
  if not tf.gfile.Exists(os.path.join(dest_directory, 'inception_v4.ckpt')):
    util_download_tar(DATA_URL, dest_directory)
  # pylint: disable=line-too-long
  if not tf.gfile.Exists(os.path.join(dest_directory, 'imagenet_lsvrc_2015_synsets.txt')):
    util_download('https://raw.githubusercontent.com/tensorflow/models/master/inception/inception/data/imagenet_lsvrc_2015_synsets.txt', dest_directory)
  if not tf.gfile.Exists(os.path.join(dest_directory, 'imagenet_metadata.txt')):
    util_download('https://raw.githubusercontent.com/tensorflow/models/master/inception/inception/data/imagenet_metadata.txt', dest_directory)
  # pylint: enable=line-too-long



def main(_):
  maybe_download_and_extract()
  image = (FLAGS.image_file if FLAGS.image_file else
           os.path.join(FLAGS.model_dir, 'cropped_panda.jpg'))
           os.path.join(FLAGS.model_dir, 'lion.jpg'))
  run_inference_on_image(image)
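
For reference, a plausible invocation of the updated script, assuming it is saved as classify_image.py (the file name is not shown in this diff) and using the --model_dir and --image_file flags referenced above:

python classify_image.py --model_dir=/tmp/imagenet --image_file=/path/to/lion.jpg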


