edward/util/random_variables.py

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import numpy as np
import six
import tensorflow as tf

from copy import deepcopy
from edward.models.random_variable import RandomVariable
from edward.util.graphs import random_variables
from tensorflow.core.framework import attr_value_pb2
from tensorflow.python.framework.ops import set_shapes_for_outputs
from tensorflow.python.util import compat


def check_data(data):
  """Check that the data dictionary passed during inference and
  criticism is valid.

  Parameters
  ----------
  data : dict
    Dictionary whose keys are placeholders (`tf.Tensor` whose op type
    contains "Placeholder"), `RandomVariable`s, or `tf.Tensor`s.
    Placeholder keys may not be bound to a `RandomVariable` or
    `tf.Tensor`. For the remaining keys, the value must be a
    `RandomVariable` or `tf.Tensor` with a compatible shape and equal
    dtype, or a Python/NumPy value (float, int, str, list,
    `np.ndarray`, `np.number`) with a compatible shape.

  Raises
  ------
  TypeError
    If `data` is not a dict, or if any key-value pair violates the
    constraints above.
  """
  if not isinstance(data, dict):
    raise TypeError("data must have type dict.")

  for key, value in six.iteritems(data):
    if isinstance(key, tf.Tensor) and "Placeholder" in key.op.type:
      # Placeholders are fed at run time, so graph objects are rejected.
      if isinstance(value, RandomVariable):
        raise TypeError("The value of a feed cannot be a ed.RandomVariable "
                        "object. "
                        "Acceptable feed values include Python scalars, "
                        "strings, lists, numpy ndarrays, or TensorHandles.")
      elif isinstance(value, tf.Tensor):
        raise TypeError("The value of a feed cannot be a tf.Tensor object. "
                        "Acceptable feed values include Python scalars, "
                        "strings, lists, numpy ndarrays, or TensorHandles.")
    elif isinstance(key, (RandomVariable, tf.Tensor)):
      if isinstance(value, (RandomVariable, tf.Tensor)):
        if not key.shape.is_compatible_with(value.shape):
          raise TypeError("Key-value pair in data does not have same "
                          "shape: {}, {}".format(key.shape, value.shape))
        elif key.dtype != value.dtype:
          raise TypeError("Key-value pair in data does not have same "
                          "dtype: {}, {}".format(key.dtype, value.dtype))
      elif isinstance(value, (float, list, int, np.ndarray, np.number, str)):
        if not key.shape.is_compatible_with(np.shape(value)):
          raise TypeError("Key-value pair in data does not have same "
                          "shape: {}, {}".format(key.shape, np.shape(value)))
        # Compare against NumPy's abstract scalar categories. The
        # `np.float`, `np.int` and `np.str` aliases of the Python
        # builtins were removed from NumPy, and as `issubdtype` targets
        # the builtins only match the default-width dtype, which would
        # reject e.g. float32 or int32 arrays.
        elif isinstance(value, (np.ndarray, np.number)) and \
                not np.issubdtype(value.dtype, np.floating) and \
                not np.issubdtype(value.dtype, np.integer) and \
                not np.issubdtype(value.dtype, np.str_):
          raise TypeError("Data value has an invalid dtype: "
                          "{}".format(value.dtype))
      else:
        raise TypeError("Data value has an invalid type: "
                        "{}".format(type(value)))
    else:
      raise TypeError("Data key has an invalid type: {}".format(type(key)))


def check_latent_vars(latent_vars):
  """Validate the latent variable dictionary used during inference and
  criticism.

  Parameters
  ----------
  latent_vars : dict
    Dictionary in which every key and every value is a
    `RandomVariable` or `tf.Tensor`. Each key must have a shape
    compatible with, and the same dtype as, its value.

  Raises
  ------
  TypeError
    If `latent_vars` is not a dict, if a key or value has an
    unsupported type, or if a key-value pair disagrees in shape or
    dtype.
  """
  if not isinstance(latent_vars, dict):
    raise TypeError("latent_vars must have type dict.")

  for latent, approx in six.iteritems(latent_vars):
    # Each check is a guard clause; the first failure raises.
    if not isinstance(latent, (RandomVariable, tf.Tensor)):
      raise TypeError("Latent variable key has an invalid type: "
                      "{}".format(type(latent)))

    if not isinstance(approx, (RandomVariable, tf.Tensor)):
      raise TypeError("Latent variable value has an invalid type: "
                      "{}".format(type(approx)))

    if not latent.shape.is_compatible_with(approx.shape):
      raise TypeError("Key-value pair in latent_vars does not have same "
                      "shape: {}, {}".format(latent.shape, approx.shape))

    if latent.dtype != approx.dtype:
      raise TypeError("Key-value pair in latent_vars does not have same "
                      "dtype: {}, {}".format(latent.dtype, approx.dtype))


def copy_default(x, *args, **kwargs):
  """Copy `x` with `copy` if it is a graph object; otherwise return it
  unchanged.

  Parameters
  ----------
  x : object
    Any value. Only `RandomVariable`, `tf.Operation`, `tf.Tensor`, and
    `tf.Variable` instances are copied.
  *args, **kwargs
    Forwarded verbatim to `copy`.

  Returns
  -------
  object
    The result of `copy(x, *args, **kwargs)` for graph objects, else
    `x` itself.
  """
  if not isinstance(x, (RandomVariable, tf.Operation, tf.Tensor, tf.Variable)):
    return x

  return copy(x, *args, **kwargs)


def copy(org_instance, dict_swap=None, scope="copied",
         replace_itself=False, copy_q=False):
  """Build a new node in the TensorFlow graph from `org_instance`,
  where any of its ancestors existing in `dict_swap` are
  replaced with `dict_swap`'s corresponding value.

  The copying is done recursively, so any `Operation` whose output
  is required to evaluate `org_instance` is also copied (if it isn't
  already copied within the new scope). This is with the exception of
  `tf.Variable`s, `tf.placeholder`s, and nodes of type `Queue`, which
  are reused and not newly copied.

  Parameters
  ----------
  org_instance : RandomVariable, tf.Operation, tf.Tensor, or tf.Variable
    Node to add in graph with replaced ancestors.
  dict_swap : dict, optional
    Random variables, variables, tensors, or operations to swap with.
    Its keys are what `org_instance` may depend on, and its values are
    the corresponding object (not necessarily of the same class
    instance, but must have the same type, e.g., float32) that is used
    in exchange.
  scope : str, optional
    A scope for the new node(s). This is used to avoid name
    conflicts with the original node(s).
  replace_itself : bool, optional
    Whether to replace `org_instance` itself if it exists in
    `dict_swap`. (This is used for the recursion.)
  copy_q : bool, optional
    Whether to copy the replaced tensors too (if not already
    copied within the new scope). Otherwise will reuse them.

  Returns
  -------
  RandomVariable, tf.Variable, tf.Tensor, or tf.Operation
    The copied node.

  Raises
  ------
  TypeError
    If `org_instance` is not one of the above types.

  Examples
  --------
  >>> x = tf.constant(2.0)
  >>> y = tf.constant(3.0)
  >>> z = x * y
  >>>
  >>> qx = tf.constant(4.0)
  >>> # The TensorFlow graph is currently
  >>> # `x` -> `z` <- `y`, `qx`
  >>>
  >>> # This adds a subgraph with newly copied nodes,
  >>> # `copied/qx` -> `copied/z` <- `copied/y`
  >>> z_new = ed.copy(z, {x: qx})
  >>>
  >>> sess = tf.Session()
  >>> sess.run(z)
  6.0
  >>> sess.run(z_new)
  12.0
  """
  if not isinstance(org_instance,
                    (RandomVariable, tf.Operation, tf.Tensor, tf.Variable)):
    raise TypeError("Could not copy instance: " + str(org_instance))

  if dict_swap is None:
    dict_swap = {}

  # Swap instance if in dictionary.
  if org_instance in dict_swap and replace_itself:
    org_instance = dict_swap[org_instance]
    if not copy_q:
      return org_instance
  elif isinstance(org_instance, tf.Tensor) and replace_itself:
    # Deal with case when `org_instance` is the associated tensor
    # from the RandomVariable, e.g., `z.value()`. If
    # `dict_swap={z: qz}`, we aim to swap it with `qz.value()`.
    for key, value in six.iteritems(dict_swap):
      if isinstance(key, RandomVariable):
        if org_instance == key.value():
          if isinstance(value, RandomVariable):
            org_instance = value.value()
          else:
            org_instance = value
          if not copy_q:
            return org_instance
          break

  graph = tf.get_default_graph()
  new_name = scope + '/' + org_instance.name

  # If an instance of the same name exists, return appropriately.
  # Do this for ed.RandomVariable.
  # (The mapping is deliberately not named `random_variables`, which
  # would shadow the module-level import of that function.)
  rvs_by_name = {rv.name: rv for rv in
                 graph.get_collection('_random_variable_collection_')}
  if new_name in rvs_by_name:
    return rvs_by_name[new_name]

  # Do this for tf.Tensor and tf.Operation.
  # A failed lookup just means nothing has been copied under this name
  # yet; only the lookup errors are swallowed so that unrelated
  # failures (and KeyboardInterrupt/SystemExit) still propagate.
  try:
    already_present = graph.as_graph_element(new_name,
                                             allow_tensor=True,
                                             allow_operation=True)
  except (KeyError, ValueError, TypeError):
    pass
  else:
    return already_present

  # If instance is a tf.Variable, return it; do not re-copy any.
  # Note we check variables via their name and not their type. This
  # is because if we get variables through an op's inputs, it has
  # type tf.Tensor: we can only tell it is a Variable via its name.
  variables = {x.name: x for
               x in graph.get_collection(tf.GraphKeys.GLOBAL_VARIABLES)}
  if org_instance.name in variables:
    return graph.get_tensor_by_name(variables[org_instance.name].name)

  # Do the same for tf.placeholders.
  if isinstance(org_instance, tf.Tensor) and \
          "Placeholder" in org_instance.op.type:
    return org_instance

  if isinstance(org_instance, RandomVariable):
    rv = org_instance

    # If it has copiable arguments, copy them.
    args = [copy_default(arg, dict_swap, scope, True, copy_q)
            for arg in rv._args]

    kwargs = {}
    for key, value in six.iteritems(rv._kwargs):
      if isinstance(value, list):
        kwargs[key] = [copy_default(v, dict_swap, scope, True, copy_q)
                       for v in value]
      else:
        kwargs[key] = copy_default(value, dict_swap, scope, True, copy_q)

    kwargs['name'] = new_name
    # Create new random variable with copied arguments.
    new_rv = type(rv)(*args, **kwargs)
    return new_rv
  elif isinstance(org_instance, tf.Tensor):
    tensor = org_instance

    # A tensor is one of the outputs of its underlying
    # op. Therefore copy the op itself.
    op = tensor.op
    new_op = copy(op, dict_swap, scope, True, copy_q)

    output_index = op.outputs.index(tensor)
    new_tensor = new_op.outputs[output_index]

    # Add copied tensor to collections that the original one is in.
    for name, collection in tensor.graph._collections.items():
      if tensor in collection:
        graph.add_to_collection(name, new_tensor)

    return new_tensor
  elif isinstance(org_instance, tf.Operation):
    op = org_instance

    # Do not copy queue operations.
    if 'Queue' in op.type:
      return op

    # If it has an original op, copy it.
    if op._original_op is not None:
      new_original_op = copy(op._original_op, dict_swap, scope, True, copy_q)
    else:
      new_original_op = None

    # Make a copy of the node def.
    # As an instance of tensorflow.core.framework.graph_pb2.NodeDef, it
    # stores string-based info such as name, device, and type of the op.
    # It is unique to every Operation instance.
    new_node_def = deepcopy(op.node_def)
    new_node_def.name = new_name

    # Copy the other inputs needed for initialization.
    output_types = op._output_types[:]

    # Make a copy of the op def.
    # It is unique to every Operation type.
    op_def = deepcopy(op.op_def)

    ret = tf.Operation(new_node_def,
                       graph,
                       [],
                       output_types,
                       [],
                       [],
                       new_original_op,
                       op_def)

    # advertise op early to break recursions
    graph._add_op(ret)

    # If it has control inputs, copy them.
    elems = []
    for x in op.control_inputs:
      elem = copy(x, dict_swap, scope, True, copy_q)
      if not isinstance(elem, tf.Operation):
        elem = tf.convert_to_tensor(elem)

      elems.append(elem)

    ret._add_control_inputs(elems)

    # If it has inputs, copy them.
    for x in op.inputs:
      elem = copy(x, dict_swap, scope, True, copy_q)
      if not isinstance(elem, tf.Operation):
        elem = tf.convert_to_tensor(elem)

      ret._add_input(elem)

    # Use Graph's private methods to add the op, following
    # implementation of `tf.Graph().create_op()`.
    # `op_type` keys into `graph._registered_ops` below, so it must be
    # the op's type string rather than the new node's name.
    op_type = op.type

    set_shapes_for_outputs(ret)
    graph._record_op_seen_by_control_dependencies(ret)
    graph._apply_device_functions(ret)

    if graph._colocation_stack:
      all_colocation_groups = []
      for colocation_op in graph._colocation_stack:
        all_colocation_groups.extend(colocation_op.colocation_groups())
        if colocation_op.device:
          # Make this device match the device of the colocated op, to
          # provide consistency between the device and the colocation
          # property.
          if ret.device and ret.device != colocation_op.device:
            # Log through TensorFlow's logger and report the new op's
            # own name; no `logging` module or `name` variable is
            # available in this branch.
            tf.logging.warning("Tried to colocate %s with an op %s that had "
                               "a different device: %s vs %s. "
                               "Ignoring colocation property.",
                               ret.name, colocation_op.name, ret.device,
                               colocation_op.device)
          else:
            ret._set_device(colocation_op.device)

      all_colocation_groups = sorted(set(all_colocation_groups))
      ret.node_def.attr["_class"].CopyFrom(attr_value_pb2.AttrValue(
          list=attr_value_pb2.AttrValue.ListValue(s=all_colocation_groups)))

    # Sets "container" attribute if
    # (1) graph._container is not None
    # (2) "is_stateful" is set in OpDef
    # (3) "container" attribute is in OpDef
    # (4) "container" attribute is None
    if (graph._container and
        op_type in graph._registered_ops and
        graph._registered_ops[op_type].is_stateful and
        "container" in ret.node_def.attr and
            not ret.node_def.attr["container"].s):
      ret.node_def.attr["container"].CopyFrom(
          attr_value_pb2.AttrValue(s=compat.as_bytes(graph._container)))

    return ret
  else:
    # Defensive: unreachable given the isinstance check at the top.
    raise TypeError("Could not copy instance: " + str(org_instance))


def get_ancestors(x, collection=None):
  """Get ancestor random variables of input.

  Starting from `x`, the graph is walked backwards through op inputs.
  Every random variable in `collection` whose value tensor is reached
  (other than `x` itself) is reported; the walk continues past such
  random variables.

  Parameters
  ----------
  x : RandomVariable or tf.Tensor
    Query node to find ancestors of.
  collection : list of RandomVariable, optional
    The collection of random variables to check with respect to;
    defaults to all random variables in the graph.

  Returns
  -------
  list of RandomVariable
    Ancestor random variables of x.

  Examples
  --------
  >>> a = Normal(mu=0.0, sigma=1.0)
  >>> b = Normal(mu=a, sigma=1.0)
  >>> c = Normal(mu=0.0, sigma=1.0)
  >>> d = Normal(mu=tf.mul(b, c), sigma=1.0)
  >>> assert set(ed.get_ancestors(d)) == set([a, b, c])
  """
  if collection is None:
    collection = random_variables()

  # Map each random variable's value tensor back to the random variable.
  rv_by_value = {rv.value(): rv for rv in collection}

  ancestors = set()
  seen = set()
  frontier = {x}
  while frontier:
    current = frontier.pop()
    if current in seen:
      continue
    seen.add(current)

    # Work on the underlying tensor when given a random variable.
    tensor = current
    if isinstance(current, RandomVariable):
      tensor = current.value()

    hit = rv_by_value.get(tensor)
    if hit is not None and hit != x:
      ancestors.add(hit)

    # Always keep walking upstream, even through a matched node.
    frontier.update(tensor.op.inputs)

  return list(ancestors)


def get_children(x, collection=None):
  """Get child random variables of input.

  Starting from `x`, the graph is walked forwards through the outputs
  of each consuming op. A path stops as soon as it reaches a random
  variable in `collection` (other than `x`), which is then reported.

  Parameters
  ----------
  x : RandomVariable or tf.Tensor
    Query node to find children of.
  collection : list of RandomVariable, optional
    The collection of random variables to check with respect to;
    defaults to all random variables in the graph.

  Returns
  -------
  list of RandomVariable
    Child random variables of x.

  Examples
  --------
  >>> a = Normal(mu=0.0, sigma=1.0)
  >>> b = Normal(mu=a, sigma=1.0)
  >>> c = Normal(mu=a, sigma=1.0)
  >>> d = Normal(mu=c, sigma=1.0)
  >>> assert set(ed.get_children(a)) == set([b, c])
  """
  if collection is None:
    collection = random_variables()

  # Map each random variable's value tensor back to the random variable.
  rv_by_value = {rv.value(): rv for rv in collection}

  children = set()
  seen = set()
  frontier = {x}
  while frontier:
    current = frontier.pop()
    if current in seen:
      continue
    seen.add(current)

    # Work on the underlying tensor when given a random variable.
    tensor = current
    if isinstance(current, RandomVariable):
      tensor = current.value()

    hit = rv_by_value.get(tensor)
    if hit is not None and hit != x:
      # A direct child: record it and do not descend any further.
      children.add(hit)
    else:
      for consumer in tensor.consumers():
        frontier.update(consumer.outputs)

  return list(children)


def get_descendants(x, collection=None):
  """Get descendant random variables of input.

  Starting from `x`, the graph is walked forwards through the outputs
  of each consuming op. Every random variable in `collection` whose
  value tensor is reached (other than `x` itself) is reported; the
  walk continues past such random variables.

  Parameters
  ----------
  x : RandomVariable or tf.Tensor
    Query node to find descendants of.
  collection : list of RandomVariable, optional
    The collection of random variables to check with respect to;
    defaults to all random variables in the graph.

  Returns
  -------
  list of RandomVariable
    Descendant random variables of x.

  Examples
  --------
  >>> a = Normal(mu=0.0, sigma=1.0)
  >>> b = Normal(mu=a, sigma=1.0)
  >>> c = Normal(mu=a, sigma=1.0)
  >>> d = Normal(mu=c, sigma=1.0)
  >>> assert set(ed.get_descendants(a)) == set([b, c, d])
  """
  if collection is None:
    collection = random_variables()

  # Map each random variable's value tensor back to the random variable.
  rv_by_value = {rv.value(): rv for rv in collection}

  descendants = set()
  seen = set()
  frontier = {x}
  while frontier:
    current = frontier.pop()
    if current in seen:
      continue
    seen.add(current)

    # Work on the underlying tensor when given a random variable.
    tensor = current
    if isinstance(current, RandomVariable):
      tensor = current.value()

    hit = rv_by_value.get(tensor)
    if hit is not None and hit != x:
      descendants.add(hit)

    # Always keep walking downstream, even through a matched node.
    for consumer in tensor.consumers():
      frontier.update(consumer.outputs)

  return list(descendants)


def get_parents(x, collection=None):
  """Get parent random variables of input.

  Starting from `x`, the graph is walked backwards through op inputs.
  A path stops as soon as it reaches a random variable in
  `collection` (other than `x`), which is then reported.

  Parameters
  ----------
  x : RandomVariable or tf.Tensor
    Query node to find parents of.
  collection : list of RandomVariable, optional
    The collection of random variables to check with respect to;
    defaults to all random variables in the graph.

  Returns
  -------
  list of RandomVariable
    Parent random variables of x.

  Examples
  --------
  >>> a = Normal(mu=0.0, sigma=1.0)
  >>> b = Normal(mu=a, sigma=1.0)
  >>> c = Normal(mu=0.0, sigma=1.0)
  >>> d = Normal(mu=tf.mul(b, c), sigma=1.0)
  >>> assert set(ed.get_parents(d)) == set([b, c])
  """
  if collection is None:
    collection = random_variables()

  # Map each random variable's value tensor back to the random variable.
  rv_by_value = {rv.value(): rv for rv in collection}

  parents = set()
  seen = set()
  frontier = {x}
  while frontier:
    current = frontier.pop()
    if current in seen:
      continue
    seen.add(current)

    # Work on the underlying tensor when given a random variable.
    tensor = current
    if isinstance(current, RandomVariable):
      tensor = current.value()

    hit = rv_by_value.get(tensor)
    if hit is not None and hit != x:
      # A direct parent: record it and do not climb any further.
      parents.add(hit)
    else:
      frontier.update(tensor.op.inputs)

  return list(parents)


def get_siblings(x, collection=None):
  """Get sibling random variables of input.

  Siblings are the children (per `get_children`) of each parent of `x`
  (per `get_parents`), with `x` itself removed.

  Parameters
  ----------
  x : RandomVariable or tf.Tensor
    Query node to find siblings of.
  collection : list of RandomVariable, optional
    The collection of random variables to check with respect to;
    defaults to all random variables in the graph.

  Returns
  -------
  list of RandomVariable
    Sibling random variables of x.

  Examples
  --------
  >>> a = Normal(mu=0.0, sigma=1.0)
  >>> b = Normal(mu=a, sigma=1.0)
  >>> c = Normal(mu=a, sigma=1.0)
  >>> assert ed.get_siblings(b) == [c]
  """
  # Gather the children of every parent into one set.
  children_per_parent = [get_children(parent, collection)
                         for parent in get_parents(x, collection)]
  siblings = set().union(*children_per_parent)

  # `x` is a child of its own parents, so drop it if present.
  siblings.discard(x)
  return list(siblings)


def get_variables(x, collection=None):
  """Get parent TensorFlow variables of input.

  Starting from `x`, the graph is walked backwards through op inputs.
  Every variable in `collection` whose name matches the name of a
  visited tensor (other than `x` itself) is reported.

  Parameters
  ----------
  x : RandomVariable or tf.Tensor
    Query node to find parents of.
  collection : list of tf.Variable, optional
    The collection of variables to check with respect to; defaults to
    all variables in the graph.

  Returns
  -------
  list of tf.Variable
    TensorFlow variables that x depends on.

  Examples
  --------
  >>> a = tf.Variable(0.0)
  >>> b = tf.Variable(0.0)
  >>> c = Normal(mu=tf.mul(a, b), sigma=1.0)
  >>> assert set(ed.get_variables(c)) == set([a, b])
  """
  if collection is None:
    collection = tf.global_variables()

  # Variables are matched by name rather than by object identity.
  var_by_name = {var.name: var for var in collection}

  found = set()
  seen = set()
  frontier = {x}
  while frontier:
    current = frontier.pop()
    if current in seen:
      continue
    seen.add(current)

    # Work on the underlying tensor when given a random variable.
    tensor = current
    if isinstance(current, RandomVariable):
      tensor = current.value()

    hit = var_by_name.get(tensor.name)
    if hit is not None and hit != x:
      found.add(hit)

    # Always keep walking upstream, even through a matched node.
    frontier.update(tensor.op.inputs)

  return list(found)