
Add partial_eval rule for for
Co-authored-by: Matthew Johnson <mattjj@google.com>
sharadmv and mattjj committed Jul 2, 2022
1 parent 1fc9afd commit a82047d
Showing 2 changed files with 193 additions and 10 deletions.
155 changes: 154 additions & 1 deletion jax/_src/lax/control_flow/for_loop.py
@@ -31,7 +31,9 @@
from jax._src import ad_util
from jax._src import dtypes
from jax._src import pretty_printer as pp
from jax._src.util import safe_map, safe_zip, split_list
from jax._src import source_info_util
from jax._src.util import (partition_list, merge_lists, safe_map, safe_zip,
                           split_list)
import jax.numpy as jnp

from jax._src.lax.control_flow import loops
@@ -616,6 +618,157 @@ def _for_jvp(primals, tangents, *, jaxpr, nsteps, reverse, which_linear):
ad.primitive_jvps[for_p] = _for_jvp


def _partial_eval_jaxpr_custom(jaxpr, in_unknowns, policy):
  # A simple wrapper around `pe.partial_eval_jaxpr_custom` that assumes all
  # inputs are instantiated and doesn't ensure any outputs are unknown or
  # instantiated.
  return pe.partial_eval_jaxpr_custom(
      jaxpr, in_unknowns, [True] * len(in_unknowns), False, False, policy)
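
# For reference, `pe.partial_eval_jaxpr_custom` returns a 5-tuple, unpacked
# below: the known jaxpr, the staged-out (unknown) jaxpr, which outputs are
# unknown, which outputs are instantiated, and the number of residuals that
# connect the two jaxprs.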

_save_everything = lambda *_, **__: True

def _for_partial_eval(trace: pe.JaxprTrace, *tracers: pe.JaxprTracer,
                      jaxpr: core.Jaxpr, nsteps: int, reverse: bool,
                      which_linear: Tuple[bool, ...]) -> List[pe.JaxprTracer]:
  num_inputs = len(tracers)
  in_unknowns = [not t.pval.is_known() for t in tracers]
  # We first need to run a fixpoint to determine which of the `Ref`s are
  # unknown after running the for loop. We want to use the jaxpr to determine
  # which `Ref`s are unknown after executing the for loop body, given which
  # `Ref`s are unknown before. However, the jaxpr has no outputs. Instead, we
  # discharge the body and run the fixpoint with the discharged jaxpr. We can
  # do this because the outputs of the discharged jaxpr are one-to-one with
  # the inputs.
  discharged_jaxpr, discharged_consts = discharge_state(jaxpr, ())
  discharged_jaxpr = discharged_jaxpr.replace(
      invars=discharged_jaxpr.constvars + discharged_jaxpr.invars,
      constvars=[])
  for _ in range(num_inputs):
    jaxpr_in_unknowns = [False] * len(discharged_consts) + [False, *in_unknowns]
    _, _, out_unknowns, _, _ = _partial_eval_jaxpr_custom(
        discharged_jaxpr, jaxpr_in_unknowns, _save_everything)
    out_unknowns = list(out_unknowns)
    if out_unknowns == in_unknowns:
      break
    in_unknowns = map(operator.or_, in_unknowns, out_unknowns)
  else:
    raise Exception("Invalid fixpoint")
  del out_unknowns  # redundant since it's the same as `in_unknowns`
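  # For example (hypothetical body, not from this change): if the body runs
  # `c[0] = b[0]` and then `b[0] = f(a[0])`, an unknown `a` makes `b`'s final
  # value unknown on the first pass, and the now-unknown `b` makes `c` unknown
  # on the second pass; a third pass confirms the fixpoint has converged.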
  tracers = tuple(trace.instantiate_const(t) if uk else t
                  for t, uk in zip(tracers, in_unknowns))

  # We use `partial_eval_jaxpr_custom` here because it won't remove effectful
  # primitives like `get`/`set`.
  jaxpr_known_resout, jaxpr_unknown_resin_, _, _, num_res = \
      _partial_eval_jaxpr_custom(jaxpr, [False, *in_unknowns],
                                 _save_everything)
  # `partial_eval_jaxpr_custom` will give us jaxprs that have hybrid `Ref` and
  # regular-valued inputs/outputs. However, we'd like to bind these jaxprs to a
  # `for`, which expects only `Ref` inputs and no outputs. We need to convert
  # both of these jaxprs into ones that are compatible with `for`.
  # TODO(sharadmv,mattjj): implement "passthrough" optimization.
  # TODO(sharadmv,mattjj): rematerialize loop-dependent values instead of
  # passing the loop index as a residual

  # `jaxpr_known_resout` is a jaxpr that maps from the input `Ref`s to output
  # residual values (none of which should be `Ref`s). We'll need to convert
  # those output residual values into initially-empty `Ref`s that the jaxpr
  # writes into at the end of each step.
  # TODO(sharadmv,mattjj): detect which residuals are loop-invariant
  jaxpr_known, res_avals = _convert_outputs_to_writes(nsteps,
                                                      jaxpr_known_resout)
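  # `jaxpr_known` now has type `(i, *known_refs, *res_refs) -> ()`, where each
  # residual `Ref` carries a leading `nsteps` dimension (one slot per step).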
  # We now run the known jaxpr to obtain our residual values.
  known_tracers, _ = partition_list(in_unknowns, tracers)
  known_vals = [t.pval.get_known() for t in known_tracers]
  empty_res = map(ad_util.zeros_like_aval, res_avals)
  jaxpr_known_args = [*known_vals, *empty_res]
  jaxpr_known_which_linear = (False,) * len(jaxpr_known_args)
  out_flat = for_p.bind(*jaxpr_known_args, jaxpr=jaxpr_known, nsteps=nsteps,
                        reverse=reverse, which_linear=jaxpr_known_which_linear)
  known_outputs, residuals = split_list(out_flat, [len(known_tracers)])
  residuals = map(trace.new_instantiated_const, residuals)

  # Now we handle `jaxpr_unknown`, which expects residual values as inputs.
  # `partial_eval_jaxpr_custom` doesn't remove unused inputs/outputs for us, so
  # we use `dce_jaxpr` here both to prune them and to find out which inputs are
  # actually used.
  jaxpr_unknown_resin, used_inputs = pe.dce_jaxpr(
      jaxpr_unknown_resin_, [], [True] * num_res + [True, *in_unknowns])
  used_res, (used_i,), used_refs = split_list(used_inputs, [num_res, 1])
  assert all(used_res), "All residuals should be used"
  # To make it compatible with `for`, we need to convert those residual values
  # into `Ref`s.
  jaxpr_unknown = _convert_inputs_to_reads(nsteps, len(res_avals),
                                           jaxpr_unknown_resin)
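  # `jaxpr_unknown` now has type `(i, *res_refs, *used_refs) -> ()`, reading
  # each residual back out of `res_refs[k][i]` at every step.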
  # Since not all inputs are used in jaxpr_unknown, we filter the input tracers
  # down using the output of `dce_jaxpr`.
  _, used_tracers = partition_list(used_refs, tracers)
  _, used_which_linear = partition_list(used_refs, which_linear)
  which_linear_unknown = (False,) * num_res + tuple(used_which_linear)
  unknown_inputs = [*residuals, *used_tracers]
  # Outputs match inputs, so we construct output tracers that look like the
  # input tracers.
  res_ref_unknown_outputs = [
      pe.JaxprTracer(trace, pe.PartialVal.unknown(t.aval), None)
      for t in unknown_inputs]
  name_stack = source_info_util.current_name_stack()[len(trace.name_stack):]
  source = source_info_util.current().replace(name_stack=name_stack)

  eqn = pe.new_eqn_recipe(unknown_inputs, res_ref_unknown_outputs,
                          for_p, dict(jaxpr=jaxpr_unknown, nsteps=nsteps,
                                      reverse=reverse,
                                      which_linear=which_linear_unknown),
                          core.no_effects, source)
  _, unknown_outputs = split_list(res_ref_unknown_outputs, [num_res])
  for t in unknown_outputs: t.recipe = eqn
  return merge_lists(in_unknowns, known_outputs, unknown_outputs)
pe.custom_partial_eval_rules[for_p] = _for_partial_eval
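
# A minimal usage sketch (illustrative only, not part of this change): the rule
# above is what lets `jax.linearize` partially evaluate through a `for` loop,
# with the known pass saving residuals and the staged-out pass reading them
# back. With a body in the style of the tests:
#
#   import jax
#   import jax.numpy as jnp
#   from jax._src.lax.control_flow import for_loop
#
#   def body(i, refs):
#     x_ref, y_ref = refs
#     y_ref[i] = jnp.sin(x_ref[i])
#
#   def f(x):
#     return for_loop.for_loop(4, body, (x, jnp.zeros(4)))[1]
#
#   y, f_lin = jax.linearize(f, jnp.arange(4.))
#   y_dot = f_lin(jnp.ones(4))  # == jnp.cos(jnp.arange(4.))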

def _convert_outputs_to_writes(
    nsteps: int, jaxpr: core.Jaxpr) -> Tuple[core.Jaxpr,
                                             List[core.ShapedArray]]:
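  """Converts a jaxpr of type `(i, *refs) -> (*residuals)` into one of type
  `(i, *refs, *residual_refs) -> ()`, where each residual ref gets a leading
  `nsteps` dimension and is written at index `i` on every step."""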
  assert not jaxpr.constvars, "Jaxpr shouldn't have constvars."

  in_avals = [v.aval for v in jaxpr.invars]  # [i, *orig_ref_avals]
  @lu.wrap_init
  def eval_jaxpr(i, *refs):
    # We split the refs into the original input refs and the dummy residual
    # refs.
    orig_refs, residual_refs = split_list(refs, [len(in_avals) - 1])
    residual_vals = core.eval_jaxpr(jaxpr, (), i, *orig_refs)
    for res_ref, res_val in zip(residual_refs, residual_vals):
      # TODO(sharadmv): loop-invariant residuals should not be an indexed write
      res_ref[i] = res_val
    return []
  res_ref_avals = [ShapedArrayRef((nsteps, *v.aval.shape), v.aval.dtype)  # pytype: disable=attribute-error
                   for v in jaxpr.outvars]
  jaxpr, _, consts = pe.trace_to_jaxpr_dynamic(
      eval_jaxpr, [*in_avals, *res_ref_avals])
  assert not consts
  return jaxpr, [core.ShapedArray(a.shape, a.dtype) for a in res_ref_avals]

def _convert_inputs_to_reads(
    nsteps: int, num_res: int, jaxpr: core.Jaxpr,
) -> core.Jaxpr:
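  """Converts a jaxpr of type `(*residuals, i, *refs) -> ()` into one of type
  `(i, *residual_refs, *refs) -> ()`, reading `residual_refs[k][i]` in place
  of each residual value on every step."""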
  assert not jaxpr.constvars, "Jaxpr should not have constvars"

  @lu.wrap_init
  def eval_jaxpr(i, *refs):
    residual_refs, orig_refs = split_list(refs, [num_res])
    # TODO(sharadmv): don't do an indexed read for loop-invariant residuals
    residual_vals = [r[i] for r in residual_refs]
    () = core.eval_jaxpr(jaxpr, (), *residual_vals, i, *orig_refs)
    return []

  res_val_avals, (i_aval,), orig_ref_avals = \
      split_list([v.aval for v in jaxpr.invars], [num_res, 1])
  res_ref_avals = [ShapedArrayRef((nsteps, *aval.shape), aval.dtype)  # pytype: disable=attribute-error
                   for aval in res_val_avals]

  jaxpr, _, () = pe.trace_to_jaxpr_dynamic(
      eval_jaxpr, [i_aval, *res_ref_avals, *orig_ref_avals])
  return jaxpr

### Testing utility

def discharged_for_loop(nsteps, body, init_state, *, reverse: bool = False):
48 changes: 39 additions & 9 deletions tests/lax_control_flow_test.py
@@ -1543,7 +1543,7 @@ def f(c, a):
"jit_scan": jit_scan, "jit_f": jit_f, "scan": scan_impl}
for jit_scan in [False, True]
for jit_f in [False, True]
for scan_impl, scan_name in SCAN_IMPLS)
for scan_impl, scan_name in SCAN_IMPLS_WITH_FOR)
def testScanLinearize(self, jit_scan, jit_f, scan):
rng = self.rng()

@@ -1944,6 +1944,15 @@ def body(x):
    python_should_be_executing = False
    lax.while_loop(cond, body, 0)

  def test_caches_depend_on_axis_env(self):
    # https://github.com/google/jax/issues/9187
    scanned_f = lambda _, __: (lax.psum(1, 'i'), None)
    f = lambda: lax.scan(scanned_f, 0, None, length=1)[0]
    ans = jax.vmap(f, axis_name='i', axis_size=2, out_axes=None)()
    self.assertEqual(ans, 2)
    ans = jax.vmap(f, axis_name='i', axis_size=3, out_axes=None)()
    self.assertEqual(ans, 3)

  def testWhileCondConstant(self):
    out = lax.while_loop(lambda _: False, lambda _: (), ())  # doesn't crash
    self.assertEqual(out, ())
@@ -2990,15 +2999,36 @@ def test_for_jvp(self, jit_for, f, ref, body_shapes, n):
    self.assertAllClose(ans, expected, check_dtypes=True, rtol=tol, atol=tol)
    jtu.check_grads(partial(for_, n, f), (args,), order=3, modes=["fwd"])

  def test_caches_depend_on_axis_env(self):
    # https://github.com/google/jax/issues/9187
    scanned_f = lambda _, __: (lax.psum(1, 'i'), None)
    f = lambda: lax.scan(scanned_f, 0, None, length=1)[0]
    ans = jax.vmap(f, axis_name='i', axis_size=2, out_axes=None)()
    self.assertEqual(ans, 2)
    ans = jax.vmap(f, axis_name='i', axis_size=3, out_axes=None)()
    self.assertEqual(ans, 3)
  @parameterized.named_parameters(
      {"testcase_name": "_jit_for={}_f={}_nsteps={}".format(
          jit_for, for_body_name, nsteps),
       "jit_for": jit_for, "f": for_body, "body_shapes": body_shapes,
       "ref": ref, "n": nsteps}
      for jit_for in [False, True]
      for for_body_name, for_body, ref, body_shapes, nsteps in [
          ("swap", for_body_swap, swap_ref, [(4,), (4,)], 4),
          ("swap_swap", for_body_swap_swap, swap_swap_ref, [(4,), (4,)], 4),
          ("sincos", for_body_sincos, sincos_ref, [(4,), (4,)], 4),
          ("sincostan", for_body_sincostan, sincostan_ref, [(4,), (4,)], 4),
          ("accum", for_body_accum, accum_ref, [(4,), (4,)], 3),
          ("sin_sq", for_body_sin_sq, sin_sq_ref, [(4,), (4,)], 4),
          ("reverse", for_body_reverse, reverse_ref, [(4,), (4,)], 4),
      ])
  def test_for_linearize(self, jit_for, f, ref, body_shapes, n):
    for_ = for_loop.for_loop
    rng = self.rng()

    args = [rng.randn(*s) for s in body_shapes]

    if jit_for:
      for_ = jax.jit(for_, static_argnums=(0, 1))
    tol = {np.float64: 1e-12, np.float32: 1e-4}
    ans = jax.linearize(lambda *args: for_(n, f, args), *args)[1](*args)
    ans_discharged = jax.linearize(lambda *args: for_reference(n, f, args),
                                   *args)[1](*args)
    expected = jax.linearize(ref, *args)[1](*args)
    self.assertAllClose(ans, ans_discharged, check_dtypes=True, rtol=tol,
                        atol=tol)
    self.assertAllClose(ans, expected, check_dtypes=True, rtol=tol, atol=tol)

if __name__ == '__main__':
  absltest.main(testLoader=jtu.JaxTestLoader())
