-
Notifications
You must be signed in to change notification settings - Fork 2.6k
/
api.py
2974 lines (2590 loc) · 128 KB
/
api.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
# Copyright 2018 The JAX Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""JAX user-facing transformations and utilities.
The transformations here mostly wrap internal transformations, providing
convenience flags to control behavior and handling Python containers of
arguments and outputs. The Python containers handled are pytrees (see
tree_util.py), which include nested tuples/lists/dicts, where the leaves are
arrays.
"""
from __future__ import annotations
import collections
from collections.abc import Generator, Hashable, Iterable, Sequence
from functools import partial
import inspect
import math
import typing
from typing import (Any, Callable, Literal, NamedTuple, TypeVar, overload,
cast, Optional)
import weakref
import numpy as np
from contextlib import contextmanager, ExitStack
from jax._src import linear_util as lu
from jax._src import stages
from jax._src.tree_util import (
tree_map, tree_flatten, tree_unflatten, tree_structure, tree_transpose,
tree_leaves, Partial, PyTreeDef, all_leaves, keystr, broadcast_prefix,
prefix_errors, generate_key_paths)
from jax._src import config
from jax._src import core
from jax._src import dispatch
from jax._src import effects
from jax._src import array
from jax._src import dtypes
from jax._src import sharding_impls
from jax._src import sharding_specs
from jax._src import source_info_util
from jax._src import traceback_util
from jax._src import pjit
from jax._src import xla_bridge as xb
from jax._src.core import eval_jaxpr, ShapedArray, ConcreteArray
from jax._src.api_util import (
flatten_fun, flatten_fun_nokwargs, flatten_fun_nokwargs2, argnums_partial,
argnums_partial_except, flatten_axes, donation_vector,
rebase_donate_argnums, _ensure_index, _ensure_index_tuple,
shaped_abstractify, _ensure_str_tuple, apply_flat_fun_nokwargs,
check_callable, debug_info, result_paths, flat_out_axes, debug_info_final)
from jax._src.lax import lax as lax_internal
from jax._src.lib import jax_jit
from jax._src.lib import xla_client as xc
from jax._src.lib import pmap_lib
from jax._src.sharding import Sharding
from jax._src.sharding_impls import (PmapSharding, TransferToMemoryKind,
XLACompatibleSharding)
from jax._src.traceback_util import api_boundary
from jax._src import tree_util
from jax._src.util import unzip2, safe_map, safe_zip, wrap_name, wraps
from jax._src import util
from jax._src.interpreters import ad
from jax._src.interpreters import batching
from jax._src.interpreters import mlir
from jax._src.interpreters import partial_eval as pe
from jax._src.interpreters import pxla
from jax._src.interpreters import xla
traceback_util.register_exclusion(__file__)
_dtype = partial(dtypes.dtype, canonicalize=True)
AxisName = Hashable
Device = xc.Device
# These TypeVars are used below to express the fact that function types
# (i.e. call signatures) are invariant under the vmap transformation.
F = TypeVar("F", bound=Callable)
T = TypeVar("T")
U = TypeVar("U")
map, unsafe_map = safe_map, map
zip, unsafe_zip = safe_zip, zip
def _nan_check_posthook(fun, args, kwargs, output):
"""Hook function called by the C++ jit/pmap to perform NaN checking."""
buffers = []
for leaf in tree_leaves(output):
if hasattr(leaf, "device_buffers"):
buffers.extend(leaf.device_buffers)
try:
dispatch.check_special(pjit.pjit_p.name, buffers)
except FloatingPointError:
# compiled_fun can only raise in this case
assert config.debug_nans.value or config.debug_infs.value
print("Invalid nan value encountered in the output of a C++-jit/pmap "
"function. Calling the de-optimized version.")
fun._cache_miss(*args, **kwargs)[0] # probably won't return
def _update_debug_special_global(_):
if config._read("jax_debug_nans") or config._read("jax_debug_infs"):
jax_jit.global_state().post_hook = _nan_check_posthook
else:
jax_jit.global_state().post_hook = None
def _update_debug_special_thread_local(_):
if (getattr(config._thread_local_state, "jax_debug_nans", False) or
getattr(config._thread_local_state, "jax_debug_infs", False)):
jax_jit.thread_local_state().post_hook = _nan_check_posthook
else:
jax_jit.thread_local_state().post_hook = None
config.debug_nans._add_hooks(_update_debug_special_global,
_update_debug_special_thread_local)
config.debug_infs._add_hooks(_update_debug_special_global,
_update_debug_special_thread_local)
float0 = dtypes.float0
def jit(
fun: Callable,
in_shardings=sharding_impls.UNSPECIFIED,
out_shardings=sharding_impls.UNSPECIFIED,
static_argnums: int | Sequence[int] | None = None,
static_argnames: str | Iterable[str] | None = None,
donate_argnums: int | Sequence[int] | None = None,
donate_argnames: str | Iterable[str] | None = None,
keep_unused: bool = False,
device: xc.Device | None = None,
backend: str | None = None,
inline: bool = False,
abstracted_axes: Any | None = None,
) -> stages.Wrapped:
"""Sets up ``fun`` for just-in-time compilation with XLA.
Args:
fun: Function to be jitted. ``fun`` should be a pure function, as
side-effects may only be executed once.
The arguments and return value of ``fun`` should be arrays,
scalars, or (nested) standard Python containers (tuple/list/dict) thereof.
Positional arguments indicated by ``static_argnums`` can be anything at
all, provided they are hashable and have an equality operation defined.
Static arguments are included as part of a compilation cache key, which is
why hash and equality operators must be defined.
JAX keeps a weak reference to ``fun`` for use as a compilation cache key,
so the object ``fun`` must be weakly-referenceable. Most :class:`Callable`
objects will already satisfy this requirement.
in_shardings: Pytree of structure matching that of arguments to ``fun``,
with all actual arguments replaced by resource assignment specifications.
It is also valid to specify a pytree prefix (e.g. one value in place of a
whole subtree), in which case the leaves get broadcast to all values in
that subtree.
The ``in_shardings`` argument is optional. JAX will infer the shardings
from the input :py:class:`jax.Array`'s and defaults to replicating the input
if the sharding cannot be inferred.
The valid resource assignment specifications are:
- :py:class:`XLACompatibleSharding`, which will decide how the value
will be partitioned. With this, using a mesh context manager is not
required.
- :py:obj:`None`, will give JAX the freedom to choose whatever sharding
it wants.
For in_shardings, JAX will mark is as replicated but this behavior
can change in the future.
For out_shardings, we will rely on the XLA GSPMD partitioner to
determine the output shardings.
The size of every dimension has to be a multiple of the total number of
resources assigned to it. This is similar to pjit's in_shardings.
out_shardings: Like ``in_shardings``, but specifies resource
assignment for function outputs. This is similar to pjit's
out_shardings.
The ``out_shardings`` argument is optional. If not specified, :py:func:`jax.jit`
will use GSPMD's sharding propagation to figure out what the sharding of the
output(s) should be.
static_argnums: An optional int or collection of ints that specify which
positional arguments to treat as static (compile-time constant).
Operations that only depend on static arguments will be constant-folded in
Python (during tracing), and so the corresponding argument values can be
any Python object.
Static arguments should be hashable, meaning both ``__hash__`` and
``__eq__`` are implemented, and immutable. Calling the jitted function
with different values for these constants will trigger recompilation.
Arguments that are not arrays or containers thereof must be marked as
static.
If neither ``static_argnums`` nor ``static_argnames`` is provided, no
arguments are treated as static. If ``static_argnums`` is not provided but
``static_argnames`` is, or vice versa, JAX uses
:code:`inspect.signature(fun)` to find any positional arguments that
correspond to ``static_argnames``
(or vice versa). If both ``static_argnums`` and ``static_argnames`` are
provided, ``inspect.signature`` is not used, and only actual
parameters listed in either ``static_argnums`` or ``static_argnames`` will
be treated as static.
static_argnames: An optional string or collection of strings specifying
which named arguments to treat as static (compile-time constant). See the
comment on ``static_argnums`` for details. If not
provided but ``static_argnums`` is set, the default is based on calling
``inspect.signature(fun)`` to find corresponding named arguments.
donate_argnums: Specify which positional argument buffers are "donated" to
the computation. It is safe to donate argument buffers if you no longer
need them once the computation has finished. In some cases XLA can make
use of donated buffers to reduce the amount of memory needed to perform a
computation, for example recycling one of your input buffers to store a
result. You should not reuse buffers that you donate to a computation, JAX
will raise an error if you try to. By default, no argument buffers are
donated.
If neither ``donate_argnums`` nor ``donate_argnames`` is provided, no
arguments are donated. If ``donate_argnums`` is not provided but
``donate_argnames`` is, or vice versa, JAX uses
:code:`inspect.signature(fun)` to find any positional arguments that
correspond to ``donate_argnames``
(or vice versa). If both ``donate_argnums`` and ``donate_argnames`` are
provided, ``inspect.signature`` is not used, and only actual
parameters listed in either ``donate_argnums`` or ``donate_argnames`` will
be donated.
For more details on buffer donation see the
`FAQ <https://jax.readthedocs.io/en/latest/faq.html#buffer-donation>`_.
donate_argnames: An optional string or collection of strings specifying
which named arguments are donated to the computation. See the
comment on ``donate_argnums`` for details. If not
provided but ``donate_argnums`` is set, the default is based on calling
``inspect.signature(fun)`` to find corresponding named arguments.
keep_unused: If `False` (the default), arguments that JAX determines to be
unused by `fun` *may* be dropped from resulting compiled XLA executables.
Such arguments will not be transferred to the device nor provided to the
underlying executable. If `True`, unused arguments will not be pruned.
device: This is an experimental feature and the API is likely to change.
Optional, the Device the jitted function will run on. (Available devices
can be retrieved via :py:func:`jax.devices`.) The default is inherited
from XLA's DeviceAssignment logic and is usually to use
``jax.devices()[0]``.
backend: This is an experimental feature and the API is likely to change.
Optional, a string representing the XLA backend: ``'cpu'``, ``'gpu'``, or
``'tpu'``.
inline: Specify whether this function should be inlined into enclosing
jaxprs (rather than being represented as an application of the xla_call
primitive with its own subjaxpr). Default False.
Returns:
A wrapped version of ``fun``, set up for just-in-time compilation.
Examples:
In the following example, ``selu`` can be compiled into a single fused kernel
by XLA:
>>> import jax
>>>
>>> @jax.jit
... def selu(x, alpha=1.67, lmbda=1.05):
... return lmbda * jax.numpy.where(x > 0, x, alpha * jax.numpy.exp(x) - alpha)
>>>
>>> key = jax.random.PRNGKey(0)
>>> x = jax.random.normal(key, (10,))
>>> print(selu(x)) # doctest: +SKIP
[-0.54485 0.27744 -0.29255 -0.91421 -0.62452 -0.24748
-0.85743 -0.78232 0.76827 0.59566 ]
To pass arguments such as ``static_argnames`` when decorating a function, a common
pattern is to use :func:`functools.partial`:
>>> from functools import partial
>>>
>>> @partial(jax.jit, static_argnames=['n'])
... def g(x, n):
... for i in range(n):
... x = x ** 2
... return x
>>>
>>> g(jnp.arange(4), 3)
Array([ 0, 1, 256, 6561], dtype=int32)
"""
(in_shardings, out_shardings, donate_argnums, donate_argnames, static_argnums,
static_argnames) = pjit.pre_infer_params(
fun, in_shardings, out_shardings, donate_argnums, donate_argnames,
static_argnums, static_argnames, device, backend, abstracted_axes)
def infer_params(*args, **kwargs):
# TODO(yashkatariya): Remove this when it's added on jit.
in_layouts = kwargs.pop('_in_layouts', None)
out_layouts = kwargs.pop('_out_layouts', None)
pjit_info_args = pjit.PjitInfo(
fun=fun, in_shardings=in_shardings,
out_shardings=out_shardings, static_argnums=static_argnums,
static_argnames=static_argnames, donate_argnums=donate_argnums,
donate_argnames=donate_argnames, device=device, backend=backend,
keep_unused=keep_unused, inline=inline, resource_env=None,
abstracted_axes=abstracted_axes, in_layouts=in_layouts,
out_layouts=out_layouts)
return pjit.common_infer_params(pjit_info_args, *args, **kwargs)
has_explicit_sharding = pjit._pjit_explicit_sharding(
in_shardings, out_shardings, device, backend)
return pjit.post_infer_params(fun, infer_params, static_argnums,
static_argnames, donate_argnums,
abstracted_axes, has_explicit_sharding)
@contextmanager
def disable_jit(disable: bool = True):
"""Context manager that disables :py:func:`jit` behavior under its dynamic context.
For debugging it is useful to have a mechanism that disables :py:func:`jit`
everywhere in a dynamic context. Note that this not only disables explicit
uses of :func:`jit` by the user, but will also remove any implicit JIT compilation
used by the JAX library: this includes implicit JIT computation of `body` and
`cond` functions passed to higher-level primitives like :func:`~jax.lax.scan` and
:func:`~jax.lax.while_loop`, JIT used in implementations of :mod:`jax.numpy` functions,
and any other case where :func:`jit` is used within an API's implementation.
Values that have a data dependence on the arguments to a jitted function are
traced and abstracted. For example, an abstract value may be a
:py:class:`ShapedArray` instance, representing the set of all possible arrays
with a given shape and dtype, but not representing one concrete array with
specific values. You might notice those if you use a benign side-effecting
operation in a jitted function, like a print:
>>> import jax
>>>
>>> @jax.jit
... def f(x):
... y = x * 2
... print("Value of y is", y)
... return y + 3
...
>>> print(f(jax.numpy.array([1, 2, 3]))) # doctest:+ELLIPSIS
Value of y is Traced<ShapedArray(int32[3])>with<DynamicJaxprTrace...>
[5 7 9]
Here ``y`` has been abstracted by :py:func:`jit` to a :py:class:`ShapedArray`,
which represents an array with a fixed shape and type but an arbitrary value.
The value of ``y`` is also traced. If we want to see a concrete value while
debugging, and avoid the tracer too, we can use the :py:func:`disable_jit`
context manager:
>>> import jax
>>>
>>> with jax.disable_jit():
... print(f(jax.numpy.array([1, 2, 3])))
...
Value of y is [2 4 6]
[5 7 9]
"""
with config.disable_jit(disable):
yield
def xla_computation(fun: Callable,
static_argnums: int | Iterable[int] = (),
axis_env: Sequence[tuple[AxisName, int]] | None = None,
in_parts=None, out_parts=None,
backend: str | None = None,
tuple_args: bool = False,
instantiate_const_outputs: bool | None = None,
return_shape: bool = False,
donate_argnums: int | Iterable[int] = ()) -> Callable:
"""Creates a function that produces its XLA computation given example args.
Args:
fun: Function from which to form XLA computations.
static_argnums: See the :py:func:`jax.jit` docstring.
axis_env: Optional, a sequence of pairs where the first element is an axis
name and the second element is a positive integer representing the size of
the mapped axis with that name. This parameter is useful when lowering
functions that involve parallel communication collectives, and it
specifies the axis name/size environment that would be set up by
applications of :py:func:`jax.pmap`. See the examples below.
in_parts: Optional, how each argument to ``fun`` should be partitioned or
replicated. This is used to specify partitioned XLA computations, see
``sharded_jit`` for more info.
out_parts: Optional, how each output of ``fun`` should be partitioned or
replicated. This is used to specify partitioned XLA computations, see
``sharded_jit`` for more info.
backend: This is an experimental feature and the API is likely to change.
Optional, a string representing the XLA backend: ``'cpu'``, ``'gpu'``, or
``'tpu'``.
tuple_args: Optional bool, defaults to ``False``. If ``True``, the resulting
XLA computation will have a single tuple argument that is unpacked into
the specified function arguments. If `None`, tupling will be enabled when
there are more than 100 arguments, since some platforms have limits on
argument arity.
instantiate_const_outputs: Deprecated argument, does nothing.
return_shape: Optional boolean, defaults to ``False``. If ``True``, the
wrapped function returns a pair where the first element is the XLA
computation and the second element is a pytree with the same structure as
the output of ``fun`` and where the leaves are objects with ``shape``,
``dtype``, and ``named_shape`` attributes representing the corresponding
types of the output leaves.
donate_argnums: Specify which arguments are "donated" to the computation.
It is safe to donate arguments if you no longer need them once the
computation has finished. In some cases XLA can make use of donated
buffers to reduce the amount of memory needed to perform a computation,
for example recycling one of your input buffers to store a result. You
should not reuse buffers that you donate to a computation, JAX will raise
an error if you try to.
Returns:
A wrapped version of ``fun`` that when applied to example arguments returns
a built XLA Computation (see xla_client.py), from which representations of
the unoptimized XLA HLO computation can be extracted using methods like
``as_hlo_text``, ``as_serialized_hlo_module_proto``, and
``as_hlo_dot_graph``. If the argument ``return_shape`` is ``True``, then the
wrapped function returns a pair where the first element is the XLA
Computation and the second element is a pytree representing the structure,
shapes, dtypes, and named shapes of the output of ``fun``.
Concrete example arguments are not always necessary. For those arguments not
indicated by ``static_argnums``, any object with ``shape`` and ``dtype``
attributes is acceptable (excepting namedtuples, which are treated as Python
containers).
For example:
>>> import jax
>>>
>>> def f(x): return jax.numpy.sin(jax.numpy.cos(x))
>>> c = jax.xla_computation(f)(3.)
>>> print(c.as_hlo_text()) # doctest: +SKIP
HloModule xla_computation_f.6
<BLANKLINE>
ENTRY xla_computation_f.6 {
constant.2 = pred[] constant(false)
parameter.1 = f32[] parameter(0)
cosine.3 = f32[] cosine(parameter.1)
sine.4 = f32[] sine(cosine.3)
ROOT tuple.5 = (f32[]) tuple(sine.4)
}
<BLANKLINE>
<BLANKLINE>
Alternatively, the assignment to ``c`` above could be written:
>>> import types
>>> scalar = types.SimpleNamespace(shape=(), dtype=np.dtype(np.float32))
>>> c = jax.xla_computation(f)(scalar)
Here's an example that involves a parallel collective and axis name:
>>> def f(x): return x - jax.lax.psum(x, 'i')
>>> c = jax.xla_computation(f, axis_env=[('i', 4)])(2)
>>> print(c.as_hlo_text()) # doctest: +SKIP
HloModule jaxpr_computation.9
primitive_computation.3 {
parameter.4 = s32[] parameter(0)
parameter.5 = s32[] parameter(1)
ROOT add.6 = s32[] add(parameter.4, parameter.5)
}
ENTRY jaxpr_computation.9 {
tuple.1 = () tuple()
parameter.2 = s32[] parameter(0)
all-reduce.7 = s32[] all-reduce(parameter.2), replica_groups={{0,1,2,3}}, to_apply=primitive_computation.3
ROOT subtract.8 = s32[] subtract(parameter.2, all-reduce.7)
}
<BLANKLINE>
<BLANKLINE>
Notice the ``replica_groups`` that were generated. Here's an example that
generates more interesting ``replica_groups``:
>>> from jax import lax
>>> def g(x):
... rowsum = lax.psum(x, 'i')
... colsum = lax.psum(x, 'j')
... allsum = lax.psum(x, ('i', 'j'))
... return rowsum, colsum, allsum
...
>>> axis_env = [('i', 4), ('j', 2)]
>>> c = xla_computation(g, axis_env=axis_env)(5.)
>>> print(c.as_hlo_text()) # doctest: +SKIP
HloModule jaxpr_computation__1.19
[removed uninteresting text here]
ENTRY jaxpr_computation__1.19 {
tuple.1 = () tuple()
parameter.2 = f32[] parameter(0)
all-reduce.7 = f32[] all-reduce(parameter.2), replica_groups={{0,2,4,6},{1,3,5,7}}, to_apply=primitive_computation__1.3
all-reduce.12 = f32[] all-reduce(parameter.2), replica_groups={{0,1},{2,3},{4,5},{6,7}}, to_apply=primitive_computation__1.8
all-reduce.17 = f32[] all-reduce(parameter.2), replica_groups={{0,1,2,3,4,5,6,7}}, to_apply=primitive_computation__1.13
ROOT tuple.18 = (f32[], f32[], f32[]) tuple(all-reduce.7, all-reduce.12, all-reduce.17)
}
"""
if instantiate_const_outputs is not None:
raise ValueError(
"instantiate_const_outputs has been deprecated. Please use the ahead of"
" time APIs. You can read more here:"
" https://jax.readthedocs.io/en/latest/aot.html")
if in_parts is not None:
raise ValueError(
"in_parts has been deprecated. Please use the ahead of time APIs. You"
" can read more here: https://jax.readthedocs.io/en/latest/aot.html")
if out_parts is not None:
raise ValueError(
"out_parts has been deprecated. Please use the ahead of time APIs. You"
" can read more here: https://jax.readthedocs.io/en/latest/aot.html")
check_callable(fun)
static_argnums = _ensure_index_tuple(static_argnums)
donate_argnums = _ensure_index_tuple(donate_argnums)
donate_argnums = rebase_donate_argnums(donate_argnums, static_argnums)
fun_name = getattr(fun, "__name__", "unknown")
platform = backend if backend is not None else xb.get_backend().platform
def make_axis_env(nreps):
if axis_env is None:
return sharding_impls.AxisEnv(nreps, (), ())
else:
nreps = nreps * math.prod(size for name, size in axis_env)
names, sizes = unzip2(axis_env)
return sharding_impls.AxisEnv(nreps, names, sizes)
@wraps(fun)
@api_boundary
def computation_maker(*args, **kwargs):
if max(static_argnums + donate_argnums, default=-1) >= len(args):
raise ValueError(f"jitted function has {static_argnums=}, {donate_argnums=} but "
f"was called with only {len(args)} positional arguments.")
f = lu.wrap_init(fun)
f, dyn_args = argnums_partial_except(f, static_argnums, args, allow_invalid=False)
args_flat, in_tree = tree_flatten((dyn_args, kwargs))
if donate_argnums:
donated_invars = donation_vector(donate_argnums, (), dyn_args, kwargs)
else:
donated_invars = (False,) * len(args_flat)
jaxtree_fun, out_tree = flatten_fun(f, in_tree)
avals = map(shaped_abstractify, args_flat)
with ExitStack() as stack:
for axis_name, size in axis_env or []:
stack.enter_context(core.extend_axis_env(axis_name, size, None))
jaxpr, out_avals, consts = pe.trace_to_jaxpr_dynamic(jaxtree_fun, avals)
jaxpr = dispatch.apply_outfeed_rewriter(jaxpr)
axis_env_ = make_axis_env(dispatch.jaxpr_replicas(jaxpr))
ordered_effects = list(
effects.ordered_effects.filter_in(jaxpr.effects))
lowering_result = mlir.lower_jaxpr_to_module(
f"xla_computation_{fun_name}",
core.ClosedJaxpr(jaxpr, consts),
ordered_effects=ordered_effects,
backend_or_name=backend,
platforms=[platform],
axis_context=sharding_impls.ReplicaAxisContext(axis_env_),
name_stack=source_info_util.new_name_stack(
wrap_name(fun_name, "xla_computation")),
donated_args=donated_invars,
arg_shardings=None,
result_shardings=None,
lowering_parameters=mlir.LoweringParameters())
built = xc._xla.mlir.mlir_module_to_xla_computation(
mlir.module_to_string(lowering_result.module),
use_tuple_args=tuple_args,
return_tuple=True)
out_shapes_flat = [
ShapeDtypeStruct(a.shape, a.dtype, a.named_shape) for a in out_avals]
out_shape = tree_unflatten(out_tree(), out_shapes_flat)
for out_aval in out_avals:
if not isinstance(out_aval, ShapedArray):
raise RuntimeError("As we want to propagate the weak_type, we need "
"to get a ShapedArray, otherwise this "
"information is lost")
if return_shape:
return built, out_shape
else:
return built
return computation_maker
def grad(fun: Callable, argnums: int | Sequence[int] = 0,
has_aux: bool = False, holomorphic: bool = False,
allow_int: bool = False,
reduce_axes: Sequence[AxisName] = ()) -> Callable:
"""Creates a function that evaluates the gradient of ``fun``.
Args:
fun: Function to be differentiated. Its arguments at positions specified by
``argnums`` should be arrays, scalars, or standard Python containers.
Argument arrays in the positions specified by ``argnums`` must be of
inexact (i.e., floating-point or complex) type. It
should return a scalar (which includes arrays with shape ``()`` but not
arrays with shape ``(1,)`` etc.)
argnums: Optional, integer or sequence of integers. Specifies which
positional argument(s) to differentiate with respect to (default 0).
has_aux: Optional, bool. Indicates whether ``fun`` returns a pair where the
first element is considered the output of the mathematical function to be
differentiated and the second element is auxiliary data. Default False.
holomorphic: Optional, bool. Indicates whether ``fun`` is promised to be
holomorphic. If True, inputs and outputs must be complex. Default False.
allow_int: Optional, bool. Whether to allow differentiating with
respect to integer valued inputs. The gradient of an integer input will
have a trivial vector-space dtype (float0). Default False.
reduce_axes: Optional, tuple of axis names. If an axis is listed here, and
``fun`` implicitly broadcasts a value over that axis, the backward pass
will perform a ``psum`` of the corresponding gradient. Otherwise, the
gradient will be per-example over named axes. For example, if ``'batch'``
is a named batch axis, ``grad(f, reduce_axes=('batch',))`` will create a
function that computes the total gradient while ``grad(f)`` will create
one that computes the per-example gradient.
Returns:
A function with the same arguments as ``fun``, that evaluates the gradient
of ``fun``. If ``argnums`` is an integer then the gradient has the same
shape and type as the positional argument indicated by that integer. If
argnums is a tuple of integers, the gradient is a tuple of values with the
same shapes and types as the corresponding arguments. If ``has_aux`` is True
then a pair of (gradient, auxiliary_data) is returned.
For example:
>>> import jax
>>>
>>> grad_tanh = jax.grad(jax.numpy.tanh)
>>> print(grad_tanh(0.2))
0.961043
"""
value_and_grad_f = value_and_grad(fun, argnums, has_aux=has_aux,
holomorphic=holomorphic,
allow_int=allow_int,
reduce_axes=reduce_axes)
docstr = ("Gradient of {fun} with respect to positional argument(s) "
"{argnums}. Takes the same arguments as {fun} but returns the "
"gradient, which has the same shape as the arguments at "
"positions {argnums}.")
@wraps(fun, docstr=docstr, argnums=argnums)
@api_boundary
def grad_f(*args, **kwargs):
_, g = value_and_grad_f(*args, **kwargs)
return g
@wraps(fun, docstr=docstr, argnums=argnums)
@api_boundary
def grad_f_aux(*args, **kwargs):
(_, aux), g = value_and_grad_f(*args, **kwargs)
return g, aux
return grad_f_aux if has_aux else grad_f
def value_and_grad(fun: Callable, argnums: int | Sequence[int] = 0,
has_aux: bool = False, holomorphic: bool = False,
allow_int: bool = False, reduce_axes: Sequence[AxisName] = ()
) -> Callable[..., tuple[Any, Any]]:
"""Create a function that evaluates both ``fun`` and the gradient of ``fun``.
Args:
fun: Function to be differentiated. Its arguments at positions specified by
``argnums`` should be arrays, scalars, or standard Python containers. It
should return a scalar (which includes arrays with shape ``()`` but not
arrays with shape ``(1,)`` etc.)
argnums: Optional, integer or sequence of integers. Specifies which
positional argument(s) to differentiate with respect to (default 0).
has_aux: Optional, bool. Indicates whether ``fun`` returns a pair where the
first element is considered the output of the mathematical function to be
differentiated and the second element is auxiliary data. Default False.
holomorphic: Optional, bool. Indicates whether ``fun`` is promised to be
holomorphic. If True, inputs and outputs must be complex. Default False.
allow_int: Optional, bool. Whether to allow differentiating with
respect to integer valued inputs. The gradient of an integer input will
have a trivial vector-space dtype (float0). Default False.
reduce_axes: Optional, tuple of axis names. If an axis is listed here, and
``fun`` implicitly broadcasts a value over that axis, the backward pass
will perform a ``psum`` of the corresponding gradient. Otherwise, the
gradient will be per-example over named axes. For example, if ``'batch'``
is a named batch axis, ``value_and_grad(f, reduce_axes=('batch',))`` will
create a function that computes the total gradient while
``value_and_grad(f)`` will create one that computes the per-example
gradient.
Returns:
A function with the same arguments as ``fun`` that evaluates both ``fun``
and the gradient of ``fun`` and returns them as a pair (a two-element
tuple). If ``argnums`` is an integer then the gradient has the same shape
and type as the positional argument indicated by that integer. If argnums is
a sequence of integers, the gradient is a tuple of values with the same
shapes and types as the corresponding arguments. If ``has_aux`` is True
then a tuple of ((value, auxiliary_data), gradient) is returned.
"""
docstr = ("Value and gradient of {fun} with respect to positional "
"argument(s) {argnums}. Takes the same arguments as {fun} but "
"returns a two-element tuple where the first element is the value "
"of {fun} and the second element is the gradient, which has the "
"same shape as the arguments at positions {argnums}.")
check_callable(fun)
argnums = core.concrete_or_error(_ensure_index, argnums)
reduce_axes = _ensure_str_tuple(reduce_axes) # type: ignore
@wraps(fun, docstr=docstr, argnums=argnums)
@api_boundary
def value_and_grad_f(*args, **kwargs):
max_argnum = argnums if isinstance(argnums, int) else max(argnums)
if max_argnum >= len(args):
raise TypeError(f"differentiating with respect to {argnums=} requires at least "
f"{max_argnum + 1} positional arguments to be passed by the caller, "
f"but got only {len(args)} positional arguments.")
f = lu.wrap_init(fun, kwargs)
f_partial, dyn_args = argnums_partial(f, argnums, args,
require_static_args_hashable=False)
for leaf in tree_leaves(dyn_args):
_check_input_dtype_grad(holomorphic, allow_int, leaf)
if not has_aux:
ans, vjp_py = _vjp(f_partial, *dyn_args, reduce_axes=reduce_axes)
else:
ans, vjp_py, aux = _vjp(
f_partial, *dyn_args, has_aux=True, reduce_axes=reduce_axes)
_check_scalar(ans)
tree_map(partial(_check_output_dtype_grad, holomorphic), ans)
g = vjp_py(lax_internal._one(ans))
g = g[0] if isinstance(argnums, int) else g
if not has_aux:
return ans, g
else:
return (ans, aux), g
return value_and_grad_f
def _check_scalar(x):
msg = "Gradient only defined for scalar-output functions. Output {}.".format
try:
aval = core.get_aval(x)
except TypeError as e:
raise TypeError(msg(f"was {x}")) from e
else:
if isinstance(aval, ShapedArray):
if aval.shape != ():
raise TypeError(msg(f"had shape: {aval.shape}"))
else:
raise TypeError(msg(f"had abstract value {aval}"))
def _check_input_dtype_revderiv(name, holomorphic, allow_int, x):
dispatch.check_arg(x)
aval = core.get_aval(x)
if holomorphic:
if not dtypes.issubdtype(aval.dtype, np.complexfloating):
raise TypeError(f"{name} with holomorphic=True requires inputs with complex dtype, "
f"but got {aval.dtype.name}.")
if (dtypes.issubdtype(aval.dtype, dtypes.extended) or
dtypes.issubdtype(aval.dtype, np.integer) or
dtypes.issubdtype(aval.dtype, np.bool_)):
if not allow_int:
raise TypeError(f"{name} requires real- or complex-valued inputs (input dtype "
f"that is a sub-dtype of np.inexact), but got {aval.dtype.name}. "
"If you want to use Boolean- or integer-valued inputs, use vjp "
"or set allow_int to True.")
elif not dtypes.issubdtype(aval.dtype, np.inexact):
raise TypeError(f"{name} requires numerical-valued inputs (input dtype that is a "
f"sub-dtype of np.bool_ or np.number), but got {aval.dtype.name}.")
_check_input_dtype_grad = partial(_check_input_dtype_revderiv, "grad")
def _check_output_dtype_revderiv(name, holomorphic, x):
aval = core.get_aval(x)
if dtypes.issubdtype(aval.dtype, dtypes.extended):
raise TypeError(
f"{name} with output element type {aval.dtype.name}")
if holomorphic:
if not dtypes.issubdtype(aval.dtype, np.complexfloating):
raise TypeError(f"{name} with holomorphic=True requires outputs with complex dtype, "
f"but got {aval.dtype.name}.")
elif dtypes.issubdtype(aval.dtype, np.complexfloating):
raise TypeError(f"{name} requires real-valued outputs (output dtype that is "
f"a sub-dtype of np.floating), but got {aval.dtype.name}. "
"For holomorphic differentiation, pass holomorphic=True. "
"For differentiation of non-holomorphic functions involving complex "
"outputs, use jax.vjp directly.")
elif not dtypes.issubdtype(aval.dtype, np.floating):
raise TypeError(f"{name} requires real-valued outputs (output dtype that is "
f"a sub-dtype of np.floating), but got {aval.dtype.name}. "
"For differentiation of functions with integer outputs, use "
"jax.vjp directly.")
_check_output_dtype_grad = partial(_check_output_dtype_revderiv, "grad")
def jacfwd(fun: Callable, argnums: int | Sequence[int] = 0,
has_aux: bool = False, holomorphic: bool = False) -> Callable:
"""Jacobian of ``fun`` evaluated column-by-column using forward-mode AD.
Args:
fun: Function whose Jacobian is to be computed.
argnums: Optional, integer or sequence of integers. Specifies which
positional argument(s) to differentiate with respect to (default ``0``).
has_aux: Optional, bool. Indicates whether ``fun`` returns a pair where the
first element is considered the output of the mathematical function to be
differentiated and the second element is auxiliary data. Default False.
holomorphic: Optional, bool. Indicates whether ``fun`` is promised to be
holomorphic. Default False.
Returns:
A function with the same arguments as ``fun``, that evaluates the Jacobian of
``fun`` using forward-mode automatic differentiation. If ``has_aux`` is True
then a pair of (jacobian, auxiliary_data) is returned.
>>> import jax
>>> import jax.numpy as jnp
>>>
>>> def f(x):
... return jnp.asarray(
... [x[0], 5*x[2], 4*x[1]**2 - 2*x[2], x[2] * jnp.sin(x[0])])
...
>>> print(jax.jacfwd(f)(jnp.array([1., 2., 3.])))
[[ 1. 0. 0. ]
[ 0. 0. 5. ]
[ 0. 16. -2. ]
[ 1.6209 0. 0.84147]]
"""
check_callable(fun)
argnums = _ensure_index(argnums)
docstr = ("Jacobian of {fun} with respect to positional argument(s) "
"{argnums}. Takes the same arguments as {fun} but returns the "
"jacobian of the output with respect to the arguments at "
"positions {argnums}.")
@wraps(fun, docstr=docstr, argnums=argnums)
def jacfun(*args, **kwargs):
f = lu.wrap_init(fun, kwargs)
f_partial, dyn_args = argnums_partial(f, argnums, args,
require_static_args_hashable=False)
tree_map(partial(_check_input_dtype_jacfwd, holomorphic), dyn_args)
if not has_aux:
pushfwd: Callable = partial(_jvp, f_partial, dyn_args)
y, jac = vmap(pushfwd, out_axes=(None, -1))(_std_basis(dyn_args))
else:
pushfwd: Callable = partial(_jvp, f_partial, dyn_args, has_aux=True)
y, jac, aux = vmap(pushfwd, out_axes=(None, -1, None))(_std_basis(dyn_args))
tree_map(partial(_check_output_dtype_jacfwd, holomorphic), y)
example_args = dyn_args[0] if isinstance(argnums, int) else dyn_args
jac_tree = tree_map(partial(_jacfwd_unravel, example_args), y, jac)
if not has_aux:
return jac_tree
else:
return jac_tree, aux
return jacfun
def _check_input_dtype_jacfwd(holomorphic: bool, x: Any) -> None:
dispatch.check_arg(x)
aval = core.get_aval(x)
if dtypes.issubdtype(aval.dtype, dtypes.extended):
raise TypeError(
f"jacfwd with input element type {aval.dtype.name}")
if holomorphic:
if not dtypes.issubdtype(aval.dtype, np.complexfloating):
raise TypeError("jacfwd with holomorphic=True requires inputs with complex "
f"dtype, but got {aval.dtype.name}.")
elif not dtypes.issubdtype(aval.dtype, np.floating):
raise TypeError("jacfwd requires real-valued inputs (input dtype that is "
f"a sub-dtype of np.floating), but got {aval.dtype.name}. "
"For holomorphic differentiation, pass holomorphic=True. "
"For differentiation of non-holomorphic functions involving "
"complex inputs or integer inputs, use jax.jvp directly.")
def _check_output_dtype_jacfwd(holomorphic, x):
aval = core.get_aval(x)
if holomorphic:
if not dtypes.issubdtype(aval.dtype, np.complexfloating):
raise TypeError("jacfwd with holomorphic=True requires outputs with complex dtype, "
f"but got {aval.dtype.name}.")
def jacrev(fun: Callable, argnums: int | Sequence[int] = 0,
has_aux: bool = False, holomorphic: bool = False, allow_int: bool = False) -> Callable:
"""Jacobian of ``fun`` evaluated row-by-row using reverse-mode AD.
Args:
fun: Function whose Jacobian is to be computed.
argnums: Optional, integer or sequence of integers. Specifies which
positional argument(s) to differentiate with respect to (default ``0``).
has_aux: Optional, bool. Indicates whether ``fun`` returns a pair where the
first element is considered the output of the mathematical function to be
differentiated and the second element is auxiliary data. Default False.
holomorphic: Optional, bool. Indicates whether ``fun`` is promised to be
holomorphic. Default False.
allow_int: Optional, bool. Whether to allow differentiating with
respect to integer valued inputs. The gradient of an integer input will
have a trivial vector-space dtype (float0). Default False.
Returns:
A function with the same arguments as ``fun``, that evaluates the Jacobian of
``fun`` using reverse-mode automatic differentiation. If ``has_aux`` is True
then a pair of (jacobian, auxiliary_data) is returned.
>>> import jax
>>> import jax.numpy as jnp
>>>
>>> def f(x):
... return jnp.asarray(
... [x[0], 5*x[2], 4*x[1]**2 - 2*x[2], x[2] * jnp.sin(x[0])])
...
>>> print(jax.jacrev(f)(jnp.array([1., 2., 3.])))
[[ 1. 0. 0. ]
[ 0. 0. 5. ]
[ 0. 16. -2. ]
[ 1.6209 0. 0.84147]]
"""
check_callable(fun)
docstr = ("Jacobian of {fun} with respect to positional argument(s) "
"{argnums}. Takes the same arguments as {fun} but returns the "
"jacobian of the output with respect to the arguments at "
"positions {argnums}.")
@wraps(fun, docstr=docstr, argnums=argnums)
def jacfun(*args, **kwargs):
f = lu.wrap_init(fun, kwargs)
f_partial, dyn_args = argnums_partial(f, argnums, args,
require_static_args_hashable=False)
tree_map(partial(_check_input_dtype_jacrev, holomorphic, allow_int), dyn_args)
if not has_aux:
y, pullback = _vjp(f_partial, *dyn_args)
else:
y, pullback, aux = _vjp(f_partial, *dyn_args, has_aux=True)
tree_map(partial(_check_output_dtype_jacrev, holomorphic), y)
jac = vmap(pullback)(_std_basis(y))
jac = jac[0] if isinstance(argnums, int) else jac
example_args = dyn_args[0] if isinstance(argnums, int) else dyn_args
jac_tree = tree_map(partial(_jacrev_unravel, y), example_args, jac)
jac_tree = tree_transpose(tree_structure(example_args), tree_structure(y), jac_tree)
if not has_aux:
return jac_tree
else:
return jac_tree, aux
return jacfun
jacobian = jacrev
_check_input_dtype_jacrev = partial(_check_input_dtype_revderiv, "jacrev")
_check_output_dtype_jacrev = partial(_check_output_dtype_revderiv, "jacrev")
def hessian(fun: Callable, argnums: int | Sequence[int] = 0,
has_aux: bool = False, holomorphic: bool = False) -> Callable:
"""Hessian of ``fun`` as a dense array.
Args:
fun: Function whose Hessian is to be computed. Its arguments at positions
specified by ``argnums`` should be arrays, scalars, or standard Python
containers thereof. It should return arrays, scalars, or standard Python
containers thereof.
argnums: Optional, integer or sequence of integers. Specifies which
positional argument(s) to differentiate with respect to (default ``0``).
has_aux: Optional, bool. Indicates whether ``fun`` returns a pair where the
first element is considered the output of the mathematical function to be
differentiated and the second element is auxiliary data. Default False.
holomorphic: Optional, bool. Indicates whether ``fun`` is promised to be
holomorphic. Default False.
Returns:
A function with the same arguments as ``fun``, that evaluates the Hessian of
``fun``.
>>> import jax
>>>
>>> g = lambda x: x[0]**3 - 2*x[0]*x[1] - x[1]**6
>>> print(jax.hessian(g)(jax.numpy.array([1., 2.])))
[[ 6. -2.]
[ -2. -480.]]
:py:func:`hessian` is a generalization of the usual definition of the Hessian
that supports nested Python containers (i.e. pytrees) as inputs and outputs.
The tree structure of ``jax.hessian(fun)(x)`` is given by forming a tree
product of the structure of ``fun(x)`` with a tree product of two copies of
the structure of ``x``. A tree product of two tree structures is formed by
replacing each leaf of the first tree with a copy of the second. For example:
>>> import jax.numpy as jnp
>>> f = lambda dct: {"c": jnp.power(dct["a"], dct["b"])}
>>> print(jax.hessian(f)({"a": jnp.arange(2.) + 1., "b": jnp.arange(2.) + 2.}))
{'c': {'a': {'a': Array([[[ 2., 0.], [ 0., 0.]],