from __future__ import absolute_import
import collections
import copy
import heapq
import traceback
import typing as tp # NOQA
import warnings
import weakref
import numpy
import six
import chainer
from chainer import _backprop_utils
from chainer import backend
from chainer.backends import _cpu
from chainer.backends import cuda
from chainer.backends import intel64
from chainer import initializers
from chainer.initializers import constant
from chainer import types # NOQA
import chainer.utils._collections
from chainer.utils import argument
import chainerx
def _check_grad_type(func, x, is_node_x, gx):
# TODO(niboshi): Write comment about is_node_x
assert gx is not None
# TODO(kataoka): avoid `isinstance`
x_data = None if isinstance(x, _ChainerxVariableNodeProps) else x.data
# TODO(kataoka): Make _update_data_info store the array module.
# ``is_node_x and x_data is None`` implies that the data array is not
# retained.
# ``not is_node_x and x_data is None`` implies that grad of uninitialized
# variable is checked here.
if x_data is None and not is_node_x:
# TODO(kataoka): This should be an error.
return
if x.dtype is None or x.shape is None:
# unretained Variable(None)
# TODO(kataoka): This should be an error.
return
if not isinstance(gx, chainer.get_array_types()):
msg = ('Type of grad is invalid:\n'
+ 'Expected: Any of {}\n'.format(chainer.get_array_types())
+ 'Actual: {}'.format(type(gx)))
typ = TypeError
elif x_data is not None and not chainer.is_arrays_compatible((gx, x_data)):
msg = ('Type of data and grad mismatch\ngrad: %s != data: %s' %
(type(gx), type(x_data)))
typ = TypeError
elif gx.dtype != x.dtype:
msg = ('Dtype of data and grad mismatch\ngrad: %s != data: %s' %
(gx.dtype, x.dtype))
typ = TypeError
elif gx.shape != x.shape:
msg = ('Shape of data and grad mismatch\ngrad: %s != data: %s' %
(gx.shape, x.shape))
typ = ValueError
else:
return
detail = ''
if func:
detail = 'Function `{0}` ({1}) has a bug.\n'.format(
type(func)._impl_name, func.label)
stack = func.stack
if stack:
detail += 'Stacktrace of the function is below:\n'
for line in traceback.format_list(func.stack):
detail += line
detail += '''
Please report this error to the issue tracker with the stack trace,
the information of your environment, and your script:
https://github.com/chainer/chainer/issues/new.
'''
raise typ(detail + msg)
def variable_repr(var):
"""Return the string representation of a variable.
Args:
var (~chainer.Variable): Input Variable.
.. seealso:: numpy.array_repr
"""
arr = _cpu._to_cpu(var.array)
if var.name:
prefix = 'variable ' + var.name
else:
prefix = 'variable'
if arr is None:
lst = 'None'
elif arr.size > 0 or arr.shape == (0,):
lst = numpy.array2string(arr, None, None, None, ', ', prefix + '(')
else: # show zero-length shape unless it is (0,)
lst = '[], shape=%s' % (repr(arr.shape),)
return '%s(%s)' % (prefix, lst)
def variable_str(var):
"""Return the string representation of a variable.
Args:
var (~chainer.Variable): Input Variable.
.. seealso:: numpy.array_str
"""
arr = _cpu._to_cpu(var.array)
if var.name:
prefix = 'variable ' + var.name
else:
prefix = 'variable'
if arr is None:
lst = 'None'
else:
lst = numpy.array2string(arr, None, None, None, ' ', prefix + '(')
return '%s(%s)' % (prefix, lst)
class VariableNode(object):
"""Node in the backward computational graph representing a variable.
This object represents a variable node in a computational graph. The node
is used in error backpropagation (a.k.a. backprop) to determine which
    gradient is to be passed to each function.
A variable node is held by the corresponding :class:`~chainer.Variable`
object, which is managed by users. :class:`~chainer.FunctionNode` objects
that take the variable as an input also hold references to the variable
node.
Note that the node does not hold a reference to the corresponding data
array in general. The data array is actually accessible by the node in the
following cases.
1. If there exists a :class:`~chainer.Variable` object that holds a
reference to the variable node, the variable node holds a weak reference
to the variable object, and thus the data array is accessible via the
weak reference.
2. If :meth:`retain_data` is called, the node holds a reference to the data
array. It is mainly called by a function that needs the input or output
data array in its backprop procedure.
See :meth:`FunctionNode.retain_inputs()
<chainer.FunctionNode.retain_inputs>`
and :meth:`FunctionNode.retain_outputs()
<chainer.FunctionNode.retain_outputs>` for more details.
Users usually do not need to touch this variable node object. The
computational graph is automatically managed by Chainer, and any interface
that is beneficial for users is also provided by
:class:`~chainer.Variable`.
Args:
variable (~chainer.Variable): The corresponding variable object.
name (str): Name of the variable node.
Attributes:
dtype: Data type of the data array.
shape: Shape of the data array.
name (str): Name of the variable node.
"""
_creator_node = None
_data = None # type: types.NdArray
_rank = 0 # type: int
# Name of the Function is assigned if this variable is a gradient generated
# by an old-style Function
_old_style_grad_generator = None # type: str
def __init__(self, variable, name, **kwargs):
# type: (Variable, tp.Optional[str], **tp.Any) -> None
if kwargs:
argument.check_unexpected_kwargs(
kwargs,
grad='unexpected keyword argument "grad": '
'pass the gradient to Variable instead'
)
self._variable = weakref.ref(variable)
self.name = name
self._requires_grad = variable.requires_grad
vdata = variable.data
self._update_data_info(vdata)
@property
def creator(self):
"""Function object that created this variable node.
When the function is implemented with the old-style API (i.e., it uses
:class:`~chainer.Function` class),
this property returns the :class:`~chainer.Function` object.
The object is extracted from the :class:`~chainer.FunctionAdapter`
object, so the returned object is not the function node, but instead
the actual implementation of forward and backward procedures.
When the function is implemented with the new-style API (i.e., it uses
:class:`~chainer.FunctionNode` class),
this property returns the function node
        object. In this case, the returned object is the same as
:attr:`creator_node`.
.. warning::
As of v3.0.0, when the creator is an old-style function, the
following code is invalid:
.. code-block:: python
creator = v.creator
v.creator = None
...
v.creator = creator
The point is that :class:`~chainer.FunctionNode` objects are used
as nodes in the computational graph instead of
:class:`~chainer.Function`, and each :class:`~chainer.Function`
object only holds a *weak reference* to the corresponding
:class:`~chainer.FunctionNode`.
Since ``creator`` returns the :class:`~chainer.Function` object,
the :class:`~chainer.FunctionNode` object is not kept by preserving
``creator``.
The above code should be fixed as follows.
.. code-block:: python
creator_node = v.creator_node
v.creator_node = None
...
v.creator_node = creator_node
"""
node = self._creator_node
if node is None:
return None
if isinstance(node, chainer.function.FunctionAdapter):
return node.function
return node
@creator.setter
def creator(self, func):
self.creator_node = func
@property
def creator_node(self):
"""Function node that has this variable as an output.
See :class:`~chainer.FunctionNode` for the definition of a function
node.
"""
return self._creator_node
@creator_node.setter
def creator_node(self, func):
if isinstance(func, chainer.Function):
func = func.node
self._creator_node = func
if func is not None:
self._rank = func.rank + 1
@property
def data(self):
"""Data array of the corresponding variable.
If the data is not available, it returns ``None``.
"""
return self._data
@data.setter
def data(self, d):
self._data = d
self._update_data_info(d)
@property
def grad(self):
"""Gradient array of the corresponding variable.
If the variable is not available, it returns ``None``.
"""
var = self._variable()
return None if var is None else var.grad
@property
def grad_var(self):
"""Gradient variable of the corresponding variable.
        If the corresponding variable is not available, it returns ``None``.
"""
var = self._variable()
return None if var is None else var.grad_var
def _set_grad_var_if_available(self, g):
var = self._variable()
if var is not None:
var._set_grad_var_without_check(g)
@property
def label(self):
"""Short text that represents the variable node."""
if self.shape == ():
return str(self.dtype)
return '(%s), %s' % (', '.join(map(str, self.shape)),
str(self.dtype))
@property
def rank(self):
return self._rank
@property
def requires_grad(self):
"""It indicates that ``grad`` will be set in backward calculation."""
return self._requires_grad
def get_variable(self):
"""Returns the corresponding :class:`~chainer.Variable` object.
        The VariableNode object holds a weak reference to the variable object.
        If the reference is alive, the variable is returned by this method.
        Otherwise, this method creates a new :class:`~chainer.Variable` object
        from this node object and returns it.
Returns:
~chainer.Variable: The variable object that refers this node.
"""
var = self._variable()
if var is not None:
return var
var = Variable._init_unchecked(
self.data, self.name, None, self.requires_grad, None, self)
return var
def get_variable_or_none(self):
"""Returns the holding :class:`~chainer.Variable` object or ``None``.
        The VariableNode object holds a weak reference to the variable object.
        If the reference is alive, the variable is returned by this method.
        Otherwise, it returns ``None``.
Returns:
~chainer.Variable: The variable object that refers this node.
"""
return self._variable()
def set_creator(self, creator):
"""Sets a :class:`~chainer.Function` object that created this node.
This method is equivalent to ``self.creator = creator``. A
:class:`~chainer.FunctionNode` object can also be passed.
Args:
creator (Function or FunctionNode): Function that has created this
variable.
"""
self.creator = creator
def set_creator_node(self, creator_node):
"""Sets a :class:`~chainer.FunctionNode` object that created this node.
This method is equivalent to ``self.creator_node = creator_node``. A
:class:`~chainer.Function` object can also be passed, in which case the
:attr:`Function.node <chainer.Function.node>` attribute is used.
Args:
creator_node (FunctionNode or Function): Function node that has
this variable as an output.
"""
self.creator_node = creator_node
def unchain(self):
"""Deletes the reference to the creator of this variable node.
This method is equivalent to ``self.creator_node = None``.
"""
self.creator_node = None
def retain_data(self):
"""Lets the node hold a reference to the underlying data array.
This method gets the data array of the corresponding variable and keeps
it. If the weak reference to the corresponding variable is dead, it
raises an error.
"""
variable = self._variable()
if variable is not None:
self.data = variable.data
else:
raise RuntimeError('cannot retain variable data: the variable has '
'been already released')
def _update_data_info(self, d):
if d is None:
self.dtype = None
self.shape = None
else:
self.dtype = d.dtype
self.shape = d.shape
# If the node has a reference to data, update it as well.
if self._data is not None:
self._data = d
def _check_old_style_gradient(self):
if self._old_style_grad_generator is not None:
raise RuntimeError(
'cannot twice-differentiate an old style Function "%s"' %
self._old_style_grad_generator)
def _create_variable(data, name, grad, requires_grad, device):
var = Variable(
data, name=name, grad=grad, requires_grad=requires_grad)
var.to_device(device)
return var
class Variable(object):
"""__init__(data=None, *, name=None, grad=None, requires_grad=True)
Array with a structure to keep track of computation.
Every variable holds a data array of type either :class:`numpy.ndarray` or
:class:`cupy.ndarray`.
A variable object holds a data array and a
:class:`~chainer.variable.VariableNode` object of
a computational graph. If the variable is constructed by the user, the node
is *root* and does not hold any parent. If the variable is constructed by a
:class:`~chainer.FunctionNode` object (i.e., by calling functions under
``chainer.functions`` or user-defined functions), or by using operators
(see the list below), the node holds a reference to its parent called
:attr:`creator_node`.
This reference is used in backpropagation to backtrack the graph.
Users can disable (resp. enable) this chaining behavior by calling
:func:`~chainer.no_backprop_mode` (resp.
:func:`~chainer.force_backprop_mode`).
In the former context, a variable never creates a computational graph,
    whereas in the latter context, it is forced to create one.
.. note::
The following operators are defined for variable(s).
* Indexing: ``a[slices]`` (:meth:`__getitem__`)
* Addition: ``a + b`` (:meth:`__add__`, :meth:`__radd__`)
* Subtraction: ``a - b`` (:meth:`__sub__`, :meth:`__rsub__`)
* Multiplication: ``a * b`` (:meth:`__mul__`, :meth:`__rmul__`)
* Division: ``a / b`` (:meth:`__div__`, :meth:`__rdiv__`, \
:meth:`__truediv__`, :meth:`__rtruediv__`)
* Floor Division: ``a // b`` (:meth:`__floordiv__`, \
:meth:`__rfloordiv__`)
* Exponentiation: ``a ** b`` (:meth:`__pow__`, :meth:`__rpow__`)
* Matrix Multiplication: ``a @ b`` (:meth:`__matmul__`, \
:meth:`__rmatmul__`)
* Negation (Arithmetic): ``- a`` (:meth:`__neg__`)
* Absolute value: ``abs(a)`` (:meth:`__abs__`)
Args:
data (:ref:`ndarray`): Initial data array.
name (str): Name of the variable.
grad (:ref:`ndarray`): Initial gradient array.
requires_grad (bool): Boolean indicating whether ``grad`` will be set
in backward calculation.
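    .. admonition:: Example

       A minimal sketch of typical usage (illustrative only; it assumes
       ``numpy`` is imported as ``np``, and the array values are arbitrary):

       .. code-block:: python

          x = chainer.Variable(np.array([1., 2., 3.], dtype=np.float32))
          y = x ** 2 - 2 * x + 1
          y.grad = np.ones((3,), dtype=np.float32)
          y.backward()
          x.grad
          # => array([0., 2., 4.], dtype=float32)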
"""
    # Cached value of `self.xp is chainerx`. It avoids initializing
    # self._device as much as possible because the initialization is costly.
_has_chainerx_array = False
# Cached grad-stopped view of chainerx array. This is the return value
# of `array` and `data` properties.
_chainerx_nobp_array_cache = None
# Cached grad-stopped view of the array returned by `grad` property.
# It's a 2-element tuple, where the first is the original grad array and
# the second is a grad-stopped view of the first. `grad` property returns
# the second element.
_chainerx_grad_cache = None
_chainerx_name = None # type: tp.Optional[str]
# A NumPy, CuPy array cache to avoid redundant conversions between
# NumPy/CuPy and ChainerX.
# TODO(hvy): Avoid modifying this variable from outside this class.
_chainerx_fallback_array = None
# Used in non-ChainerX variables. The gradient array is stored in
# this attribute on Variable.grad setter to delay creation of grad_var
# instance.
_grad = None
def __init__(self, data=None, **kwargs):
# type: (tp.Optional[types.NdArray], **tp.Any) -> None
name, grad, requires_grad = argument.parse_kwargs(
kwargs, ('name', None), ('grad', None), ('requires_grad', True),
volatile='volatile argument is not supported anymore. '
'Use chainer.using_config')
assert isinstance(requires_grad, bool)
if data is not None:
array_types = chainer.get_array_types()
if not isinstance(data, array_types):
msg = '{} or {} are expected. Actual: {}'.format(
', '.join([str(at) for at in array_types[:-1]]),
array_types[-1], type(data))
raise TypeError(msg)
self._init_impl(data, name, grad, requires_grad, None, None)
@staticmethod
def _init_unchecked(data=None, name=None, grad=None, requires_grad=True,
is_chainerx_array=None, node=None):
# type: (tp.Optional[types.NdArray], tp.Optional[str], tp.Optional[types.NdArray], bool, tp.Optional[bool], tp.Optional[VariableNode]) -> Variable # NOQA
"""Creates a new :class:`Variable` without the validations for
optimizing performance.
"""
# Create a Variable without invoking __init__
var = Variable.__new__(Variable)
var._init_impl(data, name, grad, requires_grad, is_chainerx_array,
node)
return var
def _init_impl(self, data, name, grad, requires_grad, is_chainerx_array,
node):
# type: (tp.Optional[types.NdArray], tp.Optional[str], tp.Optional[types.NdArray], bool, tp.Optional[bool], tp.Optional[VariableNode]) -> None # NOQA
# Use a list as a data structure to hold the data array indirectly to
# abstract its initialized/uninitialized state.
self._requires_grad = requires_grad # type: bool
self._loss_scale = None
self._grad_var = None
self._device = None
if is_chainerx_array is None:
is_chainerx_array = isinstance(data, chainerx.ndarray)
if is_chainerx_array:
if not requires_grad and grad is not None:
raise ValueError(
'Cannot initialize a variable with gradients if the '
                    'requires_grad argument is False.')
self._set_chainerx_array(data, grad) # type: ignore
            # ChainerX itself has its own node objects, but they are not
            # exposed to Python.
self._node = None # type: tp.Optional[VariableNode]
self._chainerx_name = name
else:
self._data = [data] # type: tp.List[tp.Optional[types.NdArray]]
if node is None:
self._node = VariableNode(self, name)
else:
self._node = node
self._grad = grad
def __copy__(self):
return self._copy_to(Variable())
def _copy_to(self, target):
target.__dict__ = copy.copy(self.__dict__)
target._node = VariableNode(target, self.name)
return target
def __reduce__(self):
args = (
self.array, self.name, self.grad, self._requires_grad, self.device)
return _create_variable, args
def __repr__(self):
return variable_repr(self)
def __str__(self):
return variable_str(self)
def _clear_chainerx(self):
self._chainerx_nobp_array_cache = None
self._chainerx_grad_cache = None
self._chainerx_fallback_array = None
def _ensure_grad_var_up_to_date(self):
# For non-ChainerX, this method creates _grad_var if it's not yet
# created and _grad is set.
# For ChainerX, this method checks consistency between
# _grad_var._data[0] and self._data[0].grad and recreates _grad_var
# as necessary. (chainerx.ndarray.grad can be altered independently
# from chainer)
if self._has_chainerx_array:
self._grad = None
# Update gradient variable if it has not yet been initialized or
# it happens to be dirty w.r.t. the actual gradient of the
# underlying chainerx.ndarray.
arr = self._data[0]
actual_grad = (
arr.grad
if arr is not None and arr.is_grad_required()
else None)
if actual_grad is None:
self._grad_var = None
else:
grad_var = self._grad_var
old_grad = None if grad_var is None else grad_var._data[0]
if actual_grad is not old_grad:
self._grad_var = Variable(
actual_grad,
requires_grad=actual_grad.is_backprop_required())
return
if self._grad_var is None:
if self._grad is not None:
self._grad_var = Variable(self._grad)
def _set_chainerx_array(self, array, grad):
# type: (tp.Optional[chainerx.ndarray], tp.Optional[chainerx.ndarray]) -> None # NOQA
# Sets chainerx array and grad.
assert array is None or isinstance(array, chainerx.ndarray)
requires_grad = self._requires_grad
self._grad = None
if (not requires_grad
and array is not None
and array.is_backprop_required()):
raise ValueError(
'Cannot initialize a variable to not require '
'gradients if the ChainerX array already requires '
'backprop.')
# Create a view of the given data to hold internally and modify.
if array is None:
self._data = [None]
else:
# If the array `array` is not connected to a graph, a view of it is
# created and kept, in order not to change the no-graph status of
# it. If the array is connected, the graph status is kept track of.
if not array.is_backprop_required():
array = array.view()
if requires_grad:
array.require_grad()
if grad is not None:
array.set_grad(grad)
self._data = [array]
self._has_chainerx_array = True # even if data is None
self._chainerx_nobp_array_cache = None
self._chainerx_grad_cache = None
self._chainerx_fallback_array = None
@property
def device(self):
"""Device on which the data array of this variable reside."""
# lazy initialization for performance
if self._device is None:
if self._data[0] is None:
self._device = backend.CpuDevice()
else:
self._device = backend.get_device_from_array(self._data[0])
return self._device
@property
def xp(self):
# type: () -> tp.Optional[types.Xp]
"""Array module for the data array of this variable."""
if self._has_chainerx_array:
return chainerx
else:
device = self.device
return None if device is None else device.xp
@property
def name(self):
if self._has_chainerx_array:
return self._chainerx_name
return self._node.name
@name.setter
def name(self, n):
if self._has_chainerx_array:
self._chainerx_name = n
return
self._node.name = n
def summary(self):
if self.name:
return '<variable %s>' % self.name
else:
return '<variable at 0x%x>' % id(self)
def debug_print(self):
"""Display a summary of the stored data and location of the Variable"""
msg = """{summary}
- device: {device}
- backend: {backend}
- shape: {shape}
- dtype: {dtype}
- statistics: {stats}
- grad: {grad}"""
stats_msg = 'mean={0:.8f}, std={1:.8f}'
array = self.array
with cuda.get_device_from_array(array) as dev:
xp = numpy if int(dev) == -1 else cuda.cupy
if array is None:
# `array` can be `None` if constructed without any arguments
device = None
backend = None
stats = None
else:
device = getattr(array, 'device', 'CPU')
backend = type(array)
stats = stats_msg.format(float(xp.mean(array)),
float(xp.std(array)))
shape = getattr(array, 'shape', None)
dtype = getattr(array, 'dtype', None)
if self.grad is None:
grad = None
elif xp.all(self.grad == 0):
grad = 0
else:
grad = stats_msg.format(float(xp.mean(self.grad)),
float(xp.std(self.grad)))
return msg.format(summary=self.summary(), device=device,
backend=backend, shape=shape, dtype=dtype,
stats=stats, grad=grad)
def __pos__(self):
return self
def __len__(self):
"""Returns the first dimension of the data array.
Returns:
int: Number of the first dimension of the data array.
"""
return len(self.array)
@property
def label(self):
"""Short text that represents the variable."""
if self._has_chainerx_array:
raise RuntimeError(
'A variable of ChainerX does not provide a node label.')
return self._node.label
@property
def creator(self):
"""Function implementation that created this variable.
When this variable has been created by an old-style function (i.e., it
is implemented as a subclass of :class:`Function`), this property
returns that :class:`Function` object.
When this variable has been created by a new-style function (i.e., it
is implemented as a subclass of :class:`FunctionNode` class), this
property returns that node object.
"""
if self._has_chainerx_array:
raise RuntimeError(
'A variable of ChainerX does not provide a creator.')
return self._node.creator
@creator.setter
def creator(self, func):
if self._has_chainerx_array:
raise RuntimeError(
'A variable of ChainerX does not provide a creator.')
self._node.creator = func
@property
def creator_node(self):
""":class:`FunctionNode` object that created this variable.
This property has a setter to which ``None`` can be set. Setting
        ``None`` to this property is equivalent to calling :meth:`unchain`;
it purges the variable from the function that created this variable.
The setter also accepts the original :class:`FunctionNode` object that
created this variable. For example, you can once set ``None`` to this
property and then set the original value again.
.. note::
           Setting an irrelevant :class:`FunctionNode` object does not emit
           any error immediately, but the behavior is undefined. Do not set
           a :class:`FunctionNode` object that did not create this variable
object.
"""
if self._has_chainerx_array:
raise RuntimeError(
'A variable of ChainerX does not provide a creator_node.')
return self._node._creator_node
@creator_node.setter
def creator_node(self, func):
if self._has_chainerx_array:
raise RuntimeError(
'A variable of ChainerX does not provide a creator_node.')
self._node.creator_node = func
@property
def array(self):
# type: () -> tp.Optional[types.NdArray]
"""The underlying data array.
It is either :class:`numpy.ndarray` or :class:`cupy.ndarray` object,
        or ``None`` if the variable is in an uninitialized state.
"""
# For ChainerX, this property always returns a grad-stopped view.
# The view is cached to reduce potential overhead.
if self._has_chainerx_array:
if (self._chainerx_nobp_array_cache is None
and self._data[0] is not None):
self._chainerx_nobp_array_cache = (
self._data[0].as_grad_stopped()) # type: ignore
return self._chainerx_nobp_array_cache
return self._data[0]
@array.setter
def array(self, d):
# type: (types.NdArray) -> None
if self._has_chainerx_array:
d_old = self._data[0]
if (d_old is not None
and (d_old.is_backprop_required() # type: ignore
or d.is_backprop_required())): # type: ignore
raise ValueError(
'Cannot update the array of a Variable if either the '
'existing or the new array requires backprop.')
self._set_chainerx_array(d, None) # type: ignore
else:
self._node._update_data_info(d) # type: ignore # _node doesn't have value when xp is chainerx # NOQA
self._data[0] = d
self._has_chainerx_array = False
@property
def chx_array(self):
"""A view of the raw ChainerX array.
        In contrast to :data:`Variable.array`, which is always disconnected,
the array represented by this attribute may be connected to the
computational graph.
It is a view, so it has a distinct gradient from the original array.
If this attribute is queried on a :class:`Variable` with a non-ChainerX
array, :class:`ValueError` will be raised.
"""
if not self._has_chainerx_array:
raise ValueError(
'chx_array is not available for Variable with '
'non-ChainerX array.')
return self._data[0].view()
@property
def data(self):
# type: () -> tp.Optional[types.NdArray]
"""The underlying data array (equivalent to :attr:`array`).
Note that using this attribute directly is discouraged; use
:attr:`array` instead. Using :attr:`array`, you can find an error
earlier when your code mixes up Variable and ndarray because
ndarray does not have an attribute ``.array`` while it has
``.data``.
"""
return self.array
@data.setter
def data(self, d):
# type: (types.NdArray) -> None
self.array = d
def _set_chainerx_grad(self, g):
# Assigns chainerx.ndarray.grad
assert self.xp is chainerx
if not self._requires_grad and g is not None:
raise RuntimeError(
'Cannot set the gradient of a variable that is flagged to not '
'require one.')
arr = self._data[0]
if arr is None:
if g is not None:
raise RuntimeError(
'Cannot set a gradient to an empty variable')
elif arr.is_backprop_required():
arr.set_grad(g)
def _set_grad_without_check(self, g):
if self._has_chainerx_array:
self._set_chainerx_grad(g)
self._grad_var = None
return
self._grad = g
self._grad_var = None
@property
def grad(self):
"""Gradient array of this variable.
Note that this property returns the underlying array of the gradient
variable instead of the gradient variable itself; to get/set
gradient variable, use :attr:`grad_var` instead.
If the underlying array is a :class:`chainerx.ndarray` and
        requires_grad is false, trying to access the gradient will result in
        an error.
"""
if self._has_chainerx_array:
arr = self._data[0]
if arr is None or not arr.is_backprop_required():
self._chainerx_grad_cache = None
return None
actual_grad = arr.grad
if actual_grad is None:
self._chainerx_grad_cache = None
return None
# If grad is cached and the actual grad has not changed, return
# the cache.
if self._chainerx_grad_cache is not None:
orig_grad, grad_stopped_grad = self._chainerx_grad_cache
if orig_grad is actual_grad:
return grad_stopped_grad
# Update the cache
grad_stopped_grad = actual_grad.as_grad_stopped()
self._chainerx_grad_cache = (actual_grad, grad_stopped_grad)
return grad_stopped_grad
if self._grad_var is not None:
return self._grad_var.array
return self._grad
@grad.setter
def grad(self, g):
if g is not None:
_check_grad_type(None, self, False, g)
self._set_grad_without_check(g)
def _set_grad_var_without_check(self, gv):
if self._has_chainerx_array:
self._set_chainerx_grad(None if gv is None else gv._data[0])
self._grad_var = gv
return
self._grad_var = gv
self._grad = None if gv is None else gv.array
@property
def grad_var(self):
"""Gradient variable."""
self._ensure_grad_var_up_to_date()
return self._grad_var
@grad_var.setter
def grad_var(self, g):
if g is not None:
_check_grad_type(None, self, False, g.array)
self._set_grad_var_without_check(g)
@property
def shape(self):
return self.array.shape
@property
def ndim(self):
return self.array.ndim
@property
def size(self):
return self.array.size
@property
def dtype(self):
return self.array.dtype
@property
def rank(self):
if self._has_chainerx_array:
raise RuntimeError(
'A variable of ChainerX does not provide a node rank.')
return self._node.rank
@property
def node(self):
if self._has_chainerx_array:
raise RuntimeError(
'A variable of ChainerX does not provide a node.')
return self._node
@property
def requires_grad(self):
"""It indicates that ``grad`` will be set in backward calculation."""
return self._requires_grad
@property
def T(self):
"""Transposition of this variable."""
return chainer.functions.transpose(self)
def to_cpu(self):
"""Copies the data and gradient arrays to CPU."""
self.to_device(backend.CpuDevice())
def to_gpu(self, device=None):
"""Copies the data and gradient arrays to specified GPU.
Args:
device: Target device specifier. If omitted, the current device is
used.
"""
cuda.check_cuda_available()
self.to_device(cuda._get_device_or_current(device))
def to_intel64(self):
"""Copies the data and gradient arrays to intel64 specific mdarray.
If the array is not suited for intel64, it will be converted to
:class:`numpy.ndarray`.
"""
intel64.check_ideep_available()
self.to_device(intel64.Intel64Device())
def to_chx(self):
"""Converts the array and gradient to ChainerX arrays without copy.
This method converts the underlying array and gradient to
:class:`chainerx.ndarray` on the same physical device. It does nothing
if the array held by the Variable object is already a ChainerX array.
The new array is a view of the original one.
"""
self._to_chx(allow_unchaining=False)
def _to_chx(self, allow_unchaining):
if not chainerx.is_available():
raise RuntimeError('ChainerX is not available.')
if self._has_chainerx_array:
return
if not allow_unchaining and self.creator is not None:
raise RuntimeError(
'A variable with a creator cannot be converted into ChainerX '
'array')
array = self.array
grad = self.grad
if array is None and grad is not None:
# TODO(hvy): Reconsider this possibly invalid state.
raise RuntimeError(
'A variable without data but with a gradient cannot be '
'transferred to a ChainerX device.')
self._to_device(
backend.ChainerxDevice.from_fallback_device(self.device),
allow_unchaining)
def from_chx(self):
"""Converts the array and gradient to non-ChainerX arrays without copy.
This method converts the underlying ChainerX array and gradient
residing in either a ``native`` or ``cuda`` device to NumPy or CuPy
arrays respectively, on their same physical device. It does nothing
if the array held by the Variable object is not a ChainerX array. The
new array is a view of the original one.
Raises an error if such a conversion is not supported for the device.
"""
self._from_chx(allow_unchaining=False)
def _from_chx(self, allow_unchaining):
if not self._has_chainerx_array:
return
if not allow_unchaining and self._data[0].is_backprop_required():
raise RuntimeError(
'Cannot convert from a Variable with a ChainerX array that is '
'connected to a graph.')
self.to_device(self.device.fallback_device)
def to_device(self, device):
"""Copies the data and gradient arrays to specified device.
Args:
device: Target device specifier. See
:func:`~chainer.get_device` for available values.
"""
self._to_device(device, allow_unchaining=False)
def _to_device(self, device, allow_unchaining):
device = chainer.get_device(device)
was_chainerx = self._has_chainerx_array
is_chainerx = device.xp is chainerx
if not allow_unchaining:
if was_chainerx and not is_chainerx:
chx_arr = self._data[0]
if chx_arr is not None and chx_arr.is_backprop_required():
raise RuntimeError(
'A variable of a ChainerX array which requires '
'gradients cannot be copied into non-chainerx device '
'({}).'.format(device))
elif not was_chainerx and is_chainerx:
arr = self._data[0]
if arr is not None and self.creator is not None:
raise RuntimeError(
'A variable of a non-ChainerX array which is '
'connected to a graph cannot be copied to a ChainerX '
'device ({}).'.format(device))
arr = self._data[0]
grad_var = self.grad_var
if was_chainerx and not is_chainerx:
self._clear_chainerx()
self._node = VariableNode(self, self._chainerx_name)
elif not was_chainerx and is_chainerx:
self._chainerx_name = self._node.name
self._device = device
self._has_chainerx_array = is_chainerx
if arr is None:
return
if backend.get_device_from_array(arr) == device:
return
new_arr = device.send(arr)
if is_chainerx:
if grad_var is None:
new_grad = None
else:
new_grad = device.send(grad_var._data[0])
self._set_chainerx_array(new_arr, new_grad)
else:
self._data = [new_arr]
if grad_var is not None:
grad_var.to_device(device)
# _grad has been invalidated by grad_var.to_device().
self._grad = grad_var.array
# ensure that the node tracks the device migration
node = self._node
if is_chainerx:
            # ChainerX itself has its own node objects;
            # ensure that the node is disconnected from this variable.
if node is not None:
# Disconnect by replacing with an alternative of dead weakref
node._variable = lambda: None
self._node = None
else:
if node._data is not None:
node.retain_data()
def cleargrad(self):
"""Clears the gradient array."""
self.grad_var = None
def zerograd(self):
"""Initializes the gradient array by zeros.
Note that the gradient variable is unchained from the computational
graph by this method, because this operation breaks the backprop
validity.
.. deprecated:: v1.15
           Use the more efficient :meth:`cleargrad` instead.
"""
warnings.warn(
'Variable.zerograd is deprecated. Use Variable.cleargrad instead.',
DeprecationWarning)
arr = self.array
if arr is None:
return
if self._has_chainerx_array:
gv = self.grad_var
if gv is None:
self.grad = chainerx.zeros_like(
arr, device=self.device.device)
else:
gv._data[0].fill(0)
else:
with cuda.get_device_from_array(arr) as dev:
if self._grad is None:
xp = numpy if dev.id == -1 else cuda.cupy
self._grad = xp.zeros_like(arr)
self._grad_var = None
else:
gv = self._grad_var
if gv is not None:
gv.unchain()
self._grad.fill(0)
def copydata(self, var):
"""Copies the data array from given source variable.
This method copies the data array from given variable to this variable.
The copy is done even if the arrays reside on different devices,
including across the host and a GPU device. If this variable has an
uninitialized data array, this method initializes it by the data array
of the given variable. Similarly, if the given variable has an
uninitialized data array, this method initializes it by the data array
of this variable (``self``). If both are uninitialized, this method
does nothing.
Args:
var (~chainer.Variable): Source variable.
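        .. admonition:: Example

           A minimal sketch (illustrative only; it assumes ``numpy`` is
           imported as ``np``):

           .. code-block:: python

              src = chainer.Variable(np.arange(4, dtype=np.float32))
              dst = chainer.Variable(np.zeros(4, dtype=np.float32))
              dst.copydata(src)
              dst.array
              # => array([0., 1., 2., 3.], dtype=float32)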
"""
src = var.array
dst = self.array
if src is None:
if dst is None:
return
var.initialize(self.shape)
src = var.array
elif dst is None:
self.initialize(src.shape)
dst = self.array
backend.copyto(dst, src)
def addgrad(self, var):
"""Accumulates the gradient array from given source variable.
This method adds the gradient of a given variable to the gradient of
        this variable. The accumulation is done even across the host and
        different devices. If this variable has uninitialized data/grad arrays,
        this method initializes them with the shape of the given variable and
then accumulates the gradient.
Args:
var (~chainer.Variable): Source variable.
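        .. admonition:: Example

           A minimal sketch (illustrative only; it assumes ``numpy`` is
           imported as ``np``):

           .. code-block:: python

              a = chainer.Variable(np.zeros(2, dtype=np.float32))
              b = chainer.Variable(np.zeros(2, dtype=np.float32))
              a.grad = np.array([1., 2.], dtype=np.float32)
              b.grad = np.array([3., 4.], dtype=np.float32)
              a.addgrad(b)
              a.grad
              # => array([4., 6.], dtype=float32)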
"""
dst_device = self.device
is_chainerx = dst_device.xp is chainerx
if is_chainerx != (var.device.xp is chainerx):
raise RuntimeError(
'Variable.addgrad does not support addition between '
'gradients on non-ChainerX and ChainerX devices.\n'
'Adding gradient to: {}\n'
'Adding gradient from: {}'.format(
dst_device, var.device))
if var.grad is None:
return
src = var.grad_var
if self.array is None:
self.initialize(var.shape)
dst = self.grad_var
src_device = src.device
if src_device != dst_device:
src = chainer.functions.copy(src, dst_device)
self.grad_var = src if dst is None else src + dst
def set_creator(self, gen_func):
"""Notifies the variable that the given function is its creator.
Args:
gen_func (Function): Function object that creates this variable as
one of its outputs.
"""
if self._has_chainerx_array:
raise RuntimeError(
'A variable of ChainerX does not provide a creator.')
self._node.set_creator(gen_func)
def set_creator_node(self, fnode):
"""Notifies the variable that the given node is its creator.
Args:
fnode (FunctionNode): Function node that has this variable as an
output.
"""
if self._has_chainerx_array:
raise RuntimeError(
'A variable of ChainerX does not provide a creator node.')
self._node.set_creator_node(fnode)
def backward(self, retain_grad=False, enable_double_backprop=False,
loss_scale=None):
"""Runs error backpropagation (a.k.a.\\ backprop) from this variable.
On backprop,
:meth:`FunctionNode.backward() <chainer.FunctionNode.backward>`
is called on each :class:`~chainer.FunctionNode` object appearing in
the backward graph starting from this variable.
The backward graph is represented by backward
references from variable nodes to their creators, and from function
nodes to their input variable nodes. The backprop stops at all root
nodes. Some function nodes set ``None`` as gradients of some inputs,
        in which case further backprop does not take place at those inputs.
        This method uses :data:`grad` as the initial error array. The user can
manually set a gradient array before calling this method.
If the shape of :data:`data` is ``()`` (i.e., it is scalar) and
        :data:`grad` is ``None``, then this method automatically uses
        1.0 as the initial error. This is useful when starting backprop from
        a scalar loss value.
From v3, this method supports *differentiable backprop* (a.k.a. double
backprop, grad of grads). To enable it, pass
``enable_double_backprop=True``.
Args:
retain_grad (bool): If ``True``, the gradient arrays of all
intermediate variables are kept.
Otherwise, :data:`~chainer.Variable.grad` of the
                intermediate variables is set to ``None`` at appropriate
                times, which may reduce the maximum memory consumption.
                In most cases of training models, the purpose of backprop
is to compute gradients of parameters, not of all variables,
and therefore it is recommended that this flag be set to
``False``.
enable_double_backprop (bool): *(Added in v3.0)* If ``True``,
computational trace of the whole backpropagation procedure is
recorded to the computational graph so that one can further do
backpropagation from the resulting gradients. Note that
enabling it results in larger memory consumption needed to
                store the gradients w.r.t. intermediate variables that are
required for the second gradient computation.
            loss_scale (float): Loss scaling factor. Loss scaling is a useful
                technique to mitigate the vanishing gradient issue that tends
                to happen when a low-precision data type like float16 is used
                during training. If you set a loss scaling factor, gradients
                of loss values are multiplied by the factor before backprop
                starts. The factor is propagated to all gradients in the
                computational graph along the backprop. The gradients of
                parameters are divided by the factor just before the
                parameters are updated.
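        .. admonition:: Example

           A minimal sketch of starting backprop from a scalar loss
           (illustrative only; it assumes ``numpy`` is imported as ``np``):

           .. code-block:: python

              x = chainer.Variable(np.array([1., 2.], dtype=np.float32))
              loss = chainer.functions.sum(x * x)
              loss.backward()   # the initial error of a scalar loss defaults to 1
              x.grad
              # => array([2., 4.], dtype=float32)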
"""
if self._has_chainerx_array:
if retain_grad:
raise RuntimeError(
'retain_grad is not supported for ChainerX array.')
if loss_scale is not None:
raise RuntimeError(
'loss_scale is not supported for ChainerX array.')
arr = self._data[0]
assert isinstance(arr, chainerx.ndarray)
chainerx.backward(
arr, enable_double_backprop=enable_double_backprop)
return
# Initialize error by 1, if this is a loss variable
if self.array.size == 1 and self.grad_var is None:
if self.array.ndim != 0:
warnings.warn(
'Treating a variable with only one element as a scalar'
' in Variable.backward is deprecated. A scalar variable'
' must be a 0-dimensional array. Apply'
' chainer.functions.squeeze to obtain a scalar variable.'
' If the size of this variable accidentally becomes one,'
' set zero to grad.',
DeprecationWarning)
with cuda.get_device_from_array(self.array) as device:
if device is cuda.DummyDevice:
self.grad = numpy.ones_like(self.array)
else:
self.grad = cuda.cupy.ones_like(self.array)
if loss_scale is not None:
self.grad *= loss_scale
node = self.node
grad_var = self.grad_var
self.grad_var = None
with chainer.using_config('enable_backprop', enable_double_backprop):
# TODO(kataoka): The following line should not pass grad_var = None
# to _backprop_to_all, but it is working because grad_var is
# immediately popped away as None = _backprop_utils._reduce([None])
_backprop_to_all([(node, grad_var)], retain_grad, loss_scale)
def item(self):
"""Converts the variable with one element to a Python scalar.
This will incur host-device synchronization.
Returns:
int or float: The element of the array.
"""
return self.array.item()
def reshape(self, *shape):
"""Returns a variable of a different shape and the same content.
.. seealso::
           :func:`chainer.functions.reshape` for full documentation.
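        .. admonition:: Example

           A minimal illustration (it assumes ``numpy`` is imported as
           ``np``):

           .. code-block:: python

              x = chainer.Variable(np.arange(6, dtype=np.float32))
              y = x.reshape(2, 3)
              y.shape
              # => (2, 3)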
"""
if len(shape) == 1 and isinstance(shape[0], (tuple, list)):
shape = shape[0]
return chainer.functions.reshape(self, shape)
def transpose(self, *axes):
"""Permute the dimensions of an input variable without copy.
.. seealso::
:func:`chainer.functions.transpose` for full documentation.
"""
if len(axes) == 0:
axes = None
elif len(axes) == 1 and (isinstance(axes[0], (tuple, list)) or
axes[0] is None):
axes = axes[0]
return chainer.functions.transpose(self, axes)
def unchain(self):
"""Deletes the reference to the creator of this variable.
This method deletes the reference to the creator from the corresponding
variable node. Unlike :meth:`unchain_backward`, it does not backtrack
the graph.
This method is equivalent to ``self.creator_node = None``.
"""
if self._has_chainerx_array:
raise RuntimeError(
'A variable of ChainerX does not provide an unchain method.')
self.creator_node = None
def unchain_backward(self):
"""Deletes references between variable nodes and functions backward.
After this method completes, intermediate variable nodes and functions
that are not referenced from anywhere are deallocated by reference
        count GC. Also, this variable itself deletes the reference to its
        creator function from the node, i.e., the node becomes a root in the
computation graph. It indicates that backprop after unchaining stops at
this variable. This behavior is useful to implement truncated BPTT.
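        .. admonition:: Example

           A minimal sketch of truncated BPTT (illustrative only; ``loss``
           is a hypothetical variable accumulated over several time steps):

           .. code-block:: python

              loss.backward()          # backprop through the retained graph
              loss.unchain_backward()  # cut the graph so older steps are freed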
"""
if self._has_chainerx_array:
raise RuntimeError(
'A variable of ChainerX does not provide an unchain_backward '
'method.')
cand_funcs = []
seen_set = set()
def add_cand(cand):
if cand is not None and cand not in seen_set:
cand_funcs.append(cand)
seen_set.add(cand)
add_cand(self.creator_node)
while cand_funcs:
func = cand_funcs.pop()
for var in func.inputs:
add_cand(var.creator_node)
func.unchain()
def retain_data(self):
"""Lets the corresponding variable node keep the underlying array."""
if self._has_chainerx_array:
raise RuntimeError(
'A variable of ChainerX does not provide a retain_data '
'method.')
self._node.data = self._data[0]
def _error_nobp_op(self, op):
raise TypeError(
'Variables do not support {} operator. '
'You could use `array` attribute instead.'.format(op))
def __lt__(self, other):
"""This operator is not supported in Variables."""
self._error_nobp_op('<')
def __le__(self, other):
"""This operator is not supported in Variables."""
self._error_nobp_op('<=')
def __eq__(self, other):
"""This operator is not supported in Variables."""
self._error_nobp_op('==')
def __ne__(self, other):
"""This operator is not supported in Variables."""
self._error_nobp_op('!=')
def __gt__(self, other):
"""This operator is not supported in Variables."""
self._error_nobp_op('>')
def __ge__(self, other):
"""This operator is not supported in Variables."""
self._error_nobp_op('>=')
def __nonzero__(self):
"""This operator is not supported in Variables."""
# Python 2.x
raise TypeError(
'Variables cannot be evaluated as Python bool.')
def __bool__(self):
"""This operator is not supported in Variables."""
# Python 3.x
raise TypeError(
'Variables cannot be evaluated as Python bool.')
__array_priority__ = 200 # type: int
__hash__ = None # type: tp.Callable[[object], int]
def _backprop_to_all(outputs, retain_grad, loss_scale):
"""Backprop to all input variables
Args:
outputs (list of tuple): each tuple is (y_node, y_grad_var).
y_grad_var should not be None.
retain_grad (bool): see docstring of Variable.backward
loss_scale (float): see docstring of Variable.backward
"""
OrderedDict = chainer.utils._collections.OrderedDict # fix py2 memory leak
cand_funcs = []
seen_set = set()
def add_cand(cand):
if cand not in seen_set:
# Negate since heapq is min-heap
heapq.heappush(cand_funcs, (-cand.rank, len(seen_set), cand))
seen_set.add(cand)
grads = _backprop_utils.GradTable(accumulate_grad_inputs=True)
leaf_nodes = set()
for y, gy in outputs:
grads.accumulate(y, gy)
func = y.creator_node
if func is None: # leaf
leaf_nodes.add(y)
else:
add_cand(func)
# Fix F812 (Python 2)
y = None
del y
is_debug = chainer.is_debug()
base_hooks = chainer.get_function_hooks().values()
while cand_funcs:
_, _, func = heapq.heappop(cand_funcs)
inputs = func.inputs
target_input_indexes = tuple([
i for i, x in enumerate(inputs) if x.requires_grad
])
outputs = [y() for y in func.outputs] # access via weak ref
out_grad = tuple([grads.pop(y)
if y is not None and y.creator_node is not None
else None
for y in outputs])
if not target_input_indexes:
continue
in_data = [x.data for x in inputs]
out_grad_array = [None if g is None else g.array for g in out_grad]
if func._n_local_function_hooks != 0:
local_hooks = collections.OrderedDict(chainer.get_function_hooks())
local_hooks.update(func.local_function_hooks)
hooks = local_hooks.values() # avoid six for performance
else:
hooks = base_hooks
with cuda.get_device_from_array(*(in_data + out_grad_array)):
for hook in hooks:
hook.backward_preprocess(
func, tuple(in_data), tuple(out_grad_array))
# Collect the current input gradients.
target_inputs = [inputs[i] for i in target_input_indexes]
# Keep the order for the portability, rather than
# in_grad = {x: grads.get_as_list(x)
# for x in set(target_inputs)}
in_grad = OrderedDict()
for x in target_inputs:
if x not in in_grad:
in_grad[x] = grads.get_as_list(x)
_backprop_utils.backprop_step(
func, target_input_indexes, out_grad, in_grad, is_debug)
for hook in hooks:
hook.backward_postprocess(
func, tuple(in_data), tuple(out_grad_array))
if retain_grad:
# The gradients of the outputs of `func` are final. Store them if
# retain_grad=True.
for y, gy in six.moves.zip(outputs, out_grad):
if y is not None:
y._set_grad_var_if_available(gy)
del gy # to reduce memory usage
del out_grad # to reduce memory usage
for x, gx in in_grad.items():
if not gx: # gradient == None
continue
for gx_elem in gx:
if gx_elem is not None:
_check_grad_type(func, x, True, gx_elem.array)
del gx_elem # to reduce memory usage
if x.creator_node is None: # leaf
leaf_nodes.add(x)
else:
add_cand(x.creator_node)
del gx, in_grad # to reduce memory usage
for x in leaf_nodes:
x_var = x.get_variable_or_none()
gx = grads.pop(x)
if x_var is not None:
x_var._set_grad_var_without_check(gx)
x_var._loss_scale = loss_scale
grads.assert_no_grads()
class Parameter(Variable):
"""Parameter variable that can be registered to a link.
    Parameter is a subclass of :class:`Variable`. It behaves almost the same
    as a usual variable, except that a parameter can be registered to a
:class:`~chainer.Link` object just by assigning it to an attribute of
the link within an :meth:`~chainer.Link.init_scope` context.
Parameter also supports an initialization by an initializer. It can have
two initializers: one for the data array, and the other for the gradient
array. The initializer only specifies the way of filling the elements of
these arrays, and the shape information is specified at the initialization
point.
When a link that the parameter has been registered to is passed to an
:class:`~chainer.GradientMethod`, an update rule is set to the parameter.
This update rule specifies how to update the data array of the parameter
using its gradient array.
Args:
initializer (~chainer.Initializer or :ref:`ndarray`):
Initializer of the data array. If ``shape`` is given, this
initializer is immediately used to initialize the data array.
Otherwise, if it is an array, it is immediately used as the data
array, and otherwise the data array is left uninitialized and will
be initialized by this initializer in :meth:`initialize`. It can
also be a scalar, in which case the data array will be filled by
this scalar. Note that float32 is used in this case.
shape (int or tuple of int or None): Shape of the parameter. If it is
``None``, the initialization is deferred to the call of
:meth:`initialize`.
name (str): Name of the parameter.
Attributes:
initializer: Initializer of the data array. It is used for
initializing the data array of an uninitialized variable.
update_rule: :class:`~chainer.optimizer.UpdateRule` instance that
updates this variable as a parameter. This argument is set to
:attr:`update_rule`.
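    .. admonition:: Example

       A minimal sketch of deferred initialization (illustrative only; the
       initializer and shape here are arbitrary):

       .. code-block:: python

          p = chainer.Parameter(chainer.initializers.Normal(0.05))
          p.array is None   # => True; the shape is not known yet
          p.initialize((3, 2))
          p.shape
          # => (3, 2)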
"""
initializer = None # type: tp.Optional[tp.Union[tp.Optional[types.AbstractInitializer], types.NdArray]] # NOQA
# TODO(okapies): fix the behavior when shape is None and remove NdArray
_grad_initializer = None # type: tp.Optional[types.AbstractInitializer]
def __init__(self, initializer=None, shape=None, name=None):
# type: (tp.Optional[types.InitializerSpec], tp.Optional[types.ShapeSpec], tp.Optional[str]) -> None # NOQA
if initializer is None:
initializer = constant.NaN()
elif numpy.isscalar(initializer):
initializer = constant.Constant(initializer)
if shape is None:
if isinstance(initializer, chainer.get_array_types()):
# parameter initialized by the initial array
super(Parameter, self).__init__(initializer, name=name)
else:
# uninitialized parameter
super(Parameter, self).__init__(name=name)
dtype = getattr(initializer, 'dtype', None)
self._grad_initializer = constant.NaN(dtype)
else:
# parameter initialized with a given shape
if isinstance(initializer, chainer.get_array_types()):
xp = backend.get_array_module(initializer)
initializer = constant.Constant(initializer)
else:
xp = numpy
data = initializers.generate_array(initializer, shape, xp) # type: ignore # NOQA
grad = xp.full_like(data, numpy.nan)
super(Parameter, self).__init__(data, name=name, grad=grad)
self._initial_device = backend.CpuDevice()
self.update_rule = None
self.initializer = initializer
def __copy__(self):
return self._copy_to(Parameter())
def __reduce__(self):
args = (
self.array, self.name, self.grad, self.initializer,
self.update_rule, self.device)
return _recover_parameter, args
def to_cpu(self):
return self.to_device(backend.CpuDevice())
def to_gpu(self, device=None):
device = chainer.get_device(cuda._get_device_or_current(device))
assert device.xp is cuda.cupy
self.to_device(device)
def to_intel64(self):
self.to_device(intel64.Intel64Device())
def to_chx(self):
if not chainerx.is_available():
raise RuntimeError('ChainerX is not available.')
# Derive the target ChainerX device from the array if it is
# initialized. Otherwise, from the current initial device.
if self.array is not None:
device = backend.get_device_from_array(self.array)
else:
device = self._initial_device
if device.xp is numpy:
self._initial_device = backend.ChainerxDevice(
chainerx.get_device('native:0'))
elif device.xp is cuda.cupy:
self._initial_device = backend.ChainerxDevice(
chainerx.get_device('cuda:{}'.format(device.device.id)))
super(Parameter, self)._to_chx(allow_unchaining=True)
def from_chx(self):
if self.array is not None:
device = backend.get_device_from_array(self.array)
else:
device = self._initial_device
if isinstance(device, backend.ChainerxDevice):
backend_name = device.device.backend.name
if backend_name == 'native':
self._initial_device = backend.CpuDevice()
elif backend_name == 'cuda':
self._initial_device = backend.GpuDevice.from_device_id(
device.device.index)
super(Parameter, self)._from_chx(allow_unchaining=True)
def to_device(self, device):
device = chainer.get_device(device)
if self.data is None and self._initial_device != device:
self._data = [None] # Renew placeholder to break sharing
self._has_chainerx_array = False
self._initial_device = device
super(Parameter, self)._to_device(device, allow_unchaining=True)
def cleargrad(self):
super(Parameter, self).cleargrad()
if self.array is None:
self._grad_initializer = None
def zerograd(self):
super(Parameter, self).zerograd()
if self.array is None:
dtype = getattr(self.initializer, 'dtype', None)
self._grad_initializer = initializers.Zero(dtype)
def initialize(self, shape):
"""Initializes the uninitialized variable.
        An uninitialized variable is a variable created with the data array
        set to ``None``. This method creates and initializes the data array.
        The shape of the variable can be left unknown until this method is
        called.
Args:
shape (tuple of int): Shape of the data array.
"""
device = self._initial_device
assert device is not None
xp = device.xp
data = initializers.generate_array(
self.initializer, shape, xp, device=device)
ginit = self._grad_initializer
grad = None if ginit is None else initializers.generate_array(
ginit, shape, xp, device=device)
self.array = data
self.grad = grad
# Convert the array for iDeep.
# TODO(niboshi): This could be done in generate_array().
if isinstance(self._initial_device, intel64.Intel64Device):
self.to_intel64()
def update(self):
"""Updates the data array using the gradient and the update rule.
This method updates the parameter using the attached update rule.
"""
if self.update_rule is not None:
self.update_rule.update(self)
def as_variable(obj):
"""Converts an array or a variable into :class:`~chainer.Variable`.
This is a convenient function to get a :class:`~chainer.Variable` object
transparently from a raw array or a variable.
Note that this function should only be used for type consistency (i.e., to
enforce the return value of an API having type :class:`~chainer.Variable`).
The :class:`~chainer.Variable.requires_grad` flag is kept as is; if ``obj``
is a raw array, the newly created variable has ``requires_grad = False``.
In order to make a variable w.r.t. which you want to compute the gradient,
you should use :class:`~chainer.Variable` directly.
Args:
obj (:ref:`ndarray` or ~chainer.Variable): An array or
a variable that you want to convert to :class:`~chainer.Variable`.
Returns:
~chainer.Variable:
A variable converted from ``obj``. If ``obj`` is a raw array, this is a
new :class:`~chainer.Variable` object that wraps the array. If ``obj``
is already a :class:`~chainer.Variable` object, this function returns
``obj`` as is.
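    .. admonition:: Example

       A minimal illustration (it assumes ``numpy`` is imported as ``np``):

       .. code-block:: python

          a = np.zeros((2, 3), dtype=np.float32)
          v = chainer.as_variable(a)
          v.requires_grad
          # => False, since ``a`` is a raw NumPy array
          chainer.as_variable(v) is v
          # => True, since variables are returned as is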
"""
if isinstance(obj, Variable):
return obj
if isinstance(obj, chainerx.ndarray):
requires_grad = obj.is_backprop_required()
else:
requires_grad = False
return Variable(obj, requires_grad=requires_grad)
# TODO(hvy): Make private, i.e. _as_array?
def as_array(obj):
"""Returns the underlying array from a variable or an array.
This is a convenient function to get the underlying array object
transparently from an object that could be either a variable or an array.
Args:
obj (:ref:`ndarray` or ~chainer.Variable): An array or a variable.
Returns:
:ref:`ndarray` or ~chainer.Variable:
The underlying array object of the argument.
"""
if isinstance(obj, Variable):
return obj.array
return obj
def _recover_parameter(data, name, grad, initializer, update_rule, device):
p = Parameter(initializer=initializer, name=name)
p.array = data
p.grad = grad
p.update_rule = update_rule
p.to_device(device)
return p
class _ChainerxVariableNodeProps(object):
def __init__(self, x):
self.shape = x.shape
self.dtype = x.dtype