From d18592ff6f8b1b9049176c8f326c15e74f2f7e77 Mon Sep 17 00:00:00 2001 From: Haowen Xu Date: Sun, 16 Feb 2020 19:24:56 +0800 Subject: [PATCH 1/7] checked the compatibility with PyTorch 1.4.0: still need some bug fix from upstream --- .coveragerc | 1 + .travis.yml | 6 +- README.md | 6 + tensorkit/__init__.py | 3 +- tensorkit/backend/{losses.py => optim.py} | 6 +- tensorkit/backend/pytorch_/core.py | 42 +- tensorkit/backend/pytorch_/flows.py | 132 ++-- tensorkit/backend/pytorch_/init.py | 4 +- tensorkit/backend/pytorch_/layers.py | 266 ++++--- tensorkit/backend/pytorch_/losses.py | 19 - tensorkit/backend/pytorch_/nn.py | 2 + tensorkit/backend/pytorch_/optim.py | 131 ++++ tensorkit/backend/pytorch_/train.py | 91 +++ tensorkit/backend/pytorch_/utils.py | 182 +++++ tensorkit/backend/train.py | 9 + tensorkit/backend/utils.py | 9 + tensorkit/distributions/flow.py | 20 +- tensorkit/examples/.gitignore | 1 + .../losses => tensorkit/examples}/__init__.py | 0 tensorkit/examples/classification/__init__.py | 0 tensorkit/examples/classification/mnist.py | 99 +++ .../examples/classification/mnist_resnet.py | 104 +++ tensorkit/examples/utils/__init__.py | 3 + tensorkit/examples/utils/fit_model_.py | 39 ++ tensorkit/examples/utils/ops.py | 9 + tensorkit/examples/utils/prepare_data.py | 69 ++ tensorkit/flows/act_norm.py | 59 +- tensorkit/flows/coupling.py | 28 +- tensorkit/flows/rearrangement.py | 12 +- tensorkit/flows/reshape_.py | 72 +- tensorkit/flows/split_.py | 52 +- tensorkit/init/std_data_init.py | 5 +- tensorkit/layers/__init__.py | 1 + tensorkit/layers/activation.py | 26 +- tensorkit/layers/builder.py | 651 ++++++++++++++++++ tensorkit/layers/contextual.py | 33 +- tensorkit/layers/flow_layer.py | 10 +- tensorkit/layers/gated.py | 5 +- tensorkit/layers/pixelcnn.py | 63 +- tensorkit/layers/pool.py | 20 +- tensorkit/layers/resnet.py | 20 +- tensorkit/layers/shape_.py | 33 +- tensorkit/layers/split_.py | 4 +- tensorkit/losses/core.py | 4 - tensorkit/optim/__init__.py | 2 + tensorkit/optim/core.py | 4 + tensorkit/optim/lr_scheduler.py | 71 ++ tensorkit/tensor/__init__.py | 2 +- tensorkit/tensor/utils.py | 4 + tensorkit/{losses => train}/__init__.py | 0 tensorkit/train/core.py | 4 + tensorkit/utils/__init__.py | 2 + tensorkit/utils/data_utils.py | 97 +++ tensorkit/utils/tensor_stream.py | 48 ++ tests/distributions/test_flow.py | 28 +- tests/flows/test_core.py | 120 ++-- tests/flows/test_coupling.py | 2 +- tests/flows/test_shape_.py | 4 +- tests/flows/test_split_.py | 28 +- tests/init/test_core.py | 2 +- tests/layers/test_contextual.py | 58 -- tests/layers/test_core.py | 39 +- tests/layers/test_flow_layer.py | 14 +- tests/layers/test_pixelcnn.py | 6 +- tests/layers/test_resnet.py | 10 +- tests/losses/test_core.py | 31 - tests/ops.py | 2 +- tests/tensor/test_core.py | 32 +- tests/tensor/test_nn.py | 6 + tests/tensor/test_utils.py | 119 ++++ tests/train/__init__.py | 0 tests/train/test_core.py | 67 ++ 72 files changed, 2472 insertions(+), 681 deletions(-) create mode 100644 README.md rename tensorkit/backend/{losses.py => optim.py} (59%) delete mode 100644 tensorkit/backend/pytorch_/losses.py create mode 100644 tensorkit/backend/pytorch_/optim.py create mode 100644 tensorkit/backend/pytorch_/train.py create mode 100644 tensorkit/backend/pytorch_/utils.py create mode 100644 tensorkit/backend/train.py create mode 100644 tensorkit/backend/utils.py create mode 100644 tensorkit/examples/.gitignore rename {tests/losses => tensorkit/examples}/__init__.py (100%) create mode 100644 
tensorkit/examples/classification/__init__.py create mode 100644 tensorkit/examples/classification/mnist.py create mode 100644 tensorkit/examples/classification/mnist_resnet.py create mode 100644 tensorkit/examples/utils/__init__.py create mode 100644 tensorkit/examples/utils/fit_model_.py create mode 100644 tensorkit/examples/utils/ops.py create mode 100644 tensorkit/examples/utils/prepare_data.py create mode 100644 tensorkit/layers/builder.py delete mode 100644 tensorkit/losses/core.py create mode 100644 tensorkit/optim/__init__.py create mode 100644 tensorkit/optim/core.py create mode 100644 tensorkit/optim/lr_scheduler.py create mode 100644 tensorkit/tensor/utils.py rename tensorkit/{losses => train}/__init__.py (100%) create mode 100644 tensorkit/train/core.py create mode 100644 tensorkit/utils/__init__.py create mode 100644 tensorkit/utils/data_utils.py create mode 100644 tensorkit/utils/tensor_stream.py delete mode 100644 tests/losses/test_core.py create mode 100644 tests/tensor/test_utils.py create mode 100644 tests/train/__init__.py create mode 100644 tests/train/test_core.py diff --git a/.coveragerc b/.coveragerc index e21aa69..33df70b 100644 --- a/.coveragerc +++ b/.coveragerc @@ -15,6 +15,7 @@ ignore_errors = True omit = tests/* scripts/* + tensorkit/examples/* tensorkit/backend/pytorch_/_make_dtypes_mapper.py setup.py *.pyt diff --git a/.travis.yml b/.travis.yml index e7c24ef..c17b9d8 100644 --- a/.travis.yml +++ b/.travis.yml @@ -8,9 +8,13 @@ install: - pip install --upgrade coverage coveralls - pip install -r requirements-dev.txt script: - # run tests with PyTorch + # run tests with PyTorch 1.3.1 - pip install torch==1.3.1 - TENSORKIT_BACKEND=PyTorch TENSORKIT_DISABLE_JIT=true coverage run -a -m pytest - TENSORKIT_BACKEND=PyTorch TENSORKIT_DISABLE_JIT=false coverage run -a -m pytest +# # run tests with PyTorch 1.4.0 +# - pip install torch==1.4.0 +# - TENSORKIT_BACKEND=PyTorch TENSORKIT_DISABLE_JIT=true coverage run -a -m pytest +# - TENSORKIT_BACKEND=PyTorch TENSORKIT_DISABLE_JIT=false coverage run -a -m pytest after_success: - coveralls diff --git a/README.md b/README.md new file mode 100644 index 0000000..70cf60a --- /dev/null +++ b/README.md @@ -0,0 +1,6 @@ +## TensorKit + +### Requirements + +* PyTorch >= 1.4.0 + diff --git a/tensorkit/__init__.py b/tensorkit/__init__.py index 6fdcd0e..90191fb 100644 --- a/tensorkit/__init__.py +++ b/tensorkit/__init__.py @@ -1,7 +1,8 @@ __version__ = '0.0.1' -from . import backend, distributions, flows, init, layers, losses, variational +from . 
import (backend, distributions, flows, init, layers, optim, train, + utils, variational) from .bayes import * # from .distributions import * # from .layers import * diff --git a/tensorkit/backend/losses.py b/tensorkit/backend/optim.py similarity index 59% rename from tensorkit/backend/losses.py rename to tensorkit/backend/optim.py index 0ae2540..9f5958d 100644 --- a/tensorkit/backend/losses.py +++ b/tensorkit/backend/optim.py @@ -1,9 +1,9 @@ from ..settings_ import settings if settings.backend == 'PyTorch': - from .pytorch_ import losses - from .pytorch_.losses import * + from .pytorch_ import optim + from .pytorch_.optim import * else: RuntimeError(f'Backend {settings.backend} not supported.') -__all__ = losses.__all__ +__all__ = optim.__all__ diff --git a/tensorkit/backend/pytorch_/core.py b/tensorkit/backend/pytorch_/core.py index 68318ad..8c01417 100644 --- a/tensorkit/backend/pytorch_/core.py +++ b/tensorkit/backend/pytorch_/core.py @@ -59,7 +59,7 @@ # reduce operators 'reduce_sum', 'reduce_mean', 'reduce_max', 'reduce_min', - 'log_sum_exp', 'log_mean_exp', + 'argmax', 'argmin', 'log_sum_exp', 'log_mean_exp', # 'all', 'any', 'calculate_mean_and_var', 'norm_except_axis', @@ -1108,6 +1108,16 @@ def reduce_min(input: Tensor, return input +@jit +def argmax(input: Tensor, axis: int, keepdims: bool = False) -> Tensor: + return torch.argmax(input, dim=axis, keepdim=keepdims) + + +@jit +def argmin(input: Tensor, axis: int, keepdims: bool = False) -> Tensor: + return torch.argmin(input, dim=axis, keepdim=keepdims) + + @jit def log_sum_exp(input: Tensor, axis: Optional[List[int]] = None, @@ -1363,33 +1373,47 @@ def matrix_inverse(matrix: Tensor) -> Tensor: # ---- gradient utilities ---- -if settings.disable_jit: +if settings.disable_jit or not torch.__version__.startswith('1.3.'): + @jit def grad(outputs: List[Tensor], inputs: List[Tensor], grad_outputs: Optional[List[Optional[Tensor]]] = None, - keep_graph: Optional[bool] = None, + retain_graph: Optional[bool] = None, create_graph: bool = False, allow_unused: bool = False) -> List[Optional[Tensor]]: - return list( + grad_outs = list( torch.autograd.grad( outputs=outputs, inputs=inputs, grad_outputs=grad_outputs, - retain_graph=keep_graph, + retain_graph=retain_graph, create_graph=create_graph, allow_unused=allow_unused, ) ) + if not allow_unused: + for i in range(len(grad_outs)): + if grad_outs[i] is None: + raise RuntimeError( + 'One of the differentiated Tensors ' + 'appears to not have been used in the graph. ' + 'Set allow_unused=True if this is the desired ' + 'behavior.' 
+ ) + + return grad_outs + + + def is_null_grad(origin: Tensor, grad: Optional[Tensor]) -> bool: + return grad is None - def is_null_grad(origin: Tensor, gradient: Optional[Tensor]) -> bool: - return gradient is None else: @jit def grad(outputs: List[Tensor], inputs: List[Tensor], grad_outputs: Optional[List[Optional[Tensor]]] = None, - keep_graph: Optional[bool] = None, + retain_graph: Optional[bool] = None, create_graph: bool = False, allow_unused: bool = False) -> List[Tensor]: grad_outs = list( @@ -1397,7 +1421,7 @@ def grad(outputs: List[Tensor], outputs=outputs, inputs=inputs, grad_outputs=grad_outputs, - keep_graph=keep_graph, + keep_graph=retain_graph, create_graph=create_graph, allow_unused=allow_unused, ) diff --git a/tensorkit/backend/pytorch_/flows.py b/tensorkit/backend/pytorch_/flows.py index 3f5892b..216bc72 100644 --- a/tensorkit/backend/pytorch_/flows.py +++ b/tensorkit/backend/pytorch_/flows.py @@ -13,17 +13,17 @@ from .nn import * __all__ = [ - 'BaseFlow', 'FeatureMappingFlow', + 'Flow', 'FeatureMappingFlow', 'InverseFlow', 'SequentialFlow', 'LooseInvertibleMatrix', 'StrictInvertibleMatrix', 'InvertibleDense', 'InvertibleConv1d', 'InvertibleConv2d', 'InvertibleConv3d', - 'BaseScale', 'SigmoidScale', 'ExpScale', 'LinearScale', + 'Scale', 'SigmoidScale', 'ExpScale', 'LinearScale', ] # ---- base flow classes ---- -class BaseFlow(BaseLayer): +class Flow(BaseLayer): """ Base class for normalizing flows. @@ -61,7 +61,19 @@ def __init__(self, self.y_event_ndims = int(y_event_ndims) self.explicitly_invertible = bool(explicitly_invertible) - def invert(self) -> 'BaseFlow': + @jit_method + def get_x_event_ndims(self) -> int: + return self.x_event_ndims + + @jit_method + def get_y_event_ndims(self) -> int: + return self.y_event_ndims + + @jit_method + def is_explicitly_invertible(self) -> bool: + return self.explicitly_invertible + + def invert(self) -> 'Flow': """ Get the inverse flow from this flow. 
@@ -78,12 +90,12 @@ def invert(self) -> 'BaseFlow': """ return InverseFlow(self) - def _forward(self, - input: Tensor, - input_log_det: Optional[Tensor], - inverse: bool, - compute_log_det: bool - ) -> Tuple[Tensor, Optional[Tensor]]: + def _transform(self, + input: Tensor, + input_log_det: Optional[Tensor], + inverse: bool, + compute_log_det: bool + ) -> Tuple[Tensor, Optional[Tensor]]: raise NotImplementedError() def forward(self, @@ -133,7 +145,7 @@ def forward(self, ) # compute the transformed output and log-det - output, output_log_det = self._forward( + output, output_log_det = self._transform( input, input_log_det, inverse, compute_log_det) if output_log_det is not None: @@ -150,10 +162,10 @@ def forward(self, return output, output_log_det -class FeatureMappingFlow(BaseFlow): +class FeatureMappingFlow(Flow): """Base class for flows mapping input features to output features.""" - __constants__ = BaseFlow.__constants__ + ('axis',) + __constants__ = Flow.__constants__ + ('axis',) axis: int """The feature axis (negative index).""" @@ -194,44 +206,48 @@ def __init__(self, explicitly_invertible=explicitly_invertible) self.axis = axis - @property - def event_ndims(self) -> int: + @jit_method + def get_axis(self) -> int: + return self.axis + + @jit_method + def get_event_ndims(self) -> int: """Get the number of event dimensions in both `x` and `y`.""" return self.x_event_ndims # ---- composite flows ---- -class InverseFlow(BaseFlow): +class InverseFlow(Flow): """A flow that inverts another given flow.""" - __constants__ = BaseFlow.__constants__ + ('original_flow',) + __constants__ = Flow.__constants__ + ('original_flow',) original_flow: Module """The original flow, which is inverted by this :class:`InverseFlow`.""" def __init__(self, flow: Module): - if (not isinstance(flow, BaseFlow) and not is_jit_layer(flow)) or \ - not flow.explicitly_invertible: + if (not isinstance(flow, Flow) and not is_jit_layer(flow)) or \ + not flow.is_explicitly_invertible(): raise TypeError( f'`flow` must be an explicitly invertible flow: ' f'got {flow!r}' ) super().__init__( - x_event_ndims=flow.y_event_ndims, - y_event_ndims=flow.x_event_ndims, - explicitly_invertible=flow.explicitly_invertible, + x_event_ndims=flow.get_y_event_ndims(), + y_event_ndims=flow.get_x_event_ndims(), + explicitly_invertible=flow.is_explicitly_invertible(), ) self.original_flow = flow - def invert(self) -> BaseFlow: + def invert(self) -> Flow: return self.original_flow - def _forward(self, - input: Tensor, - input_log_det: Optional[Tensor], - inverse: bool, - compute_log_det: bool) -> Tuple[Tensor, Optional[Tensor]]: + def _transform(self, + input: Tensor, + input_log_det: Optional[Tensor], + inverse: bool, + compute_log_det: bool) -> Tuple[Tensor, Optional[Tensor]]: return self.original_flow( input, input_log_det, not inverse, compute_log_det) @@ -247,9 +263,9 @@ def forward(self, raise RuntimeError('Not an explicitly invertible flow.') -class SequentialFlow(BaseFlow): +class SequentialFlow(Flow): - __constants__ = BaseFlow.__constants__ + ('_chain', '_inverse_chain') + __constants__ = Flow.__constants__ + ('_chain', '_inverse_chain') _chain: ModuleList @@ -267,22 +283,22 @@ def __init__(self, *flows: Union[Module, Sequence[Module]]): raise ValueError('`flows` must not be empty.') for i, flow in enumerate(flows): - if not isinstance(flow, BaseFlow) and not is_jit_layer(flow): + if not isinstance(flow, Flow) and not is_jit_layer(flow): raise TypeError(f'`flows[{i}]` is not a flow: got {flow!r}') for i, (flow1, flow2) in 
enumerate(zip(flows[:-1], flows[1:])): - if flow2.x_event_ndims != flow1.y_event_ndims: + if flow2.get_x_event_ndims() != flow1.get_y_event_ndims(): raise ValueError( f'`x_event_ndims` of `flows[{i + 1}]` != ' f'`y_event_ndims` of `flows[{i}]`: ' - f'{flow2.x_event_ndims} vs {flow1.y_event_ndims}.' + f'{flow2.get_x_event_ndims()} vs {flow1.get_y_event_ndims()}.' ) super().__init__( - x_event_ndims=flows[0].x_event_ndims, - y_event_ndims=flows[-1].y_event_ndims, + x_event_ndims=flows[0].get_x_event_ndims(), + y_event_ndims=flows[-1].get_y_event_ndims(), explicitly_invertible=all( - flow.explicitly_invertible for flow in flows) + flow.is_explicitly_invertible() for flow in flows) ) self._chain = ModuleList(flows) @@ -291,12 +307,12 @@ def __init__(self, *flows: Union[Module, Sequence[Module]]): else: self._inverse_chain = ModuleList([_NotInvertibleFlow()]) - def _forward(self, - input: Tensor, - input_log_det: Optional[Tensor], - inverse: bool, - compute_log_det: bool - ) -> Tuple[Tensor, Optional[Tensor]]: + def _transform(self, + input: Tensor, + input_log_det: Optional[Tensor], + inverse: bool, + compute_log_det: bool + ) -> Tuple[Tensor, Optional[Tensor]]: output, output_log_det = input, input_log_det if inverse: @@ -504,16 +520,16 @@ def __init__(self, def _get_spatial_ndims(self) -> int: raise NotImplementedError() - def _linear_transform(self, input: Tensor, weight: Tensor) -> Tensor: + def _affine_transform(self, input: Tensor, weight: Tensor) -> Tensor: raise NotImplementedError() @jit_method - def _forward(self, - input: Tensor, - input_log_det: Optional[Tensor], - inverse: bool, - compute_log_det: bool - ) -> Tuple[Tensor, Optional[Tensor]]: + def _transform(self, + input: Tensor, + input_log_det: Optional[Tensor], + inverse: bool, + compute_log_det: bool + ) -> Tuple[Tensor, Optional[Tensor]]: # obtain the weight weight, log_det = self.invertible_matrix( inverse=inverse, compute_log_det=compute_log_det) @@ -522,7 +538,7 @@ def _forward(self, # compute the output output, front_shape = flatten_to_ndims(input, spatial_ndims + 2) - output = self._linear_transform(output, weight) + output = self._affine_transform(output, weight) output = unflatten_from_ndims(output, front_shape) # compute the log_det @@ -545,7 +561,7 @@ def _get_spatial_ndims(self) -> int: return 0 @jit_method - def _linear_transform(self, input: Tensor, weight: Tensor) -> Tensor: + def _affine_transform(self, input: Tensor, weight: Tensor) -> Tensor: return torch.nn.functional.linear(input, weight) @@ -556,7 +572,7 @@ def _get_spatial_ndims(self) -> int: return 1 @jit_method - def _linear_transform(self, input: Tensor, weight: Tensor) -> Tensor: + def _affine_transform(self, input: Tensor, weight: Tensor) -> Tensor: return torch.nn.functional.conv1d(input, weight) @@ -567,7 +583,7 @@ def _get_spatial_ndims(self) -> int: return 2 @jit_method - def _linear_transform(self, input: Tensor, weight: Tensor) -> Tensor: + def _affine_transform(self, input: Tensor, weight: Tensor) -> Tensor: return torch.nn.functional.conv2d(input, weight) @@ -578,12 +594,12 @@ def _get_spatial_ndims(self) -> int: return 3 @jit_method - def _linear_transform(self, input: Tensor, weight: Tensor) -> Tensor: + def _affine_transform(self, input: Tensor, weight: Tensor) -> Tensor: return torch.nn.functional.conv3d(input, weight) # ---- scale modules, for transforming input to output by a scale ---- -class BaseScale(BaseLayer): +class Scale(BaseLayer): """Base class for scaling `input`.""" def _scale_and_log_scale(self, @@ -669,7 +685,7 @@ def 
forward(self, return output, output_log_det -class ExpScale(BaseScale): +class ExpScale(Scale): """ Scaling `input` with `exp` activation. @@ -701,7 +717,7 @@ def _scale_and_log_scale(self, return scale, log_scale -class SigmoidScale(BaseScale): +class SigmoidScale(Scale): """ Scaling `input` with `sigmoid` activation. @@ -745,7 +761,7 @@ def _scale_and_log_scale(self, return scale, log_scale -class LinearScale(BaseScale): +class LinearScale(Scale): """ Scaling `input` with `linear` activation. diff --git a/tensorkit/backend/pytorch_/init.py b/tensorkit/backend/pytorch_/init.py index d6bb26b..f7a144d 100644 --- a/tensorkit/backend/pytorch_/init.py +++ b/tensorkit/backend/pytorch_/init.py @@ -271,11 +271,11 @@ def register(self, layer: Module, initialized: bool = False) -> None: _ = DataDependentInitializerForwardPreHook( self, layer, initialized=initialized) - def _forward(self, layer: Module, inputs: List[Tensor]) -> None: + def _init(self, layer: Module, inputs: List[Tensor]) -> None: raise NotImplementedError() def __call__(self, layer: Module, inputs: List[Tensor]) -> None: - self._forward(layer, list(inputs)) + self._init(layer, list(inputs)) def __repr__(self) -> str: buf = [] diff --git a/tensorkit/backend/pytorch_/layers.py b/tensorkit/backend/pytorch_/layers.py index 96dc9b8..53fdc72 100644 --- a/tensorkit/backend/pytorch_/layers.py +++ b/tensorkit/backend/pytorch_/layers.py @@ -14,12 +14,12 @@ 'DEFAULT_GATE_BIAS', 'DEFAULT_WEIGHT_INIT', 'DEFAULT_BIAS_INIT', # utils - 'add_parameter', 'get_parameter', 'get_parameters', - 'add_buffer', 'get_buffer', 'get_buffers', + 'add_parameter', 'get_parameter', 'get_parameters', 'get_named_parameters', + 'add_buffer', 'get_buffer', 'get_buffers', 'get_named_buffers', 'set_train_mode', # parameter store modules - 'BaseParamStore', 'SimpleParamStore', + 'ParamStore', 'SimpleParamStore', 'NormedWeightStore', 'NormedAndScaledWeightStore', 'get_weight_store', 'get_bias_store', @@ -27,10 +27,7 @@ 'Identity', # base layers and composition layers - 'BaseLayer', 'BaseSingleVariateLayer', 'BaseMultiVariateLayer', - 'BaseSplitLayer', 'BaseMergeLayer', - 'ModuleList', 'Sequential', - 'BaseContextualLayer', 'BaseMultiVariateContextualLayer', + 'BaseLayer', 'ModuleList', 'Sequential', # linear layers 'CoreLinear', 'Linear', @@ -70,7 +67,12 @@ def get_parameter(layer: Module, name: str) -> Optional[Variable]: def get_parameters(layer: Module, recursive: bool = True - ) -> Iterator[Tuple[str, Variable]]: + ) -> Iterator[Variable]: + return layer.parameters(recurse=recursive) + + +def get_named_parameters(layer: Module, recursive: bool = True + ) -> Iterator[Tuple[str, Variable]]: return layer.named_parameters(recurse=recursive) @@ -87,7 +89,12 @@ def get_buffer(layer: Module, name: str) -> Optional[Tensor]: def get_buffers(layer: Module, recursive: bool = True - ) -> Iterator[Tuple[str, Tensor]]: + ) -> Iterator[Tensor]: + return layer.buffers(recurse=recursive) + + +def get_named_buffers(layer: Module, recursive: bool = True + ) -> Iterator[Tuple[str, Tensor]]: return layer.named_buffers(recurse=recursive) @@ -97,7 +104,16 @@ def set_train_mode(layer: Module, training: bool = True): # ---- weight wrapper: a simple weight, or a normed weight ---- -class BaseParamStore(Module): +class _NullParamStore(Module): + # This module is actually not used in any context. + # It is just a place-holder module, to gain JIT support. 
+ + def forward(self) -> Tensor: # pragma: no cover + zero_shape: List[int] = [] + return torch.zeros(zero_shape, dtype=torch.float32) + + +class ParamStore(Module): """ Base class for a component that stores a trainable parameter, or a set of trainable parameters that can be used to derive @@ -125,7 +141,7 @@ def set(self, value: TensorOrData) -> None: raise NotImplementedError() -class SimpleParamStore(BaseParamStore): +class SimpleParamStore(ParamStore): """A module that carries a direct variable as the parameter.""" def __init__(self, @@ -165,10 +181,10 @@ def weight_norm_decompose(weight: Tensor, return v, v_norm -class NormedWeightStore(BaseParamStore): +class NormedWeightStore(ParamStore): """A module that carries the weight-normed `weight`, without `g`.""" - __constants__ = BaseParamStore.__constants__ + ('feature_axis', 'epsilon') + __constants__ = ParamStore.__constants__ + ('feature_axis', 'epsilon') norm_axis: int epsilon: float @@ -202,10 +218,10 @@ def set(self, value: TensorOrData) -> None: assign_data(self.v, v) -class NormedAndScaledWeightStore(BaseParamStore): +class NormedAndScaledWeightStore(ParamStore): """A module that carries the weight-normed `weight`, with `v` and `g`.""" - __constants__ = BaseParamStore.__constants__ + ('feature_axis', 'epsilon') + __constants__ = ParamStore.__constants__ + ('feature_axis', 'epsilon') norm_axis: int epsilon: float @@ -245,7 +261,7 @@ def get_weight_store(shape: List[int], initializer: TensorInitArgType = DEFAULT_WEIGHT_INIT, norm_axis: int = 1, weight_norm: WeightNormArgType = False - ) -> BaseParamStore: + ) -> ParamStore: """ Create a module which carries the `weight` parameter. @@ -275,7 +291,7 @@ def get_weight_store(shape: List[int], def get_bias_store(shape: List[int], initializer: TensorInitArgType = DEFAULT_BIAS_INIT, use_bias: bool = True - ) -> Optional[BaseParamStore]: + ) -> Optional[ParamStore]: """ Create a module that carries the `bias` parameter. @@ -300,7 +316,26 @@ def forward(self, input: Tensor) -> Tensor: # ---- base layers and composition layers ---- -class BaseLayer(Module): +class BaseLayerMeta(type): + + def __new__(cls, name, parents, dct): + if torch.__version__ == '1.4.0': + # strange bug, that PyTorch 1.4.0 does not support annotations + # with type `Module` or `ModuleList` + if '__annotations__' in dct: + annotations = dct['__annotations__'] + annotation_keys = list(annotations) + for attr in annotation_keys: + if annotations[attr] in (Module, ModuleList): + annotations.pop(attr) + + return super().__new__(cls, name, parents, dct) + + +class BaseLayer(Module, metaclass=BaseLayerMeta): + + def _is_attr_included_in_repr(self, attr: str, value: Any) -> bool: + return True def extra_repr(self) -> str: buf = [] @@ -314,102 +349,17 @@ def extra_repr(self) -> str: if attr.startswith('_'): continue attr_val = getattr(self, attr, None) - if attr_val is None or isinstance(attr_val, Module) or \ - isinstance(attr_val, Tensor): + if attr_val is None or \ + isinstance(attr_val, Module) or \ + isinstance(attr_val, Tensor) or \ + is_jit_layer(attr_val): continue - buf.append(f'{attr}={attr_val!r}') + if self._is_attr_included_in_repr(attr, attr_val): + buf.append(f'{attr}={attr_val!r}') return ', '.join(buf) -class BaseSingleVariateLayer(BaseLayer): - """ - Base class for single-input, single-output layers. - - Sub-classes should override `_call(input: Tensor) -> Tensor` to - actually implement the module. 
- """ - - def _forward(self, input: Tensor) -> Tensor: - raise NotImplementedError() - - def forward(self, input: Tensor) -> Tensor: - return self._forward(input) - - -class BaseMultiVariateLayer(BaseLayer): - """ - Base class for multiple-input, multiple-output layers. - The inputs and outputs should be given as a list of Tensors. - """ - - def _forward(self, inputs: List[Tensor]) -> List[Tensor]: - raise NotImplementedError() - - def forward(self, inputs: List[Tensor]) -> List[Tensor]: - return self._forward(inputs) - - -class BaseSplitLayer(BaseLayer): - """ - Base class for single-input, multiple-output layers. - The outputs should be given as a list of Tensors. - """ - - def _forward(self, input: Tensor) -> List[Tensor]: - raise NotImplementedError() - - def forward(self, input: Tensor) -> List[Tensor]: - return self._forward(input) - - -class BaseMergeLayer(BaseLayer): - """ - Base class for multiple-input, single-output layers. - The inputs should be given as a list of Tensors. - """ - - def _forward(self, inputs: List[Tensor]) -> Tensor: - raise NotImplementedError() - - def forward(self, inputs: List[Tensor]) -> Tensor: - return self._forward(inputs) - - -class BaseContextualLayer(BaseLayer): - """ - Base class layers that produces the output according to the input tensor - and contextual tensors. - """ - - def _forward(self, input: Tensor, context: List[Tensor]) -> Tensor: - raise NotImplementedError() - - def forward(self, - input: Tensor, - context: Optional[List[Tensor]] = None) -> Tensor: - if context is None: - context = [] - return self._forward(input, context) - - -class BaseMultiVariateContextualLayer(BaseLayer): - """ - Base class layers that produces the output tensors according to the - input tensors and contextual tensors. - """ - - def _forward(self, inputs: List[Tensor], context: List[Tensor]) -> List[Tensor]: - raise NotImplementedError() - - def forward(self, - inputs: List[Tensor], - context: Optional[List[Tensor]] = None) -> List[Tensor]: - if context is None: - context = [] - return self._forward(inputs, context) - - class Sequential(torch_nn.Sequential): def __init__(self, *layers: Union[Module, Sequence[Module]]): @@ -432,7 +382,8 @@ class CoreLinear(BaseLayer): ) weight_store: Module - bias_store: Optional[Module] + bias_store: Module + use_bias: bool def __init__(self, weight_shape: List[int], @@ -447,6 +398,8 @@ def __init__(self, weight_shape, initializer=weight_init, weight_norm=weight_norm) bias_store = get_bias_store( bias_shape, initializer=bias_init, use_bias=use_bias) + if bias_store is None: + bias_store = _NullParamStore() if data_init is not None: if not isinstance(data_init, init.DataDependentInitializer) and \ @@ -460,40 +413,37 @@ def __init__(self, super().__init__() self.weight_store = weight_store self.bias_store = bias_store + self.use_bias = use_bias if data_init is not None: data_init.register(self) - def __repr__(self) -> str: - attributes = [] - for attr in self.__annotations__: - val = getattr(self, attr, None) - if val is not None: - if attr == 'use_bias': - if not val: - attributes.append(f'{attr}={val}') - elif not isinstance(val, (Module, Tensor)): - attributes.append(f'{attr}={val!r}') - return f'{self.__class__.__qualname__}({", ".join(attributes)})' - - def _forward(self, input: Tensor, weight: Tensor, bias: Optional[Tensor] - ) -> Tensor: + def _is_attr_included_in_repr(self, attr: str, value: Any) -> bool: + return attr != 'use_bias' or not value + + def __repr__(self): + return 
f'{self.__class__.__qualname__}({self.extra_repr()})' + + def _linear_transform(self, + input: Tensor, + weight: Tensor, + bias: Optional[Tensor] + ) -> Tensor: raise NotImplementedError() def forward(self, input: Tensor) -> Tensor: weight = self.weight_store() - if self.bias_store is None: - bias = None + if self.use_bias: + bias: Optional[Tensor] = self.bias_store() else: - bias = self.bias_store() - return self._forward(input, weight, bias) + bias: Optional[Tensor] = None + return self._linear_transform(input, weight, bias) class Linear(CoreLinear): in_features: int out_features: int - use_bias: bool def __init__(self, in_features: int, @@ -509,7 +459,6 @@ def __init__(self, self.in_features = in_features self.out_features = out_features - self.use_bias = use_bias super().__init__( weight_shape=[out_features, in_features], @@ -522,8 +471,11 @@ def __init__(self, ) @jit_method - def _forward(self, input: Tensor, weight: Tensor, bias: Optional[Tensor] - ) -> Tensor: + def _linear_transform(self, + input: Tensor, + weight: Tensor, + bias: Optional[Tensor] + ) -> Tensor: output, front_shape = flatten_to_ndims(input, 2) output = torch.nn.functional.linear(output, weight, bias) output = unflatten_from_ndims(output, front_shape) @@ -539,7 +491,6 @@ class LinearConvNd(CoreLinear): padding: List[Tuple[int, int]] _symmetric_padding: Optional[List[int]] dilation: List[int] - use_bias: bool def __init__(self, in_channels: int, @@ -570,7 +521,6 @@ def __init__(self, self.dilation = dilation self.padding = padding self._symmetric_padding = _symmetric_padding - self.use_bias = use_bias super().__init__( weight_shape=[out_channels, in_channels] + kernel_size, @@ -592,8 +542,11 @@ def _get_spatial_ndims(self) -> int: return 1 @jit_method - def _forward(self, input: Tensor, weight: Tensor, bias: Optional[Tensor] - ) -> Tensor: + def _linear_transform(self, + input: Tensor, + weight: Tensor, + bias: Optional[Tensor] + ) -> Tensor: if self._symmetric_padding is not None: return torch.nn.functional.conv1d( input=input, weight=weight, bias=bias, stride=self.stride, @@ -613,8 +566,11 @@ def _get_spatial_ndims(self) -> int: return 2 @jit_method - def _forward(self, input: Tensor, weight: Tensor, bias: Optional[Tensor] - ) -> Tensor: + def _linear_transform(self, + input: Tensor, + weight: Tensor, + bias: Optional[Tensor] + ) -> Tensor: if self._symmetric_padding is not None: return torch.nn.functional.conv2d( input=input, weight=weight, bias=bias, stride=self.stride, @@ -634,8 +590,11 @@ def _get_spatial_ndims(self) -> int: return 3 @jit_method - def _forward(self, input: Tensor, weight: Tensor, bias: Optional[Tensor] - ) -> Tensor: + def _linear_transform(self, + input: Tensor, + weight: Tensor, + bias: Optional[Tensor] + ) -> Tensor: if self._symmetric_padding is not None: return torch.nn.functional.conv3d( input=input, weight=weight, bias=bias, stride=self.stride, @@ -659,7 +618,6 @@ class LinearConvTransposeNd(CoreLinear): is_symmetric_padding: bool dilation: List[int] output_padding: List[int] - use_bias: bool def __init__(self, in_channels: int, @@ -694,7 +652,6 @@ def __init__(self, self._symmetric_padding = _symmetric_padding self.output_padding = output_padding self.dilation = dilation - self.use_bias = use_bias super().__init__( weight_shape=[in_channels, out_channels] + kernel_size, @@ -731,8 +688,11 @@ def _get_spatial_ndims(self) -> int: return 1 @jit_method - def _forward(self, input: Tensor, weight: Tensor, bias: Optional[Tensor] - ) -> Tensor: + def _linear_transform(self, + input: Tensor, + 
weight: Tensor, + bias: Optional[Tensor] + ) -> Tensor: if self._symmetric_padding is not None: return torch.nn.functional.conv_transpose1d( input=input, weight=weight, bias=bias, stride=self.stride, @@ -754,8 +714,11 @@ def _get_spatial_ndims(self) -> int: return 2 @jit_method - def _forward(self, input: Tensor, weight: Tensor, bias: Optional[Tensor] - ) -> Tensor: + def _linear_transform(self, + input: Tensor, + weight: Tensor, + bias: Optional[Tensor] + ) -> Tensor: if self._symmetric_padding is not None: return torch.nn.functional.conv_transpose2d( input=input, weight=weight, bias=bias, stride=self.stride, @@ -777,8 +740,11 @@ def _get_spatial_ndims(self) -> int: return 3 @jit_method - def _forward(self, input: Tensor, weight: Tensor, bias: Optional[Tensor] - ) -> Tensor: + def _linear_transform(self, + input: Tensor, + weight: Tensor, + bias: Optional[Tensor] + ) -> Tensor: if self._symmetric_padding is not None: return torch.nn.functional.conv_transpose3d( input=input, weight=weight, bias=bias, stride=self.stride, @@ -859,7 +825,7 @@ def _check_input_dim(self, input: Tensor): Dropout = torch_nn.Dropout -class Dropout1d(BaseSingleVariateLayer): +class Dropout1d(BaseLayer): """Randomly zero out entire channels of the 1d convolution input.""" __constants__ = ('p', '_keep_prob') @@ -872,7 +838,7 @@ def __init__(self, p: float = 0.5): self.p = p self._keep_prob = 1. - p - def _forward(self, input: Tensor) -> Tensor: + def forward(self, input: Tensor) -> Tensor: if input.dim() < 2: # pragma: no cover raise ValueError('`input` must be at least 2d, but the ' 'input shape is {}.'.format(shape(input))) diff --git a/tensorkit/backend/pytorch_/losses.py b/tensorkit/backend/pytorch_/losses.py deleted file mode 100644 index 4013e77..0000000 --- a/tensorkit/backend/pytorch_/losses.py +++ /dev/null @@ -1,19 +0,0 @@ -from .core import * -from .layers import * - - -__all__ = [ - 'BaseSupervisedLossLayer', -] - - -class BaseSupervisedLossLayer(BaseLayer): - - def _forward(self, output: Tensor, target: Tensor) -> Tensor: - raise NotImplementedError() - - def forward(self, output: Tensor, target: Tensor) -> Tensor: - ret = self._forward(output, target) - if ret.numel() > 1: - ret = ret.mean() - return ret diff --git a/tensorkit/backend/pytorch_/nn.py b/tensorkit/backend/pytorch_/nn.py index 7177cf6..a083b39 100644 --- a/tensorkit/backend/pytorch_/nn.py +++ b/tensorkit/backend/pytorch_/nn.py @@ -117,6 +117,8 @@ def cross_entropy_with_logits(logits: Tensor, logits, front_shape = flatten_to_ndims(logits, 2) labels, _ = flatten_to_ndims(labels, 1) + if labels.dtype != torch.int64: + labels = labels.to(torch.int64) ret = torch.nn.functional.cross_entropy( logits, labels, reduction=reduction) diff --git a/tensorkit/backend/pytorch_/optim.py b/tensorkit/backend/pytorch_/optim.py new file mode 100644 index 0000000..70e3ca9 --- /dev/null +++ b/tensorkit/backend/pytorch_/optim.py @@ -0,0 +1,131 @@ +from contextlib import contextmanager +from typing import * + +import torch +from torch.optim import (adam, adadelta, adagrad, adamax, + rmsprop, sgd) +from torch.optim.optimizer import Optimizer as TorchOptimizer + +from .core import * + +__all__ = [ + 'Optimizer', 'SGD', 'Adam', +] + + +class Optimizer(object): + + @property + def lr(self) -> float: + raise NotImplementedError() + + def set_lr(self, lr: float): + raise NotImplementedError() + + def add_params(self, params: List[Variable]): + raise NotImplementedError() + + def clear_grad(self): + raise NotImplementedError() + + @contextmanager + def capture_grad(self) 
-> Generator[None, None, None]: + raise NotImplementedError() + + def minimize(self, loss: Tensor): + raise NotImplementedError() + + def maximize(self, loss: Tensor): + raise NotImplementedError() + + def state_dict(self) -> Dict[str, Any]: + raise NotImplementedError() + + def load_state_dict(self, state_dict: Dict[str, Any]): + raise NotImplementedError() + + +class BackendOptimizer(Optimizer): + + _lr: float = None + torch_optimizer: TorchOptimizer + + def __init__(self, + lr: float, + torch_optimizer: TorchOptimizer): + self.torch_optimizer = torch_optimizer + self.set_lr(lr) + + def lr(self) -> float: + return self._lr + + def set_lr(self, lr: float): + if self._lr != lr: + for group in self.torch_optimizer.param_groups: + group['lr'] = lr + self._lr = lr + + def add_params(self, params: Sequence[Variable]): + self.torch_optimizer.add_param_group({ + 'params': list(params), + 'lr': self._lr, + }) + + def clear_grad(self): + self.torch_optimizer.zero_grad() + + @contextmanager + def capture_grad(self) -> Generator[None, None, None]: + yield + + def minimize(self, loss: Tensor): + loss.backward() + self.torch_optimizer.step() + + def maximize(self, loss: Tensor): + self.minimize(-loss) + + def state_dict(self) -> Dict[str, Any]: + return self.torch_optimizer.state_dict() + + def load_state_dict(self, state_dict: Dict[str, Any]): + self.torch_optimizer.load_state_dict(state_dict) + + +class SGD(BackendOptimizer): + + def __init__(self, + params: Iterable[Variable], + lr: float, + momentum: float = 0., + nesterov: bool = False): + super().__init__( + lr=lr, + torch_optimizer=torch.optim.sgd.SGD( + params=params, + lr=lr, + momentum=momentum, + nesterov=nesterov, + ) + ) + + +class Adam(BackendOptimizer): + + def __init__(self, + params: Iterable[Variable], + lr: float = 1e-3, + beta_1: float = 0.9, + beta_2: float = 0.999, + epsilon: float = 1e-8, + amsgrad: bool = False): + super().__init__( + lr=lr, + torch_optimizer=adam.Adam( + params=params, + lr=lr, + betas=(beta_1, beta_2), + eps=epsilon, + amsgrad=amsgrad, + ) + ) diff --git a/tensorkit/backend/pytorch_/train.py b/tensorkit/backend/pytorch_/train.py new file mode 100644 index 0000000..1632113 --- /dev/null +++ b/tensorkit/backend/pytorch_/train.py @@ -0,0 +1,91 @@ +import os +from typing import * + +import torch +from mltk import StatefulObject, BaseCheckpoint + +__all__ = ['Checkpoint'] + + +class _TorchCheckpointableObject(StatefulObject): + """Wraps a PyTorch checkpointable object into :class:`StatefulObject`.""" + + def __init__(self, torch_object): + self.torch_object = torch_object + + def get_state_dict(self) -> Dict[str, Any]: + return self.torch_object.state_dict() + + def set_state_dict(self, state: Dict[str, Any]): + self.torch_object.load_state_dict(state) + + +class Checkpoint(BaseCheckpoint): + """ + A checkpoint object that supports to save and recover PyTorch checkpointable + objects (i.e., objects with method :meth:`state_dict()` and + :meth:`load_state_dict()`). + + Usage:: + + # create the PyTorch objects + class Net(torch.nn.Module): + ... + + net = Net(...) + optimizer = T.optim.Adam(...) 
+ + # construct the checkpoint object + checkpoint = T.train.Checkpoint(net=net, optimizer=optimizer) + + # save a checkpoint + checkpoint.save(checkpoint_path) + + # restore the checkpoint + checkpoint.restore(checkpoint_path) + """ + + def __init__(self, **torch_objects: Any): + def to_stateful_object(obj) -> StatefulObject: + if isinstance(obj, StatefulObject): + return obj + elif hasattr(obj, 'state_dict') and hasattr(obj, 'load_state_dict'): + return _TorchCheckpointableObject(obj) + else: + raise TypeError( + f'Object must be a :class:`StatefulObject`, or has ' + f'`state_dict()` and `load_state_dict()` methods: ' + f'got {obj!r}' + ) + + self._objects: Dict[str, StatefulObject] = { + k: to_stateful_object(o) + for k, o in torch_objects.items() + } + + def _restore(self, checkpoint_path: str) -> None: + data_path = os.path.join(checkpoint_path, 'data.pth') + state_dict = torch.load(data_path) + + # check whether or not all keys exist + for k in self._objects: + if k not in state_dict: + raise ValueError(f'Key {k!r} does not exist in the ' + f'state dict recovered from: {data_path}') + + # load the state dict + for k, o in self._objects.items(): + o.set_state_dict(state_dict[k]) + + def _save(self, checkpoint_path: str) -> None: + # generate the state dict + state_dict = { + k: o.get_state_dict() + for k, o in self._objects.items() + } + + # save the objects + if not os.path.exists(checkpoint_path): + os.makedirs(checkpoint_path, exist_ok=True) + data_path = os.path.join(checkpoint_path, 'data.pth') + torch.save(state_dict, data_path) diff --git a/tensorkit/backend/pytorch_/utils.py b/tensorkit/backend/pytorch_/utils.py new file mode 100644 index 0000000..c2efeab --- /dev/null +++ b/tensorkit/backend/pytorch_/utils.py @@ -0,0 +1,182 @@ +from typing import * + +from .core import jit + +__all__ = [ + 'split_channel_spatial_shape', 'unsplit_channel_spatial_shape', + 'calculate_deconv_output_padding', + 'calculate_conv_output_size', 'calculate_deconv_output_size', +] + + +@jit +def _check_conv_args(input_size: List[int], + padding: List[Tuple[int, int]], + arg_values: List[List[int]], + arg_names: List[str]) -> int: + spatial_ndims = len(input_size) + if spatial_ndims not in (1, 2, 3): + raise ValueError( + '`input_size` is not a 1d, 2d or 3d convolutional input size: ' + 'got input size {}.'.format(input_size) + ) + + if len(padding) != spatial_ndims: + raise ValueError( + '`padding` is not for {}d convolution: got `padding` {}.'. + format(spatial_ndims, padding) + ) + + for i in range(len(arg_values)): + arg_val = arg_values[i] + if len(arg_val) != spatial_ndims: + arg_name = arg_names[i] + raise ValueError( + '`{}` is not for {}d convolution: got `{}` {}.'. + format(arg_name, spatial_ndims, arg_name, arg_val) + ) + + return spatial_ndims + + +@jit +def split_channel_spatial_shape(shape: List[int]) -> Tuple[int, List[int]]: + if len(shape) not in (2, 3, 4): + raise ValueError('Invalid `shape`: {}'.format(shape)) + return shape[0], shape[1:] + + +@jit +def unsplit_channel_spatial_shape(channels: int, size: List[int]) -> List[int]: + if len(size) not in (1, 2, 3): + raise ValueError('Invalid `size`: {}'.format(size)) + return [channels] + size + + +@jit +def calculate_deconv_output_padding(input_size: List[int], + output_size: List[int], + kernel_size: List[int], + stride: List[int], + padding: List[Tuple[int, int]], + dilation: List[int]): + """ + Calculate the `output_padding` for deconvolution (conv_transpose). + + Args: + input_size: The input size (shape) of the spatial dimensions. 
+ output_size: The output size (shape) of the spatial dimensions. + kernel_size: The kernel size. + stride: The stride. + padding: The padding. + dilation: The dilation. + + Returns: + The output padding, can be used to construct a deconvolution + (conv transpose) layer. + + Raises: + ValueError: If any argument is invalid, or no output padding + can satisfy the specified arguments. + """ + spatial_ndims = _check_conv_args( + input_size, padding, + [output_size, kernel_size, stride, dilation], + ['output_size', 'kernel_size', 'stride', 'dilation'], + ) + + ret: List[int] = [] + for i in range(spatial_ndims): + op = output_size[i] - ( + (input_size[i] - 1) * stride[i] - + (padding[i][0] + padding[i][1]) + + (kernel_size[i] - 1) * dilation[i] + 1 + ) + if op < 0 or (op >= stride[i] and op >= dilation[i]): + raise ValueError( + 'No `output_padding` can satisfy the deconvolution task: ' + 'input_size == {}, output_size == {}, ' + 'kernel_size == {}, stride == {}, ' + 'padding == {}, dilation == {}.'.format( + input_size, output_size, kernel_size, stride, padding, + dilation + ) + ) + ret.append(op) + + return ret + + +@jit +def calculate_conv_output_size(input_size: List[int], + kernel_size: List[int], + stride: List[int], + padding: List[Tuple[int, int]], + dilation: List[int]) -> List[int]: + """ + Calculate the convolution output size for specified arguments. + + Args: + input_size: The input size (shape) of the spatial dimensions. + kernel_size: The kernel size. + stride: The stride. + padding: The padding. + dilation: The dilation. + + Returns: + The output size. + """ + spatial_ndims = _check_conv_args( + input_size, padding, + [input_size, kernel_size, stride, dilation], + ['input_size', 'kernel_size', 'stride', 'dilation'], + ) + + ret: List[int] = [] + for i in range(spatial_ndims): + ret.append( + 1 + (input_size[i] + padding[i][0] + padding[i][1] - + (kernel_size[i] - 1) * dilation[i] - 1) // stride[i] + ) + + return ret + + +@jit +def calculate_deconv_output_size(input_size: List[int], + kernel_size: List[int], + stride: List[int], + padding: List[Tuple[int, int]], + output_padding: List[int], + dilation: List[int]) -> List[int]: + """ + Calculate the deconvolution output size for specified arguments. + + Args: + input_size: The input size (shape) of the spatial dimensions. + kernel_size: The kernel size. + stride: The stride. + padding: The padding. + output_padding: The output padding. + dilation: The dilation. + + Returns: + The output size. 
+ """ + spatial_ndims = _check_conv_args( + input_size, padding, + [input_size, kernel_size, stride, output_padding, dilation], + ['input_size', 'kernel_size', 'stride', 'output_padding', 'dilation'], + ) + + ret: List[int] = [] + for i in range(spatial_ndims): + ret.append( + output_padding[i] + + (input_size[i] - 1) * stride[i] - + (padding[i][0] + padding[i][1]) + + (kernel_size[i] - 1) * dilation[i] + + 1 + ) + + return ret diff --git a/tensorkit/backend/train.py b/tensorkit/backend/train.py new file mode 100644 index 0000000..dbdd318 --- /dev/null +++ b/tensorkit/backend/train.py @@ -0,0 +1,9 @@ +from ..settings_ import settings + +if settings.backend == 'PyTorch': + from .pytorch_ import train + from .pytorch_.train import * +else: + RuntimeError(f'Backend {settings.backend} not supported.') + +__all__ = train.__all__ diff --git a/tensorkit/backend/utils.py b/tensorkit/backend/utils.py new file mode 100644 index 0000000..af30799 --- /dev/null +++ b/tensorkit/backend/utils.py @@ -0,0 +1,9 @@ +from ..settings_ import settings + +if settings.backend == 'PyTorch': + from .pytorch_ import utils + from .pytorch_.utils import * +else: + RuntimeError(f'Backend {settings.backend} not supported.') + +__all__ = utils.__all__ diff --git a/tensorkit/distributions/flow.py b/tensorkit/distributions/flow.py index 7926f96..6064903 100644 --- a/tensorkit/distributions/flow.py +++ b/tensorkit/distributions/flow.py @@ -1,7 +1,7 @@ from typing import * from .. import tensor as T -from ..flows import BaseFlow +from ..flows import Flow from ..stochastic import StochasticTensor from .base import Distribution from .utils import copy_distribution, get_overrided_parameterized @@ -18,7 +18,7 @@ class FlowDistribution(Distribution): _base_distribution: Distribution """The base distribution, which is transform by the `flow`.""" - flow: BaseFlow + flow: Flow """The flow instance, which transforms the `distribution`.""" _base_group_ndims: int @@ -26,7 +26,7 @@ class FlowDistribution(Distribution): def __init__(self, distribution: Distribution, - flow: BaseFlow, + flow: Flow, reparameterized: Optional[bool] = None, event_ndims: Optional[int] = None, validate_tensors: Optional[bool] = None): @@ -34,7 +34,7 @@ def __init__(self, if not isinstance(distribution, Distribution): raise TypeError(f'`distribution` is not an instance of ' f'`Distribution`: got {distribution!r}') - if not isinstance(flow, BaseFlow) and not T.is_jit_layer(flow): + if not isinstance(flow, Flow) and not T.is_jit_layer(flow): raise TypeError(f'`flow` is not a flow: {flow!r}') # `distribution` is required to be continuous and have float dtype. @@ -53,19 +53,19 @@ def __init__(self, # requirement: distribution.event_ndims <= flow.x_event_ndims <= distribution.value_ndims # otherwise the distribution cannot be transformed by the flow - if not (distribution.event_ndims <= flow.x_event_ndims <= + if not (distribution.event_ndims <= flow.get_x_event_ndims() <= distribution.value_ndims): raise ValueError( f'`distribution.event_ndims <= flow.x_event_ndims <= ' f'distribution.value_ndims` is not satisfied: ' f'`distribution.event_ndims` is {distribution.event_ndims}, ' - f'while `flow.x_event_ndims` is {flow.x_event_ndims}.' + f'while `flow.x_event_ndims` is {flow.get_x_event_ndims()}.' 
) # requirement: min_event_ndims <= event_ndims <= max_event_ndims - min_event_ndims = flow.y_event_ndims + min_event_ndims = flow.get_y_event_ndims() max_event_ndims = (distribution.value_ndims + - (flow.y_event_ndims - flow.x_event_ndims)) + (flow.get_y_event_ndims() - flow.get_x_event_ndims())) if event_ndims is not None and \ not (min_event_ndims <= event_ndims <= max_event_ndims): raise ValueError( @@ -76,7 +76,7 @@ def __init__(self, # obtain the arguments if event_ndims is None: - event_ndims = flow.y_event_ndims + event_ndims = flow.get_y_event_ndims() batch_ndims = max_event_ndims - event_ndims batch_shape = distribution.batch_shape[:batch_ndims] reparameterized = get_overrided_parameterized( @@ -87,7 +87,7 @@ def __init__(self, if validate_tensors is None: validate_tensors = distribution.validate_tensors - base_group_ndims = flow.x_event_ndims - distribution.event_ndims + base_group_ndims = flow.get_x_event_ndims() - distribution.event_ndims # now construct the instance super(FlowDistribution, self).__init__( diff --git a/tensorkit/examples/.gitignore b/tensorkit/examples/.gitignore new file mode 100644 index 0000000..68bcbc9 --- /dev/null +++ b/tensorkit/examples/.gitignore @@ -0,0 +1 @@ +results/ \ No newline at end of file diff --git a/tests/losses/__init__.py b/tensorkit/examples/__init__.py similarity index 100% rename from tests/losses/__init__.py rename to tensorkit/examples/__init__.py diff --git a/tensorkit/examples/classification/__init__.py b/tensorkit/examples/classification/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tensorkit/examples/classification/mnist.py b/tensorkit/examples/classification/mnist.py new file mode 100644 index 0000000..7b43db0 --- /dev/null +++ b/tensorkit/examples/classification/mnist.py @@ -0,0 +1,99 @@ +import mltk +import tensorkit as tk +from tensorkit import tensor as T +from tensorkit.examples import utils + + +class Config(mltk.Config): + max_epoch: int = 10 + batch_size: int = 32 + test_batch_size: int = 64 + lr: float = 0.001 + lr_anneal_ratio: float = 0.5 + lr_anneal_epochs: int = 5 + + +def main(exp: mltk.Experiment[Config]): + # prepare the data + train_stream, val_stream, test_stream = utils.get_mnist_streams( + batch_size=exp.config.batch_size, + test_batch_size=exp.config.test_batch_size, + val_batch_size=exp.config.test_batch_size, + val_portion=0.2, + flatten=True, + x_range=(-1., 1.), + ) + + # build the network + net: T.Module = tk.layers.SequentialBuilder(784). \ + set_args('dense', + activation=tk.layers.LeakyReLU, + data_init=tk.init.StdDataInit()). \ + dense(500). \ + dense(500). \ + linear(10). \ + log_softmax(). \ + build() + + # define the train and evaluate functions + def train_step(x, y): + logits = net(x) + loss = T.nn.cross_entropy_with_logits(logits, y, reduction='mean') + acc = utils.calculate_acc(logits, y) + return {'loss': loss, 'acc': acc} + + def evaluate(x, y): + with T.no_grad(): + logits = net(x) + acc = utils.calculate_acc(logits, y) + return {'acc': acc} + + # build the optimizer and the train loop + loop = mltk.TrainLoop(max_epoch=exp.config.max_epoch) + optimizer = tk.optim.Adam(tk.layers.get_parameters(net)) + lr_scheduler = tk.optim.lr_scheduler.AnnealingLR( + loop=loop, + optimizer=optimizer, + initial_lr=exp.config.lr, + ratio=exp.config.lr_anneal_ratio, + epochs=exp.config.lr_anneal_epochs + ) + + # add a callback to do early-stopping on the network parameters + # according to the validation metric. 
+ loop.add_callback( + mltk.callbacks.EarlyStopping( + checkpoint=tk.train.Checkpoint(net=net), + root_dir=exp.abspath('./checkpoint/early-stopping'), + # note for `loop.validation()`, the prefix "val_" will be + # automatically prepended to any metrics generated by the + # `evaluate` function. + metric_name='val_acc', + smaller_is_better=False, + ) + ) + + # run validation after every epoch + if val_stream is not None: + loop.run_after_every( + lambda: loop.validation().run(evaluate, val_stream), + epochs=1, + ) + + # run test after every epoch + loop.run_after_every( + lambda: loop.test().run(evaluate, test_stream), + epochs=1, + ) + + # train the model + utils.fit_model(loop=loop, optimizer=optimizer, fn=train_step, + stream=train_stream) + + # do the final test with the best network parameters (according to validation) + results = mltk.TestLoop().run(evaluate, test_stream) + + +if __name__ == '__main__': + with mltk.Experiment(Config) as exp: + main(exp) diff --git a/tensorkit/examples/classification/mnist_resnet.py b/tensorkit/examples/classification/mnist_resnet.py new file mode 100644 index 0000000..4b5aacd --- /dev/null +++ b/tensorkit/examples/classification/mnist_resnet.py @@ -0,0 +1,104 @@ +import mltk +import tensorkit as tk +from tensorkit import tensor as T +from tensorkit.examples import utils + + +class Config(mltk.Config): + max_epoch: int = 10 + batch_size: int = 32 + test_batch_size: int = 64 + lr: float = 0.01 + lr_anneal_ratio: float = 0.5 + lr_anneal_epochs: int = 2 + + +def main(exp: mltk.Experiment[Config]): + # prepare the data + train_stream, val_stream, test_stream = utils.get_mnist_streams( + batch_size=exp.config.batch_size, + test_batch_size=exp.config.test_batch_size, + val_batch_size=exp.config.test_batch_size, + val_portion=0.2, + x_range=(-1., 1.), + ) + + # build the network + net: T.Module = tk.layers.SequentialBuilder(train_stream.data_shapes[0]). \ + set_args('res_block2d', + kernel_size=3, + activation=tk.layers.LeakyReLU, + normalizer=tk.layers.BatchNorm2d, + dropout=0.5, + data_init=tk.init.StdDataInit()). \ + res_block2d(16). \ + res_block2d(32, stride=2). \ + res_block2d(32). \ + res_block2d(64, stride=2). \ + res_block2d(64). \ + global_avg_pool2d(). \ + linear(10). \ + log_softmax(). \ + build() + + # the train, test and validate functions + def train_step(x, y): + logits = net(x) + loss = T.nn.cross_entropy_with_logits(logits, y, reduction='mean') + acc = utils.calculate_acc(logits, y) + return {'loss': loss, 'acc': acc} + + def evaluate(x, y): + with T.no_grad(): + logits = net(x) + acc = utils.calculate_acc(logits, y) + return {'acc': acc} + + # build the optimizer and the train loop + loop = mltk.TrainLoop(max_epoch=exp.config.max_epoch) + optimizer = tk.optim.Adam(tk.layers.get_parameters(net)) + lr_scheduler = tk.optim.lr_scheduler.AnnealingLR( + loop=loop, + optimizer=optimizer, + initial_lr=exp.config.lr, + ratio=exp.config.lr_anneal_ratio, + epochs=exp.config.lr_anneal_epochs + ) + + # add a callback to do early-stopping on the network parameters + # according to the validation metric. + loop.add_callback( + mltk.callbacks.EarlyStopping( + checkpoint=tk.train.Checkpoint(net=net), + root_dir=exp.abspath('./checkpoint/early-stopping'), + # note for `loop.validation()`, the prefix "val_" will be + # automatically prepended to any metrics generated by the + # `evaluate` function. 
+ metric_name='val_acc', + smaller_is_better=False, + ) + ) + + # run validation after every epoch + loop.run_after_every( + lambda: loop.validation().run(evaluate, val_stream), + epochs=1, + ) + + # run test after every epoch + loop.run_after_every( + lambda: loop.test().run(evaluate, test_stream), + epochs=1, + ) + + # train the model + utils.fit_model(loop=loop, optimizer=optimizer, fn=train_step, + stream=train_stream) + + # do the final test with the best network parameters (according to validation) + results = mltk.TestLoop().run(evaluate, test_stream) + + +if __name__ == '__main__': + with mltk.Experiment(Config) as exp: + main(exp) diff --git a/tensorkit/examples/utils/__init__.py b/tensorkit/examples/utils/__init__.py new file mode 100644 index 0000000..b6d3a03 --- /dev/null +++ b/tensorkit/examples/utils/__init__.py @@ -0,0 +1,3 @@ +from .fit_model_ import * +from .ops import * +from .prepare_data import * diff --git a/tensorkit/examples/utils/fit_model_.py b/tensorkit/examples/utils/fit_model_.py new file mode 100644 index 0000000..eb6d3a7 --- /dev/null +++ b/tensorkit/examples/utils/fit_model_.py @@ -0,0 +1,39 @@ +from typing import * + +import mltk + +import tensorkit as tk +from tensorkit import tensor as T +from tensorkit.typing_ import TensorOrData + +__all__ = ['fit_model'] + + +def fit_model(loop: mltk.TrainLoop, + optimizer: tk.optim.Optimizer, + fn: Callable[..., Dict[str, TensorOrData]], + stream: mltk.DataStream, + loss_metric: str = 'loss', + minimize_loss: bool = True): + def step(*train_data): + optimizer.clear_grad() + with optimizer.capture_grad(): + metrics = fn(*train_data) + try: + loss = metrics[loss_metric] + if not isinstance(loss, T.Tensor): + raise TypeError() + except Exception: + raise ValueError( + f'`train_fn` is expected to return a dict, carrying ' + f'the train loss in the "{loss_metric}" entry: got ' + f'{metrics!r}.' + ) + else: + if minimize_loss: + optimizer.minimize(loss) + else: + optimizer.maximize(loss) + return metrics + + loop.run(step, stream) diff --git a/tensorkit/examples/utils/ops.py b/tensorkit/examples/utils/ops.py new file mode 100644 index 0000000..1fcf83b --- /dev/null +++ b/tensorkit/examples/utils/ops.py @@ -0,0 +1,9 @@ +from tensorkit import tensor as T + +__all__ = ['calculate_acc'] + + +def calculate_acc(logits: T.Tensor, y: T.Tensor) -> T.Tensor: + with T.no_grad(): + out_y = T.argmax(logits, axis=-1) + return T.reduce_mean(T.cast(T.equal(out_y, y), dtype=T.float32)) diff --git a/tensorkit/examples/utils/prepare_data.py b/tensorkit/examples/utils/prepare_data.py new file mode 100644 index 0000000..2f3950f --- /dev/null +++ b/tensorkit/examples/utils/prepare_data.py @@ -0,0 +1,69 @@ +from typing import * + +import mltk +import numpy as np + +import tensorkit as tk +from tensorkit import tensor as T + +__all__ = [ + 'get_mnist_streams' +] + + +def _scale_pixels_to_range(x, x_min, x_max): + scale = (x_max - x_min) / 255. 
+ return np.minimum(np.maximum(x * scale + x_min, x_min), x_max) + + +def get_mnist_streams(batch_size: int, + test_batch_size: Optional[int] = None, + val_batch_size: Optional[int] = None, + val_portion: Optional[float] = None, + flatten: bool = False, + x_range: Optional[Tuple[float, float]] = None, + y_dtype: Union[str, np.dtype] = np.int32, + as_tensor_stream: bool = True, + prefetch: Optional[int] = 5, + ) -> Tuple[mltk.DataStream, Optional[mltk.DataStream], mltk.DataStream]: + # check the arguments + if test_batch_size is None: + test_batch_size = batch_size + if val_batch_size is None: + val_batch_size = batch_size + + # load data + x_shape = [784] if flatten else [28, 28, 1] + (train_x, train_y), (test_x, test_y) = mltk.data.load_mnist( + x_shape=x_shape, x_dtype=np.float32, y_dtype=y_dtype) + + if not flatten: + train_x = tk.utils.numpy_channel_from_last_to_default2d(train_x) + test_x = tk.utils.numpy_channel_from_last_to_default2d(test_x) + + # scale pixels to the desired range + if x_range is not None: + train_x = _scale_pixels_to_range(train_x, *x_range) + test_x = _scale_pixels_to_range(test_x, *x_range) + + # split train & valid set, and construct the streams + def make_stream(arrays, **kwargs): + stream = mltk.DataStream.arrays(arrays, **kwargs) + if as_tensor_stream: + stream = tk.utils.as_tensor_stream(stream, prefetch=prefetch) + return stream + + if val_portion is not None: + (train_x, train_y), (val_x, val_y) = \ + mltk.utils.split_numpy_arrays([train_x, train_y], portion=val_portion) + val_stream = make_stream([val_x, val_y], batch_size=val_batch_size) + else: + val_stream = None + + train_stream = make_stream( + [train_x, train_y], batch_size=batch_size, shuffle=True, + skip_incomplete=True) + test_stream = make_stream([test_x, test_y], batch_size=test_batch_size) + + # return the streams + return train_stream, val_stream, test_stream diff --git a/tensorkit/flows/act_norm.py b/tensorkit/flows/act_norm.py index f2ec1fc..239da4b 100644 --- a/tensorkit/flows/act_norm.py +++ b/tensorkit/flows/act_norm.py @@ -2,7 +2,9 @@ from typing import * from .. import init, tensor as T -from ..tensor import Tensor, Module, reshape +from ..tensor import (Tensor, Module, reshape, shape, int_range, + calculate_mean_and_var, assert_finite, + as_tensor_backend, maximum, log, sqrt) from ..layers import * from ..typing_ import * from .core import * @@ -103,61 +105,64 @@ def __init__(self, def set_initialized(self, initialized: bool = True) -> None: self.initialized = initialized - @T.jit_ignore - def initialize_with_input(self, input: Tensor) -> bool: + @T.jit_method + def calculate_bias_and_pre_scale_for_init(self, input: Tensor) -> Tuple[Tensor, Tensor]: # PyTorch 1.3.1 bug: cannot mark this method as returning `None`. input_rank = T.rank(input) - if not isinstance(input, Tensor) or input_rank < self.event_ndims + 1: + if not isinstance(input, Tensor) or input_rank < self.x_event_ndims + 1: raise ValueError( - f'`input` is required to be a tensor with ' - f'at least {self.event_ndims + 1} dimensions: got input shape ' - f'{T.shape(input)!r}, while `event_ndims` of ' - f'the ActNorm layer {self!r} is {self.event_ndims}.') + '`input` is required to be a tensor with at least {} ' + 'dimensions: got input shape {}.'. 
+ format(self.x_event_ndims, shape(input)) + ) # calculate the axis to reduce feature_axis = input_rank + self.axis reduce_axis = ( - T.int_range(0, feature_axis) + - T.int_range(feature_axis + 1, input_rank) + int_range(0, feature_axis) + + int_range(feature_axis + 1, input_rank) ) # calculate sample mean and variance - input_mean, input_var = T.calculate_mean_and_var( + input_mean, input_var = calculate_mean_and_var( input, axis=reduce_axis, unbiased=True) - input_var = T.assert_finite(input_var, 'input_var') + input_var = assert_finite(input_var, 'input_var') # calculate the initial_value for `bias` bias = -input_mean # calculate the initial value for `pre_scale` - epsilon = T.as_tensor_backend(self.epsilon, dtype=input_var.dtype) + epsilon = as_tensor_backend(self.epsilon, dtype=input_var.dtype) if self.scale_type == 'exp': - pre_scale = -0.5 * T.log(T.maximum(input_var, epsilon)) + pre_scale = -0.5 * log(maximum(input_var, epsilon)) else: - pre_scale = 1. / T.sqrt(T.maximum(input_var, epsilon)) + pre_scale = 1. / sqrt(maximum(input_var, epsilon)) - # assign the initial values to the layer parameters + return bias, pre_scale + + @T.jit_ignore + def _initialize_act_norm(self, input: Tensor) -> None: + bias, pre_scale = self.calculate_bias_and_pre_scale_for_init(input) with T.no_grad(): T.assign(get_parameter(self, 'bias'), bias) T.assign(get_parameter(self, 'pre_scale'), pre_scale) - self.set_initialized(True) - return True @T.jit_method - def _forward(self, - input: Tensor, - input_log_det: Optional[Tensor], - inverse: bool, - compute_log_det: bool - ) -> Tuple[Tensor, Optional[Tensor]]: + def _transform(self, + input: Tensor, + input_log_det: Optional[Tensor], + inverse: bool, + compute_log_det: bool + ) -> Tuple[Tensor, Optional[Tensor]]: # initialize the parameters if not self.initialized: if inverse: raise RuntimeError( '`ActNorm` must be initialized with `inverse = False`.') - self.initialize_with_input(input) + # self.initialize_with_input(input) + self._initialize_act_norm(input) self.set_initialized(True) # do transformation @@ -169,7 +174,7 @@ def _forward(self, output, output_log_det = self.scale( input=input, pre_scale=pre_scale, - event_ndims=self.event_ndims, + event_ndims=self.x_event_ndims, input_log_det=input_log_det, compute_log_det=compute_log_det, inverse=True, @@ -179,7 +184,7 @@ def _forward(self, output, output_log_det = self.scale( input=input + shift, pre_scale=pre_scale, - event_ndims=self.event_ndims, + event_ndims=self.x_event_ndims, input_log_det=input_log_det, compute_log_det=compute_log_det, inverse=False, diff --git a/tensorkit/flows/coupling.py b/tensorkit/flows/coupling.py index 6caad02..4d1729a 100644 --- a/tensorkit/flows/coupling.py +++ b/tensorkit/flows/coupling.py @@ -2,7 +2,7 @@ from .. 
import tensor as T from ..tensor import Tensor, Module, concat, split -from .core import (FeatureMappingFlow, BaseScale, ExpScale, SigmoidScale, +from .core import (FeatureMappingFlow, Scale, ExpScale, SigmoidScale, LinearScale) __all__ = [ @@ -53,8 +53,8 @@ def __init__(self, shift_and_pre_scale: Module, axis: int = -1, event_ndims: int = 1, - scale: Union[str, BaseScale, Type[BaseScale], - Callable[[], BaseScale]] = 'exp', + scale: Union[str, Scale, Type[Scale], + Callable[[], Scale]] = 'exp', secondary: bool = False, sigmoid_scale_bias: float = 2., epsilon: float = T.EPSILON): @@ -106,7 +106,7 @@ def __init__(self, scale = INVALID if isinstance(scale, Module): - if not isinstance(scale, BaseScale) and not T.is_jit_layer(scale): + if not isinstance(scale, Scale) and not T.is_jit_layer(scale): scale = INVALID elif isinstance(scale, type) or callable(scale): if scale is SigmoidScale: @@ -131,12 +131,12 @@ def __init__(self, self.sigmoid_scale_bias = sigmoid_scale_bias self.epsilon = epsilon - def _forward(self, - input: Tensor, - input_log_det: Optional[Tensor], - inverse: bool, - compute_log_det: bool - ) -> Tuple[Tensor, Optional[Tensor]]: + def _transform(self, + input: Tensor, + input_log_det: Optional[Tensor], + inverse: bool, + compute_log_det: bool + ) -> Tuple[Tensor, Optional[Tensor]]: # split the tensor n_features = input.shape[self.axis] n1 = n_features // 2 @@ -152,7 +152,7 @@ def _forward(self, y2, output_log_det = self.scale( input=x2, pre_scale=pre_scale, - event_ndims=self.event_ndims, + event_ndims=self.x_event_ndims, input_log_det=input_log_det, compute_log_det=compute_log_det, inverse=True, @@ -162,7 +162,7 @@ def _forward(self, y2, output_log_det = self.scale( input=x2 + shift, pre_scale=pre_scale, - event_ndims=self.event_ndims, + event_ndims=self.x_event_ndims, input_log_det=input_log_det, compute_log_det=compute_log_det, inverse=False, @@ -180,8 +180,8 @@ class CouplingLayerNd(CouplingLayer): def __init__(self, shift_and_pre_scale: Module, - scale: Union[str, BaseScale, Type[BaseScale], - Callable[[], BaseScale]] = 'exp', + scale: Union[str, Scale, Type[Scale], + Callable[[], Scale]] = 'exp', secondary: bool = False, sigmoid_scale_bias: float = 2., epsilon: float = T.EPSILON): diff --git a/tensorkit/flows/rearrangement.py b/tensorkit/flows/rearrangement.py index 91d5ffb..539771c 100644 --- a/tensorkit/flows/rearrangement.py +++ b/tensorkit/flows/rearrangement.py @@ -54,12 +54,12 @@ def __init__(self, add_parameter(self, 'inv_permutation', inv_permutation, requires_grad=False) - def _forward(self, - input: Tensor, - input_log_det: Optional[Tensor], - inverse: bool, - compute_log_det: bool - ) -> Tuple[Tensor, Optional[Tensor]]: + def _transform(self, + input: Tensor, + input_log_det: Optional[Tensor], + inverse: bool, + compute_log_det: bool + ) -> Tuple[Tensor, Optional[Tensor]]: if inverse: output = index_select(input, self.inv_permutation, axis=self.axis) else: diff --git a/tensorkit/flows/reshape_.py b/tensorkit/flows/reshape_.py index 5f90b46..9f1eec8 100644 --- a/tensorkit/flows/reshape_.py +++ b/tensorkit/flows/reshape_.py @@ -11,7 +11,7 @@ ] -class ReshapeFlow(BaseFlow): +class ReshapeFlow(Flow): """ A flow which reshapes the last `x_event_ndims` of `x` into `y_event_shape`. 
@@ -26,7 +26,7 @@ class ReshapeFlow(BaseFlow): # log_det == tf.zeros([2]) """ - __constants__ = BaseFlow.__constants__ + ('x_event_shape', 'y_event_shape') + __constants__ = Flow.__constants__ + ('x_event_shape', 'y_event_shape') x_event_shape: List[int] y_event_shape: List[int] @@ -71,12 +71,12 @@ def check_shape(name, event_shape): self.x_event_shape = x_event_shape self.y_event_shape = y_event_shape - def _forward(self, - input: Tensor, - input_log_det: Optional[Tensor], - inverse: bool, - compute_log_det: bool - ) -> Tuple[Tensor, Optional[Tensor]]: + def _transform(self, + input: Tensor, + input_log_det: Optional[Tensor], + inverse: bool, + compute_log_det: bool + ) -> Tuple[Tensor, Optional[Tensor]]: if inverse: output = reshape_tail(input, self.y_event_ndims, self.x_event_shape) else: @@ -88,7 +88,7 @@ def _forward(self, return output, output_log_det -class SpaceDepthTransformFlow(BaseFlow): +class SpaceDepthTransformFlow(Flow): __constants__ = ('block_size',) @@ -115,21 +115,21 @@ def __init__(self, block_size: int): def _get_spatial_ndim(self) -> int: raise NotImplementedError() - def _transform(self, input: Tensor) -> Tensor: + def _transform_forward(self, input: Tensor) -> Tensor: raise NotImplementedError() - def _inv_transform(self, input: Tensor) -> Tensor: + def _transform_inverse(self, input: Tensor) -> Tensor: raise NotImplementedError() - def _forward(self, - input: Tensor, - input_log_det: Optional[Tensor], - inverse: bool, - compute_log_det: bool) -> Tuple[Tensor, Optional[Tensor]]: + def _transform(self, + input: Tensor, + input_log_det: Optional[Tensor], + inverse: bool, + compute_log_det: bool) -> Tuple[Tensor, Optional[Tensor]]: if inverse: - output = self._inv_transform(input) + output = self._transform_inverse(input) else: - output = self._transform(input) + output = self._transform_forward(input) output_log_det = input_log_det if compute_log_det and output_log_det is None: @@ -145,14 +145,14 @@ def _get_spatial_ndim(self) -> int: return 1 @jit_method - def _transform(self, input: Tensor) -> Tensor: + def _transform_forward(self, input: Tensor) -> Tensor: return space_to_depth1d(input, self.block_size) @jit_method - def _inv_transform(self, input: Tensor) -> Tensor: + def _transform_inverse(self, input: Tensor) -> Tensor: return depth_to_space1d(input, self.block_size) - def invert(self) -> BaseFlow: + def invert(self) -> Flow: return DepthToSpace1d(self.block_size) @@ -163,14 +163,14 @@ def _get_spatial_ndim(self) -> int: return 2 @jit_method - def _transform(self, input: Tensor) -> Tensor: + def _transform_forward(self, input: Tensor) -> Tensor: return space_to_depth2d(input, self.block_size) @jit_method - def _inv_transform(self, input: Tensor) -> Tensor: + def _transform_inverse(self, input: Tensor) -> Tensor: return depth_to_space2d(input, self.block_size) - def invert(self) -> BaseFlow: + def invert(self) -> Flow: return DepthToSpace2d(self.block_size) @@ -181,14 +181,14 @@ def _get_spatial_ndim(self) -> int: return 3 @jit_method - def _transform(self, input: Tensor) -> Tensor: + def _transform_forward(self, input: Tensor) -> Tensor: return space_to_depth3d(input, self.block_size) @jit_method - def _inv_transform(self, input: Tensor) -> Tensor: + def _transform_inverse(self, input: Tensor) -> Tensor: return depth_to_space3d(input, self.block_size) - def invert(self) -> BaseFlow: + def invert(self) -> Flow: return DepthToSpace3d(self.block_size) @@ -199,14 +199,14 @@ def _get_spatial_ndim(self) -> int: return 1 @jit_method - def _transform(self, input: 
Tensor) -> Tensor: + def _transform_forward(self, input: Tensor) -> Tensor: return depth_to_space1d(input, self.block_size) @jit_method - def _inv_transform(self, input: Tensor) -> Tensor: + def _transform_inverse(self, input: Tensor) -> Tensor: return space_to_depth1d(input, self.block_size) - def invert(self) -> BaseFlow: + def invert(self) -> Flow: return SpaceToDepth1d(self.block_size) @@ -217,14 +217,14 @@ def _get_spatial_ndim(self) -> int: return 2 @jit_method - def _transform(self, input: Tensor) -> Tensor: + def _transform_forward(self, input: Tensor) -> Tensor: return depth_to_space2d(input, self.block_size) @jit_method - def _inv_transform(self, input: Tensor) -> Tensor: + def _transform_inverse(self, input: Tensor) -> Tensor: return space_to_depth2d(input, self.block_size) - def invert(self) -> BaseFlow: + def invert(self) -> Flow: return SpaceToDepth2d(self.block_size) @@ -235,12 +235,12 @@ def _get_spatial_ndim(self) -> int: return 3 @jit_method - def _transform(self, input: Tensor) -> Tensor: + def _transform_forward(self, input: Tensor) -> Tensor: return depth_to_space3d(input, self.block_size) @jit_method - def _inv_transform(self, input: Tensor) -> Tensor: + def _transform_inverse(self, input: Tensor) -> Tensor: return space_to_depth3d(input, self.block_size) - def invert(self) -> BaseFlow: + def invert(self) -> Flow: return SpaceToDepth3d(self.block_size) diff --git a/tensorkit/flows/split_.py b/tensorkit/flows/split_.py index 9788f08..b055bfa 100644 --- a/tensorkit/flows/split_.py +++ b/tensorkit/flows/split_.py @@ -9,7 +9,7 @@ ] -class SplitFlow(BaseFlow): +class SplitFlow(Flow): """ A flow which splits input `x` into halves, apply different flows on each half, then concat the output together. @@ -26,7 +26,7 @@ class SplitFlow(BaseFlow): log_det = log_det1 + log_det2 """ - __constants__ = BaseFlow.__constants__ + ( + __constants__ = Flow.__constants__ + ( 'left', 'right', 'x_sections', 'x_axis', 'y_sections', 'y_axis', ) @@ -39,8 +39,8 @@ class SplitFlow(BaseFlow): def __init__(self, x_sections: Sequence[int], - left: BaseFlow, - right: Optional[BaseFlow] = None, + left: Flow, + right: Optional[Flow] = None, y_sections: Optional[Sequence[int]] = None, x_axis: int = -1, y_axis: Optional[int] = None): @@ -79,23 +79,23 @@ def __init__(self, f'two positive integers: got {y_sections!r}.') y_sections = list(map(int, y_sections)) - if not isinstance(left, BaseFlow) and not T.is_jit_layer(left): + if not isinstance(left, Flow) and not T.is_jit_layer(left): raise TypeError(f'`left` is not a flow: got {left!r}.') - x_event_ndims = left.x_event_ndims - y_event_ndims = left.y_event_ndims + x_event_ndims = left.get_x_event_ndims() + y_event_ndims = left.get_y_event_ndims() if right is not None: - if not isinstance(right, BaseFlow) and not T.is_jit_layer(right): + if not isinstance(right, Flow) and not T.is_jit_layer(right): raise TypeError(f'`right` is not a flow: got {right!r}.') - if right.x_event_ndims != x_event_ndims or \ - right.y_event_ndims != y_event_ndims: + if right.get_x_event_ndims() != x_event_ndims or \ + right.get_y_event_ndims() != y_event_ndims: raise ValueError( f'`left` and `right` flows must have same `x_event_ndims` ' f'and `y_event_ndims`: ' - f'got `left.x_event_ndims` == {left.x_event_ndims!r}, ' - f'`left.y_event_ndims` == {left.y_event_ndims}, ' - f'`right.x_event_ndims` == {right.x_event_ndims}, ' - f'and `right.y_event_ndims` == {right.y_event_ndims}.' 
+ f'got `left.x_event_ndims` == {left.get_x_event_ndims()!r}, ' + f'`left.y_event_ndims` == {left.get_y_event_ndims()}, ' + f'`right.x_event_ndims` == {right.get_x_event_ndims()}, ' + f'and `right.y_event_ndims` == {right.get_y_event_ndims()}.' ) if x_event_ndims != y_event_ndims: @@ -124,12 +124,12 @@ def __init__(self, self.y_sections = y_sections self.y_axis = y_axis - def _forward(self, - input: Tensor, - input_log_det: Optional[Tensor], - inverse: bool, - compute_log_det: bool - ) -> Tuple[Tensor, Optional[Tensor]]: + def _transform(self, + input: Tensor, + input_log_det: Optional[Tensor], + inverse: bool, + compute_log_det: bool + ) -> Tuple[Tensor, Optional[Tensor]]: if inverse: out_left, out_right = split( input, sections=self.y_sections, axis=self.y_axis) @@ -159,8 +159,8 @@ class SplitFlowNd(SplitFlow): def __init__(self, x_sections: Sequence[int], - left: BaseFlow, - right: Optional[BaseFlow] = None, + left: Flow, + right: Optional[Flow] = None, y_sections: Optional[Sequence[int]] = None): """ Construct a new convolutional split flow. @@ -181,13 +181,13 @@ def __init__(self, # type error deferred to the base class, thus we only check # the event ndims if `arg` looks like a flow. if arg is not None and hasattr(arg, 'x_event_ndims'): - if arg.x_event_ndims != event_ndims or \ - arg.y_event_ndims != event_ndims: + if arg.get_x_event_ndims() != event_ndims or \ + arg.get_y_event_ndims() != event_ndims: raise ValueError( f'The `x_event_ndims` and `y_event_ndims` of ' f'`{arg_name}` are required to be {event_ndims}: ' - f'got `x_event_ndims` == {arg.x_event_ndims}, ' - f'and `y_event_ndims` == {arg.y_event_ndims}.' + f'got `x_event_ndims` == {arg.get_x_event_ndims()}, ' + f'and `y_event_ndims` == {arg.get_y_event_ndims()}.' ) super().__init__( diff --git a/tensorkit/init/std_data_init.py b/tensorkit/init/std_data_init.py index f970148..0527771 100644 --- a/tensorkit/init/std_data_init.py +++ b/tensorkit/init/std_data_init.py @@ -21,7 +21,7 @@ def __init__(self, epsilon: float = T.EPSILON): super().__init__() self.epsilon = epsilon - def _forward(self, layer: Module, inputs: List[Tensor]) -> None: + def _init(self, layer: Module, inputs: List[Tensor]) -> None: if T.is_jit_layer(layer): raise TypeError(f'JIT compiled layer is not supported: got {layer!r}') if not isinstance(layer, CoreLinear): @@ -31,8 +31,9 @@ def _forward(self, layer: Module, inputs: List[Tensor]) -> None: f'{inputs!r}') # get the weight and bias + use_bias = layer.use_bias weight = layer.weight_store() - bias = layer.bias_store() if layer.bias_store is not None else None + bias = layer.bias_store() if use_bias else None is_conv_transpose = isinstance(layer, (LinearConvTranspose1d, LinearConvTranspose2d, LinearConvTranspose3d)) diff --git a/tensorkit/layers/__init__.py b/tensorkit/layers/__init__.py index 0d66afb..436ccd8 100644 --- a/tensorkit/layers/__init__.py +++ b/tensorkit/layers/__init__.py @@ -1,4 +1,5 @@ from .activation import * +from .builder import * from .composed import * from .contextual import * from .core import * diff --git a/tensorkit/layers/activation.py b/tensorkit/layers/activation.py index e86a875..fb5cd03 100644 --- a/tensorkit/layers/activation.py +++ b/tensorkit/layers/activation.py @@ -1,19 +1,19 @@ from ..tensor import Tensor, tanh -from ..tensor.nn import LEAKY_RELU_DEFAULT_SLOPE, relu, leaky_relu, sigmoid +from ..tensor.nn import * from .core import * __all__ = [ - 'ReLU', 'LeakyReLU', 'Tanh', 'Sigmoid', + 'ReLU', 'LeakyReLU', 'Tanh', 'Sigmoid', 'LogSoftmax', ] -class 
ReLU(BaseSingleVariateLayer): +class ReLU(BaseLayer): - def _forward(self, input: Tensor) -> Tensor: + def forward(self, input: Tensor) -> Tensor: return relu(input) -class LeakyReLU(BaseSingleVariateLayer): +class LeakyReLU(BaseLayer): __constants__ = ('negative_slope',) @@ -23,17 +23,23 @@ def __init__(self, negative_slope=LEAKY_RELU_DEFAULT_SLOPE): super().__init__() self.negative_slope = negative_slope - def _forward(self, input: Tensor) -> Tensor: + def forward(self, input: Tensor) -> Tensor: return leaky_relu(input, negative_slope=self.negative_slope) -class Tanh(BaseSingleVariateLayer): +class Tanh(BaseLayer): - def _forward(self, input: Tensor) -> Tensor: + def forward(self, input: Tensor) -> Tensor: return tanh(input) -class Sigmoid(BaseSingleVariateLayer): +class Sigmoid(BaseLayer): - def _forward(self, input: Tensor) -> Tensor: + def forward(self, input: Tensor) -> Tensor: return sigmoid(input) + + +class LogSoftmax(BaseLayer): + + def forward(self, input: Tensor) -> Tensor: + return log_softmax(input) diff --git a/tensorkit/layers/builder.py b/tensorkit/layers/builder.py new file mode 100644 index 0000000..9cae176 --- /dev/null +++ b/tensorkit/layers/builder.py @@ -0,0 +1,651 @@ +import re +from contextlib import contextmanager +from typing import * + +from mltk.utils import NOT_SET + +from .activation import * +from .composed import * +from .core import * +from .pool import * +from .resnet import * +from .shape_ import * +from .. import tensor as T +from ..arg_check import * +from ..typing_ import * + +__all__ = ['SequentialBuilder'] + + +def _get_layer_class(name: str) -> type: + if not _cached_layer_class_names_map: + # map the standard names of the layers to the layer classes + import tensorkit as tk + for attr in dir(tk.layers): + val = getattr(tk.layers, attr) + if isinstance(val, type) and issubclass(val, T.Module): + _cached_layer_class_names_map[attr.lower()] = val + + # aliases to XXXTransposeNd + for spatial_ndims in (1, 2, 3): + for prefix in ('LinearConv', 'Conv'): + # the original name and the layer class + orig_name = f'{prefix}Transpose{spatial_ndims}d' + layer_cls = getattr(tk.layers, orig_name) + + # the new name + alias_name = orig_name + alias_name = alias_name.replace('ConvTranspose', 'DeConv') + _cached_layer_class_names_map[alias_name.lower()] = layer_cls + + canonical_name = name.lower().replace('_', '') + if canonical_name not in _cached_layer_class_names_map: + raise ValueError(f'Unsupported layer class: {name!r}.') + return _cached_layer_class_names_map[canonical_name] + + +_cached_layer_class_names_map = {} + + +def _calculate_conv_output_size(in_size, kernel_size, stride, padding, dilation): + out_size = [] + for i, k, s, p, d in zip(in_size, kernel_size, stride, padding, dilation): + if i is None: + out_size.append(None) + else: + l = T.utils.calculate_conv_output_size([i], [k], [s], [p], [d])[0] + out_size.append(l) + return out_size + + +def _calculate_deconv_output_size(in_size, kernel_size, stride, padding, output_padding, dilation): + out_size = [] + for i, k, s, p, op, d in zip(in_size, kernel_size, stride, padding, output_padding, dilation): + if i is None: + out_size.append(None) + else: + l = T.utils.calculate_deconv_output_size(d[i], [k], [s], [p], [op], [d])[0] + out_size.append(l) + return out_size + + +if T.IS_CHANNEL_LAST: + def _split_channel_spatial(shape): + return shape[-1], shape[:-1] + + + def _unsplit_channel_spatial(channel, spatial): + return list(spatial) + [channel] + +else: + def _split_channel_spatial(shape): + return 
shape[0], shape[1:] + + + def _unsplit_channel_spatial(channel, spatial): + return [channel] + list(spatial) + + +class LayerArgs(object): + """A class that manages the default arguments for constructing layers.""" + + args: Dict[type, Dict[str, Any]] + + def __init__(self, layer_args: Optional['LayerArgs'] = None): + """ + Construct a new :class:`LayerArgs` instance. + + Args: + layer_args: Clone from this :class:`LayerArgs` instance. + """ + if layer_args is not None: + self.args = {type_: {key: val for key, val in type_args.items()} + for type_, type_args in layer_args.args.items()} + else: + self.args = {} + + def set_args(self, + type_or_types_: Union[ + str, Type[T.Module], Sequence[Union[str, Type[T.Module]]]], + **kwargs): + """ + Set default arguments for the specified layer types. + + Args: + type_or_types_: The layer type or types. + **kwargs: The default arguments to be set. + """ + if isinstance(type_or_types_, (str, type)): + type_or_types_ = [type_or_types_] + + for type_ in type_or_types_: + if isinstance(type_, str): + type_ = _get_layer_class(type_) + if type_ not in self.args: + self.args[type_] = {} + self.args[type_].update(kwargs) + + def get_kwargs(self, type_: Union[str, type], **kwargs) -> Dict[str, Any]: + """ + Get the merged keyword arguments for the specified layer type. + + Args: + type_: The layer type. + **kwargs: The overrided keyword arguments. + + Returns: + The merged keyword arguments. + """ + if isinstance(type_, str): + type_ = _get_layer_class(type_) + layer_args = self.args.get(type_) + if layer_args: + for key, val in layer_args.items(): + kwargs.setdefault(key, val) + return kwargs + + def build(self, type_: Union[str, type], *args, **kwargs): + """ + Build the layer with default arguments. + + Args: + type_: The layer type. + *args: The positional arguments. + **kwargs: The named arguments, which may override the default + arguments. + + Returns: + The built layer object. + """ + return type_(*args, **self.get_kwargs(type_, **kwargs)) + + +class SequentialBuilder(object): + """A class that helps to build a sequence layers.""" + + in_shape: List[Optional[int]] + out_shape: List[Optional[int]] + layer_args: LayerArgs + layers: List[T.Module] + + def __init__(self, + in_spec: Union[ + Optional[int], + Sequence[Optional[int]], + 'SequentialBuilder'] = NOT_SET, + *, + in_shape: Sequence[Optional[int]] = NOT_SET, + in_channels: Optional[int] = NOT_SET, + in_spatial_shape: List[int] = NOT_SET, + in_builder: 'SequentialBuilder' = NOT_SET): + """ + Construct a new :class:`SequentialBuilder`. + + Args: + in_spec: Positional argument, maybe the input shape, the number + of input channels, or another instance of `SequentialBuilder`, + whose layer arguments will be cloned and `out_shape` will be + used as the `in_shape` of this :class:`SequentialBuilder`. + in_shape: The input shape. + in_channels: The number of input channels. + in_spatial_shape: The input spatial shape. Can be specified + only if `in_channels` is specified, or `in_spec` is a int. + in_builder: Explicitly specify the previous sequential builder. + """ + + # parse the argument + if int(in_spec is not NOT_SET) + int(in_shape is not NOT_SET) + \ + int(in_channels is not NOT_SET) + int(in_builder is not NOT_SET) != 1: + raise ValueError( + 'One and only one of `in_spec`, `in_shape`, `in_channels` and ' + '`in_builder` should be specified.' 
+ ) + + if isinstance(in_spec, SequentialBuilder): + in_builder = in_spec + layer_args = LayerArgs(in_builder.layer_args) + elif hasattr(in_spec, '__iter__'): + in_shape = in_spec + layer_args = LayerArgs() + else: + in_channels = in_spec + layer_args = LayerArgs() + + if in_spatial_shape is not NOT_SET and in_channels is NOT_SET: + raise ValueError( + '`in_spatial_shape` can be specified only when `in_channels` ' + 'is specified, or `in_spec` is None or an integer.' + ) + + if in_shape is not NOT_SET: + in_shape = list(in_shape) + elif in_channels is not NOT_SET: + if in_spatial_shape is NOT_SET: + in_spatial_shape = [] + in_shape = _unsplit_channel_spatial(in_channels, in_spatial_shape) + else: + in_shape = list(in_builder.out_shape) + + # create the object + self.in_shape = in_shape + self.out_shape = in_shape + self.layer_args = layer_args + self.layers = [] + + def _assert_out_shape(self, + shape: Optional[Sequence[bool]] = None, + channel: Optional[bool] = None, + spatial: Optional[Sequence[bool]] = None, + at_least: bool = False) -> List[Optional[int]]: + if shape is None: + if channel is None: + raise ValueError('`channel` must be specified when `shape` is not.') + shape = _unsplit_channel_spatial(channel, spatial or []) + + ndims = len(shape) + if at_least: + if len(self.out_shape) < ndims: + raise ValueError( + f'The previous output shape is expected to be ' + f'at least {ndims}d: got output shape {self.out_shape}.' + ) + else: + if len(self.out_shape) != ndims: + raise ValueError( + f'The previous output shape is expected to be ' + f'exactly {ndims}d: got output shape {self.out_shape}.' + ) + + for i, (d, s) in enumerate( + zip(shape[::-1], self.out_shape[::-1]), 1): + if d and s is None: + raise ValueError( + f'Axis {-i} of the previous output shape is expected ' + f'to be deterministic: got output shape {self.out_shape}.' + ) + + return self.out_shape + + def _split_out_shape(self, + channel: Optional[bool] = None, + spatial: Optional[Sequence[bool]] = None + ) -> Tuple[Optional[int], List[Optional[int]]]: + out_shape = self._assert_out_shape(channel=channel, spatial=spatial) + return _split_channel_spatial(out_shape) + + def set_args(self, + type_or_types_: Union[str, type, Sequence[Union[str, type]]], + **kwargs) -> 'SequentialBuilder': + """ + Set layer default arguments. + + Args: + type_or_types_: The layer type or types. + **kwargs: The default arguments. + + Returns: + This sequential builder object. + """ + self.layer_args.set_args(type_or_types_, **kwargs) + return self + + @contextmanager + def arg_scope(self, + type_or_types_: Union[str, type, Sequence[Union[str, type]]], + **kwargs) -> Generator[None, None, None]: + """ + Set layer default arguments within a scope, which will be restore to + the previous values after exiting the scope. + + Args: + type_or_types_: The layer type or types. + **kwargs: The default arguments. + """ + old_layer_args = self.layer_args + layer_args = LayerArgs(old_layer_args) + layer_args.set_args(type_or_types_, **kwargs) + self.layer_args = layer_args + try: + yield + finally: + self.layer_args = old_layer_args + + def add(self, + layer: T.Module, + out_shape: List[Optional[int]] = NOT_SET, + *, + out_channels: Optional[int] = NOT_SET, + out_spatial_shape: List[Optional[int]] = NOT_SET + ) -> 'SequentialBuilder': + """ + Manually add a layer to this builder. + + Args: + layer: The layer to be added. + out_shape: The new output shape. + out_channels: The new output channels. 
Should be specified and + only be specified when `out_shape` is not. + out_spatial_shape: The new spatial shape. Should only be specified + when `out_channels` is specified. + + Returns: + This sequential builder object. + """ + if (out_shape is NOT_SET) == (out_channels is NOT_SET): + raise ValueError('Either `out_shape` or `out_channels` should be ' + 'specified, but not both.') + if out_spatial_shape is not NOT_SET and out_channels is NOT_SET: + raise ValueError('`out_spatial_shape` can only be specified when ' + '`out_channels` is specified.') + + if out_channels is not NOT_SET: + if out_spatial_shape is NOT_SET: + out_spatial_shape = [] + out_shape = _unsplit_channel_spatial(out_channels, out_spatial_shape) + + self.layers.append(layer) + self.out_shape = out_shape + return self + + def build(self, + flatten_to_ndims: bool = True, + disable_jit: bool = False) -> T.Module: + """ + Build the sequential layer. + + Args: + flatten_to_ndims: Whether or not to wrap the sequential layer + with a :class:`FlattenToNDims` layer? + disable_jit: Whether or not to disable JIT? + + Returns: + The built sequential layer. + """ + if not self.layers: + raise RuntimeError('No layer has been added.') + elif len(self.layers) == 1: + layer = self.layers[0] + else: + layer = Sequential(self.layers) + + if flatten_to_ndims: + layer = FlattenToNDims(layer, ndims=len(self.in_shape) + 1) + if not disable_jit: + layer = T.jit_compile(layer) + return layer + + # ---- activation ---- + def _make_activation(self, type_): + self._assert_out_shape((False,), at_least=True) + layer = self.layer_args.build(type_) + return self.add(layer, self.out_shape) + + def relu(self): + return self._make_activation(ReLU) + + def leaky_relu(self): + return self._make_activation(LeakyReLU) + + def sigmoid(self): + return self._make_activation(Sigmoid) + + def tanh(self): + return self._make_activation(Tanh) + + def log_softmax(self): + return self._make_activation(LogSoftmax) + + # ---- fully-connected layers ---- + def _fully_connected(self, layer_cls, out_features, **kwargs): + in_features, _ = self._split_out_shape(True) + layer = self.layer_args.build(layer_cls, in_features, out_features, **kwargs) + return self.add(layer, [out_features]) + + def linear(self, out_features: int, **kwargs): + return self._fully_connected(Linear, out_features, **kwargs) + + def dense(self, out_features: int, **kwargs): + return self._fully_connected(Dense, out_features, **kwargs) + + # ---- convolution layers ---- + def _conv_nd(self, spatial_ndims, conv_cls, out_channels, **kwargs): + in_channels, in_size = self._split_out_shape(True, [False] * spatial_ndims) + + # validate the arguments + kwargs = self.layer_args.get_kwargs(conv_cls, **kwargs) + kernel_size = validate_conv_size('kernel_size', kwargs['kernel_size'], spatial_ndims) + stride = validate_conv_size('stride', kwargs.get('stride', 1), spatial_ndims) + dilation = validate_conv_size('dilation', kwargs.get('dilation', 1), spatial_ndims) + padding = validate_padding( + kwargs.get('padding', PaddingMode.DEFAULT), kernel_size, dilation, spatial_ndims) + + # calculate the output shape + out_size = _calculate_conv_output_size(in_size, kernel_size, stride, padding, dilation) + out_shape = _unsplit_channel_spatial(out_channels, out_size) + + # build the layer + layer = conv_cls(in_channels, out_channels, **kwargs) + return self.add(layer, out_shape) + + def linear_conv1d(self, + out_channels: int, + **kwargs) -> 'SequentialBuilder': + return self._conv_nd(1, LinearConv1d, out_channels, **kwargs) + 
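+    # A minimal usage sketch (illustration only, not part of the original code;
+    # the shapes shown assume a channel-first layout and a padding mode that
+    # keeps the spatial size unchanged at stride 1):
+    #
+    #   builder = SequentialBuilder([1, 28, 28])
+    #   builder.conv2d(16, kernel_size=3)            # out_shape ~ [16, 28, 28]
+    #   builder.conv2d(32, kernel_size=3, stride=2)  # out_shape ~ [32, 14, 14]
+    #   builder.global_avg_pool2d().linear(10)       # out_shape == [10]
+    #   classifier = builder.build()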
+ def linear_conv2d(self, + out_channels: int, + **kwargs) -> 'SequentialBuilder': + return self._conv_nd(2, LinearConv2d, out_channels, **kwargs) + + def linear_conv3d(self, + out_channels: int, + **kwargs) -> 'SequentialBuilder': + return self._conv_nd(3, LinearConv3d, out_channels, **kwargs) + + def conv1d(self, + out_channels: int, + **kwargs) -> 'SequentialBuilder': + return self._conv_nd(1, Conv1d, out_channels, **kwargs) + + def conv2d(self, + out_channels: int, + **kwargs) -> 'SequentialBuilder': + return self._conv_nd(2, Conv2d, out_channels, **kwargs) + + def conv3d(self, + out_channels: int, + **kwargs) -> 'SequentialBuilder': + return self._conv_nd(3, Conv3d, out_channels, **kwargs) + + def res_block1d(self, + out_channels: int, + **kwargs) -> 'SequentialBuilder': + return self._conv_nd(1, ResBlock1d, out_channels, **kwargs) + + def res_block2d(self, + out_channels: int, + **kwargs) -> 'SequentialBuilder': + return self._conv_nd(2, ResBlock2d, out_channels, **kwargs) + + def res_block3d(self, + out_channels: int, + **kwargs) -> 'SequentialBuilder': + return self._conv_nd(3, ResBlock3d, out_channels, **kwargs) + + # ---- deconvolution layers ---- + def _deconv_nd(self, spatial_ndims, deconv_cls, out_channels, output_size, **kwargs): + in_channels, in_size = self._split_out_shape(True, [False] * spatial_ndims) + + # validate the arguments + kwargs = self.layer_args.get_kwargs(deconv_cls, **kwargs) + kernel_size = validate_conv_size('kernel_size', kwargs['kernel_size'], spatial_ndims) + stride = validate_conv_size('stride', kwargs.get('stride', 1), spatial_ndims) + dilation = validate_conv_size('dilation', kwargs.get('dilation', 1), spatial_ndims) + padding = validate_padding( + kwargs.get('padding', PaddingMode.DEFAULT), kernel_size, dilation, spatial_ndims) + + if 'output_padding' in kwargs and output_size is not NOT_SET: + raise ValueError('`output_padding` and `out_shape` cannot be both specified.') + elif output_size is not NOT_SET: + if len(output_size) != spatial_ndims: + raise ValueError( + f'`output_size` is expected to be {spatial_ndims}d: ' + f'got {output_size}.' + ) + if any(i is None for i in in_size): + raise ValueError( + f'Specifying `output_size` instead of `output_padding` ' + f'is supported only when the previous output shape ' + f'is all deterministic.' 
+ ) + out_size = output_size + output_padding = [ + T.utils.calculate_deconv_output_padding(*args) + for args in zip( + in_size, output_size, kernel_size, stride, padding, dilation) + ] + elif 'output_padding' in kwargs: + output_padding = validate_output_padding( + kwargs.get('output_padding', 0), stride, dilation, spatial_ndims) + out_size = None + else: + output_padding = [0] * spatial_ndims + out_size = None + + # calculate the output shape if not specified + if out_size is None: + out_size = _calculate_deconv_output_size( + in_size, kernel_size, stride, padding, output_padding, dilation) + out_shape = _unsplit_channel_spatial(out_channels, out_size) + + # build the layer + kwargs['output_padding'] = output_padding + layer = deconv_cls(in_channels, out_channels, **kwargs) + return self.add(layer, out_shape) + + def linear_conv_transpose1d(self, + out_channels: int, + output_size: List[int] = NOT_SET, + **kwargs) -> 'SequentialBuilder': + return self._deconv_nd( + 1, LinearConvTranspose1d, out_channels, output_size, **kwargs) + + def linear_conv_transpose2d(self, + out_channels: int, + output_size: List[int] = NOT_SET, + **kwargs) -> 'SequentialBuilder': + return self._deconv_nd( + 2, LinearConvTranspose2d, out_channels, output_size, **kwargs) + + def linear_conv_transpose3d(self, + out_channels: int, + output_size: List[int] = NOT_SET, + **kwargs) -> 'SequentialBuilder': + return self._deconv_nd( + 3, LinearConvTranspose3d, out_channels, output_size, **kwargs) + + def conv_transpose1d(self, + out_channels: int, + output_size: List[int] = NOT_SET, + **kwargs) -> 'SequentialBuilder': + return self._deconv_nd( + 1, ConvTranspose1d, out_channels, output_size, **kwargs) + + def conv_transpose2d(self, + out_channels: int, + output_size: List[int] = NOT_SET, + **kwargs) -> 'SequentialBuilder': + return self._deconv_nd( + 2, ConvTranspose2d, out_channels, output_size, **kwargs) + + def conv_transpose3d(self, + out_channels: int, + output_size: List[int] = NOT_SET, + **kwargs) -> 'SequentialBuilder': + return self._deconv_nd( + 3, ConvTranspose3d, out_channels, output_size, **kwargs) + + def res_block_transpose1d(self, + out_channels: int, + output_size: List[int] = NOT_SET, + **kwargs) -> 'SequentialBuilder': + return self._deconv_nd( + 1, ResBlockTranspose1d, out_channels, output_size, **kwargs) + + def res_block_transpose2d(self, + out_channels: int, + output_size: List[int] = NOT_SET, + **kwargs) -> 'SequentialBuilder': + return self._deconv_nd( + 2, ResBlockTranspose2d, out_channels, output_size, **kwargs) + + def res_block_transpose3d(self, + out_channels: int, + output_size: List[int] = NOT_SET, + **kwargs) -> 'SequentialBuilder': + return self._deconv_nd( + 3, ResBlockTranspose3d, out_channels, output_size, **kwargs) + + # aliases for the deconvolution layers + linear_deconv1d = linear_conv_transpose1d + linear_deconv2d = linear_conv_transpose2d + linear_deconv3d = linear_conv_transpose3d + deconv1d = conv_transpose1d + deconv2d = conv_transpose2d + deconv3d = conv_transpose3d + + # ---- pool layers ---- + def _pool_nd(self, spatial_ndims, pool_cls, **kwargs): + in_channels, in_size = self._split_out_shape(True, [False] * spatial_ndims) + + # validate the arguments + kwargs = self.layer_args.get_kwargs(pool_cls, **kwargs) + kernel_size = validate_conv_size('kernel_size', kwargs['kernel_size'], spatial_ndims) + stride = validate_conv_size('stride', kwargs.get('stride', kernel_size), spatial_ndims) + dilation = [1] * spatial_ndims + padding = validate_padding(kwargs.get('padding', 
PaddingMode.DEFAULT), kernel_size, dilation, spatial_ndims) + + # calculate the output shape + out_size = _calculate_conv_output_size(in_size, kernel_size, stride, padding, dilation) + out_shape = _unsplit_channel_spatial(in_channels, out_size) + + # build the layer + layer = pool_cls(**kwargs) + return self.add(layer, out_shape) + + def avg_pool1d(self, **kwargs) -> 'SequentialBuilder': + return self._pool_nd(1, AvgPool1d, **kwargs) + + def avg_pool2d(self, **kwargs) -> 'SequentialBuilder': + return self._pool_nd(2, AvgPool2d, **kwargs) + + def avg_pool3d(self, **kwargs) -> 'SequentialBuilder': + return self._pool_nd(3, AvgPool3d, **kwargs) + + def max_pool1d(self, **kwargs) -> 'SequentialBuilder': + return self._pool_nd(1, MaxPool1d, **kwargs) + + def max_pool2d(self, **kwargs) -> 'SequentialBuilder': + return self._pool_nd(2, MaxPool2d, **kwargs) + + def max_pool3d(self, **kwargs) -> 'SequentialBuilder': + return self._pool_nd(3, MaxPool3d, **kwargs) + + def _global_avg_pool_nd(self, spatial_ndims, pool_cls, **kwargs): + in_channels, in_size = self._split_out_shape(True, [False] * spatial_ndims) + keepdims = kwargs.get('keepdims', False) + if keepdims: + out_shape = _unsplit_channel_spatial(in_channels, [1] * spatial_ndims) + else: + out_shape = [in_channels] + layer = pool_cls(**self.layer_args.get_kwargs(pool_cls, **kwargs)) + return self.add(layer, out_shape) + + def global_avg_pool1d(self, **kwargs) -> 'SequentialBuilder': + return self._global_avg_pool_nd(1, GlobalAvgPool1d, **kwargs) + + def global_avg_pool2d(self, **kwargs) -> 'SequentialBuilder': + return self._global_avg_pool_nd(2, GlobalAvgPool2d, **kwargs) + + def global_avg_pool3d(self, **kwargs) -> 'SequentialBuilder': + return self._global_avg_pool_nd(3, GlobalAvgPool3d, **kwargs) diff --git a/tensorkit/layers/contextual.py b/tensorkit/layers/contextual.py index 5b4769f..1251830 100644 --- a/tensorkit/layers/contextual.py +++ b/tensorkit/layers/contextual.py @@ -1,6 +1,6 @@ from typing import * -from ..tensor import Tensor, jit_method +from ..tensor import Tensor from .core import * __all__ = [ @@ -8,37 +8,42 @@ ] -class IgnoreContext(BaseContextualLayer): +class IgnoreContext(BaseLayer): """ A module which simply returns the input, ignoring any context. """ - @jit_method - def _forward(self, input: Tensor, context: List[Tensor]) -> Tensor: + def forward(self, + input: Tensor, + context: Optional[List[Tensor]] = None) -> Tensor: return input -class AddContext(BaseContextualLayer): +class AddContext(BaseLayer): """ A module which adds the input with the contexts. """ - @jit_method - def _forward(self, input: Tensor, context: List[Tensor]) -> Tensor: + def forward(self, + input: Tensor, + context: Optional[List[Tensor]] = None) -> Tensor: output = input - for t in context: - output = output + t + if context is not None: + for t in context: + output = output + t return output -class MultiplyContext(BaseContextualLayer): +class MultiplyContext(BaseLayer): """ A module which multiplies the input with the contexts. 
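    For example, `layer(x, [a, b])` computes `x * a * b`; when `context` is
    None or empty, the input is returned unchanged.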
""" - @jit_method - def _forward(self, input: Tensor, context: List[Tensor]) -> Tensor: + def forward(self, + input: Tensor, + context: Optional[List[Tensor]] = None) -> Tensor: output = input - for t in context: - output = output * t + if context is not None: + for t in context: + output = output * t return output diff --git a/tensorkit/layers/flow_layer.py b/tensorkit/layers/flow_layer.py index d780b0d..0ad547e 100644 --- a/tensorkit/layers/flow_layer.py +++ b/tensorkit/layers/flow_layer.py @@ -1,4 +1,4 @@ -from ..backend.flows import BaseFlow +from ..backend.flows import Flow from ..tensor import Tensor, Module, is_jit_layer from .core import * @@ -8,7 +8,7 @@ ] -class FlowLayer(BaseSingleVariateLayer): +class FlowLayer(BaseLayer): """ Wrap a :class:`tk.flows.BaseFlow` into a single-input, single-output layer. """ @@ -17,13 +17,13 @@ class FlowLayer(BaseSingleVariateLayer): flow: Module - def __init__(self, flow: BaseFlow): - if not is_jit_layer(flow) and not isinstance(flow, BaseFlow): + def __init__(self, flow: Flow): + if not is_jit_layer(flow) and not isinstance(flow, Flow): raise TypeError(f'`flow` must be a flow: got {flow!r}') super().__init__() self.flow = flow - def _forward(self, input: Tensor) -> Tensor: + def forward(self, input: Tensor) -> Tensor: output, output_log_det = self.flow(input, compute_log_det=False) return output diff --git a/tensorkit/layers/gated.py b/tensorkit/layers/gated.py index 99f3241..83bdaa3 100644 --- a/tensorkit/layers/gated.py +++ b/tensorkit/layers/gated.py @@ -7,7 +7,7 @@ ] -class BaseGated(BaseSingleVariateLayer): +class BaseGated(BaseLayer): __constants__ = ('feature_axis', 'num_features', 'gate_bias', 'activation') @@ -27,8 +27,7 @@ def __init__(self, def _apply_activation(self, input: Tensor) -> Tensor: raise NotImplementedError() - @jit_method - def _forward(self, input: Tensor) -> Tensor: + def forward(self, input: Tensor) -> Tensor: if input.shape[self.feature_axis] != self.num_features * 2: raise ValueError( 'The shape of the pre-gated output is invalid: ' diff --git a/tensorkit/layers/pixelcnn.py b/tensorkit/layers/pixelcnn.py index 309ac4b..52a551c 100644 --- a/tensorkit/layers/pixelcnn.py +++ b/tensorkit/layers/pixelcnn.py @@ -72,7 +72,7 @@ def shifted_deconv(deconv_cls, dilation=dilation, padding=padding, **kwargs) -class SpatialShift(BaseSingleVariateLayer): +class SpatialShift(BaseLayer): __constants__ = ('shift',) @@ -85,11 +85,11 @@ def __init__(self, shift: Sequence[int]): else: self.shift = list(shift) - def _forward(self, input: Tensor) -> Tensor: + def forward(self, input: Tensor) -> Tensor: return shift(input, self.shift) -class BranchAndAdd(BaseSingleVariateLayer): +class BranchAndAdd(BaseLayer): __constants__ = ('branches',) @@ -99,7 +99,7 @@ def __init__(self, *branches: Union[Module, Sequence[Module]]): super().__init__() self.branches = ModuleList(flatten_nested_layers(branches)) - def _forward(self, input: Tensor) -> Tensor: + def forward(self, input: Tensor) -> Tensor: branch_outputs: List[Tensor] = [] for branch in self.branches: branch_outputs.append(branch(input)) @@ -109,7 +109,7 @@ def _forward(self, input: Tensor) -> Tensor: return output -class AddOnesChannelNd(BaseSingleVariateLayer): +class AddOnesChannelNd(BaseLayer): __constants__ = ('_channel_axis', '_spatial_ndims') @@ -128,7 +128,7 @@ def __init__(self): def _get_spatial_ndims(self) -> int: raise NotImplementedError() - def _forward(self, input: Tensor) -> Tensor: + def forward(self, input: Tensor) -> Tensor: channel_shape = shape(input) 
channel_shape[self._channel_axis] = 1 @@ -154,7 +154,7 @@ def _get_spatial_ndims(self) -> int: return 3 -class AddLeadingContext(BaseContextualLayer): +class AddLeadingContext(BaseLayer): __constants__ = ('first_n',) @@ -162,14 +162,18 @@ def __init__(self, first_n: int): super().__init__() self.first_n = first_n - def _forward(self, input: Tensor, context: List[Tensor]) -> Tensor: + def forward(self, + input: Tensor, + context: Optional[List[Tensor]] = None) -> Tensor: + if context is None: # pragma: no cover + raise RuntimeError('`context` is required.') output = input for i in range(self.first_n): output = output + context[i] return output -class IgnoreLeadingContext(BaseContextualLayer): +class IgnoreLeadingContext(BaseLayer): __constants__ = ('wrapped', 'first_n',) @@ -181,7 +185,11 @@ def __init__(self, wrapped: Module, first_n: int): self.wrapped = wrapped self.first_n = first_n - def _forward(self, input: Tensor, context: List[Tensor]) -> Tensor: + def forward(self, + input: Tensor, + context: Optional[List[Tensor]] = None) -> Tensor: + if context is None: # pragma: no cover + raise RuntimeError('`context` is required.') return self.wrapped(input, context[self.first_n:]) @@ -226,7 +234,7 @@ def validate_pixelcnn_kernel_size(kernel_size, spatial_ndims: int) -> List[int]: # ---- pixelcnn input layer, which constructs the multiple pixelcnn stacks ---- -class PixelCNNInputNd(BaseSplitLayer): +class PixelCNNInputNd(BaseLayer): __constants__ = ('_spatial_ndims', 'add_ones_channel', 'stacks',) @@ -320,7 +328,7 @@ def __init__(self, def _get_spatial_ndims(self) -> int: raise NotImplementedError() - def _forward(self, input: Tensor) -> List[Tensor]: + def forward(self, input: Tensor) -> List[Tensor]: if rank(input) != self._spatial_ndims + 2: raise ValueError( '`input` is expected to be {}d: got input shape {}.'. @@ -371,7 +379,7 @@ def _get_spatial_ndims(self) -> int: # ---- pixelcnn output layer, which obtains the final output from the stacks ---- -class PixelCNNOutputNd(BaseMergeLayer): +class PixelCNNOutputNd(BaseLayer): __constants__ = ('_spatial_ndims',) @@ -384,7 +392,7 @@ def __init__(self): def _get_spatial_ndims(self) -> int: raise NotImplementedError() - def _forward(self, inputs: List[Tensor]) -> Tensor: + def forward(self, inputs: List[Tensor]) -> Tensor: if len(inputs) != self._spatial_ndims: raise ValueError( '`len(inputs)` is expected to be {}: got {} tensors.'. 
@@ -424,7 +432,7 @@ def _get_spatial_ndims(self) -> int: # ---- pixelcnn layers ---- -class PixelCNNResBlockNd(BaseMultiVariateContextualLayer): +class PixelCNNResBlockNd(BaseLayer): __constants__ = ('resnet_layers',) @@ -549,7 +557,12 @@ def __init__(self, super().__init__() self.resnet_layers = ModuleList(resnet_layers) - def _forward(self, inputs: List[Tensor], context: List[Tensor]) -> List[Tensor]: + def forward(self, + inputs: List[Tensor], + context: Optional[List[Tensor]] = None) -> List[Tensor]: + if context is None: + context = [] + resnet_outputs: List[Tensor] = [] i = 0 for resnet_layer in self.resnet_layers: @@ -585,7 +598,7 @@ def _get_spatial_ndims(self) -> int: # ---- pixelcnn down-sampling conv layers and up-sampling deconv layers ---- -class PixelCNNConvNd(BaseMultiVariateContextualLayer): +class PixelCNNConvNd(BaseLayer): __constants__ = ('conv_layers',) @@ -654,7 +667,9 @@ def __init__(self, def _get_spatial_ndims(self) -> int: raise NotImplementedError() - def _forward(self, inputs: List[Tensor], context: List[Tensor]) -> List[Tensor]: + def forward(self, + inputs: List[Tensor], + context: Optional[List[Tensor]] = None) -> List[Tensor]: conv_outputs: List[Tensor] = [] i = 0 for conv_layer in self.conv_layers: @@ -703,7 +718,7 @@ def _get_spatial_ndims(self) -> int: return 3 -class PixelCNNConvTransposeNd(BaseMultiVariateContextualLayer): +class PixelCNNConvTransposeNd(BaseLayer): __constants__ = ('deconv_layers',) @@ -775,7 +790,9 @@ def __init__(self, def _get_spatial_ndims(self) -> int: raise NotImplementedError() - def _forward(self, inputs: List[Tensor], context: List[Tensor]) -> List[Tensor]: + def forward(self, + inputs: List[Tensor], + context: Optional[List[Tensor]] = None) -> List[Tensor]: deconv_outputs: List[Tensor] = [] i = 0 for conv_layer in self.deconv_layers: @@ -825,7 +842,7 @@ def _get_spatial_ndims(self) -> int: # ---- pixelcnn network composer ---- -class PixelCNNNd(BaseContextualLayer): +class PixelCNNNd(BaseLayer): __constants__ = ('input_layer', 'layers', 'output_layer') @@ -865,7 +882,9 @@ def __init__(self, def _get_spatial_ndims(self) -> int: raise NotImplementedError() - def _forward(self, input: Tensor, context: List[Tensor]) -> Tensor: + def forward(self, + input: Tensor, + context: Optional[List[Tensor]] = None) -> Tensor: outputs = self.input_layer(input) for block in self.layers: outputs = block(outputs, context) diff --git a/tensorkit/layers/pool.py b/tensorkit/layers/pool.py index f1ffd17..21a09aa 100644 --- a/tensorkit/layers/pool.py +++ b/tensorkit/layers/pool.py @@ -15,7 +15,7 @@ # ---- average pooling ---- -class AvgPoolNd(BaseSingleVariateLayer): +class AvgPoolNd(BaseLayer): __constants__ = ('kernel_size', 'stride', 'padding', 'count_padded_zeros') @@ -67,7 +67,7 @@ class AvgPool1d(AvgPoolNd): def _get_spatial_ndims(self) -> int: return 1 - def _forward(self, input: Tensor) -> Tensor: + def forward(self, input: Tensor) -> Tensor: return avg_pool1d( input, kernel_size=self.kernel_size, stride=self.stride, padding=self.padding, count_padded_zeros=self.count_padded_zeros, @@ -79,7 +79,7 @@ class AvgPool2d(AvgPoolNd): def _get_spatial_ndims(self) -> int: return 2 - def _forward(self, input: Tensor) -> Tensor: + def forward(self, input: Tensor) -> Tensor: return avg_pool2d( input, kernel_size=self.kernel_size, stride=self.stride, padding=self.padding, count_padded_zeros=self.count_padded_zeros, @@ -91,7 +91,7 @@ class AvgPool3d(AvgPoolNd): def _get_spatial_ndims(self) -> int: return 3 - def _forward(self, input: Tensor) -> Tensor: + 
def forward(self, input: Tensor) -> Tensor: return avg_pool3d( input, kernel_size=self.kernel_size, stride=self.stride, padding=self.padding, count_padded_zeros=self.count_padded_zeros, @@ -99,7 +99,7 @@ def _forward(self, input: Tensor) -> Tensor: # ---- max pooling ---- -class MaxPoolNd(BaseSingleVariateLayer): +class MaxPoolNd(BaseLayer): __constants__ = ('kernel_size', 'stride', 'padding') @@ -146,7 +146,7 @@ class MaxPool1d(MaxPoolNd): def _get_spatial_ndims(self) -> int: return 1 - def _forward(self, input: Tensor) -> Tensor: + def forward(self, input: Tensor) -> Tensor: return max_pool1d( input, kernel_size=self.kernel_size, stride=self.stride, padding=self.padding @@ -158,7 +158,7 @@ class MaxPool2d(MaxPoolNd): def _get_spatial_ndims(self) -> int: return 2 - def _forward(self, input: Tensor) -> Tensor: + def forward(self, input: Tensor) -> Tensor: return max_pool2d( input, kernel_size=self.kernel_size, stride=self.stride, padding=self.padding @@ -170,7 +170,7 @@ class MaxPool3d(MaxPoolNd): def _get_spatial_ndims(self) -> int: return 3 - def _forward(self, input: Tensor) -> Tensor: + def forward(self, input: Tensor) -> Tensor: return max_pool3d( input, kernel_size=self.kernel_size, stride=self.stride, padding=self.padding @@ -178,7 +178,7 @@ def _forward(self, input: Tensor) -> Tensor: # ---- global average pooling ---- -class GlobalAvgPoolNd(BaseSingleVariateLayer): +class GlobalAvgPoolNd(BaseLayer): __constants__ = ('spatial_ndims', 'reduce_axis', 'keepdims') @@ -208,7 +208,7 @@ def _get_spatial_ndims(self) -> int: def __repr__(self) -> str: return f'{self.__class__.__qualname__}(keepdims={self.keepdims})' - def _forward(self, input: Tensor) -> Tensor: + def forward(self, input: Tensor) -> Tensor: if len(input.shape) < self.spatial_ndims + 1: raise ValueError( '`rank(input)` is too low: expected to be at least ' diff --git a/tensorkit/layers/resnet.py b/tensorkit/layers/resnet.py index 7a5ffda..9f4d5bd 100644 --- a/tensorkit/layers/resnet.py +++ b/tensorkit/layers/resnet.py @@ -13,7 +13,7 @@ ] -class ResBlockNd(BaseContextualLayer): +class ResBlockNd(BaseLayer): """ A general implementation of ResNet block. @@ -24,7 +24,8 @@ class ResBlockNd(BaseContextualLayer): .. 
code-block:: python shortcut = input - if strides != 1 or in_channels != out_channels or use_shortcut: + if strides != 1 or (kernel_size != 1 and padding != 'half') or \ + in_channels != out_channels or use_shortcut: shortcut_layer = shortcut( in_channels=in_channels, out_channels=out_channels, @@ -211,6 +212,7 @@ def compile_layer_list(layers: List[Module]) -> Module: kernel_size = validate_conv_size('kernel_size', kernel_size, spatial_ndims) stride = validate_conv_size('strides', stride, spatial_ndims) dilation = validate_conv_size('dilation', dilation, spatial_ndims) + is_half_padding = padding == PaddingMode.HALF.value padding = validate_padding(padding, kernel_size, dilation, spatial_ndims) if output_padding != 0 and \ @@ -240,8 +242,10 @@ def compile_layer_list(layers: List[Module]) -> Module: if shortcut is not None: use_shortcut = True if use_shortcut is None: - use_shortcut = (any(s != 1 for s in stride) or - in_channels != out_channels) + use_shortcut = ( + any(s != 1 for s in stride) or + (not is_half_padding and any(k != 1 for k in stride)) or + in_channels != out_channels) if activation is not None: activation_factory = validate_layer_factory('activation', activation) @@ -391,8 +395,12 @@ def _default_conv_factory(self) -> LayerFactory: def _add_output_padding_to_kwargs(self, output_padding, kwargs): return kwargs - @jit_method - def _forward(self, input: Tensor, context: List[Tensor]) -> Tensor: + def forward(self, + input: Tensor, + context: Optional[List[Tensor]] = None) -> Tensor: + if context is None: + context = [] + # feed the input into both the shortcut and the residual path residual = shortcut = input diff --git a/tensorkit/layers/shape_.py b/tensorkit/layers/shape_.py index 44bbc8c..ea7266a 100644 --- a/tensorkit/layers/shape_.py +++ b/tensorkit/layers/shape_.py @@ -14,11 +14,10 @@ # ---- FlattenToNDims ---- -class FlattenToNDims(BaseSingleVariateLayer): +class FlattenToNDims(BaseLayer): __constants__ = ('layer', 'ndims') - layer: Module ndims: int def __init__(self, layer: Module, ndims: int): @@ -26,7 +25,7 @@ def __init__(self, layer: Module, ndims: int): self.layer = layer self.ndims = ndims - def _forward(self, input: Tensor) -> Tensor: + def forward(self, input: Tensor) -> Tensor: # validate the shape of input input_rank = rank(input) expected_rank = self.ndims @@ -44,7 +43,7 @@ def _forward(self, input: Tensor) -> Tensor: # ---- pad ---- -class ConstantPad(BaseSingleVariateLayer): +class ConstantPad(BaseLayer): __constants__ = ('padding', 'value') @@ -73,7 +72,7 @@ def check_int_tuple(t): self.padding = padding self.value = value - def _forward(self, input: Tensor) -> Tensor: + def forward(self, input: Tensor) -> Tensor: return pad(input, self.padding, value=self.value) @@ -130,37 +129,37 @@ def _get_spatial_ndims(self): # ---- channel swap ---- -class ChannelFirstToLast1d(BaseSingleVariateLayer): +class ChannelFirstToLast1d(BaseLayer): - def _forward(self, input: Tensor) -> Tensor: + def forward(self, input: Tensor) -> Tensor: return channel_first_to_last1d(input) -class ChannelFirstToLast2d(BaseSingleVariateLayer): +class ChannelFirstToLast2d(BaseLayer): - def _forward(self, input: Tensor) -> Tensor: + def forward(self, input: Tensor) -> Tensor: return channel_first_to_last2d(input) -class ChannelFirstToLast3d(BaseSingleVariateLayer): +class ChannelFirstToLast3d(BaseLayer): - def _forward(self, input: Tensor) -> Tensor: + def forward(self, input: Tensor) -> Tensor: return channel_first_to_last3d(input) -class ChannelLastToFirst1d(BaseSingleVariateLayer): 
+class ChannelLastToFirst1d(BaseLayer): - def _forward(self, input: Tensor) -> Tensor: + def forward(self, input: Tensor) -> Tensor: return channel_last_to_first1d(input) -class ChannelLastToFirst2d(BaseSingleVariateLayer): +class ChannelLastToFirst2d(BaseLayer): - def _forward(self, input: Tensor) -> Tensor: + def forward(self, input: Tensor) -> Tensor: return channel_last_to_first2d(input) -class ChannelLastToFirst3d(BaseSingleVariateLayer): +class ChannelLastToFirst3d(BaseLayer): - def _forward(self, input: Tensor) -> Tensor: + def forward(self, input: Tensor) -> Tensor: return channel_last_to_first3d(input) diff --git a/tensorkit/layers/split_.py b/tensorkit/layers/split_.py index eee4c80..8945443 100644 --- a/tensorkit/layers/split_.py +++ b/tensorkit/layers/split_.py @@ -6,7 +6,7 @@ __all__ = ['Branch'] -class Branch(BaseSplitLayer): +class Branch(BaseLayer): """ A module that maps the input tensor into multiple tensors via sub-modules. @@ -38,7 +38,7 @@ def __init__(self, self.branches = ModuleList(list(branches)) self.shared = shared - def _forward(self, input: Tensor) -> List[Tensor]: + def forward(self, input: Tensor) -> List[Tensor]: outputs: List[Tensor] = [] shared_output = self.shared(input) for branch in self.branches: diff --git a/tensorkit/losses/core.py b/tensorkit/losses/core.py deleted file mode 100644 index e938efa..0000000 --- a/tensorkit/losses/core.py +++ /dev/null @@ -1,4 +0,0 @@ -from ..backend import losses -from ..backend.losses import * - -__all__ = losses.__all__ diff --git a/tensorkit/optim/__init__.py b/tensorkit/optim/__init__.py new file mode 100644 index 0000000..525c576 --- /dev/null +++ b/tensorkit/optim/__init__.py @@ -0,0 +1,2 @@ +from . import lr_scheduler +from .core import * diff --git a/tensorkit/optim/core.py b/tensorkit/optim/core.py new file mode 100644 index 0000000..da59b02 --- /dev/null +++ b/tensorkit/optim/core.py @@ -0,0 +1,4 @@ +from ..backend import optim +from ..backend.optim import * + +__all__ = optim.__all__ diff --git a/tensorkit/optim/lr_scheduler.py b/tensorkit/optim/lr_scheduler.py new file mode 100644 index 0000000..8dfb597 --- /dev/null +++ b/tensorkit/optim/lr_scheduler.py @@ -0,0 +1,71 @@ +from typing import * + +import mltk + +from .core import * + +__all__ = [ + 'LRScheduler', 'AnnealingLR', +] + + +class LRScheduler(object): + """ + Base class that schedules the learning rate of an optimizer + during a :class:`mltk.TrainLoop`. 
+ """ + + loop: mltk.TrainLoop + optimizer: Optimizer + + def __init__(self, + loop: mltk.TrainLoop, + optimizer: Optimizer): + self.loop = loop + self.optimizer = optimizer + self._bind_events(loop) + self.update_lr() + + def update_lr(self): + """Update the learning rate of the optimizer according to the loop.""" + raise NotImplementedError() + + def close(self): + """Close this scheduler, such that it will no longer affect the optimizer.""" + self._unbind_events(self.loop) + + def _bind_events(self, loop: mltk.TrainLoop): + raise NotImplementedError() + + def _unbind_events(self, loop: mltk.TrainLoop): + raise NotImplementedError() + + +class AnnealingLR(LRScheduler): + + initial_lr: float + ratio: float + epochs: int + + def __init__(self, + loop: mltk.TrainLoop, + optimizer: Optimizer, + initial_lr: float, + ratio: float, + epochs: int + ): + self.initial_lr = float(initial_lr) + self.ratio = float(ratio) + self.epochs = int(epochs) + super().__init__(loop, optimizer) + + def _bind_events(self, loop: mltk.TrainLoop): + loop.on_epoch_end.do(self.update_lr) + + def _unbind_events(self, loop: mltk.TrainLoop): + loop.on_epoch_end.cancel_do(self.update_lr) + + def update_lr(self): + n_cycles = int(self.loop.epoch // self.epochs) + lr_discount = self.ratio ** n_cycles + self.optimizer.set_lr(self.initial_lr * lr_discount) diff --git a/tensorkit/tensor/__init__.py b/tensorkit/tensor/__init__.py index 2d649ea..21aced9 100644 --- a/tensorkit/tensor/__init__.py +++ b/tensorkit/tensor/__init__.py @@ -1,2 +1,2 @@ -from . import linalg, nn, random +from . import linalg, nn, random, utils from .core import * diff --git a/tensorkit/tensor/utils.py b/tensorkit/tensor/utils.py new file mode 100644 index 0000000..1a13f13 --- /dev/null +++ b/tensorkit/tensor/utils.py @@ -0,0 +1,4 @@ +from ..backend import utils +from ..backend.utils import * + +__all__ = utils.__all__ diff --git a/tensorkit/losses/__init__.py b/tensorkit/train/__init__.py similarity index 100% rename from tensorkit/losses/__init__.py rename to tensorkit/train/__init__.py diff --git a/tensorkit/train/core.py b/tensorkit/train/core.py new file mode 100644 index 0000000..cd13464 --- /dev/null +++ b/tensorkit/train/core.py @@ -0,0 +1,4 @@ +from ..backend import train +from ..backend.train import * + +__all__ = train.__all__ diff --git a/tensorkit/utils/__init__.py b/tensorkit/utils/__init__.py new file mode 100644 index 0000000..101c343 --- /dev/null +++ b/tensorkit/utils/__init__.py @@ -0,0 +1,2 @@ +from .data_utils import * +from .tensor_stream import * diff --git a/tensorkit/utils/data_utils.py b/tensorkit/utils/data_utils.py new file mode 100644 index 0000000..a6a0168 --- /dev/null +++ b/tensorkit/utils/data_utils.py @@ -0,0 +1,97 @@ +import mltk + +import numpy as np + +from tensorkit import tensor as T + +__all__ = [ + 'numpy_channel_from_last_to_first1d', + 'numpy_channel_from_last_to_first2d', + 'numpy_channel_from_last_to_first3d', + + 'numpy_channel_from_first_to_last1d', + 'numpy_channel_from_first_to_last2d', + 'numpy_channel_from_first_to_last3d', + + 'numpy_channel_from_last_to_default1d', + 'numpy_channel_from_last_to_default2d', + 'numpy_channel_from_last_to_default3d', + + 'numpy_channel_from_default_to_last1d', + 'numpy_channel_from_default_to_last2d', + 'numpy_channel_from_default_to_last3d', +] + + +def numpy_channel_from_last_to_first_nd(input: np.ndarray, + spatial_ndims: int + ) -> np.ndarray: + if len(input.shape) < spatial_ndims + 2: + raise ValueError( + f'`input` is expected to be at least {spatial_ndims + 2}d: ' 
+ f'got `input.shape` {input.shape}.' + ) + axis = list(range(len(input.shape))) + transpose_axis = ( + axis[: -(spatial_ndims + 1)] + [-1] + + [i for i in range(-spatial_ndims - 1, -1)] + ) + return np.transpose(input, transpose_axis) + + +def numpy_channel_from_last_to_first1d(input: np.ndarray) -> np.ndarray: + return numpy_channel_from_last_to_first_nd(input, 1) + + +def numpy_channel_from_last_to_first2d(input: np.ndarray) -> np.ndarray: + return numpy_channel_from_last_to_first_nd(input, 2) + + +def numpy_channel_from_last_to_first3d(input: np.ndarray) -> np.ndarray: + return numpy_channel_from_last_to_first_nd(input, 3) + + +def numpy_channel_from_first_to_last_nd(input: np.ndarray, + spatial_ndims: int + ) -> np.ndarray: + if len(input.shape) < spatial_ndims + 2: + raise ValueError( + f'`input` is expected to be at least {spatial_ndims + 2}d: ' + f'got `input.shape` {input.shape}.' + ) + axis = list(range(len(input.shape))) + transpose_axis = ( + axis[: -(spatial_ndims + 1)] + + [i for i in range(-spatial_ndims, 0)] + + [-(spatial_ndims + 1)] + ) + return np.transpose(input, transpose_axis) + + +def numpy_channel_from_first_to_last1d(input: np.ndarray) -> np.ndarray: + return numpy_channel_from_first_to_last_nd(input, 1) + + +def numpy_channel_from_first_to_last2d(input: np.ndarray) -> np.ndarray: + return numpy_channel_from_first_to_last_nd(input, 2) + + +def numpy_channel_from_first_to_last3d(input: np.ndarray) -> np.ndarray: + return numpy_channel_from_first_to_last_nd(input, 3) + + +if T.IS_CHANNEL_LAST: + numpy_channel_from_last_to_default1d = \ + numpy_channel_from_last_to_default2d = \ + numpy_channel_from_last_to_default3d = \ + numpy_channel_from_default_to_last1d = \ + numpy_channel_from_default_to_last2d = \ + numpy_channel_from_default_to_last3d = \ + (lambda x: x) +else: + numpy_channel_from_last_to_default1d = numpy_channel_from_last_to_first1d + numpy_channel_from_last_to_default2d = numpy_channel_from_last_to_first2d + numpy_channel_from_last_to_default3d = numpy_channel_from_last_to_first3d + numpy_channel_from_default_to_last1d = numpy_channel_from_first_to_last1d + numpy_channel_from_default_to_last2d = numpy_channel_from_first_to_last2d + numpy_channel_from_default_to_last3d = numpy_channel_from_first_to_last3d diff --git a/tensorkit/utils/tensor_stream.py b/tensorkit/utils/tensor_stream.py new file mode 100644 index 0000000..e154a93 --- /dev/null +++ b/tensorkit/utils/tensor_stream.py @@ -0,0 +1,48 @@ +from typing import * + +import mltk +from mltk import ArrayTuple + +from .. 
import tensor as T + +__all__ = [ + 'TensorStream', + 'as_tensor_stream', +] + + +class TensorStream(mltk.DataStream): + + source: mltk.DataStream + + def __init__(self, source: mltk.DataStream): + super().__init__( + batch_size=source.batch_size, + array_count=source.array_count, + data_shapes=source.data_shapes, + data_length=source.data_length, + random_state=source.random_state, + ) + self.source = source + + def copy(self, **kwargs): + return TensorStream(source=self.source, **kwargs) + + def _minibatch_iterator(self) -> Generator[ArrayTuple, None, None]: + g = iter(self.source) + try: + for batch_data in g: + with T.no_grad(): + batch_data = tuple(T.from_numpy(arr) for arr in batch_data) + yield batch_data + finally: + g.close() + + +def as_tensor_stream(source: mltk.DataStream, + prefetch: Optional[int] = None + ) -> mltk.DataStream: + stream = TensorStream(source) + if prefetch is not None: + stream = stream.threaded(prefetch) + return stream diff --git a/tests/distributions/test_flow.py b/tests/distributions/test_flow.py index 5889b82..179154b 100644 --- a/tests/distributions/test_flow.py +++ b/tests/distributions/test_flow.py @@ -14,14 +14,14 @@ from tests.helper import * -class _MyFlow(tk.flows.BaseFlow): - - def _forward(self, - input: Tensor, - input_log_det: Optional[Tensor], - inverse: bool, - compute_log_det: bool - ) -> Tuple[Tensor, Optional[Tensor]]: +class _MyFlow(tk.flows.Flow): + + def _transform(self, + input: Tensor, + input_log_det: Optional[Tensor], + inverse: bool, + compute_log_det: bool + ) -> Tuple[Tensor, Optional[Tensor]]: if inverse: output = input * 2.0 + 1 event_ndims = self.x_event_ndims @@ -49,9 +49,9 @@ def _forward(self, def check_flow_distribution(ctx, distribution, flow): - min_event_ndims = flow.y_event_ndims + min_event_ndims = flow.get_y_event_ndims() max_event_ndims = (distribution.value_ndims + - (flow.y_event_ndims - flow.x_event_ndims)) + (flow.get_y_event_ndims() - flow.get_x_event_ndims())) def fn(event_ndims, reparameterized, validate_tensors): # construct the instance @@ -65,7 +65,7 @@ def fn(event_ndims, reparameterized, validate_tensors): if event_ndims is not None: kwargs['event_ndims'] = event_ndims else: - event_ndims = flow.y_event_ndims + event_ndims = flow.get_y_event_ndims() if validate_tensors is not None: kwargs['validate_tensors'] = validate_tensors @@ -82,11 +82,11 @@ def log_prob_fn(t): assert_allclose(y, t.tensor, atol=1e-4, rtol=1e-6) ctx.assertEqual( T.rank(log_det), - T.rank(log_px) - (flow.x_event_ndims - distribution.event_ndims) + T.rank(log_px) - (flow.get_x_event_ndims() - distribution.event_ndims) ) return -log_det + T.reduce_sum( log_px, T.int_range( - -(flow.x_event_ndims - distribution.event_ndims), + -(flow.get_x_event_ndims() - distribution.event_ndims), 0 ) ) @@ -100,7 +100,7 @@ def log_prob_fn(t): max_event_ndims=max_event_ndims, log_prob_fn=log_prob_fn, transform_origin_distribution=distribution, - transform_origin_group_ndims=flow.x_event_ndims - distribution.event_ndims, + transform_origin_group_ndims=flow.get_x_event_ndims() - distribution.event_ndims, # other attributes base_distribution=distribution, flow=flow, diff --git a/tests/flows/test_core.py b/tests/flows/test_core.py index 26cf5c1..8aeb32c 100644 --- a/tests/flows/test_core.py +++ b/tests/flows/test_core.py @@ -14,18 +14,18 @@ from tests.ops import * -class _MyFlow(BaseFlow): +class _MyFlow(Flow): def __init__(self): super().__init__(x_event_ndims=1, y_event_ndims=2, explicitly_invertible=True) - def _forward(self, - input: Tensor, - 
input_log_det: Optional[Tensor], - inverse: bool, - compute_log_det: bool) -> Tuple[Tensor, Optional[Tensor]]: + def _transform(self, + input: Tensor, + input_log_det: Optional[Tensor], + inverse: bool, + compute_log_det: bool) -> Tuple[Tensor, Optional[Tensor]]: if inverse: output = reshape_tail(0.5 * (input - 1.), 2, [-1]) else: @@ -47,18 +47,18 @@ def _forward(self, return output, output_log_det -class _MyBadFlow(BaseFlow): +class _MyBadFlow(Flow): def __init__(self): super().__init__(x_event_ndims=1, y_event_ndims=1, explicitly_invertible=True) - def _forward(self, - input: Tensor, - input_log_det: Optional[Tensor], - inverse: bool, - compute_log_det: bool) -> Tuple[Tensor, Optional[Tensor]]: + def _transform(self, + input: Tensor, + input_log_det: Optional[Tensor], + inverse: bool, + compute_log_det: bool) -> Tuple[Tensor, Optional[Tensor]]: output = input output_log_det = input_log_det if compute_log_det: @@ -72,19 +72,19 @@ def _forward(self, class BaseFlowTestCase(unittest.TestCase): def test_constructor(self): - flow = BaseFlow(x_event_ndims=1, - y_event_ndims=2, - explicitly_invertible=True) - self.assertEqual(flow.x_event_ndims, 1) - self.assertEqual(flow.y_event_ndims, 2) - self.assertEqual(flow.explicitly_invertible, True) - - flow = BaseFlow(x_event_ndims=3, - y_event_ndims=1, - explicitly_invertible=False) - self.assertEqual(flow.x_event_ndims, 3) - self.assertEqual(flow.y_event_ndims, 1) - self.assertEqual(flow.explicitly_invertible, False) + flow = Flow(x_event_ndims=1, + y_event_ndims=2, + explicitly_invertible=True) + self.assertEqual(flow.get_x_event_ndims(), 1) + self.assertEqual(flow.get_y_event_ndims(), 2) + self.assertEqual(flow.is_explicitly_invertible(), True) + + flow = Flow(x_event_ndims=3, + y_event_ndims=1, + explicitly_invertible=False) + self.assertEqual(flow.get_x_event_ndims(), 3) + self.assertEqual(flow.get_y_event_ndims(), 1) + self.assertEqual(flow.is_explicitly_invertible(), False) def test_invert(self): flow = _MyFlow() @@ -93,9 +93,9 @@ def test_invert(self): def test_call(self): flow = T.jit_compile(_MyFlow()) - self.assertEqual(flow.x_event_ndims, 1) - self.assertEqual(flow.y_event_ndims, 2) - self.assertEqual(flow.explicitly_invertible, True) + self.assertEqual(flow.get_x_event_ndims(), 1) + self.assertEqual(flow.get_y_event_ndims(), 2) + self.assertEqual(flow.is_explicitly_invertible(), True) # test call x = T.random.randn([2, 3, 4]) @@ -138,13 +138,14 @@ def test_constructor(self): flow = FeatureMappingFlow(axis=-1, event_ndims=2, explicitly_invertible=True) - self.assertEqual(flow.event_ndims, 2) - - flow = T.jit_compile(flow) + self.assertEqual(flow.get_event_ndims(), 2) self.assertEqual(flow.axis, -1) - self.assertEqual(flow.x_event_ndims, 2) - self.assertEqual(flow.y_event_ndims, 2) - self.assertEqual(flow.explicitly_invertible, True) + flow = T.jit_compile(flow) + + self.assertEqual(flow.get_axis(), -1) + self.assertEqual(flow.get_x_event_ndims(), 2) + self.assertEqual(flow.get_y_event_ndims(), 2) + self.assertEqual(flow.is_explicitly_invertible(), True) with pytest.raises(ValueError, match='`event_ndims` must be at least 1'): @@ -168,9 +169,9 @@ def test_InverseFlow(self): self.assertIs(flow.invert(), original_flow) flow = T.jit_compile(flow) - self.assertEqual(flow.x_event_ndims, 2) - self.assertEqual(flow.y_event_ndims, 1) - self.assertTrue(flow.explicitly_invertible) + self.assertEqual(flow.get_x_event_ndims(), 2) + self.assertEqual(flow.get_y_event_ndims(), 1) + self.assertTrue(flow.is_explicitly_invertible()) x = T.random.randn([2, 3, 
4, 1]) expected_y = T.reshape((x - 1.) * 0.5, [2, 3, 4]) @@ -191,18 +192,18 @@ def test_InverseFlow(self): _ = InverseFlow(T.jit_compile(base_flow)) -class _MyFlow1(BaseFlow): +class _MyFlow1(Flow): def __init__(self): super().__init__(x_event_ndims=1, y_event_ndims=1, explicitly_invertible=True) - def _forward(self, - input: Tensor, - input_log_det: Optional[Tensor], - inverse: bool, - compute_log_det: bool - ) -> Tuple[Tensor, Optional[Tensor]]: + def _transform(self, + input: Tensor, + input_log_det: Optional[Tensor], + inverse: bool, + compute_log_det: bool + ) -> Tuple[Tensor, Optional[Tensor]]: if inverse: output = (input - 1.) * 0.5 else: @@ -230,15 +231,15 @@ class SequentialFlowTestCase(unittest.TestCase): def test_constructor(self): flows = [T.jit_compile(_MyFlow1()), T.jit_compile(_MyFlow())] flow = T.jit_compile(SequentialFlow(flows)) - self.assertEqual(flow.x_event_ndims, 1) - self.assertEqual(flow.y_event_ndims, 2) - self.assertTrue(flow.explicitly_invertible) + self.assertEqual(flow.get_x_event_ndims(), 1) + self.assertEqual(flow.get_y_event_ndims(), 2) + self.assertTrue(flow.is_explicitly_invertible()) flow2 = _MyFlow() flow2.explicitly_invertible = False flows = [T.jit_compile(_MyFlow1()), T.jit_compile(flow2)] flow = T.jit_compile(SequentialFlow(flows)) - self.assertFalse(flow.explicitly_invertible) + self.assertFalse(flow.is_explicitly_invertible()) with pytest.raises(ValueError, match='`flows` must not be empty'): @@ -281,19 +282,19 @@ def test_call(self): _ = flow(x, inverse=True) -def check_invertible_matrix(ctx, m): +def check_invertible_matrix(ctx, m, size): matrix, log_det = m(inverse=False, compute_log_det=False) ctx.assertIsNone(log_det) matrix, log_det = m(inverse=False, compute_log_det=True) - ctx.assertEqual(T.shape(matrix), [m.size, m.size]) + ctx.assertEqual(T.shape(matrix), [size, size]) assert_allclose(T.matrix_inverse(T.matrix_inverse(matrix)), matrix, rtol=1e-4, atol=1e-6) assert_allclose(T.linalg.slogdet(matrix)[1], log_det, rtol=1e-4, atol=1e-6) inv_matrix, inv_log_det = m(inverse=True, compute_log_det=True) - ctx.assertEqual(T.shape(inv_matrix), [m.size, m.size]) + ctx.assertEqual(T.shape(inv_matrix), [size, size]) assert_allclose(T.matrix_inverse(inv_matrix), matrix, rtol=1e-4, atol=1e-6) assert_allclose(T.matrix_inverse(T.matrix_inverse(inv_matrix)), @@ -310,9 +311,9 @@ def test_invertible_matrices(self): for n in [1, 3, 5]: m = cls(np.random.randn(n, n)) self.assertEqual(repr(m), f'{cls.__qualname__}(size={n})') + self.assertEqual(m.size, n) m = T.jit_compile(m) - self.assertEqual(m.size, n) # check the initial value is an orthogonal matrix matrix, _ = m(inverse=False, compute_log_det=False) @@ -323,19 +324,20 @@ def test_invertible_matrices(self): rtol=1e-4, atol=1e-6) # check the invertibility - check_invertible_matrix(self, m) + check_invertible_matrix(self, m, n) # check the gradient matrix, log_det = m(inverse=False, compute_log_det=True) - params = [v for _, v in tk.layers.get_parameters(m)] - grads = T.grad([T.reduce_sum(matrix), T.reduce_sum(log_det)], params) + params = list(tk.layers.get_parameters(m)) + grads = T.grad( + [T.reduce_sum(matrix), T.reduce_sum(log_det)], params) # update with gradient, then check the invertibility if cls is StrictInvertibleMatrix: for param, grad in zip(params, grads): with T.no_grad(): T.assign(param, param + 0.001 * grad) - check_invertible_matrix(self, m) + check_invertible_matrix(self, m, n) def check_invertible_linear(ctx, @@ -405,7 +407,7 @@ def test_invertible_conv_nd(self): def check_scale(ctx, - 
scale: BaseScale, + scale: Scale, x, pre_scale, expected_y, @@ -479,7 +481,7 @@ def check_scale(ctx, rtol=1e-4, atol=1e-6) -class _BadScale1(BaseScale): +class _BadScale1(Scale): def _scale_and_log_scale(self, pre_scale: Tensor, @@ -494,7 +496,7 @@ def _scale_and_log_scale(self, return scale, log_scale -class _BadScale2(BaseScale): +class _BadScale2(Scale): def _scale_and_log_scale(self, pre_scale: Tensor, diff --git a/tests/flows/test_coupling.py b/tests/flows/test_coupling.py index 3958773..aa1b21f 100644 --- a/tests/flows/test_coupling.py +++ b/tests/flows/test_coupling.py @@ -51,7 +51,7 @@ def do_check(secondary, scale_type): scale = SigmoidScale(pre_scale_bias=sigmoid_scale_bias) elif scale_type == 'linear' or scale_type is LinearScale: scale = LinearScale() - elif isinstance(scale_type, BaseScale) or T.is_jit_layer(scale_type): + elif isinstance(scale_type, Scale) or T.is_jit_layer(scale_type): scale = scale_type else: raise ValueError(f'Invalid value for `scale`: {scale_type}') diff --git a/tests/flows/test_shape_.py b/tests/flows/test_shape_.py index e347a44..b972c73 100644 --- a/tests/flows/test_shape_.py +++ b/tests/flows/test_shape_.py @@ -16,8 +16,8 @@ def test_ReshapeFlow(self): flow = ReshapeFlow([4, -1], [-1]) self.assertEqual(flow.x_event_shape, [4, -1]) self.assertEqual(flow.y_event_shape, [-1]) - self.assertEqual(flow.x_event_ndims, 2) - self.assertEqual(flow.y_event_ndims, 1) + self.assertEqual(flow.get_x_event_ndims(), 2) + self.assertEqual(flow.get_y_event_ndims(), 1) self.assertIn('x_event_shape=[4, -1]', repr(flow)) self.assertIn('y_event_shape=[-1]', repr(flow)) flow = T.jit_compile(flow) diff --git a/tests/flows/test_split_.py b/tests/flows/test_split_.py index 0cd9508..8250a6c 100644 --- a/tests/flows/test_split_.py +++ b/tests/flows/test_split_.py @@ -91,10 +91,8 @@ def test_SplitFlow(self): T.random.seed(1234) # x and y with the same event ndims - left = T.jit_compile(ActNorm(2)) - right = T.jit_compile(ActNorm(3)) - _ = left(T.random.randn([5, 2])) - _ = right(T.random.randn([5, 3])) + left = T.jit_compile(InvertibleDense(2)) + right = T.jit_compile(InvertibleDense(3)) check_split_flow( ctx=self, @@ -110,20 +108,20 @@ def test_SplitFlow(self): with pytest.raises(ValueError, match=f'`left` and `right` flows must have same ' f'`x_event_ndims` and `y_event_ndims`: ' - f'got `left.x_event_ndims` == {left.x_event_ndims}, ' - f'`left.y_event_ndims` == {left.y_event_ndims}, ' - f'`right.x_event_ndims` == {left.x_event_ndims}, ' + f'got `left.x_event_ndims` == {left.get_x_event_ndims()}, ' + f'`left.y_event_ndims` == {left.get_y_event_ndims()}, ' + f'`right.x_event_ndims` == {left.get_x_event_ndims()}, ' f'and `right.y_event_ndims` == 6'): - _ = SplitFlow([2, 3], left, ReshapeFlow([1] * left.x_event_ndims, [1] * 6)) + _ = SplitFlow([2, 3], left, ReshapeFlow([1] * left.get_x_event_ndims(), [1] * 6)) with pytest.raises(ValueError, match=f'`left` and `right` flows must have same ' f'`x_event_ndims` and `y_event_ndims`: ' - f'got `left.x_event_ndims` == {left.x_event_ndims}, ' - f'`left.y_event_ndims` == {left.y_event_ndims}, ' + f'got `left.x_event_ndims` == {left.get_x_event_ndims()}, ' + f'`left.y_event_ndims` == {left.get_y_event_ndims()}, ' f'`right.x_event_ndims` == 6, ' - f'and `right.y_event_ndims` == {left.y_event_ndims}'): - _ = SplitFlow([2, 3], left, ReshapeFlow([1] * 6, [1] * left.y_event_ndims)) + f'and `right.y_event_ndims` == {left.get_y_event_ndims()}'): + _ = SplitFlow([2, 3], left, ReshapeFlow([1] * 6, [1] * left.get_y_event_ndims())) # x and y with 
different event ndims left = ReshapeFlow([-1], [-1, 2]) @@ -159,14 +157,10 @@ def test_SplitFlowNd(self): for spatial_ndims in (1, 2, 3): cls = getattr(tk.flows, f'SplitFlow{spatial_ndims}d') - sub_cls = getattr(tk.flows, f'ActNorm{spatial_ndims}d') + sub_cls = getattr(tk.flows, f'InvertibleConv{spatial_ndims}d') left = T.jit_compile(sub_cls(2)) right = T.jit_compile(sub_cls(3)) - _ = left(T.random.randn( - make_conv_shape([5], 2, [6, 7, 8][:spatial_ndims]))) - _ = right(T.random.randn( - make_conv_shape([5], 3, [6, 7, 8][:spatial_ndims]))) check_split_flow( ctx=self, diff --git a/tests/init/test_core.py b/tests/init/test_core.py index 2caf524..0039cdf 100644 --- a/tests/init/test_core.py +++ b/tests/init/test_core.py @@ -325,7 +325,7 @@ class _MyDataDependentInitializer(tk.init.DataDependentInitializer): def __init__(self, watcher): self.watcher = watcher - def _forward(self, layer: T.Module, inputs: List[T.Tensor]) -> None: + def _init(self, layer: T.Module, inputs: List[T.Tensor]) -> None: _ = layer(inputs[0]) self.watcher.append((layer, inputs)) diff --git a/tests/layers/test_contextual.py b/tests/layers/test_contextual.py index 373bc3e..7dd093b 100644 --- a/tests/layers/test_contextual.py +++ b/tests/layers/test_contextual.py @@ -1,70 +1,12 @@ import unittest -from typing import List import tensorkit as tk from tensorkit import tensor as T -from tensorkit.backend import Tensor from tests.helper import * -class _MyContextualLayer(tk.layers.BaseContextualLayer): - - def _forward(self, input: Tensor, context: List[Tensor]) -> Tensor: - output = input - base = -1. - for t in context: - output = output + t * base - base = base * 10. - return output - - -class _MyMultiVariateContextualLayer(tk.layers.BaseMultiVariateContextualLayer): - - def _forward(self, inputs: List[Tensor], context: List[Tensor]) -> List[Tensor]: - outputs: List[Tensor] = [] - input_base = 1. - for input in inputs: - output = input * input_base - base = -1. - for t in context: - output = output + t * base - base = base * 10. - outputs.append(output) - input_base *= 10. - return outputs - - class ContextualTestCase(unittest.TestCase): - def test_BaseContextualLayer(self): - x = T.random.randn([2, 3, 4]) - context = [T.random.randn([2, 3, 4]), - T.random.randn([2, 3, 4])] - layer = T.jit_compile(_MyContextualLayer()) - assert_allclose(layer(x), x) - assert_allclose(layer(x, context), x - context[0] - 10. * context[1]) - - def test_BaseMultiVariateContextualLayer(self): - inputs = [T.random.randn([2, 3, 4]), - T.random.randn([2, 3, 4])] - context = [T.random.randn([2, 3, 4]), - T.random.randn([2, 3, 4])] - layer = T.jit_compile(_MyMultiVariateContextualLayer()) - - for k in range(len(inputs)): - outputs = layer(inputs[:k]) - self.assertEqual(len(outputs), k) - for j, (input, output) in enumerate(zip(inputs, outputs)): - assert_allclose(output, input * (10 ** j)) - - outputs = layer(inputs[:k], context) - self.assertEqual(len(outputs), k) - for j, (input, output) in enumerate(zip(inputs, outputs)): - assert_allclose( - output, - input * (10 ** j) - context[0] - 10. 
* context[1] - ) - def test_IgnoreContext(self): x = T.random.randn([2, 3, 4]) context = [T.random.randn([2, 3, 4]), diff --git a/tests/layers/test_core.py b/tests/layers/test_core.py index 15450d0..99a84c9 100644 --- a/tests/layers/test_core.py +++ b/tests/layers/test_core.py @@ -14,7 +14,7 @@ from tests.ops import * -class _MyWrapper(BaseSingleVariateLayer): +class _MyWrapper(BaseLayer): __constants__ = ('wrapped',) @@ -24,7 +24,7 @@ def __init__(self, wrapped: Module): super().__init__() self.wrapped = wrapped - def _forward(self, input: Tensor) -> Tensor: + def forward(self, input: Tensor) -> Tensor: return self.wrapped(input) @@ -67,14 +67,20 @@ def test_param_and_buffer(self): c = get_buffer(layer, 'c') c2 = get_buffer(layer, 'c2') - self.assertDictEqual(dict(get_parameters(layer)), {'w': w, 'w2': w2}) - self.assertDictEqual(dict(get_buffers(layer)), {'c': c, 'c2': c2}) + self.assertListEqual(list(get_parameters(layer)), [w, w2]) + self.assertDictEqual(dict(get_named_parameters(layer)), {'w': w, 'w2': w2}) + self.assertListEqual(list(get_buffers(layer)), [c, c2]) + self.assertDictEqual(dict(get_named_buffers(layer)), {'c': c, 'c2': c2}) seq = _MyWrapper(layer) - self.assertDictEqual(dict(get_parameters(seq)), {'wrapped.w': w, 'wrapped.w2': w2}) - self.assertDictEqual(dict(get_parameters(seq, recursive=False)), {}) - self.assertDictEqual(dict(get_buffers(seq)), {'wrapped.c': c, 'wrapped.c2': c2}) - self.assertDictEqual(dict(get_buffers(seq, recursive=False)), {}) + self.assertListEqual(list(get_parameters(seq)), [w, w2]) + self.assertListEqual(list(get_parameters(seq, recursive=False)), []) + self.assertDictEqual(dict(get_named_parameters(seq)), {'wrapped.w': w, 'wrapped.w2': w2}) + self.assertDictEqual(dict(get_named_parameters(seq, recursive=False)), {}) + self.assertListEqual(list(get_buffers(seq)), [c, c2]) + self.assertListEqual(list(get_buffers(seq, recursive=False)), []) + self.assertDictEqual(dict(get_named_buffers(seq)), {'wrapped.c': c, 'wrapped.c2': c2}) + self.assertDictEqual(dict(get_named_buffers(seq, recursive=False)), {}) def test_SimpleParamStore(self): initial_value = np.random.randn(2, 3, 4) @@ -173,7 +179,7 @@ def test_identity(self): assert_equal(x, layer(x)) -class _MySingleVariateLayer(BaseSingleVariateLayer): +class _MySingleVariateLayer(BaseLayer): bias: float @@ -190,29 +196,28 @@ def _add_numpy_array(self, x: Tensor) -> Tensor: return x + T.from_numpy(np.arange(x.shape[-1]), dtype=T.get_dtype(x)) - @T.jit_method - def _forward(self, x: Tensor) -> Tensor: + def forward(self, x: Tensor) -> Tensor: return self._add_numpy_array(x * 11. 
+ self.bias) -class _MyMultiVariateLayer(BaseMultiVariateLayer): +class _MyMultiVariateLayer(BaseLayer): - def _forward(self, inputs: List[Tensor]) -> List[Tensor]: + def forward(self, inputs: List[Tensor]) -> List[Tensor]: ret: List[Tensor] = [] for i in range(len(inputs) - 1): ret.append(inputs[i] + inputs[i + 1]) return ret -class _MySplitLayer(BaseSplitLayer): +class _MySplitLayer(BaseLayer): - def _forward(self, input: Tensor) -> List[Tensor]: + def forward(self, input: Tensor) -> List[Tensor]: return [input, input + 1, input + 2] -class _MyMergeLayer(BaseMergeLayer): +class _MyMergeLayer(BaseLayer): - def _forward(self, inputs: List[Tensor]) -> Tensor: + def forward(self, inputs: List[Tensor]) -> Tensor: return T.add_n(inputs) diff --git a/tests/layers/test_flow_layer.py b/tests/layers/test_flow_layer.py index 995e86e..bb0e654 100644 --- a/tests/layers/test_flow_layer.py +++ b/tests/layers/test_flow_layer.py @@ -10,15 +10,15 @@ from tests.ops import make_conv_shape -class _MyFlow(tk.flows.BaseFlow): +class _MyFlow(tk.flows.Flow): @T.jit_method - def _forward(self, - input: Tensor, - input_log_det: Optional[Tensor], - inverse: bool, - compute_log_det: bool - ) -> Tuple[Tensor, Optional[Tensor]]: + def _transform(self, + input: Tensor, + input_log_det: Optional[Tensor], + inverse: bool, + compute_log_det: bool + ) -> Tuple[Tensor, Optional[Tensor]]: if inverse: raise RuntimeError('Not invertible.') output = input * 2. diff --git a/tests/layers/test_pixelcnn.py b/tests/layers/test_pixelcnn.py index b312f53..59ede6f 100644 --- a/tests/layers/test_pixelcnn.py +++ b/tests/layers/test_pixelcnn.py @@ -103,9 +103,9 @@ def ensure_full_receptive_field(ctx, ) -class _MyAddContext(tk.layers.BaseContextualLayer): +class _MyAddContext(tk.layers.BaseLayer): - def _forward(self, input: Tensor, context: List[Tensor]) -> Tensor: + def forward(self, input: Tensor, context: List[Tensor]) -> Tensor: if len(context) == 0: return input elif len(context) == 1: @@ -266,7 +266,7 @@ def test_pixelcnn_network(self): deconv_layer_cls = getattr( tk.layers, f'PixelCNNConvTranspose{spatial_ndims}d') normalizer_cls = getattr( - tk.layers, f'ActNorm{spatial_ndims}d') + tk.layers, f'BatchNorm{spatial_ndims}d') dropout_cls = getattr( tk.layers, f'Dropout{spatial_ndims}d') diff --git a/tests/layers/test_resnet.py b/tests/layers/test_resnet.py index abf95e9..79742c2 100644 --- a/tests/layers/test_resnet.py +++ b/tests/layers/test_resnet.py @@ -48,8 +48,8 @@ def check_resblock(ctx, # force `use_bias` = False layer = resblock_cls(in_channels=5, out_channels=5, kernel_size=1, use_bias=False) - ctx.assertIsNone(layer.conv0.bias_store) - ctx.assertIsNone(layer.conv1.bias_store) + ctx.assertFalse(layer.conv0.use_bias) + ctx.assertFalse(layer.conv1.use_bias) layer = T.jit_compile(layer) assert_allclose( @@ -63,7 +63,7 @@ def check_resblock(ctx, use_shortcut=True) ctx.assertIsInstance(layer.shortcut, linear_cls) ctx.assertIsInstance(layer.shortcut.weight_store, tk.layers.SimpleParamStore) - ctx.assertIsNone(layer.shortcut.bias_store) + ctx.assertFalse(layer.shortcut.use_bias) ctx.assertEqual(layer.shortcut.kernel_size, [1] * spatial_ndims) ctx.assertEqual(layer.shortcut.stride, [1] * spatial_ndims) ctx.assertEqual(layer.shortcut.padding, [(0, 0)] * spatial_ndims) @@ -101,7 +101,7 @@ def check_resblock(ctx, **output_padding_arg ) ctx.assertIsInstance(layer.shortcut, linear_cls) - ctx.assertIsNone(layer.shortcut.bias_store) + ctx.assertFalse(layer.shortcut.use_bias) ctx.assertEqual(layer.shortcut.kernel_size, kernel_size) 
ctx.assertEqual(layer.shortcut.stride, stride) ctx.assertEqual(layer.shortcut.padding, padding) @@ -158,7 +158,7 @@ def check_resblock(ctx, tk.layers.set_train_mode(layer, True) _ = layer(x) # initialize the normalizers tk.layers.set_train_mode(layer, False) - ctx.assertIsNone(layer.conv0.bias_store) + ctx.assertFalse(layer.conv0.use_bias) ctx.assertIsInstance(layer.pre_conv0, tk.layers.Sequential) ctx.assertIsInstance(layer.pre_conv0[0], normalizer_cls) ctx.assertIsInstance(layer.pre_conv0[1], tk.layers.LeakyReLU) diff --git a/tests/losses/test_core.py b/tests/losses/test_core.py deleted file mode 100644 index 6d17cce..0000000 --- a/tests/losses/test_core.py +++ /dev/null @@ -1,31 +0,0 @@ -import unittest - -import tensorkit as tk -from tensorkit import tensor as T -from tensorkit.tensor import Tensor - -from tests.helper import * - - -class _MySupervisedLoss1(tk.losses.BaseSupervisedLossLayer): - - def _forward(self, output: Tensor, target: Tensor) -> Tensor: - return output + target - - -class _MySupervisedLoss2(tk.losses.BaseSupervisedLossLayer): - - def _forward(self, output: Tensor, target: Tensor) -> Tensor: - return (output + target).mean() - - -class BaseLossesTestCase(unittest.TestCase): - - def test_supervised(self): - output = T.random.randn([2, 3, 4]) - target = T.random.randn([3, 4]) - - l = T.jit_compile(_MySupervisedLoss1()) - assert_allclose(l(output, target), (output + target).mean()) - l = T.jit_compile(_MySupervisedLoss2()) - assert_allclose(l(output, target), (output + target).mean()) diff --git a/tests/ops.py b/tests/ops.py index c917ac2..3dc8d17 100644 --- a/tests/ops.py +++ b/tests/ops.py @@ -14,7 +14,7 @@ # convolution shape ops 'get_spatial_axis', 'get_channel_axis', 'channel_to_last_nd', 'channel_to_first_nd', 'space_to_depth_nd', - 'make_conv_shape', + 'make_conv_shape', 'get_conv_output_size', 'get_deconv_output_size', # convolution ops 'dense', 'conv_nd', 'conv_transpose_nd', diff --git a/tests/tensor/test_core.py b/tests/tensor/test_core.py index c43763a..f5d5124 100644 --- a/tests/tensor/test_core.py +++ b/tests/tensor/test_core.py @@ -1080,6 +1080,24 @@ def log_f_exp(f, x, axis=None, keepdims=False): match='`axis` must not be an empty list'): _ = T_op(t, axis=[]) + # test argmax, argmin + def np_argmaxmin(fn, x, axis, keepdims=False): + r_shape = list(x.shape) + r_shape[axis] = 1 + r = fn(x, axis) + if keepdims: + r = r.reshape(r_shape) + return r + + for name in ['argmax', 'argmin']: + T_op = getattr(T, name, getattr(T, name, None)) + np_op = partial(np_argmaxmin, getattr(np, name)) + + for axis in (0, 1, 2, -1, -2, -3): + assert_allclose(T_op(t, axis=axis), np_op(x, axis=axis)) + assert_allclose(T_op(t, axis=axis, keepdims=True), + np_op(x, axis=axis, keepdims=True)) + # test calculate_mean_and_var x = np.random.randn(3, 4, 5) for dtype in float_dtypes: @@ -1362,7 +1380,7 @@ def test_gradient(self): [l_sum, l_squares], [xt, yt], grad_outputs=[None, T.ones_like(l_squares)], - keep_graph=True, + retain_graph=True, create_graph=True ) assert_allclose(x_grad, y + 21 * x ** 2) @@ -1373,7 +1391,7 @@ def test_gradient(self): [x_grad, y_grad], [xt, yt], grad_outputs=[T.ones_like(xt), T.ones_like(yt)], - keep_graph=True, + retain_graph=True, create_graph=False ) assert_allclose(x_grad_2, 42. * x + 1.) 
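The `keep_graph` to `retain_graph` rename exercised by these tests matches the keyword accepted by `torch.autograd.grad`. A minimal sketch of a wrapper with the renamed keywords, assuming it simply forwards them to PyTorch (illustrative only, not the library's actual implementation):

from typing import List, Optional
import torch

def grad(outputs: List[torch.Tensor],
         inputs: List[torch.Tensor],
         grad_outputs: Optional[List[Optional[torch.Tensor]]] = None,
         retain_graph: Optional[bool] = None,
         create_graph: bool = False,
         allow_unused: bool = False) -> List[torch.Tensor]:
    # Forward the renamed keywords to `torch.autograd.grad`, which uses
    # `retain_graph` rather than the old `keep_graph` name.
    return list(torch.autograd.grad(
        outputs=outputs, inputs=inputs, grad_outputs=grad_outputs,
        retain_graph=retain_graph, create_graph=create_graph,
        allow_unused=allow_unused,
    ))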
@@ -1384,7 +1402,7 @@ def test_gradient(self): [l_sum, l_squares], [xt], grad_outputs=[None, T.ones_like(l_squares)], - keep_graph=True, + retain_graph=True, create_graph=True ) assert_allclose(x_grad, y + 21 * x ** 2) @@ -1393,7 +1411,7 @@ def test_gradient(self): [l_sum, l_squares], [yt], grad_outputs=[None, T.ones_like(l_squares)], - keep_graph=True, + retain_graph=True, create_graph=True ) assert_allclose(y_grad, x + 33 * y ** 2) @@ -1403,7 +1421,7 @@ def test_gradient(self): [x_grad, y_grad] = T.grad( [l_sum], [xt, yt], - keep_graph=False, + retain_graph=False, create_graph=False, allow_unused=True, ) @@ -1417,11 +1435,11 @@ def test_gradient(self): # stop_grad, but `allow_unused` is False l_sum = T.reduce_sum(T.stop_grad(xt ** 2) * yt) - with pytest.raises(Exception, match='Set allow_unused=True'): + with pytest.raises(Exception): _ = T.grad( [l_sum], [xt, yt], - keep_graph=False, + retain_graph=False, create_graph=False, allow_unused=False, ) diff --git a/tests/tensor/test_nn.py b/tests/tensor/test_nn.py index 3285e20..78798d5 100644 --- a/tests/tensor/test_nn.py +++ b/tests/tensor/test_nn.py @@ -203,6 +203,12 @@ def cross_entropy(logits, labels, reduction, negative): _f(logits), _f(labels), reduction, negative) assert_allclose(ans, out) + # test cross_entropy with int32 labels + ans = cross_entropy(logits, labels, reduction, negative) + out = T.nn.cross_entropy_with_logits( + _f(logits), T.cast(_f(labels), dtype=T.int32), reduction, negative) + assert_allclose(ans, out) + # test cross_entropy on 2d ans = cross_entropy( logits[0, 0, 0], labels[0, 0], reduction, negative) diff --git a/tests/tensor/test_utils.py b/tests/tensor/test_utils.py new file mode 100644 index 0000000..5bd668e --- /dev/null +++ b/tests/tensor/test_utils.py @@ -0,0 +1,119 @@ +import unittest +from itertools import product + +import pytest + +import tensorkit as tk +from tensorkit import tensor as T +from tests.ops import * + + +class UtilsTestCase(unittest.TestCase): + + def test_split_channel_spatial_shape(self): + for spatial_ndims in (1, 2, 3): + conv_shape = make_conv_shape([], 6, [7, 8, 9][:spatial_ndims]) + self.assertEqual( + T.utils.split_channel_spatial_shape(conv_shape), + (6, [7, 8, 9][:spatial_ndims]) + ) + with pytest.raises(Exception, match='Invalid `shape`'): + _ = T.utils.split_channel_spatial_shape([]) + + def test_unsplit_channel_spatial_shape(self): + for spatial_ndims in (1, 2, 3): + conv_shape = make_conv_shape([], 6, [7, 8, 9][:spatial_ndims]) + self.assertEqual( + T.utils.unsplit_channel_spatial_shape(6, [7, 8, 9][:spatial_ndims]), + conv_shape + ) + with pytest.raises(Exception, match='Invalid `size`'): + _ = T.utils.unsplit_channel_spatial_shape(1, []) + + def test_conv_deconv_output_shape_and_args(self): + for input_size, kernel_size, stride, padding, dilation in product( + ([8, 9, 10], [16, 21, 32], [30, 31, 32]), + ([1] * 3, [2] * 3, [3] * 3, [1, 2, 3]), + ([1] * 3, [2] * 3, [3] * 3, [1, 2, 3]), + ([(0, 0)] * 3, [(1, 1)] * 3, [(2, 2)] * 3, [(3, 3)] * 3, + [(1, 2), (2, 3), (3, 4)]), + ([1] * 3, [2] * 3, [3] * 3, [1, 2, 3]), + ): + args = (input_size, kernel_size, stride, padding, dilation) + + # calculate_conv_output_size + output_size = [get_conv_output_size(*a) for a in zip(*args)] + self.assertEqual( + T.utils.calculate_conv_output_size( + input_size=input_size, kernel_size=kernel_size, + stride=stride, padding=padding, dilation=dilation, + ), + output_size + ) + layer1 = tk.layers.LinearConv3d( + 1, 1, kernel_size=kernel_size, stride=stride, padding=padding, + dilation=dilation, + 
) + x = T.zeros(make_conv_shape([1], 1, input_size)) + y = layer1(x) + self.assertEqual( + T.utils.split_channel_spatial_shape(T.shape(y)[1:])[1], + output_size, + ) + + # calculate_deconv_output_padding + output_padding = T.utils.calculate_deconv_output_padding( + input_size=output_size, output_size=input_size, + kernel_size=kernel_size, stride=stride, padding=padding, + dilation=dilation, + ) + layer2 = tk.layers.LinearConvTranspose3d( + 1, 1, kernel_size=kernel_size, stride=stride, padding=padding, + output_padding=output_padding, dilation=dilation, + ) + z = layer2(y) + self.assertEqual( + T.utils.split_channel_spatial_shape(T.shape(z)[1:])[1], + input_size, + ) + + # calculate_deconv_output_size + self.assertEqual( + T.utils.calculate_deconv_output_size( + input_size=output_size, kernel_size=kernel_size, + stride=stride, padding=padding, + output_padding=output_padding, dilation=dilation, + ), + input_size + ) + + # test error + kwargs = dict(kernel_size=[1], stride=[1], dilation=[1], padding=[(0, 0)]) + for input_size in ([], [1, 2, 3, 4]): + with pytest.raises(Exception, + match='`input_size` is not a 1d, 2d or 3d ' + 'convolutional input size'): + _ = T.utils.calculate_conv_output_size(input_size, **kwargs) + with pytest.raises(Exception, + match='`input_size` is not a 1d, 2d or 3d ' + 'convolutional input size'): + _ = T.utils.calculate_deconv_output_size(input_size, output_padding=[0], **kwargs) + + for arg_name in ('kernel_size', 'stride', 'dilation', 'padding'): + kwargs2 = dict(kwargs) + if arg_name == 'padding': + kwargs2[arg_name] = [(0, 0)] * 2 + else: + kwargs2[arg_name] = [1, 1] + with pytest.raises(Exception, match='`.*` is not for .*d convolution'): + _ = T.utils.calculate_conv_output_size([11], **kwargs2) + with pytest.raises(Exception, match='`.*` is not for .*d convolution'): + _ = T.utils.calculate_deconv_output_size([11], output_padding=[0], **kwargs2) + + with pytest.raises(Exception, match='`.*` is not for .*d convolution'): + _ = T.utils.calculate_deconv_output_size([11], output_padding=[0, 0], **kwargs) + + with pytest.raises(Exception, + match='No `output_padding` can satisfy the ' + 'deconvolution task'): + _ = T.utils.calculate_deconv_output_padding([2], [1], [1], [1], [(0, 0)], [1]) diff --git a/tests/train/__init__.py b/tests/train/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/train/test_core.py b/tests/train/test_core.py new file mode 100644 index 0000000..2b1f793 --- /dev/null +++ b/tests/train/test_core.py @@ -0,0 +1,67 @@ +import os +import unittest +from tempfile import TemporaryDirectory + +import numpy as np +import pytest +import torch + +from mltk import SimpleStatefulObject + +import tensorkit as tk + + +class TorchCheckpointTestCase(unittest.TestCase): + + def test_invalid_type(self): + with pytest.raises(TypeError, + match=r'Object must be a :class:`StatefulObject`, ' + r'or has `state_dict\(\)` and ' + r'`load_state_dict\(\)` methods: got 123'): + _ = tk.train.Checkpoint(obj=123) + + def test_save_restore(self): + x = torch.from_numpy(np.random.normal(size=[2, 5]).astype(np.float32)) + + with TemporaryDirectory() as temp_dir: + root_dir = os.path.join(temp_dir, 'ckpt') + + # test save + layer = torch.nn.Linear(5, 3) + optimizer = tk.optim.Adam(tk.layers.get_parameters(layer)) + + obj = SimpleStatefulObject() + obj.value = 123456 + ckpt = tk.train.Checkpoint(obj=obj, optimizer=optimizer, layer=layer) + ckpt.save(root_dir) + + # test restore + layer2 = torch.nn.Linear(5, 3) + optimizer2 = 
tk.optim.Adam(tk.layers.get_parameters(layer2)) + obj2 = SimpleStatefulObject() + ckpt2 = tk.train.Checkpoint(obj=obj2, optimizer=optimizer2, layer=layer2) + ckpt2.restore(root_dir) + + # todo: check the state of the optimizer + + # compare two objects + out = layer(x) + out2 = layer2(x) + self.assertTrue(torch.allclose(out2, out)) + self.assertEqual(obj2.value, 123456) + + # test partial restore + layer3 = torch.nn.Linear(5, 3) + ckpt3 = tk.train.Checkpoint(layer=layer3) + ckpt3.restore(root_dir) + self.assertTrue(torch.allclose(layer3(x), out)) + + # test restore error + ckpt4 = tk.train.Checkpoint(layer=layer3, xyz=SimpleStatefulObject()) + with pytest.raises(ValueError, + match=f'Key \'xyz\' does not exist in ' + f'the state dict recovered from: ' + f'{root_dir}'): + ckpt4.restore(root_dir) + + From 3c08c4c5fbfde90608129e47f45d9737a208825e Mon Sep 17 00:00:00 2001 From: Haowen Xu Date: Mon, 17 Feb 2020 17:38:50 +0800 Subject: [PATCH 2/7] add gpu device support --- tensorkit/arg_check.py | 2 +- tensorkit/backend/pytorch_/core.py | 202 +++++++++++++----- tensorkit/backend/pytorch_/flows.py | 54 +++-- tensorkit/backend/pytorch_/layers.py | 132 ++++++++++-- tensorkit/backend/pytorch_/random.py | 41 +++- tensorkit/distributions/base.py | 8 +- tensorkit/distributions/bernoulli.py | 9 +- tensorkit/distributions/categorical.py | 20 +- tensorkit/distributions/discretized.py | 10 +- tensorkit/distributions/flow.py | 6 +- tensorkit/distributions/mixture.py | 18 +- tensorkit/distributions/normal.py | 38 +++- tensorkit/distributions/uniform.py | 15 +- tensorkit/distributions/utils.py | 40 +++- tensorkit/examples/classification/mnist.py | 6 +- .../examples/classification/mnist_resnet.py | 6 +- tensorkit/flows/act_norm.py | 38 ++-- tensorkit/flows/coupling.py | 5 +- tensorkit/flows/rearrangement.py | 11 +- tensorkit/flows/reshape_.py | 6 +- tensorkit/flows/split_.py | 5 +- tensorkit/init/std_data_init.py | 6 +- tensorkit/layers/builder.py | 7 +- tensorkit/layers/composed.py | 6 + tensorkit/layers/flow_layer.py | 2 +- tensorkit/layers/pixelcnn.py | 26 ++- tensorkit/layers/resnet.py | 37 +++- tensorkit/layers/utils.py | 23 +- tensorkit/tensor/core.py | 4 +- tensorkit/tensor/core_extras.py | 1 - tensorkit/tensor/random_extras.py | 33 +-- tensorkit/utils/tensor_stream.py | 14 +- tests/distributions/test_flow.py | 6 +- tests/flows/test_act_norm.py | 2 +- tests/flows/test_core.py | 42 ++-- tests/flows/test_coupling.py | 11 +- tests/flows/test_rearrangement.py | 2 +- tests/flows/test_shape_.py | 2 +- tests/flows/test_split_.py | 12 +- tests/init/test_core.py | 2 +- tests/init/test_std_data_init.py | 2 +- tests/layers/test_composed.py | 12 +- tests/layers/test_contextual.py | 6 +- tests/layers/test_core.py | 24 +-- tests/layers/test_flow_layer.py | 8 +- tests/layers/test_gated.py | 4 +- tests/layers/test_pixelcnn.py | 14 +- tests/layers/test_pool.py | 4 +- tests/layers/test_resnet.py | 18 +- tests/layers/test_shape_.py | 12 +- tests/layers/test_split_.py | 4 +- tests/tensor/test_core.py | 171 +++++++-------- tests/tensor/test_nn.py | 6 +- tests/tensor/test_random.py | 22 +- tests/test_arg_check.py | 4 +- 55 files changed, 803 insertions(+), 418 deletions(-) delete mode 100644 tensorkit/tensor/core_extras.py diff --git a/tensorkit/arg_check.py b/tensorkit/arg_check.py index 67edf5c..154d7ac 100644 --- a/tensorkit/arg_check.py +++ b/tensorkit/arg_check.py @@ -22,7 +22,7 @@ def validate_positive_int(arg_name: str, arg_value) -> int: # layer argument validators def validate_layer(arg_name: str, layer) -> 
'Module': - from tensorkit.tensor import is_jit_layer + from tensorkit.layers import is_jit_layer if isinstance(layer, Module) or is_jit_layer(layer): return layer else: diff --git a/tensorkit/backend/pytorch_/core.py b/tensorkit/backend/pytorch_/core.py index 8c01417..c1a2aa0 100644 --- a/tensorkit/backend/pytorch_/core.py +++ b/tensorkit/backend/pytorch_/core.py @@ -1,4 +1,5 @@ import math +from contextlib import contextmanager from typing import * import numpy as np @@ -10,14 +11,18 @@ __all__ = [ # constants - 'IS_CHANNEL_LAST', 'EPSILON', + 'IS_CHANNEL_LAST', 'EPSILON', 'CPU_DEVICE', # typing 'Tensor', 'Variable', 'Module', # ordinary module base classes # jit - 'jit', 'jit_ignore', 'jit_method', 'jit_compile', 'is_jit_layer', + 'jit', 'jit_ignore', 'jit_method', + + # device + 'get_device', 'to_device', 'current_device', 'use_device', + 'gpu_device_list', 'first_gpu_device', # utilities 'int_range', 'identity', @@ -27,7 +32,8 @@ 'is_floating_point_dtype', # tensor constructors - 'as_tensor_backend', 'as_tensor', 'from_numpy', 'float_scalar', 'int_scalar', + 'as_tensor', 'from_numpy', + 'float_scalar', 'float_scalar_like', 'int_scalar', 'int_scalar_like', 'zeros', 'zeros_like', 'ones', 'ones_like', 'full', 'full_like', 'arange', 'one_hot', @@ -92,6 +98,9 @@ EPSILON = 1e-6 """The small infinitesimal constant to avoid diving by zero of taking logarithm of zero.""" +CPU_DEVICE = 'cpu' +"""The constant that represents the local CPU device.""" + # ---- typing ---- Tensor = torch.Tensor @@ -118,18 +127,55 @@ def jit_method(fn): return fn -def jit_compile(m): - if isinstance(m, Module): - if not settings.disable_jit: - m = torch.jit.script(m) +# ---- device ---- +@jit +def get_device(t: Tensor) -> str: + return str(t.device) + + +@jit +def to_device(t: Tensor, device: str) -> Tensor: + if str(t.device) != device: + t = t.to(device=device) + return t + + +_current_device = [CPU_DEVICE] + + +@jit_ignore +def current_device() -> str: + return _current_device[0] + + +@contextmanager +def use_device(device: str): + if not torch.cuda.is_available(): + if device != CPU_DEVICE: + raise RuntimeError('GPU is not available.') + yield else: - raise TypeError(f'Not supported by `jit_compile`: {m!r}') - return m + old_device = _current_device[0] + try: + with torch.cuda.device(device): + _current_device[0] = device + yield + finally: + _current_device[0] = old_device -def is_jit_layer(layer: Module) -> bool: - """Check whether or not `layer` is a JIT compiled layer.""" - return isinstance(layer, torch.jit.ScriptModule) +def gpu_device_list() -> List[str]: + return [f'cuda:{index}' for index in range(torch.cuda.device_count())] + + +def first_gpu_device(fallback_to_cpu: bool = True) -> str: + gpu_list = gpu_device_list() + if not gpu_list: + if not fallback_to_cpu: # pragma: no cover + raise RuntimeError('No GPU is available.') + else: + return CPU_DEVICE + return gpu_list[0] # ---- utilities ---- @@ -147,22 +193,28 @@ def int_range(start: int, end: int, step: int = 1) -> List[int]: # ---- dtypes ---- @jit -def cast(input: Tensor, dtype: str) -> Tensor: +def cast(input: Tensor, dtype: str, device: Optional[str] = None) -> Tensor: if dtype == 'float32': target_dtype = torch.float32 elif dtype == 'int32': target_dtype = torch.int32 else: target_dtype = {'int8': torch.int8, 'uint8': torch.uint8, 'int16': torch.int16, 'int64': torch.int64, 'float16': torch.float16, 'float64': torch.float64, 'bool': torch.bool}[dtype] - if target_dtype != input.dtype: + + if target_dtype != input.dtype and device is not None: + 
input = input.to(dtype=target_dtype, device=device) + elif target_dtype != input.dtype: input = input.to(dtype=target_dtype) + elif device is not None: + input = input.to(device=device) + return input @jit -def cast_like(input: Tensor, dtype_as: Tensor) -> Tensor: - if dtype_as.dtype != input.dtype: - input = input.to(dtype=dtype_as.dtype) +def cast_like(input: Tensor, like: Tensor) -> Tensor: + if like.dtype != input.dtype: + input = input.to(dtype=like.dtype, device=like.device) return input @@ -193,18 +245,10 @@ def is_floating_point_dtype(dtype: str) -> bool: # ---- tensor constructors ---- -as_tensor_backend = torch.as_tensor -""" -``T.as_tensor`` with JIT support. - -This should be an alias of the backend function ``as_tensor(data, dtype=None)``. -Use only ``(data) -> torch.Tensor``, or ``(data, dtype=another_tensor.dtype) -> torch.Tensor``. -""" - - @jit_ignore def as_tensor(data, dtype: Optional[Union[torch.dtype, str]] = None, + device: Optional[str] = None, force_copy: bool = False) -> Tensor: """ Construct a new tensor from `data`. @@ -217,6 +261,7 @@ def as_tensor(data, another tensor, a :class:`~tensorkit.StochasticTensor`, or anything else that the backend supports. dtype: The expected dtype of the constructed tensor. + device: Where to put the new tensor. force_copy: Force to copy `data` even if it is not necessary. The gradient propagation will not be stopped from the copied tensor to the original tensor. The caller may need to use `T.stop_grad()` @@ -243,27 +288,38 @@ def as_tensor(data, else: target_dtype = {'int8': torch.int8, 'uint8': torch.uint8, 'int16': torch.int16, 'int64': torch.int64, 'float16': torch.float16, 'float64': torch.float64, 'bool': torch.bool}[dtype] + # check the device argument + if device is None: + device = current_device() + # if `data` is already a tensor if isinstance(data, StochasticTensor): data = data.tensor if isinstance(data, Tensor): # input `data` may be `StochasticTensor`, `Tensor` or `numpy.ndarray` + kwargs = {} if data.dtype != target_dtype: - data = data.to(target_dtype) + kwargs['dtype'] = target_dtype + if str(data.device) != device: + kwargs['device'] = device + if kwargs: + data = data.to(**kwargs) if force_copy: data = data.clone() return data # or if `data` is other types - ret = torch.as_tensor(data, dtype=target_dtype) + ret = torch.as_tensor(data, dtype=target_dtype, device=device) if force_copy: ret = ret.clone() return ret @jit_ignore -def from_numpy(data, dtype: Optional[Union[torch.dtype, str]] = None) -> Tensor: +def from_numpy(data, + dtype: Optional[Union[torch.dtype, str]] = None, + device: Optional[str] = None) -> Tensor: """ Construct a new tensor from given numpy array `data`. @@ -271,40 +327,68 @@ def from_numpy(data, dtype: Optional[Union[torch.dtype, str]] = None) -> Tensor: data: The numpy array, which will always be copied, even if the backend supports share memory between a numpy array and a tensor. dtype: The expected dtype of the constructed tensor. + device: Where to put the new tensor. Returns: The constructed tensor. 
""" - return as_tensor(data, dtype=dtype, force_copy=True) + if device is None: + device = current_device() + return as_tensor(data, dtype=dtype, device=device, force_copy=True) @jit -def float_scalar(data: float, dtype: str = settings.float_x) -> Tensor: +def float_scalar(data: float, + dtype: str = settings.float_x, + device: Optional[str] = None) -> Tensor: if dtype == 'float32': real_dtype = torch.float32 else: real_dtype = {'float16': torch.float16, 'float64': torch.float64}[dtype] - return torch.tensor(data, dtype=real_dtype) + + if device is None: + device = current_device() + return torch.tensor(data, dtype=real_dtype, device=device) + + +@jit +def float_scalar_like(data: float, like: Tensor) -> Tensor: + return torch.tensor(data, dtype=like.dtype, device=like.device) @jit -def int_scalar(data: int, dtype: str = 'int32') -> Tensor: +def int_scalar(data: int, + dtype: str = 'int32', + device: Optional[str] = None) -> Tensor: if dtype == 'int32': int_dtype = torch.int32 else: int_dtype = {'int8': torch.int8, 'int16': torch.int16, 'int64': torch.int64}[dtype] - return torch.tensor(data, dtype=int_dtype) + + if device is None: + device = current_device() + return torch.tensor(data, dtype=int_dtype, device=device) @jit -def zeros(shape: List[int], dtype: str = settings.float_x) -> Tensor: +def int_scalar_like(data: int, like: Tensor) -> Tensor: + return torch.tensor(data, dtype=like.dtype, device=like.device) + + +@jit +def zeros(shape: List[int], + dtype: str = settings.float_x, + device: Optional[str] = None) -> Tensor: if dtype == 'float32': target_dtype = torch.float32 elif dtype == 'int32': target_dtype = torch.int32 else: target_dtype = {'int8': torch.int8, 'uint8': torch.uint8, 'int16': torch.int16, 'int64': torch.int64, 'float16': torch.float16, 'float64': torch.float64, 'bool': torch.bool}[dtype] - return torch.zeros(shape, dtype=target_dtype) + + if device is None: + device = current_device() + return torch.zeros(shape, dtype=target_dtype, device=device) @jit @@ -322,18 +406,23 @@ def zeros_like(input: Tensor, target_dtype = input.dtype if shape is None: shape = list(input.shape) - return torch.zeros(shape, dtype=target_dtype) + return torch.zeros(shape, dtype=target_dtype, device=input.device) @jit -def ones(shape: List[int], dtype: str = settings.float_x) -> Tensor: +def ones(shape: List[int], + dtype: str = settings.float_x, + device: Optional[str] = None) -> Tensor: if dtype == 'float32': target_dtype = torch.float32 elif dtype == 'int32': target_dtype = torch.int32 else: target_dtype = {'int8': torch.int8, 'uint8': torch.uint8, 'int16': torch.int16, 'int64': torch.int64, 'float16': torch.float16, 'float64': torch.float64, 'bool': torch.bool}[dtype] - return torch.ones(shape, dtype=target_dtype) + + if device is None: + device = current_device() + return torch.ones(shape, dtype=target_dtype, device=device) @jit @@ -351,20 +440,24 @@ def ones_like(input: Tensor, target_dtype = input.dtype if shape is None: shape = list(input.shape) - return torch.ones(shape, dtype=target_dtype) + return torch.ones(shape, dtype=target_dtype, device=input.device) @jit def full(shape: List[int], fill_value: float, - dtype: str = settings.float_x) -> Tensor: + dtype: str = settings.float_x, + device: Optional[str] = None) -> Tensor: if dtype == 'float32': target_dtype = torch.float32 elif dtype == 'int32': target_dtype = torch.int32 else: target_dtype = {'int8': torch.int8, 'uint8': torch.uint8, 'int16': torch.int16, 'int64': torch.int64, 'float16': torch.float16, 'float64': torch.float64, 
'bool': torch.bool}[dtype] - return torch.full(shape, fill_value, dtype=target_dtype) + + if device is None: + device = current_device() + return torch.full(shape, fill_value, dtype=target_dtype, device=device) @jit @@ -383,18 +476,22 @@ def full_like(input: Tensor, target_dtype = input.dtype if shape is None: shape = list(input.shape) - return torch.full(shape, fill_value, dtype=target_dtype) + return torch.full(shape, fill_value, dtype=target_dtype, device=input.device) @jit -def arange(start: int, end: int, step: int = 1, dtype: str = 'int32') -> Tensor: +def arange(start: int, end: int, step: int = 1, dtype: str = 'int32', + device: Optional[str] = None) -> Tensor: if dtype == 'float32': target_dtype = torch.float32 elif dtype == 'int32': target_dtype = torch.int32 else: target_dtype = {'int8': torch.int8, 'uint8': torch.uint8, 'int16': torch.int16, 'int64': torch.int64, 'float16': torch.float16, 'float64': torch.float64, 'bool': torch.bool}[dtype] - return torch.arange(start, end, step, dtype=target_dtype) + + if device is None: + device = current_device() + return torch.arange(start, end, step, dtype=target_dtype, device=device) @jit @@ -430,6 +527,7 @@ def to_numpy(input: Tensor) -> np.ndarray: # ---- variable and initializer ---- def variable(shape: List[int], dtype: Union[str, torch.dtype] = settings.float_x, + device: Optional[str] = None, initializer: Optional[ Union[ int, float, np.ndarray, Tensor, @@ -444,6 +542,7 @@ def variable(shape: List[int], Args: shape: Shape of the variable. dtype: Dtype of the variable. + device: The device where to place new tensors and variables. initializer: The variable initializer. It may be a scalar (which will be filled into the new variable), an array or another `Tensor` with the same shape as specified `shape`, or a callable @@ -467,28 +566,31 @@ def variable(shape: List[int], else: target_dtype = dtype + if device is None: + device = current_device() + if isinstance(initializer, (int, float)): ret = torch.full(shape, float(initializer), dtype=target_dtype, - requires_grad=requires_grad) + device=device, requires_grad=requires_grad) elif isinstance(initializer, np.ndarray) and initializer.shape == (): ret = torch.full(shape, initializer.tolist(), dtype=target_dtype, - requires_grad=requires_grad) + device=device, requires_grad=requires_grad) elif isinstance(initializer, (np.ndarray, Tensor)): if list(initializer.shape) != shape: raise ValueError(f'`initializer.shape` != `shape`: ' f'{list(initializer.shape)} vs {shape}') ret = as_tensor(initializer, dtype=target_dtype, - force_copy=force_copy) + device=device, force_copy=force_copy) if requires_grad: ret.requires_grad_(True) elif isinstance(initializer, Callable): - ret = zeros(shape, dtype=dtype) + ret = zeros(shape, device=device, dtype=dtype) with torch.no_grad(): initializer(ret) if requires_grad: ret.requires_grad_(True) elif initializer is None: - ret = torch.zeros(shape, dtype=target_dtype, + ret = torch.zeros(shape, dtype=target_dtype, device=device, requires_grad=requires_grad) else: raise TypeError(f'Unsupported initializer: {initializer!r}') diff --git a/tensorkit/backend/pytorch_/flows.py b/tensorkit/backend/pytorch_/flows.py index 216bc72..df28aec 100644 --- a/tensorkit/backend/pytorch_/flows.py +++ b/tensorkit/backend/pytorch_/flows.py @@ -353,7 +353,8 @@ class LooseInvertibleMatrix(InvertibleMatrix): def __init__(self, seed_matrix: np.ndarray, - dtype: str = settings.float_x): + dtype: str = settings.float_x, + device: Optional[str] = None): """ Construct a new 
:class:`LooseInvertibleMatrix`. @@ -361,11 +362,16 @@ def __init__(self, seed_matrix: A matrix that is used as a seed to obtain the initial invertible and orthogonal matrix. dtype: The dtype of the matrix. + device: The device where to place new tensors and variables. """ + device = device or current_device() initial_matrix = la.qr(seed_matrix)[0] super().__init__(initial_matrix.shape[0]) - add_parameter(self, 'matrix', from_numpy(initial_matrix, dtype=dtype)) + add_parameter( + self, 'matrix', + from_numpy(initial_matrix, dtype=dtype, device=device) + ) def forward(self, inverse: bool, @@ -392,6 +398,7 @@ class StrictInvertibleMatrix(InvertibleMatrix): def __init__(self, seed_matrix: np.ndarray, dtype: str = settings.float_x, + device: Optional[str] = None, epsilon: float = EPSILON): """ Construct a new :class:`StrictInvertibleMatrix`. @@ -400,8 +407,12 @@ def __init__(self, seed_matrix: A matrix that is used as a seed to obtain the initial invertible and orthogonal matrix. dtype: The dtype of the matrix. + device: The device where to place new tensors and variables. + epsilon: The infinitesimal constant to avoid dividing by zero or + taking logarithm of zero. """ initial_matrix = la.qr(seed_matrix)[0] + device = device or current_device() super().__init__(initial_matrix.shape[0]) matrix_shape = list(initial_matrix.shape) @@ -413,25 +424,29 @@ def __init__(self, initial_log_s = np.log(np.maximum(np.abs(initial_s), epsilon)) initial_U = np.triu(initial_U, k=1) - add_buffer(self, 'P', from_numpy(initial_P, dtype=dtype)) + add_buffer(self, 'P', from_numpy(initial_P, dtype=dtype, device=device)) assert_finite( - add_parameter(self, 'pre_L', from_numpy(initial_L, dtype=dtype)), + add_parameter( + self, 'pre_L', from_numpy(initial_L, dtype=dtype, device=device)), 'pre_L', ) add_buffer( - self, 'L_mask', - from_numpy(np.tril(np.ones(matrix_shape), k=-1), dtype=dtype)) + self, 'L_mask', from_numpy( + np.tril(np.ones(matrix_shape), k=-1), dtype=dtype, device=device) + ) assert_finite( - add_parameter(self, 'pre_U', from_numpy(initial_U, dtype=dtype)), + add_parameter(self, 'pre_U', from_numpy( + initial_U, dtype=dtype, device=device)), 'pre_U', ) add_buffer( - self, 'U_mask', - from_numpy(np.triu(np.ones(matrix_shape), k=1), dtype=dtype)) + self, 'U_mask', from_numpy( + np.triu(np.ones(matrix_shape), k=1), dtype=dtype, device=device)) add_buffer( - self, 'sign', from_numpy(initial_sign, dtype=dtype)) + self, 'sign', from_numpy(initial_sign, dtype=dtype, device=device)) assert_finite( - add_parameter(self, 'log_s', from_numpy(initial_log_s, dtype=dtype)), + add_parameter(self, 'log_s', from_numpy( + initial_log_s, dtype=dtype, device=device)), 'log_s', ) @@ -441,7 +456,7 @@ def forward(self, ) -> Tuple[Tensor, Optional[Tensor]]: P = self.P L = (self.L_mask * self.pre_L + - torch.eye(self.size, dtype=P.dtype)) + torch.eye(self.size, dtype=P.dtype, device=self.P.device)) U = self.U_mask * self.pre_U + torch.diag(self.sign * exp(self.log_s)) log_det: Optional[Tensor] = None @@ -476,6 +491,7 @@ def __init__(self, strict: bool = False, weight_init: TensorInitArgType = init.kaming_uniform, dtype: str = settings.float_x, + device: Optional[str] = None, epsilon: float = EPSILON): """ Construct a new linear transformation flow. @@ -489,9 +505,12 @@ def __init__(self, and :class:`StrictInvertibleMatrix`. weight_init: The weight initializer for the seed matrix. dtype: The dtype of the invertible matrix. + device: The device where to place new tensors and variables. 
epsilon: The infinitesimal constant to avoid having numerical issues. """ spatial_ndims = self._get_spatial_ndims() + device = device or current_device() + super().__init__( axis=-(spatial_ndims + 1), event_ndims=(spatial_ndims + 1), @@ -506,16 +525,17 @@ def __init__(self, # will allow the backend random seed to have effect on the initialization # step of the invertible matrix. seed_matrix = variable( - shape=[num_features, num_features], dtype=dtype, + shape=[num_features, num_features], dtype=dtype, device='cpu', initializer=weight_init, requires_grad=False, ) + seed_matrix = to_numpy(seed_matrix) if strict: self.invertible_matrix = StrictInvertibleMatrix( - to_numpy(seed_matrix), dtype=dtype, epsilon=epsilon) + seed_matrix, dtype=dtype, device=device, epsilon=epsilon) else: self.invertible_matrix = LooseInvertibleMatrix( - to_numpy(seed_matrix), dtype=dtype) + seed_matrix, dtype=dtype, device=device) def _get_spatial_ndims(self) -> int: raise NotImplementedError() @@ -792,12 +812,12 @@ def _scale_and_log_scale(self, if inverse: scale = 1. / pre_scale if compute_log_scale: - epsilon = as_tensor_backend(self.epsilon, dtype=pre_scale.dtype) + epsilon = float_scalar_like(self.epsilon, pre_scale) log_scale = -log(maximum(abs(pre_scale), epsilon)) else: scale = pre_scale if compute_log_scale: - epsilon = as_tensor_backend(self.epsilon, dtype=pre_scale.dtype) + epsilon = float_scalar_like(self.epsilon, pre_scale) log_scale = log(maximum(abs(pre_scale), epsilon)) return scale, log_scale diff --git a/tensorkit/backend/pytorch_/layers.py b/tensorkit/backend/pytorch_/layers.py index 53fdc72..4f16167 100644 --- a/tensorkit/backend/pytorch_/layers.py +++ b/tensorkit/backend/pytorch_/layers.py @@ -2,21 +2,24 @@ import torch from torch import nn as torch_nn +from torch.jit import script as torch_script from torch.nn import ModuleList -from . import init -from .core import * +from ...settings_ import settings from ...typing_ import * from ...arg_check import * +from . import init +from .core import * __all__ = [ # constants 'DEFAULT_GATE_BIAS', 'DEFAULT_WEIGHT_INIT', 'DEFAULT_BIAS_INIT', # utils + 'jit_compile', 'is_jit_layer', 'layer_to_device', 'add_parameter', 'get_parameter', 'get_parameters', 'get_named_parameters', 'add_buffer', 'get_buffer', 'get_buffers', 'get_named_buffers', - 'set_train_mode', + 'set_train_mode', 'set_eval_mode', # parameter store modules 'ParamStore', 'SimpleParamStore', @@ -49,6 +52,36 @@ # ---- utils ---- +def jit_compile(m: Module) -> Module: + if not settings.disable_jit: + m = torch_script(m) + return m + + +def is_jit_layer(layer: Module) -> bool: + """Check whether or not `layer` is a JIT compiled layer.""" + return isinstance(layer, torch.jit.ScriptModule) + + +def layer_to_device(layer: Module, device: Optional[str] = None) -> Module: + """ + Move the specified module or layer to the given device. + The module or layer may be changed in-place. + + Args: + layer: The module or layer to be moved. + device: The device to move the module or layer to. + If not specified, will move to ``T.current_device()``. + + Returns: + The layer instance.
+ """ + if device is None: + device = current_device() + layer = layer.to(device=torch.device(device)) + return layer + + def add_parameter(layer: Module, name: str, value: Optional[Tensor], @@ -103,14 +136,27 @@ def set_train_mode(layer: Module, training: bool = True): return layer +def set_eval_mode(layer: Module): + layer.train(False) + return layer + + # ---- weight wrapper: a simple weight, or a normed weight ---- class _NullParamStore(Module): # This module is actually not used in any context. # It is just a place-holder module, to gain JIT support. + __constants__ = ('device',) + + device: str + + def __init__(self, device: Optional[str] = None): + super().__init__() + self.device = device or current_device() + def forward(self) -> Tensor: # pragma: no cover zero_shape: List[int] = [] - return torch.zeros(zero_shape, dtype=torch.float32) + return zeros(zero_shape, dtype='float32', device=self.device) class ParamStore(Module): @@ -146,9 +192,12 @@ class SimpleParamStore(ParamStore): def __init__(self, shape: List[int], - initializer: TensorInitArgType): + initializer: TensorInitArgType, + device: Optional[str] = None): + device = device or current_device() super().__init__(shape) - add_parameter(self, 'value', variable(shape, initializer=initializer)) + add_parameter(self, 'value', variable( + shape, initializer=initializer, device=device)) @jit_method def get(self) -> Tensor: @@ -177,7 +226,7 @@ def weight_norm_decompose(weight: Tensor, A tuple of `(v, v_norm)`. """ v_norm = norm_except_axis(weight, axis=[norm_axis], keepdims=True) - v = weight / torch.max(v_norm, torch.as_tensor(epsilon, dtype=v_norm.dtype)) + v = weight / torch.max(v_norm, float_scalar_like(epsilon, v_norm)) return v, v_norm @@ -193,12 +242,15 @@ def __init__(self, shape: List[int], initializer: TensorInitArgType, norm_axis: int = 1, + device: Optional[str] = None, epsilon: float = EPSILON): + device = device or current_device() + super().__init__(shape) self.norm_axis = norm_axis self.epsilon = epsilon - weight = variable(shape, initializer=initializer) + weight = variable(shape, initializer=initializer, device=device) with no_grad(): v, _ = weight_norm_decompose(weight, norm_axis, epsilon) add_parameter(self, 'v', v) @@ -211,7 +263,7 @@ def get(self) -> Tensor: def set(self, value: TensorOrData) -> None: with no_grad(): v, _ = weight_norm_decompose( - as_tensor(value, dtype=get_dtype(self.v)), + as_tensor(value, dtype=get_dtype(self.v), device=get_device(self.v)), self.norm_axis, self.epsilon, ) @@ -230,12 +282,15 @@ def __init__(self, shape: List[int], initializer: TensorInitArgType, norm_axis: int = 1, + device: Optional[str] = None, epsilon: float = EPSILON): + device = device or current_device() + super().__init__(shape) self.norm_axis = norm_axis self.epsilon = epsilon - weight = variable(shape, initializer=initializer) + weight = variable(shape, initializer=initializer, device=device) with no_grad(): v, g = weight_norm_decompose(weight, norm_axis, epsilon) add_parameter(self, 'v', v) @@ -249,7 +304,7 @@ def get(self) -> Tensor: def set(self, value: TensorOrData) -> None: with no_grad(): v, g = weight_norm_decompose( - as_tensor(value, dtype=get_dtype(self.v)), + as_tensor(value, dtype=get_dtype(self.v), device=get_device(self.v)), self.norm_axis, self.epsilon, ) @@ -260,7 +315,8 @@ def set(self, value: TensorOrData) -> None: def get_weight_store(shape: List[int], initializer: TensorInitArgType = DEFAULT_WEIGHT_INIT, norm_axis: int = 1, - weight_norm:
WeightNormArgType = False, + device: Optional[str] = None, ) -> ParamStore: """ Create a module which carries the `weight` parameter. @@ -273,16 +329,18 @@ def get_weight_store(shape: List[int], Use `NormedAndScaledWeightStore` if `True` or `WeightNormMode.FULL`. Use `NormedWeightStore` if `WeightNormMode.NO_SCALE`. Use `WeightStore` if `False` or `WeightNormMode.NONE`. + device: The device where to place new tensors and variables. Returns: The weight object. """ + device = device or current_device() if weight_norm is True or weight_norm == WeightNormMode.FULL: - return NormedAndScaledWeightStore(shape, initializer, norm_axis) + return NormedAndScaledWeightStore(shape, initializer, norm_axis, device) elif weight_norm == WeightNormMode.NO_SCALE: - return NormedWeightStore(shape, initializer, norm_axis) + return NormedWeightStore(shape, initializer, norm_axis, device) elif weight_norm is False or weight_norm == WeightNormMode.NONE: - return SimpleParamStore(shape, initializer) + return SimpleParamStore(shape, initializer, device) else: raise ValueError(f'Invalid value for argument `weight_norm`: ' f'{weight_norm!r}.') @@ -290,7 +348,8 @@ def get_weight_store(shape: List[int], def get_bias_store(shape: List[int], initializer: TensorInitArgType = DEFAULT_BIAS_INIT, - use_bias: bool = True + use_bias: bool = True, + device: Optional[str] = None ) -> Optional[ParamStore]: """ Create a module that carries the `bias` parameter. @@ -300,12 +359,14 @@ def get_bias_store(shape: List[int], initializer: The initializer for the bias. use_bias: Whether or not to use the bias? If `False`, will return :obj:`None`. + device: The device where to place new tensors and variables. Returns: The bias object, or :obj:`None` if `use_bias` is False. """ + device = device or current_device() if use_bias: - return SimpleParamStore(shape, initializer) + return SimpleParamStore(shape, initializer, device) # ---- identity layer ---- @@ -393,13 +454,17 @@ def __init__(self, weight_init: TensorInitArgType = DEFAULT_WEIGHT_INIT, bias_init: TensorInitArgType = DEFAULT_BIAS_INIT, data_init: Optional[DataInitArgType] = None, + device: Optional[str] = None, ): + device = device or current_device() weight_store = get_weight_store( - weight_shape, initializer=weight_init, weight_norm=weight_norm) + weight_shape, initializer=weight_init, weight_norm=weight_norm, + device=device + ) bias_store = get_bias_store( - bias_shape, initializer=bias_init, use_bias=use_bias) + bias_shape, initializer=bias_init, use_bias=use_bias, device=device) if bias_store is None: - bias_store = _NullParamStore() + bias_store = _NullParamStore(device=device) if data_init is not None: if not isinstance(data_init, init.DataDependentInitializer) and \ @@ -453,6 +518,7 @@ def __init__(self, weight_init: TensorInitArgType = DEFAULT_WEIGHT_INIT, bias_init: TensorInitArgType = DEFAULT_BIAS_INIT, data_init: Optional[DataInitArgType] = None, + device: Optional[str] = None, ): in_features = validate_positive_int('in_features', in_features) out_features = validate_positive_int('out_features', out_features) @@ -468,6 +534,7 @@ def __init__(self, weight_init=weight_init, bias_init=bias_init, data_init=data_init, + device=device, ) @jit_method @@ -504,6 +571,7 @@ def __init__(self, weight_init: TensorInitArgType = DEFAULT_WEIGHT_INIT, bias_init: TensorInitArgType = DEFAULT_BIAS_INIT, data_init: Optional[DataInitArgType] = None, + device: Optional[str] = None, ): spatial_ndims = self._get_spatial_ndims() in_channels = validate_positive_int('in_channels', in_channels) 
@@ -530,6 +598,7 @@ def __init__(self, weight_init=weight_init, bias_init=bias_init, data_init=data_init, + device=device, ) def _get_spatial_ndims(self) -> int: @@ -632,6 +701,7 @@ def __init__(self, weight_init: TensorInitArgType = DEFAULT_WEIGHT_INIT, bias_init: TensorInitArgType = DEFAULT_BIAS_INIT, data_init: Optional[DataInitArgType] = None, + device: Optional[str] = None, ): spatial_ndims = self._get_spatial_ndims() in_channels = validate_positive_int('in_channels', in_channels) @@ -661,6 +731,7 @@ def __init__(self, weight_init=weight_init, bias_init=bias_init, data_init=data_init, + device=device, ) def _get_spatial_ndims(self) -> int: @@ -767,8 +838,12 @@ class BatchNorm(torch_nn.BatchNorm1d): def __init__(self, num_features: int, momentum: float = 0.1, + device: Optional[str] = None, epsilon: float = EPSILON): + device = device or current_device() super().__init__(num_features, eps=epsilon, momentum=momentum) + if device != CPU_DEVICE: + self.to(device=device) def _check_input_dim(self, input: Tensor): if rank(input) != 2: @@ -782,8 +857,12 @@ class BatchNorm1d(torch_nn.BatchNorm1d): def __init__(self, num_features: int, momentum: float = 0.1, + device: Optional[str] = None, epsilon: float = EPSILON): + device = device or current_device() super().__init__(num_features, eps=epsilon, momentum=momentum) + if device != CPU_DEVICE: + self.to(device=device) def _check_input_dim(self, input: Tensor): if rank(input) != 3: @@ -797,8 +876,12 @@ class BatchNorm2d(torch_nn.BatchNorm2d): def __init__(self, num_features: int, momentum: float = 0.1, + device: Optional[str] = None, epsilon: float = EPSILON): + device = device or current_device() super().__init__(num_features, eps=epsilon, momentum=momentum) + if device != CPU_DEVICE: + self.to(device=device) def _check_input_dim(self, input: Tensor): if input.dim() != 4: @@ -812,8 +895,12 @@ class BatchNorm3d(torch_nn.BatchNorm3d): def __init__(self, num_features: int, momentum: float = 0.1, + device: Optional[str] = None, epsilon: float = EPSILON): + device = device or current_device() super().__init__(num_features, eps=epsilon, momentum=momentum) + if device != CPU_DEVICE: + self.to(device=device) def _check_input_dim(self, input: Tensor): if rank(input) != 5: @@ -843,11 +930,12 @@ def forward(self, input: Tensor) -> Tensor: raise ValueError('`input` must be at least 2d, but the ' 'input shape is {}.'.format(shape(input))) + device = input.device output = input if self.training: noise_shape = output.shape[:-1] + (1,) - noise = torch.zeros(noise_shape, dtype=output.dtype) - keep_prob = torch.as_tensor(self._keep_prob, dtype=output.dtype) + noise = torch.zeros(noise_shape, dtype=output.dtype, device=device) + keep_prob = torch.as_tensor(self._keep_prob, dtype=output.dtype, device=device) noise = torch.bernoulli(keep_prob.expand(noise_shape), out=noise) noise = noise.detach() output = output * noise / keep_prob diff --git a/tensorkit/backend/pytorch_/random.py b/tensorkit/backend/pytorch_/random.py index b46cf25..aec0f2b 100644 --- a/tensorkit/backend/pytorch_/random.py +++ b/tensorkit/backend/pytorch_/random.py @@ -37,26 +37,34 @@ def seed(seed: int): torch.manual_seed(seed) + torch.cuda.manual_seed(seed) + torch.cuda.manual_seed_all(seed) # ---- uniform distribution ---- @jit -def rand(shape: List[int], dtype: str = settings.float_x) -> Tensor: +def rand(shape: List[int], + dtype: str = settings.float_x, + device: Optional[str] = None) -> Tensor: if dtype == 'float32': real_dtype = torch.float32 else: real_dtype = {'float16': torch.float16, 
'float64': torch.float64}[dtype] - return torch.rand(shape, dtype=real_dtype) + + if device is None: + device = current_device() + return torch.rand(shape, dtype=real_dtype, device=device) @jit def uniform(shape: List[int], low: float, high: float, - dtype: str = settings.float_x) -> Tensor: + dtype: str = settings.float_x, + device: Optional[str] = None) -> Tensor: if low >= high: raise ValueError('`low` < `high` does not hold: low == {}, high == {}'. format(low, high)) scale = high - low - return rand(shape, dtype) * scale + low + return rand(shape, dtype, device=device) * scale + low # ---- shuffle and random permutation ---- @@ -64,7 +72,8 @@ def uniform(shape: List[int], low: float, high: float, def shuffle(input: Tensor, axis: int = 0) -> Tensor: input_shape = input.shape shuffle_size = input_shape[axis] - permutation = torch.randperm(shuffle_size, dtype=torch.long) + permutation = torch.randperm( + shuffle_size, dtype=torch.long, device=input.device) if axis == 0: return input[permutation] else: @@ -72,22 +81,32 @@ def shuffle(input: Tensor, axis: int = 0) -> Tensor: @jit -def random_permutation(n: int, dtype: str = 'int32') -> Tensor: +def random_permutation(n: int, + dtype: str = 'int32', + device: Optional[str] = None) -> Tensor: if dtype == 'int32': int_dtype = torch.int32 else: int_dtype = {'int8': torch.int8, 'int16': torch.int16, 'int64': torch.int64}[dtype] - return torch.randperm(n, dtype=int_dtype) + + if device is None: + device = current_device() + return torch.randperm(n, dtype=int_dtype, device=device) # ---- normal distribution ---- @jit -def randn(shape: List[int], dtype: str = settings.float_x) -> Tensor: +def randn(shape: List[int], + dtype: str = settings.float_x, + device: Optional[str] = None,) -> Tensor: if dtype == 'float32': real_dtype = torch.float32 else: real_dtype = {'float16': torch.float16, 'float64': torch.float64}[dtype] - return torch.randn(shape, dtype=real_dtype) + + if device is None: + device = current_device() + return torch.randn(shape, dtype=real_dtype, device=device) @jit @@ -116,7 +135,7 @@ def normal(mean: Tensor, param_shape = broadcast_shape(shape(mean), shape(std)) if n_samples is not None: param_shape = [n_samples] + param_shape - r = std * torch.randn(param_shape, dtype=mean.dtype) + mean + r = std * torch.randn(param_shape, dtype=mean.dtype, device=mean.device) + mean if not reparameterized: r = r.detach() return r @@ -181,7 +200,7 @@ def bernoulli(probs: Tensor, if n_samples is not None: sample_shape = (n_samples,) + sample_shape probs = probs.unsqueeze(dim=0).expand(sample_shape) - out = torch.zeros(sample_shape, dtype=target_dtype) + out = torch.zeros(sample_shape, dtype=target_dtype, device=probs.device) return torch.bernoulli(probs, out=out).detach() diff --git a/tensorkit/distributions/base.py b/tensorkit/distributions/base.py index 7adf2bd..81ed490 100644 --- a/tensorkit/distributions/base.py +++ b/tensorkit/distributions/base.py @@ -70,6 +70,9 @@ class Distribution(metaclass=DocInherit): ``value_shape == batch_shape + event_shape``. """ + device: str + """Device, where the parameters of this distribution is placed.""" + validate_tensors: bool """ Whether or not to perform time-consuming validation on argument tensors @@ -85,6 +88,7 @@ def __init__(self, reparameterized: Optional[bool] = None, event_ndims: Optional[int] = None, min_event_ndims: Optional[int] = None, + device: Optional[str] = None, validate_tensors: Optional[bool] = None): # either `value_shape` or `batch_shape` should be specified, but not both. 
if value_shape is None and batch_shape is None: @@ -158,6 +162,7 @@ def __init__(self, self.batch_shape = batch_shape self.event_shape = event_shape self.event_ndims = event_ndims + self.device = device or T.current_device() self.validate_tensors = ( settings.validate_tensors if validate_tensors is None else bool(validate_tensors) @@ -251,7 +256,8 @@ def log_prob(self, Returns: The computed log-prob or log-density. """ - given = T.as_tensor(given) + if not isinstance(given, T.Tensor): + given = T.as_tensor(given, device=self.device) reduce_ndims = get_prob_reduce_ndims( # here `given` might have lower rank than `len(value_shape)`, # in which case `given` should be broadcasted to match `value_shape`. diff --git a/tensorkit/distributions/bernoulli.py b/tensorkit/distributions/bernoulli.py index a13b98b..3000bdf 100644 --- a/tensorkit/distributions/bernoulli.py +++ b/tensorkit/distributions/bernoulli.py @@ -40,6 +40,7 @@ def __init__(self, dtype: str = T.int32, event_ndims: int = 0, epsilon: float = T.EPSILON, + device: Optional[str] = None, validate_tensors: Optional[bool] = None): """ Construct a new :class:`Bernoulli` distribution object. @@ -53,12 +54,14 @@ def __init__(self, event_ndims: The number of dimensions in the samples to be considered as an event. epsilon: The infinitesimal constant, used for computing `logits`. + device: The device where to place new tensors and variables. validate_tensors: Whether or not to check the numerical issues? Defaults to ``settings.validate_tensors``. """ # validate the arguments (logits, probs), = check_tensor_arg_types([('logits', logits), - ('probs', probs)]) + ('probs', probs)], + device=device) if logits is not None: value_shape = T.shape(logits) mutual_params = {'logits': logits} @@ -72,6 +75,7 @@ def __init__(self, dtype=dtype, value_shape=value_shape, event_ndims=event_ndims, + device=device or T.get_device(logits), validate_tensors=validate_tensors, ) for k, v in mutual_params.items(): @@ -130,7 +134,8 @@ def copy(self, **overrided_params): return copy_distribution( cls=Bernoulli, base=self, - attrs=('dtype', 'event_ndims', 'validate_tensors', 'epsilon'), + attrs=('dtype', 'device', 'event_ndims', 'validate_tensors', + 'epsilon'), mutual_attrs=(('logits', 'probs'),), compute_deps={'logits': ('epsilon',)}, original_mutual_params=self._mutual_params, diff --git a/tensorkit/distributions/categorical.py b/tensorkit/distributions/categorical.py index ec7eda1..1140474 100644 --- a/tensorkit/distributions/categorical.py +++ b/tensorkit/distributions/categorical.py @@ -36,9 +36,11 @@ def __init__(self, dtype: str, event_ndims: int, epsilon: float = T.EPSILON, + device: Optional[str] = None, validate_tensors: Optional[bool] = None): (logits, probs), = check_tensor_arg_types([('logits', logits), - ('probs', probs)]) + ('probs', probs)], + device=device) if logits is not None: param_shape = T.shape(logits) mutual_params = {'logits': logits} @@ -59,6 +61,7 @@ def __init__(self, dtype=dtype, value_shape=value_shape, event_ndims=event_ndims, + device=device or T.get_device(logits), validate_tensors=validate_tensors, ) for k, v in mutual_params.items(): @@ -117,7 +120,7 @@ def copy(self, **overrided_params): return copy_distribution( cls=self.__class__, base=self, - attrs=('dtype', 'event_ndims', 'validate_tensors', 'epsilon'), + attrs=('dtype', 'event_ndims', 'epsilon', 'device', 'validate_tensors'), mutual_attrs=(('logits', 'probs'),), compute_deps={'logits': ('epsilon',)}, original_mutual_params=self._mutual_params, @@ -143,6 +146,7 @@ def __init__(self, 
dtype: str = T.categorical_dtype, event_ndims: int = 0, epsilon: float = T.EPSILON, + device: Optional[str] = None, validate_tensors: Optional[bool] = None): """ Construct a new :class:`Categorical` distribution object. @@ -165,8 +169,9 @@ def __init__(self, probs=probs, dtype=dtype, event_ndims=event_ndims, - validate_tensors=validate_tensors, epsilon=epsilon, + device=device, + validate_tensors=validate_tensors, ) def _sample(self, @@ -195,7 +200,7 @@ def to_one_hot(self, dtype: str = T.int32) -> 'OneHotCategorical': return copy_distribution( cls=OneHotCategorical, base=self, - attrs=('dtype', 'validate_tensors', 'event_ndims', 'epsilon'), + attrs=('dtype', 'event_ndims', 'epsilon', 'device', 'validate_tensors'), mutual_attrs=(('logits', 'probs'),), compute_deps={'logits': ('epsilon',)}, original_mutual_params=self._mutual_params, @@ -223,6 +228,7 @@ def __init__(self, dtype: str = T.int32, event_ndims: int = 1, epsilon: float = T.EPSILON, + device: Optional[str] = None, validate_tensors: Optional[bool] = None): """ Construct a new :class:`OneHotCategorical` distribution object. @@ -234,6 +240,7 @@ def __init__(self, probs: The probability `p` of being each possible value. ``p = softmax(logits)``. dtype: The dtype of the samples. + device: The device where to place new tensors and variables. event_ndims: The number of dimensions in the samples to be considered as an event. epsilon: The infinitesimal constant, used for computing `logits`. @@ -245,8 +252,9 @@ def __init__(self, probs=probs, dtype=dtype, event_ndims=event_ndims, - validate_tensors=validate_tensors, epsilon=epsilon, + device=device, + validate_tensors=validate_tensors, ) def _sample(self, @@ -272,7 +280,7 @@ def to_indexed(self, dtype: str = T.categorical_dtype) -> 'Categorical': return copy_distribution( cls=Categorical, base=self, - attrs=('dtype', 'validate_tensors', 'event_ndims', 'epsilon'), + attrs=('dtype', 'event_ndims', 'epsilon', 'device', 'validate_tensors'), mutual_attrs=(('logits', 'probs'),), compute_deps={'logits': ('epsilon',)}, original_mutual_params=self._mutual_params, diff --git a/tensorkit/distributions/discretized.py b/tensorkit/distributions/discretized.py index dad8f19..549cc8a 100644 --- a/tensorkit/distributions/discretized.py +++ b/tensorkit/distributions/discretized.py @@ -65,6 +65,7 @@ def __init__(self, reparameterized: bool = False, event_ndims: int = 0, epsilon: float = T.EPSILON, + device: Optional[str] = None, validate_tensors: Optional[bool] = None): """ Construct a new :class:`DiscretizedLogistic`. @@ -87,6 +88,7 @@ def __init__(self, considered as an event. epsilon: An infinitesimal constant to avoid dividing by zero or taking logarithm of zero. + device: The device where to place new tensors and variables. validate_tensors: Whether or not to check the numerical issues? Defaults to ``settings.validate_tensors``. 
""" @@ -96,8 +98,8 @@ def __init__(self, '`discretize_sample` is True.') mean, log_scale = check_tensor_arg_types( - ('mean', mean), ('log_scale', log_scale)) - log_scale = T.as_tensor_backend(log_scale, dtype=mean.dtype) + ('mean', mean), ('log_scale', log_scale), device=device) + log_scale = T.cast_like(log_scale, mean) dtype = T.get_dtype(mean) if min_val is not None and max_val is not None: @@ -127,6 +129,7 @@ def __init__(self, continuous=not discretize_sample, reparameterized=reparameterized, event_ndims=event_ndims, + device=device or T.get_device(mean), validate_tensors=validate_tensors, ) self.mean = mean @@ -188,7 +191,8 @@ def copy(self, **overrided_params): attrs=( 'mean', 'log_scale', 'bin_size', 'min_val', 'max_val', 'biased_edges', 'discretize_given', 'discretize_sample', - 'reparameterized', 'event_ndims', 'epsilon', 'validate_tensors' + 'reparameterized', 'event_ndims', 'epsilon', 'device', + 'validate_tensors', ), overrided_params=overrided_params, ) diff --git a/tensorkit/distributions/flow.py b/tensorkit/distributions/flow.py index 6064903..714d268 100644 --- a/tensorkit/distributions/flow.py +++ b/tensorkit/distributions/flow.py @@ -2,6 +2,7 @@ from .. import tensor as T from ..flows import Flow +from ..layers import is_jit_layer from ..stochastic import StochasticTensor from .base import Distribution from .utils import copy_distribution, get_overrided_parameterized @@ -34,7 +35,7 @@ def __init__(self, if not isinstance(distribution, Distribution): raise TypeError(f'`distribution` is not an instance of ' f'`Distribution`: got {distribution!r}') - if not isinstance(flow, Flow) and not T.is_jit_layer(flow): + if not isinstance(flow, Flow) and not is_jit_layer(flow): raise TypeError(f'`flow` is not a flow: {flow!r}') # `distribution` is required to be continuous and have float dtype. @@ -93,7 +94,8 @@ def __init__(self, super(FlowDistribution, self).__init__( dtype=dtype, batch_shape=batch_shape, continuous=continuous, reparameterized=reparameterized, event_ndims=event_ndims, - min_event_ndims=min_event_ndims, validate_tensors=validate_tensors, + min_event_ndims=min_event_ndims, device=distribution.device, + validate_tensors=validate_tensors, ) self._base_distribution = distribution self.flow = flow diff --git a/tensorkit/distributions/mixture.py b/tensorkit/distributions/mixture.py index 648be13..59c6ae1 100644 --- a/tensorkit/distributions/mixture.py +++ b/tensorkit/distributions/mixture.py @@ -98,7 +98,7 @@ def __init__(self, validate_tensors = True # attributes of `components` - for attr in ('dtype', 'continuous', 'event_ndims', 'batch_shape'): + for attr in ('dtype', 'continuous', 'event_ndims', 'batch_shape', 'device'): c0_val = getattr(components[0], attr) for i, c in enumerate(components[1:], 1): c_val = getattr(c, attr) @@ -108,15 +108,18 @@ def __init__(self, f'{c_val} vs {c0_val}.' ) dtype = components[0].dtype + device = components[0].device continuous = components[0].continuous batch_shape = components[0].batch_shape - # categorical `batch_shape` must be broadcastable to `batch_shape` - if categorical.batch_shape != batch_shape: - raise ValueError( - f'`categorical.batch_shape` != the `batch_shape` of ' - f'`components`: {categorical.batch_shape} vs {batch_shape}.' 
- ) + # categorical `batch_shape` and `device` must match the components + for attr in ('batch_shape', 'device'): + if getattr(categorical, attr) != getattr(components[0], attr): + raise ValueError( + f'`categorical.{attr}` != the `{attr}` of ' + f'`components`: {getattr(categorical, attr)} vs ' + f'{getattr(components[0], attr)}.' + ) # infer the `min_event_shape` and `min_event_ndims` min_event_shape = components[0].event_shape @@ -159,6 +162,7 @@ def __init__(self, reparameterized=reparameterized, event_ndims=event_ndims, min_event_ndims=min_event_ndims, + device=device, validate_tensors=validate_tensors, ) self.categorical = categorical.to_indexed() diff --git a/tensorkit/distributions/normal.py b/tensorkit/distributions/normal.py index 3ea34f0..b5f1fb0 100644 --- a/tensorkit/distributions/normal.py +++ b/tensorkit/distributions/normal.py @@ -34,6 +34,7 @@ def __init__(self, dtype: str = T.float_x(), reparameterized: bool = True, event_ndims: int = 0, + device: Optional[str] = None, validate_tensors: Optional[bool] = None): """ Construct a new :class:`UnitNormal` distribution. @@ -44,6 +45,7 @@ def __init__(self, reparameterized: Whether the distribution should be reparameterized? event_ndims: The number of dimensions in the samples to be considered as an event. + device: The device where to place new tensors and variables. validate_tensors: Whether or not to check the numerical issues? Defaults to ``settings.validate_tensors``. """ @@ -52,6 +54,7 @@ def __init__(self, value_shape=shape, reparameterized=reparameterized, event_ndims=event_ndims, + device=device or T.current_device(), validate_tensors=validate_tensors, ) @@ -59,21 +62,24 @@ def __init__(self, def mean(self) -> T.Tensor: """The mean of the normal distribution.""" if self._mean is None: - self._mean = T.zeros(self.value_shape, self.dtype) + self._mean = T.zeros( + self.value_shape, dtype=self.dtype, device=self.device) return self._mean @property def std(self) -> T.Tensor: """The standard deviation (std) of the normal distribution.""" if self._std is None: - self._std = T.ones(self.value_shape, self.dtype) + self._std = T.ones( + self.value_shape, dtype=self.dtype, device=self.device) return self._std @property def logstd(self) -> T.Tensor: """The log-std of the normal distribution.""" if self._logstd is None: - self._logstd = T.zeros(self.value_shape, self.dtype) + self._logstd = T.zeros( + self.value_shape, dtype=self.dtype, device=self.device) return self._logstd def _sample(self, @@ -86,6 +92,7 @@ def _sample(self, shape=([n_samples] + self.value_shape if n_samples is not None else self.value_shape), dtype=self.dtype, + device=self.device, ), distribution=self, n_samples=n_samples, @@ -104,12 +111,12 @@ def copy(self, **overrided_params): cls=UnitNormal, base=self, attrs=(('shape', 'value_shape'), 'dtype', 'reparameterized', - 'event_ndims', 'validate_tensors'), + 'event_ndims', 'device', 'validate_tensors'), cached_attrs=('mean', 'std', 'logstd'), compute_deps={ - 'mean': ('dtype', 'value_shape'), - 'std': ('dtype', 'value_shape'), - 'logstd': ('dtype', 'value_shape'), + 'mean': ('dtype', 'value_shape', 'device'), + 'std': ('dtype', 'value_shape', 'device'), + 'logstd': ('dtype', 'value_shape', 'device'), }, overrided_params=overrided_params, ) @@ -147,10 +154,13 @@ def __init__(self, logstd: Optional[TensorOrData] = None, reparameterized: bool = True, event_ndims: int = 0, + device: Optional[str] = None, validate_tensors: Optional[bool] = None): # validate the arguments mean, (std, logstd) = check_tensor_arg_types( - 
('mean', mean), [('std', std), ('logstd', logstd)]) + ('mean', mean), [('std', std), ('logstd', logstd)], + device=device, + ) if std is not None: mutual_params = {'std': std} stdx = std @@ -168,6 +178,7 @@ def __init__(self, reparameterized=reparameterized, value_shape=value_shape, event_ndims=event_ndims, + device=device or T.get_device(mean), validate_tensors=validate_tensors, ) for k, v in mutual_params.items(): @@ -208,7 +219,8 @@ def copy(self, **overrided_params): cls=self.__class__, base=self, attrs=( - ('mean', 'reparameterized', 'event_ndims', 'validate_tensors') + + ('mean', 'reparameterized', 'event_ndims', 'device', + 'validate_tensors') + self._extra_args ), mutual_attrs=(('std', 'logstd'),), @@ -227,6 +239,7 @@ def __init__(self, logstd: Optional[TensorOrData] = None, reparameterized: bool = True, event_ndims: int = 0, + device: Optional[str] = None, validate_tensors: Optional[bool] = None): """ Construct a new :class:`Normal` distribution instance. @@ -238,12 +251,13 @@ def __init__(self, reparameterized: Whether the distribution should be reparameterized? event_ndims: The number of dimensions in the samples to be considered as an event. + device: The device where to place new tensors and variables. validate_tensors: Whether or not to check the numerical issues? Defaults to ``settings.validate_tensors``. """ super().__init__( mean=mean, std=std, logstd=logstd, reparameterized=reparameterized, - event_ndims=event_ndims, validate_tensors=validate_tensors, + event_ndims=event_ndims, device=device, validate_tensors=validate_tensors, ) def _sample(self, @@ -304,6 +318,7 @@ def __init__(self, event_ndims: int = 0, epsilon: float = T.EPSILON, log_zero: float = T.random.LOG_ZERO_VALUE, + device: Optional[str] = None, validate_tensors: Optional[bool] = None): """ Construct a new :class:`TruncatedNormal` distribution instance. @@ -321,6 +336,7 @@ def __init__(self, log_zero: The value to represent ``log(0)`` in the result of :meth:`log_prob()`, instead of using ``-math.inf``, to avoid potential numerical issues. + device: The device where to place new tensors and variables. validate_tensors: Whether or not to check the numerical issues? Defaults to ``settings.validate_tensors``. """ @@ -330,7 +346,7 @@ def __init__(self, f'and high == {high}.') super().__init__( mean=mean, std=std, logstd=logstd, reparameterized=reparameterized, - event_ndims=event_ndims, validate_tensors=validate_tensors, + event_ndims=event_ndims, device=device, validate_tensors=validate_tensors, ) self.low = low self.high = high diff --git a/tensorkit/distributions/uniform.py b/tensorkit/distributions/uniform.py index a1c5250..547c0c6 100644 --- a/tensorkit/distributions/uniform.py +++ b/tensorkit/distributions/uniform.py @@ -19,10 +19,10 @@ class Uniform(Distribution): _shape: Optional[List[int]] """The original `shape` argument for constructor.""" - low: Union[T.Tensor] + low: T.Tensor """The lower-bound of the uniform distribution.""" - high: Union[T.Tensor] + high: T.Tensor """The upper-bound of the uniform distribution (exclusive).""" log_zero: float @@ -37,6 +37,7 @@ def __init__(self, dtype: str = T.float_x(), reparameterized: bool = True, event_ndims: int = 0, + device: Optional[str] = None, log_zero: float = T.random.LOG_ZERO_VALUE, validate_tensors: Optional[bool] = None): """ @@ -55,6 +56,7 @@ def __init__(self, reparameterized: Whether the distribution should be reparameterized? event_ndims: The number of dimensions in the samples to be considered as an event. 
+ device: The device where to place new tensors and variables. log_zero: The value to represent ``log(0)`` in the result of :meth:`log_prob()`, instead of using ``-math.inf``, to avoid potential numerical issues. @@ -79,7 +81,9 @@ def __init__(self, range_checked = True low, high = check_tensor_arg_types( - ('low', low), ('high', high), default_dtype=dtype) + ('low', low), ('high', high), default_dtype=dtype, + device=device, + ) dtype = T.get_dtype(low) value_shape = (value_shape + @@ -90,6 +94,7 @@ def __init__(self, value_shape=value_shape, reparameterized=reparameterized, event_ndims=event_ndims, + device=device or T.get_device(low), validate_tensors=validate_tensors, ) @@ -118,7 +123,7 @@ def _sample(self, reparameterized: bool) -> StochasticTensor: sample_shape = ([n_samples] + self.value_shape if n_samples is not None else self.value_shape) - samples = T.random.rand(sample_shape, dtype=self.dtype) + samples = T.random.rand(sample_shape, dtype=self.dtype, device=self.device) if self.low is not None and self.high is not None: scale = self.high - self.low samples = samples * scale + self.low @@ -157,6 +162,6 @@ def copy(self, **overrided_params): base=self, attrs=(('shape', '_shape'), 'low', 'high', 'dtype', 'reparameterized', 'event_ndims', 'log_zero', - 'validate_tensors'), + 'device', 'validate_tensors'), overrided_params=overrided_params, ) diff --git a/tensorkit/distributions/utils.py b/tensorkit/distributions/utils.py index 95f8e68..6fc3478 100644 --- a/tensorkit/distributions/utils.py +++ b/tensorkit/distributions/utils.py @@ -3,8 +3,8 @@ import numpy as np -from ..tensor import (jit, Tensor, where, as_tensor_backend, as_tensor, get_dtype, - float_x) +from ..tensor import (jit, Tensor, where, float_scalar_like, as_tensor, + get_dtype, get_device, current_device, float_x) __all__ = [ 'get_overrided_parameterized', @@ -84,12 +84,14 @@ def log_pdf_mask(condition: Tensor, out remaining positions (i.e., set log-pdf of these locations to `log_zero`). """ - return where(condition, log_pdf, as_tensor_backend(log_zero, dtype=log_pdf.dtype)) + return where(condition, log_pdf, float_scalar_like(log_zero, log_pdf)) def check_tensor_arg_types(*args, dtype: Optional[str] = None, - default_dtype: str = float_x() + device: Optional[str] = None, + default_dtype: str = float_x(), + default_device: Optional[str] = None, ) -> Tuple[Union[Tensor, Tuple[Tensor, ...]], ...]: """ Validate tensor argument types. @@ -104,9 +106,13 @@ def check_tensor_arg_types(*args, while the others must be None. dtype: If specified, all arguments must be tensors of this dtype, or Python numbers (which can be casted into this dtype). + device: If specified, all tensor arguments must be placed on this device. default_dtype: The default dtype to cast Python numbers into, if `dtype` is not specified, and all arguments are Python numbers (thus no dtype can be inferred). + default_device: The default device where to place new tensors and + variables, if `device` is not specified, and all arguments are + Python numbers (thus no device can be inferred). Returns: A list of validated tensors. 
@@ -130,12 +136,25 @@ def check_dtype(name, data): raise ValueError(f'`{name}.dtype` != `{inferred_dtype[0]}`: ' f'{data_dtype} vs {inferred_dtype[1]}') + def check_device(name, data): + if isinstance(data, StochasticTensor): + data = data.tensor + if isinstance(data, Tensor): + data_device = get_device(data) + if inferred_device[1] is None: + inferred_device[0] = f'{name}.device' + inferred_device[1] = data_device + elif inferred_device[1] != data_device: + raise ValueError(f'`{name}.device` != `{inferred_device[0]}`: ' + f'{data_device} vs {inferred_device[1]}') + def check_arg(arg): if isinstance(arg, tuple): name, data = arg if data is None: raise ValueError(f'`{name}` must be specified.') check_dtype(name, data) + check_device(name, data) else: not_none_count = 0 for i, (name, data) in enumerate(arg): @@ -144,6 +163,7 @@ def check_arg(arg): if not_none_count != 1: break check_dtype(name, data) + check_device(name, data) if not_none_count != 1: names = [f'`{n}`' for n, _ in arg] if len(names) == 2: @@ -161,18 +181,26 @@ def check_arg(arg): else: inferred_dtype = [None, None] + if device is not None: + inferred_device = ['device', device] + else: + inferred_device = [None, None] + for a in args: check_arg(a) # do cast the tensors + default_device = default_device or current_device() target_dtype = inferred_dtype[1] or default_dtype + target_device = inferred_device[1] or default_device + ret: List[Union[Tensor, Tuple[Tensor, ...]]] = [] for arg in args: if isinstance(arg, tuple): - ret.append(as_tensor(arg[1], dtype=target_dtype)) + ret.append(as_tensor(arg[1], dtype=target_dtype, device=target_device)) else: ret.append(tuple( - (as_tensor(data, dtype=target_dtype) + (as_tensor(data, dtype=target_dtype, device=target_device) if data is not None else None) for _, data in arg )) diff --git a/tensorkit/examples/classification/mnist.py b/tensorkit/examples/classification/mnist.py index 7b43db0..b91a8bb 100644 --- a/tensorkit/examples/classification/mnist.py +++ b/tensorkit/examples/classification/mnist.py @@ -43,7 +43,7 @@ def train_step(x, y): return {'loss': loss, 'acc': acc} def evaluate(x, y): - with T.no_grad(): + with tk.layers.scoped_eval_mode(net), T.no_grad(): logits = net(x) acc = utils.calculate_acc(logits, y) return {'acc': acc} @@ -87,6 +87,7 @@ def evaluate(x, y): ) # train the model + tk.layers.set_train_mode(net, True) utils.fit_model(loop=loop, optimizer=optimizer, fn=train_step, stream=train_stream) @@ -96,4 +97,5 @@ def evaluate(x, y): if __name__ == '__main__': with mltk.Experiment(Config) as exp: - main(exp) + with T.use_device(T.first_gpu_device()): + main(exp) diff --git a/tensorkit/examples/classification/mnist_resnet.py b/tensorkit/examples/classification/mnist_resnet.py index 4b5aacd..374c6a5 100644 --- a/tensorkit/examples/classification/mnist_resnet.py +++ b/tensorkit/examples/classification/mnist_resnet.py @@ -49,7 +49,7 @@ def train_step(x, y): return {'loss': loss, 'acc': acc} def evaluate(x, y): - with T.no_grad(): + with tk.layers.scoped_eval_mode(net), T.no_grad(): logits = net(x) acc = utils.calculate_acc(logits, y) return {'acc': acc} @@ -92,6 +92,7 @@ def evaluate(x, y): ) # train the model + tk.layers.set_train_mode(net, True) utils.fit_model(loop=loop, optimizer=optimizer, fn=train_step, stream=train_stream) @@ -101,4 +102,5 @@ def evaluate(x, y): if __name__ == '__main__': with mltk.Experiment(Config) as exp: - main(exp) + with T.use_device(T.first_gpu_device()): + main(exp) diff --git a/tensorkit/flows/act_norm.py b/tensorkit/flows/act_norm.py index 
239da4b..5a99a71 100644 --- a/tensorkit/flows/act_norm.py +++ b/tensorkit/flows/act_norm.py @@ -4,7 +4,7 @@ from .. import init, tensor as T from ..tensor import (Tensor, Module, reshape, shape, int_range, calculate_mean_and_var, assert_finite, - as_tensor_backend, maximum, log, sqrt) + float_scalar_like, maximum, log, sqrt) from ..layers import * from ..typing_ import * from .core import * @@ -43,8 +43,9 @@ def __init__(self, event_ndims: int = 1, scale: Union[str, ActNormScaleType] = 'exp', initialized: bool = False, - epsilon: float = T.EPSILON, - dtype: str = T.float_x()): + dtype: str = T.float_x(), + device: Optional[str] = None, + epsilon: float = T.EPSILON): """ Construct a new :class:`ActNorm` instance. @@ -64,9 +65,10 @@ def __init__(self, initialized: Whether or not the variables have been initialized? Defaults to :obj:`False`, where the first input `x` in the forward pass will be used to initialize the variables. + dtype: Dtype of the parameters. + device: The device where to place new tensors and variables. epsilon: The infinitesimal constant to avoid dividing by zero or taking logarithm of zero. - dtype: Dtype of the parameters. """ # validate the arguments scale_type = ActNormScaleType(scale) @@ -81,6 +83,8 @@ def __init__(self, else: # pragma: no cover raise ValueError(f'Unsupported `scale_type`: {scale_type}') + device = device or T.current_device() + # construct the layer super().__init__(axis=axis, event_ndims=event_ndims, @@ -94,11 +98,13 @@ def __init__(self, add_parameter( self, 'pre_scale', - T.variable([num_features], dtype=dtype, initializer=pre_scale_init), + T.variable([num_features], dtype=dtype, initializer=pre_scale_init, + device=device), ) add_parameter( self, 'bias', - T.variable([num_features], dtype=dtype, initializer=init.zeros), + T.variable([num_features], dtype=dtype, initializer=init.zeros, + device=device), ) @T.jit_method @@ -133,7 +139,7 @@ def calculate_bias_and_pre_scale_for_init(self, input: Tensor) -> Tuple[Tensor, bias = -input_mean # calculate the initial value for `pre_scale` - epsilon = as_tensor_backend(self.epsilon, dtype=input_var.dtype) + epsilon = float_scalar_like(self.epsilon, input_var) if self.scale_type == 'exp': pre_scale = -0.5 * log(maximum(input_var, epsilon)) else: @@ -142,12 +148,13 @@ def calculate_bias_and_pre_scale_for_init(self, input: Tensor) -> Tuple[Tensor, return bias, pre_scale @T.jit_ignore - def _initialize_act_norm(self, input: Tensor) -> None: + def _initialize_act_norm(self, input: Tensor) -> bool: bias, pre_scale = self.calculate_bias_and_pre_scale_for_init(input) with T.no_grad(): - T.assign(get_parameter(self, 'bias'), bias) - T.assign(get_parameter(self, 'pre_scale'), pre_scale) + T.assign(self.bias, bias) + T.assign(self.pre_scale, pre_scale) self.set_initialized(True) + return False @T.jit_method def _transform(self, @@ -199,8 +206,9 @@ def __init__(self, num_features: int, scale: Union[str, ActNormScaleType] = 'exp', initialized: bool = False, - epsilon: float = T.EPSILON, - dtype: str = T.float_x()): + dtype: str = T.float_x(), + device: Optional[str] = None, + epsilon: float = T.EPSILON): """ Construct a new convolutional :class:`ActNorm` instance. @@ -213,9 +221,10 @@ def __init__(self, initialized: Whether or not the variables have been initialized? Defaults to :obj:`False`, where the first input `x` in the forward pass will be used to initialize the variables. + dtype: Dtype of the parameters. + device: The device where to place new tensors and variables. 
epsilon: The infinitesimal constant to avoid dividing by zero or taking logarithm of zero. - dtype: Dtype of the parameters. """ spatial_ndims = self._get_spatial_ndims() feature_axis = -1 if T.IS_CHANNEL_LAST else -(spatial_ndims + 1) @@ -226,8 +235,9 @@ def __init__(self, event_ndims=spatial_ndims + 1, scale=scale, initialized=initialized, - epsilon=epsilon, dtype=dtype, + device=device, + epsilon=epsilon, ) def _get_spatial_ndims(self) -> int: diff --git a/tensorkit/flows/coupling.py b/tensorkit/flows/coupling.py index 4d1729a..49d0c81 100644 --- a/tensorkit/flows/coupling.py +++ b/tensorkit/flows/coupling.py @@ -1,6 +1,7 @@ from typing import * from .. import tensor as T +from ..layers import is_jit_layer from ..tensor import Tensor, Module, concat, split from .core import (FeatureMappingFlow, Scale, ExpScale, SigmoidScale, LinearScale) @@ -106,11 +107,13 @@ def __init__(self, scale = INVALID if isinstance(scale, Module): - if not isinstance(scale, Scale) and not T.is_jit_layer(scale): + if not isinstance(scale, Scale) and not is_jit_layer(scale): scale = INVALID elif isinstance(scale, type) or callable(scale): if scale is SigmoidScale: scale = scale(pre_scale_bias=sigmoid_scale_bias) + elif scale is LinearScale: + scale = scale(epsilon=epsilon) else: scale = scale() else: diff --git a/tensorkit/flows/rearrangement.py b/tensorkit/flows/rearrangement.py index 539771c..c075298 100644 --- a/tensorkit/flows/rearrangement.py +++ b/tensorkit/flows/rearrangement.py @@ -1,7 +1,7 @@ from typing import * from .. import tensor as T -from ..tensor import Tensor, argsort, index_select, as_tensor_backend +from ..tensor import Tensor, argsort, index_select, float_scalar_like from ..tensor.random import random_permutation from ..layers import * from .core import * @@ -30,7 +30,8 @@ class FeatureShufflingFlow(FeatureMappingFlow): def __init__(self, num_features: int, axis: int = -1, - event_ndims: int = 1): + event_ndims: int = 1, + device: Optional[str] = None): """ Construct a new :class:`FeatureShufflingFlow`. @@ -39,13 +40,15 @@ def __init__(self, axis: The feature axis, to apply the transformation. event_ndims: Number of dimensions to be considered as the event dimensions. `x.ndims - event_ndims == log_det.ndims`. + device: The device where to place new tensors and variables. 
""" super().__init__(axis=int(axis), event_ndims=event_ndims, explicitly_invertible=True) self.num_features = num_features # initialize the permutation variable, and the inverse permutation - permutation = random_permutation(num_features, dtype=T.index_dtype) + permutation = random_permutation(num_features, dtype=T.index_dtype, + device=device) inv_permutation = argsort(permutation) # register the permutation as layer parameter, such that it could be @@ -66,7 +69,7 @@ def _transform(self, output = index_select(input, self.permutation, axis=self.axis) output_log_det = input_log_det if compute_log_det and output_log_det is None: - output_log_det = as_tensor_backend(0., dtype=input.dtype) + output_log_det = float_scalar_like(0., input) return output, output_log_det diff --git a/tensorkit/flows/reshape_.py b/tensorkit/flows/reshape_.py index 9f1eec8..f287ec1 100644 --- a/tensorkit/flows/reshape_.py +++ b/tensorkit/flows/reshape_.py @@ -1,6 +1,6 @@ from typing import * -from ..tensor import Tensor, reshape_tail, as_tensor_backend, jit_method +from ..tensor import Tensor, reshape_tail, float_scalar_like, jit_method from ..tensor.nn import * from .core import * @@ -84,7 +84,7 @@ def _transform(self, output_log_det = input_log_det if compute_log_det and output_log_det is None: - output_log_det = as_tensor_backend(0., dtype=input.dtype) + output_log_det = float_scalar_like(0., input) return output, output_log_det @@ -133,7 +133,7 @@ def _transform(self, output_log_det = input_log_det if compute_log_det and output_log_det is None: - output_log_det = as_tensor_backend(0., dtype=input.dtype) + output_log_det = float_scalar_like(0., input) return output, output_log_det diff --git a/tensorkit/flows/split_.py b/tensorkit/flows/split_.py index b055bfa..6cf9111 100644 --- a/tensorkit/flows/split_.py +++ b/tensorkit/flows/split_.py @@ -1,6 +1,7 @@ from typing import * from .. import tensor as T +from ..layers import is_jit_layer from ..tensor import Tensor, Module, split, concat from .core import * @@ -79,13 +80,13 @@ def __init__(self, f'two positive integers: got {y_sections!r}.') y_sections = list(map(int, y_sections)) - if not isinstance(left, Flow) and not T.is_jit_layer(left): + if not isinstance(left, Flow) and not is_jit_layer(left): raise TypeError(f'`left` is not a flow: got {left!r}.') x_event_ndims = left.get_x_event_ndims() y_event_ndims = left.get_y_event_ndims() if right is not None: - if not isinstance(right, Flow) and not T.is_jit_layer(right): + if not isinstance(right, Flow) and not is_jit_layer(right): raise TypeError(f'`right` is not a flow: got {right!r}.') if right.get_x_event_ndims() != x_event_ndims or \ right.get_y_event_ndims() != y_event_ndims: diff --git a/tensorkit/init/std_data_init.py b/tensorkit/init/std_data_init.py index 0527771..2b6429c 100644 --- a/tensorkit/init/std_data_init.py +++ b/tensorkit/init/std_data_init.py @@ -1,12 +1,12 @@ from typing import List from .. 
import tensor as T +from ..backend import Module, Tensor from ..layers import * from .core import * __all__ = ['StdDataInit'] -from ..backend import Module, Tensor class StdDataInit(DataDependentInitializer): @@ -22,7 +22,7 @@ def __init__(self, epsilon: float = T.EPSILON): self.epsilon = epsilon def _init(self, layer: Module, inputs: List[Tensor]) -> None: - if T.is_jit_layer(layer): + if is_jit_layer(layer): raise TypeError(f'JIT compiled layer is not supported: got {layer!r}') if not isinstance(layer, CoreLinear): raise TypeError(f'`layer` is not a core linear layer: got {layer!r}') @@ -62,7 +62,7 @@ def _init(self, layer: Module, inputs: List[Tensor]) -> None: out_std = T.sqrt( T.maximum( out_var, - T.as_tensor_backend(self.epsilon, dtype=out_var.dtype) + T.float_scalar_like(self.epsilon, out_var) ) ) weight_scale = out_std diff --git a/tensorkit/layers/builder.py b/tensorkit/layers/builder.py index 9cae176..240d500 100644 --- a/tensorkit/layers/builder.py +++ b/tensorkit/layers/builder.py @@ -347,16 +347,13 @@ def add(self, self.out_shape = out_shape return self - def build(self, - flatten_to_ndims: bool = True, - disable_jit: bool = False) -> T.Module: + def build(self, flatten_to_ndims: bool = True) -> T.Module: """ Build the sequential layer. Args: flatten_to_ndims: Whether or not to wrap the sequential layer with a :class:`FlattenToNDims` layer? - disable_jit: Whether or not to disable JIT? Returns: The built sequential layer. @@ -370,8 +367,6 @@ def build(self, if flatten_to_ndims: layer = FlattenToNDims(layer, ndims=len(self.in_shape) + 1) - if not disable_jit: - layer = T.jit_compile(layer) return layer # ---- activation ---- diff --git a/tensorkit/layers/composed.py b/tensorkit/layers/composed.py index b17a49a..6a9fde8 100644 --- a/tensorkit/layers/composed.py +++ b/tensorkit/layers/composed.py @@ -61,6 +61,7 @@ def __init__(self, weight_init: TensorInitArgType = DEFAULT_WEIGHT_INIT, bias_init: TensorInitArgType = DEFAULT_BIAS_INIT, data_init: Optional[DataInitArgType] = None, + device: Optional[str] = None, ): # check the arguments if use_bias is None: @@ -76,6 +77,7 @@ def __init__(self, weight_init=weight_init, bias_init=bias_init, data_init=data_init, + device=device, ), out_features=out_features, out_feature_axis=-1, @@ -105,6 +107,7 @@ def __init__(self, weight_init: TensorInitArgType = DEFAULT_WEIGHT_INIT, bias_init: TensorInitArgType = DEFAULT_BIAS_INIT, data_init: Optional[DataInitArgType] = None, + device: Optional[str] = None, ): spatial_ndims = self._get_spatial_ndims() linear_factory = self._get_linear_factory() @@ -132,6 +135,7 @@ def __init__(self, weight_init=weight_init, bias_init=bias_init, data_init=data_init, + device=device, ), out_features=out_channels, out_feature_axis=-1 if T.IS_CHANNEL_LAST else -(spatial_ndims + 1), @@ -195,6 +199,7 @@ def __init__(self, weight_init: TensorInitArgType = DEFAULT_WEIGHT_INIT, bias_init: TensorInitArgType = DEFAULT_BIAS_INIT, data_init: Optional[DataInitArgType] = None, + device: Optional[str] = None, ): spatial_ndims = self._get_spatial_ndims() linear_factory = self._get_linear_factory() @@ -225,6 +230,7 @@ def __init__(self, weight_init=weight_init, bias_init=bias_init, data_init=data_init, + device=device, ), out_features=out_channels, out_feature_axis=-1 if T.IS_CHANNEL_LAST else -(spatial_ndims + 1), diff --git a/tensorkit/layers/flow_layer.py b/tensorkit/layers/flow_layer.py index 0ad547e..11b2941 100644 --- a/tensorkit/layers/flow_layer.py +++ b/tensorkit/layers/flow_layer.py @@ -1,5 +1,5 @@ from ..backend.flows 
import Flow -from ..tensor import Tensor, Module, is_jit_layer +from ..tensor import Tensor, Module from .core import * __all__ = [ diff --git a/tensorkit/layers/pixelcnn.py b/tensorkit/layers/pixelcnn.py index 52a551c..d3c5e2f 100644 --- a/tensorkit/layers/pixelcnn.py +++ b/tensorkit/layers/pixelcnn.py @@ -1,13 +1,13 @@ from functools import partial from typing import * -from . import resnet, core, composed -from .core import * -from .utils import flatten_nested_layers from .. import tensor as T from ..arg_check import * from ..tensor import Tensor, Module, rank, shift, shape, concat, ones_like from ..typing_ import * +from . import resnet, core, composed +from .core import * +from .utils import flatten_nested_layers __all__ = [ 'PixelCNNInput1d', 'PixelCNNInput2d', 'PixelCNNInput3d', @@ -250,7 +250,8 @@ def __init__(self, weight_norm: WeightNormArgType = False, weight_init: TensorInitArgType = DEFAULT_WEIGHT_INIT, bias_init: TensorInitArgType = DEFAULT_BIAS_INIT, - data_init: Optional[DataInitArgType] = None): + data_init: Optional[DataInitArgType] = None, + device: Optional[str] = None): """ Construct a new pixelcnn input layer. @@ -274,6 +275,7 @@ def __init__(self, weight_init: The weight initializer for the convolutional layers. bias_init: The bias initializer for the convolutional layers. data_init: The data-dependent initializer for the convolutional layers. + device: The device where to place new tensors and variables. """ super().__init__() @@ -314,6 +316,7 @@ def __init__(self, weight_init=weight_init, bias_init=bias_init, data_init=data_init, + device=device, ), SpatialShift(spatial_shift) ) @@ -453,6 +456,7 @@ def __init__(self, weight_init: TensorInitArgType = DEFAULT_WEIGHT_INIT, bias_init: TensorInitArgType = DEFAULT_BIAS_INIT, data_init: Optional[DataInitArgType] = None, + device: Optional[str] = None, ): """ Construct a new PixelCNN resnet block. @@ -490,6 +494,7 @@ def __init__(self, weight_init: The weight initializer for the convolutional layers. bias_init: The bias initializer for the convolutional layers. data_init: The data-dependent initializer for the convolutional layers. + device: The device where to place new tensors and variables. 
""" spatial_ndims = self._get_spatial_ndims() @@ -551,6 +556,7 @@ def __init__(self, weight_init=weight_init, bias_init=bias_init, data_init=data_init, + device=device, ) ) @@ -618,7 +624,9 @@ def __init__(self, gate_bias: float = DEFAULT_GATE_BIAS, weight_init: TensorInitArgType = DEFAULT_WEIGHT_INIT, bias_init: TensorInitArgType = DEFAULT_BIAS_INIT, - data_init: Optional[DataInitArgType] = None): + data_init: Optional[DataInitArgType] = None, + device: Optional[str] = None, + ): spatial_ndims = self._get_spatial_ndims() # validate the arguments @@ -658,6 +666,7 @@ def __init__(self, weight_init=weight_init, bias_init=bias_init, data_init=data_init, + device=device, ) ) @@ -739,7 +748,9 @@ def __init__(self, gate_bias: float = DEFAULT_GATE_BIAS, weight_init: TensorInitArgType = DEFAULT_WEIGHT_INIT, bias_init: TensorInitArgType = DEFAULT_BIAS_INIT, - data_init: Optional[DataInitArgType] = None): + data_init: Optional[DataInitArgType] = None, + device: Optional[str] = None, + ): spatial_ndims = self._get_spatial_ndims() # validate the arguments @@ -781,6 +792,7 @@ def __init__(self, weight_init=weight_init, bias_init=bias_init, data_init=data_init, + device=device, ) ) @@ -865,7 +877,7 @@ def __init__(self, input_cls_name = f'PixelCNNInput{spatial_ndims}d' if not isinstance(input_layer, global_dict[input_cls_name]) and \ - not T.is_jit_layer(input_layer): + not is_jit_layer(input_layer): raise TypeError( f'`input_layer` must be an instance of `{input_cls_name}`: ' f'got {input_layer!r}.' diff --git a/tensorkit/layers/resnet.py b/tensorkit/layers/resnet.py index 9f4d5bd..d2bb078 100644 --- a/tensorkit/layers/resnet.py +++ b/tensorkit/layers/resnet.py @@ -130,6 +130,7 @@ def __init__(self, weight_init: TensorInitArgType = DEFAULT_WEIGHT_INIT, bias_init: TensorInitArgType = DEFAULT_BIAS_INIT, data_init: Optional[DataInitArgType] = None, + device: Optional[str] = None, ): """ Construct a new resnet block. @@ -189,6 +190,7 @@ def __init__(self, weight_init: The weight initializer for the convolutional layers. bias_init: The bias initializer for the convolutional layers. data_init: The data-dependent initializer for the convolutional layers. + device: The device where to place new tensors and variables. 
""" def use_bias_or_else(default_val: bool): if use_bias is None: @@ -204,6 +206,7 @@ def compile_layer_list(layers: List[Module]) -> Module: return Sequential(layers) spatial_ndims = self._get_spatial_ndims() + is_deconv = self._is_deconv() # validate arguments in_channels = int(in_channels) @@ -212,13 +215,11 @@ def compile_layer_list(layers: List[Module]) -> Module: kernel_size = validate_conv_size('kernel_size', kernel_size, spatial_ndims) stride = validate_conv_size('strides', stride, spatial_ndims) dilation = validate_conv_size('dilation', dilation, spatial_ndims) - is_half_padding = padding == PaddingMode.HALF.value padding = validate_padding(padding, kernel_size, dilation, spatial_ndims) - if output_padding != 0 and \ - self._add_output_padding_to_kwargs(output_padding, {}) == {}: + if output_padding != 0 and not is_deconv: raise ValueError(f'The `output_padding` argument is not allowed ' - f'by a {self.__class__.__qualname__} layer.') + f'by {self.__class__.__qualname__}.') output_padding = validate_output_padding( output_padding, stride, dilation, spatial_ndims) @@ -244,7 +245,8 @@ def compile_layer_list(layers: List[Module]) -> Module: if use_shortcut is None: use_shortcut = ( any(s != 1 for s in stride) or - (not is_half_padding and any(k != 1 for k in stride)) or + any(p[0] + p[1] != (k - 1) * d + for p, k, d in zip(padding, kernel_size, dilation)) or in_channels != out_channels) if activation is not None: @@ -270,7 +272,7 @@ def compile_layer_list(layers: List[Module]) -> Module: ) kwargs = {'weight_init': weight_init, 'bias_init': bias_init, - 'data_init': data_init} + 'data_init': data_init, 'device': device} # build the shortcut path if use_shortcut: @@ -392,8 +394,11 @@ def _get_spatial_ndims(self) -> int: def _default_conv_factory(self) -> LayerFactory: raise NotImplementedError() + def _is_deconv(self) -> bool: + raise NotImplementedError() + def _add_output_padding_to_kwargs(self, output_padding, kwargs): - return kwargs + raise NotImplementedError() def forward(self, input: Tensor, @@ -422,7 +427,16 @@ def forward(self, return output -class ResBlock1d(ResBlockNd): +class ResBlockConvNd(ResBlockNd): + + def _add_output_padding_to_kwargs(self, output_padding, kwargs): + return kwargs + + def _is_deconv(self) -> bool: + return False + + +class ResBlock1d(ResBlockConvNd): """1D ResNet convolution block.""" def _get_spatial_ndims(self) -> int: @@ -432,7 +446,7 @@ def _default_conv_factory(self) -> LayerFactory: return LinearConv1d -class ResBlock2d(ResBlockNd): +class ResBlock2d(ResBlockConvNd): """2D ResNet convolution block.""" def _get_spatial_ndims(self) -> int: @@ -442,7 +456,7 @@ def _default_conv_factory(self) -> LayerFactory: return LinearConv2d -class ResBlock3d(ResBlockNd): +class ResBlock3d(ResBlockConvNd): """3D ResNet convolution block.""" def _get_spatial_ndims(self) -> int: @@ -459,6 +473,9 @@ def _add_output_padding_to_kwargs(self, output_padding, kwargs=None): kwargs['output_padding'] = output_padding return kwargs + def _is_deconv(self) -> bool: + return True + class ResBlockTranspose1d(ResBlockTransposeNd): """1D ResNet de-convolution block.""" diff --git a/tensorkit/layers/utils.py b/tensorkit/layers/utils.py index 906cda7..d04b837 100644 --- a/tensorkit/layers/utils.py +++ b/tensorkit/layers/utils.py @@ -1,13 +1,15 @@ +from contextlib import contextmanager from typing import * from ..arg_check import * from ..tensor import Module from ..typing_ import * from .activation import * +from .core import * __all__ = [ 'flatten_nested_layers', 
'get_activation_class', - 'get_deconv_output_padding', + 'get_deconv_output_padding', 'scoped_eval_mode', ] @@ -129,3 +131,22 @@ def f(i, o, k, s, d, p): return [f(*args) for args in zip( input_size, output_size, kernel_size, stride, dilation, padding)] + + +@contextmanager +def scoped_eval_mode(*layer_or_layers: Union[Module, Sequence[Module]]): + """ + Set the layers to evaluation mode when entering the context, and + set to training mode when exiting the context. + + Args: + layer_or_layers: The layer or layers to be set. + """ + layer_or_layers = flatten_nested_layers(layer_or_layers) + try: + for layer in layer_or_layers: + set_eval_mode(layer) + yield + finally: + for layer in layer_or_layers: + set_train_mode(layer) diff --git a/tensorkit/tensor/core.py b/tensorkit/tensor/core.py index 1fab558..7891f94 100644 --- a/tensorkit/tensor/core.py +++ b/tensorkit/tensor/core.py @@ -1,6 +1,4 @@ from ..backend import core from ..backend.core import * -from . import core_extras -from .core_extras import * -__all__ = core.__all__ + core_extras.__all__ +__all__ = core.__all__ diff --git a/tensorkit/tensor/core_extras.py b/tensorkit/tensor/core_extras.py deleted file mode 100644 index a9a2c5b..0000000 --- a/tensorkit/tensor/core_extras.py +++ /dev/null @@ -1 +0,0 @@ -__all__ = [] diff --git a/tensorkit/tensor/random_extras.py b/tensorkit/tensor/random_extras.py index 04d97cd..a222f7d 100644 --- a/tensorkit/tensor/random_extras.py +++ b/tensorkit/tensor/random_extras.py @@ -35,10 +35,11 @@ def truncated_randn(shape: List[int], low: Optional[float] = None, high: Optional[float] = None, dtype: str = float_x(), + device: Optional[str] = None, epsilon: float = EPSILON) -> Tensor: # fast routine: low is None and high is None, use standard randn if low is None and high is None: - return randn(shape, dtype) + return randn(shape, dtype, device) # compute cdf(low) and cdf(high) if low is None: @@ -52,7 +53,7 @@ def truncated_randn(shape: List[int], high_cdf = _unit_normal_cdf_float(high) # sample u ~ uniform(0, 1) - u = rand(shape, dtype) + u = rand(shape, dtype, device) # transform uniform random variable into truncated normal if low_cdf == 0.: @@ -111,17 +112,17 @@ def truncated_randn_log_pdf(given: Tensor, log_pdf = where( logical_and(low <= given, given <= high), log_pdf, - as_tensor_backend(log_zero, dtype=log_pdf.dtype)) + float_scalar_like(log_zero, log_pdf)) elif low is not None: log_pdf = where( low <= given, log_pdf, - as_tensor_backend(log_zero, dtype=log_pdf.dtype)) + float_scalar_like(log_zero, log_pdf)) elif high is not None: log_pdf = where( given <= high, log_pdf, - as_tensor_backend(log_zero, dtype=log_pdf.dtype)) + float_scalar_like(log_zero, log_pdf)) else: log_pdf = log_pdf # do nothing, but JIT requires this branch @@ -145,10 +146,10 @@ def truncated_normal(mean: Tensor, if n_samples is not None: param_shape = [n_samples] + param_shape r = truncated_randn(param_shape, low=low, high=high, dtype=get_dtype(mean), - epsilon=epsilon) + epsilon=epsilon, device=get_device(mean)) r = r * std + mean if not reparameterized: - r = r.detach() + r = stop_grad(r) return r @@ -182,17 +183,17 @@ def truncated_normal_log_pdf(given: Tensor, logical_and((low * std + mean) <= given, given <= (high * std + mean)), log_pdf, - as_tensor_backend(log_zero, dtype=log_pdf.dtype)) + float_scalar_like(log_zero, log_pdf)) elif low is not None: log_pdf = where( (low * std + mean) <= given, log_pdf, - as_tensor_backend(log_zero, dtype=log_pdf.dtype)) + float_scalar_like(log_zero, log_pdf)) elif high is not None: 
log_pdf = where( given <= (high * std + mean), log_pdf, - as_tensor_backend(log_zero, dtype=log_pdf.dtype)) + float_scalar_like(log_zero, log_pdf)) else: log_pdf = log_pdf # do nothing, but JIT requires this branch @@ -244,7 +245,7 @@ def discretized_logistic(mean: Tensor, format(mean_dtype, log_scale_dtype)) u = uniform(shape=sample_shape, low=epsilon, high=1. - epsilon, - dtype=mean_dtype) + dtype=mean_dtype, device=get_device(mean)) # inverse CDF of the logistic inverse_logistic_cdf = log(u) - log1p(-u) @@ -318,7 +319,7 @@ def discretized_logistic_log_prob(given: Tensor, # the middle bins cases: # log(sigmoid(x_high) - sigmoid(x_low)) # middle_bins_pdf = tf.log(cdf_delta + self._epsilon) - epsilon_tensor = as_tensor_backend(epsilon, dtype=cdf_delta.dtype) + epsilon_tensor = float_scalar_like(epsilon, cdf_delta) middle_bins_pdf = log(maximum(cdf_delta, epsilon_tensor)) # # but in extreme cases where `sigmoid(x_high) - sigmoid(x_low)` @@ -328,7 +329,7 @@ def discretized_logistic_log_prob(given: Tensor, # cdf_delta > epsilon_tensor, # # to avoid NaNs pollute the select statement, we have to use # # `maximum(cdf_delta, 1e-12)` - # log(maximum(cdf_delta, as_tensor_backend(1e-12, dtype=cdf_delta.dtype))), + # log(maximum(cdf_delta, float_scalar_like(1e-12, cdf_delta))), # # the alternative form. basically it can be derived by using # # the mean value theorem for integration. # x_mid + log_delta - 2. * softplus(x_mid) @@ -345,7 +346,7 @@ def discretized_logistic_log_prob(given: Tensor, # the left-edge bin case # log(sigmoid(x_high) - sigmoid(-infinity)) - left_edge = as_tensor_backend(min_val + half_bin, dtype=broadcast_given.dtype) + left_edge = float_scalar_like(min_val + half_bin, broadcast_given) left_edge_pdf = -softplus(-x_high) if validate_tensors: left_edge_pdf = assert_finite(left_edge_pdf, 'left_edge_pdf') @@ -358,7 +359,7 @@ def discretized_logistic_log_prob(given: Tensor, # the right-edge bin case # log(sigmoid(infinity) - sigmoid(x_low)) - right_edge = as_tensor_backend(max_val - half_bin, dtype=broadcast_given.dtype) + right_edge = float_scalar_like(max_val - half_bin, broadcast_given) right_edge_pdf = -softplus(x_low) if validate_tensors: right_edge_pdf = assert_finite(right_edge_pdf, 'right_edge_pdf') @@ -376,7 +377,7 @@ def discretized_logistic_log_prob(given: Tensor, logical_and(given >= min_val - half_bin, given <= max_val + half_bin), log_prob, - as_tensor_backend(log_zero, dtype=log_prob.dtype)) + float_scalar_like(log_zero, log_prob)) # now reduce the group_ndims if group_ndims > 0: diff --git a/tensorkit/utils/tensor_stream.py b/tensorkit/utils/tensor_stream.py index e154a93..3b0abee 100644 --- a/tensorkit/utils/tensor_stream.py +++ b/tensorkit/utils/tensor_stream.py @@ -14,8 +14,10 @@ class TensorStream(mltk.DataStream): source: mltk.DataStream + device: str - def __init__(self, source: mltk.DataStream): + def __init__(self, source: mltk.DataStream, device: Optional[str] = None): + device = device or T.current_device() super().__init__( batch_size=source.batch_size, array_count=source.array_count, @@ -24,8 +26,10 @@ def __init__(self, source: mltk.DataStream): random_state=source.random_state, ) self.source = source + self.device = device def copy(self, **kwargs): + kwargs.setdefault('device', self.device) return TensorStream(source=self.source, **kwargs) def _minibatch_iterator(self) -> Generator[ArrayTuple, None, None]: @@ -33,16 +37,20 @@ def _minibatch_iterator(self) -> Generator[ArrayTuple, None, None]: try: for batch_data in g: with T.no_grad(): - batch_data = 
tuple(T.from_numpy(arr) for arr in batch_data) + batch_data = tuple( + T.from_numpy(arr, device=self.device) + for arr in batch_data + ) yield batch_data finally: g.close() def as_tensor_stream(source: mltk.DataStream, + device: Optional[str] = None, prefetch: Optional[int] = None ) -> mltk.DataStream: - stream = TensorStream(source) + stream = TensorStream(source, device=device) if prefetch is not None: stream = stream.threaded(prefetch) return stream diff --git a/tests/distributions/test_flow.py b/tests/distributions/test_flow.py index 179154b..a80e12f 100644 --- a/tests/distributions/test_flow.py +++ b/tests/distributions/test_flow.py @@ -10,7 +10,7 @@ from tensorkit.distributions import Categorical, FlowDistribution, UnitNormal from tensorkit.distributions.utils import copy_distribution from tensorkit.flows import ReshapeFlow, ActNorm -from tensorkit.tensor import Tensor, as_tensor_backend, int_range +from tensorkit.tensor import Tensor, float_scalar_like, int_range from tests.helper import * @@ -31,9 +31,9 @@ def _transform(self, if compute_log_det: if inverse: - output_log_det = as_tensor_backend(-math.log(2.)) + output_log_det = float_scalar_like(-math.log(2.), output) else: - output_log_det = as_tensor_backend(math.log(2.)) + output_log_det = float_scalar_like(math.log(2.), output) for axis in int_range(-event_ndims, 0): output_log_det = output_log_det * output.shape[axis] diff --git a/tests/flows/test_act_norm.py b/tests/flows/test_act_norm.py index c3b2cd5..7e02103 100644 --- a/tests/flows/test_act_norm.py +++ b/tests/flows/test_act_norm.py @@ -24,7 +24,7 @@ def do_check(batch_shape, scale_type, initialized, dtype): ctx.assertIn(f'num_features={num_features}', repr(flow)) ctx.assertIn(f'axis={-(spatial_ndims + 1)}', repr(flow)) ctx.assertIn(f'scale_type={scale_type!r}', repr(flow)) - flow = T.jit_compile(flow) + flow = tk.layers.jit_compile(flow) # check initialize if not initialized: diff --git a/tests/flows/test_core.py b/tests/flows/test_core.py index 8aeb32c..a7e30b3 100644 --- a/tests/flows/test_core.py +++ b/tests/flows/test_core.py @@ -7,7 +7,7 @@ import tensorkit as tk from tensorkit import tensor as T -from tensorkit.tensor import Tensor, reshape_tail, as_tensor_backend, zeros_like, shape +from tensorkit.tensor import Tensor, reshape_tail, float_scalar_like, zeros_like, shape from tensorkit.tensor.random import randn from tensorkit.flows import * from tests.helper import * @@ -33,7 +33,7 @@ def _transform(self, output_log_det = input_log_det if compute_log_det: - log_2 = as_tensor_backend(math.log(2.), dtype=output.dtype) + log_2 = float_scalar_like(math.log(2.), output) if output_log_det is None: if inverse: output_log_det = -log_2 * input.shape[-2] @@ -92,7 +92,7 @@ def test_invert(self): self.assertIsInstance(inv_flow, InverseFlow) def test_call(self): - flow = T.jit_compile(_MyFlow()) + flow = tk.layers.jit_compile(_MyFlow()) self.assertEqual(flow.get_x_event_ndims(), 1) self.assertEqual(flow.get_y_event_ndims(), 2) self.assertEqual(flow.is_explicitly_invertible(), True) @@ -123,7 +123,7 @@ def test_call(self): _ = flow(expected_y, T.random.randn([2, 4]), inverse=True) # test output_log_det shape error - flow = T.jit_compile(_MyBadFlow()) + flow = tk.layers.jit_compile(_MyBadFlow()) with pytest.raises(Exception, match='The shape of `output_log_det` is not expected'): _ = flow(x) @@ -140,7 +140,7 @@ def test_constructor(self): explicitly_invertible=True) self.assertEqual(flow.get_event_ndims(), 2) self.assertEqual(flow.axis, -1) - flow = T.jit_compile(flow) + flow = 
tk.layers.jit_compile(flow) self.assertEqual(flow.get_axis(), -1) self.assertEqual(flow.get_x_event_ndims(), 2) @@ -163,12 +163,12 @@ def test_constructor(self): class InverseFlowTestCase(unittest.TestCase): def test_InverseFlow(self): - original_flow = T.jit_compile(_MyFlow()) + original_flow = tk.layers.jit_compile(_MyFlow()) flow = InverseFlow(original_flow) self.assertIs(flow.original_flow, original_flow) self.assertIs(flow.invert(), original_flow) - flow = T.jit_compile(flow) + flow = tk.layers.jit_compile(flow) self.assertEqual(flow.get_x_event_ndims(), 2) self.assertEqual(flow.get_y_event_ndims(), 1) self.assertTrue(flow.is_explicitly_invertible()) @@ -189,7 +189,7 @@ def test_InverseFlow(self): base_flow.explicitly_invertible = False with pytest.raises(TypeError, match='`flow` must be an explicitly invertible flow'): - _ = InverseFlow(T.jit_compile(base_flow)) + _ = InverseFlow(tk.layers.jit_compile(base_flow)) class _MyFlow1(Flow): @@ -211,7 +211,7 @@ def _transform(self, output_log_det = input_log_det if compute_log_det: - log_2 = T.as_tensor_backend(math.log(2.), dtype=output.dtype) + log_2 = T.float_scalar_like(math.log(2.), output) if output_log_det is None: if inverse: output_log_det = -log_2 * input.shape[-1] @@ -229,16 +229,16 @@ def _transform(self, class SequentialFlowTestCase(unittest.TestCase): def test_constructor(self): - flows = [T.jit_compile(_MyFlow1()), T.jit_compile(_MyFlow())] - flow = T.jit_compile(SequentialFlow(flows)) + flows = [tk.layers.jit_compile(_MyFlow1()), tk.layers.jit_compile(_MyFlow())] + flow = tk.layers.jit_compile(SequentialFlow(flows)) self.assertEqual(flow.get_x_event_ndims(), 1) self.assertEqual(flow.get_y_event_ndims(), 2) self.assertTrue(flow.is_explicitly_invertible()) flow2 = _MyFlow() flow2.explicitly_invertible = False - flows = [T.jit_compile(_MyFlow1()), T.jit_compile(flow2)] - flow = T.jit_compile(SequentialFlow(flows)) + flows = [tk.layers.jit_compile(_MyFlow1()), tk.layers.jit_compile(flow2)] + flow = tk.layers.jit_compile(SequentialFlow(flows)) self.assertFalse(flow.is_explicitly_invertible()) with pytest.raises(ValueError, @@ -261,8 +261,8 @@ def test_constructor(self): def test_call(self): # test call and inverse call - flows = [_MyFlow1(), T.jit_compile(_MyFlow1())] - flow = T.jit_compile(SequentialFlow(flows)) + flows = [_MyFlow1(), tk.layers.jit_compile(_MyFlow1())] + flow = tk.layers.jit_compile(SequentialFlow(flows)) x = T.random.randn([2, 3, 4]) expected_y = (x * 2. + 1.) * 2. + 1. 
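A minimal sketch of the relocated JIT helpers exercised by these tests: `jit_compile` and `is_jit_layer` are now reached via `tk.layers` instead of the backend namespace `T`; the layer sizes and input shape below are illustrative only.

import tensorkit as tk
from tensorkit import tensor as T

layer = tk.layers.Linear(5, 3)
layer = tk.layers.jit_compile(layer)      # previously: T.jit_compile(layer)
# when JIT is enabled this is a compiled module; otherwise the layer is returned unchanged
assert tk.layers.is_jit_layer(layer) or tk.settings.disable_jit

x = T.random.randn([2, 5])
y = layer(x)                              # compiled layers are called like ordinary modules
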
@@ -275,7 +275,7 @@ def test_call(self): # test no inverse call flows = [_MyFlow1()] flows[0].explicitly_invertible = False - flow = T.jit_compile(SequentialFlow(flows)) + flow = tk.layers.jit_compile(SequentialFlow(flows)) with pytest.raises(Exception, match='Not an explicitly invertible flow'): @@ -313,7 +313,7 @@ def test_invertible_matrices(self): self.assertEqual(repr(m), f'{cls.__qualname__}(size={n})') self.assertEqual(m.size, n) - m = T.jit_compile(m) + m = tk.layers.jit_compile(m) # check the initial value is an orthogonal matrix matrix, _ = m(inverse=False, compute_log_det=False) @@ -354,7 +354,7 @@ def check_invertible_linear(ctx, # construct the layer flow = invertible_linear_factory(num_features, strict=strict) ctx.assertIn(f'num_features={num_features}', repr(flow)) - flow = T.jit_compile(flow) + flow = tk.layers.jit_compile(flow) # derive the expected answer weight, log_det = flow.invertible_matrix( @@ -518,7 +518,7 @@ def test_ExpScale(self): x = T.random.randn([2, 3, 4]) scale = ExpScale() - scale = T.jit_compile(scale) + scale = tk.layers.jit_compile(scale) for pre_scale in [T.random.randn([4]), T.random.randn([3, 1]), @@ -541,7 +541,7 @@ def test_SigmoidScale(self): if pre_scale_bias is None: pre_scale_bias = 0. self.assertIn(f'pre_scale_bias={pre_scale_bias}', repr(scale)) - scale = T.jit_compile(scale) + scale = tk.layers.jit_compile(scale) for pre_scale in [T.random.randn([4]), T.random.randn([3, 1]), @@ -558,7 +558,7 @@ def test_LinearScale(self): x = T.random.randn([2, 3, 4]) scale = LinearScale(epsilon=T.EPSILON) self.assertIn('epsilon=', repr(scale)) - scale = T.jit_compile(scale) + scale = tk.layers.jit_compile(scale) for pre_scale in [T.random.randn([4]), T.random.randn([3, 1]), diff --git a/tests/flows/test_coupling.py b/tests/flows/test_coupling.py index aa1b21f..eddd9a2 100644 --- a/tests/flows/test_coupling.py +++ b/tests/flows/test_coupling.py @@ -1,5 +1,4 @@ import unittest -from itertools import product import pytest @@ -19,8 +18,8 @@ def check_coupling_layer(ctx, sigmoid_scale_bias = 1.5 n1, n2 = (num_features // 2), (num_features - num_features // 2) - shift_and_pre_scale_1 = T.jit_compile(shift_and_pre_scale_factory(n1, n2)) - shift_and_pre_scale_2 = T.jit_compile(shift_and_pre_scale_factory(n2, n1)) + shift_and_pre_scale_1 = tk.layers.jit_compile(shift_and_pre_scale_factory(n1, n2)) + shift_and_pre_scale_2 = tk.layers.jit_compile(shift_and_pre_scale_factory(n2, n1)) def do_check(secondary, scale_type): x = T.random.randn(make_conv_shape( @@ -35,7 +34,7 @@ def do_check(secondary, scale_type): sigmoid_scale_bias=sigmoid_scale_bias ) ctx.assertIn(f'secondary={secondary}', repr(flow)) - flow = T.jit_compile(flow) + flow = tk.layers.jit_compile(flow) # obtain the expected output channel_axis = get_channel_axis(spatial_ndims) @@ -51,7 +50,7 @@ def do_check(secondary, scale_type): scale = SigmoidScale(pre_scale_bias=sigmoid_scale_bias) elif scale_type == 'linear' or scale_type is LinearScale: scale = LinearScale() - elif isinstance(scale_type, Scale) or T.is_jit_layer(scale_type): + elif isinstance(scale_type, Scale) or tk.layers.is_jit_layer(scale_type): scale = scale_type else: raise ValueError(f'Invalid value for `scale`: {scale_type}') @@ -72,7 +71,7 @@ def do_check(secondary, scale_type): do_check(secondary, 'exp') for scale_type in ('exp', 'sigmoid', 'linear', - SigmoidScale, T.jit_compile(LinearScale())): + SigmoidScale, tk.layers.jit_compile(LinearScale())): do_check(False, scale_type) # test error constructors diff --git 
a/tests/flows/test_rearrangement.py b/tests/flows/test_rearrangement.py index 1c576d2..451c9af 100644 --- a/tests/flows/test_rearrangement.py +++ b/tests/flows/test_rearrangement.py @@ -23,7 +23,7 @@ def check_shuffling_flow(ctx, inv_permutation = tk.layers.get_parameter(flow, 'inv_permutation') assert_equal(T.argsort(permutation), inv_permutation) assert_equal(T.argsort(inv_permutation), permutation) - flow = T.jit_compile(flow) + flow = tk.layers.jit_compile(flow) # prepare for the answer x = T.random.randn(shape) diff --git a/tests/flows/test_shape_.py b/tests/flows/test_shape_.py index b972c73..f15ad38 100644 --- a/tests/flows/test_shape_.py +++ b/tests/flows/test_shape_.py @@ -20,7 +20,7 @@ def test_ReshapeFlow(self): self.assertEqual(flow.get_y_event_ndims(), 1) self.assertIn('x_event_shape=[4, -1]', repr(flow)) self.assertIn('y_event_shape=[-1]', repr(flow)) - flow = T.jit_compile(flow) + flow = tk.layers.jit_compile(flow) x = T.random.randn([2, 3, 4, 5]) expected_y = T.reshape_tail(x, 2, [-1]) diff --git a/tests/flows/test_split_.py b/tests/flows/test_split_.py index 8250a6c..67b707f 100644 --- a/tests/flows/test_split_.py +++ b/tests/flows/test_split_.py @@ -33,7 +33,7 @@ def check_split_flow(ctx, ctx.assertIn(f'y_sections={y_sections}', repr(flow)) ctx.assertIn(f'x_axis={x_axis}', repr(flow)) ctx.assertIn(f'y_axis={y_axis}', repr(flow)) - flow = T.jit_compile(flow) + flow = tk.layers.jit_compile(flow) x1, x2 = T.split(x, x_sections, axis=x_axis) y1, expected_log_det = left(x1, compute_log_det=True) @@ -45,7 +45,7 @@ def check_split_flow(ctx, # with right flow = cls(x_sections, left, right, **kwargs) - flow = T.jit_compile(flow) + flow = tk.layers.jit_compile(flow) x1, x2 = T.split(x, x_sections, axis=x_axis) y1, expected_log_det = left(x1, compute_log_det=True) @@ -91,8 +91,8 @@ def test_SplitFlow(self): T.random.seed(1234) # x and y with the same event ndims - left = T.jit_compile(InvertibleDense(2)) - right = T.jit_compile(InvertibleDense(3)) + left = tk.layers.jit_compile(InvertibleDense(2)) + right = tk.layers.jit_compile(InvertibleDense(3)) check_split_flow( ctx=self, @@ -159,8 +159,8 @@ def test_SplitFlowNd(self): cls = getattr(tk.flows, f'SplitFlow{spatial_ndims}d') sub_cls = getattr(tk.flows, f'InvertibleConv{spatial_ndims}d') - left = T.jit_compile(sub_cls(2)) - right = T.jit_compile(sub_cls(3)) + left = tk.layers.jit_compile(sub_cls(2)) + right = tk.layers.jit_compile(sub_cls(3)) check_split_flow( ctx=self, diff --git a/tests/init/test_core.py b/tests/init/test_core.py index 0039cdf..74774d0 100644 --- a/tests/init/test_core.py +++ b/tests/init/test_core.py @@ -377,7 +377,7 @@ def test_data_dependent_initializer(self): # also `set_initialized` will affect layers with `set_initialized()` # method, e.g., `ActNorm` x = T.random.randn([2, 3, 5]) - layer = T.jit_compile(tk.layers.ActNorm(5)) + layer = tk.layers.jit_compile(tk.layers.ActNorm(5)) self.assertFalse(layer.flow.initialized) tk.init.set_initialized(layer) diff --git a/tests/init/test_std_data_init.py b/tests/init/test_std_data_init.py index 2802590..349720e 100644 --- a/tests/init/test_std_data_init.py +++ b/tests/init/test_std_data_init.py @@ -50,7 +50,7 @@ def check_x(layer): if not tk.settings.disable_jit: with pytest.raises(TypeError, match='JIT compiled layer is not supported'): - layer = T.jit_compile(tk.layers.Linear(5, 3)) + layer = tk.layers.jit_compile(tk.layers.Linear(5, 3)) tk.init.StdDataInit()(layer, [T.random.randn([3, 5])]) with pytest.raises(TypeError, match='`layer` is not a core linear layer'): 
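A small sketch of the initialization order implied by the test above: `StdDataInit` must see the plain core linear layer, so JIT compilation happens only afterwards (the batch shape is illustrative).

import tensorkit as tk
from tensorkit import tensor as T

layer = tk.layers.Linear(5, 3)
# initialize weight/bias from a data batch while the layer is still uncompiled;
# applying this to a jit-compiled layer raises TypeError, as the test checks
tk.init.StdDataInit()(layer, [T.random.randn([64, 5])])
layer = tk.layers.jit_compile(layer)      # compile only after data-dependent init
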
diff --git a/tests/layers/test_composed.py b/tests/layers/test_composed.py index 750da72..fb4721b 100644 --- a/tests/layers/test_composed.py +++ b/tests/layers/test_composed.py @@ -27,7 +27,7 @@ def check_composed_layer(ctx, input, layer_cls, linear_cls, normalizer_cls, ctx.assertIsInstance(layer[0], linear_cls) ctx.assertEqual(layer[0].use_bias, expected_use_bias) assert_allclose( - T.jit_compile(layer)(input), + tk.layers.jit_compile(layer)(input), linear(input) ) @@ -52,7 +52,7 @@ def check_composed_layer(ctx, input, layer_cls, linear_cls, normalizer_cls, ctx.assertEqual(layer[0].use_bias, expected_use_bias) ctx.assertIsInstance(layer[1], normalizer_cls) assert_allclose( - T.jit_compile(layer)(input), + tk.layers.jit_compile(layer)(input), normalizer(linear(input)), ) @@ -72,7 +72,7 @@ def check_composed_layer(ctx, input, layer_cls, linear_cls, normalizer_cls, ctx.assertIsInstance(layer[0], linear_cls) ctx.assertIsInstance(layer[1], tk.layers.Tanh) assert_allclose( - T.jit_compile(layer)(input), + tk.layers.jit_compile(layer)(input), activation_cls()(linear(input)), ) @@ -90,7 +90,7 @@ def check_composed_layer(ctx, input, layer_cls, linear_cls, normalizer_cls, ctx.assertIsInstance(layer[0], linear_cls) out = linear(input) assert_allclose( - T.jit_compile(layer)(input), + tk.layers.jit_compile(layer)(input), T.nn.sigmoid(out[:, out_features:] + 2.0) * out[:, :out_features], ) @@ -109,7 +109,7 @@ def check_composed_layer(ctx, input, layer_cls, linear_cls, normalizer_cls, ctx.assertIsInstance(layer[0], linear_cls) out = linear(input) assert_allclose( - T.jit_compile(layer)(input), + tk.layers.jit_compile(layer)(input), (T.nn.sigmoid(out[:, out_features:] + 2.0) * activation(out[:, :out_features])), ) @@ -131,7 +131,7 @@ def check_composed_layer(ctx, input, layer_cls, linear_cls, normalizer_cls, ctx.assertIsInstance(layer[0], linear_cls) out = normalizer(linear(input)) assert_allclose( - T.jit_compile(layer)(input), + tk.layers.jit_compile(layer)(input), (T.nn.sigmoid(out[:, out_features:] + 2.0) * activation(out[:, :out_features])), ) diff --git a/tests/layers/test_contextual.py b/tests/layers/test_contextual.py index 7dd093b..7842fc1 100644 --- a/tests/layers/test_contextual.py +++ b/tests/layers/test_contextual.py @@ -11,7 +11,7 @@ def test_IgnoreContext(self): x = T.random.randn([2, 3, 4]) context = [T.random.randn([2, 3, 4]), T.random.randn([2, 3, 4])] - layer = T.jit_compile(tk.layers.IgnoreContext()) + layer = tk.layers.jit_compile(tk.layers.IgnoreContext()) assert_equal(layer(x), x) assert_equal(layer(x, context), x) @@ -19,7 +19,7 @@ def test_AddContext(self): x = T.random.randn([2, 3, 4]) context = [T.random.randn([2, 3, 4]), T.random.randn([2, 3, 4])] - layer = T.jit_compile(tk.layers.AddContext()) + layer = tk.layers.jit_compile(tk.layers.AddContext()) assert_equal(layer(x), x) assert_equal(layer(x, context), x + context[0] + context[1]) @@ -27,6 +27,6 @@ def test_MultiplyContext(self): x = T.random.randn([2, 3, 4]) context = [T.random.randn([2, 3, 4]), T.random.randn([2, 3, 4])] - layer = T.jit_compile(tk.layers.MultiplyContext()) + layer = tk.layers.jit_compile(tk.layers.MultiplyContext()) assert_equal(layer(x), x) assert_equal(layer(x, context), x * context[0] * context[1]) diff --git a/tests/layers/test_core.py b/tests/layers/test_core.py index 99a84c9..3237d8f 100644 --- a/tests/layers/test_core.py +++ b/tests/layers/test_core.py @@ -174,7 +174,7 @@ def test_get_bias_store(self): class IdentityTestCase(unittest.TestCase): def test_identity(self): - layer = 
T.jit_compile(Identity()) + layer = tk.layers.jit_compile(Identity()) x = T.random.randn([2, 3, 4]) assert_equal(x, layer(x)) @@ -234,7 +234,7 @@ class _AutoRepr(BaseLayer): class BaseLayersTestCase(unittest.TestCase): def test_single_variate_layer(self): - layer = T.jit_compile(_MySingleVariateLayer()) + layer = tk.layers.jit_compile(_MySingleVariateLayer()) x = T.random.randn([2, 3, 4]) np_offset = T.from_numpy(np.array([0., 1., 2., 3.])) assert_allclose(layer(x), x * 11. + np_offset) @@ -242,7 +242,7 @@ def test_single_variate_layer(self): assert_allclose(layer(x), x * 11. + 7. + np_offset) def test_multi_variate_layer(self): - layer = T.jit_compile(_MyMultiVariateLayer()) + layer = tk.layers.jit_compile(_MyMultiVariateLayer()) x = T.random.randn([2, 3, 4]) y = T.random.randn([2, 3, 4]) z = T.random.randn([2, 3, 4]) @@ -251,7 +251,7 @@ def test_multi_variate_layer(self): assert_allclose(b, y + z) def test_split_layer(self): - layer = T.jit_compile(_MySplitLayer()) + layer = tk.layers.jit_compile(_MySplitLayer()) x = T.random.randn([2, 3, 4]) a, b, c = layer(x) assert_allclose(a, x) @@ -259,7 +259,7 @@ def test_split_layer(self): assert_allclose(c, x + 2) def test_merge_layer(self): - layer = T.jit_compile(_MyMergeLayer()) + layer = tk.layers.jit_compile(_MyMergeLayer()) x = T.random.randn([2, 3, 4]) y = T.random.randn([2, 3, 4]) z = T.random.randn([2, 3, 4]) @@ -287,7 +287,7 @@ def test_sequential(self): s = Sequential(layers[0], layers[1:2], [layers[2], [layers[3], layers[4]]]) self.assertEqual(list(s), layers) - y = T.jit_compile(s)(x) + y = tk.layers.jit_compile(s)(x) y2 = x for layer in layers: @@ -305,7 +305,7 @@ def check_core_linear(ctx, input, layer_factory, layer_name, numpy_fn): ctx.assertIsInstance(layer.weight_store, SimpleParamStore) weight = T.to_numpy(layer.weight_store()) bias = T.to_numpy(layer.bias_store()) - assert_allclose(T.jit_compile(layer)(T.as_tensor(input, dtype=T.float_x())), + assert_allclose(tk.layers.jit_compile(layer)(T.as_tensor(input, dtype=T.float_x())), numpy_fn(input, weight=weight, bias=bias), rtol=1e-4, atol=1e-6) ctx.assertNotIn('use_bias=', repr(layer)) @@ -314,7 +314,7 @@ def check_core_linear(ctx, input, layer_factory, layer_name, numpy_fn): layer = layer_factory(use_bias=False) ctx.assertIsInstance(layer.weight_store, SimpleParamStore) weight = T.to_numpy(layer.weight_store()) - assert_allclose(T.jit_compile(layer)(T.as_tensor(input, dtype=T.float_x())), + assert_allclose(tk.layers.jit_compile(layer)(T.as_tensor(input, dtype=T.float_x())), numpy_fn(input, weight=weight, bias=None), rtol=1e-4, atol=1e-6) ctx.assertIn('use_bias=False', repr(layer)) @@ -325,7 +325,7 @@ def check_core_linear(ctx, input, layer_factory, layer_name, numpy_fn): ctx.assertIsInstance(layer.weight_store, NormedAndScaledWeightStore, msg=f'weight_norm={wn}') weight = T.to_numpy(layer.weight_store()) - assert_allclose(T.jit_compile(layer)(T.as_tensor(input, dtype=T.float_x())), + assert_allclose(tk.layers.jit_compile(layer)(T.as_tensor(input, dtype=T.float_x())), numpy_fn(input, weight=weight, bias=None), rtol=1e-4, atol=1e-6) @@ -334,7 +334,7 @@ def check_core_linear(ctx, input, layer_factory, layer_name, numpy_fn): ctx.assertIsInstance(layer.weight_store, NormedWeightStore, msg=f'weight_norm={wn}') weight = T.to_numpy(layer.weight_store()) - assert_allclose(T.jit_compile(layer)(T.as_tensor(input, dtype=T.float_x())), + assert_allclose(tk.layers.jit_compile(layer)(T.as_tensor(input, dtype=T.float_x())), numpy_fn(input, weight=weight, bias=None), rtol=1e-4, atol=1e-6) @@ 
-503,7 +503,7 @@ def test_batch_norm(self): else f'BatchNorm{spatial_ndims}d')) layer = cls(5, momentum=0.1, epsilon=eps) self.assertIn('BatchNorm', repr(layer)) - layer = T.jit_compile(layer) + layer = tk.layers.jit_compile(layer) # layer output x = T.random.randn(make_conv_shape( @@ -551,7 +551,7 @@ def test_dropout(self): layer = cls(p=0.3) self.assertIn('p=0.3', repr(layer)) self.assertIn('Dropout', repr(layer)) - layer = T.jit_compile(layer) + layer = tk.layers.jit_compile(layer) x = 1. + T.random.rand( make_conv_shape([1], n_samples, [2, 2, 2][:spatial_ndims]) diff --git a/tests/layers/test_flow_layer.py b/tests/layers/test_flow_layer.py index bb0e654..4d3b06f 100644 --- a/tests/layers/test_flow_layer.py +++ b/tests/layers/test_flow_layer.py @@ -30,9 +30,9 @@ def _transform(self, class FlowLayerTestCase(unittest.TestCase): def test_FlowLayer(self): - flow = T.jit_compile(_MyFlow( + flow = tk.layers.jit_compile(_MyFlow( x_event_ndims=0, y_event_ndims=0, explicitly_invertible=True)) - layer = T.jit_compile(tk.layers.FlowLayer(flow)) + layer = tk.layers.jit_compile(tk.layers.FlowLayer(flow)) x = T.random.randn([3, 4, 5]) assert_allclose(layer(x), x * 2.) @@ -53,7 +53,7 @@ def test_ActNorm(self): _ = layer(T.random.randn([3, 4, 5])) # check call - layer = T.jit_compile(layer) + layer = tk.layers.jit_compile(layer) x = T.random.randn([3, 4, 5]) assert_allclose(layer(x), flow(x)[0]) @@ -72,6 +72,6 @@ def test_ActNormNd(self): _ = layer(T.random.randn(shape)) # check call - layer = T.jit_compile(layer) + layer = tk.layers.jit_compile(layer) x = T.random.randn(shape) assert_allclose(layer(x), flow(x)[0]) diff --git a/tests/layers/test_gated.py b/tests/layers/test_gated.py index 89ce686..1f91d24 100644 --- a/tests/layers/test_gated.py +++ b/tests/layers/test_gated.py @@ -16,7 +16,7 @@ def test_Gated(self): 'feature_axis=-2, num_features=3, gate_bias=1.5', repr(gated) ) - gated = T.jit_compile(gated) + gated = tk.layers.jit_compile(gated) x = T.random.randn([6, 5]) assert_allclose(gated(x), x[:3, ...] * T.nn.sigmoid(x[3:, ...] 
+ 1.5)) @@ -37,7 +37,7 @@ def test_GatedWithActivation(self): 'feature_axis=-2, num_features=3, gate_bias=1.5', repr(gated) ) - gated = T.jit_compile(gated) + gated = tk.layers.jit_compile(gated) x = T.random.randn([6, 5]) assert_allclose( diff --git a/tests/layers/test_pixelcnn.py b/tests/layers/test_pixelcnn.py index 59ede6f..ac2cc03 100644 --- a/tests/layers/test_pixelcnn.py +++ b/tests/layers/test_pixelcnn.py @@ -129,7 +129,7 @@ def test_causality_and_receptive_field(self): 1, 1, kernel_size=kernel_size, add_ones_channel=False, weight_init=tk.init.ones, ) - input_layer = T.jit_compile(input_layer) + input_layer = tk.layers.jit_compile(input_layer) with pytest.raises(Exception, match='`input` is expected to be .*d'): @@ -157,7 +157,7 @@ def test_causality_and_receptive_field(self): resblock_layer = resblock_layer_cls( 1, 1, kernel_size=kernel_size, weight_init=tk.init.ones ) - resblock_layer = T.jit_compile(resblock_layer) + resblock_layer = tk.layers.jit_compile(resblock_layer) with pytest.raises(Exception): _ = resblock_layer([T.zeros([])] * (spatial_ndims - 1)) @@ -167,7 +167,7 @@ def test_causality_and_receptive_field(self): # the down-sampling and up-sampling layer down_sample_cls = getattr(tk.layers, f'PixelCNNConv{spatial_ndims}d') down_sample_layer = down_sample_cls(1, 1, kernel_size, stride=2) - down_sample_layer = T.jit_compile(down_sample_layer) + down_sample_layer = tk.layers.jit_compile(down_sample_layer) down_sample_output_size = T.shape(down_sample_layer( [T.zeros(make_conv_shape([1], 1, size))] * spatial_ndims)[0]) @@ -183,13 +183,13 @@ def test_causality_and_receptive_field(self): padding='half', # sum of the both sides == (kernel_size - 1) * dilation ) ) - up_sample_layer = T.jit_compile(up_sample_layer) + up_sample_layer = tk.layers.jit_compile(up_sample_layer) # the output layer output_layer_cls = getattr( tk.layers, f'PixelCNNOutput{spatial_ndims}d') output_layer = output_layer_cls() - output_layer = T.jit_compile(output_layer) + output_layer = tk.layers.jit_compile(output_layer) with pytest.raises(Exception, match=r'`len\(inputs\)` is expected to be .*'): @@ -256,7 +256,7 @@ def test_pixelcnn_network(self): tk.layers, f'PixelCNNInput{spatial_ndims}d') input_layer = input_layer_cls( in_channels, out_channels, kernel_size=kernel_size) - input_layer = T.jit_compile(input_layer) + input_layer = tk.layers.jit_compile(input_layer) # the pixelcnn layers resblock_layer_cls = getattr( @@ -294,7 +294,7 @@ def test_pixelcnn_network(self): data_init=tk.init.StdDataInit, ), ] - pixelcnn_layers = [T.jit_compile(l) for l in pixelcnn_layers] + pixelcnn_layers = [tk.layers.jit_compile(l) for l in pixelcnn_layers] # the pixelcnn network network_cls = getattr(tk.layers, f'PixelCNN{spatial_ndims}d') diff --git a/tests/layers/test_pool.py b/tests/layers/test_pool.py index ac29e8d..aa8e499 100644 --- a/tests/layers/test_pool.py +++ b/tests/layers/test_pool.py @@ -83,7 +83,7 @@ def is_valid_padding(padding, kernel_size): f'padding={padding})' ) - layer = T.jit_compile(layer) + layer = tk.layers.jit_compile(layer) assert_allclose( layer(x), fn(x, kernel_size=kernel_size, stride=stride, @@ -108,7 +108,7 @@ def fn(arr): f'GlobalAvgPool{spatial_ndims}d(keepdims={keepdims})' ) - layer = T.jit_compile(layer) + layer = tk.layers.jit_compile(layer) x = T.random.randn(make_conv_shape([4, 5], 6, [7, 8, 9][:spatial_ndims])) assert_allclose(layer(x), fn(T.to_numpy(x)), rtol=1e-4, atol=1e-6) diff --git a/tests/layers/test_resnet.py b/tests/layers/test_resnet.py index 79742c2..e579478 100644 --- 
a/tests/layers/test_resnet.py +++ b/tests/layers/test_resnet.py @@ -51,7 +51,7 @@ def check_resblock(ctx, ctx.assertFalse(layer.conv0.use_bias) ctx.assertFalse(layer.conv1.use_bias) - layer = T.jit_compile(layer) + layer = tk.layers.jit_compile(layer) assert_allclose( layer(x), x + layer.conv1(layer.conv0(x)), @@ -71,7 +71,7 @@ def check_resblock(ctx, ctx.assertIsNotNone(layer.conv0.bias_store) ctx.assertIsNotNone(layer.conv1.bias_store) - layer = T.jit_compile(layer) + layer = tk.layers.jit_compile(layer) assert_allclose( layer(x), layer.shortcut(x) + layer.conv1(layer.conv0(x)), @@ -122,7 +122,7 @@ def check_resblock(ctx, ctx.assertEqual(layer.conv1.dilation, dilation) ctx.assertEqual(layer.conv1.out_channels, 4) - layer = T.jit_compile(layer) + layer = tk.layers.jit_compile(layer) assert_allclose(layer(x), layer.shortcut(x) + layer.conv1(layer.conv0(x))) # test resize_at_exit = True @@ -141,7 +141,7 @@ def check_resblock(ctx, ctx.assertEqual(layer.conv1.padding, padding) ctx.assertEqual(layer.conv1.out_channels, 4) - layer = T.jit_compile(layer) + layer = tk.layers.jit_compile(layer) assert_allclose( layer(x), layer.shortcut(x) + layer.conv1(layer.conv0(x)), @@ -168,7 +168,7 @@ def check_resblock(ctx, ctx.assertIsInstance(layer.pre_conv1[1], tk.layers.LeakyReLU) ctx.assertEqual(len(layer.pre_conv1), 2) - layer = T.jit_compile(layer) + layer = tk.layers.jit_compile(layer) assert_allclose( layer(x), (layer.shortcut(x) + @@ -200,7 +200,7 @@ def check_resblock(ctx, _ = layer(x) tk.layers.set_train_mode(layer, False) - layer = T.jit_compile(layer) + layer = tk.layers.jit_compile(layer) assert_allclose( layer(x), (layer.shortcut(x) + @@ -220,7 +220,7 @@ def check_resblock(ctx, ctx.assertIsInstance(layer.post_conv1, tk.layers.Gated) ctx.assertEqual(layer.post_conv1.gate_bias, 1.5) - layer = T.jit_compile(layer) + layer = tk.layers.jit_compile(layer) assert_allclose( layer(x), (layer.shortcut(x) + layer.post_conv1( @@ -242,7 +242,7 @@ def check_resblock(ctx, ctx_shape = make_conv_shape([3], 5, [1] * spatial_ndims) context = [T.random.randn(ctx_shape), T.random.randn(ctx_shape)] - layer = T.jit_compile(layer) + layer = tk.layers.jit_compile(layer) assert_allclose( layer(x, context), (layer.shortcut(x) + @@ -265,7 +265,7 @@ def check_resblock(ctx, ctx.assertIs(layer.conv0, conv0) ctx.assertIs(layer.conv1, conv1) - layer = T.jit_compile(layer) + layer = tk.layers.jit_compile(layer) assert_allclose( layer(x), layer.shortcut(x) + layer.conv1(layer.conv0(x)), diff --git a/tests/layers/test_shape_.py b/tests/layers/test_shape_.py index 1636ffa..6f0bbbf 100644 --- a/tests/layers/test_shape_.py +++ b/tests/layers/test_shape_.py @@ -14,7 +14,7 @@ def test_FlattenToNDims(self): x = T.random.randn(make_conv_shape([3, 4], 6, [5])) internal = tk.layers.LinearConv1d(6, 7, kernel_size=1) - layer = T.jit_compile(tk.layers.FlattenToNDims(internal, 3)) + layer = tk.layers.jit_compile(tk.layers.FlattenToNDims(internal, 3)) xx, front_shape = T.flatten_to_ndims(x, 3) assert_equal(layer(x), T.unflatten_from_ndims(internal(xx), front_shape)) @@ -33,7 +33,7 @@ def test_ConstantPad(self): repr(layer), f'ConstantPad(padding=[(1, 1), (2, 3), (0, 5)], value={value})' ) - layer = T.jit_compile(layer) + layer = tk.layers.jit_compile(layer) x = T.random.randn([3, 4, 5]) assert_equal(layer(x), T.pad(x, [(1, 1), (2, 3), (0, 5)], value=value)) @@ -76,7 +76,7 @@ def fn(v): repr(layer), f'ConstantPad{spatial_ndims}d(padding={padding}, value={value})' ) - layer = T.jit_compile(layer) + layer = tk.layers.jit_compile(layer) 
assert_equal( layer(x), spatial_pad(x, [pad_arg] * spatial_ndims) @@ -90,7 +90,7 @@ def fn(v): repr(layer), f'ConstantPad{spatial_ndims}d(padding={padding}, value={value})' ) - layer = T.jit_compile(layer) + layer = tk.layers.jit_compile(layer) assert_equal(layer(x), spatial_pad(x, padding)) # error padding argument @@ -118,7 +118,7 @@ def test_channel_last_to_first(self): fn = getattr(T.nn, f'channel_last_to_first{spatial_ndims}d') x = T.random.randn([3, 4, 5, 6, 7][:spatial_ndims + 2]) - layer = T.jit_compile(layer) + layer = tk.layers.jit_compile(layer) assert_equal(layer(x), fn(x)) def test_channel_first_to_last(self): @@ -131,5 +131,5 @@ def test_channel_first_to_last(self): fn = getattr(T.nn, f'channel_first_to_last{spatial_ndims}d') x = T.random.randn([3, 4, 5, 6, 7][:spatial_ndims + 2]) - layer = T.jit_compile(layer) + layer = tk.layers.jit_compile(layer) assert_equal(layer(x), fn(x)) diff --git a/tests/layers/test_split_.py b/tests/layers/test_split_.py index 0c5f095..141cfb3 100644 --- a/tests/layers/test_split_.py +++ b/tests/layers/test_split_.py @@ -17,7 +17,7 @@ def test_branch(self): for k in range(len(branches) + 1): # without shared module layer = tk.layers.Branch(branches[:k]) - layer = T.jit_compile(layer) + layer = tk.layers.jit_compile(layer) out = layer(x) self.assertIsInstance(out, list) @@ -28,7 +28,7 @@ def test_branch(self): # with shared module layer = tk.layers.Branch(branches[:k], shared=shared) - layer = T.jit_compile(layer) + layer = tk.layers.jit_compile(layer) out = layer(x) self.assertIsInstance(out, list) diff --git a/tests/tensor/test_core.py b/tests/tensor/test_core.py index f5d5124..9fb536f 100644 --- a/tests/tensor/test_core.py +++ b/tests/tensor/test_core.py @@ -23,16 +23,16 @@ def test_backend_info(self): def test_jit_compile(self): # test compile layer layer = tk.layers.Linear(5, 3) - layer2 = T.jit_compile(layer) + layer2 = tk.layers.jit_compile(layer) if not tk.settings.disable_jit: - self.assertTrue(T.is_jit_layer(layer2)) + self.assertTrue(tk.layers.is_jit_layer(layer2)) else: - self.assertFalse(T.is_jit_layer(layer2)) + self.assertFalse(tk.layers.is_jit_layer(layer2)) # not supported object with pytest.raises(TypeError, match='Not supported by `jit_compile`'): - _ = T.jit_compile(object()) + _ = tk.layers.jit_compile(object()) def test_utilities(self): self.assertEqual(T.int_range(0, 10), list(range(10))) @@ -50,13 +50,13 @@ def test_dtypes(self): self.assertIsInstance(dtype, str) self.assertFalse(T.is_floating_point(T.as_tensor(0, dtype=dtype))) self.assertFalse(T.is_floating_point_dtype(dtype)) - self.assertEqual(T.get_dtype(T.cast(T.as_tensor_backend(x), dtype)), dtype) + self.assertEqual(T.get_dtype(T.cast(T.as_tensor(x), dtype)), dtype) for dtype in [T.float16, T.float32, T.float64]: self.assertIsInstance(dtype, str) self.assertTrue(T.is_floating_point(T.as_tensor(0, dtype=dtype))) self.assertTrue(T.is_floating_point_dtype(dtype)) - self.assertEqual(T.get_dtype(T.cast(T.as_tensor_backend(x), dtype)), dtype) + self.assertEqual(T.get_dtype(T.cast(T.as_tensor(x), dtype)), dtype) # floatx self.assertEqual(settings.float_x, 'float32') @@ -68,7 +68,7 @@ def test_dtypes(self): settings.float_x = 'float32' # as_tensor - t = T.as_tensor_backend(x) + t = T.as_tensor(x) self.assertIsInstance(t, T.Tensor) assert_equal(t, x) @@ -80,27 +80,28 @@ def test_dtypes(self): assert_equal(t2, x) # cast_like - for dtype_as in (t, t2): - t3 = T.cast_like(t, dtype_as) + for like in (t, t2): + t3 = T.cast_like(t, like) self.assertIsInstance(t3, T.Tensor) - 
self.assertEqual(T.get_dtype(t3), T.get_dtype(dtype_as)) + self.assertEqual(T.get_dtype(t3), T.get_dtype(like)) + self.assertEqual(T.get_device(t3), T.get_device(like)) assert_equal(t3, x) def test_tensor_constructors(self): np.random.seed(1234) - # as_tensor_backend - for x in [1., 1, [1., 2., 3.], np.array([1., 2., 3.])]: - t = T.as_tensor_backend(x) - self.assertIsInstance(t, T.Tensor) - assert_equal(t, x) - - x = T.as_tensor_backend(np.asarray([1, 2, 3], dtype=np.int32)) - t = T.as_tensor_backend(x) - self.assertIs(t, x) - - with pytest.raises(Exception): - _ = T.as_tensor_backend(object()) # not a tensor, should raise error + # # as_tensor + # for x in [1., 1, [1., 2., 3.], np.array([1., 2., 3.])]: + # t = T.as_tensor(x) + # self.assertIsInstance(t, T.Tensor) + # assert_equal(t, x) + # + # x = T.as_tensor(np.asarray([1, 2, 3], dtype=np.int32)) + # t = T.as_tensor(x) + # self.assertIs(t, x) + # + # with pytest.raises(Exception): + # _ = T.as_tensor(object()) # not a tensor, should raise error # as_tensor def copy_tensor(o): @@ -265,16 +266,16 @@ def copy_tensor(o): I = np.eye(n_classes) x = np.random.randint(0, n_classes, size=shape) - t = T.one_hot(T.as_tensor_backend(x), n_classes) + t = T.one_hot(T.as_tensor(x), n_classes) assert_equal(t, I[x]) for dtype in number_dtypes: - t = T.one_hot(T.as_tensor_backend(x), n_classes, dtype=dtype) + t = T.one_hot(T.as_tensor(x), n_classes, dtype=dtype) self.assertEqual(T.get_dtype(t), dtype) assert_equal(t, I[x]) for axis in range(-(len(shape) + 1), len(shape) + 1): - t = T.one_hot(T.as_tensor_backend(x), n_classes, axis=axis) + t = T.one_hot(T.as_tensor(x), n_classes, axis=axis) expected_t = list(range(0, len(shape))) if axis < 0: expected_t.insert(len(expected_t) + axis + 1, -1) @@ -285,11 +286,11 @@ def copy_tensor(o): for axis in [-(len(shape) + 2), len(shape) + 1]: with pytest.raises(Exception, match='`axis` out of range'): - _ = T.one_hot(T.as_tensor_backend(x), n_classes, axis=axis) + _ = T.one_hot(T.as_tensor(x), n_classes, axis=axis) def test_to_numpy(self): x = np.random.randn(2, 3, 4) - t = T.as_tensor_backend(x) + t = T.as_tensor(x) out = T.to_numpy(t) self.assertIsInstance(out, np.ndarray) assert_equal(out, x) @@ -298,7 +299,7 @@ def test_to_numpy(self): _ = T.to_numpy(object()) x = np.asarray([True, False]) - t = T.as_tensor_backend(x) + t = T.as_tensor(x) out = T.to_numpy(t) self.assertIsInstance(out, np.ndarray) self.assertEqual(out.dtype, np.bool) @@ -419,7 +420,7 @@ def test_assignment(self): def test_shape_utils(self): # test shape x = np.random.randn(2, 3, 4) - t = T.as_tensor_backend(x) + t = T.as_tensor(x) s = T.shape(t) self.assertEqual(s, [2, 3, 4]) @@ -436,7 +437,7 @@ def test_shape_utils(self): # test repeat x = np.random.randn(2, 1, 3) - t = T.as_tensor_backend(x) + t = T.as_tensor(x) t2 = T.repeat(t, []) self.assertEqual(T.shape(t2), [2, 1, 3]) @@ -465,9 +466,9 @@ def test_shape_utils(self): # test squeeze x = np.random.randn(1, 2, 1, 3, 1, 4, 1) - t = T.as_tensor_backend(x) + t = T.as_tensor(x) - t2 = T.squeeze(T.as_tensor_backend(x)) + t2 = T.squeeze(T.as_tensor(x)) s2 = [2, 3, 4] self.assertEqual(T.shape(t2), s2) assert_equal(t2, x.reshape(s2)) @@ -487,7 +488,7 @@ def test_shape_utils(self): # test expand dim x = np.random.randn(2, 3) - t = T.as_tensor_backend(x) + t = T.as_tensor(x) t2 = T.expand_dim(t, -1) s2 = [2, 3, 1] @@ -551,7 +552,7 @@ def test_shape_utils(self): # test broadcast_to x = np.random.randn(1, 2, 1) - t = T.as_tensor_backend(x) + t = T.as_tensor(x) t2 = T.broadcast_to(t, [4, 5, 2, 1]) 
self.assertEqual(T.shape(t2), [4, 5, 2, 1]) @@ -578,7 +579,7 @@ def explicit_broadcast(x, y): def check_explicit_broadcast(shape1, shape2): x = np.asarray(np.random.randn(*shape1)) y = np.asarray(np.random.randn(*shape2)) - out1, out2 = T.explicit_broadcast(T.as_tensor_backend(x), T.as_tensor_backend(y)) + out1, out2 = T.explicit_broadcast(T.as_tensor(x), T.as_tensor(y)) out1 = T.to_numpy(out1) out2 = T.to_numpy(out2) ans1, ans2 = explicit_broadcast(x, y) @@ -623,7 +624,7 @@ def run_check(x, k): with pytest.raises(Exception, match='`ndims` must be at least 1'): - _ = T.flatten_to_ndims(T.as_tensor_backend([0.]), 0) + _ = T.flatten_to_ndims(T.as_tensor([0.]), 0) with pytest.raises(Exception, match=r'rank\(x\) < ndims'): _ = T.flatten_to_ndims(T.zeros([3, 4]), 3) @@ -634,7 +635,7 @@ def run_check(x, k): with pytest.raises(Exception, match=r'Invalid input: rank\(x\) < 1, but ' r'front_shape is not None'): - t = T.as_tensor_backend(123) + t = T.as_tensor(123) _ = T.unflatten_from_ndims(t, [2, 3]) # test reshape_tail @@ -667,46 +668,46 @@ def run_check(x, k): def test_index_select_and_others(self): # index_select x = np.random.randn(3, 4, 5) - t = T.as_tensor_backend(x) + t = T.as_tensor(x) assert_equal( - T.index_select(t, T.as_tensor_backend(1), 0), + T.index_select(t, T.as_tensor(1), 0), x[1, ...] ) assert_equal( - T.index_select(t, T.as_tensor_backend(3), 1), + T.index_select(t, T.as_tensor(3), 1), x[:, 3, ...] ) assert_equal( - T.index_select(t, T.as_tensor_backend(2), -1), + T.index_select(t, T.as_tensor(2), -1), x[..., 2] ) i = np.asarray([0, 2, 1, 1, 0, 2]) assert_equal( - T.index_select(t, T.as_tensor_backend(i), 0), + T.index_select(t, T.as_tensor(i), 0), x[i, ...] ) assert_equal( - T.index_select(t, T.as_tensor_backend(i), 1), + T.index_select(t, T.as_tensor(i), 1), x[:, i, ...] ) assert_equal( - T.index_select(t, T.as_tensor_backend(i), -1), + T.index_select(t, T.as_tensor(i), -1), x[..., i] ) i = np.asarray([[0, 2, 1], [1, 0, 2]]) assert_equal( - T.index_select(t, T.as_tensor_backend(i), 0), + T.index_select(t, T.as_tensor(i), 0), x[i, ...] ) assert_equal( - T.index_select(t, T.as_tensor_backend(i), 1), + T.index_select(t, T.as_tensor(i), 1), x[:, i, ...] ) assert_equal( - T.index_select(t, T.as_tensor_backend(i), -1), + T.index_select(t, T.as_tensor(i), -1), x[..., i] ) @@ -714,27 +715,27 @@ def test_index_select_and_others(self): # TODO: pytorch currently does not support negative index in many # of its functions. enable these test when supported. assert_equal( - T.index_select(t, T.as_tensor_backend(-1), 1), + T.index_select(t, T.as_tensor(-1), 1), x[:, -1] ) i = np.asarray([0, 1, -1, 2, -2, 0]) assert_equal( - T.index_select(t, T.as_tensor_backend(i), 1), + T.index_select(t, T.as_tensor(i), 1), x[:, i, ...] ) i = np.asarray([[0, 1, -1], [2, -2, 0]]) assert_equal( - T.index_select(t, T.as_tensor_backend(i), 1), + T.index_select(t, T.as_tensor(i), 1), x[:, i, ...] 
) with pytest.raises(Exception, match='`axis` out of range'): - _ = T.index_select(t, T.as_tensor_backend(0), 3) + _ = T.index_select(t, T.as_tensor(0), 3) with pytest.raises(Exception, match='`axis` out of range'): - _ = T.index_select(t, T.as_tensor_backend(0), -4) + _ = T.index_select(t, T.as_tensor(0), -4) # concat x = np.random.randn(2, 3, 4) @@ -743,7 +744,7 @@ def test_index_select_and_others(self): for arrays, axis in [([x, x, y], -2), ([x, y, y], 1), ([x, x, z], -1), ([x, z, z], 2)]: - t = T.concat([T.as_tensor_backend(arr) for arr in arrays], axis=axis) + t = T.concat([T.as_tensor(arr) for arr in arrays], axis=axis) expected = np.concatenate(arrays, axis=axis) assert_equal(t, expected) @@ -929,8 +930,8 @@ def test_math_univariate_op(self): x = np.random.randn(2, 3) u = np.random.rand(2, 3) - x_t = T.as_tensor_backend(x) - u_t = T.as_tensor_backend(u) + x_t = T.as_tensor(x) + u_t = T.as_tensor(u) assert_allclose(T.floor(x_t), np.floor(x)) assert_allclose(T.ceil(x_t), np.ceil(x)) @@ -939,9 +940,9 @@ def test_math_univariate_op(self): assert_allclose(T.square(x_t), x ** 2) assert_allclose(T.exp(x_t), np.exp(x)) - assert_allclose(T.log(T.as_tensor_backend(np.abs(x))), + assert_allclose(T.log(T.as_tensor(np.abs(x))), np.log(np.abs(x))) - assert_allclose(T.log1p(T.as_tensor_backend(np.abs(x) - 1. + 1e-7)), + assert_allclose(T.log1p(T.as_tensor(np.abs(x) - 1. + 1e-7)), np.log1p(np.abs(x) - 1. + 1e-7)) assert_allclose(T.sin(x_t), np.sin(x)) @@ -958,15 +959,15 @@ def test_math_bivariate_op(self): np.random.seed(1234) x = np.random.randn(2, 3) y = np.random.randn(3) - t1 = T.as_tensor_backend(x) - t2 = T.as_tensor_backend(y) + t1 = T.as_tensor(x) + t2 = T.as_tensor(y) assert_allclose(T.add(t1, t2), x + y) assert_allclose(T.sub(t1, t2), x - y) assert_allclose(T.mul(t1, t2), x * y) - assert_allclose(T.pow(T.as_tensor_backend(np.abs(x)), t2), + assert_allclose(T.pow(T.as_tensor(np.abs(x)), t2), np.abs(x) ** y) - assert_allclose(T.sqrt(T.as_tensor_backend(np.abs(x))), np.sqrt(np.abs(x))) + assert_allclose(T.sqrt(T.as_tensor(np.abs(x))), np.sqrt(np.abs(x))) # for division, of course y should not equal to zero y = np.asarray(y == 0, dtype=y.dtype) + y @@ -977,8 +978,8 @@ def test_math_bivariate_op(self): # to produce identical results with numpy when x > 0 and y > 0 x = np.abs(x) y = np.abs(y) - t1 = T.as_tensor_backend(x) - t2 = T.as_tensor_backend(y) + t1 = T.as_tensor(x) + t2 = T.as_tensor(y) assert_allclose(T.floordiv(t1, t2), x // y) assert_allclose(T.mod(t1, t2), x % y) @@ -994,8 +995,8 @@ def test_math_bivariate_op(self): x = np.random.randint(0, 255, size=(2, 3), dtype=np.uint8) y = np.random.randint(0, 255, size=(3,), dtype=np.uint8) y = y + (y == 0).astype(y.dtype) - t1 = T.as_tensor_backend(x) - t2 = T.as_tensor_backend(y) + t1 = T.as_tensor(x) + t2 = T.as_tensor(y) out = T.truediv(t1, t2) self.assertEqual(T.get_dtype(out), T.float32) assert_allclose(out, x.astype(np.float32) / y.astype(np.float32)) @@ -1004,8 +1005,8 @@ def test_math_bivariate_op(self): x = np.random.randint(-32768, 32767, size=(2, 3), dtype=np.int16) y = np.random.randint(-32768, 32767, size=(3,), dtype=np.int16) y = y + (y == 0).astype(y.dtype) - t1 = T.as_tensor_backend(x) - t2 = T.as_tensor_backend(y) + t1 = T.as_tensor(x) + t2 = T.as_tensor(y) out = T.truediv(t1, t2) self.assertEqual(T.get_dtype(out), T.float32) assert_allclose(out, x.astype(np.float32) / y.astype(np.float32)) @@ -1014,8 +1015,8 @@ def test_math_bivariate_op(self): x = np.random.randint(-100000, 100000, size=(2, 3), dtype=np.int32) y = 
np.random.randint(-100000, 100000, size=(3,), dtype=np.int32) y = y + (y == 0).astype(y.dtype) - t1 = T.as_tensor_backend(x) - t2 = T.as_tensor_backend(y) + t1 = T.as_tensor(x) + t2 = T.as_tensor(y) out = T.truediv(t1, t2) self.assertEqual(T.get_dtype(out), T.float64) assert_allclose(out, x.astype(np.float64) / y.astype(np.float64)) @@ -1027,7 +1028,7 @@ def test_math_sequential_op(self): z = np.random.randn(2, 1) assert_allclose( - T.add_n([T.as_tensor_backend(t) for t in (x, y, z)]), + T.add_n([T.as_tensor(t) for t in (x, y, z)]), x + y + z ) @@ -1050,7 +1051,7 @@ def log_f_exp(f, x, axis=None, keepdims=False): # prepare for the data np.random.seed(1234) x = np.random.randn(2, 3, 4) - t = T.as_tensor_backend(x) + t = T.as_tensor(x) # test sum, mean, max, min for name in ['sum', 'mean', 'min', 'max', @@ -1160,8 +1161,8 @@ def with_raise(name, fn): x = np.asarray([[True, True, False, False], [False, False, True, True]]) y = np.asarray([True, False, False, True]) - t1 = T.as_tensor_backend(x) - t2 = T.as_tensor_backend(y) + t1 = T.as_tensor(x) + t2 = T.as_tensor(y) # test as_boolean self.assertEqual(T.get_dtype(t1), T.boolean) @@ -1170,25 +1171,25 @@ def with_raise(name, fn): # test logical_not out = T.logical_not(t1) assert_equal(read_bool(out), np.logical_not(x)) - with_raise('x', lambda: T.logical_not(T.as_tensor_backend([1, 2, 3]))) + with_raise('x', lambda: T.logical_not(T.as_tensor([1, 2, 3]))) # test logical_and out = T.logical_and(t1, t2) assert_equal(read_bool(out), np.logical_and(x, y)) - with_raise('x', lambda: T.logical_and(T.as_tensor_backend([1, 2, 3, 4]), t2)) - with_raise('y', lambda: T.logical_and(t1, T.as_tensor_backend([1, 2, 3, 4]))) + with_raise('x', lambda: T.logical_and(T.as_tensor([1, 2, 3, 4]), t2)) + with_raise('y', lambda: T.logical_and(t1, T.as_tensor([1, 2, 3, 4]))) # test logical_or out = T.logical_or(t1, t2) assert_equal(read_bool(out), np.logical_or(x, y)) - with_raise('x', lambda: T.logical_or(T.as_tensor_backend([1, 2, 3, 4]), t2)) - with_raise('y', lambda: T.logical_or(t1, T.as_tensor_backend([1, 2, 3, 4]))) + with_raise('x', lambda: T.logical_or(T.as_tensor([1, 2, 3, 4]), t2)) + with_raise('y', lambda: T.logical_or(t1, T.as_tensor([1, 2, 3, 4]))) # test logical_xor out = T.logical_xor(t1, t2) assert_equal(read_bool(out), np.logical_xor(x, y)) - with_raise('x', lambda: T.logical_xor(T.as_tensor_backend([1, 2, 3, 4]), t2)) - with_raise('y', lambda: T.logical_xor(t1, T.as_tensor_backend([1, 2, 3, 4]))) + with_raise('x', lambda: T.logical_xor(T.as_tensor([1, 2, 3, 4]), t2)) + with_raise('y', lambda: T.logical_xor(t1, T.as_tensor([1, 2, 3, 4]))) # test multiply_mask def test_multiply_mask(x, y, dtype, mask_dtype): @@ -1268,8 +1269,8 @@ def read_bool(t): x = np.random.randn(2, 3, 4) y = np.random.randn(1, 3, 4) x = np.concatenate([y, x], axis=0) - t1 = T.as_tensor_backend(x) - t2 = T.as_tensor_backend(y) + t1 = T.as_tensor(x) + t2 = T.as_tensor(y) # test equal assert_equal(read_bool(T.equal(t1, t2)), (x == y)) @@ -1351,9 +1352,9 @@ def test_gradient(self): y = np.random.randn(2, 3, 4) # requires_grad - yt = T.requires_grad(T.as_tensor_backend(y)) + yt = T.requires_grad(T.as_tensor(y)) - xt = T.as_tensor_backend(x) + xt = T.as_tensor(x) xt_copy = T.requires_grad(xt, copy=False) self.assertIs(xt_copy, xt) l_sum = T.reduce_sum(xt + xt_copy) @@ -1449,7 +1450,7 @@ def test_assertions(self): for x in [np.array([-1, 0, 1]), np.array([1., 2., 3.]), np.array([np.inf, 0.]), np.array([np.nan, 0.]), np.array([np.inf, np.nan])]: - t = T.as_tensor_backend(x) + t = 
T.as_tensor(x) assert_equal(T.is_finite(t), np.isfinite(x)) is_finite = np.all(np.isfinite(x)) diff --git a/tests/tensor/test_nn.py b/tests/tensor/test_nn.py index 78798d5..79ae63b 100644 --- a/tests/tensor/test_nn.py +++ b/tests/tensor/test_nn.py @@ -24,7 +24,7 @@ def test_activation_functions(self): self.assertTrue(np.any(x < 0)) self.assertTrue(np.any(x > 0)) self.assertTrue(np.any(x == 0)) - x_t = T.as_tensor_backend(x) + x_t = T.as_tensor(x) # test relu assert_allclose(T.nn.relu(x_t), x * (x >= 0)) @@ -126,7 +126,7 @@ def binary_cross_entropy(logits, labels, reduction, negative): self.assertEqual(labels.shape, (3, 4)) self.assertEqual(set(labels.flatten().tolist()), {0, 1}) - _f = T.as_tensor_backend + _f = T.as_tensor for reduction in ['none', 'mean', 'sum']: for negative in [False, True]: @@ -193,7 +193,7 @@ def cross_entropy(logits, labels, reduction, negative): self.assertEqual(labels.shape, (3, 4, 5)) self.assertEqual(set(labels.flatten().tolist()), {0, 1, 2, 3, 4, 5}) - _f = T.as_tensor_backend + _f = T.as_tensor for reduction in ['none', 'mean', 'sum']: for negative in [False, True]: diff --git a/tests/tensor/test_random.py b/tests/tensor/test_random.py index 400f9f4..d642337 100644 --- a/tests/tensor/test_random.py +++ b/tests/tensor/test_random.py @@ -32,12 +32,12 @@ class TensorRandomTestCase(unittest.TestCase): def test_seed(self): T.random.seed(1234) - x = T.to_numpy(T.random.normal(T.as_tensor_backend(0.), T.as_tensor_backend(1.))) - y = T.to_numpy(T.random.normal(T.as_tensor_backend(0.), T.as_tensor_backend(1.))) + x = T.to_numpy(T.random.normal(T.as_tensor(0.), T.as_tensor(1.))) + y = T.to_numpy(T.random.normal(T.as_tensor(0.), T.as_tensor(1.))) self.assertFalse(np.allclose(x, y)) T.random.seed(1234) - z = T.to_numpy(T.random.normal(T.as_tensor_backend(0.), T.as_tensor_backend(1.))) + z = T.to_numpy(T.random.normal(T.as_tensor(0.), T.as_tensor(1.))) assert_allclose(x, z) def test_rand(self): @@ -226,9 +226,9 @@ def log_prob(given): # test n_samples by manual expanding the param shape for dtype in float_dtypes: # test sample dtype and shape - mean_t = T.cast(T.expand(T.as_tensor_backend(mean), [n_samples, 2, 3, 4]), dtype) - std_t = T.cast(T.expand(T.as_tensor_backend(std), [n_samples, 1, 3, 4]), dtype) - logstd_t = T.cast(T.expand(T.as_tensor_backend(logstd), [n_samples, 1, 3, 4]), dtype) + mean_t = T.cast(T.expand(T.as_tensor(mean), [n_samples, 2, 3, 4]), dtype) + std_t = T.cast(T.expand(T.as_tensor(std), [n_samples, 1, 3, 4]), dtype) + logstd_t = T.cast(T.expand(T.as_tensor(logstd), [n_samples, 1, 3, 4]), dtype) t = T.random.normal(mean_t, std_t) self.assertEqual(T.get_dtype(t), dtype) self.assertEqual(T.shape(t), [n_samples, 2, 3, 4]) @@ -600,7 +600,7 @@ def do_test_sample(n_z, sample_shape, float_dtype, dtype): do_test_sample(n_z, sample_shape, T.float64, dtype) with pytest.raises(Exception, match='`n_samples` must be at least 1'): - _ = T.random.bernoulli(probs=T.as_tensor_backend(probs), n_samples=0) + _ = T.random.bernoulli(probs=T.as_tensor(probs), n_samples=0) # given has lower rank than params, broadcasted to match param for float_dtype in float_dtypes: @@ -721,7 +721,7 @@ def do_test_sample(is_one_hot: bool, is_one_hot = Z_sample_fn == T.random.one_hot_categorical this_probs = probs[0, 0] t = Z_sample_fn( - probs=T.as_tensor_backend(this_probs), + probs=T.as_tensor(this_probs), n_samples=100 ) self.assertEqual( @@ -730,7 +730,7 @@ def do_test_sample(is_one_hot: bool, ) x = T.to_numpy(t) - logits_t = T.as_tensor_backend(np.log(this_probs)) + logits_t = 
T.as_tensor(np.log(this_probs)) do_check_log_prob( given=t, batch_ndims=len(t.shape) - int(is_one_hot), @@ -761,11 +761,11 @@ def do_test_sample(is_one_hot: bool, # argument error for Z_sample_fn in (T.random.categorical, T.random.one_hot_categorical): with pytest.raises(Exception, match='`n_samples` must be at least 1'): - _ = Z_sample_fn(probs=T.as_tensor_backend(probs), n_samples=0) + _ = Z_sample_fn(probs=T.as_tensor(probs), n_samples=0) with pytest.raises(Exception, match='The rank of `probs` must be at ' 'least 1'): - _ = Z_sample_fn(probs=T.as_tensor_backend(probs[0, 0, 0, 0])) + _ = Z_sample_fn(probs=T.as_tensor(probs[0, 0, 0, 0])) def test_discretized_logistic(self): np.random.seed(1234) diff --git a/tests/test_arg_check.py b/tests/test_arg_check.py index 9b2a2dc..0b4115e 100644 --- a/tests/test_arg_check.py +++ b/tests/test_arg_check.py @@ -20,7 +20,7 @@ def test_validate_positive_int(self): def test_validate_layer(self): layer = tk.layers.Linear(5, 3) - for v in [layer, T.jit_compile(layer)]: + for v in [layer, tk.layers.jit_compile(layer)]: self.assertIs(validate_layer('v', v), v) with pytest.raises(TypeError, @@ -40,7 +40,7 @@ def test_get_layer_from_layer_or_factory(self): factory = lambda in_features, out_features: \ tk.layers.Linear(in_features, out_features) layer = factory(5, 3) - for v in [layer, T.jit_compile(layer), + for v in [layer, tk.layers.jit_compile(layer), tk.layers.Linear, factory]: out = get_layer_from_layer_or_factory( 'v', v, args=(5,), kwargs=dict(out_features=3)) From 9ca107ea8788036264a5d2bd35c581568f7c5434 Mon Sep 17 00:00:00 2001 From: Haowen Xu Date: Mon, 17 Feb 2020 21:23:34 +0800 Subject: [PATCH 3/7] now GPU device can work --- tensorkit/backend/pytorch_/core.py | 6 +-- tensorkit/backend/pytorch_/layers.py | 2 +- tensorkit/backend/pytorch_/nn.py | 4 +- tensorkit/backend/pytorch_/random.py | 9 +++- tensorkit/distributions/bernoulli.py | 7 ++-- tensorkit/distributions/categorical.py | 4 +- tensorkit/distributions/uniform.py | 5 ++- tensorkit/examples/classification/mnist.py | 10 +++++ .../examples/classification/mnist_resnet.py | 10 +++++ tensorkit/examples/utils/prepare_data.py | 7 ++-- tensorkit/utils/tensor_stream.py | 3 ++ tests/distributions/test_base.py | 3 +- tests/distributions/test_bernoulli.py | 7 +--- tests/distributions/test_categorical.py | 7 +--- tests/distributions/test_discretized.py | 9 ++-- tests/distributions/test_flow.py | 4 +- tests/distributions/test_mixture.py | 5 +-- tests/distributions/test_normal.py | 20 +++------ tests/distributions/test_uniform.py | 12 +----- tests/distributions/test_utils.py | 27 ++++++++++-- tests/flows/test_act_norm.py | 4 +- tests/flows/test_core.py | 23 ++++------ tests/flows/test_coupling.py | 2 +- tests/flows/test_rearrangement.py | 2 +- tests/flows/test_shape_.py | 6 +-- tests/flows/test_split_.py | 6 +-- tests/helper.py | 37 ++++++++++++++++ tests/init/test_core.py | 16 ++----- tests/init/test_std_data_init.py | 2 +- tests/layers/test_composed.py | 2 +- tests/layers/test_contextual.py | 2 +- tests/layers/test_core.py | 42 +++++++------------ tests/layers/test_flow_layer.py | 4 +- tests/layers/test_gated.py | 2 +- tests/layers/test_pixelcnn.py | 3 +- tests/layers/test_pool.py | 4 +- tests/layers/test_resnet.py | 4 +- tests/layers/test_shape_.py | 6 +-- tests/layers/test_split_.py | 2 +- tests/layers/test_utils.py | 2 +- tests/tensor/test_core.py | 19 +++------ tests/tensor/test_linalg.py | 6 +-- tests/tensor/test_nn.py | 8 +--- tests/tensor/test_random.py | 36 ++-------------- 
tests/tensor/test_utils.py | 3 +- tests/test_arg_check.py | 3 +- tests/test_bayes.py | 2 +- tests/test_stochastic.py | 2 +- tests/train/test_core.py | 5 +-- tests/variational/test_chain.py | 2 +- tests/variational/test_estimators.py | 7 ++-- tests/variational/test_evaluation.py | 3 +- tests/variational/test_inference.py | 2 +- tests/variational/test_objectives.py | 5 +-- 54 files changed, 205 insertions(+), 230 deletions(-) diff --git a/tensorkit/backend/pytorch_/core.py b/tensorkit/backend/pytorch_/core.py index c1a2aa0..f5a535b 100644 --- a/tensorkit/backend/pytorch_/core.py +++ b/tensorkit/backend/pytorch_/core.py @@ -261,7 +261,7 @@ def as_tensor(data, another tensor, a :class:`~tensorkit.StochasticTensor`, or anything else that the backend supports. dtype: The expected dtype of the constructed tensor. - device: Where to put the new tensor. + device: The device where to place new tensors and variables. force_copy: Force to copy `data` even if it is not necessary. The gradient propagation will not be stopped from the copied tensor to the original tensor. The caller may need to use `T.stop_grad()` @@ -1443,9 +1443,9 @@ def maybe_clip(x: Tensor, if x_min is not None and x_max is not None: return clip(x, x_min, x_max) elif x_min is not None: - return torch.max(x, torch.as_tensor(x_min, dtype=x.dtype)) + return torch.max(x, torch.as_tensor(x_min, dtype=x.dtype, device=x.device)) elif x_max is not None: - return torch.min(x, torch.as_tensor(x_max, dtype=x.dtype)) + return torch.min(x, torch.as_tensor(x_max, dtype=x.dtype, device=x.device)) else: return x diff --git a/tensorkit/backend/pytorch_/layers.py b/tensorkit/backend/pytorch_/layers.py index 4f16167..9e4d22d 100644 --- a/tensorkit/backend/pytorch_/layers.py +++ b/tensorkit/backend/pytorch_/layers.py @@ -304,7 +304,7 @@ def get(self) -> Tensor: def set(self, value: TensorOrData) -> None: with no_grad(): v, g = weight_norm_decompose( - as_tensor(value, dtype=get_dtype(self.v), device=get_dtype(self.v)), + as_tensor(value, dtype=get_dtype(self.v), device=get_device(self.v)), self.norm_axis, self.epsilon, ) diff --git a/tensorkit/backend/pytorch_/nn.py b/tensorkit/backend/pytorch_/nn.py index a083b39..20ef54c 100644 --- a/tensorkit/backend/pytorch_/nn.py +++ b/tensorkit/backend/pytorch_/nn.py @@ -56,8 +56,8 @@ def sigmoid(x: Tensor) -> Tensor: def log_sigmoid(x: Tensor) -> Tensor: # using `neg_x` and `pos_x` separately can avoid having NaN or Infinity # on either of the path. 
- neg_x = torch.min(x, torch.as_tensor(0., dtype=x.dtype)) - pos_x = torch.max(x, torch.as_tensor(0., dtype=x.dtype)) + neg_x = torch.min(x, torch.as_tensor(0., dtype=x.dtype, device=x.device)) + pos_x = torch.max(x, torch.as_tensor(0., dtype=x.dtype, device=x.device)) return torch.where( x < 0., neg_x - log1p(exp(neg_x)), # log(exp(x) / (1 + exp(x))) diff --git a/tensorkit/backend/pytorch_/random.py b/tensorkit/backend/pytorch_/random.py index aec0f2b..97215f2 100644 --- a/tensorkit/backend/pytorch_/random.py +++ b/tensorkit/backend/pytorch_/random.py @@ -9,7 +9,7 @@ from ...settings_ import settings __all__ = [ - 'seed', + 'seed', 'set_deterministic', # uniform 'rand', 'uniform', @@ -41,6 +41,13 @@ def seed(seed: int): torch.cuda.manual_seed_all(seed) +def set_deterministic(deterministic: bool = True): + if hasattr(torch, 'backends') and hasattr(torch.backends, 'cudnn'): + torch.backends.cudnn.enabled = not deterministic + torch.backends.cudnn.benchmark = not deterministic + torch.backends.cudnn.deterministic = deterministic + + # ---- uniform distribution ---- @jit def rand(shape: List[int], diff --git a/tensorkit/distributions/bernoulli.py b/tensorkit/distributions/bernoulli.py index 3000bdf..31014e9 100644 --- a/tensorkit/distributions/bernoulli.py +++ b/tensorkit/distributions/bernoulli.py @@ -65,9 +65,11 @@ def __init__(self, if logits is not None: value_shape = T.shape(logits) mutual_params = {'logits': logits} + device = device or T.get_device(logits) else: value_shape = T.shape(probs) mutual_params = {'probs': probs} + device = device or T.get_device(probs) epsilon = float(epsilon) # construct the object @@ -75,7 +77,7 @@ def __init__(self, dtype=dtype, value_shape=value_shape, event_ndims=event_ndims, - device=device or T.get_device(logits), + device=device, validate_tensors=validate_tensors, ) for k, v in mutual_params.items(): @@ -134,8 +136,7 @@ def copy(self, **overrided_params): return copy_distribution( cls=Bernoulli, base=self, - attrs=('dtype', 'device', 'event_ndims', 'validate_tensors', - 'epsilon'), + attrs=('dtype', 'event_ndims', 'epsilon', 'device', 'validate_tensors'), mutual_attrs=(('logits', 'probs'),), compute_deps={'logits': ('epsilon',)}, original_mutual_params=self._mutual_params, diff --git a/tensorkit/distributions/categorical.py b/tensorkit/distributions/categorical.py index 1140474..1c28f77 100644 --- a/tensorkit/distributions/categorical.py +++ b/tensorkit/distributions/categorical.py @@ -44,9 +44,11 @@ def __init__(self, if logits is not None: param_shape = T.shape(logits) mutual_params = {'logits': logits} + device = device or T.get_device(logits) else: param_shape = T.shape(probs) mutual_params = {'probs': probs} + device = device or T.get_device(probs) epsilon = float(epsilon) if len(param_shape) < 1: @@ -61,7 +63,7 @@ def __init__(self, dtype=dtype, value_shape=value_shape, event_ndims=event_ndims, - device=device or T.get_device(logits), + device=device, validate_tensors=validate_tensors, ) for k, v in mutual_params.items(): diff --git a/tensorkit/distributions/uniform.py b/tensorkit/distributions/uniform.py index 547c0c6..172f66f 100644 --- a/tensorkit/distributions/uniform.py +++ b/tensorkit/distributions/uniform.py @@ -88,13 +88,16 @@ def __init__(self, dtype = T.get_dtype(low) value_shape = (value_shape + T.broadcast_shape(T.shape(low), T.shape(high))) + device = device or T.get_device(low) + else: + device = T.current_device() super().__init__( dtype=dtype, value_shape=value_shape, reparameterized=reparameterized, event_ndims=event_ndims, - 
device=device or T.get_device(low), + device=device, validate_tensors=validate_tensors, ) diff --git a/tensorkit/examples/classification/mnist.py b/tensorkit/examples/classification/mnist.py index b91a8bb..7657f16 100644 --- a/tensorkit/examples/classification/mnist.py +++ b/tensorkit/examples/classification/mnist.py @@ -8,6 +8,7 @@ class Config(mltk.Config): max_epoch: int = 10 batch_size: int = 32 test_batch_size: int = 64 + init_batch_count: int = 32 lr: float = 0.001 lr_anneal_ratio: float = 0.5 lr_anneal_epochs: int = 5 @@ -35,6 +36,15 @@ def main(exp: mltk.Experiment[Config]): log_softmax(). \ build() + # initialize the network with first few batches of train data + init_x, _ = train_stream.get_arrays(max_batch=exp.config.init_batch_count) + _ = net(T.as_tensor(init_x)) + mltk.print_with_time('Network initialized') + + # we have initialized the network, now we can compile the net with JIT engine + net = tk.layers.jit_compile(net) + mltk.print_with_time('Network compiled to JIT module') + # define the train and evaluate functions def train_step(x, y): logits = net(x) diff --git a/tensorkit/examples/classification/mnist_resnet.py b/tensorkit/examples/classification/mnist_resnet.py index 374c6a5..bb9a13e 100644 --- a/tensorkit/examples/classification/mnist_resnet.py +++ b/tensorkit/examples/classification/mnist_resnet.py @@ -8,6 +8,7 @@ class Config(mltk.Config): max_epoch: int = 10 batch_size: int = 32 test_batch_size: int = 64 + init_batch_count: int = 10 lr: float = 0.01 lr_anneal_ratio: float = 0.5 lr_anneal_epochs: int = 2 @@ -41,6 +42,15 @@ def main(exp: mltk.Experiment[Config]): log_softmax(). \ build() + # initialize the network with first few batches of train data + init_x, _ = train_stream.get_arrays(max_batch=exp.config.init_batch_count) + _ = net(T.as_tensor(init_x)) + mltk.print_with_time('Network initialized') + + # we have initialized the network, now we can compile the net with JIT engine + net = tk.layers.jit_compile(net) + mltk.print_with_time('Network compiled to JIT module') + # the train, test and validate functions def train_step(x, y): logits = net(x) diff --git a/tensorkit/examples/utils/prepare_data.py b/tensorkit/examples/utils/prepare_data.py index 2f3950f..7474a6f 100644 --- a/tensorkit/examples/utils/prepare_data.py +++ b/tensorkit/examples/utils/prepare_data.py @@ -4,7 +4,6 @@ import numpy as np import tensorkit as tk -from tensorkit import tensor as T __all__ = [ 'get_mnist_streams' @@ -48,10 +47,10 @@ def get_mnist_streams(batch_size: int, # split train & valid set, and construct the streams def make_stream(arrays, **kwargs): - stream = mltk.DataStream.arrays(arrays, **kwargs) + ret = mltk.DataStream.arrays(arrays, **kwargs) if as_tensor_stream: - stream = tk.utils.as_tensor_stream(stream, prefetch=prefetch) - return stream + ret = tk.utils.as_tensor_stream(ret, prefetch=prefetch) + return ret if val_portion is not None: (train_x, train_y), (val_x, val_y) = \ diff --git a/tensorkit/utils/tensor_stream.py b/tensorkit/utils/tensor_stream.py index 3b0abee..57e4584 100644 --- a/tensorkit/utils/tensor_stream.py +++ b/tensorkit/utils/tensor_stream.py @@ -45,6 +45,9 @@ def _minibatch_iterator(self) -> Generator[ArrayTuple, None, None]: finally: g.close() + def _concat_arrays(self, arrays: Sequence[T.Tensor]) -> T.Tensor: + return T.concat(list(arrays), axis=0) + def as_tensor_stream(source: mltk.DataStream, device: Optional[str] = None, diff --git a/tests/distributions/test_base.py b/tests/distributions/test_base.py index f35a989..343ba88 100644 --- 
a/tests/distributions/test_base.py +++ b/tests/distributions/test_base.py @@ -11,7 +11,7 @@ from tests.helper import * -class BaseDistributionTestCase(unittest.TestCase): +class BaseDistributionTestCase(TestCase): def test_construct(self): def check_all_specified_by_constructor(cls): @@ -239,7 +239,6 @@ def do_check(given, group_ndims, args): (group_ndims, 1 + group_ndims)) def test_prob(self): - np.random.seed(1234) t00 = np.random.randn(2, 3) t0 = T.as_tensor(t00) d = Distribution( diff --git a/tests/distributions/test_bernoulli.py b/tests/distributions/test_bernoulli.py index b71905c..16202ae 100644 --- a/tests/distributions/test_bernoulli.py +++ b/tests/distributions/test_bernoulli.py @@ -17,10 +17,9 @@ def sigmoid(x): np.exp(x) / (1 + np.exp(x))) -class BernoulliTestCase(unittest.TestCase): +class BernoulliTestCase(TestCase): def test_construct(self): - np.random.seed(1234) logits = np.random.randn(2, 3, 4) probs = sigmoid(logits) @@ -68,7 +67,6 @@ def test_construct(self): **{key: T.as_tensor(np.nan, dtype=float_dtype)}) def test_copy(self): - np.random.seed(1234) logits = np.random.randn(2, 3, 4) logits_t = T.as_tensor(logits) bernoulli = Bernoulli(logits=logits_t, event_ndims=1) @@ -82,7 +80,7 @@ def test_copy(self): self.assertEqual(f_copy.call_args, ((), { 'cls': Bernoulli, 'base': bernoulli, - 'attrs': ('dtype', 'event_ndims', 'validate_tensors', 'epsilon'), + 'attrs': ('dtype', 'event_ndims', 'epsilon', 'device', 'validate_tensors'), 'mutual_attrs': (('logits', 'probs'),), 'compute_deps': {'logits': ('epsilon',)}, 'original_mutual_params': {'logits': bernoulli.logits}, @@ -90,7 +88,6 @@ def test_copy(self): })) def test_sample_and_log_prob(self): - np.random.seed(1234) logits = np.random.randn(2, 3, 4) logits_t = T.as_tensor(logits) diff --git a/tests/distributions/test_categorical.py b/tests/distributions/test_categorical.py index b3600db..5c36e34 100644 --- a/tests/distributions/test_categorical.py +++ b/tests/distributions/test_categorical.py @@ -32,10 +32,9 @@ def __init__(self, **kwargs): super().__init__(**kwargs) -class CategoricalTestCase(unittest.TestCase): +class CategoricalTestCase(TestCase): def test_construct_base(self): - np.random.seed(1234) logits = np.random.randn(2, 3, 4) probs = softmax(logits) logits = np.log(probs) @@ -101,7 +100,6 @@ def test_construct_base(self): ) def test_copy(self): - np.random.seed(1234) logits = np.random.randn(2, 3, 4) logits_t = T.as_tensor(logits) cat = _MyBaseCategorical(logits=logits_t, probs=None, event_ndims=1, @@ -116,7 +114,7 @@ def test_copy(self): self.assertEqual(f_copy.call_args, ((), { 'cls': _MyBaseCategorical, 'base': cat, - 'attrs': ('dtype', 'event_ndims', 'validate_tensors', 'epsilon'), + 'attrs': ('dtype', 'event_ndims', 'epsilon', 'device', 'validate_tensors'), 'mutual_attrs': (('logits', 'probs'),), 'compute_deps': {'logits': ('epsilon',)}, 'original_mutual_params': {'logits': cat.logits}, @@ -124,7 +122,6 @@ def test_copy(self): })) def test_Categorical_and_OneHotCategorical(self): - np.random.seed(1234) logits = np.random.randn(2, 3, 4) def do_test(dtype, float_dtype, is_one_hot): diff --git a/tests/distributions/test_discretized.py b/tests/distributions/test_discretized.py index 4467ce1..2fc1f70 100644 --- a/tests/distributions/test_discretized.py +++ b/tests/distributions/test_discretized.py @@ -11,11 +11,9 @@ from tests.helper import * -class DiscretizedLogisticTestCase(unittest.TestCase): +class DiscretizedLogisticTestCase(TestCase): def test_discretized_logsitic(self): - T.random.seed(1234) - mean = 
T.random.randn([3, 1, 4]) log_scale = T.random.randn([2, 1]) @@ -90,8 +88,6 @@ def log_prob_fn(t): _ = DiscretizedLogistic(mean, T.zeros([7]), 1./32) def test_copy(self): - T.random.seed(1234) - mean = T.random.randn([3, 1, 4]) log_scale = T.random.randn([2, 1]) @@ -118,7 +114,8 @@ def test_copy(self): 'attrs': ( 'mean', 'log_scale', 'bin_size', 'min_val', 'max_val', 'biased_edges', 'discretize_given', 'discretize_sample', - 'reparameterized', 'event_ndims', 'epsilon', 'validate_tensors' + 'reparameterized', 'event_ndims', 'epsilon', 'device', + 'validate_tensors' ), 'overrided_params': {'event_ndims': 2, 'discretize_sample': False, diff --git a/tests/distributions/test_flow.py b/tests/distributions/test_flow.py index a80e12f..b932837 100644 --- a/tests/distributions/test_flow.py +++ b/tests/distributions/test_flow.py @@ -123,11 +123,9 @@ def log_prob_fn(t): fn(None, None, validate_tensors) -class FlowDistributionTestCase(unittest.TestCase): +class FlowDistributionTestCase(TestCase): def test_FlowDistribution(self): - T.random.seed(1234) - check_flow_distribution( self, UnitNormal([], event_ndims=0), diff --git a/tests/distributions/test_mixture.py b/tests/distributions/test_mixture.py index 47a597f..8613b90 100644 --- a/tests/distributions/test_mixture.py +++ b/tests/distributions/test_mixture.py @@ -9,6 +9,7 @@ from tensorkit.distributions.utils import copy_distribution from tensorkit.flows import ActNorm from tests.distributions.test_flow import check_distribution_instance +from tests.helper import * def check_mixture(ctx, @@ -96,11 +97,9 @@ def log_prob_fn(t): fn(categorical, components, None, None, validate_tensors) -class MixtureTestCase(unittest.TestCase): +class MixtureTestCase(TestCase): def test_mixture(self): - T.random.seed(1234) - check_mixture( self, Categorical(logits=T.random.randn([4, 5, 1])), diff --git a/tests/distributions/test_normal.py b/tests/distributions/test_normal.py index e23b435..a1b0dab 100644 --- a/tests/distributions/test_normal.py +++ b/tests/distributions/test_normal.py @@ -14,11 +14,9 @@ from tests.helper import * -class UnitNormalTestCase(unittest.TestCase): +class UnitNormalTestCase(TestCase): def test_construct(self): - np.random.seed(1234) - for shape, event_ndims, dtype in \ product(([], [2, 3]), range(0, 3), float_dtypes): if event_ndims > len(shape): @@ -34,7 +32,6 @@ def test_construct(self): assert_equal(normal.logstd, np.zeros(shape)) def test_copy(self): - np.random.seed(1234) shape = [2, 3] normal = UnitNormal(shape=[2, 3], event_ndims=1, dtype=T.float32) @@ -71,8 +68,6 @@ def test_copy(self): self.assertIsNot(getattr(normal2, key), getattr(normal, key)) def test_sample_and_log_prob(self): - np.random.seed(1234) - for dtype in float_dtypes: normal = UnitNormal(shape=[2, 3, 4], event_ndims=1, dtype=dtype) @@ -122,19 +117,19 @@ def __init__(self, logstd: Optional[T.Tensor] = None, reparameterized: bool = True, event_ndims: int = 0, + device: Optional[str] = None, validate_tensors: Optional[bool] = None, xyz: int = 0): super().__init__( - mean=mean, std=std, logstd=logstd, reparameterized=reparameterized, - event_ndims=event_ndims, validate_tensors=validate_tensors + mean=mean, std=std, logstd=logstd, reparameterized=reparameterized, + event_ndims=event_ndims, device=device, validate_tensors=validate_tensors ) self.xyz = xyz -class NormalTestCase(unittest.TestCase): +class NormalTestCase(TestCase): def test_construct(self): - np.random.seed(1234) mean = np.random.randn(3, 4) logstd = np.random.randn(2, 3, 4) std = np.exp(logstd) @@ -202,7 +197,6 
@@ def test_construct(self): _ = normal.logstd def test_copy(self): - np.random.seed(1234) mean = np.random.randn(3, 4) logstd = np.random.randn(2, 3, 4) mean_t = T.as_tensor(mean) @@ -224,14 +218,13 @@ def test_copy(self): 'cls': _MyBaseNormal, 'base': normal, 'attrs': ('mean', 'reparameterized', 'event_ndims', - 'validate_tensors', 'xyz'), + 'device', 'validate_tensors', 'xyz'), 'mutual_attrs': (('std', 'logstd'),), 'original_mutual_params': {'logstd': normal.logstd}, 'overrided_params': {'event_ndims': 2}, })) def test_Normal(self): - np.random.seed(1234) mean = np.random.randn(3, 4) logstd = np.random.randn(2, 3, 4) mean_t = T.as_tensor(mean) @@ -280,7 +273,6 @@ def test_Normal(self): ) def test_TruncatedNormal(self): - np.random.seed(1234) mean = np.random.randn(3, 4) logstd = np.random.randn(2, 3, 4) std = np.exp(logstd) diff --git a/tests/distributions/test_uniform.py b/tests/distributions/test_uniform.py index 9b200ec..d27cb05 100644 --- a/tests/distributions/test_uniform.py +++ b/tests/distributions/test_uniform.py @@ -12,11 +12,9 @@ from tests.helper import * -class UniformTestCase(unittest.TestCase): +class UniformTestCase(TestCase): def test_construct(self): - np.random.seed(1234) - for dtype in float_dtypes: # specify no args uniform = Uniform(dtype=dtype, event_ndims=0) @@ -127,9 +125,6 @@ def test_construct(self): validate_tensors=True) def test_copy(self): - np.random.seed(1234) - T.random.seed(1234) - for dtype in float_dtypes: low_t = T.full([2, 1], -1., dtype=dtype) high_t = T.full([1, 3], 2., dtype=dtype) @@ -159,14 +154,11 @@ def test_copy(self): 'base': uniform, 'attrs': (('shape', '_shape'), 'low', 'high', 'dtype', 'reparameterized', 'event_ndims', 'log_zero', - 'validate_tensors'), + 'device', 'validate_tensors'), 'overrided_params': {'event_ndims': 2}, })) def test_sample_and_log_prob(self): - np.random.seed(1234) - T.random.seed(1234) - array_low = np.random.randn(2, 1) array_high = np.exp(np.random.randn(1, 3)) + 1. 
log_zero = -1e6 diff --git a/tests/distributions/test_utils.py b/tests/distributions/test_utils.py index 88d382c..9813318 100644 --- a/tests/distributions/test_utils.py +++ b/tests/distributions/test_utils.py @@ -47,7 +47,7 @@ class Sink2(BaseSink): pass -class DistributionUtilsTestCase(unittest.TestCase): +class DistributionUtilsTestCase(TestCase): def test_get_overrided_parameterized(self): cls = Mock(__qualname__='xyz') @@ -110,8 +110,6 @@ def test_get_tail_size(self): _ = get_tail_size([], len(shape) + 1) def test_log_pdf_mask(self): - np.random.seed(1234) - T.random.seed(1234) x = np.random.randn(3, 4, 5) for dtype in float_dtypes: @@ -139,6 +137,7 @@ def test_check_tensor_arg_types(self): for t, v in [(a, 1.0), (b, 2.0), (e, e_orig), (f, f_orig.tensor)]: self.assertIsInstance(t, T.Tensor) self.assertEqual(T.get_dtype(t), dtype) + self.assertEqual(T.get_device(t), T.current_device()) if isinstance(v, float): assert_equal(t, v) else: @@ -172,6 +171,28 @@ def test_check_tensor_arg_types(self): f'{T.float32} vs {dtype}'): _ = check_tensor_arg_types(('a', a_orig), ('b', b_orig)) + # check `device` and `default_device` + if T.current_device() != T.CPU_DEVICE: + [a] = check_tensor_arg_types(('a', [1., 2., 3.]), device=T.CPU_DEVICE) + self.assertEqual(T.get_device(a), T.CPU_DEVICE) + + [a] = check_tensor_arg_types(('a', [1., 2., 3.]), default_device=T.CPU_DEVICE) + self.assertEqual(T.get_device(a), T.CPU_DEVICE) + + [a] = check_tensor_arg_types(('a', [1., 2., 3.]), device=T.CPU_DEVICE, + default_device=T.current_device()) + self.assertEqual(T.get_device(a), T.CPU_DEVICE) + + a = T.as_tensor([1., 2., 3.], device=T.current_device()) + with pytest.raises(ValueError, + match=f'`a.device` != `device`'): + _ = check_tensor_arg_types(('a', a), device=T.CPU_DEVICE) + + b = T.as_tensor([1., 2., 3.], device=T.CPU_DEVICE) + with pytest.raises(ValueError, + match=f'`b.device` != `a.device`'): + _ = check_tensor_arg_types(('a', a), ('b', b)) + # check tensor cannot be None with pytest.raises(ValueError, match='`a` must be specified.'): diff --git a/tests/flows/test_act_norm.py b/tests/flows/test_act_norm.py index 7e02103..85b9248 100644 --- a/tests/flows/test_act_norm.py +++ b/tests/flows/test_act_norm.py @@ -83,16 +83,14 @@ def do_check(batch_shape, scale_type, initialized, dtype): do_check([11], 'exp', False, dtype) -class ActNormTestCase(unittest.TestCase): +class ActNormTestCase(TestCase): @slow_test def test_ActNorm(self): - T.random.seed(1234) check_act_norm(self, 0, ActNorm) @slow_test def test_ActNormNd(self): - T.random.seed(1234) for spatial_ndims in (1, 2, 3): check_act_norm( self, diff --git a/tests/flows/test_core.py b/tests/flows/test_core.py index a7e30b3..27ebfa1 100644 --- a/tests/flows/test_core.py +++ b/tests/flows/test_core.py @@ -69,7 +69,7 @@ def _transform(self, return output, output_log_det -class BaseFlowTestCase(unittest.TestCase): +class BaseFlowTestCase(TestCase): def test_constructor(self): flow = Flow(x_event_ndims=1, @@ -132,7 +132,7 @@ def test_call(self): _ = flow(x, inverse=True) -class FeatureMappingFlowTestCase(unittest.TestCase): +class FeatureMappingFlowTestCase(TestCase): def test_constructor(self): flow = FeatureMappingFlow(axis=-1, @@ -160,7 +160,7 @@ def test_constructor(self): _ = FeatureMappingFlow(axis=0, event_ndims=1, explicitly_invertible=True) -class InverseFlowTestCase(unittest.TestCase): +class InverseFlowTestCase(TestCase): def test_InverseFlow(self): original_flow = tk.layers.jit_compile(_MyFlow()) @@ -226,7 +226,7 @@ def _transform(self, return 
output, output_log_det -class SequentialFlowTestCase(unittest.TestCase): +class SequentialFlowTestCase(TestCase): def test_constructor(self): flows = [tk.layers.jit_compile(_MyFlow1()), tk.layers.jit_compile(_MyFlow())] @@ -304,7 +304,7 @@ def check_invertible_matrix(ctx, m, size): rtol=1e-4, atol=1e-6) -class InvertibleMatrixTestCase(unittest.TestCase): +class InvertibleMatrixTestCase(TestCase): def test_invertible_matrices(self): for cls in (LooseInvertibleMatrix, StrictInvertibleMatrix): @@ -378,10 +378,9 @@ def check_invertible_linear(ctx, T.random.randn(batch_shape)) -class InvertibleLinearTestCase(unittest.TestCase): +class InvertibleLinearTestCase(TestCase): def test_invertible_dense(self): - T.random.seed(1234) for strict in (True, False): check_invertible_linear( self, @@ -392,7 +391,6 @@ def test_invertible_dense(self): ) def test_invertible_conv_nd(self): - T.random.seed(1234) for spatial_ndims in (1, 2, 3): for strict in (True, False): check_invertible_linear( @@ -511,11 +509,9 @@ def _scale_and_log_scale(self, return scale, log_scale -class ScaleTestCase(unittest.TestCase): +class ScaleTestCase(TestCase): def test_ExpScale(self): - T.random.seed(1234) - x = T.random.randn([2, 3, 4]) scale = ExpScale() scale = tk.layers.jit_compile(scale) @@ -529,8 +525,6 @@ def test_ExpScale(self): check_scale(self, scale, x, pre_scale, expected_y, expected_log_det) def test_SigmoidScale(self): - T.random.seed(1234) - x = T.random.randn([2, 3, 4]) for pre_scale_bias in [None, 0., 1.5]: @@ -553,8 +547,6 @@ def test_SigmoidScale(self): check_scale(self, scale, x, pre_scale, expected_y, expected_log_det) def test_LinearScale(self): - T.random.seed(1234) - x = T.random.randn([2, 3, 4]) scale = LinearScale(epsilon=T.EPSILON) self.assertIn('epsilon=', repr(scale)) @@ -570,7 +562,6 @@ def test_LinearScale(self): check_scale(self, scale, x, pre_scale, expected_y, expected_log_det) def test_bad_output(self): - T.random.seed(1234) x = T.random.randn([2, 3, 1]) scale = _BadScale1() diff --git a/tests/flows/test_coupling.py b/tests/flows/test_coupling.py index eddd9a2..a2e544e 100644 --- a/tests/flows/test_coupling.py +++ b/tests/flows/test_coupling.py @@ -85,7 +85,7 @@ def do_check(secondary, scale_type): _ = cls(shift_and_pre_scale, scale=scale) -class CouplingLayerTestCase(unittest.TestCase): +class CouplingLayerTestCase(TestCase): @slow_test def test_CouplingLayer(self): diff --git a/tests/flows/test_rearrangement.py b/tests/flows/test_rearrangement.py index 451c9af..ff54038 100644 --- a/tests/flows/test_rearrangement.py +++ b/tests/flows/test_rearrangement.py @@ -40,7 +40,7 @@ def check_shuffling_flow(ctx, T.random.randn(batch_shape)) -class RearrangementTestCase(unittest.TestCase): +class RearrangementTestCase(TestCase): def test_FeatureShuffleFlow(self): check_shuffling_flow(self, 0, FeatureShufflingFlow) diff --git a/tests/flows/test_shape_.py b/tests/flows/test_shape_.py index f15ad38..786c8a9 100644 --- a/tests/flows/test_shape_.py +++ b/tests/flows/test_shape_.py @@ -10,7 +10,7 @@ from tests.ops import * -class ReshapeFlowTestCase(unittest.TestCase): +class ReshapeFlowTestCase(TestCase): def test_ReshapeFlow(self): flow = ReshapeFlow([4, -1], [-1]) @@ -48,11 +48,9 @@ def test_ReshapeFlow(self): _ = ReshapeFlow([-1], [-1, -2]) -class SpaceDepthTransformFlowTestCase(unittest.TestCase): +class SpaceDepthTransformFlowTestCase(TestCase): def test_space_depth_transform(self): - T.random.seed(1234) - for spatial_ndims, batch_shape, block_size in product( (1, 2, 3), ([2], [2, 3]), diff --git 
a/tests/flows/test_split_.py b/tests/flows/test_split_.py index 67b707f..fe41610 100644 --- a/tests/flows/test_split_.py +++ b/tests/flows/test_split_.py @@ -84,12 +84,10 @@ def check_split_flow(ctx, _ = cls([2, 3], left, tk.layers.Linear(2, 3)) -class SplitFlowTestCase(unittest.TestCase): +class SplitFlowTestCase(TestCase): @slow_test def test_SplitFlow(self): - T.random.seed(1234) - # x and y with the same event ndims left = tk.layers.jit_compile(InvertibleDense(2)) right = tk.layers.jit_compile(InvertibleDense(3)) @@ -153,8 +151,6 @@ def test_SplitFlow(self): @slow_test def test_SplitFlowNd(self): - T.random.seed(1234) - for spatial_ndims in (1, 2, 3): cls = getattr(tk.flows, f'SplitFlow{spatial_ndims}d') sub_cls = getattr(tk.flows, f'InvertibleConv{spatial_ndims}d') diff --git a/tests/helper.py b/tests/helper.py index 520ff8d..bd07d66 100644 --- a/tests/helper.py +++ b/tests/helper.py @@ -1,4 +1,7 @@ import os +import random +import unittest +from functools import wraps import numpy as np import pytest @@ -15,6 +18,8 @@ 'slow_test', 'check_distribution_instance', 'flow_standard_check', + + 'TestCase', ] @@ -216,3 +221,35 @@ def flow_standard_check(ctx, flow, x, expected_y, expected_log_det, x, log_det = flow(y, inverse=True, compute_log_det=False) assert_allclose(x, expected_x, rtol=1e-4, atol=1e-6) ctx.assertIsNone(log_det) + + +class TestCaseMeta(type): + + def __new__(cls, name, parents, dct): + def make_wrapper(method): + @wraps(method) + def wrapper(*args, **kwargs): + T.random.set_deterministic(True) + T.random.seed(1234) + np.random.seed(1234) + random.seed(1234) + + try: + with T.use_device(T.first_gpu_device()): + return method(*args, **kwargs) + finally: + T.random.set_deterministic(False) + return wrapper + + keys = list(dct) + for key in keys: + val = dct[key] + if key.startswith('test_'): + val = make_wrapper(val) + dct[key] = val + + return super().__new__(cls, name, parents, dct) + + +class TestCase(unittest.TestCase, metaclass=TestCaseMeta): + pass diff --git a/tests/init/test_core.py b/tests/init/test_core.py index 74774d0..fc8ce3c 100644 --- a/tests/init/test_core.py +++ b/tests/init/test_core.py @@ -13,7 +13,7 @@ from tests.helper import * -class UtilitiesTestCase(unittest.TestCase): +class UtilitiesTestCase(TestCase): def test_calculate_fan_in_and_fan_out(self): for layer, fan_in_and_out in [ @@ -160,7 +160,7 @@ def test_apply_initializer(self): tk.init.apply_initializer(weight, object()) -class TensorInitiailizersTestCase(unittest.TestCase): +class TensorInitiailizersTestCase(TestCase): def test_zeros(self): for dtype in float_dtypes: @@ -185,8 +185,6 @@ def test_fill(self): assert_equal(weight, T.full_like(weight, 123.)) def test_uniform(self): - T.random.seed(1234) - for dtype in float_dtypes: weight = T.variable([n_samples // 50, 50], dtype=dtype, initializer=0.) @@ -208,8 +206,6 @@ def test_uniform(self): ) def test_normal(self): - T.random.seed(1234) - for dtype in float_dtypes: weight = T.variable([n_samples // 50, 50], dtype=dtype, initializer=0.) @@ -227,12 +223,10 @@ def test_normal(self): weight, partial(tk.init.normal, mean=1., std=3.)) self.assertLessEqual( np.abs(T.to_numpy(T.reduce_mean(weight)) - 1.), - 5.0 / 3. / np.sqrt(n_samples) + 5.0 * 3. 
/ np.sqrt(n_samples) ) def test_xavier_initializer(self): - T.random.seed(1234) - for dtype, initializer, mode in product( float_dtypes, (tk.init.xavier_normal, tk.init.xavier_uniform), @@ -268,8 +262,6 @@ def test_xavier_initializer(self): ) def test_kaming_initializer(self): - T.random.seed(1234) - for dtype, initializer, mode in product( float_dtypes, (tk.init.kaming_normal, tk.init.kaming_uniform), @@ -330,7 +322,7 @@ def _init(self, layer: T.Module, inputs: List[T.Tensor]) -> None: self.watcher.append((layer, inputs)) -class DataDependentInitializerTestCase(unittest.TestCase): +class DataDependentInitializerTestCase(TestCase): def test_data_dependent_initializer(self): data_init = _MyDataDependentInitializer([]) diff --git a/tests/init/test_std_data_init.py b/tests/init/test_std_data_init.py index 349720e..205e546 100644 --- a/tests/init/test_std_data_init.py +++ b/tests/init/test_std_data_init.py @@ -9,7 +9,7 @@ from tests.ops import * -class StdDataInitTestCase(unittest.TestCase): +class StdDataInitTestCase(TestCase): def test_repr(self): data_init = tk.init.StdDataInit() diff --git a/tests/layers/test_composed.py b/tests/layers/test_composed.py index fb4721b..0636370 100644 --- a/tests/layers/test_composed.py +++ b/tests/layers/test_composed.py @@ -137,7 +137,7 @@ def check_composed_layer(ctx, input, layer_cls, linear_cls, normalizer_cls, ) -class ComposedTestCase(unittest.TestCase): +class ComposedTestCase(TestCase): def test_dense(self): check_composed_layer( diff --git a/tests/layers/test_contextual.py b/tests/layers/test_contextual.py index 7842fc1..74d1bca 100644 --- a/tests/layers/test_contextual.py +++ b/tests/layers/test_contextual.py @@ -5,7 +5,7 @@ from tests.helper import * -class ContextualTestCase(unittest.TestCase): +class ContextualTestCase(TestCase): def test_IgnoreContext(self): x = T.random.randn([2, 3, 4]) diff --git a/tests/layers/test_core.py b/tests/layers/test_core.py index 3237d8f..a6c1eae 100644 --- a/tests/layers/test_core.py +++ b/tests/layers/test_core.py @@ -28,7 +28,7 @@ def forward(self, input: Tensor) -> Tensor: return self.wrapped(input) -class UtilsAndConstantsTestCase(unittest.TestCase): +class UtilsAndConstantsTestCase(TestCase): def test_constants(self): self.assertEqual(tk.layers.DEFAULT_GATE_BIAS, 2.0) @@ -171,7 +171,7 @@ def test_get_bias_store(self): self.assertIsNone(store) -class IdentityTestCase(unittest.TestCase): +class IdentityTestCase(TestCase): def test_identity(self): layer = tk.layers.jit_compile(Identity()) @@ -231,15 +231,15 @@ class _AutoRepr(BaseLayer): b: float -class BaseLayersTestCase(unittest.TestCase): +class BaseLayersTestCase(TestCase): def test_single_variate_layer(self): layer = tk.layers.jit_compile(_MySingleVariateLayer()) x = T.random.randn([2, 3, 4]) np_offset = T.from_numpy(np.array([0., 1., 2., 3.])) - assert_allclose(layer(x), x * 11. + np_offset) + assert_allclose(layer(x), x * 11. + np_offset, rtol=1e-4, atol=1e-6) layer.set_bias(7.) - assert_allclose(layer(x), x * 11. + 7. + np_offset) + assert_allclose(layer(x), x * 11. + 7. 
+ np_offset, rtol=1e-4, atol=1e-6) def test_multi_variate_layer(self): layer = tk.layers.jit_compile(_MyMultiVariateLayer()) @@ -247,16 +247,16 @@ def test_multi_variate_layer(self): y = T.random.randn([2, 3, 4]) z = T.random.randn([2, 3, 4]) a, b = layer([x, y, z]) - assert_allclose(a, x + y) - assert_allclose(b, y + z) + assert_allclose(a, x + y, rtol=1e-4, atol=1e-6) + assert_allclose(b, y + z, rtol=1e-4, atol=1e-6) def test_split_layer(self): layer = tk.layers.jit_compile(_MySplitLayer()) x = T.random.randn([2, 3, 4]) a, b, c = layer(x) - assert_allclose(a, x) - assert_allclose(b, x + 1) - assert_allclose(c, x + 2) + assert_allclose(a, x, rtol=1e-4, atol=1e-6) + assert_allclose(b, x + 1, rtol=1e-4, atol=1e-6) + assert_allclose(c, x + 2, rtol=1e-4, atol=1e-6) def test_merge_layer(self): layer = tk.layers.jit_compile(_MyMergeLayer()) @@ -264,7 +264,7 @@ def test_merge_layer(self): y = T.random.randn([2, 3, 4]) z = T.random.randn([2, 3, 4]) out = layer([x, y, z]) - assert_allclose(out, x + y + z) + assert_allclose(out, x + y + z, rtol=1e-4, atol=1e-6) def test_auto_repr(self): layer = _AutoRepr() @@ -279,7 +279,7 @@ def test_auto_repr(self): self.assertNotIn('weight=', repr(layer)) -class SequentialTestCase(unittest.TestCase): +class SequentialTestCase(TestCase): def test_sequential(self): x = T.random.randn([4, 5]) @@ -297,8 +297,6 @@ def test_sequential(self): def check_core_linear(ctx, input, layer_factory, layer_name, numpy_fn): - T.random.seed(1234) - # test with bias layer = layer_factory(use_bias=True) ctx.assertIn(layer_name, repr(layer)) @@ -362,11 +360,9 @@ class _MyDataDependentInitializer(init.DataDependentInitializer): _ = layer_factory(data_init=lambda: 'hello') -class CoreLinearTestCase(unittest.TestCase): +class CoreLinearTestCase(TestCase): def test_linear(self): - np.random.seed(1234) - layer = Linear(5, 3) self.assertEqual( repr(layer), @@ -390,8 +386,6 @@ def test_linear(self): @slow_test def test_conv_nd(self): - np.random.seed(1234) - def do_check(spatial_ndims, kernel_size, stride, dilation, padding): cls_name = f'LinearConv{spatial_ndims}d' @@ -430,8 +424,6 @@ def do_check(spatial_ndims, kernel_size, stride, @slow_test def test_conv_transpose_nd(self): - np.random.seed(1234) - def is_valid_output_padding(spatial_ndims, output_padding, stride, dilation): if not hasattr(output_padding, '__iter__'): output_padding = [output_padding] * spatial_ndims @@ -492,11 +484,9 @@ def do_check(spatial_ndims, kernel_size, stride, do_check(3, (3, 2, 1), (3, 2, 1), (3, 2, 1), PaddingMode.HALF, 0) -class BatchNormTestCase(unittest.TestCase): +class BatchNormTestCase(TestCase): def test_batch_norm(self): - T.random.seed(1234) - eps = T.EPSILON for spatial_ndims in (0, 1, 2, 3): cls = getattr(tk.layers, ('BatchNorm' if not spatial_ndims @@ -539,12 +529,10 @@ def test_batch_norm(self): ) -class DropoutTestCase(unittest.TestCase): +class DropoutTestCase(TestCase): def test_dropout(self): n_samples = 10000 - T.random.seed(1234) - for spatial_ndims in (0, 1, 2, 3): cls = getattr(tk.layers, ('Dropout' if not spatial_ndims else f'Dropout{spatial_ndims}d')) diff --git a/tests/layers/test_flow_layer.py b/tests/layers/test_flow_layer.py index 4d3b06f..8a1707d 100644 --- a/tests/layers/test_flow_layer.py +++ b/tests/layers/test_flow_layer.py @@ -27,7 +27,7 @@ def _transform(self, return output, input_log_det -class FlowLayerTestCase(unittest.TestCase): +class FlowLayerTestCase(TestCase): def test_FlowLayer(self): flow = tk.layers.jit_compile(_MyFlow( @@ -41,7 +41,7 @@ def test_FlowLayer(self): _ 
= tk.layers.FlowLayer(object()) -class ActNormLayerTestCase(unittest.TestCase): +class ActNormLayerTestCase(TestCase): def test_ActNorm(self): layer = tk.layers.ActNorm(5) diff --git a/tests/layers/test_gated.py b/tests/layers/test_gated.py index 1f91d24..814a656 100644 --- a/tests/layers/test_gated.py +++ b/tests/layers/test_gated.py @@ -7,7 +7,7 @@ from tests.helper import * -class GatedTestCase(unittest.TestCase): +class GatedTestCase(TestCase): def test_Gated(self): gated = tk.layers.Gated(feature_axis=-2, num_features=3, diff --git a/tests/layers/test_pixelcnn.py b/tests/layers/test_pixelcnn.py index ac2cc03..2d009e0 100644 --- a/tests/layers/test_pixelcnn.py +++ b/tests/layers/test_pixelcnn.py @@ -114,7 +114,7 @@ def forward(self, input: Tensor, context: List[Tensor]) -> Tensor: raise ValueError('Expected context to have 0 or 1 element.') -class PixelCNNTestCase(unittest.TestCase): +class PixelCNNTestCase(TestCase): def test_causality_and_receptive_field(self): for size in [[12], [12, 11], [12, 11, 10]]: @@ -242,7 +242,6 @@ def test_causality_and_receptive_field(self): ) def test_pixelcnn_network(self): - T.random.seed(1234) in_channels = 3 out_channels = 5 diff --git a/tests/layers/test_pool.py b/tests/layers/test_pool.py index aa8e499..cbde3d5 100644 --- a/tests/layers/test_pool.py +++ b/tests/layers/test_pool.py @@ -10,11 +10,9 @@ from tests.ops import * -class PoolTestCase(unittest.TestCase): +class PoolTestCase(TestCase): def test_AvgPool_and_MaxPool(self): - T.random.seed(1234) - def is_valid_padding(padding, kernel_size): for p, k in zip(padding, kernel_size): if isinstance(p, int): diff --git a/tests/layers/test_resnet.py b/tests/layers/test_resnet.py index e579478..2803389 100644 --- a/tests/layers/test_resnet.py +++ b/tests/layers/test_resnet.py @@ -320,10 +320,9 @@ def check_resblock(ctx, ctx.assertIsInstance(layer.conv1.weight_store, tk.layers.NormedAndScaledWeightStore) -class ResBlockTestCase(unittest.TestCase): +class ResBlockTestCase(TestCase): def test_resblock(self): - T.random.seed(1234) for spatial_ndims in (1, 2, 3): resblock_cls = getattr(tk.layers, f'ResBlock{spatial_ndims}d') check_resblock( @@ -340,7 +339,6 @@ def test_resblock(self): output_padding=1) def test_resblock_transpose(self): - T.random.seed(1234) for spatial_ndims, output_padding in product((1, 2, 3), (0, 1)): check_resblock( ctx=self, diff --git a/tests/layers/test_shape_.py b/tests/layers/test_shape_.py index 6f0bbbf..6b7fca2 100644 --- a/tests/layers/test_shape_.py +++ b/tests/layers/test_shape_.py @@ -8,7 +8,7 @@ from tests.ops import make_conv_shape -class FlattenToNDimsTestCase(unittest.TestCase): +class FlattenToNDimsTestCase(TestCase): def test_FlattenToNDims(self): x = T.random.randn(make_conv_shape([3, 4], 6, [5])) @@ -23,7 +23,7 @@ def test_FlattenToNDims(self): _ = layer(T.random.randn([1, 1])) -class ConstantPadTestCase(unittest.TestCase): +class ConstantPadTestCase(TestCase): def test_ConstantPad(self): for value_arg in [{}, {'value': 123.0}]: @@ -106,7 +106,7 @@ def fn(v): _ = layer_factory(0, 1, 2, 3) -class ChannelSwapTestCase(unittest.TestCase): +class ChannelSwapTestCase(TestCase): def test_channel_last_to_first(self): for spatial_ndims in (1, 2, 3): diff --git a/tests/layers/test_split_.py b/tests/layers/test_split_.py index 141cfb3..ee34e0b 100644 --- a/tests/layers/test_split_.py +++ b/tests/layers/test_split_.py @@ -5,7 +5,7 @@ from tests.helper import * -class BranchTestCase(unittest.TestCase): +class BranchTestCase(TestCase): def test_branch(self): shared = 
tk.layers.Linear(5, 5) diff --git a/tests/layers/test_utils.py b/tests/layers/test_utils.py index 9c11feb..07ac35a 100644 --- a/tests/layers/test_utils.py +++ b/tests/layers/test_utils.py @@ -10,7 +10,7 @@ from tests.ops import * -class UtilsTestCase(unittest.TestCase): +class UtilsTestCase(TestCase): def test_flatten_nested_layers(self): layers = [tk.layers.Linear(5, 5) for _ in range(5)] diff --git a/tests/tensor/test_core.py b/tests/tensor/test_core.py index 9fb536f..c621c97 100644 --- a/tests/tensor/test_core.py +++ b/tests/tensor/test_core.py @@ -15,7 +15,7 @@ from tests.ops import * -class TensorCoreTestCase(unittest.TestCase): +class TensorCoreTestCase(TestCase): def test_backend_info(self): self.assertEqual(T.backend_name, settings.backend) @@ -29,10 +29,10 @@ def test_jit_compile(self): else: self.assertFalse(tk.layers.is_jit_layer(layer2)) - # not supported object - with pytest.raises(TypeError, - match='Not supported by `jit_compile`'): - _ = tk.layers.jit_compile(object()) + def test_device(self): + # ensure we're using GPU if GPU is available + if T.gpu_device_list(): + self.assertEqual(T.current_device(), T.gpu_device_list()[0]) def test_utilities(self): self.assertEqual(T.int_range(0, 10), list(range(10))) @@ -88,8 +88,6 @@ def test_dtypes(self): assert_equal(t3, x) def test_tensor_constructors(self): - np.random.seed(1234) - # # as_tensor # for x in [1., 1, [1., 2., 3.], np.array([1., 2., 3.])]: # t = T.as_tensor(x) @@ -926,8 +924,6 @@ def test_index_select_and_others(self): _ = T.shift_axis(x_t, axis, 0) def test_math_univariate_op(self): - np.random.seed(1234) - x = np.random.randn(2, 3) u = np.random.rand(2, 3) x_t = T.as_tensor(x) @@ -956,7 +952,6 @@ def test_math_univariate_op(self): assert_allclose(T.erfinv(u_t), erfinv(u)) def test_math_bivariate_op(self): - np.random.seed(1234) x = np.random.randn(2, 3) y = np.random.randn(3) t1 = T.as_tensor(x) @@ -1049,7 +1044,6 @@ def log_f_exp(f, x, axis=None, keepdims=False): log_mean_exp = partial(log_f_exp, np.mean) # prepare for the data - np.random.seed(1234) x = np.random.randn(2, 3, 4) t = T.as_tensor(x) @@ -1265,7 +1259,6 @@ def read_bool(t): self.assertEqual(T.get_dtype(t), T.boolean) return T.to_numpy(t) - np.random.seed(1234) x = np.random.randn(2, 3, 4) y = np.random.randn(1, 3, 4) x = np.concatenate([y, x], axis=0) @@ -1332,8 +1325,6 @@ def test_sort(self): ) def test_matrix_ops(self): - np.random.seed(1234) - for k in [1, 5]: x = np.random.randn(4, k) y = np.random.randn(k, k) diff --git a/tests/tensor/test_linalg.py b/tests/tensor/test_linalg.py index 3a750e5..b1a16c8 100644 --- a/tests/tensor/test_linalg.py +++ b/tests/tensor/test_linalg.py @@ -3,13 +3,12 @@ import numpy as np from tensorkit import tensor as T -from tests.helper import assert_allclose +from tests.helper import * -class LinalgTestCase(unittest.TestCase): +class LinalgTestCase(TestCase): def test_qr(self): - np.random.seed(1234) for k in [1, 5]: m = np.random.randn(k, k) q, r = T.linalg.qr(T.as_tensor(m)) @@ -18,7 +17,6 @@ def test_qr(self): assert_allclose(r, expected_r) def test_slogdet(self): - np.random.seed(1234) for k in [1, 5]: m = np.random.randn(k, k) sign, logdet = T.linalg.slogdet(T.as_tensor(m)) diff --git a/tests/tensor/test_nn.py b/tests/tensor/test_nn.py index 79ae63b..b97b14c 100644 --- a/tests/tensor/test_nn.py +++ b/tests/tensor/test_nn.py @@ -11,14 +11,13 @@ from tests.ops import * -class TensorNNTestCase(unittest.TestCase): +class TensorNNTestCase(TestCase): def test_constants(self): 
self.assertEqual(T.nn.LEAKY_RELU_DEFAULT_SLOPE, 0.01) self.assertFalse(T.nn.AVG_POOL_DEFAULT_COUNT_PADDED_ZEROS) def test_activation_functions(self): - np.random.seed(1234) x = np.random.randn(2, 3, 4) x = np.concatenate([x, np.zeros([2, 3, 1])], axis=-1) self.assertTrue(np.any(x < 0)) @@ -115,8 +114,6 @@ def binary_cross_entropy(logits, labels, reduction, negative): out = -out return out - np.random.seed(1234) - logits = np.random.randn(2, 3, 4) sparse_labels = sigmoid(np.random.randn(3, 4)) labels = (sparse_labels < 0.5).astype(np.int32) @@ -183,8 +180,6 @@ def cross_entropy(logits, labels, reduction, negative): return sparse_cross_entropy( logits, sparse_labels, reduction, negative) - np.random.seed(1234) - logits = np.random.randn(2, 3, 4, 5, 6) sparse_labels = softmax(np.random.randn(3, 4, 5, 6), axis=-1) labels = np.argmax(sparse_labels, axis=-1) @@ -429,7 +424,6 @@ def do_check(pool_type, spatial_ndims, x, kernel_size, stride, padding, f'count_padded_zeros={count_padded_zeros}' ) - np.random.seed(1234) spatial_shape = [12, 13, 14] for spatial_ndims in (1, 2): x = np.random.uniform( diff --git a/tests/tensor/test_random.py b/tests/tensor/test_random.py index d642337..0502c91 100644 --- a/tests/tensor/test_random.py +++ b/tests/tensor/test_random.py @@ -18,7 +18,7 @@ def do_check_log_prob(given, batch_ndims, Z_log_prob_fn, np_log_prob): assert_allclose( Z_log_prob_fn(given, group_ndims=group_ndims), np.sum(np_log_prob, axis=tuple(range(-group_ndims, 0))), - rtol=1e-2 + rtol=1e-2, atol=1e-5, ) with pytest.raises(Exception, match='`group_ndims` is too large'): _ = Z_log_prob_fn(given, group_ndims=batch_ndims + 1) @@ -28,7 +28,7 @@ def normal_cdf(x): return norm.cdf(x) -class TensorRandomTestCase(unittest.TestCase): +class TensorRandomTestCase(TestCase): def test_seed(self): T.random.seed(1234) @@ -41,9 +41,6 @@ def test_seed(self): assert_allclose(x, z) def test_rand(self): - np.random.seed(1234) - T.random.seed(1234) - for dtype in float_dtypes: # test sample dtype and shape t = T.random.rand([n_samples, 2, 3, 4], dtype=dtype) @@ -60,9 +57,6 @@ def test_rand(self): ) def test_uniform(self): - np.random.seed(1234) - T.random.seed(1234) - for low, high in [(-1., 2.), (3.5, 7.5)]: for dtype in float_dtypes: # test sample dtype and shape @@ -85,7 +79,6 @@ def test_uniform(self): _ = T.random.uniform([2, 3, 4], low=2., high=1.) def test_shuffle_and_random_permutation(self): - T.random.seed(1234) x = np.arange(24).reshape([2, 3, 4]) # shuffle @@ -113,9 +106,6 @@ def test_shuffle_and_random_permutation(self): self.assertLess(equal_count, 100) def test_randn(self): - np.random.seed(1234) - T.random.seed(1234) - for dtype in float_dtypes: # test sample dtype and shape t = T.random.randn([n_samples, 2, 3, 4], dtype=dtype) @@ -138,8 +128,6 @@ def test_randn(self): np_log_prob=np.log(np.exp(-x ** 2 / 2.) 
/ np.sqrt(2 * np.pi))) def test_truncated_randn(self): - np.random.seed(1234) - T.random.seed(1234) log_zero = -1e6 def log_prob(given, low, high): @@ -208,9 +196,6 @@ def log_prob(given, low, high): ) def test_normal(self): - np.random.seed(1234) - T.random.seed(1234) - mean = np.random.randn(2, 3, 4) logstd = np.random.randn(3, 4) std = np.exp(logstd) @@ -238,7 +223,7 @@ def log_prob(given): x_mean = np.mean(x, axis=0) np.testing.assert_array_less( np.abs(x_mean - mean), - np.tile(np.expand_dims(3 * std / np.sqrt(n_samples), axis=0), + np.tile(np.expand_dims(5 * std / np.sqrt(n_samples), axis=0), [2, 1, 1]) ) @@ -265,7 +250,7 @@ def log_prob(given): x_mean = np.mean(x, axis=0) np.testing.assert_array_less( np.abs(x_mean - mean), - np.tile(np.expand_dims(3 * std / np.sqrt(n_samples), axis=0), + np.tile(np.expand_dims(5 * std / np.sqrt(n_samples), axis=0), [2, 1, 1]) ) @@ -349,9 +334,6 @@ def log_prob(given): t, mean_t, logstd_t, validate_tensors=True) def test_truncated_normal(self): - np.random.seed(1234) - T.random.seed(1234) - mean = np.random.randn(2, 3, 4) logstd = np.random.randn(3, 4) std = np.exp(logstd) @@ -542,9 +524,6 @@ def log_prob(given): (1 - given) * log_sigmoid(-logits) ) - np.random.seed(1234) - T.random.seed(1234) - logits = np.random.randn(2, 3, 4) probs = sigmoid(logits) @@ -632,9 +611,6 @@ def log_prob(given, probs, n_classes: int, is_one_hot: bool = False): # return np.log(np.prod(element_pow(probs, one-hot-given), axis=-1)) return np.sum(given * np.log(probs), axis=-1) - np.random.seed(1234) - T.random.seed(1234) - n_classes = 5 logits = np.clip(np.random.randn(2, 3, 4, n_classes) / 10., a_min=-0.3, a_max=0.3) @@ -768,7 +744,6 @@ def do_test_sample(is_one_hot: bool, _ = Z_sample_fn(probs=T.as_tensor(probs[0, 0, 0, 0])) def test_discretized_logistic(self): - np.random.seed(1234) next_seed_val = [1234] def next_seed(): @@ -980,9 +955,6 @@ def do_test_sample(bin_size: float, given_t, mean_t, log_scale_t, 1 / 255., max_val=2.) 
def test_random_init(self): - np.random.seed(1234) - T.random.seed(1234) - for dtype in float_dtypes: t = T.variable([n_samples, 2, 3], dtype=dtype) for fn, mean, std in [ diff --git a/tests/tensor/test_utils.py b/tests/tensor/test_utils.py index 5bd668e..f5aca99 100644 --- a/tests/tensor/test_utils.py +++ b/tests/tensor/test_utils.py @@ -6,9 +6,10 @@ import tensorkit as tk from tensorkit import tensor as T from tests.ops import * +from tests.helper import * -class UtilsTestCase(unittest.TestCase): +class UtilsTestCase(TestCase): def test_split_channel_spatial_shape(self): for spatial_ndims in (1, 2, 3): diff --git a/tests/test_arg_check.py b/tests/test_arg_check.py index 0b4115e..f11ef30 100644 --- a/tests/test_arg_check.py +++ b/tests/test_arg_check.py @@ -5,9 +5,10 @@ import tensorkit as tk from tensorkit import tensor as T from tensorkit.arg_check import * +from tests.helper import * -class ArgCheckTestCase(unittest.TestCase): +class ArgCheckTestCase(TestCase): def test_validate_positive_int(self): for v in [1, 2, 3]: diff --git a/tests/test_bayes.py b/tests/test_bayes.py index f3f67c6..5d96d10 100644 --- a/tests/test_bayes.py +++ b/tests/test_bayes.py @@ -10,7 +10,7 @@ from tests.helper import * -class BayesianNetTestCase(unittest.TestCase): +class BayesianNetTestCase(TestCase): def test_construct(self): # no observation diff --git a/tests/test_stochastic.py b/tests/test_stochastic.py index 3b8b6b4..3cc7c43 100644 --- a/tests/test_stochastic.py +++ b/tests/test_stochastic.py @@ -8,7 +8,7 @@ from tests.helper import * -class StochasticTensorTestCase(unittest.TestCase): +class StochasticTensorTestCase(TestCase): def test_basic_interface(self): normal = UnitNormal(shape=[2, 3]) diff --git a/tests/train/test_core.py b/tests/train/test_core.py index 2b1f793..a36f8ab 100644 --- a/tests/train/test_core.py +++ b/tests/train/test_core.py @@ -1,17 +1,16 @@ import os -import unittest from tempfile import TemporaryDirectory import numpy as np import pytest import torch - from mltk import SimpleStatefulObject import tensorkit as tk +from tests.helper import * -class TorchCheckpointTestCase(unittest.TestCase): +class TorchCheckpointTestCase(TestCase): def test_invalid_type(self): with pytest.raises(TypeError, diff --git a/tests/variational/test_chain.py b/tests/variational/test_chain.py index 02abc84..ae691e2 100644 --- a/tests/variational/test_chain.py +++ b/tests/variational/test_chain.py @@ -7,7 +7,7 @@ from tests.helper import * -class VariationalChainTestCase(unittest.TestCase): +class VariationalChainTestCase(TestCase): def prepare_model(self): def p_log_probs(names): diff --git a/tests/variational/test_estimators.py b/tests/variational/test_estimators.py index a3d9909..daf466c 100644 --- a/tests/variational/test_estimators.py +++ b/tests/variational/test_estimators.py @@ -6,11 +6,10 @@ from tensorkit import tensor as T from tensorkit.variational import * -from tests.helper import assert_allclose +from tests.helper import * def prepare_test_payload(reparameterized): - np.random.seed(1234) x = T.as_tensor(np.random.normal(size=[7, 13])) # input y = T.requires_grad(T.as_tensor(np.random.normal(size=[13]))) # param if reparameterized: @@ -23,7 +22,7 @@ def prepare_test_payload(reparameterized): return x, y, z, f, log_f, log_q -class SGVBEstimatorTestCase(unittest.TestCase): +class SGVBEstimatorTestCase(TestCase): def test_sgvb(self): assert_allclose_ = functools.partial(assert_allclose, rtol=1e-5, atol=1e-6) @@ -59,7 +58,7 @@ def test_sgvb(self): ) -class 
IWAEEstimatorTestCase(unittest.TestCase): +class IWAEEstimatorTestCase(TestCase): def test_error(self): x, y, z, f, log_f, log_q = prepare_test_payload(reparameterized=True) diff --git a/tests/variational/test_evaluation.py b/tests/variational/test_evaluation.py index 4520c0e..fc1c129 100644 --- a/tests/variational/test_evaluation.py +++ b/tests/variational/test_evaluation.py @@ -10,7 +10,6 @@ def prepare_test_payload(): - np.random.seed(1234) log_p = T.as_tensor(np.random.normal(size=[13])) log_q = T.as_tensor(np.random.normal(size=[7, 13])) return log_p, log_q @@ -19,7 +18,7 @@ def prepare_test_payload(): assert_allclose_ = partial(assert_allclose, atol=1e-4) -class ImportanceSamplingLogLikelihoodTestCase(unittest.TestCase): +class ImportanceSamplingLogLikelihoodTestCase(TestCase): def test_error(self): log_p, log_q = prepare_test_payload() diff --git a/tests/variational/test_inference.py b/tests/variational/test_inference.py index 53bf5e8..b195d60 100644 --- a/tests/variational/test_inference.py +++ b/tests/variational/test_inference.py @@ -7,7 +7,7 @@ from tests.helper import * -class VariationalInferenceTestCase(unittest.TestCase): +class VariationalInferenceTestCase(TestCase): def test_construction(self): vi = VariationalInference(T.float_scalar(1.), diff --git a/tests/variational/test_objectives.py b/tests/variational/test_objectives.py index 34c62e0..411b45d 100644 --- a/tests/variational/test_objectives.py +++ b/tests/variational/test_objectives.py @@ -9,13 +9,12 @@ def prepare_test_payload(): - np.random.seed(1234) log_p = T.as_tensor(np.random.normal(size=[13])) log_q = T.as_tensor(np.random.normal(size=[7, 13])) return log_p, log_q -class ELBOObjectiveTestCase(unittest.TestCase): +class ELBOObjectiveTestCase(TestCase): def test_elbo(self): log_p, log_q = prepare_test_payload() @@ -36,7 +35,7 @@ def test_elbo(self): ) -class MonteCarloObjectiveTestCase(unittest.TestCase): +class MonteCarloObjectiveTestCase(TestCase): def test_error(self): log_p, log_q = prepare_test_payload() From e84f34aadc6e157d3a624e2035d4cf4d6b0d2e89 Mon Sep 17 00:00:00 2001 From: Haowen Xu Date: Tue, 18 Feb 2020 00:51:35 +0800 Subject: [PATCH 4/7] added device to existing tests --- README.md | 2 +- tensorkit/backend/pytorch_/core.py | 40 +- tensorkit/examples/classification/mnist.py | 2 +- .../examples/classification/mnist_resnet.py | 2 +- tensorkit/layers/utils.py | 1 + tests/layers/test_core.py | 54 +++ tests/layers/test_utils.py | 1 + tests/tensor/test_core.py | 399 +++++++++++------- tests/tensor/test_random.py | 38 +- 9 files changed, 356 insertions(+), 183 deletions(-) diff --git a/README.md b/README.md index 70cf60a..ecdcc01 100644 --- a/README.md +++ b/README.md @@ -2,5 +2,5 @@ ### Requirements -* PyTorch >= 1.4.0 +* PyTorch: 1.3.1 diff --git a/tensorkit/backend/pytorch_/core.py b/tensorkit/backend/pytorch_/core.py index f5a535b..f26faca 100644 --- a/tensorkit/backend/pytorch_/core.py +++ b/tensorkit/backend/pytorch_/core.py @@ -27,9 +27,11 @@ # utilities 'int_range', 'identity', + # cast + 'cast', 'cast_like', + # dtypes - 'cast', 'cast_like', 'get_dtype', 'is_floating_point', - 'is_floating_point_dtype', + 'get_dtype', 'is_floating_point', 'is_floating_point_dtype', # tensor constructors 'as_tensor', 'from_numpy', @@ -191,33 +193,39 @@ def int_range(start: int, end: int, step: int = 1) -> List[int]: return ret -# ---- dtypes ---- +# ---- cast dtype and device ---- @jit -def cast(input: Tensor, dtype: str, device: Optional[str] = None) -> Tensor: - if dtype == 'float32': - target_dtype = 
torch.float32 - elif dtype == 'int32': - target_dtype = torch.int32 +def cast(input: Tensor, + dtype: Optional[str] = None, + device: Optional[str] = None) -> Tensor: + if dtype is None: + target_dtype = input.dtype else: - target_dtype = {'int8': torch.int8, 'uint8': torch.uint8, 'int16': torch.int16, 'int64': torch.int64, 'float16': torch.float16, 'float64': torch.float64, 'bool': torch.bool}[dtype] + if dtype == 'float32': + target_dtype = torch.float32 + elif dtype == 'int32': + target_dtype = torch.int32 + else: + target_dtype = {'int8': torch.int8, 'uint8': torch.uint8, 'int16': torch.int16, 'int64': torch.int64, 'float16': torch.float16, 'float64': torch.float64, 'bool': torch.bool}[dtype] if target_dtype != input.dtype and device is not None: - input = input.to(dtype=target_dtype, device=device) + output = input.to(dtype=target_dtype, device=device) elif target_dtype != input.dtype: - input = input.to(dtype=target_dtype) + output = input.to(dtype=target_dtype) elif device is not None: - input = input.to(device=device) + output = input.to(device=device) + else: + output = input - return input + return output @jit def cast_like(input: Tensor, like: Tensor) -> Tensor: - if like.dtype != input.dtype: - input = input.to(dtype=like.dtype, device=like.device) - return input + return input.to(dtype=like.dtype, device=like.device) +# ---- dtypes ---- @jit def get_dtype(input: Tensor) -> str: if input.dtype == torch.float32: diff --git a/tensorkit/examples/classification/mnist.py b/tensorkit/examples/classification/mnist.py index 7657f16..9083898 100644 --- a/tensorkit/examples/classification/mnist.py +++ b/tensorkit/examples/classification/mnist.py @@ -43,7 +43,7 @@ def main(exp: mltk.Experiment[Config]): # we have initialized the network, now we can compile the net with JIT engine net = tk.layers.jit_compile(net) - mltk.print_with_time('Network compiled to JIT module') + mltk.print_with_time('Network compiled with JIT') # define the train and evaluate functions def train_step(x, y): diff --git a/tensorkit/examples/classification/mnist_resnet.py b/tensorkit/examples/classification/mnist_resnet.py index bb9a13e..dccbe78 100644 --- a/tensorkit/examples/classification/mnist_resnet.py +++ b/tensorkit/examples/classification/mnist_resnet.py @@ -49,7 +49,7 @@ def main(exp: mltk.Experiment[Config]): # we have initialized the network, now we can compile the net with JIT engine net = tk.layers.jit_compile(net) - mltk.print_with_time('Network compiled to JIT module') + mltk.print_with_time('Network compiled with JIT') # the train, test and validate functions def train_step(x, y): diff --git a/tensorkit/layers/utils.py b/tensorkit/layers/utils.py index d04b837..19b277e 100644 --- a/tensorkit/layers/utils.py +++ b/tensorkit/layers/utils.py @@ -54,6 +54,7 @@ def do_flatten(target, layer_or_layers): 'leakyrelu': LeakyReLU, 'sigmoid': Sigmoid, 'tanh': Tanh, + 'logsoftmax': LogSoftmax, } diff --git a/tests/layers/test_core.py b/tests/layers/test_core.py index a6c1eae..a520d9e 100644 --- a/tests/layers/test_core.py +++ b/tests/layers/test_core.py @@ -28,6 +28,12 @@ def forward(self, input: Tensor) -> Tensor: return self.wrapped(input) +class _MyGetTraining(BaseLayer): + + def forward(self) -> bool: + return self.training + + class UtilsAndConstantsTestCase(TestCase): def test_constants(self): @@ -82,6 +88,44 @@ def test_param_and_buffer(self): self.assertDictEqual(dict(get_named_buffers(seq)), {'wrapped.c': c, 'wrapped.c2': c2}) self.assertDictEqual(dict(get_named_buffers(seq, recursive=False)), {}) + def 
test_layer_to_device(self): + for device in [None, T.CPU_DEVICE]: + layer = ResBlock2d(3, 4, kernel_size=2, device=device) + for param in tk.layers.get_parameters(layer): + self.assertEqual(T.get_device(param), device or T.current_device()) + + for device2 in [None, T.CPU_DEVICE]: + layer2 = tk.layers.layer_to_device(layer, device=device2) + for param in tk.layers.get_parameters(layer2): + self.assertEqual(T.get_device(param), device2 or T.current_device()) + + def test_set_train_mode(self): + layers = [tk.layers.jit_compile(_MyGetTraining()) + for _ in range(3)] + layer = layers[0] + + # set_train_mode + self.assertIs(tk.layers.set_train_mode(layer, True), layer) + self.assertEqual(layer(), True) + self.assertIs(tk.layers.set_train_mode(layer, False), layer) + self.assertEqual(layer(), False) + + # set_eval_mode + tk.layers.set_train_mode(layer, True) + self.assertEqual(layer(), True) + self.assertIs(tk.layers.set_eval_mode(layer), layer) + self.assertEqual(layer(), False) + + # scoped_eval_mode + for l in layers: + tk.layers.set_train_mode(l, True) + self.assertEqual(l(), True) + with tk.layers.scoped_eval_mode(layers[0], layers[1:]): + for l in layers: + self.assertEqual(l(), False) + for l in layers: + self.assertEqual(l(), True) + def test_SimpleParamStore(self): initial_value = np.random.randn(2, 3, 4) store = SimpleParamStore([2, 3, 4], initializer=initial_value) @@ -504,6 +548,9 @@ def test_batch_norm(self): _ = layer(x) set_train_mode(layer, False) y = layer(x) + set_train_mode(layer, True) + set_eval_mode(layer) + y2 = layer(x) # manually compute the expected output if T.backend_name == 'PyTorch': @@ -519,6 +566,7 @@ def test_batch_norm(self): # check output assert_allclose(y, expected, rtol=1e-4, atol=1e-6) + assert_allclose(y2, expected, rtol=1e-4, atol=1e-6) # check invalid dimensions with pytest.raises(Exception, match='only supports .d input'): @@ -571,3 +619,9 @@ def test_dropout(self): y = layer(x) self.assertTrue(np.all(T.to_numpy(y) != 0)) assert_allclose(y, x, rtol=1e-4, atol=1e-6) + + set_train_mode(layer, True) + set_eval_mode(layer) + y = layer(x) + self.assertTrue(np.all(T.to_numpy(y) != 0)) + assert_allclose(y, x, rtol=1e-4, atol=1e-6) diff --git a/tests/layers/test_utils.py b/tests/layers/test_utils.py index 07ac35a..a3102c5 100644 --- a/tests/layers/test_utils.py +++ b/tests/layers/test_utils.py @@ -45,6 +45,7 @@ def test_get_activation_class(self): ('Leaky_ReLU', tk.layers.LeakyReLU, (), {'negative_slope': 0.2}, T.nn.leaky_relu(x, 0.2)), ('Sigmoid', tk.layers.Sigmoid, (), {}, T.nn.sigmoid(x)), ('Tanh', tk.layers.Tanh, (), {}, T.tanh(x)), + ('Log_Softmax', tk.layers.LogSoftmax, (), {}, T.nn.log_softmax(x)), ]: name_candidates = (None,) if origin_name is None else ( origin_name, diff --git a/tests/tensor/test_core.py b/tests/tensor/test_core.py index c621c97..3d17e22 100644 --- a/tests/tensor/test_core.py +++ b/tests/tensor/test_core.py @@ -32,7 +32,30 @@ def test_jit_compile(self): def test_device(self): # ensure we're using GPU if GPU is available if T.gpu_device_list(): - self.assertEqual(T.current_device(), T.gpu_device_list()[0]) + gpu_list = T.gpu_device_list() + self.assertEqual(T.current_device(), gpu_list[0]) + self.assertEqual(T.first_gpu_device(), gpu_list[0]) + else: + self.assertEqual(T.first_gpu_device(), T.CPU_DEVICE) + with pytest.raises(RuntimeError, match='No GPU is available.'): + _ = T.first_gpu_device(fallback_to_cpu=False) + + # test `get_device` + t = T.random.randn([2, 3, 4], dtype=T.float32) + self.assertEqual(T.get_device(t), 
T.current_device()) + + # test `to_device` + if T.current_device() != T.CPU_DEVICE: + t = T.random.randn([2, 3, 4], dtype=T.float32) + t2 = T.to_device(t, T.CPU_DEVICE) + self.assertEqual(T.get_device(t2), T.CPU_DEVICE) + assert_allclose(t, t2) + + # test `use_device` + with T.use_device(T.CPU_DEVICE): + self.assertEqual(T.current_device(), T.CPU_DEVICE) + t = T.random.randn([2, 3, 4], dtype=T.float32) + self.assertEqual(T.get_device(t), T.CPU_DEVICE) def test_utilities(self): self.assertEqual(T.int_range(0, 10), list(range(10))) @@ -72,35 +95,42 @@ def test_dtypes(self): self.assertIsInstance(t, T.Tensor) assert_equal(t, x) - # cast - for dtype in number_dtypes: - t2 = T.cast(t, dtype) + def test_cast(self): + x = np.asarray([1, 2, 3]) + t = T.as_tensor(x) + + # cast dtype + for dtype, device in itertools.product( + number_dtypes, [None, T.CPU_DEVICE]): + t2 = T.cast(t, dtype=dtype, device=device) self.assertIsInstance(t2, T.Tensor) self.assertEqual(T.get_dtype(t2), dtype) + self.assertEqual(T.get_device(t2), device or T.current_device()) assert_equal(t2, x) - # cast_like - for like in (t, t2): - t3 = T.cast_like(t, like) - self.assertIsInstance(t3, T.Tensor) - self.assertEqual(T.get_dtype(t3), T.get_dtype(like)) - self.assertEqual(T.get_device(t3), T.get_device(like)) - assert_equal(t3, x) + # cast_like + for like in (t, t2): + t3 = T.cast_like(t, like) + self.assertIsInstance(t3, T.Tensor) + self.assertEqual(T.get_dtype(t3), T.get_dtype(like)) + self.assertEqual(T.get_device(t3), T.get_device(like)) + assert_equal(t3, x) + + # only device + for device in [None, T.CPU_DEVICE]: + t2 = T.cast(t, device=device) + self.assertIsInstance(t2, T.Tensor) + self.assertEqual(T.get_dtype(t2), T.get_dtype(t)) + self.assertEqual(T.get_device(t2), device or T.current_device()) + assert_equal(t2, x) - def test_tensor_constructors(self): - # # as_tensor - # for x in [1., 1, [1., 2., 3.], np.array([1., 2., 3.])]: - # t = T.as_tensor(x) - # self.assertIsInstance(t, T.Tensor) - # assert_equal(t, x) - # - # x = T.as_tensor(np.asarray([1, 2, 3], dtype=np.int32)) - # t = T.as_tensor(x) - # self.assertIs(t, x) - # - # with pytest.raises(Exception): - # _ = T.as_tensor(object()) # not a tensor, should raise error + # null cast + t2 = T.cast(t) + self.assertEqual(T.get_dtype(t2), T.get_dtype(t)) + self.assertEqual(T.get_device(t2), T.get_device(t)) + assert_equal(t2, x) + def test_tensor_constructors(self): # as_tensor def copy_tensor(o): if isinstance(o, StochasticTensor): @@ -128,135 +158,187 @@ def copy_tensor(o): x_value = copy.copy(x) for should_copy in [True, False]: - for dtype in (None,) + number_dtypes: + for dtype, device in itertools.product( + (None,) + number_dtypes, [None, T.CPU_DEVICE]): xx = copy_tensor(x) self.assertIsInstance(xx, type(x)) - dtype_kwargs = {'dtype': dtype} if dtype is not None else {} + kwargs = {'dtype': dtype} if dtype is not None else {} + if device is not None: + kwargs['device'] = device - t = T.as_tensor(xx, force_copy=should_copy, **dtype_kwargs) + t = T.as_tensor(xx, force_copy=should_copy, **kwargs) self.assertIsInstance(t, T.Tensor) + self.assertEqual(T.get_device(t), device or T.current_device()) if should_copy: if hasattr(xx, '__setitem__'): xx[0] = 12345. 
assert_equal(t, x_value, - err_msg=f'{x}, {should_copy}, {dtype}') + err_msg=f'{x}, {should_copy}, {dtype}, {device}') with pytest.raises(Exception): _ = T.as_tensor(object()) # not a tensor, should raise error # from numpy: force copied for x in [np.array([1., 2., 3.])]: - for dtype in (None,) + number_dtypes: + for dtype, device in itertools.product( + (None,) + number_dtypes, [None, T.CPU_DEVICE]): xx = copy.copy(x) self.assertIsInstance(xx, type(x)) - dtype_kwargs = {'dtype': dtype} if dtype is not None else {} - t = T.from_numpy(xx, **dtype_kwargs) + kwargs = {'dtype': dtype} if dtype is not None else {} + if device is not None: + kwargs['device'] = device + t = T.from_numpy(xx, **kwargs) self.assertIsInstance(t, T.Tensor) + self.assertEqual(T.get_device(t), device or T.current_device()) xx[0] = 12345. - assert_equal(t, x, err_msg=f'{x}, {dtype}') + assert_equal(t, x, err_msg=f'{x}, {dtype}, {device}') with pytest.raises(Exception): _ = T.from_numpy(object()) # not a tensor, should raise error # float_scalar - for value in (1.25, 125): - for dtype in (T.float16, T.float32, T.float64): - t = T.float_scalar(value, dtype=dtype) - self.assertEqual(T.get_dtype(t), dtype) - assert_equal(t, value) - self.assertEqual(T.get_dtype(T.float_scalar(1.25)), T.float_x()) + for value, dtype, device in itertools.product( + (1.25, 125), + (T.float16, T.float32, T.float64), + (None, T.CPU_DEVICE)): + t = T.float_scalar(value, dtype=dtype, device=device) + self.assertEqual(T.get_dtype(t), dtype) + self.assertEqual(T.get_device(t), device or T.current_device()) + assert_equal(t, value) + + # float_scalar_like + t2 = T.float_scalar_like(value, t) + self.assertEqual(T.get_dtype(t2), T.get_dtype(t)) + self.assertEqual(T.get_device(t2), T.get_device(t)) + + t = T.float_scalar(1.25) + assert_equal(t, 1.25) + self.assertEqual(T.get_dtype(t), T.float_x()) + self.assertEqual(T.get_device(t), T.current_device()) # int_scalar - for value in (2, 125): - for dtype in (T.int8, T.int16, T.int32, T.int64): - t = T.int_scalar(value, dtype=dtype) - self.assertEqual(T.get_dtype(t), dtype) - assert_equal(t, value) - self.assertEqual(T.get_dtype(T.int_scalar(125)), T.int32) + for value, dtype, device in itertools.product( + (2, 125), + (T.int8, T.int16, T.int32, T.int64), + (None, T.CPU_DEVICE)): + t = T.int_scalar(value, dtype=dtype) + self.assertEqual(T.get_dtype(t), dtype) + self.assertEqual(T.get_device(t), device or T.current_device()) + assert_equal(t, value) - # zeros - for shape in ([1, 2, 3], []): - for dtype in number_dtypes: - t = T.zeros(shape, dtype=dtype) - self.assertIsInstance(t, T.Tensor) - self.assertEqual(T.get_dtype(t), dtype) - assert_equal(t, np.zeros(shape)) + # int_scalar_like + t2 = T.float_scalar_like(value, t) + self.assertEqual(T.get_dtype(t2), T.get_dtype(t)) + self.assertEqual(T.get_device(t2), T.get_device(t)) - # zeros_like - t2 = T.zeros_like(t) - self.assertIsInstance(t2, T.Tensor) - self.assertEqual(T.get_dtype(t2), dtype) - assert_equal(t, np.zeros(shape)) + t = T.int_scalar(125) + self.assertEqual(T.get_dtype(t), T.int32) + self.assertEqual(T.get_device(t), T.current_device()) - for dtype2 in (None,) + number_dtypes: - for shape2 in (None, [7, 8]): - t2 = T.zeros_like(t, dtype=dtype2, shape=shape2) - self.assertIsInstance(t2, T.Tensor) - self.assertEqual(T.get_dtype(t2), dtype2 or dtype) - assert_equal(t2, np.zeros(shape2 or shape)) + # zeros + for shape, dtype, device in itertools.product( + ([1, 2, 3], []), + number_dtypes, + (None, T.CPU_DEVICE)): + t = T.zeros(shape, dtype=dtype, 
device=device) + self.assertIsInstance(t, T.Tensor) + self.assertEqual(T.get_dtype(t), dtype) + self.assertEqual(T.get_device(t), device or T.current_device()) + assert_equal(t, np.zeros(shape)) - # ones - for shape in ([1, 2, 3], []): - for dtype in number_dtypes: - t = T.ones(shape, dtype=dtype) - self.assertIsInstance(t, T.Tensor) - self.assertEqual(T.get_dtype(t), dtype) - assert_equal(t, np.ones(shape)) + # zeros_like + t2 = T.zeros_like(t) + self.assertIsInstance(t2, T.Tensor) + self.assertEqual(T.get_dtype(t2), T.get_dtype(t)) + self.assertEqual(T.get_device(t2), T.get_device(t)) + assert_equal(t, np.zeros(shape)) + + for dtype2 in (None,) + number_dtypes: + for shape2 in (None, [7, 8]): + t2 = T.zeros_like(t, dtype=dtype2, shape=shape2) + self.assertIsInstance(t2, T.Tensor) + self.assertEqual(T.get_dtype(t2), dtype2 or dtype) + self.assertEqual(T.get_device(t2), T.get_device(t)) + assert_equal(t2, np.zeros(shape2 or shape)) - # ones_like - t2 = T.ones_like(t) - self.assertIsInstance(t2, T.Tensor) - self.assertEqual(T.get_dtype(t2), dtype) - assert_equal(t, np.ones(shape)) + # ones + for shape, dtype, device in itertools.product( + ([1, 2, 3], []), + number_dtypes, + (None, T.CPU_DEVICE)): + t = T.ones(shape, dtype=dtype) + self.assertIsInstance(t, T.Tensor) + self.assertEqual(T.get_dtype(t), dtype) + self.assertEqual(T.get_device(t), device or T.current_device()) + assert_equal(t, np.ones(shape)) - for dtype2 in (None,) + number_dtypes: - for shape2 in (None, [7, 8]): - t2 = T.ones_like(t, dtype=dtype2, shape=shape2) - self.assertIsInstance(t2, T.Tensor) - self.assertEqual(T.get_dtype(t2), dtype2 or dtype) - assert_equal(t2, np.ones(shape2 or shape)) + # ones_like + t2 = T.ones_like(t) + self.assertIsInstance(t2, T.Tensor) + self.assertEqual(T.get_dtype(t2), T.get_dtype(t)) + self.assertEqual(T.get_device(t2), T.get_device(t)) + assert_equal(t, np.ones(shape)) + + for dtype2 in (None,) + number_dtypes: + for shape2 in (None, [7, 8]): + t2 = T.ones_like(t, dtype=dtype2, shape=shape2) + self.assertIsInstance(t2, T.Tensor) + self.assertEqual(T.get_dtype(t2), dtype2 or dtype) + self.assertEqual(T.get_device(t2), T.get_device(t)) + assert_equal(t2, np.ones(shape2 or shape)) # full fill_value = 123 - for shape in ([1, 2, 3], []): - for dtype in number_dtypes: - t = T.full(shape, fill_value, dtype=dtype) - self.assertIsInstance(t, T.Tensor) - self.assertEqual(T.get_dtype(t), dtype) - assert_equal(t, np.full(shape, fill_value)) - - # zeros_like - t2 = T.full_like(t, fill_value) - self.assertIsInstance(t2, T.Tensor) - self.assertEqual(T.get_dtype(t2), dtype) - assert_equal(t, np.full(shape, fill_value)) - - for dtype2 in (None,) + number_dtypes: - for shape2 in (None, [7, 8]): - t2 = T.full_like(t, fill_value, dtype=dtype2, - shape=shape2) - self.assertIsInstance(t2, T.Tensor) - self.assertEqual(T.get_dtype(t2), dtype2 or dtype) - assert_equal(t2, np.full(shape2 or shape, fill_value)) + for shape, dtype, device in itertools.product( + ([1, 2, 3], []), + number_dtypes, + (None, T.CPU_DEVICE)): + t = T.full(shape, fill_value, dtype=dtype, device=device) + self.assertIsInstance(t, T.Tensor) + self.assertEqual(T.get_dtype(t), dtype) + self.assertEqual(T.get_device(t), device or T.current_device()) + assert_equal(t, np.full(shape, fill_value)) + + # full_like + t2 = T.full_like(t, fill_value) + self.assertIsInstance(t2, T.Tensor) + self.assertEqual(T.get_dtype(t2), T.get_dtype(t)) + self.assertEqual(T.get_device(t2), T.get_device(t)) + assert_equal(t, np.full(shape, fill_value)) + + for dtype2 
in (None,) + number_dtypes: + for shape2 in (None, [7, 8]): + t2 = T.full_like(t, fill_value, dtype=dtype2, + shape=shape2) + self.assertIsInstance(t2, T.Tensor) + self.assertEqual(T.get_dtype(t2), dtype2 or dtype) + self.assertEqual(T.get_device(t2), T.get_device(t)) + assert_equal(t2, np.full(shape2 or shape, fill_value)) # arange - for start, end in [(1, 10), (0, 10)]: - t = T.arange(start, end) - self.assertIsInstance(t, T.Tensor) - self.assertEqual(T.get_dtype(t), T.int32) - assert_equal(t, np.arange(start, end)) + for device in [None, T.current_device()]: + expected_device = device or T.current_device() - for start, end, step in [(0, 10, 2), (-2, -15, -3)]: - t = T.arange(start, end, step) - self.assertIsInstance(t, T.Tensor) - self.assertEqual(T.get_dtype(t), T.int32) - assert_equal(t, np.arange(start, end, step)) + for start, end in [(1, 10), (0, 10)]: + t = T.arange(start, end, device=device) + self.assertIsInstance(t, T.Tensor) + self.assertEqual(T.get_dtype(t), T.int32) + self.assertEqual(T.get_device(t), expected_device) + assert_equal(t, np.arange(start, end)) - for dtype in number_dtypes: - t = T.arange(0, 10, dtype=dtype) - self.assertIsInstance(t, T.Tensor) - self.assertEqual(T.get_dtype(t), dtype) - assert_equal(t, np.arange(10)) + for start, end, step in [(0, 10, 2), (-2, -15, -3)]: + t = T.arange(start, end, step, device=device) + self.assertIsInstance(t, T.Tensor) + self.assertEqual(T.get_dtype(t), T.int32) + self.assertEqual(T.get_device(t), expected_device) + assert_equal(t, np.arange(start, end, step)) + + for dtype in number_dtypes: + t = T.arange(0, 10, dtype=dtype, device=device) + self.assertIsInstance(t, T.Tensor) + self.assertEqual(T.get_dtype(t), dtype) + self.assertEqual(T.get_device(t), expected_device) + assert_equal(t, np.arange(10)) # one_hot for n_classes in [1, 5]: @@ -265,6 +347,7 @@ def copy_tensor(o): x = np.random.randint(0, n_classes, size=shape) t = T.one_hot(T.as_tensor(x), n_classes) + self.assertEqual(T.get_device(t), T.current_device()) assert_equal(t, I[x]) for dtype in number_dtypes: @@ -312,51 +395,61 @@ def is_requires_grad(t): except Exception: return False - for dtype in number_dtypes: - t = T.variable([3], dtype=dtype, requires_grad=False) - self.assertIsInstance(t, T.Variable) - self.assertEqual(T.get_dtype(t), dtype) - self.assertEqual(T.shape(t), [3]) + for device in [None, T.CPU_DEVICE]: + for dtype in number_dtypes: + t = T.variable([3], dtype=dtype, device=device, requires_grad=False) + self.assertIsInstance(t, T.Variable) + self.assertEqual(T.get_dtype(t), dtype) + self.assertEqual(T.get_device(t), device or T.current_device()) + self.assertEqual(T.shape(t), [3]) - t = T.variable([2, 3], dtype=t.dtype, requires_grad=False) - self.assertIsInstance(t, T.Variable) - self.assertEqual(T.get_dtype(t), dtype) - self.assertEqual(T.shape(t), [2, 3]) + t = T.variable([2, 3], dtype=t.dtype, device=device, requires_grad=False) + self.assertIsInstance(t, T.Variable) + self.assertEqual(T.get_dtype(t), dtype) + self.assertEqual(T.get_device(t), device or T.current_device()) + self.assertEqual(T.shape(t), [2, 3]) - for dtype in float_dtypes: - # scalar initializer - for v in (123, 123., np.array(123.)): + for dtype in float_dtypes: + # scalar initializer + for v in (123, 123., np.array(123.)): + for requires_grad in (True, False): + t = T.variable( + [3], dtype=dtype, device=device, + requires_grad=requires_grad, initializer=v) + self.assertIsInstance(t, T.Variable) + self.assertEqual(T.get_dtype(t), dtype) + self.assertEqual(T.get_device(t), 
device or T.current_device()) + self.assertEqual(is_requires_grad(t), requires_grad) + assert_equal(t, np.array([v] * 3)) + + # array initializer for requires_grad in (True, False): - t = T.variable([3], dtype=dtype, requires_grad=requires_grad, - initializer=v) + t = T.variable( + [3], dtype=dtype, device=device, + requires_grad=requires_grad, initializer=np.array([1., 2., 3.])) self.assertIsInstance(t, T.Variable) self.assertEqual(T.get_dtype(t), dtype) + self.assertEqual(T.get_device(t), device or T.current_device()) self.assertEqual(is_requires_grad(t), requires_grad) - assert_equal(t, np.array([v] * 3)) + assert_equal(t, [1., 2., 3.]) - # array initializer - for requires_grad in (True, False): - t = T.variable([3], dtype=dtype, requires_grad=requires_grad, - initializer=np.array([1., 2., 3.])) - self.assertIsInstance(t, T.Variable) - self.assertEqual(T.get_dtype(t), dtype) - self.assertEqual(is_requires_grad(t), requires_grad) - assert_equal(t, [1., 2., 3.]) - - with pytest.raises(ValueError, - match=r'`initializer.shape` != `shape`: ' - r'\[3\] vs \[4\]'): - _ = T.variable([4], dtype=dtype, requires_grad=False, - initializer=np.array([1., 2., 3.])) - - # callable initializer - for requires_grad in (True, False): - t = T.variable([3], dtype=dtype, requires_grad=requires_grad, - initializer=partial(T.fill, fill_value=123.)) - self.assertIsInstance(t, T.Variable) - self.assertEqual(T.get_dtype(t), dtype) - self.assertEqual(is_requires_grad(t), requires_grad) - assert_equal(t, [123.] * 3) + with pytest.raises(ValueError, + match=r'`initializer.shape` != `shape`: ' + r'\[3\] vs \[4\]'): + _ = T.variable( + [4], dtype=dtype, device=device, requires_grad=False, + initializer=np.array([1., 2., 3.])) + + # callable initializer + for requires_grad in (True, False): + t = T.variable( + [3], dtype=dtype, device=device, requires_grad=requires_grad, + initializer=partial(T.fill, fill_value=123.)) + self.assertIsInstance(t, T.Variable) + self.assertEqual(T.get_dtype(t), dtype) + self.assertEqual(T.get_device(t), device or T.current_device()) + self.assertEqual(is_requires_grad(t), requires_grad) + assert_equal(t, [123.] 
* 3) # unsupported initializer with pytest.raises(TypeError, match='Unsupported initializer'): diff --git a/tests/tensor/test_random.py b/tests/tensor/test_random.py index 0502c91..8ddd8f6 100644 --- a/tests/tensor/test_random.py +++ b/tests/tensor/test_random.py @@ -15,8 +15,10 @@ def do_check_log_prob(given, batch_ndims, Z_log_prob_fn, np_log_prob): # test log_prob for group_ndims in range(0, batch_ndims + 1): + out = Z_log_prob_fn(given, group_ndims=group_ndims) + assert(T.get_device(out) == T.get_device(given)) assert_allclose( - Z_log_prob_fn(given, group_ndims=group_ndims), + out, np.sum(np_log_prob, axis=tuple(range(-group_ndims, 0))), rtol=1e-2, atol=1e-5, ) @@ -41,10 +43,11 @@ def test_seed(self): assert_allclose(x, z) def test_rand(self): - for dtype in float_dtypes: + for dtype, device in product(float_dtypes, [None, T.CPU_DEVICE]): # test sample dtype and shape - t = T.random.rand([n_samples, 2, 3, 4], dtype=dtype) + t = T.random.rand([n_samples, 2, 3, 4], dtype=dtype, device=device) self.assertEqual(T.get_dtype(t), dtype) + self.assertEqual(T.get_device(t), device or T.current_device()) self.assertEqual(T.shape(t), [n_samples, 2, 3, 4]) # test sample mean @@ -58,11 +61,14 @@ def test_rand(self): def test_uniform(self): for low, high in [(-1., 2.), (3.5, 7.5)]: - for dtype in float_dtypes: + for dtype, device in product(float_dtypes, [None, T.CPU_DEVICE]): # test sample dtype and shape - t = T.random.uniform([n_samples, 2, 3, 4], low=low, high=high, - dtype=dtype) + t = T.random.uniform( + [n_samples, 2, 3, 4], low=low, high=high, dtype=dtype, + device=device + ) self.assertEqual(T.get_dtype(t), dtype) + self.assertEqual(T.get_device(t), device or T.current_device()) self.assertEqual(T.shape(t), [n_samples, 2, 3, 4]) # test sample mean @@ -70,7 +76,7 @@ def test_uniform(self): x_mean = np.mean(x, axis=0) np.testing.assert_array_less( np.abs(x_mean - 0.5 * (high + low)), - (3. * np.sqrt((high - low) ** 2 / 12) / np.sqrt(n_samples) * + (5. 
* np.sqrt((high - low) ** 2 / 12) / np.sqrt(n_samples) * np.ones_like(x_mean)) ) @@ -92,13 +98,14 @@ def test_shuffle_and_random_permutation(self): self.assertLess(equal_count, 100) # random_permutation - for dtype in int_dtypes: + for dtype, device in product(int_dtypes, [None, T.CPU_DEVICE]): for n in [0, 1, 5]: x = np.arange(n) equal_count = 0 for k in range(100): - t = T.random.random_permutation(n, dtype=dtype) + t = T.random.random_permutation(n, dtype=dtype, device=device) self.assertEqual(T.get_dtype(t), dtype) + self.assertEqual(T.get_device(t), device or T.current_device()) if np.all(np.equal(T.to_numpy(t), x)): equal_count += 1 assert_equal(np.sort(T.to_numpy(t)), x) @@ -106,10 +113,11 @@ def test_shuffle_and_random_permutation(self): self.assertLess(equal_count, 100) def test_randn(self): - for dtype in float_dtypes: + for dtype, device in product(float_dtypes, [None, T.CPU_DEVICE]): # test sample dtype and shape - t = T.random.randn([n_samples, 2, 3, 4], dtype=dtype) + t = T.random.randn([n_samples, 2, 3, 4], dtype=dtype, device=device) self.assertEqual(T.get_dtype(t), dtype) + self.assertEqual(T.get_device(t), device or T.current_device()) self.assertEqual(T.shape(t), [n_samples, 2, 3, 4]) # test sample mean @@ -216,6 +224,7 @@ def log_prob(given): logstd_t = T.cast(T.expand(T.as_tensor(logstd), [n_samples, 1, 3, 4]), dtype) t = T.random.normal(mean_t, std_t) self.assertEqual(T.get_dtype(t), dtype) + self.assertEqual(T.get_device(t), T.current_device()) self.assertEqual(T.shape(t), [n_samples, 2, 3, 4]) # test sample mean @@ -243,6 +252,7 @@ def log_prob(given): logstd_t = T.as_tensor(logstd, dtype) t = T.random.normal(mean_t, std_t, n_samples=n_samples) self.assertEqual(T.get_dtype(t), dtype) + self.assertEqual(T.get_device(t), T.current_device()) self.assertEqual(T.shape(t), [n_samples, 2, 3, 4]) # test sample mean @@ -269,6 +279,7 @@ def log_prob(given): logstd_t = T.as_tensor(logstd, dtype) t = T.random.normal(mean_t, std_t) self.assertEqual(T.get_dtype(t), dtype) + self.assertEqual(T.get_device(t), T.current_device()) # test log_prob x = T.to_numpy(t) @@ -371,6 +382,7 @@ def do_test(low, high, dtype): t = T.random.truncated_normal( mean_t, std_t, n_samples=n_samples, low=low, high=high) self.assertEqual(T.get_dtype(t), dtype) + self.assertEqual(T.get_device(t), T.current_device()) self.assertEqual(T.shape(t), [n_samples, 2, 3, 4]) # test sample value range @@ -408,6 +420,7 @@ def do_test(low, high, dtype): logstd_t = T.as_tensor(logstd, dtype) t = T.random.truncated_normal(mean_t, std_t, low=low, high=high) self.assertEqual(T.get_dtype(t), dtype) + self.assertEqual(T.get_device(t), T.current_device()) # test sample value range x = T.to_numpy(t) @@ -552,6 +565,7 @@ def do_test_sample(n_z, sample_shape, float_dtype, dtype): t = T.random.bernoulli( probs=probs_t, n_samples=n_z, dtype=dtype) self.assertEqual(T.get_dtype(t), dtype) + self.assertEqual(T.get_device(t), T.current_device()) self.assertEqual(T.shape(t), sample_shape + [2, 3, 4]) # all values must be either 0 or 1 @@ -657,6 +671,7 @@ def do_test_sample(is_one_hot: bool, t = (T.random.one_hot_categorical if is_one_hot else T.random.categorical)(probs_t, n_samples=n_z, **kwargs) self.assertEqual(T.get_dtype(t), expected_dtype) + self.assertEqual(T.get_device(t), T.current_device()) self.assertEqual(T.shape(t), sample_shape + [2, 3, 4] + value_shape) # check values @@ -866,6 +881,7 @@ def do_test_sample(bin_size: float, validate_tensors=validate_tensors, ) self.assertEqual(T.get_dtype(t), dtype) + 
self.assertEqual(T.get_device(t), T.current_device()) self.assertEqual(T.shape(t), sample_shape + value_shape) # check values From a5908571ad16b8eb44d47ea6a43a0b8e4baa73a5 Mon Sep 17 00:00:00 2001 From: Haowen Xu Date: Tue, 18 Feb 2020 02:16:02 +0800 Subject: [PATCH 5/7] added some tests --- tensorkit/backend/pytorch_/core.py | 5 ++- tensorkit/utils/data_utils.py | 2 -- tensorkit/utils/tensor_stream.py | 29 ++++++++++++++--- tests/tensor/test_core.py | 2 +- tests/utils/__init__.py | 0 tests/utils/test_data_utils.py | 50 +++++++++++++++++++++++++++++ tests/utils/test_tensor_stream.py | 51 ++++++++++++++++++++++++++++++ 7 files changed, 129 insertions(+), 10 deletions(-) create mode 100644 tests/utils/__init__.py create mode 100644 tests/utils/test_data_utils.py create mode 100644 tests/utils/test_tensor_stream.py diff --git a/tensorkit/backend/pytorch_/core.py b/tensorkit/backend/pytorch_/core.py index f26faca..93a28cb 100644 --- a/tensorkit/backend/pytorch_/core.py +++ b/tensorkit/backend/pytorch_/core.py @@ -159,9 +159,8 @@ def use_device(device: str): else: old_device = _current_device[0] try: - with torch.cuda.device(device): - _current_device[0] = device - yield + _current_device[0] = device + yield finally: _current_device[0] = old_device diff --git a/tensorkit/utils/data_utils.py b/tensorkit/utils/data_utils.py index a6a0168..fe18ae4 100644 --- a/tensorkit/utils/data_utils.py +++ b/tensorkit/utils/data_utils.py @@ -1,5 +1,3 @@ -import mltk - import numpy as np from tensorkit import tensor as T diff --git a/tensorkit/utils/tensor_stream.py b/tensorkit/utils/tensor_stream.py index 57e4584..0d1a6c1 100644 --- a/tensorkit/utils/tensor_stream.py +++ b/tensorkit/utils/tensor_stream.py @@ -12,11 +12,22 @@ class TensorStream(mltk.DataStream): + """ + A subclass of :class:`mltk.DataStream` that transforms the underlying + NumPy array data stream into tensor data stream. + """ source: mltk.DataStream device: str def __init__(self, source: mltk.DataStream, device: Optional[str] = None): + """ + Construct a new :class:`TensorStream`. + + Args: + source: The source data stream. + device: The device where to place new tensors. + """ device = device or T.current_device() super().__init__( batch_size=source.batch_size, @@ -45,14 +56,24 @@ def _minibatch_iterator(self) -> Generator[ArrayTuple, None, None]: finally: g.close() - def _concat_arrays(self, arrays: Sequence[T.Tensor]) -> T.Tensor: - return T.concat(list(arrays), axis=0) - def as_tensor_stream(source: mltk.DataStream, device: Optional[str] = None, prefetch: Optional[int] = None - ) -> mltk.DataStream: + ) -> Union[TensorStream, mltk.data.ThreadingDataStream]: + """ + Construct a tensor data stream. + + Args: + source: The source NumPy array stream. + device: The device where to place new tensors. + prefetch: Number of batches to prefetch in background. + If specified, will wrap the constructed :class:`TensorStream` + with a :class:`mltk.data.ThreadingDataStream`. + + Returns: + The tensor data stream. 
+ """ stream = TensorStream(source, device=device) if prefetch is not None: stream = stream.threaded(prefetch) diff --git a/tests/tensor/test_core.py b/tests/tensor/test_core.py index 3d17e22..fb2eb09 100644 --- a/tests/tensor/test_core.py +++ b/tests/tensor/test_core.py @@ -221,7 +221,7 @@ def copy_tensor(o): (2, 125), (T.int8, T.int16, T.int32, T.int64), (None, T.CPU_DEVICE)): - t = T.int_scalar(value, dtype=dtype) + t = T.int_scalar(value, dtype=dtype, device=device) self.assertEqual(T.get_dtype(t), dtype) self.assertEqual(T.get_device(t), device or T.current_device()) assert_equal(t, value) diff --git a/tests/utils/__init__.py b/tests/utils/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/utils/test_data_utils.py b/tests/utils/test_data_utils.py new file mode 100644 index 0000000..db0b95e --- /dev/null +++ b/tests/utils/test_data_utils.py @@ -0,0 +1,50 @@ +import numpy as np +import pytest + +import tensorkit as tk +from tensorkit import tensor as T +from tests.helper import * +from tests.ops import * + + +class DataUtilsTestCase(TestCase): + + def test_channel_from_last_to_first_nd(self): + for spatial_ndims in (1, 2, 3): + bad_input = np.random.randn(*([7, 8, 9, 10][:spatial_ndims + 1])) + last_to_first = getattr( + tk.utils, + f'numpy_channel_from_last_to_first{spatial_ndims}d' + ) + first_to_last = getattr( + tk.utils, + f'numpy_channel_from_first_to_last{spatial_ndims}d' + ) + last_to_default = getattr( + tk.utils, + f'numpy_channel_from_last_to_default{spatial_ndims}d' + ) + default_to_last = getattr( + tk.utils, + f'numpy_channel_from_default_to_last{spatial_ndims}d' + ) + + for op in (first_to_last, last_to_first): + with pytest.raises(ValueError, + match=f'`input` is expected to be at least ' + f'{spatial_ndims + 2}d'): + _ = op(bad_input) + + for batch_shape in ([5], [2, 3]): + x = np.random.randn(*( + batch_shape + [7, 8, 9, 10][:spatial_ndims + 1])) # assume x is channel last + y = last_to_first(x) + assert_allclose(y, channel_to_first_nd(x, spatial_ndims)) + assert_allclose(first_to_last(y), channel_to_last_nd(y, spatial_ndims)) + + if T.IS_CHANNEL_LAST: + assert_allclose(last_to_default(x), x) + assert_allclose(default_to_last(x), x) + else: + assert_allclose(last_to_default(x), y) + assert_allclose(default_to_last(y), x) diff --git a/tests/utils/test_tensor_stream.py b/tests/utils/test_tensor_stream.py new file mode 100644 index 0000000..d54650c --- /dev/null +++ b/tests/utils/test_tensor_stream.py @@ -0,0 +1,51 @@ +from itertools import product + +import mltk +import numpy as np +import tensorkit as tk +from tensorkit import tensor as T +from tests.helper import * + + +class TensorStreamTestCase(TestCase): + + def test_TensorStream(self): + x = np.random.randn(17, 3, 4) + y = np.random.randn(17, 5) + source = mltk.DataStream.arrays( + [x, y], batch_size=3, random_state=np.random.RandomState()) + + # test tensor stream + for device in [None, T.CPU_DEVICE]: + stream = tk.utils.as_tensor_stream(source, device=device) + self.assertIsInstance(stream, tk.utils.TensorStream) + self.assertEqual(stream.device, device or T.current_device()) + + for attr in ('batch_size', 'array_count', 'data_shapes', + 'data_length', 'random_state'): + self.assertEqual(getattr(stream, attr), getattr(source, attr)) + + out_x, out_y = stream.get_arrays() + assert_allclose(out_x, x, rtol=1e-4, atol=1e-6) + assert_allclose(out_y, y, rtol=1e-4, atol=1e-6) + + for batch_x, batch_y in stream: + self.assertIsInstance(batch_x, T.Tensor) + self.assertEqual(T.get_device(batch_x), 
device or T.current_device()) + self.assertIsInstance(batch_y, T.Tensor) + self.assertEqual(T.get_device(batch_y), device or T.current_device()) + + # test copy + for device2 in [None, T.CPU_DEVICE]: + kwargs = {'device': device2} if device2 is not None else {} + stream2 = stream.copy(**kwargs) + self.assertIs(stream2.source, stream.source) + self.assertEqual(stream2.device, device2 or stream.device) + + # test prefetch + stream = tk.utils.as_tensor_stream(source, prefetch=3) + self.assertIsInstance(stream.source, tk.utils.TensorStream) + + out_x, out_y = stream.get_arrays() + assert_allclose(out_x, x, rtol=1e-4, atol=1e-6) + assert_allclose(out_y, y, rtol=1e-4, atol=1e-6) From a1e54a684430d715810a7776aa2f57812e047c1d Mon Sep 17 00:00:00 2001 From: Haowen Xu Date: Tue, 18 Feb 2020 11:40:45 +0800 Subject: [PATCH 6/7] add tests for optimizers and lr_scheduler --- tensorkit/backend/pytorch_/core.py | 2 + tensorkit/backend/pytorch_/optim.py | 27 +++++-- tensorkit/optim/lr_scheduler.py | 10 ++- tests/helper.py | 18 ++++- tests/optim/__init__.py | 0 tests/optim/test_lr_scheduler.py | 65 ++++++++++++++++ tests/optim/test_optimizer.py | 115 ++++++++++++++++++++++++++++ 7 files changed, 223 insertions(+), 14 deletions(-) create mode 100644 tests/optim/__init__.py create mode 100644 tests/optim/test_lr_scheduler.py create mode 100644 tests/optim/test_optimizer.py diff --git a/tensorkit/backend/pytorch_/core.py b/tensorkit/backend/pytorch_/core.py index 93a28cb..2d85510 100644 --- a/tensorkit/backend/pytorch_/core.py +++ b/tensorkit/backend/pytorch_/core.py @@ -586,6 +586,8 @@ def variable(shape: List[int], if list(initializer.shape) != shape: raise ValueError(f'`initializer.shape` != `shape`: ' f'{list(initializer.shape)} vs {shape}') + if isinstance(initializer, Tensor): + initializer = to_numpy(initializer) ret = as_tensor(initializer, dtype=target_dtype, device=device, force_copy=force_copy) if requires_grad: diff --git a/tensorkit/backend/pytorch_/optim.py b/tensorkit/backend/pytorch_/optim.py index 70e3ca9..dedb60a 100644 --- a/tensorkit/backend/pytorch_/optim.py +++ b/tensorkit/backend/pytorch_/optim.py @@ -2,8 +2,6 @@ from typing import * import torch -from torch.optim import (adam, adadelta, adagrad, adamax, - rmsprop, sgd) from torch.optim.optimizer import Optimizer as TorchOptimizer from .core import * @@ -22,7 +20,7 @@ def lr(self) -> float: def set_lr(self, lr: float): raise NotImplementedError() - def add_params(self, params: List[Variable]): + def add_param_group(self, params: Iterator[Variable]): raise NotImplementedError() def clear_grad(self): @@ -56,6 +54,7 @@ def __init__(self, self.torch_optimizer = torch_optimizer self.set_lr(lr) + @property def lr(self) -> float: return self._lr @@ -65,7 +64,7 @@ def set_lr(self, lr: float): group['lr'] = lr self._lr = lr - def add_params(self, params: Sequence[Variable]): + def add_param_group(self, params: Iterator[Variable]): self.torch_optimizer.add_param_group({ 'params': list(params), 'lr': self._lr, @@ -91,6 +90,13 @@ def state_dict(self) -> Dict[str, Any]: def load_state_dict(self, state_dict: Dict[str, Any]): self.torch_optimizer.load_state_dict(state_dict) + # ensure that we've got all state on the same device as the parameters. 
+        device = self.torch_optimizer.param_groups[0]['params'][0].device
+        for state in self.torch_optimizer.state.values():
+            for k, v in state.items():
+                if isinstance(v, torch.Tensor):
+                    state[k] = v.to(device=device)
+
 
 
 class SGD(BackendOptimizer):
 
@@ -99,9 +105,18 @@ def __init__(self,
                  lr: float,
                  momentum: float = 0.,
                  nesterov: bool = False):
+        """
+        Construct a new :class:`SGD` optimizer.
+
+        Args:
+            params: The parameters to be optimized.
+            lr: The learning rate.
+            momentum: The momentum. Typically 0.9 for momentum SGD optimization.
+            nesterov: Whether or not to use Nesterov momentum.
+        """
         super().__init__(
             lr=lr,
-            torch_optimizer=torch.optim.sgd.SGD(
+            torch_optimizer=torch.optim.SGD(
                 params=params,
                 lr=lr,
                 momentum=momentum,
@@ -121,7 +136,7 @@ def __init__(self,
                  amsgrad: bool = False):
         super().__init__(
             lr=lr,
-            torch_optimizer=adam.Adam(
+            torch_optimizer=torch.optim.Adam(
                 params=params,
                 lr=lr,
                 betas=(beta_1, beta_2),
diff --git a/tensorkit/optim/lr_scheduler.py b/tensorkit/optim/lr_scheduler.py
index 8dfb597..ad24aee 100644
--- a/tensorkit/optim/lr_scheduler.py
+++ b/tensorkit/optim/lr_scheduler.py
@@ -1,5 +1,3 @@
-from typing import *
-
 import mltk
 
 from .core import *
@@ -30,8 +28,8 @@ def update_lr(self):
         """Update the learning rate of the optimizer according to the loop."""
         raise NotImplementedError()
 
-    def close(self):
-        """Close this scheduler, such that it will no longer affect the optimizer."""
+    def unbind_events(self):
+        """Unregister this scheduler from the loop events."""
         self._unbind_events(self.loop)
 
     def _bind_events(self, loop: mltk.TrainLoop):
@@ -42,6 +40,10 @@ def _unbind_events(self, loop: mltk.TrainLoop):
 
 
 class AnnealingLR(LRScheduler):
+    """
+    Learning rate scheduler that anneals the learning rate after every few
+    `epochs`, by a specified `ratio`.
+ """ initial_lr: float ratio: float diff --git a/tests/helper.py b/tests/helper.py index bd07d66..bd64b3f 100644 --- a/tests/helper.py +++ b/tests/helper.py @@ -13,7 +13,7 @@ 'int_dtypes', 'float_dtypes', 'number_dtypes', 'n_samples', - 'assert_allclose', 'assert_not_equal', 'assert_equal', + 'assert_allclose', 'assert_not_allclose', 'assert_equal', 'assert_not_equal', 'slow_test', @@ -47,9 +47,9 @@ def wrapper(x, y, **kwargs): @wrap_numpy_testing_assertion_fn -def assert_not_equal(x, y, err_msg=''): - if np.all(np.equal(x, y)): - msg = f'`x != y` not hold' +def assert_not_allclose(x, y, err_msg='', **kwargs): + if np.all(np.allclose(x, y, **kwargs)): + msg = f'`not allclose(x, y)` not hold' if err_msg: msg += f': {err_msg}' msg += f'\nx = {x}\ny = {y}' @@ -59,6 +59,16 @@ def assert_not_equal(x, y, err_msg=''): assert_equal = wrap_numpy_testing_assertion_fn(np.testing.assert_equal) +@wrap_numpy_testing_assertion_fn +def assert_not_equal(x, y, err_msg=''): + if np.all(np.equal(x, y)): + msg = f'`x != y` not hold' + if err_msg: + msg += f': {err_msg}' + msg += f'\nx = {x}\ny = {y}' + raise AssertionError(msg) + + # decorate a test that is slow def slow_test(fn): fn = pytest.mark.skipif( diff --git a/tests/optim/__init__.py b/tests/optim/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/optim/test_lr_scheduler.py b/tests/optim/test_lr_scheduler.py new file mode 100644 index 0000000..f9c0268 --- /dev/null +++ b/tests/optim/test_lr_scheduler.py @@ -0,0 +1,65 @@ +import mltk +from mock import Mock + +import tensorkit as tk +from tests.helper import * + + +class _MyFakeOptimizer(object): + def __init__(self, lr): + self.lr = lr + + def set_lr(self, lr): + self.lr = lr + + +def standard_lr_scheduler_check(ctx, scheduler_factory, lr_func): + # test start with eopch = 0 + optimizer = _MyFakeOptimizer(0.1) + assert_allclose(optimizer.lr, 0.1) + + ev_hosts = mltk.EventHost() + loop = Mock(epoch=0, on_epoch_end=ev_hosts['on_epoch_end']) + scheduler = scheduler_factory(loop, optimizer) + assert_allclose(optimizer.lr, lr_func(loop, optimizer)) + + for epoch in range(1, 29): + loop.epoch = epoch + if epoch < 15: + scheduler.update_lr() + else: + ev_hosts.fire('on_epoch_end') + assert_allclose(optimizer.lr, lr_func(loop, optimizer)) + + final_lr = optimizer.lr + scheduler.unbind_events() + for epoch in range(29, 39): + loop.epoch = epoch + ev_hosts.fire('on_epoch_end') + assert_allclose(optimizer.lr, final_lr) + + for epoch in range(29, 39): + loop.epoch = epoch + scheduler.update_lr() # still can update the lr if manually called + assert_allclose(optimizer.lr, lr_func(loop, optimizer)) + + # test start with epoch = some value + optimizer = _MyFakeOptimizer(0.1) + assert_allclose(optimizer.lr, 0.1) + + ev_hosts = mltk.EventHost() + loop = Mock(epoch=40, on_epoch_end=ev_hosts['on_epoch_end']) + scheduler = scheduler_factory(loop, optimizer) + assert_allclose(optimizer.lr, lr_func(loop, optimizer)) + + +class LRSchedulerTestCaes(TestCase): + + def test_annealing_lr(self): + standard_lr_scheduler_check( + self, + lambda loop, optimizer: tk.optim.lr_scheduler.AnnealingLR( + loop, optimizer, initial_lr=0.01, ratio=0.5, epochs=2 + ), + lambda loop, optimizer: 0.01 * 0.5 ** int(loop.epoch // 2) + ) diff --git a/tests/optim/test_optimizer.py b/tests/optim/test_optimizer.py new file mode 100644 index 0000000..7aac4ea --- /dev/null +++ b/tests/optim/test_optimizer.py @@ -0,0 +1,115 @@ +import os +from functools import partial +from tempfile import TemporaryDirectory + +import tensorkit as 
tk +from tensorkit import tensor as T +from tests.helper import * + + +def optimizer_standard_check(ctx, optimizer_factory, lr): + a = T.variable([], initializer=123.) + b = T.variable([], initializer=456.) + + def calculate_loss(a, b): + return (a + b) ** 2 + + optimizer = optimizer_factory(iter([a]), lr) + ctx.assertEqual(optimizer.lr, lr) + + # test optimize a + optimizer.clear_grad() + with optimizer.capture_grad(): + loss = calculate_loss(a, b) + optimizer.minimize(loss) + ctx.assertLessEqual(calculate_loss(a, b), loss) + assert_not_equal(a, 123.) + assert_equal(b, 456.) + + # test optimize a and b + optimizer.clear_grad() + optimizer.add_param_group(iter([b])) + with optimizer.capture_grad(): + loss = calculate_loss(a, b) + optimizer.minimize(loss) + ctx.assertLessEqual(calculate_loss(a, b), loss) + assert_not_equal(a, 123.) + assert_not_equal(b, 456.) + + # save checkpoint + with TemporaryDirectory() as temp_dir: + ckpt_path = os.path.join(temp_dir, 'ckpt') + checkpoint = tk.train.Checkpoint(optimizer=optimizer) + checkpoint.save(ckpt_path) + + # test backup and restore the status + a2 = T.variable([], initializer=a) + b2 = T.variable([], initializer=b) + optimizer2 = optimizer_factory([a2], lr) + optimizer2.add_param_group([b2]) + checkpoint2 = tk.train.Checkpoint(optimizer=optimizer2) + checkpoint2.restore(ckpt_path) + + with optimizer2.capture_grad(): + loss = calculate_loss(a2, b2) + optimizer2.minimize(loss) + ctx.assertLessEqual(calculate_loss(a2, b2), loss) + assert_not_equal(a2, a) + assert_not_equal(b2, b) + + # test backup and restore the status, and use maximize instead of minimize + a3 = T.variable([], initializer=a) + b3 = T.variable([], initializer=b) + optimizer3 = optimizer_factory([a3], lr) + optimizer3.add_param_group([b3]) + checkpoint3 = tk.train.Checkpoint(optimizer=optimizer3) + checkpoint3.restore(ckpt_path) + + with optimizer3.capture_grad(): + loss = calculate_loss(a3, b3) + optimizer3.maximize(-loss) + ctx.assertLessEqual(calculate_loss(a3, b3), loss) + assert_allclose(a3, a2) + assert_allclose(b3, b2) + assert_allclose(calculate_loss(a3, b3), calculate_loss(a2, b2)) + + # backup and restore the status, change the learning rate and get + # the third output, and compare to the result with optimizer2 + a4 = T.variable([], initializer=a) + b4 = T.variable([], initializer=b) + optimizer4 = optimizer_factory([a4], lr) + optimizer4.add_param_group([b4]) + checkpoint4 = tk.train.Checkpoint(optimizer=optimizer4) + checkpoint4.restore(ckpt_path) + + optimizer4.set_lr(lr * 0.5) + ctx.assertEqual(optimizer4.lr, lr * 0.5) + with optimizer4.capture_grad(): + loss = calculate_loss(a4, b4) + optimizer4.minimize(loss) + assert_not_allclose(a4, a2) + assert_not_allclose(b4, b2) + assert_not_allclose(calculate_loss(a4, b4), calculate_loss(a2, b2)) + + # now proceed the optimization from the first optimizer, and compare + # the result with optimizer2 + optimizer.clear_grad() + with optimizer.capture_grad(): + loss = calculate_loss(a, b) + optimizer.minimize(loss) + ctx.assertLessEqual(calculate_loss(a, b), loss) + assert_allclose(a, a2) + assert_allclose(b, b2) + assert_allclose(calculate_loss(a, b), calculate_loss(a2, b2)) + + +class OptimizerTestCase(TestCase): + + def test_sgd(self): + optimizer_standard_check(self, partial(tk.optim.SGD), 0.001) + optimizer_standard_check(self, partial(tk.optim.SGD, momentum=0.9), 0.001) + optimizer_standard_check(self, partial(tk.optim.SGD, momentum=0.9, nesterov=True), 0.001) + + def test_adam(self): + optimizer_standard_check(self, 
partial(tk.optim.Adam), 0.1) + optimizer_standard_check(self, partial(tk.optim.Adam, amsgrad=True), 0.1) From 1b8042578215abcb0e9fc46866593650c4b5a33a Mon Sep 17 00:00:00 2001 From: Haowen Xu Date: Thu, 20 Feb 2020 16:57:57 +0800 Subject: [PATCH 7/7] add tests for SequentialBuilder --- tensorkit/layers/builder.py | 150 ++++++--- tensorkit/layers/shape_.py | 27 +- tests/layers/test_builder.py | 575 +++++++++++++++++++++++++++++++++++ tests/tensor/test_core.py | 2 +- 4 files changed, 707 insertions(+), 47 deletions(-) create mode 100644 tests/layers/test_builder.py diff --git a/tensorkit/layers/builder.py b/tensorkit/layers/builder.py index 240d500..9442c17 100644 --- a/tensorkit/layers/builder.py +++ b/tensorkit/layers/builder.py @@ -1,4 +1,3 @@ -import re from contextlib import contextmanager from typing import * @@ -14,7 +13,7 @@ from ..arg_check import * from ..typing_ import * -__all__ = ['SequentialBuilder'] +__all__ = ['LayerArgs', 'SequentialBuilder'] def _get_layer_class(name: str) -> type: @@ -64,7 +63,7 @@ def _calculate_deconv_output_size(in_size, kernel_size, stride, padding, output_ if i is None: out_size.append(None) else: - l = T.utils.calculate_deconv_output_size(d[i], [k], [s], [p], [op], [d])[0] + l = T.utils.calculate_deconv_output_size([i], [k], [s], [p], [op], [d])[0] out_size.append(l) return out_size @@ -157,6 +156,8 @@ def build(self, type_: Union[str, type], *args, **kwargs): Returns: The built layer object. """ + if isinstance(type_, str): + type_ = _get_layer_class(type_) return type_(*args, **self.get_kwargs(type_, **kwargs)) @@ -176,7 +177,7 @@ def __init__(self, *, in_shape: Sequence[Optional[int]] = NOT_SET, in_channels: Optional[int] = NOT_SET, - in_spatial_shape: List[int] = NOT_SET, + in_size: Sequence[Optional[int]] = NOT_SET, in_builder: 'SequentialBuilder' = NOT_SET): """ Construct a new :class:`SequentialBuilder`. @@ -188,7 +189,7 @@ def __init__(self, used as the `in_shape` of this :class:`SequentialBuilder`. in_shape: The input shape. in_channels: The number of input channels. - in_spatial_shape: The input spatial shape. Can be specified + in_size: The input spatial size. Can be specified only if `in_channels` is specified, or `in_spec` is a int. in_builder: Explicitly specify the previous sequential builder. """ @@ -201,28 +202,30 @@ def __init__(self, '`in_builder` should be specified.' ) + layer_args = None if isinstance(in_spec, SequentialBuilder): in_builder = in_spec layer_args = LayerArgs(in_builder.layer_args) elif hasattr(in_spec, '__iter__'): in_shape = in_spec - layer_args = LayerArgs() - else: + elif in_spec is not NOT_SET: in_channels = in_spec + + if layer_args is None: layer_args = LayerArgs() - if in_spatial_shape is not NOT_SET and in_channels is NOT_SET: + if in_size is not NOT_SET and in_channels is NOT_SET: raise ValueError( - '`in_spatial_shape` can be specified only when `in_channels` ' + '`in_size` can be specified only when `in_channels` ' 'is specified, or `in_spec` is None or an integer.' 
) if in_shape is not NOT_SET: in_shape = list(in_shape) elif in_channels is not NOT_SET: - if in_spatial_shape is NOT_SET: - in_spatial_shape = [] - in_shape = _unsplit_channel_spatial(in_channels, in_spatial_shape) + if in_size is NOT_SET: + in_size = [] + in_shape = _unsplit_channel_spatial(in_channels, in_size) else: in_shape = list(in_builder.out_shape) @@ -238,7 +241,7 @@ def _assert_out_shape(self, spatial: Optional[Sequence[bool]] = None, at_least: bool = False) -> List[Optional[int]]: if shape is None: - if channel is None: + if channel is None: # pragma: no cover raise ValueError('`channel` must be specified when `shape` is not.') shape = _unsplit_channel_spatial(channel, spatial or []) @@ -315,7 +318,7 @@ def add(self, out_shape: List[Optional[int]] = NOT_SET, *, out_channels: Optional[int] = NOT_SET, - out_spatial_shape: List[Optional[int]] = NOT_SET + out_size: List[Optional[int]] = NOT_SET ) -> 'SequentialBuilder': """ Manually add a layer to this builder. @@ -325,23 +328,23 @@ def add(self, out_shape: The new output shape. out_channels: The new output channels. Should be specified and only be specified when `out_shape` is not. - out_spatial_shape: The new spatial shape. Should only be specified + out_size: The new spatial shape. Should only be specified when `out_channels` is specified. Returns: This sequential builder object. """ + if out_size is not NOT_SET and out_channels is NOT_SET: + raise ValueError('`out_size` can only be specified when ' + '`out_channels` is specified.') if (out_shape is NOT_SET) == (out_channels is NOT_SET): raise ValueError('Either `out_shape` or `out_channels` should be ' 'specified, but not both.') - if out_spatial_shape is not NOT_SET and out_channels is NOT_SET: - raise ValueError('`out_spatial_shape` can only be specified when ' - '`out_channels` is specified.') if out_channels is not NOT_SET: - if out_spatial_shape is NOT_SET: - out_spatial_shape = [] - out_shape = _unsplit_channel_spatial(out_channels, out_spatial_shape) + if out_size is NOT_SET: + out_size = [] + out_shape = _unsplit_channel_spatial(out_channels, out_size) self.layers.append(layer) self.out_shape = out_shape @@ -359,7 +362,7 @@ def build(self, flatten_to_ndims: bool = True) -> T.Module: The built sequential layer. 
""" if not self.layers: - raise RuntimeError('No layer has been added.') + return Identity() elif len(self.layers) == 1: layer = self.layers[0] else: @@ -369,6 +372,10 @@ def build(self, flatten_to_ndims: bool = True) -> T.Module: layer = FlattenToNDims(layer, ndims=len(self.in_shape) + 1) return layer + # ---- identity layer (add no layer) ---- + def identity(self): + return self + # ---- activation ---- def _make_activation(self, type_): self._assert_out_shape((False,), at_least=True) @@ -404,10 +411,13 @@ def dense(self, out_features: int, **kwargs): # ---- convolution layers ---- def _conv_nd(self, spatial_ndims, conv_cls, out_channels, **kwargs): + kwargs = self.layer_args.get_kwargs(conv_cls, **kwargs) + if 'kernel_size' not in kwargs: + raise ValueError('The `kernel_size` argument is required.') + in_channels, in_size = self._split_out_shape(True, [False] * spatial_ndims) # validate the arguments - kwargs = self.layer_args.get_kwargs(conv_cls, **kwargs) kernel_size = validate_conv_size('kernel_size', kwargs['kernel_size'], spatial_ndims) stride = validate_conv_size('stride', kwargs.get('stride', 1), spatial_ndims) dilation = validate_conv_size('dilation', kwargs.get('dilation', 1), spatial_ndims) @@ -469,10 +479,17 @@ def res_block3d(self, # ---- deconvolution layers ---- def _deconv_nd(self, spatial_ndims, deconv_cls, out_channels, output_size, **kwargs): + kwargs = self.layer_args.get_kwargs(deconv_cls, **kwargs) + if 'kernel_size' not in kwargs: + raise ValueError('The `kernel_size` argument is required.') + + if output_size is not NOT_SET: + kwargs.pop('output_size', None) + else: + output_size = kwargs.pop('output_size', NOT_SET) in_channels, in_size = self._split_out_shape(True, [False] * spatial_ndims) # validate the arguments - kwargs = self.layer_args.get_kwargs(deconv_cls, **kwargs) kernel_size = validate_conv_size('kernel_size', kwargs['kernel_size'], spatial_ndims) stride = validate_conv_size('stride', kwargs.get('stride', 1), spatial_ndims) dilation = validate_conv_size('dilation', kwargs.get('dilation', 1), spatial_ndims) @@ -480,7 +497,7 @@ def _deconv_nd(self, spatial_ndims, deconv_cls, out_channels, output_size, **kwa kwargs.get('padding', PaddingMode.DEFAULT), kernel_size, dilation, spatial_ndims) if 'output_padding' in kwargs and output_size is not NOT_SET: - raise ValueError('`output_padding` and `out_shape` cannot be both specified.') + raise ValueError('`output_padding` and `output_size` cannot be both specified.') elif output_size is not NOT_SET: if len(output_size) != spatial_ndims: raise ValueError( @@ -493,12 +510,9 @@ def _deconv_nd(self, spatial_ndims, deconv_cls, out_channels, output_size, **kwa f'is supported only when the previous output shape ' f'is all deterministic.' 
) + output_padding = T.utils.calculate_deconv_output_padding( + in_size, output_size, kernel_size, stride, padding, dilation) out_size = output_size - output_padding = [ - T.utils.calculate_deconv_output_padding(*args) - for args in zip( - in_size, output_size, kernel_size, stride, padding, dilation) - ] elif 'output_padding' in kwargs: output_padding = validate_output_padding( kwargs.get('output_padding', 0), stride, dilation, spatial_ndims) @@ -523,63 +537,63 @@ def linear_conv_transpose1d(self, output_size: List[int] = NOT_SET, **kwargs) -> 'SequentialBuilder': return self._deconv_nd( - 1, LinearConvTranspose1d, out_channels, output_size, **kwargs) + 1, LinearConvTranspose1d, out_channels, output_size=output_size, **kwargs) def linear_conv_transpose2d(self, out_channels: int, output_size: List[int] = NOT_SET, **kwargs) -> 'SequentialBuilder': return self._deconv_nd( - 2, LinearConvTranspose2d, out_channels, output_size, **kwargs) + 2, LinearConvTranspose2d, out_channels, output_size=output_size, **kwargs) def linear_conv_transpose3d(self, out_channels: int, output_size: List[int] = NOT_SET, **kwargs) -> 'SequentialBuilder': return self._deconv_nd( - 3, LinearConvTranspose3d, out_channels, output_size, **kwargs) + 3, LinearConvTranspose3d, out_channels, output_size=output_size, **kwargs) def conv_transpose1d(self, out_channels: int, output_size: List[int] = NOT_SET, **kwargs) -> 'SequentialBuilder': return self._deconv_nd( - 1, ConvTranspose1d, out_channels, output_size, **kwargs) + 1, ConvTranspose1d, out_channels, output_size=output_size, **kwargs) def conv_transpose2d(self, out_channels: int, output_size: List[int] = NOT_SET, **kwargs) -> 'SequentialBuilder': return self._deconv_nd( - 2, ConvTranspose2d, out_channels, output_size, **kwargs) + 2, ConvTranspose2d, out_channels, output_size=output_size, **kwargs) def conv_transpose3d(self, out_channels: int, output_size: List[int] = NOT_SET, **kwargs) -> 'SequentialBuilder': return self._deconv_nd( - 3, ConvTranspose3d, out_channels, output_size, **kwargs) + 3, ConvTranspose3d, out_channels, output_size=output_size, **kwargs) def res_block_transpose1d(self, out_channels: int, output_size: List[int] = NOT_SET, **kwargs) -> 'SequentialBuilder': return self._deconv_nd( - 1, ResBlockTranspose1d, out_channels, output_size, **kwargs) + 1, ResBlockTranspose1d, out_channels, output_size=output_size, **kwargs) def res_block_transpose2d(self, out_channels: int, output_size: List[int] = NOT_SET, **kwargs) -> 'SequentialBuilder': return self._deconv_nd( - 2, ResBlockTranspose2d, out_channels, output_size, **kwargs) + 2, ResBlockTranspose2d, out_channels, output_size=output_size, **kwargs) def res_block_transpose3d(self, out_channels: int, output_size: List[int] = NOT_SET, **kwargs) -> 'SequentialBuilder': return self._deconv_nd( - 3, ResBlockTranspose3d, out_channels, output_size, **kwargs) + 3, ResBlockTranspose3d, out_channels, output_size=output_size, **kwargs) # aliases for the deconvolution layers linear_deconv1d = linear_conv_transpose1d @@ -591,10 +605,13 @@ def res_block_transpose3d(self, # ---- pool layers ---- def _pool_nd(self, spatial_ndims, pool_cls, **kwargs): - in_channels, in_size = self._split_out_shape(True, [False] * spatial_ndims) + kwargs = self.layer_args.get_kwargs(pool_cls, **kwargs) + if 'kernel_size' not in kwargs: + raise ValueError('The `kernel_size` argument is required.') + + in_channels, in_size = self._split_out_shape(False, [False] * spatial_ndims) # validate the arguments - kwargs = 
self.layer_args.get_kwargs(pool_cls, **kwargs) kernel_size = validate_conv_size('kernel_size', kwargs['kernel_size'], spatial_ndims) stride = validate_conv_size('stride', kwargs.get('stride', kernel_size), spatial_ndims) dilation = [1] * spatial_ndims @@ -627,13 +644,15 @@ def max_pool3d(self, **kwargs) -> 'SequentialBuilder': return self._pool_nd(3, MaxPool3d, **kwargs) def _global_avg_pool_nd(self, spatial_ndims, pool_cls, **kwargs): - in_channels, in_size = self._split_out_shape(True, [False] * spatial_ndims) + kwargs = self.layer_args.get_kwargs(pool_cls, **kwargs) keepdims = kwargs.get('keepdims', False) + + in_channels, in_size = self._split_out_shape(False, [False] * spatial_ndims) if keepdims: out_shape = _unsplit_channel_spatial(in_channels, [1] * spatial_ndims) else: out_shape = [in_channels] - layer = pool_cls(**self.layer_args.get_kwargs(pool_cls, **kwargs)) + layer = pool_cls(**kwargs) return self.add(layer, out_shape) def global_avg_pool1d(self, **kwargs) -> 'SequentialBuilder': @@ -644,3 +663,48 @@ def global_avg_pool2d(self, **kwargs) -> 'SequentialBuilder': def global_avg_pool3d(self, **kwargs) -> 'SequentialBuilder': return self._global_avg_pool_nd(3, GlobalAvgPool3d, **kwargs) + + # ---- reshape layers ---- + def _channel_first_to_last_nd(self, spatial_ndims, layer_cls): + in_shape = self._assert_out_shape([False] * (spatial_ndims + 1)) + out_shape = in_shape[1:] + in_shape[:1] + return self.add(layer_cls(), out_shape) + + def channel_first_to_last1d(self): + return self._channel_first_to_last_nd(1, ChannelFirstToLast1d) + + def channel_first_to_last2d(self): + return self._channel_first_to_last_nd(2, ChannelFirstToLast2d) + + def channel_first_to_last3d(self): + return self._channel_first_to_last_nd(3, ChannelFirstToLast3d) + + def _channel_last_to_first_nd(self, spatial_ndims, layer_cls): + in_shape = self._assert_out_shape([False] * (spatial_ndims + 1)) + out_shape = in_shape[-1:] + in_shape[:-1] + return self.add(layer_cls(), out_shape) + + def channel_last_to_first1d(self): + return self._channel_last_to_first_nd(1, ChannelLastToFirst1d) + + def channel_last_to_first2d(self): + return self._channel_last_to_first_nd(2, ChannelLastToFirst2d) + + def channel_last_to_first3d(self): + return self._channel_last_to_first_nd(3, ChannelLastToFirst3d) + + if T.IS_CHANNEL_LAST: + channel_last_to_default1d = \ + channel_last_to_default2d = \ + channel_last_to_default3d = \ + channel_default_to_last1d = \ + channel_default_to_last2d = \ + channel_default_to_last3d = \ + identity + else: + channel_last_to_default1d = channel_last_to_first1d + channel_last_to_default2d = channel_last_to_first2d + channel_last_to_default3d = channel_last_to_first3d + channel_default_to_last1d = channel_first_to_last1d + channel_default_to_last2d = channel_first_to_last2d + channel_default_to_last3d = channel_first_to_last3d diff --git a/tensorkit/layers/shape_.py b/tensorkit/layers/shape_.py index ea7266a..5809f99 100644 --- a/tensorkit/layers/shape_.py +++ b/tensorkit/layers/shape_.py @@ -1,5 +1,6 @@ from typing import * +from .. 
import tensor as T from ..tensor import (Tensor, Module, shape, rank, flatten_to_ndims, unflatten_from_ndims, pad) from ..tensor.nn import * @@ -10,19 +11,22 @@ 'ConstantPad', 'ConstantPad1d', 'ConstantPad2d', 'ConstantPad3d', 'ChannelFirstToLast1d', 'ChannelFirstToLast2d', 'ChannelFirstToLast3d', 'ChannelLastToFirst1d', 'ChannelLastToFirst2d', 'ChannelLastToFirst3d', + 'ChannelDefaultToLast1d', 'ChannelDefaultToLast2d', 'ChannelDefaultToLast3d', + 'ChannelLastToDefault1d', 'ChannelLastToDefault2d', 'ChannelLastToDefault3d', ] # ---- FlattenToNDims ---- class FlattenToNDims(BaseLayer): - __constants__ = ('layer', 'ndims') + __constants__ = ('wrapped', 'ndims') + wrapped: Module ndims: int def __init__(self, layer: Module, ndims: int): super().__init__() - self.layer = layer + self.wrapped = layer self.ndims = ndims def forward(self, input: Tensor) -> Tensor: @@ -38,7 +42,7 @@ def forward(self, input: Tensor) -> Tensor: # flatten, get output from the layer, and then unflatten output, front_shape = flatten_to_ndims(input, expected_rank) - output = self.layer(output) + output = self.wrapped(output) return unflatten_from_ndims(output, front_shape) @@ -163,3 +167,20 @@ class ChannelLastToFirst3d(BaseLayer): def forward(self, input: Tensor) -> Tensor: return channel_last_to_first3d(input) + + +if T.IS_CHANNEL_LAST: + ChannelLastToDefault1d = \ + ChannelLastToDefault2d = \ + ChannelLastToDefault3d = \ + ChannelDefaultToLast1d = \ + ChannelDefaultToLast2d = \ + ChannelDefaultToLast3d = \ + Identity +else: + ChannelLastToDefault1d = ChannelLastToFirst1d + ChannelLastToDefault2d = ChannelLastToFirst2d + ChannelLastToDefault3d = ChannelLastToFirst3d + ChannelDefaultToLast1d = ChannelFirstToLast1d + ChannelDefaultToLast2d = ChannelFirstToLast2d + ChannelDefaultToLast3d = ChannelFirstToLast3d diff --git a/tests/layers/test_builder.py b/tests/layers/test_builder.py new file mode 100644 index 0000000..b9a47f3 --- /dev/null +++ b/tests/layers/test_builder.py @@ -0,0 +1,575 @@ +from itertools import product + +import mltk +import pytest + +import tensorkit as tk +from tensorkit import tensor as T +from tensorkit.layers import * +from tests.helper import * +from tests.ops import * + + +class _RecordInitArgsLayer(BaseLayer): + + def __init__(self, *args, **kwargs): + super().__init__() + self.args = tuple(args) + self.kwargs = dict(kwargs) + + def __repr__(self): + return repr((self.args, self.kwargs)) + + def __eq__(self, other): + if isinstance(other, _RecordInitArgsLayer): + args, kwargs = other.args, other.kwargs + else: + args, kwargs = other + return args == self.args and kwargs == self.kwargs + + +class LayerArgsTestCase(TestCase): + + def test_set_args(self): + # empty default args + args = tk.layers.LayerArgs() + self.assertEqual(args.get_kwargs(_RecordInitArgsLayer), {}) + + o = args.build(_RecordInitArgsLayer) + self.assertIsInstance(o, _RecordInitArgsLayer) + self.assertEqual(o, ((), {})) + + # set default args + args.set_args(_RecordInitArgsLayer, d=4) + self.assertEqual(args.get_kwargs(_RecordInitArgsLayer), {'d': 4}) + self.assertEqual(args.get_kwargs(_RecordInitArgsLayer, c=3, d=5), {'c': 3, 'd': 5}) + + o = args.build(_RecordInitArgsLayer) + self.assertIsInstance(o, _RecordInitArgsLayer) + self.assertEqual(o, ((), {'d': 4})) + + o = args.build(_RecordInitArgsLayer, 1, 2, c=3, d=5) + self.assertIsInstance(o, _RecordInitArgsLayer) + self.assertEqual(o, ((1, 2), {'c': 3, 'd': 5})) + + # inherit default args from previous instance + args2 = tk.layers.LayerArgs(args) + 
args2.set_args([_RecordInitArgsLayer], c=5) + self.assertEqual(args2.get_kwargs(_RecordInitArgsLayer), {'c': 5, 'd': 4}) + self.assertEqual(args.get_kwargs(_RecordInitArgsLayer), {'d': 4}) # should not change + + def test_layer_names_as_types(self): + args = tk.layers.LayerArgs() + args.set_args(['dense', 'conv2d'], activation=tk.layers.LeakyReLU) + args.set_args(['conv2d'], kernel_size=3) + + self.assertEqual(args.get_kwargs('dense'), {'activation': tk.layers.LeakyReLU}) + self.assertEqual(args.get_kwargs('conv2d'), { + 'activation': tk.layers.LeakyReLU, + 'kernel_size': 3, + }) + + l1 = args.build('dense', 4, 4) + self.assertIsInstance(l1[1], tk.layers.LeakyReLU) + l2 = args.build('conv2d', 4, 4) + self.assertIsInstance(l2[1], tk.layers.LeakyReLU) + self.assertEqual(T.shape(l2[0].weight_store()), [4, 4, 3, 3]) + + +def sequential_builder_standard_check(ctx, + fn_name, + layer_cls, + input_shape, + input_mask, + args, + builder_args, + kwargs, + layer_kwargs=None, + builder_kwargs=None, + output_mask=None, + at_least=None): + if output_mask is None: + output_mask = input_mask + x = T.random.randn([3] + input_shape) + + # the expected layer + T.random.seed(1234) + layer_kwargs = dict(layer_kwargs or {}) + layer_kwargs.update(kwargs) + layer0 = layer_cls(*args, **layer_kwargs) + y = layer0(x) + output_shape = T.shape(y)[1:] + + def fn(input_shape, output_shape, kwargs, + builder_set_arg_layer, builder_set_arg_kwargs): + T.random.seed(1234) + builder = SequentialBuilder(input_shape) + ctx.assertEqual(builder.in_shape, input_shape) + if builder_set_arg_kwargs: + ctx.assertIs( + builder.set_args( + builder_set_arg_layer, **builder_set_arg_kwargs), + builder, + ), + ctx.assertIs( + getattr(builder, fn_name)(*builder_args, **kwargs), + builder, + ) + ctx.assertEqual(builder.out_shape, output_shape) + layer = builder.build(False) + ctx.assertIsInstance(layer, layer_cls) + assert_allclose(layer(x), y, rtol=1e-4, atol=1e-6) + + def apply_mask(shape, mask): + return [s if m else None for s, m in zip(shape, mask)] + + # do check various ways to specify the arguments + builder_kwargs = dict(builder_kwargs or {}) + builder_kwargs.update(kwargs) + fn(input_shape, output_shape, builder_kwargs, None, {}) + fn(input_shape, output_shape, {}, layer_cls, builder_kwargs) + fn(input_shape, output_shape, {}, fn_name, builder_kwargs) + + if False in input_mask: + fn(apply_mask(input_shape, input_mask), + apply_mask(output_shape, output_mask), + {}, fn_name, builder_kwargs) + + # check some common error checks + if 'kernel_size' in builder_kwargs: + kwargs2 = dict(builder_kwargs) + kwargs2.pop('kernel_size') + with pytest.raises(ValueError, + match='The `kernel_size` argument is required'): + fn(input_shape, output_shape, kwargs2, None, {}) + + if 'output_size' not in builder_kwargs: + for i, m in enumerate(input_mask): + if not m: + continue + input_shape2 = list(input_shape) + input_shape2[i] = None + with pytest.raises(ValueError, + match=f'Axis {i - len(input_shape)} of the previous ' + f'output shape is expected to be deterministic'): + fn(input_shape2, output_shape, builder_kwargs, None, {}) + + if len(input_shape) >= 1: + input_shape2 = list(input_shape[:-1]) + if len(input_shape) == at_least: + with pytest.raises(ValueError, + match=f'The previous output shape is expected to ' + f'be at least {len(input_shape)}d'): + fn(input_shape2, output_shape, builder_kwargs, None, {}) + elif at_least is None: + with pytest.raises(ValueError, + match=f'The previous output shape is expected to ' + f'be exactly 
{len(input_shape)}d'): + fn(input_shape2, output_shape, builder_kwargs, None, {}) + + +class SequentialBuilderTestCase(TestCase): + + def test_construct(self): + def assert_in_shape(b, s): + self.assertEqual(b.in_shape, s) + self.assertEqual(b.out_shape, s) + + # test the input shape + assert_in_shape(SequentialBuilder(3), [3]) + assert_in_shape(SequentialBuilder(None), [None]) + assert_in_shape(SequentialBuilder(in_channels=3), [3]) + assert_in_shape(SequentialBuilder(in_channels=None), [None]) + + assert_in_shape(SequentialBuilder([3]), [3]) + assert_in_shape(SequentialBuilder([3, 4]), [3, 4]) + assert_in_shape(SequentialBuilder((3, 4)), [3, 4]) + assert_in_shape(SequentialBuilder([None, None]), [None, None]) + assert_in_shape(SequentialBuilder(in_shape=[3, 4]), [3, 4]) + assert_in_shape(SequentialBuilder(in_shape=(3, 4)), [3, 4]) + assert_in_shape(SequentialBuilder(in_shape=(None, None)), [None, None]) + + assert_in_shape( + SequentialBuilder(5, in_size=[3, 4]), + make_conv_shape([], 5, [3, 4]), + ) + assert_in_shape( + SequentialBuilder(in_channels=5, in_size=[3, 4]), + make_conv_shape([], 5, [3, 4]), + ) + assert_in_shape( + SequentialBuilder(in_channels=5, in_size=(3, 4)), + make_conv_shape([], 5, [3, 4]), + ) + assert_in_shape( + SequentialBuilder(in_channels=5, in_size=(None, None)), + [s if s == 5 else None for s in make_conv_shape([], 5, [3, 4])], + ) + + # test in_builder + in_shape0 = make_conv_shape([], 5, [3, 4]) + for in_shape in (in_shape0, [None if i != 5 else i for i in in_shape0]): + builder0 = SequentialBuilder(in_shape) + builder0.set_args(['dense', 'conv2d'], activation=tk.layers.LeakyReLU) + builder0.set_args('conv2d', kernel_size=3) + + builder = SequentialBuilder(builder0) + assert_in_shape(builder, in_shape) + self.assertEqual( + builder.layer_args.get_kwargs(Dense), + {'activation': tk.layers.LeakyReLU} + ) + self.assertEqual( + builder.layer_args.get_kwargs(Conv2d), + {'activation': tk.layers.LeakyReLU, 'kernel_size': 3} + ) + + # test arg errors + with pytest.raises(ValueError, + match='One and only one of `in_spec`, `in_shape`, ' + '`in_channels` and `in_builder` should be ' + 'specified'): + _ = SequentialBuilder() + + arg_values = { + 'in_spec': [3, 4], + 'in_shape': [5, 6], + 'in_channels': 7, + 'in_builder': builder0, + } + for arg1, arg2 in product( + ['in_spec', 'in_shape', 'in_channels', 'in_builder'], + ['in_spec', 'in_shape', 'in_channels', 'in_builder']): + if arg1 == arg2: + continue + with pytest.raises(ValueError, + match='One and only one of `in_spec`, `in_shape`, ' + '`in_channels` and `in_builder` should be ' + 'specified'): + _ = SequentialBuilder(**{arg1: arg_values[arg1], + arg2: arg_values[arg2]}) + for arg in ['in_spec', 'in_shape', 'in_builder']: + with pytest.raises(ValueError, + match='`in_size` can be specified only when ' + '`in_channels` is specified, or `in_spec` ' + 'is None or an integer'): + _ = SequentialBuilder(in_size=[8, 9], **{arg: arg_values[arg]}) + + def test_arg_scope(self): + builder = SequentialBuilder(5) + self.assertEqual(builder.layer_args.get_kwargs(Dense), {}) + self.assertEqual(builder.layer_args.get_kwargs(Conv2d), {}) + with builder.arg_scope(['conv2d', Dense], activation=LeakyReLU): + self.assertEqual(builder.layer_args.get_kwargs(Dense), + {'activation': LeakyReLU}) + self.assertEqual(builder.layer_args.get_kwargs(Conv2d), + {'activation': LeakyReLU}) + with builder.arg_scope('dense', activation=Sigmoid, normalizer=BatchNorm): + self.assertEqual(builder.layer_args.get_kwargs(Dense), + {'activation': Sigmoid, 
'normalizer': BatchNorm}) + with builder.arg_scope(Conv2d, activation=Tanh, normalizer=BatchNorm2d): + self.assertEqual(builder.layer_args.get_kwargs(Conv2d), + {'activation': Tanh, 'normalizer': BatchNorm2d}) + self.assertEqual(builder.layer_args.get_kwargs(Conv2d), + {'activation': LeakyReLU}) + self.assertEqual(builder.layer_args.get_kwargs(Dense), + {'activation': LeakyReLU}) + self.assertEqual(builder.layer_args.get_kwargs(Dense), {}) + self.assertEqual(builder.layer_args.get_kwargs(Conv2d), {}) + + def test_add(self): + def fn(in_shape, layer, out_shape): + # test using `out_shape` + builder = SequentialBuilder(in_shape) + self.assertIs(builder.add(layer, out_shape), builder) + self.assertEqual(builder.out_shape, out_shape) + self.assertIs(builder.build(False), layer) + + with pytest.raises(ValueError, + match='`out_size` can only be specified when ' + '`out_channels` is specified'): + _ = builder.add(layer, out_shape, out_size=[]) + + # test using `out_channels` and `out_size` + def g(out_channels, **out_size_args): + builder = SequentialBuilder(in_shape) + self.assertIs( + builder.add(layer, out_channels=out_channels, **out_size_args), + builder + ) + self.assertEqual(builder.out_shape, out_shape) + self.assertIs(builder.build(False), layer) + + # test error + with pytest.raises(ValueError, + match='Either `out_shape` or `out_channels` ' + 'should be specified, but not both'): + _ = builder.add(layer, out_shape, out_channels=out_channels, + **out_size_args) + with pytest.raises(ValueError, + match='Either `out_shape` or `out_channels` ' + 'should be specified, but not both'): + _ = builder.add(layer) + + if len(out_shape) > 1: + if T.IS_CHANNEL_LAST: + out_channels, out_size = out_shape[-1], out_shape[:-1] + else: + out_channels, out_size = out_shape[0], out_shape[1:] + g(out_channels, out_size=out_size) + else: + g(out_shape[0], out_size=[]) + g(out_shape[0]) + + fn([5], Linear(5, 3), [3]) + fn([None], Linear(5, 3), [None]) + fn(make_conv_shape([], 5, [6, 7]), + Conv2d(5, 3, kernel_size=1), + make_conv_shape([], 3, [6, 7])) + fn(make_conv_shape([], None, [None, None]), + Conv2d(5, 3, kernel_size=1), + make_conv_shape([], None, [None, None])) + + def test_build(self): + builder = SequentialBuilder(5) + self.assertIsInstance(builder.build(), Identity) + self.assertIsInstance(builder.build(False), Identity) + + # build with one layer + builder.dense(4) + l1 = builder.build(False) + self.assertIsInstance(l1, Dense) + l = builder.build(True) + self.assertIsInstance(l, FlattenToNDims) + x = T.random.randn([3, 5]) + assert_allclose(l(x), l1(x), rtol=1e-4, atol=1e-6) + + # build with two layers + builder.linear(3) + l = builder.build(False) + self.assertIsInstance(l, Sequential) + self.assertIs(l[0], l1) + l2 = l[-1] + self.assertIsInstance(l2, Linear) + l = builder.build(True) + self.assertIsInstance(l, FlattenToNDims) + x = T.random.randn([3, 5]) + assert_allclose(l(x), l2(l1(x)), rtol=1e-4, atol=1e-6) + + def test_identity(self): + for in_shape in ([], [5], [3, 4, 5]): + sequential_builder_standard_check( + ctx=self, fn_name='identity', layer_cls=Identity, + input_shape=in_shape, input_mask=[False] * len(in_shape), + args=(), builder_args=(), kwargs={}, at_least=0, + ) + + def test_activation(self): + for name in ['relu', 'leaky_relu', 'sigmoid', 'tanh', 'log_softmax']: + layer_cls = tk.layers.get_activation_class(name) + for in_shape in ([5], [3, 4, 5]): + sequential_builder_standard_check( + ctx=self, fn_name=name, layer_cls=layer_cls, + input_shape=in_shape, input_mask=[False] * 
len(in_shape), + args=(), builder_args=(), kwargs={}, at_least=1, + ) + + def test_linear(self): + sequential_builder_standard_check( + ctx=self, fn_name='linear', layer_cls=Linear, input_shape=[5], + input_mask=[True], args=(5, 4), builder_args=(4,), + kwargs={'weight_norm': True}, + ) + sequential_builder_standard_check( + ctx=self, fn_name='dense', layer_cls=Dense, input_shape=[5], + input_mask=[True], args=(5, 4), builder_args=(4,), + kwargs={'weight_norm': True, 'activation': LeakyReLU}, + ) + + def test_conv_and_deconv(self): + for spatial_ndims in (1, 2, 3): + input_shape = make_conv_shape([], 5, [15, 16, 17][:spatial_ndims]) + input_mask = [i == 5 for i in input_shape] + for fn_name, layer_cls in zip( + [ + f'linear_conv{spatial_ndims}d', + f'conv{spatial_ndims}d', + f'res_block{spatial_ndims}d' + ], + [ + getattr(tk.layers, f'LinearConv{spatial_ndims}d'), + getattr(tk.layers, f'Conv{spatial_ndims}d'), + getattr(tk.layers, f'ResBlock{spatial_ndims}d'), + ]): + kwargs = {'kernel_size': 3, 'stride': 2, 'padding': 'half', + 'weight_norm': True} + if not fn_name.startswith('linear_'): + kwargs['activation'] = LeakyReLU + sequential_builder_standard_check( + ctx=self, fn_name=fn_name, layer_cls=layer_cls, + input_shape=input_shape, input_mask=input_mask, + args=(5, 4), builder_args=(4,), + kwargs=kwargs + ) + + def test_deconv(self): + for spatial_ndims in (1, 2, 3): + output_size = [16, 17, 18][:spatial_ndims] + output_shape = make_conv_shape([], 4, output_size) + layer0 = getattr(tk.layers, f'LinearConv{spatial_ndims}d')( + 4, 5, kernel_size=3, stride=2, padding='half', + weight_init=tk.init.ones + ) + y = layer0(T.zeros([1] + output_shape)) + input_shape = T.shape(y)[1:] + input_channel, input_size = T.utils.split_channel_spatial_shape(input_shape) + + for fn_name, layer_cls in zip( + [ + f'linear_conv_transpose{spatial_ndims}d', + f'linear_deconv{spatial_ndims}d', + f'conv_transpose{spatial_ndims}d', + f'deconv{spatial_ndims}d', + f'res_block_transpose{spatial_ndims}d' + ], + [ + getattr(tk.layers, f'LinearConvTranspose{spatial_ndims}d'), + getattr(tk.layers, f'LinearConvTranspose{spatial_ndims}d'), + getattr(tk.layers, f'ConvTranspose{spatial_ndims}d'), + getattr(tk.layers, f'ConvTranspose{spatial_ndims}d'), + getattr(tk.layers, f'ResBlockTranspose{spatial_ndims}d'), + ]): + # without output_shape + kwargs = {'kernel_size': 3, 'stride': 2, 'padding': 'half', + 'weight_norm': True} + input_mask = [i == 5 for i in input_shape] + if not fn_name.startswith('linear_'): + kwargs['activation'] = LeakyReLU + sequential_builder_standard_check( + ctx=self, fn_name=fn_name, layer_cls=layer_cls, + input_shape=input_shape, input_mask=input_mask, + args=(5, 4), builder_args=(4,), + kwargs=kwargs + ) + kwargs['output_padding'] = 0 + sequential_builder_standard_check( + ctx=self, fn_name=fn_name, layer_cls=layer_cls, + input_shape=input_shape, input_mask=input_mask, + args=(5, 4), builder_args=(4,), + kwargs=kwargs + ) + + # with output_shape + kwargs = {'kernel_size': 3, 'stride': 2, 'padding': 'half', + 'weight_norm': True} + layer_kwargs = { + 'output_padding': T.utils.calculate_deconv_output_padding( + input_size=input_size, + output_size=output_size, + kernel_size=[3] * spatial_ndims, + stride=[2] * spatial_ndims, + padding=[(1, 1)] * spatial_ndims, + dilation=[1] * spatial_ndims, + ) + } + builder_kwargs = {'output_size': output_size} + input_mask = [True] * spatial_ndims + if not fn_name.startswith('linear_'): + kwargs['activation'] = LeakyReLU + sequential_builder_standard_check( + 
ctx=self, fn_name=fn_name, layer_cls=layer_cls, + input_shape=input_shape, input_mask=input_mask, + args=(5, 4), builder_args=(4,), + kwargs=kwargs, layer_kwargs=layer_kwargs, + builder_kwargs=builder_kwargs, + ) + + # test errors + builder = SequentialBuilder(input_shape) + fn = getattr(builder, fn_name) + with pytest.raises(ValueError, + match='`output_padding` and `output_size` ' + 'cannot be both specified'): + fn(5, kernel_size=1, + output_padding=1, + output_size=[2, 3, 4][:spatial_ndims]) + + with pytest.raises(ValueError, + match=f'`output_size` is expected to be ' + f'{spatial_ndims}d'): + fn(5, kernel_size=1, + output_size=[2, 3, 4, 5][:spatial_ndims + 1]) + + builder = SequentialBuilder( + [i if i == 5 else None for i in input_shape]) + fn = getattr(builder, fn_name) + with pytest.raises(ValueError, + match='Specifying `output_size` instead of ' + '`output_padding` is supported only ' + 'when the previous output shape ' + 'is all deterministic.'): + fn(5, kernel_size=1, output_size=[2, 3, 4][:spatial_ndims]) + + def test_pool(self): + for spatial_ndims in (1, 2, 3): + input_shape = make_conv_shape([], 5, [15, 16, 17][:spatial_ndims]) + input_mask = [False] * (spatial_ndims + 1) + for fn_name, layer_cls in zip( + [ + f'avg_pool{spatial_ndims}d', + f'max_pool{spatial_ndims}d', + ], + [ + getattr(tk.layers, f'AvgPool{spatial_ndims}d'), + getattr(tk.layers, f'MaxPool{spatial_ndims}d'), + ]): + kwargs = {'kernel_size': 3, 'stride': 2, 'padding': 'half'} + sequential_builder_standard_check( + ctx=self, fn_name=fn_name, layer_cls=layer_cls, + input_shape=input_shape, input_mask=input_mask, + args=(), builder_args=(), kwargs=kwargs + ) + + def test_global_avg_pool(self): + for spatial_ndims in (1, 2, 3): + input_shape = make_conv_shape([], 5, [15, 16, 17][:spatial_ndims]) + input_mask = [False] * (spatial_ndims + 1) + + for keepdims in [True, False, None]: + if keepdims: + output_mask = [i != 5 for i in input_shape] + else: + output_mask = [False] + + kwargs = {'keepdims': keepdims} if keepdims is not None else {} + sequential_builder_standard_check( + ctx=self, + fn_name=f'global_avg_pool{spatial_ndims}d', + layer_cls=getattr(tk.layers, f'GlobalAvgPool{spatial_ndims}d'), + input_shape=input_shape, input_mask=input_mask, + args=(), builder_args=(), kwargs=kwargs, + output_mask=output_mask + ) + + def test_channel_transpose_layers(self): + for spatial_ndims in (1, 2, 3): + input_shape = [15, 16, 17, 18][:spatial_ndims + 1] + input_mask = [False] * (spatial_ndims + 1) + for fn_name, layer_cls in zip( + [ + f'channel_first_to_last{spatial_ndims}d', + f'channel_last_to_first{spatial_ndims}d', + f'channel_default_to_last{spatial_ndims}d', + f'channel_last_to_default{spatial_ndims}d', + ], + [ + getattr(tk.layers, f'ChannelFirstToLast{spatial_ndims}d'), + getattr(tk.layers, f'ChannelLastToFirst{spatial_ndims}d'), + getattr(tk.layers, f'ChannelDefaultToLast{spatial_ndims}d'), + getattr(tk.layers, f'ChannelLastToDefault{spatial_ndims}d'), + ]): + sequential_builder_standard_check( + ctx=self, fn_name=fn_name, layer_cls=layer_cls, + input_shape=input_shape, input_mask=input_mask, + args=(), builder_args=(), kwargs={} + ) diff --git a/tests/tensor/test_core.py b/tests/tensor/test_core.py index fb2eb09..7a2902b 100644 --- a/tests/tensor/test_core.py +++ b/tests/tensor/test_core.py @@ -266,7 +266,7 @@ def copy_tensor(o): ([1, 2, 3], []), number_dtypes, (None, T.CPU_DEVICE)): - t = T.ones(shape, dtype=dtype) + t = T.ones(shape, dtype=dtype, device=device) self.assertIsInstance(t, T.Tensor) 
             self.assertEqual(T.get_dtype(t), dtype)
             self.assertEqual(T.get_device(t), device or T.current_device())
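
A minimal usage sketch of the SequentialBuilder API introduced by this patch (illustrative only, not part of the diff). The calls mirror tensorkit/layers/builder.py and tests/layers/test_builder.py above; the 1x28x28 input and the channel-first layout are assumptions tied to the PyTorch backend, and `build(flatten_to_ndims=False)` is used to keep the sketch simple.

    from tensorkit import tensor as T
    from tensorkit.layers import SequentialBuilder, LeakyReLU

    # in_channels/in_size replace the old in_spatial_shape argument
    builder = SequentialBuilder(in_channels=1, in_size=[28, 28])

    # arg_scope sets default kwargs for the named layer types;
    # `kernel_size` is now required for conv/deconv/pool builder methods,
    # so it is supplied here once for both conv2d calls.
    with builder.arg_scope(['conv2d'], kernel_size=3, activation=LeakyReLU):
        builder.conv2d(16, stride=2)
        builder.conv2d(32, stride=2)

    builder.global_avg_pool2d()   # out_shape becomes [32]
    builder.linear(10)            # plain linear head

    classifier = builder.build(flatten_to_ndims=False)
    logits = classifier(T.random.randn([64, 1, 28, 28]))  # channel-first input assumed
    # builder.out_shape == [10]

    # Transposed-convolution methods now accept `output_size` in place of
    # `output_padding`, e.g. (hypothetical sizes):
    #   builder.conv_transpose2d(16, kernel_size=3, stride=2, output_size=[28, 28])
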