l2_normalization.py

import numpy

from chainer.backends import cuda
from chainer import function_node
import chainer.functions
from chainer.utils import type_check


class _SetItemZero(function_node.FunctionNode):

    """Scatter values into a zero-initialized array at ``mask`` positions."""

    def __init__(self, mask):
        self.mask = mask

    def forward(self, inputs):
        x, = inputs
        xp = cuda.get_array_module(x)
        y = xp.zeros(self.mask.shape, x.dtype)
        y[self.mask] = x
        return y,

    def backward(self, indexes, grad_outputs):
        g, = grad_outputs
        return g[self.mask],
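
# A minimal sketch of how ``_SetItemZero`` behaves (the values below are
# illustrative, not part of the original source): the forward pass scatters
# ``x`` into a zero array at the True positions of ``mask``, and the
# backward pass gathers the incoming gradient back from those positions.
#
#     >>> mask = numpy.array([True, False, True])
#     >>> x = numpy.array([1., 2.], dtype=numpy.float32)
#     >>> y, = _SetItemZero(mask).apply((x,))
#     >>> y.array
#     array([1., 0., 2.], dtype=float32)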

class NormalizeL2(function_node.FunctionNode):

    """L2 normalization along the given axis."""

    def __init__(self, eps=1e-5, axis=1):
        self.eps = eps
        if isinstance(axis, int):
            axis = axis,
        if len(axis) not in (1, 2):
            raise ValueError("Improper number of dimensions to norm.")
        self.axis = axis

    def check_type_forward(self, in_types):
        type_check.expect(in_types.size() == 1)
        x_type, = in_types
        type_check.expect(
            x_type.dtype == numpy.float32,
        )

    def forward(self, inputs):
        self.retain_inputs((0,))
        x, = inputs
        xp = cuda.get_array_module(x)
        # keepdims=True keeps the norm at ndim >= 1, so adding the Python
        # float ``self.eps`` does not upcast the result to eps' type
        # (float64); it also keeps the norm broadcastable against ``x``.
        norm = xp.sqrt(xp.sum(
            xp.square(x), axis=self.axis, keepdims=True)) + self.eps
        return x / norm,
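
    # Numeric sketch (illustrative, not from the original source): for
    # x = [[3., 4.]] with axis=1, norm = sqrt(3**2 + 4**2) + eps, so the
    # output row is approximately [0.6, 0.8]. An all-zero row is divided
    # by eps alone and thus stays all zeros instead of producing nan.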

    def backward(self, indexes, grad_outputs):
        x, = self.get_retained_inputs()
        gy, = grad_outputs
        F = chainer.functions

        norm_noeps = F.sqrt(F.sum(F.square(x), axis=self.axis,
                                  keepdims=True))
        norm = norm_noeps + self.eps
        norm = F.broadcast_to(norm, gy.shape)
        x_gy_reduced = F.sum(x * gy, axis=self.axis, keepdims=True)

        # L2 normalization with eps has a continuous backward. However,
        # that backward is not differentiable at indices holding zero
        # vectors. To avoid nan in the double backward, compute only
        # inside the mask of nonzero norms.
        mask = norm_noeps.array != 0
        x_gy_reduced, = _SetItemZero(mask).apply((
            x_gy_reduced[mask] / norm_noeps[mask],))
        x_gy_reduced = F.broadcast_to(x_gy_reduced, gy.shape)
        gx = gy * norm - x_gy_reduced * x
        gx = gx / norm ** 2
        return gx,
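
# Derivation sketch for ``NormalizeL2.backward`` above (worked out here for
# clarity; not part of the original source). With r = ||x||_2 and
# n = r + eps, the output is y = x / n, so the chain rule gives
#
#     gx = gy / n - (x . gy) * x / (r * n**2)
#        = (gy * n - ((x . gy) / r) * x) / n**2,
#
# which is exactly ``(gy * norm - x_gy_reduced * x) / norm ** 2`` once
# ``x_gy_reduced`` has been divided by ``norm_noeps``.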

def normalize(x, eps=1e-5, axis=1):
    """Normalization by the L2 norm (a.k.a.\\ Euclidean norm).

    This function implements L2 normalization on a vector along the given
    axis. No reduction is done along the normalization axis.

    In the case when :obj:`axis=1` and :math:`x` is an array of shape
    :math:`(N, K)`, where :math:`N` and :math:`K` denote the mini-batch
    size and the dimension of the input variable, this function computes
    the output :math:`y` by the following equation:

    .. math::

        y_i = {x_i \\over \\| x_i \\|_2 + \\epsilon}

    :obj:`eps` is used to avoid division by zero when the norm of
    :math:`x` along the given axis is zero.

    The default value of :obj:`axis` is determined for backward
    compatibility.

    Args:
        x (~chainer.Variable): Two dimensional input variable. The first
            dimension is assumed to be the mini-batch dimension.
        eps (float): Epsilon value for numerical stability.
        axis (int or tuple of ints): Axis along which to normalize.

    Returns:
        ~chainer.Variable: The output variable which has the same shape
        as :math:`x`.
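
    .. admonition:: Example

        A minimal usage sketch (the input values here are illustrative):

        >>> x = numpy.array([[3., 4.], [0., 0.]], dtype=numpy.float32)
        >>> y = normalize(x, axis=1)
        >>> y.shape
        (2, 2)

        The first row is scaled to roughly ``[0.6, 0.8]`` (unit length up
        to :obj:`eps`), while the all-zero row remains all zeros.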
"""
    return NormalizeL2(eps, axis).apply((x,))[0]