-
Notifications
You must be signed in to change notification settings - Fork 0
/
init_method.py
81 lines (69 loc) · 3.1 KB
/
init_method.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
from enum import Enum
import numpy as np
class InitMethod(Enum):
    """Identifiers of the parameter initialization schemes.

    The members are compared against the ``init`` argument of the
    initialization helpers in this module to select a scheme.
    """

    # Normal draws with standard deviation sqrt(2 / fan_in).
    HE_NORMAL = 1
    # Normal draws whose standard deviation depends on the maxout rank K.
    MAXOUT_HE_NORMAL = 2
    # Normal draws that are subsequently rescaled per unit.
    SPHERE = 3
    # Weights and biases built from one shared random direction per output.
    MANY_REGIONS = 4
def maxout_he_std(fan_in, K):
    """Return the standard deviation of the rank-dependent normal init.

    Args:
        fan_in: Number of inputs feeding one unit; the variance scales
            as ``1 / fan_in``.
        K: Maxout rank. Supported values are 1 through 5, where 1 is
            treated as a plain linear layer.

    Returns:
        The standard deviation as a NumPy float.

    Raises:
        ValueError: If ``K`` is not one of the supported ranks.
    """
    if K == 1:  # We use this std for a linear layer
        return np.sqrt(2. / (fan_in))
    if K == 2:
        return np.sqrt(1. / (fan_in))
    if K == 3:
        return np.sqrt(2. * np.pi / ((np.sqrt(3) + 2 * np.pi) * fan_in))
    if K == 4:
        return np.sqrt(np.pi / ((np.sqrt(3) + np.pi) * fan_in))
    if K == 5:
        # NOTE(review): 0.5555 is a hard-coded constant with no closed form
        # given here -- presumably a numerical estimate; confirm its source.
        return np.sqrt(0.5555 / (fan_in))
    # Every supported rank returned above, so nothing may follow this raise.
    raise ValueError('Wrong init!')
def init_params(init, K, fan_in, fan_out, zero_bias):
    """Sample the weight and bias arrays of one layer.

    Args:
        init: An ``InitMethod`` member selecting the scheme.
        K: Maxout rank. For ``K > 1`` the arrays carry an extra axis of
            length ``K``; otherwise that axis is omitted.
        fan_in: Size of the last weight axis (inputs per unit).
        fan_out: Size of the first axis (number of units).
        zero_bias: If true, biases are zeros instead of random draws.
            Only honoured by ``HE_NORMAL`` and ``MAXOUT_HE_NORMAL``; the
            other schemes always construct their own biases.

    Returns:
        A ``(weight, bias)`` tuple of NumPy arrays with shapes
        ``[fan_out, K, fan_in]`` / ``[fan_out, K]`` when ``K > 1`` and
        ``[fan_out, fan_in]`` / ``[fan_out]`` otherwise.

    Raises:
        ValueError: If ``init`` is not a known method, or (via
            ``maxout_he_std``) if ``K`` is unsupported.
    """
    weight_shape = [fan_out, K, fan_in] if K > 1 else [fan_out, fan_in]
    bias_shape = [fan_out, K] if K > 1 else [fan_out]

    # Only the standard deviation differs between the normal-based schemes.
    if init == InitMethod.HE_NORMAL:
        std = np.sqrt(2. / (fan_in))
    elif init in (InitMethod.MAXOUT_HE_NORMAL, InitMethod.SPHERE,
                  InitMethod.MANY_REGIONS):
        std = maxout_he_std(fan_in=fan_in, K=K)
    else:
        raise ValueError(f'Wrong init {init}')

    def draw(size):
        # Zero-mean normal sample of the requested shape.
        return np.random.normal(loc=0., scale=std, size=size)

    if init in (InitMethod.HE_NORMAL, InitMethod.MAXOUT_HE_NORMAL):
        weight = draw(weight_shape)
        bias = np.zeros(bias_shape) if zero_bias else draw(bias_shape)
    elif init == InitMethod.SPHERE:
        weight = draw(weight_shape)
        bias = draw(bias_shape)
        # Norm of the joint (weights, bias) vector of every unit. It is
        # built from squared sums because `ndarray + [b]` broadcast-adds b
        # to each weight instead of appending it.
        norm = np.sqrt(np.sum(weight ** 2, axis=-1) + bias ** 2)
        weight = weight / norm[..., np.newaxis]
        # The normalized bias is made non-negative and then shifted down.
        c = 1 / np.sqrt(K * fan_in)
        bias = np.abs(bias / norm) - c
    else:  # InitMethod.MANY_REGIONS
        weight = np.zeros(weight_shape)
        bias = np.zeros(bias_shape)
        for i in range(fan_out):
            # One random direction is shared by all K pieces of unit i.
            v = draw([fan_in])
            for j in range(K):
                # Evenly spaced angles with a small random jitter; the
                # weight jitter is drawn before the bias jitter.
                weight[i][j] = v * np.cos(
                    np.pi * (j + 1) / K
                    + 0.01 * np.random.normal(loc=0., scale=1.))
                bias[i][j] = np.sin(
                    np.pi * (j + 1) / K
                    + 0.01 * np.random.normal(loc=0., scale=1.))

    return weight, bias
def get_init_std(init, K, fan_in):
    """Return the standard deviation associated with an init method.

    Args:
        init: An ``InitMethod`` member.
        K: Maxout rank, forwarded to ``maxout_he_std``.
        fan_in: Number of inputs per unit.

    Returns:
        ``sqrt(2 / fan_in)`` for ``HE_NORMAL``; the value of
        ``maxout_he_std`` for every other method.
    """
    if init == InitMethod.HE_NORMAL:
        return np.sqrt(2. / (fan_in))

    # Exact for MAXOUT_HE_NORMAL. For the remaining methods this is only a
    # stub to avoid an exception: the sphere init has no distribution, and
    # the variance of the many-regions initialization is difficult to
    # estimate.
    return maxout_he_std(fan_in=fan_in, K=K)