In [1]:
from torch import FloatTensor
import math

In [2]:
def scaled_init(w, b, init_type, gain=1):
    """Fill ``w`` and ``b`` in place with zero-mean noise of std ``gain / sqrt(w.size(1))``.

    Args:
        w: tensor with at least two dimensions; ``w.size(1)`` sets the scale.
        b: tensor filled from exactly the same distribution as ``w``.
        init_type: ``"normal"`` for a Gaussian, ``"uniform"`` for a symmetric
            uniform distribution with the same standard deviation.
        gain: multiplicative factor applied to the standard deviation.

    Raises:
        ValueError: if ``init_type`` is neither ``"normal"`` nor ``"uniform"``.
            Nothing is written to ``w`` or ``b`` in that case.
    """
    # Reject unknown modes up front; otherwise the tensors would silently keep
    # whatever values they held before the call.
    if init_type not in ("normal", "uniform"):
        raise ValueError(
            "init_type must be 'normal' or 'uniform', got {!r}".format(init_type)
        )

    std = 1. / math.sqrt(w.size(1))
    if init_type == "normal":
        w.normal_(0, gain*std)
        b.normal_(0, gain*std)
    else:
        # U(-t, t) has standard deviation t / sqrt(3), hence the sqrt(3) factor.
        bound = math.sqrt(3.0)*std*gain
        w.uniform_(-bound, bound)
        b.uniform_(-bound, bound)

In [3]:
def xavier_init(w, b, init_type, gain=1):
    """Fill ``w`` and ``b`` in place using Xavier (Glorot) scaling.

    The standard deviation is ``gain * sqrt(2 / (fan_in + fan_out))`` with
    ``fan_in = w.size(1)`` and ``fan_out = w.size(0)``.

    Args:
        w: tensor with at least two dimensions that supplies both fans.
        b: tensor filled from exactly the same distribution as ``w``.
        init_type: ``"normal"`` for a Gaussian, ``"uniform"`` for a symmetric
            uniform distribution with the same standard deviation.
        gain: multiplicative factor applied to the standard deviation.

    Raises:
        ValueError: if ``init_type`` is neither ``"normal"`` nor ``"uniform"``.
            Nothing is written to ``w`` or ``b`` in that case.
    """
    # Reject unknown modes up front; otherwise the tensors would silently keep
    # whatever values they held before the call.
    if init_type not in ("normal", "uniform"):
        raise ValueError(
            "init_type must be 'normal' or 'uniform', got {!r}".format(init_type)
        )

    fan_in = w.size(1)
    fan_out = w.size(0)
    std = math.sqrt(2.0 / (fan_in + fan_out))
    if init_type == "normal":
        w.normal_(0, gain*std)
        b.normal_(0, gain*std)
    else:
        # U(-t, t) has standard deviation t / sqrt(3), hence the sqrt(3) factor.
        bound = math.sqrt(3.0)*std*gain
        w.uniform_(-bound, bound)
        b.uniform_(-bound, bound)

In [4]:
def calculate_gain(type):
    """Return the initialisation gain for the activation named ``type``.

    Supported names and their gains:
        ``'linear'`` / ``'sigmoid'`` -> 1.0
        ``'tanh'``                   -> 5/3
        ``'relu'``                   -> sqrt(2)

    Raises:
        ValueError: if ``type`` is not one of the supported names. Failing here
            avoids handing ``None`` to an initialiser, where it would only
            surface later as an obscure arithmetic error.
    """
    # The parameter name shadows the builtin ``type``; it is kept so existing
    # keyword callers keep working.
    if type in ('linear', 'sigmoid'):
        return 1.0
    if type == 'tanh':
        return 5.0 / 3
    if type == 'relu':
        return math.sqrt(2.0)
    raise ValueError("Unsupported activation {!r}".format(type))

In [5]:
# Scratch tensors for checking the initialisers by hand: a 50x80 "weight" and
# a 1x50 "bias". Their contents are overwritten by the init call in a later cell.
a, bias = FloatTensor(50, 80), FloatTensor(1, 50)

In [6]:
# Gaussian variant with the default gain; a and bias are modified in place.
scaled_init(a, bias, init_type="normal")

In [7]:
# One value per line: the two theoretical standard deviations, then the
# empirical one measured on a.
print(
    1. / math.sqrt(a.size(1)),                  # std targeted by scaled_init
    math.sqrt(2.0 / (a.size(0) + a.size(1))),   # std targeted by xavier_init
    a.std(),                                    # std actually observed in a
    sep="\n",
)

0.11180339887498948
0.12403473458920845
0.1124286811735223


In [8]:
from Linear import *
from Tanh import *
from Sequential import *

In [9]:
# Four Linear layers with a Tanh after each of the first three.
# Presumably Linear(n_in, n_out), i.e. 2 -> 25 -> 25 -> 25 -> 2; confirm in Linear.py.
model = Sequential(
    Linear(2, 25), Tanh(),
    Linear(25, 25), Tanh(),
    Linear(25, 25), Tanh(),
    Linear(25, 2),
)

In [10]:
# Standard deviation scaled_init would target for the first parameter tensor.
first_weight = model.param()[0][0]
print(1. / math.sqrt(first_weight.size(1)))

0.7071067811865475


In [11]:
# Re-initialise the model's parameters in place, two entries at a time.
# Assumes model.param() lists entries as alternating (weight, bias) and that
# element [0] of each entry is the tensor itself; TODO confirm in Sequential/Linear.
params = model.param()  # fetch the list once instead of on every access
for i in range(0, len(params), 2):
    scaled_init(params[i][0], params[i + 1][0], 'normal')

In [12]:
# Empirical std of the first parameter tensor, followed by the value
# scaled_init aims for (1 / sqrt(size(1))).
first_weight = model.param()[0][0]
print(first_weight.std())
print(1. / math.sqrt(first_weight.size(1)))

0.6079301806263597
0.7071067811865475
