# Default initialization with Xavier

In [1]:
# Mini-batch size: passed to every net.fit() call and used to draw the batch handed to LSUV.
batchsize = 500

In [2]:
# Baseline run: build LeNet with its default initializer and train it on MNIST.
from tfs.dataset import Mnist
from tfs.models import LeNet

net = LeNet()
dataset = Mnist()
net.build()
# NOTE(review): the third argument is presumably the number of epochs -- confirm against LeNet.fit.
net.fit(dataset, batchsize, 1)

step 10. loss 3.655696, score:0.546600
step 20. loss 0.723743, score:0.749500
step 30. loss 0.664948, score:0.846000
step 40. loss 0.392544, score:0.887300
step 50. loss 0.403155, score:0.911100
step 60. loss 0.329736, score:0.925000
step 70. loss 0.295869, score:0.935600
step 80. loss 0.245136, score:0.944400
step 90. loss 0.154759, score:0.946300
step 100. loss 0.234490, score:0.949400
step 110. loss 0.123667, score:0.953800
step 120. loss 0.236989, score:0.955100


<tfs.models.lenet.LeNet at 0x7f48bd45c510>

# LSUV initializer

In [3]:
from tfs.core.initializer import Initializer,InitType
from tfs.core.layer import *
import numpy as np

def svd_orthonormal(shape):
    """Draw a random array of the given shape with orthonormal rows or columns.

    The shape is flattened to the 2-D form ``(shape[0], prod(shape[1:]))``,
    a standard-normal matrix of that size is sampled, and the SVD factor
    whose shape equals the flattened shape is reshaped back to ``shape``.

    Args:
        shape: sequence of at least two dimension sizes.

    Returns:
        A numpy array of shape ``shape``.

    Raises:
        RuntimeError: if ``shape`` has fewer than two entries.
    """
    if not len(shape) >= 2:
        raise RuntimeError("Only shapes of length 2 or more are supported.")

    n_rows = shape[0]
    n_cols = np.prod(shape[1:])
    flat_shape = (n_rows, n_cols)

    gaussian = np.random.standard_normal(flat_shape)
    left, _, right = np.linalg.svd(gaussian, full_matrices=False)

    # With full_matrices=False exactly one factor (or both, when square)
    # has the flattened shape; prefer the left factor when it fits.
    if left.shape == flat_shape:
        basis = left
    else:
        basis = right
    return basis.reshape(shape)

# Note: as a side effect, this initializer also changes the weights of the current net.
class LSUV(Initializer):
    """Layer-sequential unit-variance (LSUV) initializer.

    For every Conv2d / FullyConnect node of the net, in definition order:

    1. the weights are replaced by an orthonormal matrix from
       ``svd_orthonormal`` and the biases by the node's default initial value;
    2. the node is evaluated on ``batchX`` and the weights are repeatedly
       rescaled until the variance of its output is within ``margin`` of 1.0
       (or the iteration budget is exhausted).

    The weights are written straight into the net via ``n.set_weights`` while
    the table is being built, so the returned init table is empty.

    Args:
        net: the network whose nodes are initialized.
        batchX: input batch fed to ``net.eval_node`` to measure activations.
        print_names: forwarded to the base initializer; not used in this class.
    """
    # NOTE(review): ret_type presumably tells the base class that this
    # initializer yields concrete values rather than ops -- confirm in Initializer.
    ret_type = InitType.values
    # Only these node types get the LSUV treatment; all others are skipped.
    available_node_type = [Conv2d, FullyConnect]
    def __init__(
        self,
        net,
        batchX,
        print_names=[]
    ):
        # Forward the options explicitly rather than harvesting locals():
        # under Python 3 a method that references `super` also has
        # `__class__` in locals(), which would leak into the keyword arguments.
        super(LSUV,self).__init__(net,batchX=batchX,print_names=print_names)

    def _build_init_table(self):
        """Initialize each supported node in place and return an empty table."""
        tbl = {}
        margin = 0.1            # accepted |1 - variance| after rescaling
        max_iter = 10           # iteration budget (see the check at the end of the loop)
        min_std = 1e-7          # below this the activations are treated as constant
        for n in self.net.net_def:
            print(type(n).__name__)
            if type(n) not in self.available_node_type:
                continue
            my_dict = {}

            # Weights: the default initializer is run only to learn the shape,
            # then an orthonormal matrix of that shape is used instead.
            name = 'weights'
            v = n.variables[name]
            defaultInitOp = n.initializers[name]
            val = defaultInitOp(v.get_shape().as_list(),v.dtype.base_dtype)
            myval = svd_orthonormal(val.shape)
            my_dict[name] = myval

            # Biases: keep whatever the node's default initializer produces.
            name = 'biases'
            v = n.variables[name]
            defaultInitOp = n.initializers[name]
            val = defaultInitOp(v.get_shape().as_list(),v.dtype.base_dtype)
            myval = val
            my_dict[name] = myval

            n.set_weights(my_dict)

            # Variance of this node's output on the calibration batch.
            acts1 = self.net.eval_node(n,self.param.batchX)
            var1=np.var(acts1)
            iter1=0
            needed_variance = 1.0
            print(var1)

            while (abs(needed_variance - var1) > margin):
                # A (near-)zero standard deviation cannot be rescaled to unit
                # variance; dividing by it would fill the weights with inf/NaN.
                if np.abs(np.sqrt(var1)) < min_std:
                    break
                weights = self.net.run(n.variables['weights'])
                biases = self.net.run(n.variables['biases'])
                # Scale the weights so the output std moves towards sqrt(needed_variance).
                weights /= np.sqrt(var1)/np.sqrt(needed_variance)
                w_all_new = {'weights':weights,
                             'biases':biases}
                n.set_weights(w_all_new)
                acts1=self.net.eval_node(n,self.param.batchX)
                var1=np.var(acts1)
                iter1+=1
                print(var1)
                # The check runs after the increment with a strict '>', so up to
                # max_iter + 1 rescaling passes are performed.
                if iter1 > max_iter:
                    break

        # it is initialized during the loop, so we can return a nil tbl
        return tbl



In [4]:
# Fresh, not-yet-built LeNet plus one MNIST training batch for LSUV to
# measure activation variances on.
from tfs.dataset import Mnist
from tfs.models import LeNet

net = LeNet()
dataset = Mnist()
batchX, batchY = dataset.train.next_batch(batchsize)

In [5]:
# Replace the net's initializer with LSUV, handing it the calibration batch drawn above.
net.initializer = LSUV(net,batchX)

In [6]:
net.build() # the numbers printed under each layer name are that layer's output variance: first as initialized, then after each rescaling step.

Conv2d
2481.81
1.0
MaxPool
Conv2d
0.0820143
0.856374
0.994984
MaxPool
FullyConnect
1.10542
1.00325
FullyConnect
0.916689
Softmax


<tf.Tensor 'prob:0' shape=(?, 10) dtype=float32>

In [7]:
# Train the LSUV-initialized net with the same arguments as the baseline run, so the two logs are comparable.
net.fit(dataset,batchsize,1)

step 10. loss 0.636787, score:0.794900
step 20. loss 0.304964, score:0.864200
step 30. loss 0.299373, score:0.933700
step 40. loss 0.135905, score:0.951200
step 50. loss 0.202405, score:0.959800
step 60. loss 0.127498, score:0.957700
step 70. loss 0.143419, score:0.966200
step 80. loss 0.098482, score:0.967600
step 90. loss 0.090081, score:0.971400
step 100. loss 0.140789, score:0.975000
step 110. loss 0.081888, score:0.975900
step 120. loss 0.149892, score:0.976600


<tfs.models.lenet.LeNet at 0x7f4881ab78d0>