# Default initialization (Xavier)

In [1]:
# Mini-batch size passed to every net.fit() call and used to draw the
# batch that the LSUV initializer is calibrated on.
batchsize = 500

In [2]:
# Baseline: LeNet with its default initializers, trained on MNIST.
from tfs.models import LeNet
from tfs.dataset import Mnist

net = LeNet()
dataset = Mnist()
net.build()
# Third argument is presumably the number of epochs (the log below stops at
# step 120, i.e. 120 x 500 samples) -- TODO confirm against LeNet.fit.
net.fit(dataset, batchsize, 1)

step 10. loss 5.903676, test accuracy:0.430100
step 20. loss 0.979873, test accuracy:0.655800
step 30. loss 0.708969, test accuracy:0.818600
step 40. loss 0.391821, test accuracy:0.876600
step 50. loss 0.339591, test accuracy:0.909400
step 60. loss 0.312280, test accuracy:0.926600
step 70. loss 0.286118, test accuracy:0.934600
step 80. loss 0.252523, test accuracy:0.945900
step 90. loss 0.170250, test accuracy:0.944600
step 100. loss 0.289807, test accuracy:0.953600
step 110. loss 0.134003, test accuracy:0.955100
step 120. loss 0.192019, test accuracy:0.957100


<tfs.models.lenet.LeNet at 0x103e0af50>

# LSUV initializer

In [3]:
from tfs.core.initializer import Initializer,InitType
from tfs.core.layer import *
import numpy as np

def svd_orthonormal(shape):
    """Return a random array of ``shape`` built from an SVD factor.

    ``shape`` is collapsed to the 2-D form ``(shape[0], prod(shape[1:]))``,
    a standard-normal matrix of that size is decomposed with a reduced SVD,
    and whichever factor has the collapsed shape is reshaped back to
    ``shape``. The collapsed result therefore has orthonormal rows (when it
    is wide) or orthonormal columns (when it is tall).

    Args:
        shape: sequence of at least two dimension sizes.

    Returns:
        numpy.ndarray with the requested ``shape``.

    Raises:
        RuntimeError: if ``shape`` has fewer than two entries.
    """
    if len(shape) < 2:
        raise RuntimeError("Only shapes of length 2 or more are supported.")

    # Everything after the first axis is folded into a single axis.
    flat_shape = (shape[0], np.prod(shape[1:]))
    gaussian = np.random.standard_normal(flat_shape)
    left, _, right = np.linalg.svd(gaussian, full_matrices=False)

    # With a reduced SVD exactly one of the factors keeps the collapsed shape
    # (both do when the matrix is square, in which case the left one wins).
    if left.shape == flat_shape:
        basis = left
    else:
        basis = right
    return basis.reshape(shape)

# Side effect: while it runs, this initializer also overwrites the weights of the current net.
class LSUV(Initializer):
    """Layer-sequential unit-variance (LSUV) initializer.

    For every ``Conv2d`` / ``FullyConnect`` node of the net, in ``net_def``
    order, the weights are replaced by an orthonormal array produced by
    ``svd_orthonormal`` and then rescaled until the variance of the node's
    output on ``batchX`` is within a margin of the target variance.

    Side effect: values are written straight into the net with
    ``set_weights`` while the table is being built, so the returned init
    table is empty.
    """
    ret_type = InitType.values
    # Only exact instances of these node types are re-initialized.
    available_node_type = [Conv2d, FullyConnect]

    def __init__(
        self,
        net,
        batchX,
        print_names=None
    ):
        """Store the calibration batch and forward the options to the base class.

        Args:
            net: network whose nodes are initialized.
            batchX: input batch fed to ``net.eval_node`` to measure the
                output variance of each node.
            print_names: forwarded unchanged to the base initializer;
                defaults to a new empty list.
        """
        # A fresh list per instance avoids sharing one mutable default
        # object between every LSUV that is created without this argument.
        if print_names is None:
            print_names = []
        super(LSUV, self).__init__(
            net,
            batchX=batchX,
            print_names=print_names
        )

    def _build_init_table(self):
        """Initialize the net layer by layer and return an empty table.

        Prints the type name of every node visited and, for each
        re-initialized node, the measured output variance before and after
        every rescale.

        Returns:
            dict: always empty, because the weights have already been
            written into the net by the time this method returns.
        """
        tbl = {}
        margin = 0.1            # accepted |target - measured| variance gap
        max_iter = 10           # upper bound on rescales per node
        needed_variance = 1.0   # target output variance
        min_std = 1e-7          # below this the output is treated as dead

        for n in self.net.net_def:
            print(type(n).__name__)
            if type(n) not in self.available_node_type:
                continue

            # Weights: the default initializer is only used to obtain the
            # shape; the values come from a random orthonormal basis.
            w_var = n.variables['weights']
            w_default = n.initializers['weights'](
                w_var.get_shape().as_list(), w_var.dtype.base_dtype)
            w_init = svd_orthonormal(w_default.shape)

            # Biases keep whatever their default initializer produces.
            b_var = n.variables['biases']
            b_init = n.initializers['biases'](
                b_var.get_shape().as_list(), b_var.dtype.base_dtype)

            n.set_weights({'weights': w_init, 'biases': b_init})

            var1 = np.var(self.net.eval_node(n, self.param.batchX))
            print(var1)

            iter1 = 0
            # Bounded at exactly max_iter rescales.
            while abs(needed_variance - var1) > margin and iter1 < max_iter:
                std1 = np.sqrt(var1)
                if std1 < min_std:
                    # Dividing by a (near-)zero std would turn the weights
                    # into inf/NaN; leave the layer as it is instead.
                    print('LSUV: output variance of %s is ~0, rescale skipped'
                          % type(n).__name__)
                    break

                weights = self.net.run(n.variables['weights'])
                biases = self.net.run(n.variables['biases'])
                weights /= std1 / np.sqrt(needed_variance)
                n.set_weights({'weights': weights,
                               'biases': biases})

                var1 = np.var(self.net.eval_node(n, self.param.batchX))
                iter1 += 1
                print(var1)

        # The net was initialized inside the loop, so the table stays empty.
        return tbl



In [4]:
# Fresh, not-yet-built LeNet plus one training batch for the LSUV calibration.
from tfs.models import LeNet
from tfs.dataset import Mnist

net = LeNet()
dataset = Mnist()
(batchX, batchY) = dataset.train.next_batch(batchsize)

In [5]:
# Attach the LSUV initializer (calibrated on batchX) before the net is built.
net.initializer = LSUV(net,batchX)

In [6]:
# Building the net runs the LSUV initializer. The trace printed below is, per
# node, its type name followed by the measured output variance before and
# after each rescale.
net.build()

Conv2d
2111.35
1.0
MaxPool
Conv2d
0.0835624
0.849939
0.994472
MaxPool
FullyConnect
1.00148
FullyConnect
2.31459
1.0
Softmax


<tf.Tensor 'prob:0' shape=(?, 10) dtype=float32>

In [7]:
# Same fit() arguments as the baseline run, so the two logs are comparable.
net.fit(dataset,batchsize,1)

step 10. loss 0.455967, test accuracy:0.850200
step 20. loss 0.275849, test accuracy:0.918300
step 30. loss 0.293066, test accuracy:0.941500
step 40. loss 0.110870, test accuracy:0.955700
step 50. loss 0.161958, test accuracy:0.962900
step 60. loss 0.101815, test accuracy:0.968700
step 70. loss 0.136306, test accuracy:0.973900
step 80. loss 0.066778, test accuracy:0.972000
step 90. loss 0.059173, test accuracy:0.975000
step 100. loss 0.143145, test accuracy:0.976200
step 110. loss 0.064761, test accuracy:0.977400
step 120. loss 0.117925, test accuracy:0.976600


<tfs.models.lenet.LeNet at 0x112cbcbd0>