In [1]:
import tensorflow as tf
import numpy as np
import dilated_model as dm
import torch as th
from tensorflow.python.framework import ops

In [2]:
#pthnet = dm.EncodeWideResNetFIXED(in_channel=1, init_channel=32, num_enc_layer=4, N_res_in_block=1, use_selu=True, num_classes=3)
pthnet = dm.EncodeWideResNet(in_channel=1, init_channel=32, num_enc_layer=4, N_res_in_block=1, use_selu=True, num_classes=3)
sd = th.load('saved/ENCODE-selu-adam/0002/state_dict_highscore')
#sd = th.load('saved/fixed-noise/0001/state_dict_highscore')
pthnet.load_state_dict(sd)

In [12]:
def selu(x):
    with ops.name_scope('elu') as scope:
        alpha = 1.6732632423543772848170429916717
        scale = 1.0507009873554804934193349852946
        return scale*tf.where(x>=0.0, x, alpha*tf.nn.elu(x))
    
#def init(*shape):
#    return np.zeros(shape, dtype='float32')

def gener():
    for p in sd.values():
        yield p.cpu().transpose(0, -1).numpy()    
paramgen = gener()
def init(*args, do_assert=True):
    p = next(paramgen)
    if do_assert:
        assert p.shape == args, (p.shape, args)
    return p
    
def BatchNorm(input, channel=8):
    with tf.variable_scope('BatchNorm'):
        weight = tf.Variable(init(channel), name='weight')
        bias = tf.Variable(init(channel), name='bias')
        mean = tf.Variable(init(channel), name='running_mean')
        var = tf.Variable(init(channel), name='running_var')
        return tf.nn.batch_normalization(input, mean, var, bias, weight, 1e-05)

def Conv1d(input, in_channel, out_channel, kernel_size, dilation=1, bias=False):
    with tf.variable_scope('Conv1d'):
        w = tf.Variable(init(kernel_size, in_channel, out_channel), name='weight')
        if dilation > 1:
            w = tf.expand_dims(w, 0)
            x = tf.expand_dims(input, 1)
            p = kernel_size - 1 #// 2
            x = tf.pad(x, [[0, 0], [0, 0], [p, p], [0, 0]], "CONSTANT")
            x = tf.nn.atrous_conv2d(x, w, dilation, 'VALID')
            x = tf.squeeze(x, 1)
        else:
            p = kernel_size // 2
            x = tf.pad(input, [[0, 0], [p, p], [0, 0]], "CONSTANT")
            x = tf.nn.conv1d(x, w, 1, 'VALID')
        if bias:
            b = tf.Variable(init(out_channel), name='bias')
            x = x + b
    return x
    
def MaxPool1d(input):
    with tf.variable_scope('MaxPool1d'):
        x = tf.expand_dims(input, 1)
        x = tf.nn.max_pool(x, [1, 1, 2, 1], [1, 1, 2, 1], 'SAME')
        x = tf.squeeze(x, 1)
    return x
    

def Encoder(input, init_channel):
    def DownSampleBlock(input, in_channel, out_channel):
        with tf.variable_scope('DownSampleBlock'):
            x = Conv1d(input, in_channel, out_channel, 7, bias=True)
            x = BatchNorm(x, out_channel)
            x = selu(x)
            x = MaxPool1d(x)
        return x    
    with tf.variable_scope('Encoder'):
        x = DownSampleBlock(input, 1, init_channel)
        x = DownSampleBlock(x, init_channel, init_channel*2)
        x = DownSampleBlock(x, init_channel*2, init_channel*4)
        x = DownSampleBlock(x, init_channel*4, init_channel*8)
    return x
    
def DilatedBlock(input, channel=8, kernel_size=9, dilation=2):
    # No change in # of channels -> identity mapping
    with tf.variable_scope('DilatedBlock'):
        x = BatchNorm(input, channel)
        x = Conv1d(x, channel, channel, kernel_size)
        x = selu(x)
        x = BatchNorm(x, channel)
        x = Conv1d(x, channel, channel, kernel_size, dilation)
        x = selu(x)
    return x + input    
    
def ResNet(input, channel):
    with tf.variable_scope('ResNet'):
        x = DilatedBlock(input, channel)
        for _ in range(8):
            x = DilatedBlock(x, channel)
        
    return x

def Features(input, init_channel):
    x = Encoder(input, init_channel)
    x = ResNet(x, init_channel*8)
    return x

def NET(input, init_channel=32, num_classes=3):
    x = Features(input, init_channel)
    with tf.variable_scope('Logit'):
        logit = Conv1d(x, init_channel*8, num_classes, 1)
        logit = tf.reduce_mean(logit, 1)
        #print('MEGTÖRTÉNT')
        
    return logit

In [13]:

channel = 1
testinput = np.random.randn(*[10, channel, 1024])
def forward(self, x, lens=None):
    if lens is None:
        lens = x.size(-1)
    else:
        lens = lens[:, None].expand(len(x), self.num_classes)
    
    #out = self.forward_encoder(x)
    #out = self.forward_features(x)
    #out = self.logit(out)
    #out = th.sum(out, dim=-1).squeeze() / lens
    out = self.forward_features(x)
    out = th.mean(self.logit(out), -1)
    return out

pytorch_testmodule = pthnet
pytorch_testmodule.eval()
pth_input = th.autograd.Variable(th.FloatTensor(testinput))
pth_result = forward(pytorch_testmodule, pth_input).data.numpy()

In [14]:
####################################
print('OK')
tf.reset_default_graph()
dilation = 2
x = tf.constant(testinput.transpose(0,2,1), 1)
#y1 = selu(x)
#y1 = Conv1d(x, 1, 1, 3, 2, bias=False)
#y1 = DilatedBlock(x, channel=channel, kernel_size=9, dilation=dilation)
#pytorch_testmodule = dm.DilatedBlock(in_channels=channel, 
#                                     out_channels=channel, kernel_size=9, nonlin=dm.SELU(), dilation=dilation)

y1 = NET(x, init_channel=32)
with tf.Session() as sess:
    tf.global_variables_initializer().run()
    for f, (p_names, p) in zip(tf.global_variables(), pytorch_testmodule.state_dict().items()):
        print('%50s\t%50s'%(f.name, p_names))
        #print(p_names)
        #initializer = tf.constant(p.transpose(0, -1).numpy(), 1)
        #f.assign(initializer).eval()
        #print(initializer.eval()-f.eval())
    tf_result = sess.run(y1)#.transpose(0,2,1)

OK
           Encoder/DownSampleBlock/Conv1d/weight:0	                                  encoder.0.weight
             Encoder/DownSampleBlock/Conv1d/bias:0	                                    encoder.0.bias
        Encoder/DownSampleBlock/BatchNorm/weight:0	                                  encoder.1.weight
          Encoder/DownSampleBlock/BatchNorm/bias:0	                                    encoder.1.bias
  Encoder/DownSampleBlock/BatchNorm/running_mean:0	                            encoder.1.running_mean
   Encoder/DownSampleBlock/BatchNorm/running_var:0	                             encoder.1.running_var
         Encoder/DownSampleBlock_1/Conv1d/weight:0	                                  encoder.4.weight
           Encoder/DownSampleBlock_1/Conv1d/bias:0	                                    encoder.4.bias
      Encoder/DownSampleBlock_1/BatchNorm/weight:0	                                  encoder.5.weight
        Encoder/DownSampleBlock_1/BatchNorm/bias:0	                            

In [15]:
print(tf_result.shape, pth_result.shape)
#print('\nT=\n',tf_result)
#print('choice\n', tf_result.argmax(1))
#print('\nP=\n',pth_result)
#print('choice\n', pth_result.argmax(1))

print('\nP-T =\n', np.abs(tf_result-pth_result))
print('\nmean abs(P-T)/max(P,T) =\n', (np.abs(tf_result-pth_result)/np.max(np.abs([tf_result, pth_result]))).mean())

(10, 3) (10, 3)

P-T =
 [[  1.22070312e-04   1.52587891e-05   0.00000000e+00]
 [  3.81469727e-05   1.37329102e-04   4.67300415e-05]
 [  1.22070312e-04   7.62939453e-06   6.86645508e-05]
 [  6.10351562e-05   0.00000000e+00   3.81469727e-06]
 [  6.10351562e-05   9.91821289e-05   1.14440918e-05]
 [  1.22070312e-04   1.52587891e-05   2.47955322e-05]
 [  3.05175781e-05   3.05175781e-05   7.62939453e-06]
 [  3.05175781e-05   4.57763672e-05   0.00000000e+00]
 [  3.81469727e-05   8.39233398e-05   2.52723694e-05]
 [  1.52587891e-05   3.05175781e-05   7.62939453e-06]]

mean abs(P-T)/max(P,T) =
 1.62921e-07


In [21]:
import torch
import torch.nn as nn
from torch.nn import Parameter
from torch.autograd import Variable, Function
from collections import defaultdict
import graphviz
old_function__call__ = Function.__call__

def register_creator(inputs, creator, output):
    """
    In the forward pass, our Function.__call__ and BatchNorm.forward_hook both call this method to register the creators
    inputs: list of input variables
    creator: one of
        - Function
        - BatchNorm module
    output: a single output variable
    """
    cid = id(creator)
    oid = id(output)
    if oid in vars: 
        return
    # connect creator to input
    for input in inputs:
        iid = id(input)
        func_trace[cid][iid] = input
        # register input
        vars[iid] = input
    # connect output to creator
    assert type(output) not in [tuple, list, dict]
    var_trace[oid][cid] = creator
    # register creator and output and all inputs
    vars[oid] = output
    funcs[cid] = creator

hooks = []

def register_vis_hooks(model):
    global var_trace, func_trace, vars, funcs
    remove_vis_hooks()
    var_trace  = defaultdict(lambda: {})     # map oid to {cid:creator}
    func_trace = defaultdict(lambda: {})     # map cid to {iid:input}
    vars  = {}                               # map vid to Variable/Parameter
    funcs = {}                               # map cid to Function/BatchNorm module
    hooks = []                               # contains the forward hooks, needed for hook removal

    def hook_func(module, inputs, output):
        print('HOOK', mod.__class__.__name__)
        #assert 'BatchNorm' in mod.__class__.__name__        # batchnorms don't have shared superclass
        inputs = list(inputs)
        try:
            for p in [module.weight, module.bias]:
                if p is not None:
                    inputs.append(p)
        except AttributeError:
            pass
        register_creator(inputs, module, output)

    for mod in model.modules():
        #if 'BatchNorm' in mod.__class__.__name__:           # batchnorms don't have shared superclass
        hook = mod.register_forward_hook(hook_func)
        hooks.append(hook)

    def new_function__call__(self, *args, **kwargs):
        inputs =  [a for a in args            if isinstance(a, Variable)]
        inputs += [a for a in kwargs.values() if isinstance(a, Variable)]
        output = old_function__call__(self, *args, **kwargs)
        register_creator(inputs, self, output)
        return output

    Function.__call__ = new_function__call__


def remove_vis_hooks():
    for hook in hooks:
        hook.remove()

    Function.__call__ = old_function__call__


def save_visualization(name, format='svg'):
    g = graphviz.Digraph(format=format)
    def sizestr(var):
        size = [int(i) for i in list(var.size())]
        return str(size)
    # add variable nodes
    for vid, var in vars.items():
        if isinstance(var, nn.Parameter):
            g.node(str(vid), label=sizestr(var), shape='ellipse', style='filled', fillcolor='red')
        elif isinstance(var, Variable):
            g.node(str(vid), label=sizestr(var), shape='ellipse', style='filled', fillcolor='lightblue')
        else:
            assert False, var.__class__
    # add creator nodes
    for cid in func_trace:
        creator = funcs[cid]
        g.node(str(cid), label=str(creator.__class__.__name__), shape='rectangle', style='filled', fillcolor='orange')
    # add edges between creator and inputs
    for cid in func_trace:
        for iid in func_trace[cid]:
            g.edge(str(iid), str(cid))
    # add edges between outputs and creators
    for oid in var_trace:
        for cid in var_trace[oid]:
            g.edge(str(cid), str(oid))
    g.render(name)
    
#register_forward_hook

In [22]:
register_vis_hooks(pthnet)

pth_result = pthnet(Variable(th.FloatTensor(testinput)))

HOOK Conv1d
HOOK Conv1d
HOOK Conv1d
HOOK Conv1d
HOOK Conv1d
HOOK Conv1d
HOOK Conv1d
HOOK Conv1d
HOOK Conv1d
HOOK Conv1d
HOOK Conv1d
HOOK Conv1d
HOOK Conv1d
HOOK Conv1d
HOOK Conv1d
HOOK Conv1d
HOOK Conv1d
HOOK Conv1d
HOOK Conv1d
HOOK Conv1d
HOOK Conv1d
HOOK Conv1d
HOOK Conv1d
HOOK Conv1d
HOOK Conv1d
HOOK Conv1d
HOOK Conv1d
HOOK Conv1d
HOOK Conv1d
HOOK Conv1d
HOOK Conv1d
HOOK Conv1d
HOOK Conv1d
HOOK Conv1d
HOOK Conv1d
HOOK Conv1d
HOOK Conv1d
HOOK Conv1d
HOOK Conv1d
HOOK Conv1d
HOOK Conv1d
HOOK Conv1d
HOOK Conv1d
HOOK Conv1d
HOOK Conv1d
HOOK Conv1d
HOOK Conv1d
HOOK Conv1d
HOOK Conv1d
HOOK Conv1d
HOOK Conv1d
HOOK Conv1d
HOOK Conv1d
HOOK Conv1d
HOOK Conv1d
HOOK Conv1d
HOOK Conv1d
HOOK Conv1d
HOOK Conv1d
HOOK Conv1d
HOOK Conv1d
HOOK Conv1d
HOOK Conv1d
HOOK Conv1d
HOOK Conv1d
HOOK Conv1d
HOOK Conv1d
HOOK Conv1d
HOOK Conv1d
HOOK Conv1d
HOOK Conv1d
HOOK Conv1d
HOOK Conv1d
HOOK Conv1d
HOOK Conv1d
HOOK Conv1d
HOOK Conv1d
HOOK Conv1d
HOOK Conv1d
HOOK Conv1d
HOOK Conv1d
HOOK Conv1d
HOOK Conv1d
HOOK

In [23]:
remove_vis_hooks()
save_visualization('graphviz', format='png')

In [3]:
import dilated_model as dm
import torch as th
pthnet = dm.EncodeWideResNet(in_channel=1, init_channel=32, num_enc_layer=4, N_res_in_block=1, use_selu=True, num_classes=3)
#sd = th.nn.BatchNorm1d(4)
#sd.eval()

#sd = th.load('saved/ENCODE-selu-adam/0002/state_dict_highscore')
pthnet.load_state_dict(sd)
pthnet.cpu()

NameError: name 'sd' is not defined

In [177]:
tf.reset_default_graph()
testinput = np.random.randn(*[3, 1, 100])
x = tf.constant(testinput.transpose(0,2,1), 1)
y1 = NET(x)

with tf.Session() as sess:
    for f, (p_names, p) in zip(tf.global_variables(), pthnet.state_dict().items()):
        #print('%50s\t%50s'%(f.name, p_names))
        #print(p_names)
        initializer = tf.constant(p.transpose(0, -1).numpy(), 1)
        f.assign(initializer).eval()
        #print(initializer.eval()-f.eval())
    tf_result = sess.run(y1)
    pth_result = pthnet(th.autograd.Variable(th.FloatTensor(testinput))).data.numpy()

In [11]:
tf.reset_default_graph()
channel = 30
dilation = 2
testinput = np.random.randn(*[2, channel, 1000])
x = tf.constant(testinput.transpose(0,2,1), 1)
#y1 = selu(x)
#y1 = Conv1d(x, 1, 1, 3, 2, bias=False)
#y1 = DilatedBlock(x, channel=channel, kernel_size=9, dilation=dilation)
#pytorch_testmodule = dm.DilatedBlock(in_channels=channel, 
#                                     out_channels=channel, kernel_size=9, nonlin=dm.SELU(), dilation=dilation)
y1 = ResNet(x, channel=channel)
pytorch_testmodule = dm.ConvModule(nonlin=dm.SELU(),
    in_channel=channel, channel=channel, kernel_size=9, N=9)
pytorch_testmodule.eval()
with tf.Session() as sess:
    for f, (p_names, p) in zip(tf.global_variables(), pytorch_testmodule.state_dict().items()):
        print('%50s\t%50s'%(f.name, p_names))
        #print(p_names)
        initializer = tf.constant(p.transpose(0, -1).numpy(), 1)
        f.assign(initializer).eval()
        #print(initializer.eval()-f.eval())
    tf_result = sess.run(y1)#.transpose(0,2,1)
    pth_result = pytorch_testmodule(th.autograd.Variable(th.FloatTensor(testinput))).data.numpy()

            ResNet/DilatedBlock/BatchNorm/weight:0	                        residuals.0.block.0.weight
              ResNet/DilatedBlock/BatchNorm/bias:0	                          residuals.0.block.0.bias
      ResNet/DilatedBlock/BatchNorm/running_mean:0	                  residuals.0.block.0.running_mean
       ResNet/DilatedBlock/BatchNorm/running_var:0	                   residuals.0.block.0.running_var
               ResNet/DilatedBlock/Conv1d/weight:0	                        residuals.0.block.1.weight
          ResNet/DilatedBlock/BatchNorm_1/weight:0	                        residuals.0.block.3.weight
            ResNet/DilatedBlock/BatchNorm_1/bias:0	                          residuals.0.block.3.bias
    ResNet/DilatedBlock/BatchNorm_1/running_mean:0	                  residuals.0.block.3.running_mean
     ResNet/DilatedBlock/BatchNorm_1/running_var:0	                   residuals.0.block.3.running_var
             ResNet/DilatedBlock/Conv1d_1/weight:0	                        residua

          ResNet/DilatedBlock_8/BatchNorm_1/bias:0	                          residuals.8.block.3.bias
  ResNet/DilatedBlock_8/BatchNorm_1/running_mean:0	                  residuals.8.block.3.running_mean
   ResNet/DilatedBlock_8/BatchNorm_1/running_var:0	                   residuals.8.block.3.running_var
           ResNet/DilatedBlock_8/Conv1d_1/weight:0	                        residuals.8.block.5.weight


In [263]:
pthnet = dm.EncodeWideResNet(in_channel=1, init_channel=32, num_enc_layer=4, N_res_in_block=1, use_selu=True, num_classes=3)
#sd = th.load('saved/ENCODE-selu-adam/0002/state_dict_highscore')
#pthnet.load_state_dict(sd)

####################################

tf.reset_default_graph()
channel = 1
dilation = 2
testinput = np.random.randn(*[2, channel, 16])
x = tf.constant(testinput.transpose(0,2,1), 1)
#y1 = selu(x)
#y1 = Conv1d(x, 1, 1, 3, 2, bias=False)
#y1 = DilatedBlock(x, channel=channel, kernel_size=9, dilation=dilation)
#pytorch_testmodule = dm.DilatedBlock(in_channels=channel, 
#                                     out_channels=channel, kernel_size=9, nonlin=dm.SELU(), dilation=dilation)

y1 = Encoder(x, init_channel=32)
pytorch_testmodule = pthnet.encoder
pytorch_testmodule.eval()
with tf.Session() as sess:
    for f, (p_names, p) in zip(tf.global_variables(), pytorch_testmodule.state_dict().items()):
        print('%50s\t%50s'%(f.name, p_names))
        #print(p_names)
        initializer = tf.constant(p.transpose(0, -1).numpy(), 1)
        f.assign(initializer).eval()
        #print(initializer.eval()-f.eval())
    tf_result = sess.run(y1).transpose(0,2,1)
    pth_result = pytorch_testmodule(th.autograd.Variable(th.FloatTensor(testinput))).data.numpy()

           Encoder/DownSampleBlock/Conv1d/weight:0	                                          0.weight
             Encoder/DownSampleBlock/Conv1d/bias:0	                                            0.bias
        Encoder/DownSampleBlock/BatchNorm/weight:0	                                          1.weight
          Encoder/DownSampleBlock/BatchNorm/bias:0	                                            1.bias
  Encoder/DownSampleBlock/BatchNorm/running_mean:0	                                    1.running_mean
   Encoder/DownSampleBlock/BatchNorm/running_var:0	                                     1.running_var
         Encoder/DownSampleBlock_1/Conv1d/weight:0	                                          4.weight
           Encoder/DownSampleBlock_1/Conv1d/bias:0	                                            4.bias
      Encoder/DownSampleBlock_1/BatchNorm/weight:0	                                          5.weight
        Encoder/DownSampleBlock_1/BatchNorm/bias:0	                               

In [14]:
print(tf_result.shape, pth_result.shape)
#print('\nT=\n',tf_result)
#print('choice\n', tf_result.argmax(1))
#print('\nP=\n',pth_result)
#print('choice\n', pth_result.argmax(1))

#print('\nP-T =\n', np.abs(tf_result-pth_result))
print('\nmean abs(P-T)/max(P,T) =\n', (np.abs(tf_result-pth_result)/np.max([tf_result, pth_result])).mean())

(2, 64, 3) (2, 3)


ValueError: operands could not be broadcast together with shapes (2,64,3) (2,3) 

In [22]:

summary_writer = tf.summary.FileWriter('/tmp/model/', graph=tf.get_default_graph())
with tf.Session() as sess:
    tf.global_variables_initializer().run()
    print(y1.eval({x:np.ones([10, 256, 1])}).shape)

(10, 4)


In [256]:
dm.DilatedBlock(nonlin=dm.SELU())

DilatedBlock (
  (nonlin): SELU (
  )
  (block): Sequential (
    (0): BatchNorm1d(8, eps=1e-05, momentum=0.1, affine=True)
    (1): Conv1d(8, 8, kernel_size=(9,), stride=(1,), padding=(4,), bias=False)
    (2): SELU (
    )
    (3): BatchNorm1d(8, eps=1e-05, momentum=0.1, affine=True)
    (4): Dropout (p = 0.5, inplace)
    (5): Conv1d(8, 8, kernel_size=(9,), stride=(1,), padding=(8,), dilation=(2,), bias=False)
    (6): SELU (
    )
  )
)

In [8]:
with tf.Session() as sess:
    tf.global_variables_initializer().run()

In [9]:
tf_result

array([[[ 0.,  0.,  0.,  0.],
        [ 0.,  0.,  0.,  0.],
        [ 0.,  0.,  0.,  0.],
        [ 0.,  0.,  0.,  0.],
        [ 0.,  0.,  0.,  0.],
        [ 0.,  0.,  0.,  0.],
        [ 0.,  0.,  0.,  0.],
        [ 0.,  0.,  0.,  0.],
        [ 0.,  0.,  0.,  0.],
        [ 0.,  0.,  0.,  0.],
        [ 0.,  0.,  0.,  0.],
        [ 0.,  0.,  0.,  0.],
        [ 0.,  0.,  0.,  0.],
        [ 0.,  0.,  0.,  0.],
        [ 0.,  0.,  0.,  0.],
        [ 0.,  0.,  0.,  0.]],

       [[ 0.,  0.,  0.,  0.],
        [ 0.,  0.,  0.,  0.],
        [ 0.,  0.,  0.,  0.],
        [ 0.,  0.,  0.,  0.],
        [ 0.,  0.,  0.,  0.],
        [ 0.,  0.,  0.,  0.],
        [ 0.,  0.,  0.,  0.],
        [ 0.,  0.,  0.,  0.],
        [ 0.,  0.,  0.,  0.],
        [ 0.,  0.,  0.,  0.],
        [ 0.,  0.,  0.,  0.],
        [ 0.,  0.,  0.,  0.],
        [ 0.,  0.,  0.,  0.],
        [ 0.,  0.,  0.,  0.],
        [ 0.,  0.,  0.,  0.],
        [ 0.,  0.,  0.,  0.]],

       [[ 0.,  0.,  0.,  0.],
      

In [31]:
for i, a in enumerate(tf.global_variables()):
    print(a.get_shape())

(7, 1, 32)
(32,)
(32,)
(32,)
(32,)
(32,)
(7, 32, 64)
(64,)
(64,)
(64,)
(64,)
(64,)
(7, 64, 128)
(128,)
(128,)
(128,)
(128,)
(128,)
(7, 128, 256)
(256,)
(256,)
(256,)
(256,)
(256,)
(256,)
(256,)
(256,)
(256,)
(9, 256, 256)
(256,)
(256,)
(256,)
(256,)
(9, 256, 256)
(256,)
(256,)
(256,)
(256,)
(9, 256, 256)
(256,)
(256,)
(256,)
(256,)
(9, 256, 256)
(256,)
(256,)
(256,)
(256,)
(9, 256, 256)
(256,)
(256,)
(256,)
(256,)
(9, 256, 256)
(256,)
(256,)
(256,)
(256,)
(9, 256, 256)
(256,)
(256,)
(256,)
(256,)
(9, 256, 256)
(256,)
(256,)
(256,)
(256,)
(9, 256, 256)
(256,)
(256,)
(256,)
(256,)
(9, 256, 256)
(256,)
(256,)
(256,)
(256,)
(9, 256, 256)
(256,)
(256,)
(256,)
(256,)
(9, 256, 256)
(256,)
(256,)
(256,)
(256,)
(9, 256, 256)
(256,)
(256,)
(256,)
(256,)
(9, 256, 256)
(256,)
(256,)
(256,)
(256,)
(9, 256, 256)
(256,)
(256,)
(256,)
(256,)
(9, 256, 256)
(256,)
(256,)
(256,)
(256,)
(9, 256, 256)
(256,)
(256,)
(256,)
(256,)
(9, 256, 256)
(1, 256, 4)


In [None]:
a.assign()

In [5]:
len(tf.global_variables())

115

In [11]:
res = sd(th.autograd.Variable(th.FloatTensor(10, 1, 1000)))

In [16]:
res.max(1)[1][:, None]

Variable containing:
    0
    2
    0
    0
    0
    0
    2
    0
    0
    2
[torch.LongTensor of size 10x1]

In [24]:
for k, v in sd.state_dict().items():
    print(v.size())

torch.Size([32, 1, 7])
torch.Size([32])
torch.Size([32])
torch.Size([32])
torch.Size([32])
torch.Size([32])
torch.Size([64, 32, 7])
torch.Size([64])
torch.Size([64])
torch.Size([64])
torch.Size([64])
torch.Size([64])
torch.Size([128, 64, 7])
torch.Size([128])
torch.Size([128])
torch.Size([128])
torch.Size([128])
torch.Size([128])
torch.Size([256, 128, 7])
torch.Size([256])
torch.Size([256])
torch.Size([256])
torch.Size([256])
torch.Size([256])
torch.Size([256])
torch.Size([256])
torch.Size([256])
torch.Size([256])
torch.Size([256, 256, 9])
torch.Size([256])
torch.Size([256])
torch.Size([256])
torch.Size([256])
torch.Size([256, 256, 9])
torch.Size([256])
torch.Size([256])
torch.Size([256])
torch.Size([256])
torch.Size([256, 256, 9])
torch.Size([256])
torch.Size([256])
torch.Size([256])
torch.Size([256])
torch.Size([256, 256, 9])
torch.Size([256])
torch.Size([256])
torch.Size([256])
torch.Size([256])
torch.Size([256, 256, 9])
torch.Size([256])
torch.Size([256])
torch.Size([256])
torch.Si

In [62]:
a = nn.Conv1d(3, 4, 5)

In [63]:
a.state_dict()

OrderedDict([('weight', 
              (0 ,.,.) = 
               -0.1830  0.1597  0.1484 -0.1916  0.2417
               -0.1660  0.0768 -0.1457  0.1034 -0.2297
               -0.1860  0.0322  0.0990  0.2096 -0.1983
              
              (1 ,.,.) = 
               -0.1521  0.1666 -0.0709  0.0931  0.1345
                0.1034 -0.0586  0.1383 -0.1212 -0.1902
               -0.2440 -0.0988 -0.0854  0.1089  0.1279
              
              (2 ,.,.) = 
                0.0536  0.2212  0.0824  0.0108  0.2315
               -0.2318  0.0847 -0.2136 -0.0735  0.2240
                0.2383  0.0270  0.2266 -0.1901 -0.0234
              
              (3 ,.,.) = 
                0.2279  0.2184  0.0534 -0.0578  0.1607
                0.1085  0.1057  0.0550 -0.0299  0.0537
                0.0227  0.1641 -0.1961  0.0515 -0.1527
              [torch.FloatTensor of size 4x3x5]), ('bias', 
               0.1855
               0.2336
              -0.0204
              -0.0937
              [tor