In [90]:
import io, os, sys, types
from IPython import get_ipython
from nbformat import read
from IPython.core.interactiveshell import InteractiveShell
def find_notebook(fullname, path=None):
    """Locate the ``.ipynb`` file that backs a module name.

    Only the last dotted component of ``fullname`` is used as the file name
    ("foo.bar" -> "bar.ipynb"); it is looked up inside every directory listed
    in ``path``.  If a directory holds no file with that exact name, the
    underscores of the *file name* are replaced by spaces, so that
    ``import Notebook_Name`` can find "Notebook Name.ipynb".

    Parameters
    ----------
    fullname : str
        Fully qualified module name, e.g. ``"foo.bar"``.
    path : iterable of str, optional
        Directories to search.  When empty or ``None`` the relative directory
        ``''`` (the current working directory) is searched.

    Returns
    -------
    str or None
        Path of the first matching notebook, or ``None`` when nothing matches.
    """
    name = fullname.rsplit('.', 1)[-1]
    if not path:
        path = ['']
    # The space variant is derived from the file name only: applying the
    # substitution to the joined path would also rewrite underscores in the
    # directory part and make notebooks in such directories unfindable.
    candidates = (name + ".ipynb", name.replace("_", " ") + ".ipynb")
    for d in path:
        for filename in candidates:
            nb_path = os.path.join(d, filename)
            if os.path.isfile(nb_path):
                return nb_path
    return None
        
class NotebookLoader(object):
    """Module loader that executes a Jupyter notebook as a Python module.

    Both import protocols are supported: the legacy ``load_module`` entry
    point and the ``create_module``/``exec_module`` pair used by the
    spec-based import system.

    Parameters
    ----------
    path : list of str, optional
        Directories handed to ``find_notebook`` when resolving a module name.
    """

    def __init__(self, path=None):
        # Cell source is transformed through the shared IPython shell so that
        # magics and shell escapes become executable Python.
        self.shell = InteractiveShell.instance()
        self.path = path

    def _read_notebook(self, fullname):
        """Return ``(path, notebook)`` for ``fullname``.

        Raises
        ------
        ImportError
            If no notebook file can be found for ``fullname``.
        """
        path = find_notebook(fullname, self.path)
        if path is None:
            # Without this guard the failure would surface as an unrelated
            # TypeError from io.open(None, ...).
            raise ImportError("No notebook found for module %r" % fullname)

        print ("importing Jupyter notebook from %s" % path)

        # load the notebook object (second argument: notebook format version)
        with io.open(path, 'r', encoding='utf-8') as f:
            nb = read(f, 4)
        return path, nb

    def _run_cells(self, module, path, nb):
        """Execute every code cell of ``nb`` inside ``module``'s namespace."""
        module.__file__ = path
        module.__loader__ = self
        module.__dict__['get_ipython'] = get_ipython

        # extra work to ensure that magics that would affect the user_ns
        # actually affect the notebook module's ns
        save_user_ns = self.shell.user_ns
        self.shell.user_ns = module.__dict__
        try:
            for cell in nb.cells:
                if cell.cell_type == 'code':
                    # transform the input to executable Python
                    code = self.shell.input_transformer_manager.transform_cell(cell.source)
                    # run the code in the module
                    exec(code, module.__dict__)
        finally:
            # always hand the interactive namespace back to the user
            self.shell.user_ns = save_user_ns

    def create_module(self, spec):
        """Return ``None`` so the import system creates a default module."""
        return None

    def exec_module(self, module):
        """Populate ``module`` by running the notebook named by its ``__name__``."""
        path, nb = self._read_notebook(module.__name__)
        self._run_cells(module, path, nb)

    def load_module(self, fullname):
        """import a notebook as a module

        The new module is registered in ``sys.modules`` before its cells run.
        If a cell raises, the registration is removed again so that a later
        import does not silently return a half-initialised module.

        Returns
        -------
        module
            The module whose namespace holds the notebook's definitions.

        Raises
        ------
        ImportError
            If the notebook file cannot be found.
        """
        path, nb = self._read_notebook(fullname)

        # create the module and add it to sys.modules
        mod = types.ModuleType(fullname)
        sys.modules[fullname] = mod
        try:
            self._run_cells(mod, path, nb)
        except BaseException:
            if sys.modules.get(fullname) is mod:
                del sys.modules[fullname]
            raise
        return mod
class NotebookFinder(object):
    """Meta path finder that locates Jupyter notebooks.

    One ``NotebookLoader`` is created per distinct search path and cached in
    ``self.loaders``.  Both the legacy ``find_module`` hook and the
    spec-based ``find_spec`` hook are provided; the import system of
    Python 3.12+ only consults ``find_spec``.
    """

    def __init__(self):
        # Maps a search-path key (None, or the joined directories) to its loader.
        self.loaders = {}

    def find_module(self, fullname, path=None):
        """Return the loader for ``fullname``, or ``None`` if no notebook matches.

        Parameters
        ----------
        fullname : str
            Fully qualified module name being imported.
        path : iterable of str, optional
            Directories to search, forwarded to ``find_notebook``.
        """
        nb_path = find_notebook(fullname, path)
        if not nb_path:
            return None

        # lists aren't hashable, so the directories are joined into one string;
        # every empty path (None, [], ...) shares the key None.
        key = os.path.sep.join(path) if path else None

        if key not in self.loaders:
            self.loaders[key] = NotebookLoader(path)
        return self.loaders[key]

    def find_spec(self, fullname, path=None, target=None):
        """Return a module spec for ``fullname``, or ``None`` if no notebook matches.

        ``target`` is accepted for protocol compatibility and is not used.
        """
        loader = self.find_module(fullname, path)
        if loader is None:
            return None
        # Imported here so the block does not depend on a new top-level import.
        from importlib.util import spec_from_loader
        return spec_from_loader(fullname, loader)

# Install the notebook finder on the import machinery.  Finders left behind by
# an earlier run of this cell (same class name, defined in this namespace) are
# dropped first, so re-executing the cell does not stack duplicate finders on
# sys.meta_path.
sys.meta_path[:] = [
    finder for finder in sys.meta_path
    if (type(finder).__name__, type(finder).__module__)
    != (NotebookFinder.__name__, NotebookFinder.__module__)
]
sys.meta_path.append(NotebookFinder())

In [91]:
import torch
from torch import nn
from torch.nn import init

In [92]:
# Small multilayer perceptron: 4 inputs -> 3 hidden units (ReLU) -> 1 output.
net = nn.Sequential(
    nn.Linear(in_features=4, out_features=3),
    nn.ReLU(),
    nn.Linear(in_features=3, out_features=1),
)

In [93]:
print(net)

Sequential(
  (0): Linear(in_features=4, out_features=3, bias=True)
  (1): ReLU()
  (2): Linear(in_features=3, out_features=1, bias=True)
)


In [94]:
X=torch.rand(2,4)

In [95]:
Y=net(X).sum()

In [96]:
Y

tensor(-0.1928, grad_fn=<SumBackward0>)

In [97]:
print(type(net.named_parameters()))

<class 'generator'>


In [98]:
# Walk the (name, Parameter) pairs of the network and show each parameter's shape.
for name,param in net.named_parameters():
    print(name,param.size())

0.weight torch.Size([3, 4])
0.bias torch.Size([3])
2.weight torch.Size([1, 3])
2.bias torch.Size([1])


In [99]:
class MyModule(nn.Module):
    """Toy module contrasting a registered parameter with a plain tensor.

    ``weight`` is wrapped in ``nn.Parameter`` and therefore shows up in
    ``named_parameters()``; ``weight1`` is an ordinary tensor attribute and
    does not.
    """

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        # Registered with the module because it is an nn.Parameter.
        self.weight = nn.Parameter(torch.rand(20, 20))
        # Plain tensor: stored as a regular attribute, not as a parameter.
        self.weight1 = torch.rand(20, 20)

    def forward(self, x):
        """Placeholder forward pass; performs no computation and returns None."""
        return None

In [100]:
n=MyModule()
# Each item yielded by named_parameters() is a (name, Parameter) tuple, so
# `name` holds the whole pair here and print() shows the full 20x20 tensor.
for name in n.named_parameters():
    print(name)

('weight', Parameter containing:
tensor([[0.1239, 0.3848, 0.8373, 0.6588, 0.7113, 0.8705, 0.0796, 0.3825, 0.1711,
         0.9441, 0.7956, 0.1103, 0.0068, 0.9385, 0.6255, 0.7774, 0.9924, 0.3567,
         0.7544, 0.0140],
        [0.1768, 0.0403, 0.8012, 0.2789, 0.8994, 0.3911, 0.9659, 0.9159, 0.7282,
         0.1771, 0.9174, 0.5610, 0.5464, 0.0122, 0.8337, 0.0148, 0.7537, 0.5281,
         0.5309, 0.8504],
        [0.8882, 0.0131, 0.0799, 0.5777, 0.8364, 0.1469, 0.4844, 0.9128, 0.9627,
         0.0274, 0.5378, 0.4283, 0.8872, 0.1107, 0.6105, 0.2327, 0.7091, 0.1408,
         0.8121, 0.7699],
        [0.4932, 0.4027, 0.0991, 0.3281, 0.5955, 0.3658, 0.5784, 0.3158, 0.6725,
         0.9345, 0.1733, 0.8438, 0.6291, 0.9810, 0.7884, 0.5437, 0.2490, 0.4427,
         0.8027, 0.4939],
        [0.8799, 0.8934, 0.4125, 0.9576, 0.7691, 0.9591, 0.4713, 0.1658, 0.3762,
         0.1995, 0.5198, 0.8160, 0.2834, 0.1425, 0.1825, 0.2042, 0.9459, 0.1795,
         0.9083, 0.3752],
        [0.7194, 0.3767, 0.

### 共享模型参数

In [101]:
# Reuse one bias-free Linear instance for both stages: both layers of the
# Sequential are the same module object and therefore share a single weight.
linear = nn.Linear(in_features=1, out_features=1, bias=False)
net = nn.Sequential(linear, linear)
print(net)

Sequential(
  (0): Linear(in_features=1, out_features=1, bias=False)
  (1): Linear(in_features=1, out_features=1, bias=False)
)


下面的输出表明：在这个 Sequential 中，两个线性层共享同一个权重参数，因此 `named_parameters()` 只列出一个参数 `0.weight`。

In [102]:
# Set every parameter to the constant 3 and print it.  Both layers are the
# same Linear object, so the shared weight is visited once and only
# `0.weight` is printed.
for name,param in net.named_parameters():
    init.constant_(param,val=3)
    print(name,param.data)

0.weight tensor([[3.]])


In [103]:
x=torch.ones(1,1,requires_grad=True)

In [104]:
x

tensor([[1.]], requires_grad=True)

In [105]:
y=net(x).sum()

In [106]:
y

tensor(9., grad_fn=<SumBackward0>)

In [107]:
y.backward()

In [108]:
net[0].weight.grad.data

tensor([[6.]])

In [109]:
x.grad.data

tensor([[9.]])

In [110]:
net[1].weight.grad

tensor([[6.]])

In [111]:
tmp = torch.ones(2,2,requires_grad=True)

In [112]:
y = tmp+2

In [113]:
y

tensor([[3., 3.],
        [3., 3.]], grad_fn=<AddBackward0>)

In [114]:
y.sum().backward()

In [115]:
# NOTE(review): `x` is the input of the earlier shared-weight example; the
# backward() call just before this ran on `y = tmp + 2`, so this still shows
# the old gradient of `x`.  `tmp.grad` was presumably intended — confirm.
x.grad

tensor([[9.]])

In [116]:
# Single 1 -> 1 linear layer with its weight fixed to 3 and its bias to 0.
l = torch.nn.Linear(in_features=1, out_features=1)
init.constant_(l.weight, val=3)
# Kept as the final expression: the cell displays the returned bias Parameter.
init.constant_(l.bias, val=0)

Parameter containing:
tensor([0.], requires_grad=True)

In [117]:
tmp = torch.ones(1,1)

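### 4.2.4 共享参数的梯度
关于 4.2.4 节的参数共享：两个线性层共用同一个权重时，权重的梯度（6）乍看应该是错误的。下面把同一个线性层 `l` 连续调用两次来验证：$z = w\,(w\,x)$，所以 $\partial z/\partial w = 2wx$，在 $w=3,\ x=1$ 时为 6——梯度是两次使用的累加。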

In [118]:
y = l(tmp)

In [119]:
z = l(y)

可以看出，第一次前向传播后 y = 3（w = 3，b = 0，输入为 1）；再经过同一层第二次传播后 z = 9：

In [120]:
z

tensor([[9.]], grad_fn=<AddmmBackward>)

In [121]:
z.backward()

In [122]:
l.weight.grad

tensor([[6.]])

In [123]:
# NOTE(review): `tmp` was re-created with torch.ones(1,1) without
# requires_grad=True, so no gradient is tracked for it and this is expected to
# be None — create `tmp` with requires_grad=True if dz/dtmp is wanted. Confirm.
tmp.grad