In [14]:
import torch as t
from torch import nn
from torch import optim

In [2]:
class Linear(nn.Module):
    """Fully connected layer computing ``y = x @ w + b``.

    Args:
        in_features: size of the last dimension of the input.
        out_features: size of the last dimension of the output.

    Both parameters are drawn from a standard normal distribution:
    ``w`` has shape (in_features, out_features) and ``b`` has shape
    (out_features,).
    """

    def __init__(self, in_features, out_features):
        super().__init__()
        # Creation order (w, then b) is kept so seeded runs draw the same values.
        self.w = nn.Parameter(t.randn(in_features, out_features))
        self.b = nn.Parameter(t.randn(out_features))

    def forward(self, x):
        """Apply the affine map to ``x`` of shape (..., in_features).

        ``matmul`` (unlike ``mm``, which only accepts 2-D operands) allows
        any number of leading batch dimensions, and the bias is broadcast
        over them, so no explicit ``expand_as`` is required.

        Returns:
            Tensor of shape (..., out_features).
        """
        return t.matmul(x, self.w) + self.b

In [6]:
# Smoke test: a 4 -> 3 layer applied to a random batch of two samples.
layer = Linear(in_features=4, out_features=3)
input_ = t.randn(2, 4)
out = layer(input_)
out

tensor([[ 2.3582, -2.4739, -3.5985],
        [ 4.9557,  2.6804, -0.5911]], grad_fn=<AddBackward0>)

In [7]:
# List the layer's registered parameters together with their attribute names.
for name, parameter in layer.named_parameters():
    print(name, parameter)  # prints the weight `w` first, then the bias `b`

w Parameter containing:
tensor([[ 0.3905,  1.4701,  1.7444],
        [ 0.4800, -0.7096, -0.7714],
        [-1.3453, -0.7401,  1.3357],
        [ 0.2643, -0.4295,  0.3789]], requires_grad=True)
b Parameter containing:
tensor([ 2.1937, -1.1663, -0.7490], requires_grad=True)


In [8]:
class Perceptron(nn.Module):
    """Two-layer perceptron: Linear -> sigmoid -> Linear.

    Args:
        in_features: width of the input.
        hidden_features: width of the hidden layer.
        out_features: width of the output.
    """

    def __init__(self, in_features, hidden_features, out_features):
        super().__init__()
        # Linear here is the custom fully connected layer defined earlier
        # in this notebook.
        self.layer1 = Linear(in_features, hidden_features)
        self.layer2 = Linear(hidden_features, out_features)

    def forward(self, x):
        """Run ``x`` through the hidden layer, a sigmoid, and the output layer."""
        hidden = t.sigmoid(self.layer1(x))
        return self.layer2(hidden)

In [9]:
# Parameters of nested sub-modules are reported as "<submodule>.<name>".
perceptron = Perceptron(in_features=3, hidden_features=4, out_features=1)
for name, param in perceptron.named_parameters():
    print(name, param.shape)

layer1.w torch.Size([3, 4])
layer1.b torch.Size([4])
layer2.w torch.Size([4, 1])
layer2.b torch.Size([1])


In [19]:
class LogisticRegression(nn.Module):
    """Logistic regression: ``sigmoid(x @ w + b)``.

    Args:
        in_features: size of the last dimension of the input.
        out_features: size of the last dimension of the output.

    Both parameters are drawn from a standard normal distribution:
    ``w`` has shape (in_features, out_features) and ``b`` has shape
    (out_features,).
    """

    def __init__(self, in_features, out_features):
        super().__init__()
        # Creation order (w, then b) is kept so seeded runs draw the same values.
        self.w = nn.Parameter(t.randn(in_features, out_features))
        self.b = nn.Parameter(t.randn(out_features))

    def forward(self, x):
        """Map ``x`` of shape (..., in_features) to element-wise sigmoid outputs.

        ``matmul`` (unlike ``mm``, which only accepts 2-D operands) allows
        any number of leading batch dimensions, and the bias is broadcast
        over them, so no explicit ``expand_as`` is required.

        Returns:
            Tensor of shape (..., out_features).
        """
        z = t.matmul(x, self.w) + self.b
        return t.sigmoid(z)

In [20]:
# Scalar-in, scalar-out logistic regression: one weight and one bias.
l = LogisticRegression(in_features=1, out_features=1)

In [22]:
# One SGD step on the logistic-regression model `l`, driven by a made-up gradient.
# (`optim` is imported in the notebook's import cell.)
optimizer = optim.SGD(params=l.parameters(), lr=1)
# Clear gradients; equivalent to l.zero_grad() because the optimizer was
# given every parameter of `l`.
optimizer.zero_grad()

# NOTE(review): `input` shadows the Python built-in; the name is kept because
# later cells may refer to it.
input = t.randn(100, 1)
output = l(input)
# Fake backward pass: the output is fed back in as its own upstream gradient,
# just so every parameter receives a .grad for the optimizer to use.
output.backward(output)

optimizer.step()  # apply the update: p <- p - lr * p.grad