In [1]:
import torch as t
from torch import nn
from torch.autograd import Variable as V


In [4]:
class Linear(nn.Module):
    def __init__(self,in_features,out_features):
        super(Linear,self).__init__()
        self.w  = nn.Parameter(t.randn(in_features,out_features))
        self.b = nn.Parameter(t.randn(out_features))
    
    def forward(self,x):
        x = x.mm(self.w)
        return x + self.b

layer = Linear(4,3)
input = V(t.randn(2,4))
output = layer(input)
print(output)

tensor([[-3.8215,  2.2869, -3.2666],
        [-1.5071,  0.5723,  0.0329]])


In [5]:
for name,parameter in layer.named_parameters():
    print(name,parameter)

w Parameter containing:
tensor([[ 1.1823, -0.8361, -1.2115],
        [ 0.4752, -0.0507, -0.6972],
        [ 0.6956, -0.2602,  0.7799],
        [-1.0541,  0.2979, -1.7820]])
b Parameter containing:
tensor([-0.6705,  0.3712, -2.8513])


In [7]:
class Perceptron(nn.Module):
    def __init__(self,in_features,hidden_features,out_features):
        nn.Module.__init__(self)
        self.layer1 = Linear(in_features,hidden_features)
        self.layer2 = Linear(hidden_features,out_features)
    
    def forward(self,x):
        x = layer1(x)
        x = nn.Sigmoid(x)
        out = layer2(x)
        return out

perceptron = Perceptron(3,4,1)
for name,parameter in perceptron.named_parameters():
    print(name,parameter.size())

layer1.w torch.Size([3, 4])
layer1.b torch.Size([4])
layer2.w torch.Size([4, 1])
layer2.b torch.Size([1])


In [8]:
from PIL import Image
from torchvision.transforms import ToPILImage,ToTensor
to_tensor = ToTensor()
to_pil = ToPILImage()

lena = Image.open('F:/Pycharm/lena.jpg')


In [9]:
input1 = to_tensor(lena).unsqueeze(0)
kernel = t.ones(3,3) / -9
kernel[1][1] = 1
conv = nn.Conv2d(1,1,(3,3),1,bias=False)
conv.weight.data = kernel.view(1,1,3,3)
out = conv(V(input1))
to_pil(out.data.squeeeze(0))

RuntimeError: Given groups=1, weight[1, 1, 3, 3], so expected input[1, 3, 300, 300] to have 1 channels, but got 3 channels instead

In [10]:
input1 = V(t.randn(2,3))
linear = nn.Linear(3,4)
h = linear(input1)
print(h)

tensor([[-0.2236,  1.0610,  0.2820, -0.1842],
        [-0.0414, -1.0146,  0.1228, -0.3233]])


In [11]:
bn = nn.BatchNorm1d(4)
bn.weight.data = t.ones(4) * 4
bn.bias.data = t.zeros(4)
bn_out = bn(h)
bn_out.mean(0),bn_out.var(0)

(tensor(1.00000e-07 *
        [ 0.0000,  0.0000,  0.0000, -4.7684]),
 tensor([ 31.9615,  31.9997,  31.9496,  31.9340]))

In [12]:
relu = nn.ReLU(inplace=True)
input = V(t.randn(2,3))

print(input)
output = relu(input)
print(output)

tensor([[-0.4587, -1.1515,  0.3176],
        [-0.7854,  0.4379,  0.7150]])
tensor([[ 0.0000,  0.0000,  0.3176],
        [ 0.0000,  0.4379,  0.7150]])


In [None]:
net1 = nn.Sequential()
net1.add_module('conv',nn.Conv2d(3,3,3))
net1.add_module('batchnorm',nn.BatchNorm2d(3))
net1.add_module('activation_layer',nn.ReLU())

net2 = nn.Sequential(
nn.Conv2d(3,3,3),
nn.BatchNorm2d(3),
nn.ReLU()
)
from collections import OrderedDict

net3 = nn.Sequential()

In [3]:
class Mymodule(nn.Module):
    def __init__(self):
        nn.Module.__init__(self)
        self.list = [nn.Linear(3,4),nn.ReLU()]
        self.module_list = nn.ModuleList([nn.Conv2d(3,3,3),nn.ReLU()])
    
    def forward(self):
        pass
    

module = Mymodule()
module

Mymodule(
  (module_list): ModuleList(
    (0): Conv2d(3, 3, kernel_size=(3, 3), stride=(1, 1))
    (1): ReLU()
  )
)

In [4]:
for name,param in module.named_parameters():
    print(name,param)

module_list.0.weight Parameter containing:
tensor([[[[-0.1568, -0.1539,  0.1271],
          [-0.0341,  0.1552, -0.0705],
          [-0.0228, -0.0531,  0.1033]],

         [[-0.0290, -0.0612,  0.1752],
          [-0.0443,  0.1360,  0.0650],
          [-0.1013, -0.1112, -0.0380]],

         [[ 0.0432, -0.0138,  0.0361],
          [-0.0035, -0.0582, -0.0646],
          [ 0.1375, -0.1269,  0.0359]]],


        [[[-0.0078,  0.1323, -0.0875],
          [-0.0077,  0.0815,  0.0843],
          [-0.1312,  0.1130, -0.0558]],

         [[ 0.1307,  0.1370, -0.0895],
          [ 0.0886, -0.1650,  0.1579],
          [ 0.0744, -0.0384,  0.0151]],

         [[-0.0776, -0.0704,  0.0692],
          [ 0.0467, -0.0395,  0.1517],
          [ 0.1763,  0.0974, -0.0783]]],


        [[[ 0.1426,  0.1073,  0.1921],
          [ 0.0063,  0.0546, -0.0974],
          [ 0.0410,  0.0155, -0.0944]],

         [[-0.1633,  0.0394,  0.0057],
          [-0.1708, -0.1686, -0.1061],
          [-0.1124, -0.1902,  0.0418]],

 

In [8]:
t.manual_seed(1)
score = V(t.randn(3,2))
print(score)
label = V(t.Tensor([1,0,1])).long()

criterion = nn.CrossEntropyLoss()
loss = criterion(score,label)
print(loss)

tensor([[ 0.6614,  0.2669],
        [ 0.0617,  0.6213],
        [-0.4519, -0.1661]])
tensor(0.8272)


In [16]:
#优化器
class Net(nn.Module):
    def __init__(self):
        nn.Module.__init__(self)
        self.features = nn.Sequential(
                    nn.Conv2d(3,6,5),
                    nn.ReLU(),
                    nn.MaxPool2d(2,2),
                    nn.Conv2d(6,16,5),
                    nn.ReLU(),
                    nn.MaxPool2d(2,2)

        )
        
        self.classifier = nn.Sequential(
                        nn.Linear(16*5*5,120),
                        nn.ReLU(),
                        nn.Linear(120,84),
                        nn.ReLU(),
                        nn.Linear(84,10)
        )
    
    def forward(self,x):
        x = self.features(x)
        x = x.view(-1,16*5*5)
        x = self.classifier(x)
        return x
    
net = Net()

In [17]:
from torch import optim
optimizer = optim.SGD(params=net.parameters(),lr=1)
optimizer.zero_grad()
input = V(t.randn(1,3,32,32))
output = net(input)

output.backward(output)

optimizer.step()

In [18]:
#为不同的子网络设置不同的学习率

optimizer = optim.SGD([
    {'params':net.features.parameters()},
    {'params':net.classifier.parameters(),'lr':1e-2}
],lr=1e-5)


In [19]:
#只为两个全连接层设置较大的学习率，其余层的学习率较小
import torch
special_layers = nn.ModuleList([net.classifier[0],net.classifier[2]])
special_layers_params = list(map(id,special_layers.parameters()))

base_params = filter(lambda p:id(p) not in special_layers_params,net.parameters())

optimizer = torch.optim.SGD([{'params':base_params},{'params':special_layers.parameters(),'lr':0.01}],lr=0.001) 

In [21]:
input = V(t.randn(2,3))

model = nn.Linear(3,4)

output1 = model(input)
output2 = nn.functional.linear(input,model.weight,model.bias)

print(output1)
print(output2)

tensor([[-0.8794, -0.6148, -0.5532,  0.7073],
        [-0.2332,  0.1333, -0.3331,  0.4284]])
tensor([[-0.8794, -0.6148, -0.5532,  0.7073],
        [-0.2332,  0.1333, -0.3331,  0.4284]])


In [24]:
b2 = nn.ReLU()(input)

In [25]:
print(b2)

tensor([[ 1.1371,  0.5824,  0.0000],
        [ 0.3988,  0.0000,  0.0000]])


In [None]:
from torch.nn import functional as F
class Net(nn.Module):
    def __init__(self):
        nn.Module.__init__(self)
        self.conv1 = nn.Conv2d(3,6,5)
        self.conv2 = nn.Conv2d(6,16,5)
        self.fc1 = nn.Linear(16*5*5,120)
        self.fc2 = nn.Linear(120,84)
        self.fc3 = nn.Linear(84,10)
    
    def forward(self,x):
        x = F.pool(F.relu(self.conv1(x)),2)
        x = F.pool(F.relu(self.conv2(x)),2)
        x = x.view(-1,16*5*5)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x
    



In [26]:
class Net(nn.Module):
    def __init__(self):
        nn.Module.__init__(self)
        self.params = nn.Parameter(t.randn(3,3))
        self.submodule = nn.Linear(3,4)
    
    def forward(self,input):
        x  = self.params@input
        x = self.submodule(x)
        return x
    
net = Net()
print(net)

Net(
  (submodule): Linear(in_features=3, out_features=4, bias=True)
)


In [27]:
net._modules

OrderedDict([('submodule', Linear(in_features=3, out_features=4, bias=True))])

In [28]:
net._parameters

OrderedDict([('params', Parameter containing:
              tensor([[-1.1465, -0.2239, -0.1879],
                      [-0.2030,  1.2752,  0.1303],
                      [ 1.6539,  0.0022, -0.1065]]))])

In [29]:
for name,param in net.named_parameters():
    print(name,param.size())

params torch.Size([3, 3])
submodule.weight torch.Size([4, 3])
submodule.bias torch.Size([4])
