In [1]:
%matplotlib inline
import torch
from IPython import display
from matplotlib import pyplot as plt
import numpy as np
import random

In [2]:
# Generate a synthetic linear-regression dataset:
#   labels = true_w[0] * x0 + true_w[1] * x1 + true_b + noise
# Seed NumPy and torch so that "Restart Kernel -> Run All" reproduces the same
# samples (NumPy RNG) and the same torch-side randomness later in the notebook.
seed = 42
np.random.seed(seed)
torch.manual_seed(seed)

feature_num = 2
sample_num = 1000
true_w = [2,-3.4]
true_b = 4.2
# np.random.normal produces float64; convert to torch.float (float32).
features = torch.tensor(np.random.normal(0,1,(sample_num,feature_num)),dtype=torch.float)
labels = true_w[0] * features[:,0] + true_w[1] * features[:,1] + true_b
# Add Gaussian observation noise with standard deviation 0.01 to every label.
labels += torch.tensor(np.random.normal(0,0.01,size=labels.size()),dtype=torch.float)

# Sanity-check the shapes that the rest of the notebook relies on.
assert features.shape == (sample_num, feature_num)
assert labels.shape == (sample_num,)

In [3]:
# Read the data in shuffled mini-batches
import torch.utils.data as Data
batch_size = 10

# Pair every feature row with its label; DataLoader then batches and shuffles them.
dataset = Data.TensorDataset(features, labels)
data_iter = Data.DataLoader(dataset, batch_size=batch_size, shuffle=True)

# Peek at the first mini-batch only.
first_batch = next(iter(data_iter))
X, y = first_batch
print(X, y)

tensor([[-0.4390,  1.1119],
        [-0.8313, -1.2455],
        [-0.5052,  0.6056],
        [ 2.0362, -0.3696],
        [ 1.1511, -1.0748],
        [-1.0065, -1.1541],
        [-0.2085,  0.6747],
        [ 0.7455,  1.5894],
        [ 0.7679, -1.1447],
        [ 0.7240, -0.9364]]) tensor([-0.4584,  6.7666,  1.1268,  9.4984, 10.1507,  6.0910,  1.5085,  0.2764,
         9.6165,  8.8296])


In [5]:
# Define the model
from torch import nn
class LinearRegression(nn.Module):
    """Linear regression model: one fully connected layer with a single output."""

    def __init__(self, n_feature):
        """Build the layer.

        Args:
            n_feature: number of input features per sample.
        """
        super().__init__()
        self.linear = nn.Linear(n_feature, 1)

    def forward(self, x):
        """Apply the linear layer to ``x`` and return its output."""
        return self.linear(x)

# Instantiate the model for `feature_num` inputs and show its layer structure.
net = LinearRegression(n_feature=feature_num)
print(net)

LinearRegression(
  (linear): Linear(in_features=2, out_features=1, bias=True)
)


In [7]:
# Build the same model with nn.Sequential, which chains its layers in order
output_layer = nn.Linear(feature_num, 1)
net2 = nn.Sequential(output_layer)
print(net2)
# Layers of a Sequential container can be accessed by index.
print(net2[0])

Sequential(
  (0): Linear(in_features=2, out_features=1, bias=True)
)
Linear(in_features=2, out_features=1, bias=True)


In [8]:
# Inspect the learnable parameters: net.parameters() returns a generator
# that yields each Parameter of the model in turn.
for param in net.parameters():
    print(param)

Parameter containing:
tensor([[-0.4245,  0.1204]], requires_grad=True)
Parameter containing:
tensor([-0.3449], requires_grad=True)


In [None]:
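# Note: torch.nn modules are designed for mini-batch input rather than a single sample;
# to pass one sample, add a batch dimension first (e.g. x.unsqueeze(0)).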

In [13]:
# Initialise the model parameters: weights drawn from N(0, 0.01^2), bias set to 0
from torch.nn import init
first_layer = net2[0]
init.normal_(first_layer.weight, mean=0.0, std=0.01)
# Kept as the last expression so the cell displays the initialised bias.
init.constant_(first_layer.bias, val=0.0)


Parameter containing:
tensor([0.], requires_grad=True)

In [14]:
# Define the loss function: mean squared error, averaged over all elements
# ('mean' is the default reduction; spelled out here for clarity).
loss = nn.MSELoss(reduction='mean')

In [20]:
# Define the optimisation algorithm: mini-batch SGD over all parameters of net2
import torch.optim as optim
learning_rate = 0.03
optimizer = optim.SGD(net2.parameters(), lr=learning_rate)
print(optimizer)

SGD (
Parameter Group 0
    dampening: 0
    lr: 0.03
    momentum: 0
    nesterov: False
    weight_decay: 0
)


In [21]:
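# Different learning rates can be set for different sub-networks, for example:
#optimizer = optim.SGD([
#    {'params':net.subnet1.parameters()},
#    {'params':net.subnet2.parameters(),'lr':0.01}
#],lr=0.03)
# A parameter group without its own 'lr' uses the outermost default learning rate (0.03 here).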

In [22]:
# To adjust the learning rate later, scale it in every parameter group:
#for param_group in optimizer.param_groups:
#    param_group['lr'] *= 0.1

In [30]:
# Train the model
num_epochs = 3
for epoch in range(1, num_epochs + 1):
    for X, y in data_iter:
        # Clear the gradients left over from the previous step before backprop.
        optimizer.zero_grad()
        predictions = net2(X)
        # y is 1-D; view it as a column so it lines up with the (batch, 1) predictions.
        batch_loss = loss(predictions, y.view(-1, 1))
        batch_loss.backward()
        optimizer.step()
    # Note: this reports the loss of the epoch's last mini-batch, not an epoch average.
    print('epoch %d, loss: %f' % (epoch, batch_loss.item()))
    print(net2[0].bias)

epoch 1, loss: 0.000192
Parameter containing:
tensor([4.1925], requires_grad=True)
epoch 2, loss: 0.000092
Parameter containing:
tensor([4.2003], requires_grad=True)
epoch 3, loss: 0.000112
Parameter containing:
tensor([4.2004], requires_grad=True)


In [31]:
# Compare the learned parameters with the ground-truth values used to build the data
dense = net2[0]
for expected, learned in ((true_w, dense.weight), (true_b, dense.bias)):
    print(expected, learned)

[2, -3.4] Parameter containing:
tensor([[ 2.0001, -3.3989]], requires_grad=True)
4.2 Parameter containing:
tensor([4.2004], requires_grad=True)
