In [1]:
import os
# Restrict this process to the physical GPU with index 3. This is done before
# `import torch` so the setting is already in place when CUDA is initialised;
# the selected GPU is then addressed as cuda:0 inside this notebook.
# NOTE(review): the index is hard-coded -- on a host with fewer than four GPUs
# no CUDA device will be visible.
os.environ["CUDA_VISIBLE_DEVICES"] = "3"
import torch
from sklearn import datasets
import math
import numpy as np

In [2]:
def load_boston(ratio=0.8, seed=None):
    """Load the Boston housing data and split it into train/validation sets.

    Features and targets are rescaled by constants (see the inline comments),
    the samples are shuffled, and the first ``ceil(num_samples * ratio)``
    shuffled samples form the training set; the rest form the validation set.

    Args:
        ratio: Fraction of samples assigned to the training split. Must lie
            in ``[0, 1]``.
        seed: Optional seed for the shuffle. ``None`` (the default) draws from
            NumPy's global random state, exactly as before; any other value
            makes the split reproducible.

    Returns:
        ``((X_train, Y_train), (X_valid, Y_valid))`` as NumPy arrays, where
        every ``Y`` has shape ``(n, 1)``.

    Raises:
        ValueError: If ``ratio`` is outside ``[0, 1]``.
    """
    if not 0 <= ratio <= 1:
        raise ValueError(f"ratio must be in [0, 1], got {ratio!r}")

    # `return_X_y` is passed by keyword: newer scikit-learn releases reject it
    # as a positional argument.
    # NOTE(review): scikit-learn deprecated this loader in 1.0 and removed it
    # in 1.2, so this call needs an older scikit-learn -- confirm the pinned
    # version.
    X, Y = datasets.load_boston(return_X_y=True)

    # Column vector so the targets line up with the model's (n, 1) output.
    # `reshape` leaves the array returned by the loader untouched.
    Y = Y.reshape(-1, 1)

    # normalization
    # Crude constant rescaling of all features.
    # NOTE(review): 80 is a magic number; it is not a per-feature scaling.
    X = X / 80
    # Scale targets by their range (no offset is subtracted).
    Y = Y / (np.max(Y) - np.min(Y))

    num_samples = len(Y)
    num_train = math.ceil(num_samples * ratio)

    # Shuffle the sample indices
    if seed is None:
        idx = np.random.permutation(np.arange(num_samples))
    else:
        idx = np.random.default_rng(seed).permutation(np.arange(num_samples))

    train_idx, valid_idx = idx[:num_train], idx[num_train:]
    traindata = X[train_idx], Y[train_idx]
    validdata = X[valid_idx], Y[valid_idx]

    return traindata, validdata

In [3]:
# Use the GPU when one is visible to this process, otherwise fall back to the
# CPU so the notebook still runs (the model cell applies the same
# torch.cuda.is_available() check before moving the model).
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Each split is a (features, targets) pair of NumPy arrays.
(X_train, Y_train), (X_valid, Y_valid) = load_boston()

# Convert to float32 tensors created directly on the chosen device.
X_train = torch.tensor(X_train, dtype=torch.float32, device=device)
Y_train = torch.tensor(Y_train, dtype=torch.float32, device=device)
X_valid = torch.tensor(X_valid, dtype=torch.float32, device=device)
Y_valid = torch.tensor(Y_valid, dtype=torch.float32, device=device)

In [4]:
# Sanity check of the training features' shape: (samples, features).
print(X_train.shape)

torch.Size([405, 13])


In [5]:
# Confirm which device holds the training tensors. With CUDA_VISIBLE_DEVICES
# set to a single GPU at the top of the notebook, that GPU is reported as index 0.
X_train.device

device(type='cuda', index=0)

In [6]:
# Small regression network: 13 inputs -> 30 sigmoid hidden units -> 1 output.
layers = [
    torch.nn.Linear(in_features=13, out_features=30),
    torch.nn.Sigmoid(),
    torch.nn.Linear(in_features=30, out_features=1),
]
model = torch.nn.Sequential(*layers)

In [7]:
# Display the layer layout of the network.
model

Sequential(
  (0): Linear(in_features=13, out_features=30, bias=True)
  (1): Sigmoid()
  (2): Linear(in_features=30, out_features=1, bias=True)
)

In [8]:
# Move the model onto the GPU only when CUDA is usable; otherwise keep it as is.
model = model.cuda() if torch.cuda.is_available() else model

In [9]:
# Mean squared error, averaged over all elements (the default reduction, spelled out).
loss = torch.nn.MSELoss(reduction="mean")

In [10]:
# Display the loss module.
loss

MSELoss()

In [11]:
# Plain SGD over every parameter of the model, with a learning rate of 0.001.
opt = torch.optim.SGD(params=model.parameters(), lr=0.001)

In [12]:
# Full-batch gradient descent: every iteration uses the entire training set.
# The loss histories hold plain Python floats (via .item()) rather than
# tensors, so they neither keep autograd graphs / device memory alive nor need
# converting before they are plotted or serialised.
valid_losses = []
train_losses = []

# The batch never changes, so bind it once instead of on every iteration.
X, Y = X_train, Y_train

for i in range(1000):
    # Reset the gradients accumulated by the previous iteration
    opt.zero_grad()

    # Forward pass -- produces the intermediate results backpropagation needs
    l = loss(model(X), Y)

    # Backward pass -- compute the gradients
    l.backward()

    # Update the weights
    opt.step()

    # Training loss is the value computed before this iteration's update.
    cur_train_loss = l.item()

    # Validation loss uses the freshly updated weights; no gradients are
    # needed. Arguments follow the (input, target) order the loss expects.
    with torch.no_grad():
        cur_valid_loss = loss(model(X_valid), Y_valid).item()

    valid_losses.append(cur_valid_loss)
    train_losses.append(cur_train_loss)

    # Report progress every 100 iterations
    if i % 100 == 0:
        print(f"Iter {i}: loss {cur_train_loss:.4f}, valid loss {cur_valid_loss:.4f}")

Iter 0: loss 0.3119, valid loss 0.3082
Iter 100: loss 0.0709, valid loss 0.0695
Iter 200: loss 0.0630, valid loss 0.0608
Iter 300: loss 0.0599, valid loss 0.0576
Iter 400: loss 0.0572, valid loss 0.0549
Iter 500: loss 0.0548, valid loss 0.0524
Iter 600: loss 0.0526, valid loss 0.0503
Iter 700: loss 0.0506, valid loss 0.0483
Iter 800: loss 0.0488, valid loss 0.0466
Iter 900: loss 0.0472, valid loss 0.0450
