PyTorch神经网络建模：
- 数据准备
- 模型建立
- 模型训练
- 模型评估使用和保存

训练过程的耗时主要来自两个部分：
- 数据准备：可以使用更多的进程来准备数据
- 参数迭代：可以使用GPU进行加速训练

# PyTorch使用GPU加速

In [4]:
import torch
import torch.nn as nn

# Toy tensors and a small linear layer used by the device-placement
# examples that follow.
features = torch.arange(10)
labels = torch.arange(10)

model = nn.Linear(in_features=3, out_features=3)

In [5]:
# Pick the first CUDA device when one is available, otherwise use the CPU.
device = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu")

# nn.Module.to() moves the parameters in place and returns the module itself.
model = model.to(device)

# Tensor.to() is not in-place, so the results must be rebound.
# (Alternative: labels.cuda() if torch.cuda.is_available() else labels)
features, labels = features.to(device), labels.to(device)

如果要使用多个GPU训练模型，可以将模型设置为数据并行风格。

这样，模型移动到GPU上之后，会在每个GPU上拷贝一个副本，并把每个批次的数据平分到各个GPU上进行训练。

In [6]:
# Wrap the model for data-parallel execution only when more than one GPU
# is visible; otherwise keep using the plain model.
model = nn.DataParallel(model) if torch.cuda.device_count() > 1 else model

# Move the training data to the selected device.
# (Alternative: labels.cuda() if torch.cuda.is_available() else labels)
features, labels = (t.to(device) for t in (features, labels))

# GPU相关操作

In [7]:
# Check whether CUDA is available.
if_cuda = torch.cuda.is_available()
print(f"if_cuda= {if_cuda}")

if_cuda= True


In [8]:
# Number of GPUs visible to PyTorch.
gpu_count = torch.cuda.device_count()
print(f"gpu_count= {gpu_count}")

gpu_count= 1


In [9]:
# Move a tensor between the CPU and the GPU.
tensor = torch.rand((100, 100))
# Fall back to the CPU when CUDA is unavailable so this cell also runs on
# CPU-only machines (on a GPU machine, tensor.cuda() is an alternative).
tensor_gpu = tensor.to("cuda:0" if torch.cuda.is_available() else "cpu")
print(tensor_gpu.device)
print(tensor_gpu.is_cuda)

tensor_cpu = tensor_gpu.to("cpu")  # alternative: tensor_gpu.cpu()
print(tensor_cpu.device)

cuda:0
True
cpu


In [13]:
# Inspect which device a tensor and a model live on.
tensor = torch.rand(100, 100)
print(tensor.device)

# A module has no .device attribute of its own; look at one of its parameters.
print(next(iter(model.parameters())).device)

cpu
cuda:0


In [14]:
# Move every parameter tensor of a module to the GPU.
net = nn.Linear(2, 1)
print(next(net.parameters()).is_cuda)
# Module.to() moves the parameters in place, so `net = net.to(...)` is not
# required. Fall back to the CPU when CUDA is unavailable so the cell does
# not crash on CPU-only machines.
net.to("cuda:0" if torch.cuda.is_available() else "cpu")
print(next(net.parameters()).is_cuda)
print(next(net.parameters()).device)

False
True
cuda:0


In [15]:
# Build a model that supports multi-GPU data parallelism.
linear = nn.Linear(in_features=2, out_features=1)
print(next(linear.parameters()).device)

model = nn.DataParallel(module=linear)
print(model.device_ids)
# The wrapped network is reachable through model.module.
print(next(model.module.parameters()).device)

cpu
[0]
cuda:0


In [16]:
# When saving a DataParallel model, save the state dict of model.module
# (the wrapped network) so the keys carry no "module." prefix and can be
# loaded into a plain, unwrapped module.
torch.save(model.module.state_dict(), "model_parameter.pkl")

linear = nn.Linear(2, 1)
# map_location="cpu": the checkpoint may contain CUDA tensors, while `linear`
# is a fresh CPU module. Remapping lets the file load on machines without a
# GPU and avoids allocating GPU memory just to copy the values back.
linear.load_state_dict(torch.load("model_parameter.pkl", map_location="cpu"))

<All keys matched successfully>

In [17]:
# Clear the CUDA cache; this is very useful when CUDA runs out of memory.
torch.cuda.empty_cache()

# 线性回归范例

In [21]:
import time

# Prepare the data: n samples with 2 features each.
n = 1000000

X = 10 * torch.rand([n, 2]) - 5.0  # torch.rand draws from a uniform distribution
w0 = torch.tensor([[2.0, -3.0]])
b0 = torch.tensor([[10.0]])
# @ is matrix multiplication; Gaussian noise (mean 0, std 2) is added.
Y = X @ w0.t() + b0 + torch.normal(0.0, 2.0, size=[n, 1])

# Move the data to the GPU when one is available. The same fallback is used
# for the model, so data and model always end up on the same device and the
# cell still runs on CPU-only machines.
print("torch.cuda.is_available() = ", torch.cuda.is_available())
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
X = X.to(device)
Y = Y.to(device)

# 定义模型
class LinearRegression(nn.Module):
    """Linear model computing ``x @ w.T + b`` with learnable ``w`` and ``b``.

    Args:
        in_features: Number of input features (columns of ``x``). Defaults
            to 2, which gives the same parameter shapes the class had when
            they were copied from the module-level ``w0``/``b0`` tensors.
    """

    def __init__(self, in_features: int = 2):
        super().__init__()

        # Shapes are stated explicitly instead of being derived from the
        # ground-truth tensors w0/b0, so the class does not depend on
        # module-level state being defined first.
        self.w = nn.Parameter(torch.randn(1, in_features))
        self.b = nn.Parameter(torch.zeros(1, 1))

    # Forward pass
    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Return predictions of shape ``(rows of x, 1)``."""
        return x @ self.w.t() + self.b


# Select the device first, then create the model directly on it
# (Module.to() returns the module itself).
device = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu")
linear = LinearRegression().to(device)

# Training setup: mean-squared-error loss optimised with Adam.
loss_func = nn.MSELoss()
optimizer = torch.optim.Adam(params=linear.parameters(), lr=0.1)


def train(epoches):
    """Run full-batch training steps on the module-level ``linear`` model.

    Uses the module-level ``X``, ``Y``, ``optimizer`` and ``loss_func``.
    Prints the loss every 50 epochs and the elapsed wall-clock time at the end.

    Args:
        epoches: Number of optimisation steps; each step uses all of ``X``.
    """

    def _wait_for_gpu():
        # CUDA kernels are launched asynchronously. Without a synchronisation
        # point the clock can be read while queued GPU work is still running.
        if torch.cuda.is_available():
            torch.cuda.synchronize()

    _wait_for_gpu()
    # perf_counter is monotonic and intended for measuring intervals.
    tic = time.perf_counter()
    for epoch in range(epoches):
        optimizer.zero_grad()

        Y_pred = linear(X)
        loss = loss_func(Y_pred, Y)

        loss.backward()
        optimizer.step()

        if epoch % 50 == 0:
            print({"epoch": epoch, "loss": loss.item()})

    _wait_for_gpu()
    toc = time.perf_counter()
    print("time used:", toc - tic)


train(500)

torch.cuda.is_available() =  True
{'epoch': 0, 'loss': 214.75949096679688}
{'epoch': 50, 'loss': 33.45890808105469}
{'epoch': 100, 'loss': 9.039307594299316}
{'epoch': 150, 'loss': 4.49655818939209}
{'epoch': 200, 'loss': 4.030478000640869}
{'epoch': 250, 'loss': 4.006821632385254}
{'epoch': 300, 'loss': 4.006302356719971}
{'epoch': 350, 'loss': 4.006300449371338}
{'epoch': 400, 'loss': 4.0062994956970215}
{'epoch': 450, 'loss': 4.006317615509033}
time used: 0.6919867992401123


# torchkeras

## torchkeras单个GPU

In [29]:
import torchkeras

device = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu")

model = torchkeras.Model(LinearRegression())

# Note: the target device is passed to compile() here.
model.compile(
    device=device,
    optimizer=torch.optim.Adam(model.parameters(), lr=0.1),
    loss_func=nn.MSELoss(),
)

## torchkeras多GPU

In [30]:
class CnnModule(nn.Module):
    """Small CNN: two conv/pool stages, global max-pooling and a 2-layer head.

    Takes single-channel images and returns 10 scores per sample.
    """

    def __init__(self):
        super().__init__()
        # Convolutional feature extractor ending in a 64x1x1 map per sample.
        feature_stages = [
            nn.Conv2d(in_channels=1, out_channels=32, kernel_size=3),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(in_channels=32, out_channels=64, kernel_size=5),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Dropout2d(p=0.1),
            nn.AdaptiveMaxPool2d((1, 1)),
        ]
        # Classifier head operating on the flattened 64-dim feature vector.
        head_stages = [
            nn.Flatten(),
            nn.Linear(64, 32),
            nn.ReLU(),
            nn.Linear(32, 10),
        ]
        self.layers = nn.ModuleList(feature_stages + head_stages)

    def forward(self, x):
        """Apply every stage in order and return the final activations."""
        out = x
        for stage in self.layers:
            out = stage(out)
        return out

# Important: wrap the network in DataParallel before handing it to
# torchkeras, so that it runs in data-parallel mode.
net = nn.DataParallel(module=CnnModule())
model = torchkeras.Model(net)

model.summary(input_shape=(1, 32, 32))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 32, 30, 30]             320
         MaxPool2d-2           [-1, 32, 15, 15]               0
            Conv2d-3           [-1, 64, 11, 11]          51,264
         MaxPool2d-4             [-1, 64, 5, 5]               0
         Dropout2d-5             [-1, 64, 5, 5]               0
 AdaptiveMaxPool2d-6             [-1, 64, 1, 1]               0
           Flatten-7                   [-1, 64]               0
            Linear-8                   [-1, 32]           2,080
              ReLU-9                   [-1, 32]               0
           Linear-10                   [-1, 10]             330
Total params: 53,994
Trainable params: 53,994
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.003906
Forward/backward pass size (MB): 0.359695
Params size (MB): 0.205971
Estimated Total Size (MB): 0.569572
----------------------------------------------------------------

In [33]:
import torch
from torch import nn
from torchkeras import summary, Model

import torchvision
from torchvision import transforms

# Convert images to tensors.
transform = transforms.Compose([transforms.ToTensor()])

# Training and validation splits share root, download flag and transform;
# only the `train` flag differs.
ds_train, ds_valid = (
    torchvision.datasets.MNIST(
        root="data/minist/", train=is_train, download=True, transform=transform
    )
    for is_train in (True, False)
)

# Only the training loader shuffles its samples.
dl_train, dl_valid = (
    torch.utils.data.DataLoader(ds, batch_size=128, shuffle=do_shuffle, num_workers=4)
    for ds, do_shuffle in ((ds_train, True), (ds_valid, False))
)

# Dataset sizes, one per line (train first, then validation).
print(len(ds_train), len(ds_valid), sep="\n")

60000
10000


In [34]:
from sklearn.metrics import accuracy_score

def accuracy(y_pred, y_true):
    """Return the share of samples whose highest-scoring class equals the label.

    Args:
        y_pred: Tensor of class scores with classes along dim 1.
        y_true: Tensor of class labels, one per row of ``y_pred``.

    Returns:
        The value computed by ``sklearn.metrics.accuracy_score``.
    """
    # Softmax is monotonic, so the arg-max of the raw scores is the same
    # class as the arg-max of the softmax output; the extra pass is skipped.
    # detach() replaces the legacy `.data` attribute.
    y_pred_cls = torch.argmax(y_pred.detach(), dim=1)
    # Tensors must be moved to the CPU before they can become numpy arrays.
    return accuracy_score(y_true.cpu().numpy(), y_pred_cls.cpu().numpy())

device = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu")

# Note: the target device is passed to compile() here.
model.compile(
    device=device,
    metrics_dict={"accuracy": accuracy},
    optimizer=torch.optim.Adam(model.parameters(), lr=0.02),
    loss_func=nn.CrossEntropyLoss(),
)

# Train for 3 epochs, logging every 100 steps.
dfhistory = model.fit(3, log_step_freq=100, dl_val=dl_valid, dl_train=dl_train)

Start Training ...

{'step': 100, 'loss': 0.899, 'accuracy': 0.676}
{'step': 200, 'loss': 0.573, 'accuracy': 0.802}
{'step': 300, 'loss': 0.459, 'accuracy': 0.846}
{'step': 400, 'loss': 0.398, 'accuracy': 0.87}

 +-------+-------+----------+----------+--------------+
| epoch |  loss | accuracy | val_loss | val_accuracy |
+-------+-------+----------+----------+--------------+
|   1   | 0.362 |  0.882   |  0.078   |    0.976     |
+-------+-------+----------+----------+--------------+

{'step': 100, 'loss': 0.129, 'accuracy': 0.962}
{'step': 200, 'loss': 0.135, 'accuracy': 0.96}
{'step': 300, 'loss': 0.157, 'accuracy': 0.955}
{'step': 400, 'loss': 0.151, 'accuracy': 0.957}

 +-------+-------+----------+----------+--------------+
| epoch |  loss | accuracy | val_loss | val_accuracy |
+-------+-------+----------+----------+--------------+
|   2   | 0.148 |  0.958   |  0.131   |     0.97     |
+-------+-------+----------+----------+--------------+

{'step': 100, 'loss': 0.168, 'accuracy': 0

保存模型

In [36]:
# Save only the model parameters. The network was wrapped in DataParallel,
# so the underlying CnnModule is reached through model.net.module; saving its
# state dict keeps the keys loadable by a plain CnnModule.
torch.save(model.net.module.state_dict(), "model_parameter.pkl")

net_clone = CnnModule()
# map_location="cpu": the checkpoint may contain CUDA tensors while net_clone
# is a fresh CPU module; remapping also lets the file load without a GPU.
net_clone.load_state_dict(torch.load("model_parameter.pkl", map_location="cpu"))

model_clone = torchkeras.Model(net_clone)
# The optimizer must be built from model_clone's own parameters. Building it
# from `model.parameters()` would make any later training of model_clone
# update the original model instead.
model_clone.compile(loss_func=nn.CrossEntropyLoss(),
                    optimizer=torch.optim.Adam(model_clone.parameters(), lr=0.02),
                    metrics_dict={"accuracy": accuracy}, device=device)
model_clone.evaluate(dl_valid)

{'val_loss': 0.10967098912990451, 'val_accuracy': 0.9728045886075949}