调用所有的库

In [1]:
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import time

from sympy.tensor.array.arrayop import Flatten
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [2]:
def nin_block(in_channel, out_channel, kernel_size, stride, padding):
    """Build one Network-in-Network block.

    The block is a convolution with the given geometry followed by two 1x1
    convolutions; every convolution is followed by a ReLU.

    Args:
        in_channel: number of channels of the incoming feature map.
        out_channel: number of channels produced by all three convolutions.
        kernel_size: kernel size of the first convolution.
        stride: stride of the first convolution.
        padding: padding of the first convolution.

    Returns:
        An ``nn.Sequential`` holding the six layers in order.
    """
    layers = [
        nn.Conv2d(in_channel, out_channel, kernel_size, stride, padding),
        nn.ReLU(),
    ]
    # Two per-pixel (1x1) convolutions that keep the channel count unchanged.
    for _ in range(2):
        layers.append(nn.Conv2d(out_channel, out_channel, kernel_size=1))
        layers.append(nn.ReLU())
    return nn.Sequential(*layers)


In [3]:
class GlobalAvgPool2d(nn.Module):
    """Average-pool the input with a window equal to its trailing dimensions."""

    def __init__(self):
        super().__init__()

    def forward(self, x):
        # Everything after the first two dimensions is used as the pooling
        # window, so each channel is reduced to a single value.
        window = x.shape[2:]
        return F.avg_pool2d(x, kernel_size=window)

In [4]:
# NiN-style classifier: three NiN blocks, each followed by 2x2 max pooling,
# then a final NiN block whose 10 output channels act as the class scores.
net = nn.Sequential(
    nin_block(3, 96, kernel_size=11, stride=4, padding=0),
    nn.MaxPool2d(kernel_size=2, stride=2),

    nin_block(96, 256, kernel_size=5, stride=1, padding=2),
    nn.MaxPool2d(kernel_size=2, stride=2),

    nin_block(256, 384, kernel_size=3, stride=1, padding=1),
    nn.MaxPool2d(kernel_size=2, stride=2),

    nin_block(384, 10, kernel_size=3, stride=1, padding=1),

    # Global average pooling reduces each of the 10 channel maps to 1x1;
    # the flatten that follows yields shape (batch_size, 10), i.e. one score per label.
    GlobalAvgPool2d(),
    nn.Flatten(),
)

In [5]:
import torch
# Smoke test: push one random 64x64 RGB sample through the network and show
# the scores and their shape. The forward pass runs once and its result is
# reused for both prints instead of evaluating the network twice.
x = torch.randn(1, 3, 64, 64)
sample_out = net(x)
print(sample_out)
print(sample_out.shape)

tensor([[0.0138, 0.2762, 0.0000, 0.0000, 0.0000, 0.0000, 0.1554, 0.0000, 0.0000,
         0.2148]], grad_fn=<ViewBackward0>)
torch.Size([1, 10])


In [6]:
# Print every top-level child of the network together with its registered name.
for child_name, child in net.named_children():
    print(f"名称: {child_name}, 模块类型: {child}")

名称: 0, 模块类型: Sequential(
  (0): Conv2d(3, 96, kernel_size=(11, 11), stride=(4, 4))
  (1): ReLU()
  (2): Conv2d(96, 96, kernel_size=(1, 1), stride=(1, 1))
  (3): ReLU()
  (4): Conv2d(96, 96, kernel_size=(1, 1), stride=(1, 1))
  (5): ReLU()
)
名称: 1, 模块类型: MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
名称: 2, 模块类型: Sequential(
  (0): Conv2d(96, 256, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
  (1): ReLU()
  (2): Conv2d(256, 256, kernel_size=(1, 1), stride=(1, 1))
  (3): ReLU()
  (4): Conv2d(256, 256, kernel_size=(1, 1), stride=(1, 1))
  (5): ReLU()
)
名称: 3, 模块类型: MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
名称: 4, 模块类型: Sequential(
  (0): Conv2d(256, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (1): ReLU()
  (2): Conv2d(384, 384, kernel_size=(1, 1), stride=(1, 1))
  (3): ReLU()
  (4): Conv2d(384, 384, kernel_size=(1, 1), stride=(1, 1))
  (5): ReLU()
)
名称: 5, 模块类型: MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)

In [7]:
# Trace the activation shape after each top-level child of the network.
# A separate variable is threaded through the layers so that the probe input
# `x` is not overwritten; this keeps the cell re-runnable (feeding the final
# 2-D output back into the first convolution would fail).
out = x
for name, blk in net.named_children():
    out = blk(out)
    print(name, 'output_size', out.shape)

0 output_size torch.Size([1, 96, 14, 14])
1 output_size torch.Size([1, 96, 7, 7])
2 output_size torch.Size([1, 256, 7, 7])
3 output_size torch.Size([1, 256, 3, 3])
4 output_size torch.Size([1, 384, 3, 3])
5 output_size torch.Size([1, 384, 1, 1])
6 output_size torch.Size([1, 10, 1, 1])
7 output_size torch.Size([1, 10, 1, 1])
8 output_size torch.Size([1, 10])


In [8]:
# Preprocessing applied to every image: resize to 256, take the central
# 224x224 crop, convert to a tensor and normalise each channel.
# NOTE(review): the mean/std values look like the commonly used ImageNet
# statistics - confirm they are the intended ones for CIFAR-10.
data_transform = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

batch_size = 64

# CIFAR-10 train and test splits, stored under ./cifar_10 (download requested).
train_sets, test_sets = (
    datasets.CIFAR10(root='cifar_10', train=is_train, transform=data_transform, download=True)
    for is_train in (True, False)
)

# Only the training batches are shuffled; evaluation order stays fixed.
train_loader = DataLoader(train_sets, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_sets, batch_size=batch_size, shuffle=False)


In [9]:
def evaluate_accuracy(data_iter, net):
    """Return the fraction of samples in ``data_iter`` that ``net`` classifies correctly.

    Args:
        data_iter: iterable yielding ``(X, y)`` batches, where ``y`` holds the
            integer class label of each sample.
        net: model whose output is arg-maxed over dimension 1 to obtain the
            predicted class.

    Returns:
        Number of correct predictions divided by the number of samples seen,
        or ``0.0`` when ``data_iter`` yields no samples.

    Notes:
        * Batches are moved to the module-level ``device``; ``net`` is expected
          to already live on that device.
        * ``net`` is switched to evaluation mode and is left in that mode.
    """
    correct, total = 0.0, 0

    net.eval()
    # Inference only: disabling autograd avoids building a graph per batch,
    # which would waste memory and time during evaluation.
    with torch.no_grad():
        for X, y in data_iter:
            X = X.to(device)
            y = y.to(device)
            correct += (net(X).argmax(dim=1) == y).float().sum().item()
            total += y.shape[0]

    # Guard against an empty iterator instead of raising ZeroDivisionError.
    return correct / total if total else 0.0

In [10]:
def train(net,train_iter, test_iter,batch_size,optimizer,device, num_epochs):
    """Train ``net`` with cross-entropy loss and report metrics after every epoch.

    Args:
        net: model to train; it is moved to ``device`` first.
        train_iter: iterable of ``(X, y)`` training batches.
        test_iter: iterable of ``(X, y)`` batches passed to ``evaluate_accuracy``.
        batch_size: unused; kept so existing call sites keep working.
        optimizer: optimizer whose ``zero_grad``/``step`` drive the updates. It
            must have been built from the parameters of ``net``.
        device: device the model and every training batch are moved to.
        num_epochs: number of passes over ``train_iter``.

    Prints one line per epoch with the mean per-sample training loss, the
    training accuracy, the test accuracy and the elapsed wall-clock time.
    """
    net = net.to(device)
    print("training on ",device)
    loss = torch.nn.CrossEntropyLoss()
    for epoch in range(num_epochs):
        train_loss_sum, train_acc_sum, n, start = 0.0, 0.0, 0, time.time()

        net.train()
        for X, y in train_iter:
            X = X.to(device)
            y = y.to(device)

            y_hat = net(X)
            l = loss(y_hat, y)

            optimizer.zero_grad()
            l.backward()
            optimizer.step()

            num_samples = y.shape[0]
            # CrossEntropyLoss returns the mean over the batch, so weight it by
            # the batch size; dividing by `n` below then gives the true
            # per-sample mean loss rather than a value scaled by 1/batch_size.
            train_loss_sum += l.item() * num_samples
            train_acc_sum += (y_hat.argmax(dim=1) == y).sum().item()
            n += num_samples

        test_acc = evaluate_accuracy(test_iter,net)
        # Avoid dividing by zero when the training iterator yields nothing.
        seen = max(n, 1)
        print('epoch % d ,loss % .4f , train acc %.3f , test acc %.3f , time %.1f sec' %
              (epoch+1,train_loss_sum/seen,train_acc_sum/seen,test_acc,time.time()-start))

In [None]:
# Train the NiN classifier `net` with Adam on the CIFAR-10 loaders.
train_iter, test_iter = train_loader, test_loader
lr, num_epochs = 0.001, 5
optimizer = torch.optim.Adam(net.parameters(), lr=lr)

train(net, train_iter, test_iter, batch_size, optimizer, device, num_epochs)

training on  cpu


In [27]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class Inception(nn.Module):
    """Inception block: four parallel branches concatenated along the channel axis.

    Args:
        in_channel: number of channels of the input feature map.
        c1: output channels of the 1x1 branch.
        c2: indexable pair; ``c2[0]`` is the 1x1 reduction width and ``c2[1]``
            the output channels of the 3x3 convolution.
        c3: indexable pair; ``c3[0]`` is the 1x1 reduction width and ``c3[1]``
            the output channels of the 5x5 convolution.
        c4: output channels of the 1x1 convolution after the max pool.

    The output has ``c1 + c2[1] + c3[1] + c4`` channels; the paddings keep the
    spatial size of every branch equal to the input's.
    """

    def __init__(self, in_channel, c1, c2, c3, c4):
        super().__init__()
        # Branch 1: a single 1x1 convolution.
        self.p1_1 = nn.Conv2d(in_channel, c1, kernel_size=1)

        # Branch 2: 1x1 reduction followed by a 3x3 convolution.
        self.p2_1 = nn.Conv2d(in_channel, c2[0], kernel_size=1)
        self.p2_2 = nn.Conv2d(c2[0], c2[1], kernel_size=3, padding=1)

        # Branch 3: 1x1 reduction followed by a 5x5 convolution.
        self.p3_1 = nn.Conv2d(in_channel, c3[0], kernel_size=1)
        self.p3_2 = nn.Conv2d(c3[0], c3[1], kernel_size=5, padding=2)

        # Branch 4: 3x3 max pooling (stride 1) followed by a 1x1 convolution.
        self.p4_1 = nn.MaxPool2d(kernel_size=3, stride=1, padding=1)
        self.p4_2 = nn.Conv2d(in_channel, c4, kernel_size=1)

    def forward(self, x):
        """Run the four branches on ``x`` and concatenate them on dimension 1."""
        branch1 = F.relu(self.p1_1(x))
        branch2 = F.relu(self.p2_2(F.relu(self.p2_1(x))))
        branch3 = F.relu(self.p3_2(F.relu(self.p3_1(x))))
        # Note: the pooled tensor also goes through a ReLU before the 1x1 projection.
        branch4 = F.relu(self.p4_2(F.relu(self.p4_1(x))))
        return torch.cat([branch1, branch2, branch3, branch4], dim=1)

# Stage 1: 7x7 stride-2 convolution followed by a stride-2 max pool.
b1 = nn.Sequential(
    nn.Conv2d(3, 128, kernel_size=7, stride=2, padding=3),
    nn.ReLU(),
    nn.MaxPool2d(kernel_size=3, stride=2, padding=1),
)

# Stage 2: 1x1 convolution, 3x3 convolution, then a stride-2 max pool.
b2 = nn.Sequential(
    nn.Conv2d(128, 128, kernel_size=1),
    nn.ReLU(),
    nn.Conv2d(128, 192, kernel_size=3, padding=1),
    nn.ReLU(),
    nn.MaxPool2d(kernel_size=3, stride=2, padding=1),
)

# Stage 3: two Inception blocks and a stride-2 max pool.
b3 = nn.Sequential(
    # Output channels: 64 + 128 + 32 + 32 = 256
    Inception(in_channel=192, c1=64, c2=(96, 128), c3=(16, 32), c4=32),
    # Output channels: 128 + 192 + 96 + 64 = 480
    Inception(in_channel=256, c1=128, c2=(128, 192), c3=(32, 96), c4=64),
    nn.MaxPool2d(kernel_size=3, stride=2, padding=1),
)

# Stage 4: five Inception blocks and a stride-2 max pool.
b4 = nn.Sequential(
    # Output channels: 192 + 208 + 48 + 64 = 512
    Inception(in_channel=480, c1=192, c2=(96, 208), c3=(16, 48), c4=64),
    # Output channels: 160 + 224 + 64 + 64 = 512
    Inception(in_channel=512, c1=160, c2=(112, 224), c3=(24, 64), c4=64),
    # Output channels: 128 + 256 + 64 + 64 = 512
    Inception(in_channel=512, c1=128, c2=(128, 256), c3=(24, 64), c4=64),
    # Output channels: 112 + 288 + 64 + 64 = 528
    Inception(in_channel=512, c1=112, c2=(144, 288), c3=(32, 64), c4=64),
    # Output channels: 256 + 320 + 128 + 128 = 832
    Inception(in_channel=528, c1=256, c2=(160, 320), c3=(32, 128), c4=128),
    nn.MaxPool2d(kernel_size=3, stride=2, padding=1),
)

# Stage 5: two Inception blocks, global average pooling to 1x1, then flatten.
b5 = nn.Sequential(
    # Output channels: 256 + 320 + 128 + 128 = 832
    Inception(in_channel=832, c1=256, c2=(160, 320), c3=(32, 128), c4=128),
    # Output channels: 384 + 384 + 128 + 128 = 1024
    Inception(in_channel=832, c1=384, c2=(192, 384), c3=(48, 128), c4=128),
    nn.AdaptiveAvgPool2d((1, 1)),
    nn.Flatten(),
)

# Full network: the five stages followed by a 10-way linear classifier.
netg = nn.Sequential(b1, b2, b3, b4, b5, nn.Linear(1024, 10))

# Shape check: feed one random 224x224 RGB sample through each stage of
# `netg` and print the output shape of every stage.
X = torch.rand(size=(1, 3, 224, 224))
# Only shapes are inspected, so autograd bookkeeping is switched off.
with torch.no_grad():
    for layer in netg:
        try:
            X = layer(X)
        except Exception as e:
            print(f"Error in layer {layer.__class__.__name__}: {e}")
            # Stop at the first failure: later stages would otherwise receive
            # the stale tensor and report misleading follow-on errors.
            break
        print(layer.__class__.__name__, "output shape:\t", X.shape)


Sequential output shape:	 torch.Size([1, 128, 56, 56])
Sequential output shape:	 torch.Size([1, 192, 28, 28])
Sequential output shape:	 torch.Size([1, 480, 14, 14])
Sequential output shape:	 torch.Size([1, 832, 7, 7])
Sequential output shape:	 torch.Size([1, 1024])
Linear output shape:	 torch.Size([1, 10])


In [28]:
# Rebuilds the same CIFAR-10 input pipeline as the earlier data cell:
# resize to 256, central 224x224 crop, tensor conversion, per-channel normalisation.
# NOTE(review): the mean/std values look like the commonly used ImageNet
# statistics - confirm they are the intended ones for CIFAR-10.
data_transform = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

batch_size = 64

# CIFAR-10 train and test splits, stored under ./cifar_10 (download requested).
train_sets, test_sets = (
    datasets.CIFAR10(root='cifar_10', train=is_train, transform=data_transform, download=True)
    for is_train in (True, False)
)

# Only the training batches are shuffled; evaluation order stays fixed.
train_loader = DataLoader(train_sets, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_sets, batch_size=batch_size, shuffle=False)

In [29]:
def train(net,train_iter, test_iter,batch_size,optimizer,device, num_epochs):
    """Train ``net`` with cross-entropy loss and report metrics after every epoch.

    Args:
        net: model to train; it is moved to ``device`` first.
        train_iter: iterable of ``(X, y)`` training batches.
        test_iter: iterable of ``(X, y)`` batches passed to ``evaluate_accuracy``.
        batch_size: unused; kept so existing call sites keep working.
        optimizer: optimizer whose ``zero_grad``/``step`` drive the updates. It
            must have been built from the parameters of ``net``.
        device: device the model and every training batch are moved to.
        num_epochs: number of passes over ``train_iter``.

    Prints one line per epoch with the mean per-sample training loss, the
    training accuracy, the test accuracy and the elapsed wall-clock time.
    """
    net = net.to(device)
    print("training on ",device)
    loss = torch.nn.CrossEntropyLoss()
    for epoch in range(num_epochs):
        train_loss_sum, train_acc_sum, n, start = 0.0, 0.0, 0, time.time()

        net.train()
        for X, y in train_iter:
            X = X.to(device)
            y = y.to(device)

            y_hat = net(X)
            l = loss(y_hat, y)

            optimizer.zero_grad()
            l.backward()
            optimizer.step()

            num_samples = y.shape[0]
            # CrossEntropyLoss returns the mean over the batch, so weight it by
            # the batch size; dividing by `n` below then gives the true
            # per-sample mean loss rather than a value scaled by 1/batch_size.
            train_loss_sum += l.item() * num_samples
            train_acc_sum += (y_hat.argmax(dim=1) == y).sum().item()
            n += num_samples

        test_acc = evaluate_accuracy(test_iter,net)
        # Avoid dividing by zero when the training iterator yields nothing.
        seen = max(n, 1)
        print('epoch % d ,loss % .4f , train acc %.3f , test acc %.3f , time %.1f sec' %
              (epoch+1,train_loss_sum/seen,train_acc_sum/seen,test_acc,time.time()-start))

In [None]:
# Train the GoogLeNet-style model `netg` with Adam on the CIFAR-10 loaders.
train_iter = train_loader
test_iter = test_loader
lr = 0.001
num_epochs = 10
# The optimizer must be built from the parameters of the model being trained.
# Using `net.parameters()` here would step the NiN model's weights and leave
# `netg` completely unchanged.
optimizer = torch.optim.Adam(netg.parameters(), lr=lr)

train(netg,train_iter,test_iter,batch_size,optimizer,device,num_epochs )

training on  cpu
