简单使用

In [None]:
import torch as t
import torch.nn as nn
import torch.nn.functional as F
class Net(nn.Module):
    """LeNet-style CNN for single-channel 32x32 inputs with 10 outputs.

    Two 5x5 convolutions, each followed by ReLU and 2x2 max pooling, feed
    three fully connected layers. The last layer returns raw scores (no
    activation is applied to it).
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=6, kernel_size=5)
        self.conv2 = nn.Conv2d(in_channels=6, out_channels=16, kernel_size=5)
        # 16 feature maps of 5x5 remain after the two conv/pool stages.
        self.fc1 = nn.Linear(in_features=16 * 5 * 5, out_features=120)
        self.fc2 = nn.Linear(in_features=120, out_features=84)
        self.fc3 = nn.Linear(in_features=84, out_features=10)

    def forward(self, x):
        """Return the 10 raw output scores for every sample in batch ``x``."""
        for conv in (self.conv1, self.conv2):
            x = F.max_pool2d(F.relu(conv(x)), kernel_size=2)
        # Flatten everything except the batch dimension.
        x = x.view(x.size(0), -1)
        for hidden in (self.fc1, self.fc2):
            x = F.relu(hidden(x))
        return self.fc3(x)


net = Net()


In [None]:
# Push one random single-channel 32x32 image through the network.
input = t.randn(1, 1, 32, 32)
output = net(input)
# Target is the float row vector 0..9 with shape (1, 10).
target = t.arange(10, dtype=t.float32).reshape(1, 10)
criterion = nn.MSELoss()
loss = criterion(output, target)
loss

In [None]:
# Reset the gradients of all parameters, then print conv1's bias gradient
# before and after back-propagating the loss to see backward() fill it in.
net.zero_grad()
print(net.conv1.bias.grad)
loss.backward()
print(net.conv1.bias.grad)

In [None]:
import torch.optim as optim
# Plain SGD over every parameter of the network.
optimizer = optim.SGD(net.parameters(), lr=0.01)
# Reset gradients through the optimizer and print conv1's bias gradient
# right after the reset.
optimizer.zero_grad()
print(net.conv1.bias.grad)
# One full training step: forward, loss, backward, parameter update.
output = net(input)
loss = criterion(output, target)
loss.backward()
optimizer.step()
# The gradient printed here is the one produced by the backward() call above.
print(net.conv1.bias.grad)

CIFAR-10分类

In [None]:
import torchvision as tv
import torchvision.transforms as transforms

# Helper that turns a tensor back into a PIL image for display.
show = transforms.ToPILImage()

# Convert images to tensors, then normalise each of the three channels with
# mean 0.5 and std 0.5.
transform = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
    ]
)

# Training split (downloaded on first use), served in shuffled batches of 4.
trainset = tv.datasets.CIFAR10(root='./data/cifar10/', train=True, download=True, transform=transform)
trainloader = t.utils.data.DataLoader(
    trainset,
    batch_size=4,
    shuffle=True,
    num_workers=2,
)

# Test split, same preprocessing, served in a fixed order.
testset = tv.datasets.CIFAR10(root='./data/cifar10/', train=False, download=True, transform=transform)
testloader = t.utils.data.DataLoader(
    testset,
    batch_size=4,
    shuffle=False,
    num_workers=2,
)

# Class names, indexed by the integer label.
classes = (
    'plane', 'car', 'bird', 'cat', 'deer',
    'dog', 'frog', 'horse', 'ship', 'truck',
)

In [None]:
# Inspect one training sample: print its class name and render the image.
(data, label) = trainset[100]
print(classes[label])
# (data + 1) / 2 undoes the mean-0.5 / std-0.5 normalisation before the
# tensor is converted back to a PIL image.
show((data+1)/2).resize((100, 100))

修改上面的LeNet网络处理CIFAR10数据

In [None]:
import torch as t
import torch.nn as nn
import torch.nn.functional as F
class Net(nn.Module):
    """LeNet-style CNN for 32x32 images, defaulting to the CIFAR-10 setup.

    Args:
        in_channels: number of channels of the input images (default 3).
        num_classes: number of output scores (default 10).

    The first fully connected layer expects 16 feature maps of size 5x5,
    which is what the two conv/pool stages produce for 32x32 inputs.
    """

    def __init__(self, in_channels=3, num_classes=10):
        super().__init__()
        self.conv1 = nn.Conv2d(in_channels, 6, 5)
        self.conv2 = nn.Conv2d(6, 16, 5)
        self.fc1 = nn.Linear(16*5*5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, num_classes)

    def forward(self, x):
        """Return the raw class scores for every sample in batch ``x``."""
        x = F.max_pool2d(F.relu(self.conv1(x)), (2, 2))
        x = F.max_pool2d(F.relu(self.conv2(x)), 2)
        # Flatten everything except the batch dimension.
        x = x.view(x.size(0), -1)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        return self.fc3(x)


net = Net()


In [None]:
from torch import optim
# Cross-entropy loss for the classification task.
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.01, momentum=0.9)
# Optional GPU placement (left disabled).
#device = t.device('cuda:0' if t.cuda.is_available() else 'cpu')
#net.to(device=device)

for epoch in range(2):
    running_loss = 0.0
    for i, data in enumerate(trainloader):
        input, labels = data
        # Standard step: clear gradients, forward, loss, backward, update.
        optimizer.zero_grad()
        output = net(input)
        loss = criterion(output, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
        # Report the mean loss once every 2000 mini-batches, then reset it.
        if (i + 1) % 2000 != 0:
            continue
        print(f'[{epoch+1}, {i+1}], loss: {running_loss/2000}')
        running_loss = 0.0
print("Finish")

In [None]:
# Take the first mini-batch of the test loader and compare the true labels
# with the network's predictions.
testiter = iter(testloader)
image, labels = next(testiter)
# Iterate over the batch itself instead of assuming it holds 4 samples.
print("labels : ", "    ".join(classes[label] for label in labels))
# (image + 1) / 2 undoes the mean-0.5 / std-0.5 normalisation. display() is
# required because a bare expression is only rendered when it is the last
# statement of a notebook cell.
display(show(tv.utils.make_grid((image+1)/2)).resize((400,100)))
# Inference only: no gradient bookkeeping needed.
with t.no_grad():
    output = net(image)
# Index of the highest score per sample is the predicted class.
_, predicted = t.max(output, 1)
print("predicted : ", "    ".join(classes[idx] for idx in predicted))

In [None]:
# Count correct top-1 predictions over the whole test set.
total = 0
correct = 0
with t.no_grad():
    for data in testloader:
        images, labels = data
        output = net(images)
        # Index of the highest score per sample is the predicted class.
        _, predicted = t.max(output, 1)
        total += labels.size(0)
        # .item() keeps `correct` a plain Python int instead of a 0-dim
        # tensor, so it prints as a number.
        correct += (predicted == labels).sum().item()

print(total, correct)
    

In [None]:
# Random 3-channel 128x256 image with each channel flattened to one row,
# followed by the per-channel mean.
img = t.randn(3, 128, 256).reshape(3, -1)
img.mean(dim=1)

ResNet34 深度残差网络

In [None]:
from torch import nn
import torch as t
from torch.nn import functional as F

In [None]:
class ResidualBlock(nn.Module):
    """Two 3x3 conv + batch-norm layers with an additive skip connection.

    Args:
        inchannel: number of channels of the incoming feature map.
        outchannel: number of channels produced by both convolutions.
        stride: stride of the first convolution; the second always uses 1.
        shortcat: optional module applied to the input before it is added to
            the main branch; when ``None`` the input itself is added.
    """

    def __init__(self, inchannel, outchannel, stride=1, shortcat=None):
        super().__init__()
        main_branch = [
            nn.Conv2d(inchannel, outchannel, kernel_size=3, stride=stride, padding=1, bias=False),
            nn.BatchNorm2d(outchannel),
            nn.ReLU(inplace=True),
            nn.Conv2d(outchannel, outchannel, kernel_size=3, stride=1, padding=1, bias=False),
            nn.BatchNorm2d(outchannel),
        ]
        self.left = nn.Sequential(*main_branch)
        self.right = shortcat

    def forward(self, x):
        """Return ``relu(left(x) + skip)`` where skip is ``x`` or ``right(x)``."""
        main = self.left(x)
        if self.right is None:
            skip = x
        else:
            skip = self.right(x)
        main += skip
        return F.relu(main)


class ResNet(nn.Module):
    """Residual network with four stages of 3, 4, 6 and 3 ``ResidualBlock``s.

    Args:
        num_classes: size of the final linear layer (default 1000).

    The stem (7x7 stride-2 convolution and 3x3 stride-2 max pooling) and the
    three stride-2 stages reduce the spatial size; the remaining feature map
    is averaged globally before the classifier.
    """

    def __init__(self, num_classes=1000):
        super().__init__()
        # Stem: 7x7 stride-2 convolution, batch norm, ReLU, 3x3 stride-2 pool.
        self.pre = nn.Sequential(
            nn.Conv2d(3, 64, 7, 2, 3, bias=False),
            nn.BatchNorm2d(64),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(3, 2, 1)
        )
        # The first stage keeps 64 channels at stride 1, so its first block
        # adds the input directly; later stages need a projection.
        self.layer1 = self._make_layer(64, 64, 3, 1, is_shortcat=False)
        self.layer2 = self._make_layer(64, 128, 4, 2)
        self.layer3 = self._make_layer(128, 256, 6, 2)
        self.layer4 = self._make_layer(256, 512, 3, 2)
        self.classifies = nn.Linear(512, num_classes)

    def _make_layer(self, inchannel, outchannel, block_num, stride, is_shortcat=True):
        """Build one stage of ``block_num`` residual blocks.

        Only the first block changes channel count and stride. When
        ``is_shortcat`` is true that block gets a 1x1 convolution plus batch
        norm on its skip path so both branches have matching shapes.
        """
        if is_shortcat:
            shortcat = nn.Sequential(
                nn.Conv2d(inchannel, outchannel, 1, stride, bias=False),
                nn.BatchNorm2d(outchannel)
            )
        else:
            shortcat = None
        layers = []
        layers.append(ResidualBlock(inchannel=inchannel, outchannel=outchannel, stride=stride, shortcat=shortcat))
        for _ in range(1, block_num):
            layers.append(ResidualBlock(inchannel=outchannel, outchannel=outchannel))
        return nn.Sequential(*layers)

    def forward(self, x):
        """Return class scores of shape (batch, num_classes) for images ``x``."""
        x = self.pre(x)
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)
        # Global average pooling. For 224x224 inputs the feature map is 7x7,
        # so this equals the former fixed avg_pool2d(x, 7), while other input
        # sizes no longer break the 512-feature classifier.
        x = F.adaptive_avg_pool2d(x, 1)
        x = x.view(x.size(0), -1)
        return self.classifies(x)



In [None]:
# Smoke test: run one random 3-channel 224x224 image through the network and
# show the resulting scores.
model = ResNet()
input = t.randn(1,3,224,224)
output = model(input)
output

数据集处理 Kaggle: Dog vs Cat

In [None]:
import os
from PIL import Image
import numpy as np
import torch as t
from torch.utils.data import Dataset
from pathlib import Path
from torchvision import transforms as T

class DogCat(Dataset):
    """Dataset over the image files in ``<root>/dog`` and ``<root>/cat``.

    Each item is ``(image, label)``. The label is taken from the name of the
    directory that directly contains the file: 1 for ``dog``, 2 for ``cat``
    and 0 for anything else.

    Args:
        root: directory holding the ``dog`` and ``cat`` sub-directories.
        transform: optional callable applied to the opened PIL image.
    """

    def __init__(self, root, transform = None):
        self.class_map = {'dog': 1, 'cat': 2}
        self.transform = transform
        self.imgs = []
        for class_name in ('dog', 'cat'):
            class_dir = os.path.join(root, class_name)
            # os.listdir returns entries in arbitrary order; sorting keeps
            # the index -> file mapping reproducible between runs.
            self.imgs.extend(
                os.path.join(class_dir, name) for name in sorted(os.listdir(class_dir))
            )

    def _label_of(self, path):
        """Return the label for ``path`` based on its containing directory."""
        # The immediate parent is the class directory; going one level
        # further up would yield the dataset root and always map to 0.
        return self.class_map.get(Path(path).parent.name, 0)

    def __getitem__(self, index):
        """Return ``(image, label)`` for the file at position ``index``."""
        img = self.imgs[index]
        classify = self._label_of(img)
        image = Image.open(img)
        if self.transform:
            image = self.transform(image)
        return image, classify

    def __len__(self):
        """Return the number of image files found under both class folders."""
        return len(self.imgs)
    
# Preprocessing: resize, take a 224x224 centre crop, convert to a tensor and
# normalise each of the three channels with mean 0.5 and std 0.5.
transform = T.Compose([
    T.Resize(224),
    T.CenterCrop(224),
    T.ToTensor(),
    T.Normalize(mean=[.5, .5, .5], std=[.5, .5, .5]),
])

# Build the dataset and print its first (image, label) pair.
dataset = DogCat(root="./data/dogcat", transform=transform)
print(dataset[0])



In [None]:
from torchvision.datasets import ImageFolder

# Same directory loaded through torchvision's ImageFolder, reusing the
# transform defined for DogCat; show the size of the first image tensor.
dataset = ImageFolder("./data/dogcat/", transform=transform)
dataset[0][0].size()

torch索引操作

In [None]:
# Advanced (tensor) indexing only.
import torch as t
a = t.arange(12).reshape(3, 4)
index1, index2 = t.tensor([1, 2]), t.tensor([0, 2])
# Index tensors of equal shape are paired element-wise: a[1, 0] and a[2, 2].
print(a[index1, index2])
# Index tensors of different shapes are broadcast against each other:
# shape (1, 2) with shape (2, 1) gives a (2, 2) result.
index1 = index1.unsqueeze(0)
index2 = index2.unsqueeze(1)
print(a)
print(index1)
print(index2)
a[index1, index2]

In [None]:
# Combining advanced (tensor) indexing with basic (slice) indexing.
import torch as t
a = t.arange(24).view(2,3,4)
print(a)
index1 = t.tensor([[1,0]]) # shape 1*2
index2 = t.tensor([[0,2]]) # shape 1*2
# All advanced indices are adjacent:
# the first dimension of a is kept, followed by the index shape, i.e. 2*1*2.
print(a[:, index1, index2])

a = t.arange(120).view(2,3,4,5)
# The two middle dimensions are replaced by the index shape: 2*1*2*5.
print("高级索引在中间位置：",a[:, index1, index2, :].shape)

# Advanced indices that are not adjacent (separated by a basic index):
# the index shape comes first, followed by the remaining sliced dimensions,
# giving 1*2*3*5 and 1*2*2*4 respectively.
print("高级索引被分隔开：", a[index1, :, index2].shape)
print("高级索引被分隔开：", a[:, index1, :, index2].shape)



手动实现卷积

In [None]:
import torch as t
from torch import nn
# Extract the sliding windows with an explicit double loop (im2col), then
# compute the convolution as one matrix product.
def Conv_base(img, filters, stride, padding):
    """Convolve one image with a bank of square filters using im2col.

    Args:
        img: tensor of shape (in_channel, Hin, Win).
        filters: tensor of shape (in_channel, out_channel, k, k).
        stride: step between neighbouring windows; a Python number or a
            0-dim tensor.
        padding: number of zero pixels added on every side; a Python number
            or a 0-dim tensor.

    Returns:
        Tensor of shape (out_channel, Hout, Wout) with
        ``Hout = (Hin + 2*padding - k) // stride + 1`` and likewise for Wout.
    """
    in_channel, Hin, Win = img.shape
    _, out_channel, k, _ = filters.shape
    # Accept both plain numbers and 0-dim tensors.
    stride, padding = int(stride), int(padding)
    Hout = (Hin + 2 * padding - k) // stride + 1
    Wout = (Win + 2 * padding - k) // stride + 1

    col = t.zeros(in_channel, k, k, Hout, Wout, dtype=filters.dtype)
    imgs = nn.ZeroPad2d(padding=padding)(img)

    for i in range(Hout):
        h = i * stride
        for j in range(Wout):
            w = j * stride
            # Output position (i, j) reads the window whose top-left corner
            # is pixel (h, w) of the padded image. The column buffer must be
            # indexed by the output position, not the pixel position.
            col[..., i, j] = imgs[:, h:h+k, w:w+k]
    col = col.view(in_channel*k*k, Hout*Wout)
    # One row per output channel, flattened in the same (channel, row, col)
    # order as the columns above.
    filters = filters.transpose(1, 0).reshape(out_channel, in_channel*k*k)
    return (filters @ col).view(out_channel, Hout, Wout)


def Conv_index(img, filters, stride, padding):
    """Convolve one image with a bank of square filters via advanced indexing.

    All sliding windows are gathered in one indexing operation instead of a
    Python loop, and the convolution is computed as one matrix product.

    Args:
        img: tensor of shape (in_channel, Hin, Win).
        filters: tensor of shape (in_channel, out_channel, k, k).
        stride: step between neighbouring windows; a Python number or a
            0-dim tensor.
        padding: number of zero pixels added on every side; a Python number
            or a 0-dim tensor.

    Returns:
        Tensor of shape (out_channel, Hout, Wout) with
        ``Hout = (Hin + 2*padding - k) // stride + 1`` and likewise for Wout.
    """
    in_channel, Hin, Win = img.shape
    _, out_channel, k, _ = filters.shape
    # Accept both plain numbers and 0-dim tensors.
    stride, padding = int(stride), int(padding)
    Hout = (Hin + 2 * padding - k) // stride + 1
    Wout = (Win + 2 * padding - k) // stride + 1

    img = nn.ZeroPad2d(padding=padding)(img)

    # Offsets inside a window, and the top-left corner of every window.
    offsets = t.arange(k, device=img.device)
    rows = t.arange(Hout, device=img.device) * stride
    cols = t.arange(Wout, device=img.device) * stride

    # Broadcast to shape (k, k, Hout, Wout):
    #   index1[a, b, i, j] = i*stride + a   (row of the pixel)
    #   index2[a, b, i, j] = j*stride + b   (column of the pixel)
    index1 = offsets[:, None, None, None] + rows[None, None, :, None]
    index2 = offsets[None, :, None, None] + cols[None, None, None, :]

    filters = filters.transpose(1, 0).reshape(out_channel, in_channel*k*k)
    patches = img[:, index1, index2].reshape(in_channel*k*k, Hout*Wout)
    # Match the filter dtype so integer images can be multiplied as well.
    return (filters @ patches.to(filters.dtype)).view(out_channel, Hout, Wout)


简单测试卷积代码

In [None]:
# Constant filters (every weight 1/9) in the (in_channel, out_channel, k, k)
# layout that Conv_index unpacks: 3 input channels, 3 output channels, 3x3.
filters = (t.ones(3,3,3,3) / 9).float()
# 3-channel 6x6 image holding the values 0..107.
img = t.arange(36*3).view(3,6,6).float()
# Stride 1 and no padding, passed as 0-dim tensors.
stride, padding = t.tensor(1.), t.tensor(0)
output = Conv_index(img, filters, stride, padding)
print(output)


使用cat图片测试卷积代码

In [None]:
# Load the first image of the dog/cat dataset as a tensor, without resizing
# or normalisation.
transform = T.Compose([
    T.ToTensor()
])
dataset = DogCat(root="./data/dogcat", transform=transform)
img_data = dataset[0][0]
# Rescale by 255 and store as uint8.
img_data = (img_data * 255).to(t.uint8)
# display(T.ToPILImage()(img_data))
print(img_data.shape)
# NOTE(review): Conv_base unpacks this shape as (in_channel, out_channel, k, k),
# i.e. a 32x32 kernel, while the 1/9 scale suggests a 3x3 averaging kernel
# was intended - confirm.
filters = t.ones(3,3,32,32) / 9
stride, padding = t.tensor(1.), t.tensor(0)
output = Conv_base(img_data, filters, stride, padding)
print(output.shape)
# NOTE(review): output is a float tensor that is not rescaled to an image
# value range before conversion - confirm the rendered picture is as intended.
display(T.ToPILImage()(output))

In [None]:
# Scratch cell: build the gather indices for a k x k window sliding over a
# Hout x Wout grid of window centres, and inspect the intermediate grids.
k = 3
stride = 1
Hout, Wout = 4, 4

# Offsets inside one window, centred on zero: -(k//2) .. k//2.
k1 = t.arange(k) - k // 2
index11, index12 = t.meshgrid(k1, k1, indexing='ij')

# Centre position of every window along each axis.
H = k // 2 + stride * t.arange(Hout)
W = k // 2 + stride * t.arange(Wout)
index21, index22 = t.meshgrid(H, W, indexing='ij')

# Broadcast offsets (k, k, 1, 1) against centres (Hout, Wout) to obtain the
# row and column of every pixel of every window: shape (k, k, Hout, Wout).
index1 = index11[..., None, None] + index21
index2 = index12[..., None, None] + index22
print(index11, index12)
print(index21, index22)
index1, index2

In [3]:
import torch as t
# Reorder the columns of a (1, 4) tensor with a list index: [2, 3, 0, 1]
# moves the last two columns in front of the first two.
t.tensor([2,2,5,0]).reshape(1,4)[:,[2,3,0,1]]

tensor([[5, 0, 2, 2]])