pytorch安装

In [1]:
import torch

In [2]:
# Ask PyTorch whether CUDA is currently available (returns a bool).
torch.cuda.is_available()

True

查询package里的函数以及函数如何使用

In [11]:
# dir() lists the attribute names exposed by the torch.cuda module.
dir(torch.cuda)

['Any',
 'BFloat16Storage',
 'BFloat16Tensor',
 'BoolStorage',
 'BoolTensor',
 'ByteStorage',
 'ByteTensor',
 'CUDAGraph',
 'CUDAPluggableAllocator',
 'Callable',
 'CharStorage',
 'CharTensor',
 'ComplexDoubleStorage',
 'ComplexFloatStorage',
 'CudaError',
 'DeferredCudaCallError',
 'Device',
 'DoubleStorage',
 'DoubleTensor',
 'Event',
 'ExternalStream',
 'FloatStorage',
 'FloatTensor',
 'HalfStorage',
 'HalfTensor',
 'IntStorage',
 'IntTensor',
 'List',
 'LongStorage',
 'LongTensor',
 'MemPool',
 'MemPoolContext',
 'Optional',
 'OutOfMemoryError',
 'ShortStorage',
 'ShortTensor',
 'Stream',
 'StreamContext',
 'Tuple',
 'Union',
 '_CudaBase',
 '_CudaDeviceProperties',
 '_DeviceGuard',
 '_HAS_PYNVML',
 '_LazySeedTracker',
 '_PYNVML_ERR',
 '_WrappedTritonKernel',
 '__all__',
 '__annotations__',
 '__builtins__',
 '__cached__',
 '__doc__',
 '__file__',
 '__loader__',
 '__name__',
 '__package__',
 '__path__',
 '__spec__',
 '_cached_device_count',
 '_check_bf16_tensor_supported',
 '_check_c

In [12]:
# Pass the function object itself (no parentheses): help() documents the
# callable, whereas is_available() would pass its return value instead.
help(torch.cuda.is_available)

Help on function is_available in module torch.cuda:

is_available() -> bool
    Return a bool indicating if CUDA is currently available.



### 读取数据:dataset与dataloader

dataset:提供一种方式获取数据及其label

dataloader：为后面的网络提供不同的数据形式

In [15]:
from torch.utils.data import Dataset
from PIL import Image
import os

In [None]:
# Custom Dataset that reads the images stored in a single class folder.
class MyDataset(Dataset):
    """Image-folder dataset whose label is the name of the folder.

    Every entry found in ``root_dir/label_dir`` is treated as one sample.

    Args:
        root_dir: Root directory of the dataset split.
        label_dir: Name of the sub-directory that holds the images. It is also
            returned as the label of every sample (e.g. ``'ants'`` or ``'bees'``).
    """

    def __init__(self,root_dir,label_dir):
        self.root_dir = root_dir
        self.label_dir = label_dir
        # Full path of the folder that contains the images of this class.
        self.path = os.path.join(self.root_dir,self.label_dir)
        # os.listdir() returns entries in arbitrary order; sort them so that a
        # given index always maps to the same file across runs and machines.
        self.img_path = sorted(os.listdir(self.path))

    def __getitem__(self,idx):
        """Return ``(img, label)`` for sample ``idx``.

        ``img`` is the object returned by ``Image.open`` and ``label`` is the
        folder name given at construction time.
        """
        img_name = self.img_path[idx]
        img_path = os.path.join(self.path,img_name)
        img = Image.open(img_path)
        label = self.label_dir
        return img,label

    def __len__(self):
        """Return the number of entries found in the class folder."""
        return len(self.img_path)

In [None]:
# Build the dataset for the ant images.
# NOTE(review): root_dir is an absolute, machine-specific path; point it at your
# local copy of the data before running.
root_dir = r'F:\RUC\pytorch\数据集\hymenoptera_data\train'
label_dir = 'ants'
ants_dataset = MyDataset(root_dir,label_dir)

In [25]:
# Fetch the first sample and display the image.
img,label = ants_dataset.__getitem__(0)
# Equivalent, more idiomatic form:
#   img,label = ants_dataset[0]
img.show()

# Number of samples in the dataset (shown as the cell output).
ants_dataset.__len__()
# Equivalent, more idiomatic form:
#   len(ants_dataset)

124

In [26]:
# Build the dataset for the bee images from the same training root.
# NOTE(review): absolute, machine-specific path.
root_dir = r'F:\RUC\pytorch\数据集\hymenoptera_data\train'
label_dir = 'bees'
bees_dataset = MyDataset(root_dir,label_dir)

In [27]:
# Concatenate the two datasets; torch Dataset objects support `+`, which
# chains them so the ants samples come first, followed by the bees samples.
train_dataset = ants_dataset + bees_dataset

In [28]:
# Inspect a sample of the concatenated dataset. ants_dataset reported 124
# samples, so index 125 should fall inside the bees part.
img,label = train_dataset.__getitem__(125)
img.show()

In [None]:
# Dataset for the layout "one folder of images + one folder of label files".
class MyDataset2(Dataset):
    """Dataset pairing each image file with a text file that holds its label.

    Args:
        root_dir: Root directory of the dataset split.
        image_dir: Sub-directory (relative to ``root_dir``) containing the images.
        label_dir: Sub-directory (relative to ``root_dir``) containing one label
            file per image.

    Images and labels are paired by position after sorting both directory
    listings. This assumes each label file shares its name stem with the
    matching image, so both lists sort into the same order.
    """

    def __init__(self,root_dir,image_dir,label_dir):
        self.root_dir = root_dir
        self.image_dir = image_dir
        self.label_dir = label_dir
        self.image_path = os.path.join(self.root_dir,self.image_dir)
        self.label_path = os.path.join(self.root_dir,self.label_dir)
        # Sort both listings identically so that index i of one list refers to
        # the same sample as index i of the other.
        self.image_list = sorted(os.listdir(self.image_path))
        self.label_list = sorted(os.listdir(self.label_path))

    def __getitem__(self,idx):
        """Return ``{'img': <opened image>, 'label': <str>}`` for sample ``idx``."""
        img_name = self.image_list[idx]
        label_name = self.label_list[idx]
        img_item_path = os.path.join(self.image_path, img_name)
        label_item_path = os.path.join(self.label_path, label_name)

        img = Image.open(img_item_path)

        with open(label_item_path, 'r') as f:
            # The label is the first line of the file. readline() keeps the
            # trailing newline, so strip it to return the bare label text.
            label = f.readline().strip()

        sample = {'img': img, 'label': label}
        return sample

    def __len__(self):
        """Return the number of label files found."""
        return len(self.label_list)

In [39]:
# Build a MyDataset2 from separate image and label folders.
# NOTE(review): absolute, machine-specific path; this also rebinds the name
# `ants_dataset`, which an earlier cell used for a MyDataset instance.
root_dir = r'F:\RUC\pytorch\数据集\练手数据集\train'
image_dir = 'ants_image'
label_dir = 'ants_label'

ants_dataset = MyDataset2(root_dir,image_dir,label_dir)

In [42]:
# Samples are dicts; display the image stored under the 'img' key of sample 0.
ants_dataset[0]['img'].show()

### Tensorboard的使用

SummaryWriter类是 PyTorch 中 torch.utils.tensorboard 模块提供的一个重要工具，主要用于将训练过程中的各种数据（如损失值、准确率、图像等）写入 TensorBoard 可以读取的日志文件，方便用户通过 TensorBoard 可视化工具直观地观察和分析模型的训练过程和性能。

In [4]:
from torch.utils.tensorboard import SummaryWriter

In [7]:
# Create a writer whose event files go into the 'logs' directory.
writer = SummaryWriter('logs')

# Log the curve tagged 'y = 2x': the value 2*i is recorded at global step i.
for i in range(100):
    writer.add_scalar('y = 2x',2*i,i)
# Close the writer when done.
writer.close()

In [10]:
from PIL import Image
import numpy as np

# Open an image with PIL and convert it to a numpy array.
# NOTE(review): absolute, machine-specific path.
image_path = r'F:\RUC\pytorch\数据集\练手数据集\train\ants_image\0013035.jpg'
image_PIL = Image.open(image_path)
image_array = np.array(image_PIL)
# Print the array shape; the channel axis is last (height, width, channels).
print(image_array.shape)

# Create a writer for the 'logs' directory.
writer = SummaryWriter('logs')
# The array is channel-last, which differs from add_image's default layout,
# so the layout must be given explicitly via dataformats='HWC'.
writer.add_image('test',image_array,1,dataformats='HWC')
writer.close()

(512, 768, 3)


### Transforms

In [11]:
from torchvision import transforms

In [12]:
from PIL import Image

In [14]:
# Load one image with PIL.
# NOTE(review): absolute, machine-specific path.
img_path = r'F:\RUC\pytorch\数据集\练手数据集\train\ants_image\541630764_dbd285d63c.jpg'
img = Image.open(img_path)

# Build a ToTensor transform object and apply it to the image.
tensor_trans = transforms.ToTensor()
tensor_img = tensor_trans(img)

# Display the resulting tensor as the cell output.
tensor_img

tensor([[[0.3608, 0.3686, 0.3686,  ..., 0.4039, 0.4000, 0.4078],
         [0.3569, 0.3647, 0.3686,  ..., 0.4078, 0.4078, 0.4157],
         [0.3686, 0.3608, 0.3569,  ..., 0.4039, 0.4118, 0.4157],
         ...,
         [0.3725, 0.3686, 0.3686,  ..., 0.8902, 0.8863, 0.8824],
         [0.3608, 0.3608, 0.3647,  ..., 0.8941, 0.8902, 0.8902],
         [0.3608, 0.3608, 0.3647,  ..., 0.8941, 0.8902, 0.8863]],

        [[0.5686, 0.5725, 0.5725,  ..., 0.6235, 0.6196, 0.6275],
         [0.5647, 0.5686, 0.5725,  ..., 0.6275, 0.6275, 0.6353],
         [0.5765, 0.5647, 0.5608,  ..., 0.6314, 0.6392, 0.6431],
         ...,
         [0.5922, 0.5882, 0.5843,  ..., 0.9176, 0.9137, 0.9098],
         [0.5765, 0.5765, 0.5843,  ..., 0.9137, 0.9098, 0.9098],
         [0.5765, 0.5765, 0.5843,  ..., 0.9137, 0.9098, 0.9059]],

        [[0.0039, 0.0039, 0.0039,  ..., 0.0000, 0.0000, 0.0000],
         [0.0000, 0.0039, 0.0039,  ..., 0.0039, 0.0000, 0.0039],
         [0.0118, 0.0000, 0.0000,  ..., 0.0039, 0.0039, 0.

In [None]:
# Commonly used transforms

# Open the image used by the following transform examples.
# NOTE(review): absolute, machine-specific path.
from PIL import Image
from torchvision import transforms
img = Image.open(r'F:\RUC\pytorch\数据集\练手数据集\train\ants_image\2288481644_83ff7e4572.jpg')

# Compose
# transforms.Compose chains several transforms into a single callable, e.g.
# resize to 256x256, then random-crop to 224x224, then convert to a tensor.


# ToTensor
# Converts a PIL Image or ndarray to a tensor and scales values into [0, 1.0].
tensor_trans = transforms.ToTensor()
tensor_img = tensor_trans(img)


In [16]:
# Normalize
# Standardizes every channel with the given mean and std: (x - mean) / std.
# Value of one pixel before normalization:
print(tensor_img[0][0][0])
trans_norm = transforms.Normalize(mean=[0.5,0.5,0.5],std=[0.5,0.5,0.5])
img_norm = trans_norm(tensor_img)
# The same pixel after normalization, i.e. (x - 0.5) / 0.5:
print(img_norm[0][0][0])

tensor(0.2784)
tensor(-0.4431)


In [18]:
# Resize
# Change the image size; print the PIL size before and after.
print(img.size)
trans_resize = transforms.Resize((256,256))
img_resize = trans_resize(img)
print(img_resize.size)
# The result is still a PIL image; convert it to a tensor before logging it to TensorBoard.

(500, 500)
(256, 256)


In [None]:
# RandomCrop
# Randomly crop a 256x256 patch from the image, then convert it to a tensor.
trans_random = transforms.RandomCrop(256)
trans_compose = transforms.Compose([trans_random,tensor_trans])
# Open a writer in this cell instead of relying on the `writer` left over from
# an earlier cell, which had already been closed there.
writer = SummaryWriter('logs')
# Log ten different random crops under the same tag, one per step.
for i in range(10):
    img_crop = trans_compose(img)
    writer.add_image('randomcrop',img_crop,i)
# Close the writer, then inspect each step's crop in TensorBoard.
writer.close()


In [22]:
# Download CIFAR10 through torchvision without applying any transform, so the
# samples stay PIL images.
import torchvision
train_set = torchvision.datasets.CIFAR10(root='./data',train=True,download=True)
test_set = torchvision.datasets.CIFAR10(root='./data',train=False,download=True)

# Look at the first raw training sample: an (image, class index) pair.
img,target = train_set[0]
img.show()
# Map the class index to its human-readable class name.
print(train_set.classes[target])

frog


In [None]:
# Load CIFAR10 again, this time applying a transform to every sample.
# The transform is built with Compose: ToTensor, then per-channel
# normalization with mean 0.5 and std 0.5.
transform = transforms.Compose([transforms.ToTensor(),transforms.Normalize((0.5,0.5,0.5),(0.5,0.5,0.5))])
train_set = torchvision.datasets.CIFAR10(root='./data',train=True,download=True,transform=transform)
test_set = torchvision.datasets.CIFAR10(root='./data',train=False,download=True,transform=transform)

### Dataloader

In [None]:
# Load the CIFAR10 test split from torchvision.
from torch.utils.data import DataLoader
transform = transforms.Compose([transforms.ToTensor(),transforms.Normalize((0.5,0.5,0.5),(0.5,0.5,0.5))])
test_set = torchvision.datasets.CIFAR10(root='./data',train=False,download=True,transform=transform)

test_loader = DataLoader(test_set,batch_size=64,shuffle=True,num_workers=0,drop_last=True)
# batch_size: number of samples stacked into one batch. With drop_last=True the
# n samples are split into n // batch_size batches and the incomplete tail is
# discarded.

# Shape and label of the first sample of the underlying dataset.
img,target = test_set[0]
print(img.shape)
print(target)

# Log the batches produced by the DataLoader to TensorBoard.
writer = SummaryWriter('dataloader')
step = 0
# One epoch is one full pass over the dataset; shuffle=True reorders it each pass.
for epoch in range(2):
    for data in test_loader:
        # Each batch is an (images, targets) pair.
        img,target = data
        #print(img.shape)
        #print(target)
        writer.add_images('test_drop',img,step)
        step += 1
writer.close()

torch.Size([3, 32, 32])
3


### nn.Module

In [None]:
import torch.nn as nn
import torch

class ep(nn.Module):
    """Minimal nn.Module example: the forward pass adds 1 to its input."""

    def __init__(self):
        # The parent constructor must run before the module is used.
        super().__init__()

    def forward(self,x):
        """Return ``x + 1``."""
        return x + 1


# Calling the module instance runs forward(); the input has to be a tensor.
model = ep()
input = torch.tensor(1.0)
print(model(input))

tensor(2.)


### convolution layers

In [None]:
import torch
import torch.nn.functional as F
# 5x5 single-channel input used to demonstrate F.conv2d.
input = torch.tensor([
    [1,2,0,3,1],
    [0,1,2,3,1],
    [1,2,1,0,0],
    [5,2,3,1,1],
    [2,1,0,1,1],
])

# 3x3 convolution kernel: every row is [-1, 0, 1].
kernel = torch.tensor([[-1,0,1]] * 3)

print(input.shape)
print(kernel.shape)

# F.conv2d expects 4-D tensors: the input as (batch_size, channel, height, width)
# and the weight as (out_channels, in_channels, height, width).
input = input.reshape(1,1,5,5)
kernel = kernel.reshape(1,1,3,3)

print(input.shape)
print(kernel.shape)

# Plain convolution: stride 1, no padding -> 3x3 output.
output = F.conv2d(input,kernel,stride=1,padding=0)
print(output)

# stride is the step of the sliding window; stride 2 -> 2x2 output.
output2 = F.conv2d(input,kernel,stride=2)
print(output2)

# padding=1 adds a border of zeros on all four sides (5x5 becomes 7x7),
# so with stride 1 the output keeps the 5x5 size.
output3 = F.conv2d(input,kernel,padding=1)
print(output3)

torch.Size([5, 5])
torch.Size([3, 3])
torch.Size([1, 1, 5, 5])
torch.Size([1, 1, 3, 3])
tensor([[[[ 1,  1, -1],
          [ 0, -1, -4],
          [-4, -3, -2]]]])
tensor([[[[ 1, -1],
          [-4, -2]]]])
tensor([[[[ 3,  1,  3,  0, -6],
          [ 5,  1,  1, -1, -6],
          [ 5,  0, -1, -4, -4],
          [ 5, -4, -3, -2, -2],
          [ 3, -4, -1, -1, -2]]]])


In [16]:
# kernel_size is the size of the convolution kernel. The kernel values are not
# written by hand; they are parameters that get tuned during training.
# out_channels is the number of output channels, i.e. the number of kernels.
# For example, out_channels=2 means two kernels are applied and their results
# are stacked along the channel axis (the CNN below uses out_channels=6).
import torch
import torchvision
from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter


# Load the data: build the Dataset first, then wrap it in a DataLoader.
dataset = torchvision.datasets.CIFAR10(root='./data2',train=False,download=True,transform=torchvision.transforms.ToTensor())
dataloader = DataLoader(dataset,batch_size=64)

# Convolutional network made of a single Conv2d layer.
class CNN(nn.Module):
    """One 3x3 convolution mapping 3 input channels to 6 output channels."""

    def __init__(self):
        super().__init__()
        # in_channels is 3 because the input images are RGB.
        # No padding and stride 1, so height and width each shrink by 2.
        self.conv1 = nn.Conv2d(3, 6, kernel_size=3, stride=1, padding=0)

    def forward(self,x):
        """Apply the convolution to a batch shaped (N, 3, H, W)."""
        return self.conv1(x)

cnn = CNN()

writer = SummaryWriter('cnn')
# Feed every batch through the network and log the result to TensorBoard.
# Output height/width: 30 = 32 - 3 + 1 (kernel 3, stride 1, no padding).
step = 0
for data in dataloader:
    img,target = data
    # [64, 3, 32, 32] -> [64, 6, 30, 30]
    output = cnn(img)
    # A 6-channel image cannot be displayed, so fold the 6 channels into
    # 3-channel images; -1 lets the batch dimension grow accordingly.
    output = torch.reshape(output,(-1,3,30,30))
    writer.add_images('cnn',output,step)
    step += 1
writer.close()


### Pooling layer

In [21]:
import torch

# The input is created as float32; per the original author's note, pooling an
# integer tensor raises an error.
input = torch.tensor(
    [
        [1,2,0,3,1],
        [0,1,2,3,1],
        [1,2,1,0,0],
        [5,2,3,1,1],
        [2,1,0,1,1],
    ],
    dtype=torch.float32,
).reshape(-1,1,5,5)


class max_pooling(nn.Module):
    """Max pooling with a 3x3 window.

    Only kernel_size is given, so the window moves by the layer's default
    stride. ceil_mode=True keeps the partial windows at the right/bottom edge
    (the output size is rounded up); with ceil_mode=False they are dropped.
    No kernel values are needed: pooling only takes the maximum of each window.
    """

    def __init__(self):
        super().__init__()
        self.max_pool = nn.MaxPool2d(kernel_size=3,ceil_mode=True)

    def forward(self,x):
        """Return the max-pooled version of ``x``."""
        return self.max_pool(x)


model = max_pooling()
output = model(input)
print(output)

tensor([[[[2., 3.],
          [5., 1.]]]])


最大池化的作用：在保留数据主要特征的同时减小数据量，类似于把 1080p 的视频压缩成 720p。

In [23]:
# Visualize the effect of max pooling on real images.

# Load the data: build the Dataset first, then wrap it in a DataLoader.
dataset = torchvision.datasets.CIFAR10(root='./data2',train=False,download=True,transform=torchvision.transforms.ToTensor())
dataloader = DataLoader(dataset,batch_size=64)

writer = SummaryWriter('max_pooling')
# Feed every batch through the pooling layer and log the result to TensorBoard.
# Output height/width: 11 = ceil((32 - 3) / 3) + 1 (kernel 3, default stride, ceil_mode=True).
step = 0
model = max_pooling()
for data in dataloader:
    img,target = data
    # [N, 3, 32, 32] -> [N, 3, 11, 11]; pooling leaves the channel count unchanged
    output = model(img)
    # The output still has 3 channels, so it can be logged as images directly.
    writer.add_images('max_pooling',output,step)
    step += 1
writer.close()


### 非线性激活

In [25]:
import torch
import torch.nn as nn

# Small 2x2 example containing positive and negative values.
input = torch.tensor([[1,-0.5],[-1,3]])


class sigmoid(nn.Module):
    """Module wrapper that applies the element-wise sigmoid activation."""

    def __init__(self):
        super().__init__()

    def forward(self,x):
        """Return ``sigmoid(x)`` computed element-wise."""
        return torch.sigmoid(x)


model = sigmoid()
output = model(input)
print(output)

tensor([[0.7311, 0.3775],
        [0.2689, 0.9526]])


In [None]:
# Apply the sigmoid module to CIFAR10 images and compare input/output in TensorBoard.
dataset = torchvision.datasets.CIFAR10(root='./data2',train=False,download=True,transform=torchvision.transforms.ToTensor())
dataloader = DataLoader(dataset,batch_size=64)

writer = SummaryWriter('sigmoid')
# Feed every batch through the module and log both the input and the output.
step = 0
model = sigmoid()
for data in dataloader:
    img,target = data
    writer.add_images('input',img,step)
    output = model(img)
    # The activation is element-wise, so the output keeps the input's shape
    # (3 channels) and can be logged as images directly.
    writer.add_images('output',output,step)
    step += 1
writer.close()

###  线性层

In [None]:
import torch
import torch.nn as nn
import torchvision 
from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter
# CIFAR10 test split as tensors, served in batches of 64.
dataset = torchvision.datasets.CIFAR10(root='./data2',train=False,download=True,transform=torchvision.transforms.ToTensor())
dataloader = DataLoader(dataset,batch_size=64)

# NOTE(review): this writer is created but never written to or closed in this cell.
writer = SummaryWriter('linear')

class linear(nn.Module):
    """Single fully-connected layer mapping a flattened image to 10 outputs."""

    def __init__(self):
        super().__init__()
        # Input features = channel * height * width of one 3x32x32 image.
        self.linear = nn.Linear(in_features=3*32*32, out_features=10)

    def forward(self,x):
        """Apply the linear layer to ``x`` shaped (N, 3072)."""
        return self.linear(x)

model = linear()
step = 0
# Run only the first batch (note the break) and print the shape at each stage.
for data in dataloader:
    img,target = data
    print(img.shape)
    # Flatten everything except the batch dimension: (N, 3, 32, 32) -> (N, 3072).
    img = torch.flatten(img,start_dim=1)
    print(img.shape)
    output = model(img)
    print(output.shape)
    step += 1
    break

torch.Size([64, 3, 32, 32])
torch.Size([64, 3072])
torch.Size([64, 10])


### sequential

In [None]:
import torch
from torch import nn
from torch.nn import Conv2d
from torch.utils.tensorboard import SummaryWriter
class ep(nn.Module):
    """Small CIFAR10-style network built as one nn.Sequential.

    Three (5x5 conv -> 2x2 max-pool) stages, then Flatten and two Linear layers.
    """

    def __init__(self):
        super().__init__()  # initialise the nn.Module base class
        stages = []
        in_ch = 3
        for out_ch in (32, 32, 64):
            # padding=2 is chosen so a 5x5 kernel keeps height/width unchanged.
            stages.append(nn.Conv2d(in_ch, out_ch, kernel_size=5, padding=2))
            # Each pooling step halves height and width.
            stages.append(nn.MaxPool2d(kernel_size=2))
            in_ch = out_ch
        # Linear layers work on flat feature vectors: after three poolings a
        # 32x32 input is 4x4 with 64 channels, hence 64*4*4 input features.
        stages.append(nn.Flatten())
        stages.append(nn.Linear(64*4*4, 64))
        stages.append(nn.Linear(64, 10))
        self.model1 = nn.Sequential(*stages)

    def forward(self,x):
        """Run ``x`` through the sequential stack and return the 10 outputs per sample."""
        return self.model1(x)

# Sanity-check the network with a dummy batch of 64 images (3x32x32, all ones).
model = ep()
input = torch.ones((64,3,32,32))
print(model(input).shape)

writer = SummaryWriter('logs_seq')
# Record the model's computation graph (traced with the dummy input) for TensorBoard.
writer.add_graph(model,input)
writer.close()
        

torch.Size([64, 10])


### 损失函数

In [None]:
import torch
from torch import nn

# Prediction and target vectors, reshaped to 4-D (1, 1, 1, 3).
# What matters for the loss is that input and target have matching shapes.
input = torch.tensor([1,2,3],dtype=torch.float32).reshape(1,1,1,3)
target = torch.tensor([1,2,5],dtype=torch.float32).reshape(1,1,1,3)

# L1Loss with its default reduction averages |input - target|:
# (0 + 0 + 2) / 3 = 0.6667
loss = nn.L1Loss()
print(loss(input,target))

tensor(0.6667)


神经网络类中只定义初始化与前向传播；损失函数的计算和梯度的反向传播都写在类的外面（即训练循环中）。

In [None]:
import torchvision
from torch.utils.data import DataLoader
# CIFAR10 test split as tensors, served one sample per batch (batch_size=1).
dataset = torchvision.datasets.CIFAR10(root='./data2',train=False,download=True,transform=torchvision.transforms.ToTensor())
dataloader = DataLoader(dataset,batch_size=1)
class ep(nn.Module):
    """Small CIFAR10-style network built as one nn.Sequential.

    Three (5x5 conv -> 2x2 max-pool) stages, then Flatten and two Linear layers.
    """

    def __init__(self):
        super().__init__()  # initialise the nn.Module base class
        stages = []
        in_ch = 3
        for out_ch in (32, 32, 64):
            # padding=2 is chosen so a 5x5 kernel keeps height/width unchanged.
            stages.append(nn.Conv2d(in_ch, out_ch, kernel_size=5, padding=2))
            # Each pooling step halves height and width.
            stages.append(nn.MaxPool2d(kernel_size=2))
            in_ch = out_ch
        # Linear layers work on flat feature vectors: after three poolings a
        # 32x32 input is 4x4 with 64 channels, hence 64*4*4 input features.
        stages.append(nn.Flatten())
        stages.append(nn.Linear(64*4*4, 64))
        stages.append(nn.Linear(64, 10))
        self.model1 = nn.Sequential(*stages)

    def forward(self,x):
        """Run ``x`` through the sequential stack and return the 10 outputs per sample."""
        return self.model1(x)
# Both the model (nn.Module) and the loss object must be instantiated before use.
model = ep()
loss = nn.CrossEntropyLoss()
# Process only the first batch (note the break) to demonstrate one loss/backward step.
for data in dataloader:
    img,target = data
    output = model(img)
    # Compute the loss between the network output and the target class index.
    loss_value = loss(output,target)
    # Back-propagation is triggered here, outside the network class.
    loss_value.backward()
    print(loss_value)
    break

tensor(2.3211, grad_fn=<NllLossBackward0>)


### 优化器

torch.optim

In [None]:
import torch
import torchvision
from torch.utils.data import DataLoader
import torch.nn as nn
# CIFAR10 test split as tensors, served one sample per batch (batch_size=1).
dataset = torchvision.datasets.CIFAR10(root='./data2',train=False,download=True,transform=torchvision.transforms.ToTensor())
dataloader = DataLoader(dataset,batch_size=1)
class ep(nn.Module):
    """Small CIFAR10-style network built as one nn.Sequential.

    Three (5x5 conv -> 2x2 max-pool) stages, then Flatten and two Linear layers.
    """

    def __init__(self):
        super().__init__()  # initialise the nn.Module base class
        stages = []
        in_ch = 3
        for out_ch in (32, 32, 64):
            # padding=2 is chosen so a 5x5 kernel keeps height/width unchanged.
            stages.append(nn.Conv2d(in_ch, out_ch, kernel_size=5, padding=2))
            # Each pooling step halves height and width.
            stages.append(nn.MaxPool2d(kernel_size=2))
            in_ch = out_ch
        # Linear layers work on flat feature vectors: after three poolings a
        # 32x32 input is 4x4 with 64 channels, hence 64*4*4 input features.
        stages.append(nn.Flatten())
        stages.append(nn.Linear(64*4*4, 64))
        stages.append(nn.Linear(64, 10))
        self.model1 = nn.Sequential(*stages)

    def forward(self,x):
        """Run ``x`` through the sequential stack and return the 10 outputs per sample."""
        return self.model1(x)
# Instantiate everything first: the model, the loss function and the optimizer.
model = ep()
loss = nn.CrossEntropyLoss()
# Plain SGD over all model parameters with learning rate 0.01.
optim = torch.optim.SGD(model.parameters(),lr=0.01)
for epoch in range(5):
    # Sum of the per-batch losses over this epoch.
    running_loss = 0.0
    for data in dataloader:
        img,target = data
        output = model(img)
        # Compute the loss for this batch.
        loss_value = loss(output,target)
        # Clear the gradients left over from the previous step.
        optim.zero_grad()
        # Back-propagate to compute fresh gradients.
        loss_value.backward()
        # Update the parameters using those gradients.
        optim.step()
        running_loss += loss_value.item()
    # NOTE(review): the recorded output shows this sum rising again in later
    # epochs, so the lr / batch_size combination may need tuning.
    print(running_loss)

18706.792583032046
16125.594056693866
15516.942673036378
16073.606622038087
17985.536766862962


: 