transforms 对图片进行变换

In [1]:
from torchvision import transforms

结构与用法： transforms 中为模板类，需要实例化对象才可使用（可以理解为用官方模板创建自己的工具）

<img src="notes/image/transforms.png" width="500">


In [2]:
dir(transforms)

['AugMix',
 'AutoAugment',
 'AutoAugmentPolicy',
 'CenterCrop',
 'ColorJitter',
 'Compose',
 'ConvertImageDtype',
 'ElasticTransform',
 'FiveCrop',
 'GaussianBlur',
 'Grayscale',
 'InterpolationMode',
 'Lambda',
 'LinearTransformation',
 'Normalize',
 'PILToTensor',
 'Pad',
 'RandAugment',
 'RandomAdjustSharpness',
 'RandomAffine',
 'RandomApply',
 'RandomAutocontrast',
 'RandomChoice',
 'RandomCrop',
 'RandomEqualize',
 'RandomErasing',
 'RandomGrayscale',
 'RandomHorizontalFlip',
 'RandomInvert',
 'RandomOrder',
 'RandomPerspective',
 'RandomPosterize',
 'RandomResizedCrop',
 'RandomRotation',
 'RandomSolarize',
 'RandomVerticalFlip',
 'Resize',
 'TenCrop',
 'ToPILImage',
 'ToTensor',
 'TrivialAugmentWide',
 '__builtins__',
 '__cached__',
 '__doc__',
 '__file__',
 '__loader__',
 '__name__',
 '__package__',
 '__path__',
 '__spec__',
 '_pil_constants',
 '_presets',
 'autoaugment',
 'functional',
 'functional_pil',
 'functional_tensor',
 'transforms']

常用的工具：
- transforms.CenterCrop(size)：中心裁剪
- transforms.toTensor()：转换为张量
- transforms.resize(size)：调整图片大小

In [3]:
from PIL import Image
from torchvision import transforms

img = Image.open('refactor/train/image/5650366_e22b7e1065.jpg')
print(type(img))

<class 'PIL.JpegImagePlugin.JpegImageFile'>


In [4]:
# 转换为 tensor
# img_tensor = transforms.ToTensor()(img)
PIL2Tensor = transforms.ToTensor()
img_tensor = PIL2Tensor(img)
print(type(img_tensor))

<class 'torch.Tensor'>


In [5]:
import cv2
from torch.utils.tensorboard.writer import SummaryWriter

writer = SummaryWriter('logs/exp4')
img_ndarray = cv2.imread('refactor/train/image/5650366_e22b7e1065.jpg')
img_tensor = transforms.ToTensor()(img_ndarray)
writer.add_image('Tensor', img_tensor, 1)
writer.close()

In [6]:
img_tensor[0,:,:]

tensor([[0.4157, 0.4157, 0.4196,  ..., 0.3608, 0.3569, 0.3529],
        [0.4196, 0.4157, 0.4196,  ..., 0.3569, 0.3529, 0.3490],
        [0.4235, 0.4235, 0.4235,  ..., 0.3608, 0.3569, 0.3529],
        ...,
        [0.5608, 0.5608, 0.5647,  ..., 0.4392, 0.4392, 0.4392],
        [0.5412, 0.5529, 0.5608,  ..., 0.4353, 0.4353, 0.4353],
        [0.5333, 0.5412, 0.5608,  ..., 0.4314, 0.4314, 0.4314]])

input[channel] = (input[channel] - mean[channel]) / std[channel]

In [7]:
# Normalize 使用
normalize = transforms.Normalize(mean=[.5, .5, .5], std=[.5, .5, .5])
img_norm = normalize(img_tensor)
writer.add_image('Normalize', img_norm, 1)
print(img_norm[0, :, :])

tensor([[-0.1686, -0.1686, -0.1608,  ..., -0.2784, -0.2863, -0.2941],
        [-0.1608, -0.1686, -0.1608,  ..., -0.2863, -0.2941, -0.3020],
        [-0.1529, -0.1529, -0.1529,  ..., -0.2784, -0.2863, -0.2941],
        ...,
        [ 0.1216,  0.1216,  0.1294,  ..., -0.1216, -0.1216, -0.1216],
        [ 0.0824,  0.1059,  0.1216,  ..., -0.1294, -0.1294, -0.1294],
        [ 0.0667,  0.0824,  0.1216,  ..., -0.1373, -0.1373, -0.1373]])


In [8]:
# resize 使用
resize = transforms.Resize((100, 200))
# 注意，输入的是 PIL.Image，给定的参数是 (height, width)，给定单个值时较小边匹配，输出也是 PIL.Image
img_resize = resize(img)
writer.add_image('Resize',PIL2Tensor(img_resize), 1)
writer.close()
print(img_resize.size)

(200, 100)


In [None]:
# compose 使用
# 从上面的例子可以看出，每次都要写 transforms.ToTensor()，transforms.Normalize()，transforms.Resize()，很麻烦，可以使用 transforms.Compose() 来将这些操作组合到一起。
# 注意，transforms.Compose() 的参数是一个 list，list 中的每个元素是一个 transform 操作，它会将这些操作依次执行。

transforms_list = transforms.Compose([
    transforms.Resize((100, 200)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[.5, .5, .5], std=[.5, .5, .5]),
])
img_compose = transforms_list(img)
writer.add_image('Compose', img_compose, 1)
writer.close()