In [1]:
import os
import torch

# Dataset root folder; the training images are stored under train/train/ inside it.
data_dir = './data/'
train_dir = f'{data_dir}train/train/'

In [2]:
def read_csv_labels(fname):
    """Read a two-column CSV file and return a ``{name: label}`` dictionary.

    The first line is treated as the header (column names) and skipped.
    Blank lines are ignored; every other line must hold exactly two
    comma-separated fields.

    Args:
        fname: Path of the CSV file to read.

    Returns:
        dict mapping the first field of each row to the second field (both
        ``str``). If a name appears more than once, the last row wins.

    Raises:
        ValueError: If a non-blank data row does not have exactly two fields.
    """
    with open(fname, 'r') as f:
        # Skip the header row (column names); an empty file simply yields {}.
        next(f, None)
        # Iterate the file lazily instead of materialising it with readlines().
        rows = [line.rstrip() for line in f]
    # A blank line (e.g. a trailing newline at the end of the file) would split
    # into a single empty field and break the two-value unpacking below.
    tokens = [row.split(',') for row in rows if row]
    return dict((name, label) for name, label in tokens)


# Load the name -> label mapping, then report how many samples and distinct
# classes it contains.
labels = read_csv_labels(os.path.join(data_dir, 'trainLabels.csv'))
train_dir_len = len(labels)
labels_set = set(labels.values())
print('训练样本数量 :', train_dir_len)
print('类别数:', len(labels_set))
print("------------------------------------------")
# List every distinct label, numbered from 1.
for index, each in enumerate(labels_set, start=1):
    print(f"标签{index}:", each)

训练样本数量 : 50000
类别数: 10
------------------------------------------
标签1: frog
标签2: ship
标签3: dog
标签4: horse
标签5: truck
标签6: cat
标签7: deer
标签8: airplane
标签9: bird
标签10: automobile


In [3]:
import pandas as pd

# Build the CSV paths with os.path.join (as the label-reading cell does) so
# they stay valid whether or not data_dir ends with a path separator.
train_labels = pd.read_csv(os.path.join(data_dir, 'trainLabels.csv'), dtype=str)
# Empty frame with the columns that the image table will carry.
train_images = pd.DataFrame(columns=['id', 'label', 'path'], dtype=str)
test_labels = pd.read_csv(os.path.join(data_dir, 'sampleSubmission.csv'))
train_labels.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 50000 entries, 0 to 49999
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   id      50000 non-null  object
 1   label   50000 non-null  object
dtypes: object(2)
memory usage: 781.4+ KB


In [4]:
print(type(train_images))
# Build the image table in one vectorised pass and assign it to a fresh frame.
# Appending 50000 rows one at a time with `.loc[len(df)] = ...` is very slow,
# and re-running the cell used to append every row a second time.
candidate_rows = train_labels[['id', 'label']].copy()
candidate_rows['id'] = candidate_rows['id'].astype(str)
# Each image file is named after the row's own id, so a label always stays
# paired with its image even if the CSV rows are not sorted by id.
candidate_rows['path'] = train_dir + candidate_rows['id'] + '.png'
# Keep only the rows whose image file is actually present on disk.
file_found = candidate_rows['path'].map(os.path.exists).astype(bool)
train_images = candidate_rows.loc[file_found, ['id', 'label', 'path']].reset_index(drop=True)
train_images.head(2)

<class 'pandas.core.frame.DataFrame'>


Unnamed: 0,id,label,path
0,1,frog,./data/train/train/1.png
1,2,truck,./data/train/train/2.png


In [5]:
# Count the rows available for each label and show the first ten groups.
rows_by_label = train_images.groupby(['label'])
display_groupby = rows_by_label.count()
display_groupby.head(10)

Unnamed: 0_level_0,id,path
label,Unnamed: 1_level_1,Unnamed: 2_level_1
airplane,5000,5000
automobile,5000,5000
bird,5000,5000
cat,5000,5000
deer,5000,5000
dog,5000,5000
frog,5000,5000
horse,5000,5000
ship,5000,5000
truck,5000,5000


In [6]:
# Class names in index order: the position in this list is the numeric class id.
class_names = [
    'airplane', 'automobile', 'bird', 'cat', 'deer',
    'dog', 'frog', 'horse', 'ship', 'truck',
]
# Replace each textual label with its class id, stored as a string.
for index, name in enumerate(class_names):
    matches_name = train_images.label == name
    train_images.loc[matches_name, 'label'] = str(index)

train_images.head(5)

Unnamed: 0,id,label,path
0,1,6,./data/train/train/1.png
1,2,9,./data/train/train/2.png
2,3,9,./data/train/train/3.png
3,4,4,./data/train/train/4.png
4,5,1,./data/train/train/5.png


In [7]:
from torch.utils.data import Dataset, DataLoader
from PIL import Image


class MyDataset(Dataset):
    """Dataset that serves (image, label) pairs described by a table.

    Each row of ``dataframe`` is read through ``.iloc`` and must provide a
    ``"path"`` entry (image file to open) and a ``"label"`` entry (value
    convertible with ``int``).
    """

    def __init__(self, dataframe, transform=None):
        """Store the source table and the optional per-image transform."""
        self.dataframe, self.transform = dataframe, transform

    def __len__(self):
        """Return the number of rows in the source table."""
        return len(self.dataframe)

    def __getitem__(self, index):
        """Load the image at position ``index`` and return ``(image, label)``."""
        record = self.dataframe.iloc[index]
        # Convert so that the image is always RGB.
        picture = Image.open(record["path"]).convert("RGB")
        target = int(record["label"])

        # Apply the transform only when one was supplied.
        if not self.transform:
            return picture, target
        return self.transform(picture), target


# Untransformed dataset over the complete image table.
dataset = MyDataset(dataframe=train_images, transform=None)

In [8]:
import torchvision

# Image augmentation
IMAGE_SIZE = 40

# Per-channel mean and standard deviation handed to Normalize.
mean = [0.4914, 0.4822, 0.4465]
std = [0.247, 0.243, 0.261]

tv_transforms = torchvision.transforms

# Training pipeline: resize, random augmentation, tensor conversion, normalisation.
train_steps = [
    # Resize both height and width to IMAGE_SIZE (a 40x40 image).
    tv_transforms.Resize((IMAGE_SIZE, IMAGE_SIZE)),
    tv_transforms.RandomHorizontalFlip(0.1),
    tv_transforms.RandomRotation(20),
    # Random colour jitter.
    tv_transforms.ColorJitter(brightness=0.1, contrast=0.1, saturation=0.1),
    tv_transforms.ToTensor(),
    # Normalise every channel of the image.
    tv_transforms.Normalize(mean, std),
]
transform_train = tv_transforms.Compose(train_steps)

# At test time only the deterministic steps (resize, tensor conversion,
# normalisation) are applied, which removes randomness from the evaluation.
test_steps = [
    tv_transforms.Resize((IMAGE_SIZE, IMAGE_SIZE)),
    tv_transforms.ToTensor(),
    tv_transforms.Normalize(mean, std),
]
transform_test = tv_transforms.Compose(test_steps)

transform_train

Compose(
    Resize(size=(40, 40), interpolation=bilinear, max_size=None, antialias=warn)
    RandomHorizontalFlip(p=0.1)
    RandomRotation(degrees=[-20.0, 20.0], interpolation=nearest, expand=False, fill=0)
    ColorJitter(brightness=(0.9, 1.1), contrast=(0.9, 1.1), saturation=(0.9, 1.1), hue=None)
    ToTensor()
    Normalize(mean=[0.4914, 0.4822, 0.4465], std=[0.247, 0.243, 0.261])
)

In [9]:
from torch.utils.data import random_split

BATCH_SIZE = 64
NUM_WORKERS = 4
VALIDATION_SIZE = 0.2  # 0.2 of the 50000 training images, i.e. 10000, are used for validation
SPLIT_SEED = 42  # fixed seed so the train/validation split is the same on every run
num = len(dataset)
split = round(num * VALIDATION_SIZE)

train_dataset, val_dataset = random_split(
    dataset,
    [num - split, split],
    generator=torch.Generator().manual_seed(SPLIT_SEED),
)

# random_split returns Subset objects, which have no `.iloc`. MyDataset reads
# its source through `.iloc`, so wrapping a Subset directly fails with
# AttributeError on the first item. Hand it the DataFrame rows selected by each
# subset's indices instead.
train_frame = dataset.dataframe.iloc[train_dataset.indices].reset_index(drop=True)
val_frame = dataset.dataframe.iloc[val_dataset.indices].reset_index(drop=True)

# Create the datasets that apply the matching transform pipeline.
train_data = MyDataset(train_frame, transform=transform_train)
val_data = MyDataset(val_frame, transform=transform_test)

# Create the data loaders.
train_loader = DataLoader(
    dataset=train_data,
    batch_size=BATCH_SIZE,
    shuffle=True,
    num_workers=NUM_WORKERS,
    pin_memory=True
)

# NOTE: despite its name, this loader iterates over the validation split.
test_loader = DataLoader(
    dataset=val_data,
    batch_size=BATCH_SIZE,
    shuffle=False,
    num_workers=NUM_WORKERS,
    pin_memory=True
)

In [10]:
def get_default_device():
    """Return the default device: the GPU when CUDA is available, otherwise the CPU."""
    device_name = 'cuda' if torch.cuda.is_available() else 'cpu'
    return torch.device(device_name)


# Resolve the compute device once and report which one was picked.
device = get_default_device()
print(f"Using device:{device}")

Using device:cuda


In [None]:
from matplotlib import pyplot as plt

# Fetch one batch from the training loader.
data_iter = iter(train_loader)
images, labels = next(data_iter)


# Helper that displays a row of images.
def show_images(images, labels, title, max_images=None, mean=None, std=None):
    """Display images side by side, each titled with its label.

    Args:
        images: Batch/sequence of image tensors in (C, H, W) channel order.
        labels: Labels matching ``images``; tensors (anything with ``.item()``)
            or plain values.
        title: Super-title of the figure.
        max_images: Show at most this many images. ``None`` (default) shows
            every image, as before.
        mean: Per-channel mean that was passed to ``Normalize``.
        std: Per-channel std that was passed to ``Normalize``. When both
            ``mean`` and ``std`` are given, the normalisation is undone so the
            images are drawn with their real colours.

    Raises:
        ValueError: If there is no image to display.
    """
    count = len(images) if max_images is None else min(len(images), max_images)
    if count <= 0:
        raise ValueError('show_images needs at least one image to display')

    # squeeze=False keeps `axes` a 2-D array even for a single image; without
    # it a lone Axes object is returned and cannot be iterated by zip().
    fig, axes = plt.subplots(1, count, figsize=(12, 2), squeeze=False)
    # zip() stops at the shortest input, so only `count` images are drawn.
    for ax, image, label in zip(axes[0], images, labels):
        image = image.detach().cpu()
        if image.is_floating_point():
            if mean is not None and std is not None:
                # Invert Normalize: x * std + mean, broadcast over (C, 1, 1).
                channel_mean = torch.as_tensor(mean, dtype=image.dtype).view(-1, 1, 1)
                channel_std = torch.as_tensor(std, dtype=image.dtype).view(-1, 1, 1)
                image = image * channel_std + channel_mean
            # imshow only accepts float RGB data inside [0, 1].
            image = image.clamp(0, 1)
        ax.imshow(image.permute(1, 2, 0))  # reorder channels to (H, W, C) for display
        shown_label = label.item() if hasattr(label, 'item') else label
        ax.set_title(f'Label: {shown_label}')
        ax.axis('off')
    plt.suptitle(title)
    plt.show()


# Show the first few training images of the batch with normalisation undone.
show_images(images, labels, 'CIFAR-10 Images', max_images=8, mean=mean, std=std)