In [2]:
import json
import os
import random
import zipfile

import numpy as np
import paddle
from PIL import Image
from matplotlib import pyplot as plt
from paddle import nn

In [3]:
train_parameters = {
    "input_size": [
        3,
        64,
        64
    ],
    "class_dim": 102,
    "src_path": "E:\\Python\\CaltechClassification\\data\\dataset.zip",
    "target_path": "E:\\Python\\CaltechClassification\\data\\dataset\\",
    "image_path": "E:\\Python\\CaltechClassification\\data\\dataset\\dataset\\images\\",
    "train_list_path": "E:\\Python\\CaltechClassification\\data\\dataset\\train.txt",
    "eval_list_path": "E:\\Python\\CaltechClassification\\data\\dataset\\eval.txt",
    "class_list_path": "E:\\Python\\CaltechClassification\\data\\dataset\\dataset\\class.txt",
    "readme_path": "E:\\Python\\CaltechClassification\\data\\dataset\\readme.json",
    "label_dict": {
    },
    "num_epochs": 2,
    "train_batch_size": 32,
    "learning_strategy": {
        "lr": 0.01
    }
}

## 1.数据准备

In [4]:
def unzip_data(src_path, target_path):
    """
    解压原始数据集，将src_path路径下的zip包解压至target_path目录下
    :param src_path:  zip包
    :param target_path:  目标目录
    :return:
    """
    if not os.path.isdir(target_path):
        z = zipfile.ZipFile(src_path, 'r')
        z.extractall(path=target_path)
        z.close()


def get_data_list(target_path, train_list_path, image_path, class_list_path, readme_path):
    """
    生成数据列表
    :param target_path: 目标文件夹
    :param train_list_path: 训练列表
    :param image_path: 图片路径
    :param class_list_path: 标签文件路径
    :param readme_path: readme 文件路径
    :return:
    """
    # 训练集图片路径
    train_data_paths = []
    # 训练集图片标签
    train_data_labels = []
    # 训练集
    train_list = []
    # 所有图片数量
    all_class_images = 0
    # 标签类别
    class_detail = []
    # 训练集文本
    train_data_list = open(target_path + "dataset\\train.txt", "r").readlines()
    # 标签集
    class_data_list = open(class_list_path, "r").readlines()
    # 标签对应的图片数
    class_num = {}

    # 生成train.txt
    for train_data in train_data_list:
        train_data_path = image_path.replace("\\", "\\\\") + train_data.split("\t")[0]
        train_data_label = train_data.split("\t")[1].replace("\n", "")

        train_data_paths.append(train_data_path)
        train_data_labels.append(train_data_labels)

        train_list.append(f"{train_data_path}\t{train_data_label}\n")

        # 统计对应标签的训练集数量
        if train_data_label not in class_num.keys():
            class_num.setdefault(train_data_label, 0)
        class_num[train_data_label] += 1

    # 乱序
    random.shuffle(train_list)

    # 写入 train.txt
    with open(train_list_path, "a") as f:
        for train_data in train_list:
            f.write(train_data)

    # 生成标签集
    for class_data in class_data_list:
        class_name = class_data.split("\t")[0]
        class_label = class_data.split("\t")[1].replace("\n", "")
        # 标签类别默认内容
        class_detail_default = {"class_train_images": class_num[class_label], "class_label": int(class_label),
                                "class_name": class_name}
        class_detail.append(class_detail_default)

    all_class_images = len(train_data_paths)

    # 写入 readme
    readme_json = {"all_class_images": all_class_images, "class_detail": class_detail}
    with open(readme_path, 'w') as f:
        f.write(json.dumps(readme_json, sort_keys=True, indent=4, separators=(',', ': ')))

    print("数据生成完成")

In [5]:
unzip_data(train_parameters["src_path"], train_parameters["target_path"])
get_data_list(train_parameters["target_path"], train_parameters["train_list_path"], train_parameters["image_path"],
              train_parameters["class_list_path"], train_parameters["readme_path"])

数据生成完成


## 2.模型搭建

In [6]:
class caltech_dataset(paddle.io.Dataset):
    def __init__(self, data_path, mode="train"):
        """
        数据读取器
        :param data_path: 文件路径
        :param mode: 读取模式
        """
        super().__init__()
        self.data_path = data_path
        self.image_paths = []
        self.labels = []

        if mode == "train":
            with open(os.path.join(self.data_path, "train.txt"), "r", encoding="utf8") as f:
                self.info = f.readlines()
            for image_info in self.info:
                image_path, image_label = image_info.strip().split("\t")
                self.image_paths.append(image_path)
                self.labels.append(image_label)
        else:
            pass

    def __getitem__(self, index):
        image_path = self.image_paths[index]
        label = self.labels[index]
        image = Image.open(image_path)
        if not image.mode == "RGB":
            image = image.convert("RGB")
        image = image.resize((64, 64), Image.BILINEAR)
        image = np.array(image).astype("float32")
        image = image.transpose((2, 0, 1)) / 255
        label = np.array([label], dtype="int64")
        return image, label

    def __len__(self):
        return len(self.image_paths)

In [7]:
class caltech_model(paddle.nn.Layer):
    def __init__(self):
        super(caltech_model, self).__init__()
        # 62 * 62
        self.conv1 = nn.Conv2D(in_channels=3, out_channels=64, kernel_size=3, padding=0, stride=1)
        # 31 * 31
        self.pool1 = nn.MaxPool2D(kernel_size=2, stride=2)
        # 29 * 29
        self.conv2 = nn.Conv2D(in_channels=64, out_channels=128, kernel_size=3, padding=0, stride=1)
        # 14 * 14
        self.pool2 = nn.MaxPool2D(kernel_size=2, stride=2)
        # 10 * 10
        self.conv3 = nn.Conv2D(in_channels=128, out_channels=128, kernel_size=5, padding=0, stride=1)
        # 5 * 5
        self.pool3 = nn.MaxPool2D(kernel_size=2, stride=2)
        self.fc1 = nn.Linear(in_features=5 * 5 * 128, out_features=25)

    def forward(self, input):
        x = self.conv1(input)
        x = self.pool1(x)
        x = self.conv2(x)
        x = self.pool2(x)
        x = self.conv3(x)
        x = self.pool3(x)
        x = paddle.reshape(x, [-1, 5 * 5 * 128])
        x = self.fc1(x)
        return x

## 3.模型训练

In [8]:
Batch = 0
Batchs = []
all_train_accs = []


def draw_train_acc(Batchs, train_accs):
    title = "training accs"
    plt.title(title, fontsize=24)
    plt.xlabel("batch", fontsize=14)
    plt.ylabel("acc", fontsize=14)
    plt.plot(Batchs, train_accs, color='green', label='training accs')
    plt.legend()
    plt.grid()
    plt.show()


all_train_loss = []


def draw_train_loss(Batchs, train_loss):
    title = "training loss"
    plt.title(title, fontsize=24)
    plt.xlabel("batch", fontsize=14)
    plt.ylabel("loss", fontsize=14)
    plt.plot(Batchs, train_loss, color='red', label='training loss')
    plt.legend()
    plt.grid()
    plt.show()


In [9]:
train_dataset = caltech_dataset(train_parameters["target_path"])
train_dataloader = paddle.io.DataLoader(train_dataset)

In [10]:
model = caltech_model()
model.train()
cross_entropy = paddle.nn.CrossEntropyLoss()
opt = paddle.optimizer.SGD(learning_rate=train_parameters["learning_strategy"]["lr"], parameters=model.parameters())
epoch_num = train_parameters["num_epochs"]

for epoch in range(epoch_num):
    for batch_id, data in enumerate(train_dataloader):
        img = data[0]
        label = data[1]
        predict = model(img)
        loss = cross_entropy(predict, label)
        acc = paddle.metric.accuracy(predict, label.reshape([-1, 1]))

        if batch_id != 0 and batch_id % 10 == 0:
            Batch = Batch + 10
            Batchs.append(Batch)
            all_train_loss.append(loss.numpy()[0])
            all_train_accs.append(acc.numpy()[0])
            print("epoch:{},step:{},train_loss:{},train_acc:{}".format(epoch, batch_id, loss.numpy()[0],
                                                                       acc.numpy()[0]))
        if batch_id != 0 and batch_id % 20 == 0:
            paddle.save(model.state_dict(), f"./checkpoints/caltech_dataset_{str(batch_id)}")

        loss.backward()
        opt.step()
        opt.clear_grad()

paddle.save(model.state_dict(), f"./checkpoints/caltech_dataset_last")  #保存模型
draw_train_acc(Batchs, all_train_accs)
draw_train_loss(Batchs, all_train_loss)

OSError: (External) CUDNN error(8), CUDNN_STATUS_EXECUTION_FAILED. 
  [Hint: Please search for the error code(8) on website (https://docs.nvidia.com/deeplearning/cudnn/api/index.html#cudnnStatus_t) to get Nvidia's official solution and advice about CUDNN Error.] (at C:\home\workspace\Paddle\paddle\phi\kernels\gpudnn\pool_grad_kernel.cu:284)


## 3.结果预测

In [None]:
def load_image(img_path):
    '''
    预测图片预处理
    '''
    image = Image.open(img_path)
    if image.mode != 'RGB':
        image = image.convert('RGB')
    image = image.resize((64, 64), Image.BILINEAR)
    image = np.array(image).astype('float32')
    image = image.transpose((2, 0, 1))  # HWC to CHW
    image = image / 255  # 像素值归一化
    return image

In [None]:
para_state_dict = paddle.load("./checkpoints/caltech_dataset_last")
model = caltech_model()
model.set_state_dict(para_state_dict)

infer_image_path = ""
infer_images = open(infer_image_path, 'r').readlines()

infer_image_list = []

for infer_image in infer_images:
    infer_image_list.append(os.path.join(train_parameters["image_path"], load_image(infer_image)))

infer_image_list = np.array(infer_image_list)

outs = []

for i in range(infer_image_list):
    dy_x_data = np.array(infer_image_list[i]).astype('float32')
    dy_x_data = dy_x_data[np.newaxis, :, :, :]
    img = paddle.to_tensor(dy_x_data)
    out = model(img)
    lab = np.argmax(out.numpy())  #argmax():返回最大数的索引
    print(lab)
    outs.append(f"{infer_images[i]}\t{lab}")
print("结束")