In [45]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from PIL import Image
import os
import cv2
from paddle.io import Dataset
import paddle
from paddle.vision.transforms import *

In [46]:
# --- Training data -----------------------------------------------------------
# Index file read by MyDataset: one "image_path genus_label species_label"
# entry per line.
label_path = 'work/datas/Butterfly20/data_list.txt'
# NOTE(review): not referenced by any cell shown here; presumably the
# training image root - confirm before relying on it.
train_path = 'work/datas/Butterfly20/Butterfly20'
# NOTE(review): empty and not referenced by any cell shown here.
target_path = ''

# --- Class metadata ----------------------------------------------------------
# "<id> <name>" per line; parsed into species_dict further down.
spicies_path = 'work/datas/Butterfly20/species.txt'

# --- Test data ---------------------------------------------------------------
# Directory that test image names are joined onto.
test_data_dir = 'work/datas/Butterfly20_test'
# Text file listing one test image name per line.
test_path = 'work/datas/Butterfly20_test/testpath.txt'


In [47]:
# Data preprocessing
class MyDataset(Dataset):
    """Labelled image dataset driven by a plain-text index file.

    Every non-blank line of the index file must hold three
    whitespace-separated fields: ``image_path genus_label species_label``.
    Only the image path and the species label are kept; the genus label is
    parsed but ignored.
    """

    def __init__(self, label_path, transform=None):
        """Read the index file and store the optional transform.

        Args:
            label_path: Path to the UTF-8 encoded index file.
            transform: Optional callable applied to every loaded image.

        Raises:
            ValueError: If a non-blank line does not contain exactly three
                fields.
        """
        super(MyDataset, self).__init__()
        self.data_list = []
        with open(label_path, encoding='utf-8') as f:
            for line_no, line in enumerate(f, start=1):
                fields = line.split()
                # A trailing newline or empty line at the end of the file is
                # common; skip it instead of failing on the unpack below.
                if not fields:
                    continue
                if len(fields) != 3:
                    raise ValueError(
                        '{}:{}: expected 3 fields, got {}: {!r}'.format(
                            label_path, line_no, len(fields), line))
                image_path, _genus_label, species_label = fields
                self.data_list.append([image_path, species_label])
        # Keep the preprocessing pipeline as an attribute of the dataset.
        self.transform = transform

    def __getitem__(self, index):
        """Load one sample.

        Args:
            index: Position of the sample in ``self.data_list``.

        Returns:
            A ``(image, label)`` tuple. ``image`` is the float32 image read
            by ``cv2.imread`` after ``self.transform`` (if any) has been
            applied; ``label`` is the species label as an int, shifted down
            by one.

        Raises:
            FileNotFoundError: If the image cannot be read from disk.
        """
        image_path, label = self.data_list[index]
        image = cv2.imread(image_path, cv2.IMREAD_COLOR)
        # cv2.imread signals failure by returning None rather than raising;
        # surface the offending path instead of an AttributeError on astype.
        if image is None:
            raise FileNotFoundError(
                'Cannot read image: {}'.format(image_path))
        image = image.astype('float32')
        if self.transform is not None:
            image = self.transform(image)
        # Labels in the index file are shifted to start at 0
        # (presumably they are 1-based on disk).
        label = int(label) - 1
        return image, label

    def __len__(self):
        """Return the number of samples listed in the index file."""
        return len(self.data_list)

# Preprocessing and augmentation pipeline applied to every training image.
transform_train = Compose([
    # Random augmentations first, on the image as loaded.
    RandomRotation(degrees=40),
    RandomHorizontalFlip(prob=0.4),
    RandomVerticalFlip(prob=0.1),
    # Fixed network input resolution.
    Resize((224, 224)),
    # (x - 127.5) / 127.5 per channel, on channel-last data.
    Normalize(mean=[127.5] * 3, std=[127.5] * 3, data_format='HWC'),
    # Default order; the sample printed below comes out as (3, 224, 224).
    Transpose(),
])

# Load the training set, then sanity-check one sample and the sample count.
train_dataset = MyDataset(label_path, transform=transform_train)
# Each item is an (image, label) pair; show the image shape of the first one.
print(train_dataset[0][0].shape)
print(len(train_dataset))

(3, 224, 224)
1866


In [48]:
# Build the network: ResNet-50 with a 20-way classification head.
res50 = paddle.vision.models.resnet50(num_classes=20)
# Print the per-layer summary for a single 3x224x224 input.
paddle.summary(res50, input_size=(1, 3, 224, 224))

-------------------------------------------------------------------------------
   Layer (type)         Input Shape          Output Shape         Param #    
    Conv2D-213       [[1, 3, 224, 224]]   [1, 64, 112, 112]        9,408     
  BatchNorm2D-213   [[1, 64, 112, 112]]   [1, 64, 112, 112]         256      
      ReLU-69       [[1, 64, 112, 112]]   [1, 64, 112, 112]          0       
    MaxPool2D-5     [[1, 64, 112, 112]]    [1, 64, 56, 56]           0       
    Conv2D-215       [[1, 64, 56, 56]]     [1, 64, 56, 56]         4,096     
  BatchNorm2D-215    [[1, 64, 56, 56]]     [1, 64, 56, 56]          256      
      ReLU-70        [[1, 256, 56, 56]]    [1, 256, 56, 56]          0       
    Conv2D-216       [[1, 64, 56, 56]]     [1, 64, 56, 56]        36,864     
  BatchNorm2D-216    [[1, 64, 56, 56]]     [1, 64, 56, 56]          256      
    Conv2D-217       [[1, 64, 56, 56]]     [1, 256, 56, 56]       16,384     
  BatchNorm2D-217    [[1, 256, 56, 56]]    [1, 256

{'total_params': 23602132, 'trainable_params': 23495892}

In [49]:
# Wrap the network in the high-level paddle.Model API.
model = paddle.Model(res50)

# Training configuration: Adam (lr=0.001) over all model parameters,
# cross-entropy loss, accuracy as the reported metric.
# Alternative noted while tuning: SGD(learning_rate=0.1, weight_decay=0.01).
model.prepare(
    optimizer=paddle.optimizer.Adam(
        learning_rate=0.001,
        parameters=model.parameters(),
    ),
    loss=paddle.nn.CrossEntropyLoss(),
    metrics=paddle.metric.Accuracy(),
)

In [50]:
# Train on the full training set (no held-out evaluation data is passed).
model.fit(
    train_data=train_dataset,
    batch_size=32,
    epochs=256,
    verbose=1,
)

The loss value printed in the log is the current step, and the metric is the average value of previous steps.
Epoch 1/256
Epoch 2/256
Epoch 3/256
Epoch 4/256
Epoch 5/256
Epoch 6/256
Epoch 7/256
Epoch 8/256
Epoch 9/256
Epoch 10/256
Epoch 11/256
Epoch 12/256
Epoch 13/256
Epoch 14/256
Epoch 15/256
Epoch 16/256
Epoch 17/256
Epoch 18/256
Epoch 19/256
Epoch 20/256
Epoch 21/256
Epoch 22/256
Epoch 23/256
Epoch 24/256
Epoch 25/256
Epoch 26/256
Epoch 27/256
Epoch 28/256
Epoch 29/256
Epoch 30/256
Epoch 31/256
Epoch 32/256
Epoch 33/256
Epoch 34/256
Epoch 35/256
Epoch 36/256
Epoch 37/256
Epoch 38/256
Epoch 39/256
Epoch 40/256
Epoch 41/256
Epoch 42/256
Epoch 43/256
Epoch 44/256
Epoch 45/256
Epoch 46/256
Epoch 47/256
Epoch 48/256
Epoch 49/256
Epoch 50/256
Epoch 51/256
Epoch 52/256
Epoch 53/256
Epoch 54/256
Epoch 55/256
Epoch 56/256
Epoch 57/256
Epoch 58/256
Epoch 59/256
Epoch 60/256
Epoch 61/256
Epoch 62/256
Epoch 63/256
Epoch 64/256
Ep

In [51]:
# Evaluate the model on the training set itself.
# NOTE(review): train_dataset still applies the random augmentations from
# transform_train, and this is not a held-out score.
eval_result = model.evaluate(eval_data=train_dataset, verbose=1)
print(eval_result)

Eval begin...
Eval samples: 1866
{'loss': [0.00063244364], 'acc': 0.9957127545551983}


In [52]:
class InferDataset(Dataset):
    """Unlabelled image dataset used for inference.

    The sample list comes from a text file with one image name per line;
    each name is appended to ``data_dir`` to form the path that is read.
    """

    def __init__(self, data_dir, image_paths, transform=None):
        """Read the list of image names and store the optional transform.

        Args:
            data_dir: Directory the listed image names are relative to.
            image_paths: Path to the UTF-8 text file listing one image name
                per line.
            transform: Optional callable applied to every loaded image.
        """
        super(InferDataset, self).__init__()
        self.data_list = []
        with open(image_paths, encoding='utf-8') as f:
            for line in f:
                name = line.strip()
                # Skip blank lines (e.g. a trailing newline at end of file)
                # so they do not become bogus "<data_dir>/" entries.
                if not name:
                    continue
                # Use the data_dir argument, not a module-level global, so
                # the dataset works for any directory the caller passes.
                self.data_list.append(data_dir + '/' + name)
        # Keep the preprocessing pipeline as an attribute of the dataset.
        self.transform = transform

    def __getitem__(self, index):
        """Load one image.

        Args:
            index: Position of the sample in ``self.data_list``.

        Returns:
            The float32 image read by ``cv2.imread`` after
            ``self.transform`` (if any) has been applied. No label is
            returned.

        Raises:
            FileNotFoundError: If the image cannot be read from disk.
        """
        image_path = self.data_list[index]
        image = cv2.imread(image_path, cv2.IMREAD_COLOR)
        # cv2.imread signals failure by returning None rather than raising;
        # surface the offending path instead of an AttributeError on astype.
        if image is None:
            raise FileNotFoundError(
                'Cannot read image: {}'.format(image_path))
        image = image.astype('float32')
        if self.transform is not None:
            image = self.transform(image)
        return image

    def __len__(self):
        """Return the number of images listed in the index file."""
        return len(self.data_list)

# Deterministic preprocessing for inference: same resize, normalisation and
# transpose as the training pipeline, without the random augmentations.
transform_test = Compose([
    Resize((224, 224)),
    # (x - 127.5) / 127.5 per channel, on channel-last data.
    Normalize(mean=[127.5] * 3, std=[127.5] * 3, data_format='HWC'),
    Transpose(),
])
# Load the test set, then sanity-check one sample and the sample count.
test_dataset = InferDataset(test_data_dir, test_path, transform=transform_test)
# Items are bare images (no label); show the shape of the first one.
print(test_dataset[0].shape)
print(len(test_dataset))

(3, 224, 224)
200


In [53]:
# Run inference over the whole test set with predict's default settings.
test_result = model.predict(test_data=test_dataset)

Predict begin...
Predict samples: 200


In [54]:
# Map zero-based class index -> species name, read from "<id> <name>" lines.
species_dict = {}
# Explicit UTF-8, matching how the other text files in this notebook are read.
with open(spicies_path, encoding='utf-8') as f:
    for line in f:
        line = line.strip()
        # Tolerate blank lines (e.g. a trailing newline at end of file).
        if not line:
            continue
        species_id, species_name = line.split(' ', 1)
        # Ids are shifted down by one, the same shift MyDataset applies to
        # its labels, so keys line up with the network's output indices.
        species_dict[int(species_id) - 1] = species_name

print(species_dict)

{0: '001.Atrophaneura_horishanus', 1: '002.Atrophaneura_varuna', 2: '003.Byasa_alcinous', 3: '004.Byasa_dasarada', 4: '005.Byasa_polyeuctes', 5: '006.Graphium_agamemnon', 6: '007.Graphium_cloanthus', 7: '008.Graphium_sarpedon', 8: '009.Iphiclides_podalirius', 9: '010.Lamproptera_curius', 10: '011.Lamproptera_meges', 11: '012.Losaria_coon', 12: '013.Meandrusa_payeni', 13: '014.Meandrusa_sciron', 14: '015.Pachliopta_aristolochiae', 15: '016.Papilio_alcmenor', 16: '017.Papilio_arcturus', 17: '018.Papilio_bianor', 18: '019.Papilio_dialis', 19: '020.Papilio_hermosanus'}


In [55]:
# Write one predicted species name per test sample, in dataset order.
# test_result[0] holds the first network output as a list of per-batch
# arrays. Iterating it directly avoids hard-coding the sample count, and
# the per-row argmax stays correct if predict is run with batch_size > 1.
# NOTE(review): assumes each batch array is (batch, num_classes) - confirm
# against the paddle.Model.predict documentation.
with open('model_result.txt', 'w', encoding='utf-8') as f:
    for batch_scores in test_result[0]:
        for class_id in np.atleast_2d(batch_scores).argmax(axis=1):
            f.write(species_dict[int(class_id)] + '\n')