In [62]:
import os
import glob
import random
import csv
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import callbacks
import numpy as np


In [41]:
def load_pokemon(root, mode='train'):
    """Index the dataset under `root` and return one of its three splits.

    Every sub-directory of `root` is treated as one class. Class ids are
    assigned in sorted directory-name order, e.g.
    {'bulbasaur': 0, 'charmander': 1, 'mewtwo': 2, 'pikachu': 3, 'squirtle': 4}.

    Args:
        root: dataset directory containing one sub-directory per class.
            (The original body overwrote this argument with the literal
            'pokemon', so any other directory was silently ignored.)
        mode: 'train' returns the first 60% of the samples, 'val' the
            60%-80% slice, and any other value the last 20%.

    Returns:
        (images, labels, name2label): the image paths and integer labels of
        the requested split, plus the class-name -> label mapping.

    Note:
        The splits are slices of the list returned by data_to_csv(); they are
        only disjoint across calls if that list comes back in the same order
        on every call.
    """
    name2label = {}
    # Sorted so that label ids do not depend on the directory listing order.
    for name in sorted(os.listdir(root)):
        # Plain files inside root (e.g. the generated CSV) are not classes.
        if not os.path.isdir(os.path.join(root, name)):
            continue
        name2label[name] = len(name2label)

    # Rows have the form [path/to/image.png, label].
    images, labels = data_to_csv(root, 'images.csv', name2label)

    total = len(images)
    train_end = int(0.6 * total)
    val_end = int(0.8 * total)
    if mode == 'train':  # first 60%
        split = slice(None, train_end)
    elif mode == 'val':  # 60% -> 80%
        split = slice(train_end, val_end)
    else:  # 80% -> 100%
        split = slice(val_end, None)

    return images[split], labels[split], name2label

def data_to_csv(root, filename, name2label):
    """Return all (image path, label) pairs, backed by a CSV index file.

    If `root/filename` does not exist yet, every *.png, *.jpg and *.jpeg file
    inside each class directory is collected, shuffled once, and written to
    the CSV as `path,label` rows. If the file already exists it is reused
    as-is.

    Reusing the file is what keeps the sample order identical between calls:
    the original reshuffled and rewrote the CSV on every call, so callers that
    slice the result into train/val/test splits got overlapping splits.
    Delete the CSV to force a fresh index (e.g. after adding images).

    Args:
        root: dataset directory containing one sub-directory per class.
        filename: name of the CSV index file, created inside `root`.
        name2label: mapping from class directory name to integer label.

    Returns:
        (images, labels): two lists of equal length holding the image paths
        and their integer labels, in CSV order.
    """
    csv_path = os.path.join(root, filename)

    if not os.path.exists(csv_path):
        images = []
        for name in name2label.keys():
            # Match every png / jpg / jpeg image directly inside the class dir.
            for pattern in ('*.png', '*.jpg', '*.jpeg'):
                images += glob.glob(os.path.join(root, name, pattern))

        # Shuffle once, so that later slicing yields mixed-class splits.
        random.shuffle(images)

        with open(csv_path, mode='w', newline='') as f:
            writer = csv.writer(f)
            for img in images:
                # The parent directory name is the class name,
                # e.g. pokemon/bulbasaur/00000008.png -> 'bulbasaur'.
                name = os.path.basename(os.path.dirname(img))
                writer.writerow([img, name2label[name]])

    # Read the index back and split it into paths and labels.
    images, labels = [], []
    with open(csv_path, mode='r', newline='') as f:
        for row in csv.reader(f):
            if not row:
                # Tolerate blank lines in a hand-edited index.
                continue
            image, label = row
            images.append(image)
            labels.append(int(label))

    assert len(images) == len(labels)
    return images, labels
            

# Per-channel mean and standard deviation used to standardize images, one
# value per colour channel (shape [3]). They look like the widely used
# ImageNet channel statistics.
img_mean = tf.constant([0.485, 0.456, 0.406])
img_std = tf.constant([0.229, 0.224, 0.225])


def normalize(x, mean=img_mean, std=img_std):
    """Standardize an image: subtract `mean`, then divide by `std`.

    `mean` and `std` have shape [3] and broadcast over the last (channel)
    axis of `x`, e.g. an image of shape [224, 224, 3].
    """
    centered = x - mean
    return centered / std

def denormalize(x, mean=img_mean, std=img_std):
    """Undo normalize(): multiply by `std`, then add `mean` back."""
    rescaled = x * std
    return rescaled + mean

def preprocess(x, y):
    """Load one image file and turn it into a normalized model input.

    Args:
        x: path of the image file.
        y: integer class id of the image.

    Returns:
        (image, label): the image resized to 224x224 with 3 channels, scaled
        to [0, 1] and then standardized with normalize(); the label converted
        to a tensor.
    """
    x = tf.io.read_file(x)
    # channels=3 forces a three-channel (RGB) result.
    # NOTE(review): the dataset index also contains .png files; the TF docs
    # describe decode_jpeg as able to decode PNG too - confirm for the TF
    # version in use.
    x = tf.image.decode_jpeg(x, channels=3)
    x = tf.image.resize(x, size=[224, 224])

    # Optional augmentation, currently disabled:
    # horizontal flip
    # x = tf.image.random_flip_left_right(x)
    # vertical flip
    # x = tf.image.random_flip_up_down(x)
    # random crop
    # x = tf.image.random_crop(x, [224,224,3])

    # 8-bit pixel values span 0..255. The original divided by 225, which let
    # the scaled values exceed 1.
    x = tf.cast(x, dtype=tf.float32) / 255.
    x = normalize(x)

    y = tf.convert_to_tensor(y)

    return x, y


In [72]:
# Custom ResNet building block
class ResnetBlock(keras.Model):
    """Residual block: two 3x3 convolutions plus a shortcut connection.

    Main path: pad -> conv1 (stride `strides`) -> ReLU -> BN -> pad -> conv2
    (stride 1) -> ReLU -> BN. The activation comes before batch normalization
    here; that ordering is kept from the original design.

    Shortcut: the identity when `strides == 1`; otherwise a 1x1 convolution
    with the same stride, followed by ReLU and BN, so that its output matches
    the main path. The two paths are added and passed through a final ReLU.

    The input must already have `channels` channels when `strides == 1`,
    because the identity shortcut is added without any projection.

    Args:
        channels: number of output filters of both 3x3 convolutions.
        strides: stride of the first convolution (and of the shortcut).
    """

    def __init__(self, channels, strides=1):
        super(ResnetBlock, self).__init__()
        self.channels = channels
        self.strides = strides

        # One pixel of zero padding on each spatial side followed by a 'valid'
        # 3x3 convolution. This reproduces the explicit padding
        # [[0,0],[1,1],[1,1],[0,0]] the original passed to Conv2D, but only
        # uses documented Conv2D padding values ('valid' / 'same'); the list
        # form is undocumented and may be rejected by other Keras versions.
        self.pad1 = keras.layers.ZeroPadding2D(padding=1)
        self.conv1 = keras.layers.Conv2D(channels, kernel_size=3,
                                         strides=strides,
                                         padding='valid')
        self.bn1 = keras.layers.BatchNormalization()

        self.pad2 = keras.layers.ZeroPadding2D(padding=1)
        self.conv2 = keras.layers.Conv2D(channels, kernel_size=3,
                                         strides=1,
                                         padding='valid')
        self.bn2 = keras.layers.BatchNormalization()

        if strides != 1:
            # Projection shortcut, only needed when the main path downsamples.
            self.down_conv = keras.layers.Conv2D(channels, 1, strides=strides, padding='valid')
            self.down_bn = keras.layers.BatchNormalization()

    def call(self, inputs, training=None):
        """Run the block; `training` is forwarded to the BatchNormalization layers."""
        residual = inputs

        x = self.conv1(self.pad1(inputs))
        x = tf.nn.relu(x)
        x = self.bn1(x, training=training)

        x = self.conv2(self.pad2(x))
        x = tf.nn.relu(x)
        x = self.bn2(x, training=training)

        # Residual connection
        if self.strides != 1:
            residual = self.down_conv(inputs)
            residual = tf.nn.relu(residual)
            residual = self.down_bn(residual, training=training)

        x = x + residual
        x = tf.nn.relu(x)
        return x
            
        
class ResNet(keras.Model):
    """Small ResNet-style classifier that outputs raw logits.

    Layout: a 3x3 stride-3 stem convolution, then four stages. Each stage is
    a downsampling ResnetBlock, a stride-1 ResnetBlock and a Dropout(0.5)
    layer, giving 8 residual blocks (16 3x3 convolutions) in total. The stages
    are followed by batch normalization, global max pooling and a dense layer
    with `num_classes` units and no activation.

    Args:
        num_classes: number of output units of the final dense layer.
        initial_filters: filters of the stem convolution; the four stages use
            2x, 4x, 8x and 16x this value.
        **kwargs: forwarded to keras.Model (e.g. `name`). The original
            accepted these but silently dropped them.
    """

    def __init__(self, num_classes, initial_filters=16, **kwargs):
        super(ResNet, self).__init__(**kwargs)
        # Stem
        self.stem = keras.layers.Conv2D(initial_filters, 3, strides=3, padding='valid')

        # (filter multiplier, stride of the first block) for each stage.
        stage_layers = []
        for multiplier, strides in ((2, 3), (4, 3), (8, 2), (16, 2)):
            filters = initial_filters * multiplier
            stage_layers.append(ResnetBlock(filters, strides=strides))
            stage_layers.append(ResnetBlock(filters, strides=1))
            # Dropout after every stage to reduce overfitting.
            stage_layers.append(keras.layers.Dropout(0.5))
        self.blocks = keras.Sequential(stage_layers)

        self.final_bn = keras.layers.BatchNormalization()
        # NOTE: despite its name this attribute holds a global *max* pooling
        # layer. The name is kept because code outside this class may use it.
        self.avg_pool = keras.layers.GlobalMaxPool2D()
        self.fc = keras.layers.Dense(num_classes)

    def call(self, inputs, training=None):
        """Return logits; `training` is forwarded to the stages and the final BN."""
        out = self.stem(inputs)
        out = tf.nn.relu(out)

        out = self.blocks(out, training=training)

        out = self.final_bn(out, training=training)
        out = self.avg_pool(out)
        out = self.fc(out)
        return out

In [73]:
# Number of output classes. It was previously defined but unused, with the
# literal 5 repeated in the constructor call.
num_classes = 5
model = ResNet(num_classes)
# The leading 4 is only a placeholder batch size used to create the weights.
model.build(input_shape=(4,224,224,3))
model.summary()

Model: "res_net_19"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_219 (Conv2D)          multiple                  448       
_________________________________________________________________
sequential_17 (Sequential)   multiple                  2797280   
_________________________________________________________________
batch_normalization_230 (Bat multiple                  1024      
_________________________________________________________________
global_max_pooling2d_14 (Glo multiple                  0         
_________________________________________________________________
dense_20 (Dense)             multiple                  1285      
Total params: 2,800,037
Trainable params: 2,794,725
Non-trainable params: 5,312
_________________________________________________________________


In [42]:
batch_size = 128

# load_pokemon() may shuffle the file list with the `random` module. Re-seeding
# with the same value before every call makes each call produce the same order
# (assuming the directory listing itself is stable), so the train / val / test
# slices stay disjoint and reproducible.
SPLIT_SEED = 1234

random.seed(SPLIT_SEED)
images, labels, table = load_pokemon('pokemon', mode='train')
db_train = tf.data.Dataset.from_tensor_slices((images, labels))
# Shuffle the (path, label) pairs before decoding, so the shuffle buffer holds
# short strings rather than decoded 224x224 images.
db_train = db_train.shuffle(1000).map(preprocess).batch(batch_size)

random.seed(SPLIT_SEED)
images2, labels2, table2 = load_pokemon('pokemon', mode='val')
db_val = tf.data.Dataset.from_tensor_slices((images2, labels2))
# Evaluation metrics do not depend on sample order, so no shuffling here.
db_val = db_val.map(preprocess).batch(batch_size)

random.seed(SPLIT_SEED)
images3, labels3, table3 = load_pokemon('pokemon', mode='test')
db_test = tf.data.Dataset.from_tensor_slices((images3, labels3))
db_test = db_test.map(preprocess).batch(batch_size)

In [75]:
# Early stopping: stop when val_accuracy has not improved by at least 0.001
# for 5 consecutive epochs.
early_stopping = callbacks.EarlyStopping(monitor='val_accuracy', min_delta=0.001, patience=5)

# The datasets yield integer class ids (not one-hot vectors), so the sparse
# variant of the cross-entropy loss is required. CategoricalCrossentropy
# expects one-hot targets and computes a wrong loss on integer labels.
model.compile(optimizer=tf.optimizers.Adam(learning_rate=0.001),
              loss=tf.losses.SparseCategoricalCrossentropy(from_logits=True),
              metrics=['accuracy'])
model.fit(db_train, validation_data=db_val, validation_freq=1, epochs=100,
          callbacks=[early_stopping])
model.evaluate(db_test)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100


KeyboardInterrupt: 

In [70]:
# The ResNet above clearly overfits: the model is more complex than the data
# warrants. As an improvement, try a much simpler model.
model2 = keras.Sequential([
    keras.layers.Conv2D(filters=16, kernel_size=5, strides=3),
    keras.layers.MaxPool2D(pool_size=3, strides=3),
    keras.layers.ReLU(),
    keras.layers.Conv2D(filters=64, kernel_size=5, strides=3),
    keras.layers.MaxPool2D(pool_size=2, strides=2),
    keras.layers.ReLU(),
    keras.layers.Flatten(),
    keras.layers.Dense(64),
    keras.layers.ReLU(),
    keras.layers.Dense(5)
])


# The datasets yield integer class ids (not one-hot vectors), so the sparse
# variant of the cross-entropy loss is required. CategoricalCrossentropy
# expects one-hot targets and computes a wrong loss on integer labels.
model2.compile(optimizer=tf.optimizers.Adam(0.01),
               loss=tf.losses.SparseCategoricalCrossentropy(from_logits=True),
               metrics=['accuracy'])
model2.fit(db_train, validation_data=db_val, validation_freq=1, epochs=15,
           callbacks=[early_stopping])
model2.evaluate(db_test)

Epoch 1/15
Epoch 2/15
Epoch 3/15


KeyboardInterrupt: 

In [77]:
# Pretrained VGG19 used as a fixed feature extractor:
#   include_top=False  - drop the original classification head
#   pooling='max'      - global max pooling over the last feature map
#   weights='imagenet' - load the weights trained on ImageNet
model_train = keras.applications.VGG19(
    include_top=False,
    pooling='max',
    weights='imagenet',
)
# Freeze the backbone so its weights are not updated during backpropagation.
model_train.trainable = False

Downloading data from https://github.com/fchollet/deep-learning-models/releases/download/v0.1/vgg19_weights_tf_dim_ordering_tf_kernels_notop.h5


In [78]:
# Transfer-learning model: frozen VGG19 backbone plus a new 5-way logits layer.
new_model_train = keras.Sequential()
new_model_train.add(model_train)
new_model_train.add(keras.layers.Dense(5))
# The leading 4 is only a placeholder batch size used to create the weights.
new_model_train.build(input_shape=(4, 224, 224, 3))
new_model_train.summary()

Model: "sequential_18"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
vgg19 (Model)                (None, 512)               20024384  
_________________________________________________________________
dense_21 (Dense)             (None, 5)                 2565      
Total params: 20,026,949
Trainable params: 2,565
Non-trainable params: 20,024,384
_________________________________________________________________


In [81]:
# Early stopping: stop when val_accuracy has not improved by at least 0.001
# for 5 consecutive epochs.
early_stopping = callbacks.EarlyStopping(
    monitor='val_accuracy',
    min_delta=0.001,
    patience=5
)

# The datasets yield integer class ids (not one-hot vectors), so the sparse
# variant of the cross-entropy loss is required. CategoricalCrossentropy
# expects one-hot targets and computes a wrong loss on integer labels.
new_model_train.compile(optimizer=tf.optimizers.Adam(learning_rate=1e-3),
                        loss=tf.losses.SparseCategoricalCrossentropy(from_logits=True),
                        metrics=['accuracy'])
new_model_train.fit(db_train, validation_data=db_val, validation_freq=1, epochs=100,
                    callbacks=[early_stopping])
new_model_train.evaluate(db_test)

Epoch 1/100
Epoch 2/100


KeyboardInterrupt: 