# <b><font color="#FF6633">图像溯源-main</font></b>

## 包导入与参数定义

In [1]:
# 加载tensorflow模型
import numpy as np
import os
# Uncomment the line below to make GPU unavaliable
# os.environ["CUDA_VISIBLE_DEVICES"] = "-1" 
from tensorflow.keras.layers import Dense,Dropout, Input, concatenate,Flatten
import tensorflow.keras.optimizers as optimizers
from tensorflow.keras.models import Model
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications.vgg16 import preprocess_input

## 定义模型
1. 采用**vgg16**进行分类，使用在imageNet上进行预训练的模型进行迁移学习（当然我们的任务和和物体分类差别很大，所有预训练的模型在这里意义不是很大）  
2. 池化选择**平均池化**：因为我们想要的是全局特征，平均池化有利于滤除细微的扰动【但我不确定max_pool是否会更好】   
3. 优化方式为**sgd**,随机性更强的sgd更有利于跳过局部最优，对于我们的任务来说，当然是有必要的  

In [2]:
# 定义宏参数
PICS_WIDTH,PICS_HEIGHT = 256,256
MODEL_LOSS = 'categorical_crossentropy'
MODEL_METRIC = 'categorical_accuracy'
NUM_CATEGS = 10

def InitialiazeModel(head_only,weights,model_name,lr=0.001):
    """
    head_only:选择是否只训练顶端（即自定义的全连接层）
    weights:选择是否从外部导入权重
    model:模型名称
    lr:学习率：默认为0.001
    """

    # ================================ 该实验选择的分类器 ============================================ #
    if model_name == 'VGG16':
        from tensorflow.keras.applications.vgg16 import VGG16
        base_model = VGG16(include_top=False, weights='imagenet',
                      input_shape=(PICS_WIDTH, PICS_HEIGHT, 3), pooling = 'avg')
        
    # ============================================================================================== #

#     # 是否训练头部    
#     if head_only:
#         for lay in base_model.layers:
#             lay.trainable = False
            
#     for i,lay in enumerate(base_model.layers):
#             # print(i,lay)
#             if i <= 10:
#                 lay.trainable = False

    # ======================= 全连接层 ======================================
    flat1 = Flatten()(base_model.layers[-1].output)
    # 第一层
    class1 = Dense(256, activation='relu', kernel_initializer='he_uniform')(flat1)
    dropout1 = Dropout(0.2)(class1)
    # 输出层
    output = Dense(NUM_CATEGS, activation='softmax')(dropout1)
    # define new model
    model = Model(inputs=base_model.inputs, outputs=output)
    
    # print(model.summary())
    # 如果存在已有权重则加载已有权重
    if weights != '':
        model.load_weights(weights)
    # ========================= 优化器 =======================================
    # MODEL_OPTIMIZER = optimizers.Adam(lr=1e-5,beta_2=0.9999,amsgrad=True)
    MODEL_OPTIMIZER = optimizers.SGD(lr=lr, momentum=0.7, nesterov=True)
    # 编译模型
    model.compile(loss=MODEL_LOSS, optimizer=MODEL_OPTIMIZER, metrics=[MODEL_METRIC])
    # model.summary()
    return model

In [5]:
InitialiazeModel(head_only=True,weights='',model_name='VGG16',lr=0.001)

Model: "functional_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(None, 256, 256, 3)]     0         
_________________________________________________________________
block1_conv1 (Conv2D)        (None, 256, 256, 64)      1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, 256, 256, 64)      36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, 128, 128, 64)      0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, 128, 128, 128)     73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, 128, 128, 128)     147584    
_________________________________________________________________
block2_pool (MaxPooling2D)   (None, 64, 64, 128)      

<tensorflow.python.keras.engine.functional.Functional at 0x7fecadaf7d90>

## 开始训练
定义训练用变量

In [3]:
category = os.listdir("./datasets/Train")
category.pop()
print(category)

['Apple_iPhone6Plus', 'Canon_PowerShotA640', 'Sony_DSC-W170', 'Samsung_GalaxyS5', 'Huawei_P9', 'Nikon_D70s', 'OnePlus_A3003', 'Microsoft_Lumia640LTE', 'Lenovo_P70A', 'Xiaomi_RedmiNote3']


很麻烦的一件事是，初始的vgg16权重是在ImageNet上训练的，需要通过preprocess_input函数处理，而问题是keras的生成器函数ImageDataGenerator  
没有提供能进行自定义预处理的接口。一个聪明的方法是**自定义一个生成器封装ImageDataGenerator，对ImageDataGenerator生成的图像进行处理**

In [4]:
from random import randrange as rrg
def vgg16_gen(directory,classes):
    """
    生成器，对ImageDataGenerator的输出进行处理
    """
    data_gen = ImageDataGenerator()
    train_it = data_gen.flow_from_directory(directory=directory,target_size=(512,512),
                                           classes=category,class_mode= "categorical",
                                            batch_size=BATCH_SIZE)
    while True:
        X,y = next(train_it)
        X = preprocess_input(X)
        a,b = rrg(0,256),rrg(0,256)
        yield(X[:,a:a+256,b:b+256,:],y)    

--- 
从头开始(前两轮动量大，防止梯度爆炸)（毕竟我们数据大）

In [None]:
from tensorflow.keras.callbacks import ModelCheckpoint,EarlyStopping,TensorBoard,ReduceLROnPlateau
from math import ceil

model_name = "VGG16"
train_path = "./datasets/Train"
val_path = "./datasets/Val_main"
BATCH_SIZE = 64
train_sample_num = 30000
val_sample_num = 1555
weights = ''
# weights = ''

model = InitialiazeModel(head_only=False,weights=weights,model_name = model_name, lr=0.001)

weights_path_name = "./model_weight/vgg256{epoch:02d}+8.hdf5" 
callbacks = [ModelCheckpoint(weights_path_name, monitor='val_acc', save_best_only=True, verbose=0,
                                             save_weights_only=True),
             EarlyStopping(monitor='val_acc', patience=3, verbose=0.01),
             TensorBoard(log_dir='train_log',update_freq=20000),
             ReduceLROnPlateau(factor=0.5,
                               patience=1, 
                              min_lr=0.0005)]
history1 = model.fit_generator(generator = vgg16_gen(train_path,category),
                    validation_data = vgg16_gen(val_path,category),
                    epochs = 40,
                    steps_per_epoch=ceil(train_sample_num/ BATCH_SIZE),
                    validation_steps=ceil(val_sample_num/ BATCH_SIZE),
                   max_queue_size=20,
                   callbacks=callbacks,
                   verbose = 1)


Instructions for updating:
Please use Model.fit, which supports generators.
Found 30152 images belonging to 10 classes.
Epoch 1/40
  1/469 [..............................] - ETA: 0s - loss: 4.0624 - categorical_accuracy: 0.0625

In [6]:
model.save_weights("./model_weight/vgg256-02+30[90.9].hdf5")

第二轮开始设置momentum=0.9,继续训练

In [5]:
from tensorflow.keras.callbacks import ModelCheckpoint,EarlyStopping,TensorBoard,ReduceLROnPlateau
from math import ceil

model_name = "VGG16"
train_path = "./datasets/Train"
val_path = "./datasets/Val_main"
BATCH_SIZE = 64
train_sample_num = 30000
val_sample_num = 1555
# 记得改权重地址
weights = "./model_weight/vgg25601+8.hdf5"

model = InitialiazeModel(head_only=False,weights=weights,model_name = model_name, lr=0.001)

weights_path_name = "./model_weight/vgg256-{epoch:02d}+10.hdf5" 
callbacks = [ModelCheckpoint(weights_path_name, monitor='val_loss', save_best_only=True, verbose=0,
                                             save_weights_only=True),
             EarlyStopping(monitor='val_loss', patience=3, verbose=0.01)]
history1 = model.fit_generator(generator = vgg16_gen(train_path,category),
                    validation_data = vgg16_gen(val_path,category),
                    epochs = 40,
                    steps_per_epoch=ceil(train_sample_num/ BATCH_SIZE),
                    validation_steps=ceil(val_sample_num/ BATCH_SIZE),
                   max_queue_size=20,
                   callbacks=callbacks,
                   verbose = 1)

Instructions for updating:
Please use Model.fit, which supports generators.
Found 30152 images belonging to 10 classes.
Epoch 1/40
Epoch 2/40
Epoch 3/40
Epoch 4/40
Epoch 5/40
Epoch 6/40
Epoch 7/40
  2/469 [..............................] - ETA: 3:57 - loss: 0.1104 - categorical_accuracy: 0.9531

KeyboardInterrupt: 

In [6]:
from tensorflow.keras.callbacks import ModelCheckpoint,EarlyStopping,TensorBoard,ReduceLROnPlateau
from math import ceil

model_name = "VGG16"
train_path = "./datasets/Train"
val_path = "./datasets/Val_main"
BATCH_SIZE = 80
train_sample_num = 30000
val_sample_num = 1555
# 记得改权重地址
weights = "./model_weight/vgg256[p22][93.59].hdf5"

model = InitialiazeModel(head_only=False,weights=weights,model_name = model_name, lr=5e-8)

weights_path_name = "./model_weight/vgg256-{epoch:02d}+real-10.hdf5" 
callbacks = [ModelCheckpoint(weights_path_name, monitor='val_loss', save_best_only=True, verbose=0,
                                             save_weights_only=True),
             EarlyStopping(monitor='val_loss', patience=3, verbose=0.01)]
history1 = model.fit_generator(generator = vgg16_gen(train_path,category),
                    validation_data = vgg16_gen(val_path,category),
                    epochs = 40,
                    steps_per_epoch=ceil(train_sample_num/ BATCH_SIZE),
                    validation_steps=ceil(val_sample_num/ BATCH_SIZE),
                   max_queue_size=20,
                   callbacks=callbacks,
                   verbose = 1)

Found 30152 images belonging to 10 classes.
Epoch 1/40
Epoch 2/40
  3/375 [..............................] - ETA: 8:46 - loss: 0.0385 - categorical_accuracy: 0.9914 

KeyboardInterrupt: 