In [1]:
import os

# Silence TensorFlow's C++ backend logging ('3' filters INFO, WARNING and ERROR).
# This has to happen BEFORE `import tensorflow`: messages emitted while the
# library is being imported are otherwise printed regardless of this setting.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

import datetime

import numpy as np
import tensorflow as tf
from tensorflow.keras import layers,optimizers,datasets,Sequential

# Project-local model definitions.
from resnet import resnet18,resnet34

In [2]:
# Count the GPU devices that TensorFlow can see on this machine.
len(tf.config.experimental.list_physical_devices('GPU'))

1

数据读取

In [3]:
def preprocess(x,y):
    """Convert one (image, label) pair to the dtypes/range used for training.

    The image is cast to float32 and mapped through ``2 * v / 255 - 1``
    (so 0 -> -1 and 255 -> 1); the label is cast to int32.

    Returns:
        Tuple ``(image, label)`` after conversion.
    """
    pixels = tf.cast(x, dtype=tf.float32)
    doubled = 2 * pixels
    rescaled = doubled / 255. - 1
    label = tf.cast(y, dtype=tf.int32)
    return rescaled, label

# Load CIFAR-100 and build the tf.data pipelines used by every training run.
(x,y),(x_test,y_test) = datasets.cifar100.load_data()
x = x.astype(np.float32)
x_test = x_test.astype(np.float32)
# Labels arrive with a trailing axis of size 1; drop it so each label is a scalar.
y=tf.squeeze(y,axis=1)
y_test=tf.squeeze(y_test,axis=1)

# Training pipeline: reshuffle every epoch so batches differ between epochs.
# Without shuffling, every epoch replays identical batches in identical order,
# and the in-batch partners available to mixup/cutmix never change.
train_db=tf.data.Dataset.from_tensor_slices((x,y))
train_db=train_db.shuffle(10000).map(preprocess).batch(50)

# Evaluation pipeline: order is irrelevant, so no shuffling.
test_db=tf.data.Dataset.from_tensor_slices((x_test,y_test))
test_db=test_db.map(preprocess).batch(50)

数据增强方法

In [12]:
def cutout_mask(img,label):
    """Apply Cutout to every second sample of a batch.

    For samples 0, 2, 4, ... a square window of side 16, centred on a random
    pixel and clipped to the image borders, is overwritten with zeros.
    Labels are passed through unchanged.

    Args:
        img: object exposing ``.numpy()`` that yields a 4-D array
            (batch, height, width, channels).
        label: object exposing ``.numpy()``.

    Returns:
        Tuple ``(images, labels)`` as numpy arrays.
    """
    half_side = 16 // 2
    images = img.numpy()
    labels = label.numpy()

    count, height, width, _ = images.shape
    for sample in range(count)[::2]:
        # Draw the window centre: row first, then column.
        centre_row = np.random.randint(height)
        centre_col = np.random.randint(width)

        top = min(max(centre_row - half_side, 0), height)
        bottom = min(max(centre_row + half_side, 0), height)
        left = min(max(centre_col - half_side, 0), width)
        right = min(max(centre_col + half_side, 0), width)

        images[sample, top:bottom, left:right, :] = 0

    return images, labels

def mixup_mask(img,label):
    """Apply mixup to every second sample of a batch.

    For samples 0, 2, 4, ... a partner index is drawn uniformly from the
    batch and a weight is drawn from U[0, 1); image and label are replaced
    by ``weight * own + (1 - weight) * partner``. Updates are made in place,
    so a partner that was itself mixed earlier contributes its mixed
    image and label.

    Args:
        img: object exposing ``.numpy()`` that yields a 4-D array
            (batch, height, width, channels).
        label: object exposing ``.numpy()`` that yields per-sample label
            rows (indexed as ``label[i, :]``).

    Returns:
        Tuple ``(images, labels)`` as numpy arrays.
    """
    images = img.numpy()
    labels = label.numpy()
    count, _, _, _ = images.shape

    for target in range(0, count, 2):
        # Keep the draw order: partner index first, mixing weight second.
        partner = np.random.randint(0, count)
        weight = np.random.uniform()
        keep, take = weight, 1 - weight
        images[target, ...] = keep * images[target, ...] + take * images[partner, ...]
        labels[target, :] = keep * labels[target, :] + take * labels[partner, :]
    return images, labels

def cutmix_mask(img,label):
    """Apply CutMix to every second sample of a batch.

    For samples 0, 2, 4, ... a partner index is drawn uniformly from the
    batch, and a square window of side 16 (centred on a random pixel and
    clipped to the image borders) is replaced by the same window of the
    partner image. The label becomes
    ``lamda * own + (1 - lamda) * partner`` where ``lamda`` is the fraction
    of the image area that was NOT replaced.

    Partner pixels and labels are always read from a snapshot of the
    un-augmented batch. Reading from the batch while it is being modified
    would let an already-augmented partner donate a window whose content
    does not match the class proportions stored in its (mixed) label.

    Args:
        img: object exposing ``.numpy()`` that yields a 4-D array
            (batch, height, width, channels).
        label: object exposing ``.numpy()`` that yields per-sample label
            rows (indexed as ``label[i, :]``).

    Returns:
        Tuple ``(images, labels)`` as numpy arrays.
    """
    length = 16
    img = img.numpy()
    label = label.numpy()
    batch_size,h,w,channel_num = img.shape

    # Untouched copies used as the source for every pasted window / label.
    src_img = img.copy()
    src_label = label.copy()

    for i in range(0,batch_size,2):
        mixup_idx = np.random.randint(0,batch_size)
        y = np.random.randint(h)
        x = np.random.randint(w)

        y1 = np.clip(y - length // 2, 0, h)
        y2 = np.clip(y + length // 2, 0, h)
        x1 = np.clip(x - length // 2, 0, w)
        x2 = np.clip(x + length // 2, 0, w)

        # Share of the image that keeps its original pixels.
        lamda = 1 - (y2-y1)*(x2 - x1)/(h*w)
        img[i, y1: y2, x1: x2, :] = src_img[mixup_idx, y1: y2, x1: x2, :]
        label[i,:] = lamda*src_label[i,:] + (1-lamda) * src_label[mixup_idx,:]
    return img,label

模型构建

In [8]:
# Create the network for the baseline run and open one TensorBoard summary
# writer for the training curves and one for the test curves.
model = resnet34()
model.build(input_shape=(None, 32, 32, 3))

# Timestamped run name keeps the logs of separate runs in separate directories.
current_time = ''.join([datetime.datetime.now().strftime("%Y%m%d-%H%M%S"), '_baseline'])
train_log_dir = ''.join(['logs/gradient_tape/', current_time, '/train'])
test_log_dir = ''.join(['logs/gradient_tape/', current_time, '/test'])
train_summary_writer, test_summary_writer = (
    tf.summary.create_file_writer(path) for path in (train_log_dir, test_log_dir)
)

加载 TensorBoard 扩展（如需清空旧日志，请在训练前手动删除 logs/ 目录）

In [7]:
# Load the TensorBoard notebook extension into this kernel.
%load_ext tensorboard

训练

In [9]:
# Baseline run: train the current `model` on CIFAR-100 without any batch-level
# augmentation, logging per-epoch loss/accuracy for train and test to TensorBoard.
model.build(input_shape=(None,32,32,3))
lr = 1e-4
# `learning_rate` is the supported keyword; `lr` is only a deprecated alias.
optimizer=optimizers.Adam(learning_rate=lr)

# Running means over one epoch; reset at the end of every epoch.
train_loss = tf.keras.metrics.Mean('train_loss', dtype=tf.float32)
train_accuracy = tf.keras.metrics.Mean('train_accuracy', dtype=tf.float32)
test_loss = tf.keras.metrics.Mean('test_loss', dtype=tf.float32)
test_accuracy = tf.keras.metrics.Mean('test_accuracy', dtype=tf.float32)

for epoch in range(50):
    # Optional step decay of the learning rate, currently disabled:
    # if epoch % 5 == 4:
    #     lr/=10
    #     optimizer.lr = lr

    # Batch variables are named x_batch/y_batch so the loop does not overwrite
    # the dataset-level `x`/`y` defined in the data-loading cell.
    for step,(x_batch,y_batch) in enumerate(train_db):
        # [b] => [b,100]
        y_onehot=tf.one_hot(y_batch,depth=100)
        # x_batch, y_onehot = cutmix_mask(x_batch,y_onehot)

        # Record only the forward pass and the loss on the tape.
        with tf.GradientTape() as tape:
            # [b,32,32,3] => [b,100]. training=True makes any
            # BatchNormalization/Dropout layers inside the model run in training mode.
            logits=model(x_batch,training=True)
            # NOTE(review): categorical_crossentropy defaults to from_logits=False.
            # If the model's last layer has no softmax, from_logits=True is needed
            # here and in the test loop -- confirm against resnet.py.
            loss=tf.losses.categorical_crossentropy(y_onehot,logits)
            loss=tf.reduce_mean(loss)

        # Batch accuracy against the integer labels.
        pred=tf.argmax(logits,axis=1)
        pred=tf.cast(pred,dtype=tf.int32)
        correct=tf.cast(tf.equal(pred,y_batch),dtype=tf.int32)
        correct=tf.reduce_sum(correct)
        acc = correct/x_batch.shape[0]

        train_loss(loss)
        train_accuracy(acc)
        # Compute the gradients and let the optimizer apply them to the variables.
        grads=tape.gradient(loss,model.trainable_variables)
        optimizer.apply_gradients(zip(grads,model.trainable_variables))

        if step % 100 == 0:
            print(epoch,step,'losses:',float(loss))

    with train_summary_writer.as_default():
        tf.summary.scalar('loss', train_loss.result(), step=epoch)
        tf.summary.scalar('accuracy', train_accuracy.result(), step=epoch)

    # Evaluate on the test split. The raw counts are not used below; they are
    # retained so they stay available in the notebook namespace.
    total_num=0
    total_correct=0
    for x_batch,y_batch in test_db:
        # training=False: inference behaviour for BatchNormalization/Dropout-style layers.
        logits=model(x_batch,training=False)
        y_onehot = tf.one_hot(y_batch,depth = 100)
        loss=tf.losses.categorical_crossentropy(y_onehot,logits)
        loss=tf.reduce_mean(loss)

        pred=tf.argmax(logits,axis=1)
        pred=tf.cast(pred,dtype=tf.int32)
        correct=tf.cast(tf.equal(pred,y_batch),dtype=tf.int32)
        correct=tf.reduce_sum(correct)

        total_num += x_batch.shape[0]
        total_correct += int(correct)

        test_accuracy(correct/x_batch.shape[0])
        test_loss(loss)

    with test_summary_writer.as_default():
        tf.summary.scalar('loss', test_loss.result(), step=epoch)
        tf.summary.scalar('accuracy', test_accuracy.result(), step=epoch)

    template = 'Epoch {}, Loss: {}, Accuracy: {}, Test Loss: {}, Test Accuracy: {}'
    print(template.format(epoch+1,
                         train_loss.result(),
                         train_accuracy.result()*100,
                         test_loss.result(),
                         test_accuracy.result()*100))

    # Start the next epoch with empty accumulators.
    train_loss.reset_states()
    test_loss.reset_states()
    train_accuracy.reset_states()
    test_accuracy.reset_states()

model.save_weights('baseline.h5')

0 0 losses: 4.627544403076172
0 100 losses: 4.504234790802002
0 200 losses: 4.452385902404785
0 300 losses: 4.341122150421143
0 400 losses: 4.169934272766113
0 500 losses: 3.9839138984680176
0 600 losses: 4.03331995010376
0 700 losses: 3.9039089679718018
0 800 losses: 3.8345959186553955
0 900 losses: 3.6105217933654785
Epoch 1, Loss: 4.0777130126953125, Accuracy: 6.052009105682373, Test Loss: 3.6715903282165527, Test Accuracy: 12.940006256103516
1 0 losses: 3.8005781173706055
1 100 losses: 3.794304847717285
1 200 losses: 3.796342372894287
1 300 losses: 3.6335818767547607
1 400 losses: 3.5087995529174805
1 500 losses: 3.819816827774048
1 600 losses: 3.5144262313842773
1 700 losses: 3.7881975173950195
1 800 losses: 3.5731048583984375
1 900 losses: 2.925558090209961
Epoch 2, Loss: 3.47660756111145, Accuracy: 16.12400245666504, Test Loss: 3.205563545227051, Test Accuracy: 21.960006713867188
2 0 losses: 3.364091157913208
2 100 losses: 3.39127254486084
2 200 losses: 3.4530022144317627
2 300 

NotImplementedError: Saving the model to HDF5 format requires the model to be a Functional model or a Sequential model. It does not work for subclassed models, because such models are defined via the body of a Python method, which isn't safely serializable. Consider saving to the Tensorflow SavedModel format (by setting save_format="tf") or using `save_weights`.

In [10]:
# Write the weights of the current `model` to 'baseline.h5'.
model.save_weights('baseline.h5')

In [13]:
# Cutout run: fresh ResNet-34, same schedule as the baseline, with cutout_mask
# applied to every training batch. Logs go to a separate '<timestamp>_cutout' directory.
model = resnet34()
model.build(input_shape=(None,32,32,3))
current_time = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")+'_cutout'
train_log_dir = 'logs/gradient_tape/' + current_time + '/train'
test_log_dir = 'logs/gradient_tape/' + current_time + '/test'
train_summary_writer = tf.summary.create_file_writer(train_log_dir)
test_summary_writer = tf.summary.create_file_writer(test_log_dir)

lr = 1e-4
# `learning_rate` is the supported keyword; `lr` is only a deprecated alias.
optimizer=optimizers.Adam(learning_rate=lr)

# Running means over one epoch; reset at the end of every epoch.
train_loss = tf.keras.metrics.Mean('train_loss', dtype=tf.float32)
train_accuracy = tf.keras.metrics.Mean('train_accuracy', dtype=tf.float32)
test_loss = tf.keras.metrics.Mean('test_loss', dtype=tf.float32)
test_accuracy = tf.keras.metrics.Mean('test_accuracy', dtype=tf.float32)

for epoch in range(50):
    # Optional step decay of the learning rate, currently disabled:
    # if epoch % 5 == 4:
    #     lr/=10
    #     optimizer.lr = lr

    # Batch variables are named x_batch/y_batch so the loop does not overwrite
    # the dataset-level `x`/`y` defined in the data-loading cell.
    for step,(x_batch,y_batch) in enumerate(train_db):
        # [b] => [b,100]
        y_onehot=tf.one_hot(y_batch,depth=100)
        # Augmentation is plain numpy work; it does not need to be on the tape.
        x_aug, y_target = cutout_mask(x_batch,y_onehot)

        with tf.GradientTape() as tape:
            # [b,32,32,3] => [b,100]. training=True makes any
            # BatchNormalization/Dropout layers inside the model run in training mode.
            logits=model(x_aug,training=True)
            # NOTE(review): categorical_crossentropy defaults to from_logits=False.
            # If the model's last layer has no softmax, from_logits=True is needed
            # here and in the test loop -- confirm against resnet.py.
            loss=tf.losses.categorical_crossentropy(y_target,logits)
            loss=tf.reduce_mean(loss)

        # Batch accuracy against the original integer labels.
        pred=tf.argmax(logits,axis=1)
        pred=tf.cast(pred,dtype=tf.int32)
        correct=tf.cast(tf.equal(pred,y_batch),dtype=tf.int32)
        correct=tf.reduce_sum(correct)
        acc = correct/x_aug.shape[0]

        train_loss(loss)
        train_accuracy(acc)
        # Compute the gradients and let the optimizer apply them to the variables.
        grads=tape.gradient(loss,model.trainable_variables)
        optimizer.apply_gradients(zip(grads,model.trainable_variables))

        if step % 100 == 0:
            print(epoch,step,'losses:',float(loss))

    with train_summary_writer.as_default():
        tf.summary.scalar('loss', train_loss.result(), step=epoch)
        tf.summary.scalar('accuracy', train_accuracy.result(), step=epoch)

    # Evaluate on the (un-augmented) test split. The raw counts are not used
    # below; they are retained so they stay available in the notebook namespace.
    total_num=0
    total_correct=0
    for x_batch,y_batch in test_db:
        # training=False: inference behaviour for BatchNormalization/Dropout-style layers.
        logits=model(x_batch,training=False)
        y_onehot = tf.one_hot(y_batch,depth = 100)
        loss=tf.losses.categorical_crossentropy(y_onehot,logits)
        loss=tf.reduce_mean(loss)

        pred=tf.argmax(logits,axis=1)
        pred=tf.cast(pred,dtype=tf.int32)
        correct=tf.cast(tf.equal(pred,y_batch),dtype=tf.int32)
        correct=tf.reduce_sum(correct)

        total_num += x_batch.shape[0]
        total_correct += int(correct)

        test_accuracy(correct/x_batch.shape[0])
        test_loss(loss)

    with test_summary_writer.as_default():
        tf.summary.scalar('loss', test_loss.result(), step=epoch)
        tf.summary.scalar('accuracy', test_accuracy.result(), step=epoch)

    template = 'Epoch {}, Loss: {}, Accuracy: {}, Test Loss: {}, Test Accuracy: {}'
    print(template.format(epoch+1,
                         train_loss.result(),
                         train_accuracy.result()*100,
                         test_loss.result(),
                         test_accuracy.result()*100))

    # Start the next epoch with empty accumulators.
    train_loss.reset_states()
    test_loss.reset_states()
    train_accuracy.reset_states()
    test_accuracy.reset_states()

model.save_weights('cutout.h5')

0 0 losses: 4.6083478927612305
0 100 losses: 4.606198310852051
0 200 losses: 4.415788650512695
0 300 losses: 4.3008575439453125
0 400 losses: 4.232976913452148
0 500 losses: 4.002837657928467
0 600 losses: 4.1679816246032715
0 700 losses: 3.956101417541504
0 800 losses: 4.018930435180664
0 900 losses: 3.664863348007202
Epoch 1, Loss: 4.088409423828125, Accuracy: 6.170012950897217, Test Loss: 3.7066118717193604, Test Accuracy: 12.080011367797852
1 0 losses: 3.8980062007904053
1 100 losses: 3.8947360515594482
1 200 losses: 3.9306674003601074
1 300 losses: 3.775172710418701
1 400 losses: 3.618562936782837
1 500 losses: 3.704801082611084
1 600 losses: 3.5955889225006104
1 700 losses: 3.7246532440185547
1 800 losses: 3.484116792678833
1 900 losses: 3.016052007675171
Epoch 2, Loss: 3.547438859939575, Accuracy: 14.948015213012695, Test Loss: 3.257460355758667, Test Accuracy: 20.910005569458008
2 0 losses: 3.5213677883148193
2 100 losses: 3.461047887802124
2 200 losses: 3.5837273597717285
2 30

In [14]:
# Mixup run: fresh ResNet-34, same schedule as the baseline, with mixup_mask
# applied to every training batch. Logs go to a separate '<timestamp>_mixup' directory.
model = resnet34()
model.build(input_shape=(None,32,32,3))
current_time = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")+'_mixup'
train_log_dir = 'logs/gradient_tape/' + current_time + '/train'
test_log_dir = 'logs/gradient_tape/' + current_time + '/test'
train_summary_writer = tf.summary.create_file_writer(train_log_dir)
test_summary_writer = tf.summary.create_file_writer(test_log_dir)

lr = 1e-4
# `learning_rate` is the supported keyword; `lr` is only a deprecated alias.
optimizer=optimizers.Adam(learning_rate=lr)

# Running means over one epoch; reset at the end of every epoch.
train_loss = tf.keras.metrics.Mean('train_loss', dtype=tf.float32)
train_accuracy = tf.keras.metrics.Mean('train_accuracy', dtype=tf.float32)
test_loss = tf.keras.metrics.Mean('test_loss', dtype=tf.float32)
test_accuracy = tf.keras.metrics.Mean('test_accuracy', dtype=tf.float32)

for epoch in range(50):
    # Optional step decay of the learning rate, currently disabled:
    # if epoch % 5 == 4:
    #     lr/=10
    #     optimizer.lr = lr

    # Batch variables are named x_batch/y_batch so the loop does not overwrite
    # the dataset-level `x`/`y` defined in the data-loading cell.
    for step,(x_batch,y_batch) in enumerate(train_db):
        # [b] => [b,100]
        y_onehot=tf.one_hot(y_batch,depth=100)
        # Augmentation is plain numpy work; it does not need to be on the tape.
        # The returned targets are soft labels (mixtures of two one-hot rows).
        x_aug, y_target = mixup_mask(x_batch,y_onehot)

        with tf.GradientTape() as tape:
            # [b,32,32,3] => [b,100]. training=True makes any
            # BatchNormalization/Dropout layers inside the model run in training mode.
            logits=model(x_aug,training=True)
            # NOTE(review): categorical_crossentropy defaults to from_logits=False.
            # If the model's last layer has no softmax, from_logits=True is needed
            # here and in the test loop -- confirm against resnet.py.
            loss=tf.losses.categorical_crossentropy(y_target,logits)
            loss=tf.reduce_mean(loss)

        # Batch accuracy is measured against the original (un-mixed) integer labels.
        pred=tf.argmax(logits,axis=1)
        pred=tf.cast(pred,dtype=tf.int32)
        correct=tf.cast(tf.equal(pred,y_batch),dtype=tf.int32)
        correct=tf.reduce_sum(correct)
        acc = correct/x_aug.shape[0]

        train_loss(loss)
        train_accuracy(acc)
        # Compute the gradients and let the optimizer apply them to the variables.
        grads=tape.gradient(loss,model.trainable_variables)
        optimizer.apply_gradients(zip(grads,model.trainable_variables))

        if step % 100 == 0:
            print(epoch,step,'losses:',float(loss))

    with train_summary_writer.as_default():
        tf.summary.scalar('loss', train_loss.result(), step=epoch)
        tf.summary.scalar('accuracy', train_accuracy.result(), step=epoch)

    # Evaluate on the (un-augmented) test split. The raw counts are not used
    # below; they are retained so they stay available in the notebook namespace.
    total_num=0
    total_correct=0
    for x_batch,y_batch in test_db:
        # training=False: inference behaviour for BatchNormalization/Dropout-style layers.
        logits=model(x_batch,training=False)
        y_onehot = tf.one_hot(y_batch,depth = 100)
        loss=tf.losses.categorical_crossentropy(y_onehot,logits)
        loss=tf.reduce_mean(loss)

        pred=tf.argmax(logits,axis=1)
        pred=tf.cast(pred,dtype=tf.int32)
        correct=tf.cast(tf.equal(pred,y_batch),dtype=tf.int32)
        correct=tf.reduce_sum(correct)

        total_num += x_batch.shape[0]
        total_correct += int(correct)

        test_accuracy(correct/x_batch.shape[0])
        test_loss(loss)

    with test_summary_writer.as_default():
        tf.summary.scalar('loss', test_loss.result(), step=epoch)
        tf.summary.scalar('accuracy', test_accuracy.result(), step=epoch)

    template = 'Epoch {}, Loss: {}, Accuracy: {}, Test Loss: {}, Test Accuracy: {}'
    print(template.format(epoch+1,
                         train_loss.result(),
                         train_accuracy.result()*100,
                         test_loss.result(),
                         test_accuracy.result()*100))

    # Start the next epoch with empty accumulators.
    train_loss.reset_states()
    test_loss.reset_states()
    train_accuracy.reset_states()
    test_accuracy.reset_states()

model.save_weights('mixup.h5')

0 0 losses: 4.586793899536133
0 100 losses: 4.587038040161133
0 200 losses: 4.580397129058838
0 300 losses: 4.358099460601807
0 400 losses: 4.336691856384277
0 500 losses: 4.2393341064453125
0 600 losses: 4.331840991973877
0 700 losses: 4.111297130584717
0 800 losses: 4.2459211349487305
0 900 losses: 4.081028461456299
Epoch 1, Loss: 4.278772830963135, Accuracy: 4.216019153594971, Test Loss: 3.8562304973602295, Test Accuracy: 9.290008544921875
1 0 losses: 4.041572570800781
1 100 losses: 4.044863224029541
1 200 losses: 4.179912567138672
1 300 losses: 4.010278701782227
1 400 losses: 3.9121639728546143
1 500 losses: 4.059662342071533
1 600 losses: 3.917426824569702
1 700 losses: 3.955514430999756
1 800 losses: 3.8790173530578613
1 900 losses: 3.381333112716675
Epoch 2, Loss: 3.861189126968384, Accuracy: 10.16402816772461, Test Loss: 3.42830753326416, Test Accuracy: 17.740005493164062
2 0 losses: 3.735278367996216
2 100 losses: 3.760380744934082
2 200 losses: 3.722231388092041
2 300 losses:

In [15]:
# CutMix run: fresh ResNet-34, same schedule as the baseline, with cutmix_mask
# applied to every training batch. Logs go to a separate '<timestamp>_cutmix' directory.
model = resnet34()
model.build(input_shape=(None,32,32,3))
current_time = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")+'_cutmix'
train_log_dir = 'logs/gradient_tape/' + current_time + '/train'
test_log_dir = 'logs/gradient_tape/' + current_time + '/test'
train_summary_writer = tf.summary.create_file_writer(train_log_dir)
test_summary_writer = tf.summary.create_file_writer(test_log_dir)

lr = 1e-4
# `learning_rate` is the supported keyword; `lr` is only a deprecated alias.
optimizer=optimizers.Adam(learning_rate=lr)

# Running means over one epoch; reset at the end of every epoch.
train_loss = tf.keras.metrics.Mean('train_loss', dtype=tf.float32)
train_accuracy = tf.keras.metrics.Mean('train_accuracy', dtype=tf.float32)
test_loss = tf.keras.metrics.Mean('test_loss', dtype=tf.float32)
test_accuracy = tf.keras.metrics.Mean('test_accuracy', dtype=tf.float32)

for epoch in range(50):
    # Optional step decay of the learning rate, currently disabled:
    # if epoch % 5 == 4:
    #     lr/=10
    #     optimizer.lr = lr

    # Batch variables are named x_batch/y_batch so the loop does not overwrite
    # the dataset-level `x`/`y` defined in the data-loading cell.
    for step,(x_batch,y_batch) in enumerate(train_db):
        # [b] => [b,100]
        y_onehot=tf.one_hot(y_batch,depth=100)
        # Augmentation is plain numpy work; it does not need to be on the tape.
        # The returned targets are soft labels (mixtures of two one-hot rows).
        x_aug, y_target = cutmix_mask(x_batch,y_onehot)

        with tf.GradientTape() as tape:
            # [b,32,32,3] => [b,100]. training=True makes any
            # BatchNormalization/Dropout layers inside the model run in training mode.
            logits=model(x_aug,training=True)
            # NOTE(review): categorical_crossentropy defaults to from_logits=False.
            # If the model's last layer has no softmax, from_logits=True is needed
            # here and in the test loop -- confirm against resnet.py.
            loss=tf.losses.categorical_crossentropy(y_target,logits)
            loss=tf.reduce_mean(loss)

        # Batch accuracy is measured against the original (un-mixed) integer labels.
        pred=tf.argmax(logits,axis=1)
        pred=tf.cast(pred,dtype=tf.int32)
        correct=tf.cast(tf.equal(pred,y_batch),dtype=tf.int32)
        correct=tf.reduce_sum(correct)
        acc = correct/x_aug.shape[0]

        train_loss(loss)
        train_accuracy(acc)
        # Compute the gradients and let the optimizer apply them to the variables.
        grads=tape.gradient(loss,model.trainable_variables)
        optimizer.apply_gradients(zip(grads,model.trainable_variables))

        if step % 100 == 0:
            print(epoch,step,'losses:',float(loss))

    with train_summary_writer.as_default():
        tf.summary.scalar('loss', train_loss.result(), step=epoch)
        tf.summary.scalar('accuracy', train_accuracy.result(), step=epoch)

    # Evaluate on the (un-augmented) test split. The raw counts are not used
    # below; they are retained so they stay available in the notebook namespace.
    total_num=0
    total_correct=0
    for x_batch,y_batch in test_db:
        # training=False: inference behaviour for BatchNormalization/Dropout-style layers.
        logits=model(x_batch,training=False)
        y_onehot = tf.one_hot(y_batch,depth = 100)
        loss=tf.losses.categorical_crossentropy(y_onehot,logits)
        loss=tf.reduce_mean(loss)

        pred=tf.argmax(logits,axis=1)
        pred=tf.cast(pred,dtype=tf.int32)
        correct=tf.cast(tf.equal(pred,y_batch),dtype=tf.int32)
        correct=tf.reduce_sum(correct)

        total_num += x_batch.shape[0]
        total_correct += int(correct)

        test_accuracy(correct/x_batch.shape[0])
        test_loss(loss)

    with test_summary_writer.as_default():
        tf.summary.scalar('loss', test_loss.result(), step=epoch)
        tf.summary.scalar('accuracy', test_accuracy.result(), step=epoch)

    template = 'Epoch {}, Loss: {}, Accuracy: {}, Test Loss: {}, Test Accuracy: {}'
    print(template.format(epoch+1,
                         train_loss.result(),
                         train_accuracy.result()*100,
                         test_loss.result(),
                         test_accuracy.result()*100))

    # Start the next epoch with empty accumulators.
    train_loss.reset_states()
    test_loss.reset_states()
    train_accuracy.reset_states()
    test_accuracy.reset_states()

model.save_weights('cutmix.h5')

0 0 losses: 4.623771667480469
0 100 losses: 4.638014316558838
0 200 losses: 4.54831600189209
0 300 losses: 4.331723690032959
0 400 losses: 4.3754191398620605
0 500 losses: 4.244229316711426
0 600 losses: 4.376081466674805
0 700 losses: 4.142012596130371
0 800 losses: 4.1289520263671875
0 900 losses: 3.907952308654785
Epoch 1, Loss: 4.249817848205566, Accuracy: 4.762017726898193, Test Loss: 3.8200714588165283, Test Accuracy: 9.48000717163086
1 0 losses: 4.173576354980469
1 100 losses: 4.044555187225342
1 200 losses: 4.122124195098877
1 300 losses: 3.9464609622955322
1 400 losses: 3.7651538848876953
1 500 losses: 4.017367362976074
1 600 losses: 3.972993850708008
1 700 losses: 3.997004985809326
1 800 losses: 3.854299306869507
1 900 losses: 3.37711763381958
Epoch 2, Loss: 3.8290576934814453, Accuracy: 12.252025604248047, Test Loss: 3.381711483001709, Test Accuracy: 18.3900089263916
2 0 losses: 3.6837966442108154
2 100 losses: 3.8258373737335205
2 200 losses: 3.870396375656128
2 300 losses: