In [1]:
import numpy as np
import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()

from keras import layers
from keras.layers import Input, Add, Dense, Activation, ZeroPadding2D, BatchNormalization, Flatten, Conv2D, AveragePooling2D, MaxPooling2D, GlobalMaxPooling2D
from keras.models import Model, load_model
from keras.preprocessing import image
from keras.utils import layer_utils
from keras.utils.data_utils import get_file
from keras.applications.imagenet_utils import preprocess_input
from keras.utils.vis_utils import model_to_dot
from keras.utils.vis_utils import plot_model
from keras.initializers import glorot_uniform

import pydot
from IPython.display import SVG
import scipy.misc
from matplotlib.pyplot import imshow
import keras.backend as K
# Use NHWC tensors: the BatchNormalization layers in this notebook normalise over axis=3,
# which is the channel axis only under the 'channels_last' layout.
K.set_image_data_format('channels_last')
# Set the Keras learning phase to 1 (training mode). The smoke-test cells further down
# override it per run by feeding K.learning_phase() = 0.
# NOTE(review): later cells call tf.reset_default_graph(); confirm this setting still
# applies to the graph in which the final model is built.
K.set_learning_phase(1)

import resnets_utils 

def identity_block(X, f, filters, stage, block):
    """
    Build a three-layer residual "identity" block whose shortcut is the unmodified input.

    The main path is conv(1x1, valid) -> BN -> ReLU -> conv(f x f, same) -> BN -> ReLU
    -> conv(1x1, valid) -> BN, every convolution using stride 1. The result is added to
    the input and passed through a final ReLU. Because the shortcut is added as-is, the
    last filter count has to match the channel count of ``X``.

    Arguments:
    X -- input tensor; batch normalisation is applied over axis 3, i.e. a
         channels-last layout (m, n_H_prev, n_W_prev, n_C_prev) is expected
    f -- side length of the square kernel used by the middle convolution
    filters -- sequence of three integers: filter counts of the three convolutions
    stage -- value embedded (via str()) in the layer names
    block -- string embedded in the layer names

    Returns:
    Output tensor of the block after the final ReLU.
    """
    # Layer names look like "res<stage><block>_branch2a" / "bn<stage><block>_branch2a".
    stage_tag = str(stage) + block
    conv_prefix = "res" + stage_tag + "_branch"
    bn_prefix = "bn" + stage_tag + "_branch"

    n_first, n_middle, n_last = filters

    def conv_bn(tensor, n_filters, kernel, pad, suffix):
        """Apply one stride-1 convolution followed by channel-axis batch normalisation."""
        conv_layer = Conv2D(
            filters=n_filters,
            kernel_size=kernel,
            strides=(1, 1),
            padding=pad,
            name=conv_prefix + suffix,
            kernel_initializer=glorot_uniform(seed=0),
        )
        convolved = conv_layer(tensor)
        return BatchNormalization(axis=3, name=bn_prefix + suffix)(convolved)

    # Keep a handle on the untouched input for the skip connection.
    skip = X

    # Component 1: 1x1 convolution.
    main = conv_bn(X, n_first, (1, 1), "valid", "2a")
    main = Activation("relu")(main)

    # Component 2: f x f convolution; "same" padding keeps the spatial size.
    main = conv_bn(main, n_middle, (f, f), "same", "2b")
    main = Activation("relu")(main)

    # Component 3: 1x1 convolution, no activation before the merge.
    main = conv_bn(main, n_last, (1, 1), "valid", "2c")

    # Merge with the shortcut, then apply the final non-linearity.
    merged = Add()([main, skip])
    return Activation("relu")(merged)


# Smoke test for identity_block: build it in a fresh graph and run one random batch.
tf.reset_default_graph()
# The `with` statement closes the session on exit, so no explicit close() is needed.
with tf.Session() as test:
    np.random.seed(1)  # fixed seed so the printed slice is reproducible
    A_prev = tf.placeholder("float",[3,4,4,6])
    X = np.random.randn(3,4,4,6)
    A = identity_block(A_prev,f=2,filters=[2,4,6],stage=1,block="a")

    test.run(tf.global_variables_initializer())
    # Feed learning phase 0 so this forward pass runs in inference mode.
    out = test.run([A],feed_dict={A_prev:X,K.learning_phase():0})
    # Print the channel vector at spatial position (1, 0) of the second sample.
    print("out = " + str(out[0][1][1][0]))


Instructions for updating:
non-resource variables are not supported in the long term




Instructions for updating:
Colocations handled automatically by placer.
out = [0.9482299 0.        1.1610144 2.747859  0.        1.36677  ]


In [7]:
def convolutional_block(X, f, filters, stage, block, s = 2):
    """
    Build a three-layer residual block whose shortcut path is itself a convolution.

    Main path: conv(1x1, stride s, valid) -> BN -> ReLU -> conv(f x f, stride 1, same)
    -> BN -> ReLU -> conv(1x1, stride 1, valid) -> BN.
    Shortcut path: conv(1x1, stride s, valid) -> BN, with as many filters as the last
    main-path convolution. Both paths are summed and passed through a final ReLU.

    Arguments:
    X -- input tensor; batch normalisation is applied over axis 3, i.e. a
         channels-last layout (m, n_H_prev, n_W_prev, n_C_prev) is expected
    f -- side length of the square kernel used by the middle convolution
    filters -- sequence of three integers: filter counts of the three main-path convolutions
    stage -- value embedded (via str()) in the layer names
    block -- string embedded in the layer names
    s -- stride of the first main-path convolution and of the shortcut convolution

    Returns:
    Output tensor of the block after the final ReLU.
    """
    # Layer names look like "res<stage><block>_branch2a" / "bn<stage><block>_branch2a".
    stage_tag = str(stage) + block
    conv_prefix = "res" + stage_tag + "_branch"
    bn_prefix = "bn" + stage_tag + "_branch"

    n_first, n_middle, n_last = filters

    def conv_bn(tensor, n_filters, kernel, stride, pad, suffix):
        """Apply one convolution followed by channel-axis batch normalisation."""
        conv_layer = Conv2D(
            filters=n_filters,
            kernel_size=kernel,
            strides=(stride, stride),
            padding=pad,
            name=conv_prefix + suffix,
            kernel_initializer=glorot_uniform(seed=0),
        )
        convolved = conv_layer(tensor)
        return BatchNormalization(axis=3, name=bn_prefix + suffix)(convolved)

    # Keep a handle on the raw input; the shortcut branch is built from it later.
    skip = X

    # Component 1: strided 1x1 convolution.
    main = conv_bn(X, n_first, (1, 1), s, "valid", "2a")
    main = Activation("relu")(main)

    # Component 2: f x f convolution; "same" padding keeps the spatial size.
    main = conv_bn(main, n_middle, (f, f), 1, "same", "2b")
    main = Activation("relu")(main)

    # Component 3: 1x1 convolution, no activation before the merge.
    main = conv_bn(main, n_last, (1, 1), 1, "valid", "2c")

    # Shortcut: strided 1x1 convolution so its output shape matches the main path.
    skip = conv_bn(skip, n_last, (1, 1), s, "valid", "1")

    # Merge both paths, then apply the final non-linearity.
    merged = Add()([main, skip])
    return Activation("relu")(merged)

# Smoke test for convolutional_block: build it in a fresh graph and run one random batch.
tf.reset_default_graph()

# The `with` statement closes the session on exit, so no explicit close() is needed.
with tf.Session() as test:
    np.random.seed(1)  # fixed seed so the printed slice is reproducible
    A_prev = tf.placeholder("float",[3,4,4,6])
    X = np.random.randn(3,4,4,6)

    A = convolutional_block(A_prev,f=2,filters=[2,4,6],stage=1,block="a")
    test.run(tf.global_variables_initializer())

    # Feed learning phase 0 so this forward pass runs in inference mode.
    out = test.run([A],feed_dict={A_prev:X,K.learning_phase():0})
    # Print the channel vector at spatial position (1, 0) of the second sample.
    print("out = " + str(out[0][1][1][0]))

out = [0.09018461 1.2348977  0.46822017 0.0367176  0.         0.655166  ]


In [9]:
def ResNet50(input_shape, classes):
    """
    Assemble the ResNet-50 classifier as a Keras Model.

    Layout: zero-padding -> 7x7/2 convolution -> BN -> ReLU -> 3x3/2 max-pool, then four
    residual stages (each one convolutional_block followed by identity_blocks), a 2x2
    average pool, flatten, and a softmax Dense layer.

    Arguments:
    input_shape -- shape of one input sample, passed to Input(); the batch
                   normalisation layers assume channels-last, e.g. (n_H, n_W, n_C)
    classes -- number of units of the final softmax layer (also used in its name)

    Returns:
    A Keras Model named "ResNet" (not compiled).
    """
    # (stage number, filter counts, stride of the leading convolutional block,
    #  one letter per block in the stage -- the first letter is the convolutional block)
    stage_plan = [
        (2, [64, 64, 256], 1, "abc"),
        (3, [128, 128, 512], 2, "abcd"),
        (4, [256, 256, 1024], 2, "abcdef"),
        (5, [512, 512, 2048], 2, "abc"),
    ]

    net_input = Input(input_shape)

    # Stem: pad by 3 pixels on every side, then stage 1.
    features = ZeroPadding2D((3, 3))(net_input)
    stem_conv = Conv2D(
        filters=64,
        kernel_size=(7, 7),
        strides=(2, 2),
        name="conv1",
        kernel_initializer=glorot_uniform(seed=0),
    )
    features = stem_conv(features)
    features = BatchNormalization(axis=3, name="bn_conv1")(features)
    features = Activation("relu")(features)
    features = MaxPooling2D(pool_size=(3, 3), strides=(2, 2))(features)

    # Stages 2-5: one convolutional block, then identity blocks with the same filters.
    for stage_no, widths, stride, letters in stage_plan:
        features = convolutional_block(
            features, f=3, filters=widths, stage=stage_no, block=letters[0], s=stride
        )
        for letter in letters[1:]:
            features = identity_block(
                features, f=3, filters=widths, stage=stage_no, block=letter
            )

    # Head: average pool, flatten, softmax classifier.
    features = AveragePooling2D(pool_size=(2, 2), padding="same", name="avg_pool")(features)
    features = Flatten()(features)
    classifier = Dense(
        classes,
        activation="softmax",
        name="fc" + str(classes),
        kernel_initializer=glorot_uniform(seed=0),
    )
    scores = classifier(features)

    return Model(inputs=net_input, outputs=scores, name="ResNet")

# Build the network for 64x64 RGB inputs and 6 output classes.
model = ResNet50(input_shape=(64,64,3), classes=6)
# The network ends in a 6-way softmax and is trained on one-hot labels, so the matching
# loss is categorical cross-entropy. With binary_crossentropy Keras also resolves
# 'accuracy' to binary accuracy, which reports misleadingly high scores.
model.compile(optimizer="adam", loss="categorical_crossentropy", metrics=['accuracy'])

In [15]:
# Load the dataset through the helper module.
X_train_orig, Y_train_orig, X_test_orig, Y_test_orig, classes = resnets_utils.load_dataset()

# Rescale the image arrays by 1/255 (presumably 8-bit pixel intensities -- TODO confirm).
X_train, X_test = X_train_orig / 255, X_test_orig / 255

# One-hot encode the labels for 6 classes; the transpose puts samples on the first axis.
Y_train, Y_test = (
    resnets_utils.convert_to_one_hot(labels, 6).T
    for labels in (Y_train_orig, Y_test_orig)
)

# Report dataset sizes and array shapes (one line each).
print("\n".join([
    "number of training examples = " + str(X_train.shape[0]),
    "number of test examples = " + str(X_test.shape[0]),
    "X_train shape: " + str(X_train.shape),
    "Y_train shape: " + str(Y_train.shape),
    "X_test shape: " + str(X_test.shape),
    "Y_test shape: " + str(Y_test.shape),
]))

# Train briefly, then evaluate on the held-out set.
model.fit(X_train, Y_train, epochs=2, batch_size=32)
preds = model.evaluate(X_test, Y_test)

# preds[0] is the loss and preds[1] the accuracy metric requested at compile time.
print("误差值 = " + str(preds[0]))
print("准确率 = " + str(preds[1]))

number of training examples = 1080
number of test examples = 120
X_train shape: (1080, 64, 64, 3)
Y_train shape: (1080, 6)
X_test shape: (120, 64, 64, 3)
Y_test shape: (120, 6)
Train on 1080 samples
Epoch 1/10


ResourceExhaustedError: 2 root error(s) found.
  (0) Resource exhausted: OOM when allocating tensor with shape[32,512,8,8] and type float on /job:localhost/replica:0/task:0/device:GPU:0 by allocator GPU_0_bfc
	 [[{{node training/Adam/gradients/gradients/zeros_174}}]]
Hint: If you want to see a list of allocated tensors when OOM happens, add report_tensor_allocations_upon_oom to RunOptions for current allocation info. This isn't available when running in Eager mode.

	 [[metrics_2/acc/Identity/_2235]]
Hint: If you want to see a list of allocated tensors when OOM happens, add report_tensor_allocations_upon_oom to RunOptions for current allocation info. This isn't available when running in Eager mode.

  (1) Resource exhausted: OOM when allocating tensor with shape[32,512,8,8] and type float on /job:localhost/replica:0/task:0/device:GPU:0 by allocator GPU_0_bfc
	 [[{{node training/Adam/gradients/gradients/zeros_174}}]]
Hint: If you want to see a list of allocated tensors when OOM happens, add report_tensor_allocations_upon_oom to RunOptions for current allocation info. This isn't available when running in Eager mode.

0 successful operations.
0 derived errors ignored.