-
Notifications
You must be signed in to change notification settings - Fork 0
/
DivResNet.py
301 lines (264 loc) · 12.2 KB
/
DivResNet.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
"""
初赛题的多标签解决方法,该方法已知最高能实现94.375%的准确率
请注意你们的TFrecords的读取,有问题的话可以换成其他的方法,目前还有点玄学
如果你们内存/显存不够了,可以减小batch_size试试,还不行就压缩图片大小到64*64
另外注意binary_crossentropy和categorical_crossentropy的区别
binary_crossentropy是multi-label任务从原理上唯一正确选择,Keras的官方acc无法正常显示,需要自定义
categorical_crossentropy只会找到三个输出神经元中第一个最大值(1)的地址,所以只会更新最前面的那个,这不正确
原有acc=0.84意味着真正的正确率约为0.84^3=0.59,而只有acc=98.08%才能接近94.35%
请大家多多思考,多多尝试,争取搞出点新想法而不是靠玄学来提升准确率
比如改一改网络的结构或者规模?(模型简单就不要太深,你用ResNet50基本也是这个正确率,梯度会消失的)
接下来,欢迎大家享受深度玄学调参之旅~~
welcome to the Beginning of Deep Dark Learning~~
"""
import keras
from keras.models import model_from_json
import tensorflow as tf
from keras import backend as K
from keras.applications.inception_v3 import inception_v3
from keras import Input, Model, layers
from keras.utils import np_utils
from keras.layers import Dense, Activation, Dropout
from keras.layers import Conv2D, ZeroPadding2D, GlobalAveragePooling2D
from keras.layers.pooling import MaxPooling2D, AveragePooling2D, GlobalMaxPooling2D
from keras.layers.normalization import BatchNormalization
from keras.layers.core import Flatten
from keras.preprocessing.image import ImageDataGenerator
from keras.preprocessing import image
from IPython.display import SVG
from keras.utils.vis_utils import model_to_dot
from keras.engine.topology import get_source_inputs
from keras.optimizers import Adam
test_path = "testTanh.tfrecords" # TFrecords的名字改成你们自己的
train_path = "trainTanh.tfrecords"
batch_size = 32
num_classes = 3
epochs = 5
num_predictions = 3
input_shape = (128, 128, 3)
train_samples = 8000
val_samples = 8000
def imgs_input_fn(filenames, perform_shuffle=False, Repeats=epochs, Batchs=train_samples):
def _parse_function(serialized):
features = \
{
'raw_image': tf.FixedLenFeature([], tf.string),
'label': tf.FixedLenFeature([num_classes], tf.int64) # num_classes=0就空着[]
}
# Parse the serialized data so we get a dict with our data.
parsed_example = tf.parse_single_example(serialized=serialized,
features=features)
# Get the image as raw bytes.
image_shape = tf.stack([128, 128, 3])
image_raw = parsed_example['raw_image']
label = tf.cast(parsed_example['label'], tf.int32)
# label = tf.one_hot(label, num_classes)
# Decode the raw bytes so it becomes a tensor with type.
image = tf.decode_raw(image_raw, tf.uint8)
image = tf.cast(image, tf.float32) * 1./255 - 0.5
image = tf.reshape(image, image_shape)
image = tf.reverse(image, axis=[2]) # 'RGB'->'BGR'
return image, label
# 其实这个才是标准用法,然而莫名其妙的行不通,我也很绝望啊
# imgs, labels = _parse_function(filenames)
# # print(imgs, labels)
# x_batch, y_batch = K.tf.train.batch([imgs, labels], batch_size=Batchs, capacity=100)
# print(x_batch, y_batch)
# return x_batch, y_batch
dataset = tf.data.TFRecordDataset(filenames=filenames)
# Parse the serialized data in the TFRecords files.
# This returns TensorFlow tensors for the image and labels.
dataset = dataset.map(_parse_function)
dataset = dataset.batch(Batchs).repeat(Repeats) # Batch size to use
iterator = dataset.make_one_shot_iterator()
iterator = iterator.get_next()
with tf.Session() as sess: # 这里使用的是“大力出奇迹”,把所有数据都读入内存
batch_features, batch_labels = sess.run(iterator)
return batch_features, batch_labels
# sess.run(tf.global_variables_initializer()) # 参数初始化,要用yield的话贼重要
# while Run:
# batch_features, batch_labels = sess.run(iterator)
# yield {'input': batch_features}, {'output': batch_labels}
'''开始
以下是官网上ResNet的部分结构函数,直接COPY过来用的
'''
def identity_block(input_tensor, kernel_size, filters, stage, block):
"""The identity block is the block that has no conv layer at shortcut.
# Arguments
input_tensor: input tensor #输入变量#
kernel_size: defualt 3, the kernel size of middle conv layer at main path #卷积核的大小#
filters: list of integers, the filterss of 3 conv layer at main path #卷积核的数目#
stage: integer, current stage label, used for generating layer names #当前阶段的标签#
block: 'a','b'..., current block label, used for generating layer names #当前块的标签#
# Returns
Output tensor for the block. #返回块的输出变量#
"""
filters1, filters2, filters3 = filters # 滤波器的名称#
if K.image_data_format() == 'channels_last': # 代表图像通道维的位置#
bn_axis = 3
else:
bn_axis = 1
conv_name_base = 'res' + str(stage) + block + '_branch'
bn_name_base = 'bn' + str(stage) + block + '_branch'
x = Conv2D(filters1, (1, 1), name=conv_name_base + '2a')(input_tensor)
x = BatchNormalization(axis=bn_axis, name=bn_name_base + '2a')(x)
x = Activation('relu')(x) # 卷积层,BN层,激活函数#
x = Conv2D(filters2, kernel_size,
padding='same', name=conv_name_base + '2b')(x)
x = BatchNormalization(axis=bn_axis, name=bn_name_base + '2b')(x)
x = Activation('relu')(x)
x = Conv2D(filters3, (1, 1), name=conv_name_base + '2c')(x)
x = BatchNormalization(axis=bn_axis, name=bn_name_base + '2c')(x)
x = layers.add([x, input_tensor])
x = Activation('relu')(x)
return x
def conv_block(input_tensor, kernel_size, filters, stage, block, strides=(2, 2)):
# conv_block is the block that has a conv layer at shortcut
filters1, filters2, filters3 = filters
if K.image_data_format() == 'channels_last':
bn_axis = 3
else:
bn_axis = 1
conv_name_base = 'res' + str(stage) + block + '_branch'
bn_name_base = 'bn' + str(stage) + block + '_branch'
x = Conv2D(filters1, (1, 1), strides=strides,
name=conv_name_base + '2a')(input_tensor)
x = BatchNormalization(axis=bn_axis, name=bn_name_base + '2a')(x)
x = Activation('relu')(x)
x = Conv2D(filters2, kernel_size, padding='same',
name=conv_name_base + '2b')(x)
x = BatchNormalization(axis=bn_axis, name=bn_name_base + '2b')(x)
x = Activation('relu')(x)
x = Conv2D(filters3, (1, 1), name=conv_name_base + '2c')(x)
x = BatchNormalization(axis=bn_axis, name=bn_name_base + '2c')(x)
shortcut = Conv2D(filters3, (1, 1), strides=strides,
name=conv_name_base + '1')(input_tensor)
shortcut = BatchNormalization(axis=bn_axis, name=bn_name_base + '1')(shortcut)
x = layers.add([x, shortcut])
x = Activation('relu')(x)
return x
def DivResNet(include_top=True, weights='imagenet',
input_tensor=None, pooling='max',
classes=num_classes): # 这里采用的权重是imagenet,可以更改,种类为1000#
# Determine proper input shape
if input_tensor is None:
img_input = Input(shape=input_shape)
else:
if not K.is_keras_tensor(input_tensor):
img_input = Input(tensor=input_tensor, shape=input_shape)
else:
img_input = input_tensor
if K.image_data_format() == 'channels_last':
bn_axis = 3
else:
bn_axis = 1
x = ZeroPadding2D((3, 3))(img_input) # 对图片界面填充0,保证特征图的大小#
x = Conv2D(64, (7, 7), strides=(2, 2), name='conv1')(x) # 定义卷积层#
x = BatchNormalization(axis=bn_axis, name='bn_conv1')(x) # 批标准化#
x = Activation('relu')(x) # 激活函数#
x = MaxPooling2D((3, 3), strides=(2, 2))(x) # 最大池化层#
# stage2#
x = conv_block(x, 3, [32, 32, 128], stage=2, block='a', strides=(1, 1))
x = identity_block(x, 3, [32, 32, 128], stage=2, block='b')
# stage3#
x = conv_block(x, 3, [64, 64, 256], stage=3, block='a')
x = identity_block(x, 3, [64, 64, 256], stage=3, block='b')
# stage4#
x = conv_block(x, 3, [128, 128, 512], stage=4, block='a')
x = identity_block(x, 3, [128, 128, 512], stage=4, block='b')
# stage5#
x = conv_block(x, 3, [256, 256, 1024], stage=5, block='a')
x = identity_block(x, 3, [256, 256, 1024], stage=5, block='b')
x = AveragePooling2D((4, 4), name='avr_pool')(x) # 平均池化层#
if include_top:
x = Flatten()(x)
x = Dense(classes, activation='sigmoid', name='fc1000')(x)
else:
if pooling == 'avg':
x = GlobalAveragePooling2D()(x)
elif pooling == 'max':
x = GlobalMaxPooling2D()(x)
# Ensure that the model takes into account
# any potential predecessors of `input_tensor`.
if input_tensor is not None:
inputs = get_source_inputs(input_tensor)
else:
inputs = img_input
# Create model.
model = Model(inputs, x, name='resnet50')
return model
'''结束
以上是官网上ResNet50的部分结构函数,直接COPY过来用的
'''
def multi_pred(y_true, y_pred):
acc = tf.floor(K.mean(K.equal(y_true, K.round(y_pred)), axis=-1))
return acc
# The data, split between train and test sets:
# print(imgs_input_fn(train_path).shape)
x_train, y_train = imgs_input_fn(train_path)
x_test, y_test = imgs_input_fn(test_path, Batchs=val_samples)
print('x_train shape:', x_train.shape)
print(x_train.shape[0], 'train samples')
print(x_test.shape[0], 'test samples')
# # Convert class vectors to binary class matrices.
# train_inp = Input(tensor=x_train)
model = DivResNet()
# model = keras.models.load_model('model3.h5', custom_objects={'multi_pred': multi_pred})
model.compile(loss='binary_crossentropy',
optimizer=keras.optimizers.RMSprop(lr=1e-3, decay=1e-6),
metrics=['accuracy', multi_pred],
)
model.summary()
best_acc = []
train_acc = []
model_json = model.to_json()
mdl_save_path = 'modelMini.json'
with open(mdl_save_path, "w") as json_file:
json_file.write(model_json)
TensorBoard = keras.callbacks.TensorBoard(log_dir='./DivResNet', histogram_freq=1,
write_images=True)
CSVLogger = keras.callbacks.CSVLogger('./DivResNet.csv', separator=',', append=True)
for i in range(epochs):
History = model.fit(x_train, y_train,
batch_size=batch_size,
epochs=1,
shuffle=True,
callbacks=[CSVLogger],
verbose=1)
with open('DivHistory.txt', 'w') as f:
f.write(str(History.history))
scores = model.evaluate(x_test, y_test, verbose=0)
print('epoch', i)
print('Test loss:', scores[0])
print('Test accuracy:', scores[2])
best_acc.append((i, {'Multi-acc': scores[2]}))
# Save the final model
mdl_save_path = 'modelMini_'+str(i)+'_'+str(scores[2])+'.h5'
model.save(mdl_save_path)
if i == 1:
model.compile(loss='binary_crossentropy',
optimizer=keras.optimizers.RMSprop(lr=1e-4, decay=1e-6),
metrics=['accuracy', multi_pred],
)
scores = model.evaluate(x_train, y_train, verbose=0)
print('epoch', i)
print('Train loss:', scores[0])
print('Train accuracy:', scores[2])
print(best_acc)
# 保存模型
model.save('myModel.h5') # HDF5文件,pip install h5py
# 保存网络结构,载入网络结构
json_path = 'myModel.json'
json_string = model.to_json()
with open(json_path, "w") as json_file:
json_file.write(json_string)
print(json_string)
# Score trained model.
scores = model.evaluate(x_test, y_test, verbose=0)
print('Test loss:', scores[0])
print('Test accuracy:', scores[2])
# 输出图
# SVG(model_to_dot(model).create(prog='dot', format='svg'))
# 使用完模型之后,清空之前model占用的内存
K.clear_session()
tf.reset_default_graph()