/
predict.py
319 lines (239 loc) · 11.1 KB
/
predict.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
import glob
import random
import json
import os
import six
import cv2
import numpy as np
from tqdm import tqdm
from time import time
from .train import find_latest_checkpoint
from .data_utils.data_loader import get_image_array, get_segmentation_array,\
DATA_LOADER_SEED, class_colors, get_pairs_from_paths
from .models.config import IMAGE_ORDERING
# Seed Python's `random` module at import time with the seed constant
# imported from the data loader module.
random.seed(DATA_LOADER_SEED)
def model_from_checkpoint_path(checkpoints_path):
    """Rebuild a model from its saved config and load its latest weights.

    Reads ``checkpoints_path + "_config.json"``, instantiates the model class
    named by its ``model_class`` entry through ``model_from_name`` and loads
    the weights file returned by ``find_latest_checkpoint(checkpoints_path)``.

    Args:
        checkpoints_path: Path prefix shared by the config file and the
            weight checkpoints.

    Returns:
        The constructed model after ``load_weights`` has been called on it.

    Raises:
        AssertionError: If the config file does not exist or no weights
            checkpoint is found.
    """
    # Imported inside the function, exactly as the original code does.
    from .models.all_models import model_from_name

    config_path = checkpoints_path + "_config.json"
    assert os.path.isfile(config_path), "Checkpoint not found."

    # A context manager guarantees the config file handle is closed.
    with open(config_path, "r") as config_file:
        model_config = json.load(config_file)

    latest_weights = find_latest_checkpoint(checkpoints_path)
    assert (latest_weights is not None), "Checkpoint not found."

    model = model_from_name[model_config['model_class']](
        model_config['n_classes'],
        input_height=model_config['input_height'],
        input_width=model_config['input_width'])
    print("loaded weights ", latest_weights)
    status = model.load_weights(latest_weights)

    # load_weights may return None or a status object; when a status object
    # comes back, expect_partial() is called on it (presumably to silence
    # warnings about a partially restored checkpoint -- confirm against the
    # backend in use).
    if status is not None:
        status.expect_partial()

    return model
def get_colored_segmentation_image(seg_arr, n_classes, colors=class_colors):
    """Turn a map of class indices into a 3-channel colour image.

    Args:
        seg_arr: Array of class indices; its first two dimensions give the
            output height and width.
        n_classes: Labels ``0 .. n_classes - 1`` are coloured.
        colors: Indexable of 3-component colours, one per class index.

    Returns:
        Array of shape ``(height, width, 3)`` in which every pixel labelled
        ``c`` carries ``colors[c]``; pixels with other labels stay zero.
    """
    height = seg_arr.shape[0]
    width = seg_arr.shape[1]
    colored = np.zeros((height, width, 3))

    for class_id in range(n_classes):
        class_mask = seg_arr[:, :] == class_id
        # Add this class's colour, one channel at a time, where the mask is set.
        for channel in range(3):
            channel_values = class_mask * colors[class_id][channel]
            colored[:, :, channel] += channel_values.astype('uint8')

    return colored
def get_legends(class_names, colors=class_colors):
    """Draw a legend image listing each class name next to its colour.

    The legend is 125 pixels wide, with one 25-pixel-high row per class plus
    25 extra pixels of height, on a background filled with 255.

    Args:
        class_names: Sequence of class name strings.
        colors: Sequence of 3-component colours, paired with ``class_names``
            by position.

    Returns:
        The ``uint8`` legend image.
    """
    n_classes = len(class_names)
    legend = np.zeros(((n_classes * 25) + 25, 125, 3),
                      dtype="uint8") + 255

    name_color_pairs = zip(class_names[:n_classes], colors[:n_classes])
    for row, (class_name, color) in enumerate(name_color_pairs):
        row_top = row * 25
        swatch_color = tuple([int(component) for component in color])
        # Class name in black on the left of the row...
        cv2.putText(legend, class_name, (5, row_top + 17),
                    cv2.FONT_HERSHEY_COMPLEX, 0.5, (0, 0, 0), 1)
        # ...and a colour swatch (thickness -1) on the right.
        cv2.rectangle(legend, (100, row_top), (125, row_top + 25),
                      swatch_color, -1)

    return legend
def overlay_seg_image(inp_img, seg_img):
    """Blend a segmentation image over the input image in equal parts.

    Args:
        inp_img: Input image; its first two dimensions set the target size.
        seg_img: Segmentation image, resized to the input size with
            nearest-neighbour interpolation before blending.

    Returns:
        ``inp_img / 2 + seg_img / 2`` cast to ``uint8``.
    """
    target_h = inp_img.shape[0]
    target_w = inp_img.shape[1]
    resized_seg = cv2.resize(seg_img, (target_w, target_h),
                             interpolation=cv2.INTER_NEAREST)

    blended = inp_img / 2 + resized_seg / 2
    return blended.astype('uint8')
def concat_lenends(seg_img, legend_img):
    """Place the legend to the left of the segmentation image on one canvas.

    The canvas is as tall as the taller of the two images and as wide as both
    together. Areas covered by neither image keep the value of the legend's
    first element, ``legend_img[0, 0, 0]``.

    Args:
        seg_img: Segmentation image, placed at the top right.
        legend_img: Legend image, placed at the top left.

    Returns:
        The combined image.
    """
    seg_h, seg_w = seg_img.shape[0], seg_img.shape[1]
    legend_h, legend_w = legend_img.shape[0], legend_img.shape[1]

    canvas_shape = (np.maximum(seg_h, legend_h), seg_w + legend_w, 3)
    canvas = np.zeros(canvas_shape).astype('uint8') + legend_img[0, 0, 0]

    # Slice assignment copies the pixel data into the canvas.
    canvas[:legend_h, :legend_w] = legend_img
    canvas[:seg_h, legend_w:] = seg_img

    return canvas
def visualize_segmentation(seg_arr, inp_img=None, n_classes=None,
                           colors=class_colors, class_names=None,
                           overlay_img=False, show_legends=False,
                           prediction_width=None, prediction_height=None):
    """Render a class-index map as a colour image, optionally decorated.

    Args:
        seg_arr: Array of class indices to colourise.
        inp_img: Optional input image. When given, the colour image is
            resized to its height and width (nearest-neighbour).
        n_classes: Number of classes to colour. When None it is inferred
            from the largest label present in ``seg_arr``.
        colors: Per-class colours passed on to the colouring and legend
            helpers.
        class_names: Class names for the legend; required when
            ``show_legends`` is true.
        overlay_img: If true, blend the colour image over ``inp_img``
            (``inp_img`` is then required).
        show_legends: If true, attach a legend to the left of the result.
        prediction_width: Optional output width; only used when
            ``prediction_height`` is also given.
        prediction_height: Optional output height; only used when
            ``prediction_width`` is also given.

    Returns:
        The rendered segmentation image.

    Raises:
        AssertionError: If ``overlay_img`` is set without ``inp_img`` or
            ``show_legends`` is set without ``class_names``.
    """
    if n_classes is None:
        # Labels are zero-based and the colouring helper iterates
        # range(n_classes), so the count must be the largest label plus one;
        # using the bare maximum would leave the highest class uncoloured.
        n_classes = int(np.max(seg_arr)) + 1

    seg_img = get_colored_segmentation_image(seg_arr, n_classes, colors=colors)

    if inp_img is not None:
        original_h = inp_img.shape[0]
        original_w = inp_img.shape[1]
        seg_img = cv2.resize(seg_img, (original_w, original_h),
                             interpolation=cv2.INTER_NEAREST)

    if (prediction_height is not None) and (prediction_width is not None):
        seg_img = cv2.resize(seg_img, (prediction_width, prediction_height),
                             interpolation=cv2.INTER_NEAREST)
        # Keep the input image the same size so that it can be overlaid.
        if inp_img is not None:
            inp_img = cv2.resize(inp_img,
                                 (prediction_width, prediction_height))

    if overlay_img:
        assert inp_img is not None
        seg_img = overlay_seg_image(inp_img, seg_img)

    if show_legends:
        assert class_names is not None
        legend_img = get_legends(class_names, colors=colors)
        seg_img = concat_lenends(seg_img, legend_img)

    return seg_img
def predict(model=None, inp=None, out_fname=None,
            checkpoints_path=None, overlay_img=False,
            class_names=None, show_legends=False, colors=class_colors,
            prediction_width=None, prediction_height=None,
            read_image_type=1):
    """Segment a single image and optionally save the visualisation.

    Args:
        model: Model to use. When None, it is loaded from
            ``checkpoints_path``.
        inp: Image as a numpy array, or the path of an image file to read
            with ``cv2.imread``.
        out_fname: If not None, the rendered segmentation image is written
            to this path with ``cv2.imwrite``.
        checkpoints_path: Checkpoint path prefix used when ``model`` is None.
        overlay_img: Forwarded to ``visualize_segmentation``.
        class_names: Forwarded to ``visualize_segmentation``.
        show_legends: Forwarded to ``visualize_segmentation``.
        colors: Forwarded to ``visualize_segmentation``.
        prediction_width: Forwarded to ``visualize_segmentation``.
        prediction_height: Forwarded to ``visualize_segmentation``.
        read_image_type: Flag passed to ``cv2.imread`` when ``inp`` is a path.

    Returns:
        Array of shape ``(output_height, output_width)`` holding the
        predicted class index of every output pixel.

    Raises:
        AssertionError: If neither a model nor a checkpoint path is given,
            if ``inp`` is missing or of the wrong type, if the image file
            cannot be read, or if the image has an unexpected rank.
    """
    if model is None and (checkpoints_path is not None):
        model = model_from_checkpoint_path(checkpoints_path)

    # Fail early with a clear message instead of an AttributeError later on.
    assert (model is not None),\
        "Please provide the model or the checkpoints_path"

    assert (inp is not None)
    assert (isinstance(inp, np.ndarray) or
            isinstance(inp, six.string_types)),\
        "Input should be the CV image or the input file name"

    if isinstance(inp, six.string_types):
        inp_path = inp
        inp = cv2.imread(inp_path, read_image_type)
        # cv2.imread signals a missing or unreadable file by returning None.
        assert (inp is not None),\
            "Could not read image: {}".format(inp_path)

    # NOTE(review): the message mentions h,w,3 but ranks 1 and 4 are also
    # accepted while rank 2 is rejected -- confirm this is intended.
    assert (len(inp.shape) == 3 or len(inp.shape) == 1 or len(inp.shape) == 4), "Image should be h,w,3 "

    output_width = model.output_width
    output_height = model.output_height
    input_width = model.input_width
    input_height = model.input_height
    n_classes = model.n_classes

    x = get_image_array(inp, input_width, input_height,
                        ordering=IMAGE_ORDERING)
    # Run a batch of one and keep its single result.
    pr = model.predict(np.array([x]))[0]
    # Pick the highest-scoring class for every output pixel.
    pr = pr.reshape((output_height, output_width, n_classes)).argmax(axis=2)

    seg_img = visualize_segmentation(pr, inp, n_classes=n_classes,
                                     colors=colors, overlay_img=overlay_img,
                                     show_legends=show_legends,
                                     class_names=class_names,
                                     prediction_width=prediction_width,
                                     prediction_height=prediction_height)

    if out_fname is not None:
        cv2.imwrite(out_fname, seg_img)

    return pr
def predict_multiple(model=None, inps=None, inp_dir=None, out_dir=None,
                     checkpoints_path=None, overlay_img=False,
                     class_names=None, show_legends=False, colors=class_colors,
                     prediction_width=None, prediction_height=None, read_image_type=1):
    """Run ``predict`` over a list of inputs or over a directory of images.

    Args:
        model: Model to use. When None and ``checkpoints_path`` is given,
            the model is loaded from that checkpoint.
        inps: List of inputs accepted by ``predict``.
        inp_dir: Directory searched for ``*.jpg``, ``*.png`` and ``*.jpeg``
            files (sorted by path) when ``inps`` is None.
        out_dir: If not None, visualisations are written here; the directory
            is created when missing. Path inputs keep their base name, other
            inputs are saved as ``<index>.jpg``.
        checkpoints_path: Checkpoint path prefix used when ``model`` is None.
        overlay_img: Forwarded to ``predict``.
        class_names: Forwarded to ``predict``.
        show_legends: Forwarded to ``predict``.
        colors: Forwarded to ``predict``.
        prediction_width: Forwarded to ``predict``.
        prediction_height: Forwarded to ``predict``.
        read_image_type: Forwarded to ``predict``.

    Returns:
        List with the result of ``predict`` for every input, in order.

    Raises:
        AssertionError: If the inputs do not end up being a list.
    """
    if (checkpoints_path is not None) and model is None:
        model = model_from_checkpoint_path(checkpoints_path)

    if inps is None and (inp_dir is not None):
        found_paths = []
        for pattern in ("*.jpg", "*.png", "*.jpeg"):
            found_paths += glob.glob(os.path.join(inp_dir, pattern))
        inps = sorted(found_paths)

    assert type(inps) is list

    if out_dir is not None and not os.path.exists(out_dir):
        os.makedirs(out_dir)

    all_prs = []
    for index, inp in enumerate(tqdm(inps)):
        if out_dir is None:
            out_fname = None
        elif isinstance(inp, six.string_types):
            # Path inputs keep their original file name.
            out_fname = os.path.join(out_dir, os.path.basename(inp))
        else:
            # Other inputs have no name, so they are numbered.
            out_fname = os.path.join(out_dir, str(index) + ".jpg")

        all_prs.append(
            predict(model, inp, out_fname,
                    overlay_img=overlay_img, class_names=class_names,
                    show_legends=show_legends, colors=colors,
                    prediction_width=prediction_width,
                    prediction_height=prediction_height,
                    read_image_type=read_image_type))

    return all_prs
def set_video(inp, video_name):
    """Open a video source and, if requested, a writer of matching size.

    Args:
        inp: Source passed to ``cv2.VideoCapture``.
        video_name: Output file name for the XVID-encoded video, or None to
            skip creating a writer.

    Returns:
        Tuple ``(cap, video, fps)``: the capture object, the
        ``cv2.VideoWriter`` (None when ``video_name`` is None) and the
        source frame rate truncated to an int.
    """
    cap = cv2.VideoCapture(inp)
    fps = int(cap.get(cv2.CAP_PROP_FPS))

    # predict_video treats its output as optional, so do not try to build a
    # writer from a missing file name.
    if video_name is None:
        return cap, None, fps

    video_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    video_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    size = (video_width, video_height)
    fourcc = cv2.VideoWriter_fourcc(*"XVID")
    video = cv2.VideoWriter(video_name, fourcc, fps, size)
    return cap, video, fps
def predict_video(model=None, inp=None, output=None,
                  checkpoints_path=None, display=False, overlay_img=True,
                  class_names=None, show_legends=False, colors=class_colors,
                  prediction_width=None, prediction_height=None):
    """Segment a video frame by frame, optionally saving and displaying it.

    Args:
        model: Model to use. When None, it is loaded from
            ``checkpoints_path``.
        inp: Video source handed to ``set_video``.
        output: Output video file name; frames are only written when this is
            not None.
        checkpoints_path: Checkpoint path prefix used when ``model`` is None.
        display: If true, show every rendered frame in a window; pressing
            ``q`` stops processing.
        overlay_img: Forwarded to ``visualize_segmentation``.
        class_names: Forwarded to ``visualize_segmentation``.
        show_legends: Forwarded to ``visualize_segmentation``.
        colors: Forwarded to ``visualize_segmentation``.
        prediction_width: Forwarded to ``visualize_segmentation``.
        prediction_height: Forwarded to ``visualize_segmentation``.

    Raises:
        AssertionError: If neither a model nor a checkpoint path is given.
    """
    if model is None and (checkpoints_path is not None):
        model = model_from_checkpoint_path(checkpoints_path)

    assert (model is not None),\
        "Please provide the model or the checkpoints_path"

    n_classes = model.n_classes

    cap, video, fps = set_video(inp, output)
    # NOTE(review): the writer is created with the source frame size;
    # rendered frames of a different size (prediction_width/height, legends)
    # presumably cannot be written correctly -- confirm before relying on it.
    try:
        while cap.isOpened():
            prev_time = time()
            _, frame = cap.read()
            if frame is None:
                # No frame returned: stop processing.
                break

            pr = predict(model=model, inp=frame)
            fused_img = visualize_segmentation(
                pr, frame, n_classes=n_classes,
                colors=colors,
                overlay_img=overlay_img,
                show_legends=show_legends,
                class_names=class_names,
                prediction_width=prediction_width,
                prediction_height=prediction_height
            )

            print("FPS: {}".format(1/(time() - prev_time)))

            if output is not None:
                video.write(fused_img)

            if display:
                cv2.imshow('Frame masked', fused_img)
                # A delay of 0 makes waitKey block until a key is pressed,
                # so clamp the delay for sources that report 0 FPS.
                if cv2.waitKey(max(fps, 1)) & 0xFF == ord('q'):
                    break
    finally:
        # Release the capture, writer and windows even if a frame fails.
        cap.release()
        if output is not None:
            video.release()
        cv2.destroyAllWindows()
def evaluate(model=None, inp_images=None, annotations=None,
             inp_images_dir=None, annotations_dir=None, checkpoints_path=None, read_image_type=1):
    """Compute intersection-over-union scores of a model on annotated images.

    Args:
        model: Model to evaluate. When None, it is loaded from
            ``checkpoints_path``.
        inp_images: List of inputs accepted by ``predict``.
        annotations: List of annotations, paired with ``inp_images`` by
            position.
        inp_images_dir: Image directory, used with ``annotations_dir`` when
            ``inp_images`` is None.
        annotations_dir: Annotation directory, used when ``inp_images`` is
            None.
        checkpoints_path: Checkpoint path prefix used when ``model`` is None.
        read_image_type: Forwarded to ``predict`` and
            ``get_segmentation_array``.

    Returns:
        Dict with ``"frequency_weighted_IU"``, ``"mean_IU"`` and
        ``"class_wise_IU"`` (one score per class).

    Raises:
        AssertionError: If required arguments are missing or the inputs and
            annotations are not lists.
    """
    if model is None:
        assert (checkpoints_path is not None),\
            "Please provide the model or the checkpoints_path"
        model = model_from_checkpoint_path(checkpoints_path)

    if inp_images is None:
        assert (inp_images_dir is not None),\
            "Please provide inp_images or inp_images_dir"
        assert (annotations_dir is not None),\
            "Please provide annotations or annotations_dir"

        paths = get_pairs_from_paths(inp_images_dir, annotations_dir)
        # Transpose the list of pairs into parallel lists.
        paths = list(zip(*paths))
        inp_images = list(paths[0])
        annotations = list(paths[1])

    assert type(inp_images) is list
    assert type(annotations) is list

    # Per-class pixel counts accumulated over all images.
    tp = np.zeros(model.n_classes)
    fp = np.zeros(model.n_classes)
    fn = np.zeros(model.n_classes)
    n_pixels = np.zeros(model.n_classes)

    # zip() has no length, so give tqdm the total explicitly.
    n_pairs = min(len(inp_images), len(annotations))
    for inp, ann in tqdm(zip(inp_images, annotations), total=n_pairs):
        pr = predict(model, inp, read_image_type=read_image_type)
        gt = get_segmentation_array(ann, model.n_classes,
                                    model.output_width, model.output_height,
                                    no_reshape=True, read_image_type=read_image_type)
        # Reduce the last axis of the ground truth to class indices.
        gt = gt.argmax(-1)
        pr = pr.flatten()
        gt = gt.flatten()

        for cl_i in range(model.n_classes):
            pr_is_cl = pr == cl_i
            gt_is_cl = gt == cl_i
            tp[cl_i] += np.sum(pr_is_cl * gt_is_cl)
            fp[cl_i] += np.sum(pr_is_cl * (gt != cl_i))
            fn[cl_i] += np.sum((pr != cl_i) * gt_is_cl)
            n_pixels[cl_i] += np.sum(gt_is_cl)

    # The small constant avoids division by zero for classes never seen.
    cl_wise_score = tp / (tp + fp + fn + 0.000000000001)
    n_pixels_norm = n_pixels / np.sum(n_pixels)
    frequency_weighted_IU = np.sum(cl_wise_score*n_pixels_norm)
    mean_IU = np.mean(cl_wise_score)

    return {
        "frequency_weighted_IU": frequency_weighted_IU,
        "mean_IU": mean_IU,
        "class_wise_IU": cl_wise_score
    }