Large diffs are not rendered by default.

@@ -0,0 +1,229 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 164,
"metadata": {},
"outputs": [],
"source": [
"import numpy as np\n",
"import tensorflow as tf\n",
"from keras import backend as kb\n",
"\n",
"def yolo_filter_boxes(box_confidence, boxes, box_class_probs, threshold = 0.6):\n",
"    \"\"\"\n",
"    Keep only the boxes whose best class score reaches the threshold.\n",
"\n",
"    Arguments:\n",
"    box_confidence -- tensor of shape (S, S, B, 1)\n",
"    boxes -- tensor of shape (S, S, B, 4)\n",
"    box_class_probs -- tensor of shape (S, S, B, C)\n",
"    threshold -- real value; a box is discarded when its highest class score is below it\n",
"\n",
"    S - grid size, B - number of anchor boxes, C - number of classes\n",
"\n",
"    Returns:\n",
"    scores -- tensor of shape (None,), highest class score of each kept box\n",
"    boxes -- tensor of shape (None, 4), (b_x, b_y, b_h, b_w) coordinates of each kept box\n",
"    classes -- tensor of shape (None,), index of the highest-scoring class of each kept box\n",
"\n",
"    Note: \"None\" stands for the number of kept boxes, which depends on the threshold\n",
"    and is therefore only known when the graph is run.\n",
"    \"\"\"\n",
"\n",
"    # Per-class score: the (.., 1) confidence broadcasts across the C class probabilities.\n",
"    class_scores = tf.multiply(box_confidence, box_class_probs)\n",
"\n",
"    # Best score of every box and the class index that achieves it.\n",
"    best_score = kb.max(class_scores, axis=-1)\n",
"    best_class = tf.argmax(class_scores, axis=-1)\n",
"\n",
"    keep = tf.greater_equal(best_score, threshold)\n",
"\n",
"    # boolean_mask flattens the (S, S, B) leading dimensions into one axis of kept boxes.\n",
"    return (\n",
"        tf.boolean_mask(best_score, keep),\n",
"        tf.boolean_mask(boxes, keep),\n",
"        tf.boolean_mask(best_class, keep),\n",
"    )\n",
" \n",
" \n",
"def iou(box1, box2):\n",
"    \"\"\"\n",
"    Intersection over union (IoU) of two axis-aligned boxes.\n",
"\n",
"    Arguments:\n",
"    box1 -- first box, coordinates (x1, y1, x2, y2)\n",
"    box2 -- second box, coordinates (x1, y1, x2, y2)\n",
"\n",
"    Returns:\n",
"    scalar tensor -- intersection area divided by union area; when the union\n",
"    area is 0 the division is by 1 instead, so the result is never NaN\n",
"    \"\"\"\n",
"    # Overlap extent along each axis; negative when the boxes do not overlap on that axis.\n",
"    overlap_w = tf.subtract(tf.minimum(box1[2], box2[2]), tf.maximum(box1[0], box2[0]))\n",
"    overlap_h = tf.subtract(tf.minimum(box1[3], box2[3]), tf.maximum(box1[1], box2[1]))\n",
"\n",
"    # Clamp at 0 so disjoint boxes contribute no intersection area.\n",
"    intersect = tf.multiply(tf.maximum(overlap_w, 0), tf.maximum(overlap_h, 0))\n",
"\n",
"    area1 = tf.multiply(tf.subtract(box1[2], box1[0]), tf.subtract(box1[3], box1[1]))\n",
"    area2 = tf.multiply(tf.subtract(box2[2], box2[0]), tf.subtract(box2[3], box2[1]))\n",
"    union = tf.subtract(tf.add(area1, area2), intersect)\n",
"\n",
"    # Two zero-area boxes give union == 0 and 0/0 would be NaN. Dividing by 1 there\n",
"    # yields 0 for well-formed boxes, whose intersection is then 0 as well.\n",
"    safe_union = tf.where(tf.equal(union, 0), tf.ones_like(union), union)\n",
"\n",
"    return tf.divide(intersect, safe_union)\n",
" \n",
" \n",
"def non_max_suppression(scores, boxes, classes, max_boxes=10, iou_threshold=0.5):\n",
"    \"\"\"\n",
"    Applies Non-max suppression (NMS) to set of boxes\n",
"\n",
"    Arguments:\n",
"    scores -- tensor of shape (None,), output of yolo_filter_boxes()\n",
"    boxes -- tensor of shape (None, 4), output of yolo_filter_boxes() that have been scaled to the image size,\n",
"             given as the two opposite corners of each box\n",
"    classes -- tensor of shape (None,), output of yolo_filter_boxes()\n",
"    max_boxes -- integer, maximum number of predicted boxes you'd like\n",
"    iou_threshold -- real value, \"intersection over union\" threshold used for NMS filtering\n",
"\n",
"    Returns:\n",
"    scores -- tensor of shape (None,), predicted score for each kept box\n",
"    boxes -- tensor of shape (None, 4), predicted box coordinates\n",
"    classes -- tensor of shape (None,), predicted class for each kept box\n",
"\n",
"    Note: The \"None\" dimension of the output tensors is at most max_boxes; fewer boxes are\n",
"    returned when fewer survive suppression. Kept boxes are ordered by decreasing score.\n",
"    \"\"\"\n",
"\n",
"    # Selection is done by a single graph op, so nothing is evaluated while the graph\n",
"    # is being built and the indices always refer to the unfiltered inputs.\n",
"    # The op works on float32, so only the copies used for selection are cast; the\n",
"    # gathered outputs keep the dtype of the original tensors.\n",
"    nms_indices = tf.image.non_max_suppression(\n",
"        tf.cast(boxes, tf.float32),\n",
"        tf.cast(scores, tf.float32),\n",
"        max_boxes,\n",
"        iou_threshold=iou_threshold)\n",
"\n",
"    scores = kb.gather(scores, nms_indices)\n",
"    boxes = kb.gather(boxes, nms_indices)\n",
"    classes = kb.gather(classes, nms_indices)\n",
"\n",
"    return scores, boxes, classes"
]
},
{
"cell_type": "code",
"execution_count": 166,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[12 3 6 49 33 40 37 40 27 23]\n",
"scores[2] = 6.0194\n",
"boxes[2] = [ 6.41169071 2.62523627 3.64737511 10.97169209]\n",
"classes[2] = 3.37129\n",
"scores.shape = (10,)\n",
"boxes.shape = (10, 4)\n",
"classes.shape = (10,)\n",
"[ -0.99814534 5.00565434 -1.99267793 -3.4164257 -1.0503962\n",
" 6.51658058 -0.27600873 6.51658058 -10.05641556 6.34522581]\n"
]
}
],
"source": [
"with tf.Session() as test_b:\n",
"    scores = tf.random_normal([54,], mean=1, stddev=4, seed = 1)\n",
"    boxes = tf.random_normal([54, 4], mean=1, stddev=4, seed = 1)\n",
"    classes = tf.random_normal([54,], mean=1, stddev=4, seed = 1)\n",
"    scores, boxes, classes = non_max_suppression(scores, boxes, classes, max_boxes=10)\n",
"\n",
"    # Fetch everything in ONE run: each separate run/eval draws new random inputs,\n",
"    # so values printed from different evals would not belong to the same sample.\n",
"    scores_val, boxes_val, classes_val = test_b.run([scores, boxes, classes])\n",
"\n",
"    print(\"scores[2] = \" + str(scores_val[2]))\n",
"    print(\"boxes[2] = \" + str(boxes_val[2]))\n",
"    print(\"classes[2] = \" + str(classes_val[2]))\n",
"    print(\"scores.shape = \" + str(scores_val.shape))\n",
"    print(\"boxes.shape = \" + str(boxes_val.shape))\n",
"    print(\"classes.shape = \" + str(classes_val.shape))\n"
]
},
{
"cell_type": "code",
"execution_count": 108,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"scores[2] = 10.7506\n",
"boxes[2] = [ 8.42653275 3.27136683 -0.53134358 -4.94137335]\n",
"classes[2] = 7\n",
"scores.shape = (?,)\n",
"boxes.shape = (?, 4)\n",
"classes.shape = (?,)\n",
"iou = 0.142857142857\n"
]
}
],
"source": [
"S = 19\n",
"with tf.Session() as test_a:\n",
"    box_confidence = tf.random_normal([S, S, 5, 1], mean=1, stddev=4, seed = 1)\n",
"    boxes = tf.random_normal([S, S, 5, 4], mean=1, stddev=4, seed = 1)\n",
"    box_class_probs = tf.random_normal([S, S, 5, 80], mean=1, stddev=4, seed = 1)\n",
"    scores, boxes, classes = yolo_filter_boxes(box_confidence, boxes, box_class_probs, threshold = 0.5)\n",
"\n",
"    # Fetch everything in ONE run: each separate run/eval draws new random inputs,\n",
"    # so values printed from different evals would not belong to the same sample.\n",
"    scores_val, boxes_val, classes_val = test_a.run([scores, boxes, classes])\n",
"\n",
"    print(\"scores[2] = \" + str(scores_val[2]))\n",
"    print(\"boxes[2] = \" + str(boxes_val[2]))\n",
"    print(\"classes[2] = \" + str(classes_val[2]))\n",
"\n",
"    # Static (graph-time) shapes: the number of kept boxes is unknown until run time.\n",
"    print(\"scores.shape = \" + str(scores.shape))\n",
"    print(\"boxes.shape = \" + str(boxes.shape))\n",
"    print(\"classes.shape = \" + str(classes.shape))\n",
"\n",
"    # Two 2x2 boxes overlapping in a 1x1 square: IoU = 1 / (4 + 4 - 1).\n",
"    box1 = (2, 1, 4, 3)\n",
"    box2 = (1, 2, 3, 4)\n",
"    print(\"iou = \" + str(iou(box1, box2).eval()))"
]
},
{
"cell_type": "code",
"execution_count": 103,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[ True False]\n"
]
}
],
"source": [
"def dist(a, b):\n",
"    \"\"\"Return a[0]*b[0] + a[1]*b[1] as a tensor (only the first two components are used).\"\"\"\n",
"    first_term = tf.multiply(a[0], b[0])\n",
"    second_term = tf.multiply(a[1], b[1])\n",
"    return tf.add(first_term, second_term)\n",
"\n",
"with tf.Session() as test_a:\n",
"    # Fixed vector compared against every row of `a`.\n",
"    aa = np.array([1, 4])\n",
"    a = np.array([[1, 1], [2, 2]])\n",
"\n",
"    # map_fn applies the lambda to each row of `a`; dtype declares the boolean output.\n",
"    b = tf.map_fn(\n",
"        lambda x: tf.equal(dist(x, aa), 5),\n",
"        a,\n",
"        dtype=tf.bool)\n",
"    print(test_a.run(b))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.3"
}
},
"nbformat": 4,
"nbformat_minor": 2
}