| @@ -0,0 +1,229 @@ | ||
| { | ||
| "cells": [ | ||
| { | ||
| "cell_type": "code", | ||
| "execution_count": 164, | ||
| "metadata": {}, | ||
| "outputs": [], | ||
| "source": [ | ||
| "import numpy as np\n", | ||
| "import tensorflow as tf\n", | ||
| "from keras import backend as kb\n", | ||
| "\n", | ||
| "def yolo_filter_boxes(box_confidence, boxes, box_class_probs, threshold = 0.6):\n", | ||
| " \"\"\"\n", | ||
| " Arguments:\n", | ||
| " box_confidence -- tensor of shape (S, S, B, 1)\n", | ||
| " boxes -- tensor of shape (S, S, B, 4)\n", | ||
| " box_class_probs -- tensor of shape (S, S, B, C)\n", | ||
| " threshold -- real value, if [ highest class probability score < threshold], then get rid of the corresponding box\n", | ||
| "\n", | ||
| " S - number of grid, B - number of anchor boxes, C - classes\n", | ||
| "\n", | ||
| " Returns:\n", | ||
| " scores -- tensor of shape (None,), containing the class probability score for selected boxes\n", | ||
| " boxes -- tensor of shape (None, 4), containing (b_x, b_y, b_h, b_w) coordinates of selected boxes\n", | ||
| " classes -- tensor of shape (None,), containing the index of the class detected by the selected boxes\n", | ||
| "\n", | ||
| " Note: \"None\" is here because you don't know the exact number of selected boxes, as it depends on the threshold. \n", | ||
| " For example, the actual output size of scores would be (10,) if there are 10 boxes. \n", | ||
| " \"\"\"\n", | ||
| "\n", | ||
| " box_scores = tf.multiply(box_confidence, box_class_probs) # confidence broadcasted\n", | ||
| " box_max_classes = tf.argmax(box_scores, axis=-1)\n", | ||
| " box_max_scores = kb.max(box_scores, axis=-1)\n", | ||
| " \n", | ||
| " mask = box_max_scores >= threshold\n", | ||
| " \n", | ||
| " scores = tf.boolean_mask(box_max_scores, mask)\n", | ||
| " boxes = tf.boolean_mask(boxes, mask)\n", | ||
| " classes = tf.boolean_mask(box_max_classes, mask)\n", | ||
| " \n", | ||
| " return scores, boxes, classes\n", | ||
| " \n", | ||
| " \n", | ||
| "def iou(box1, box2):\n", | ||
| " \"\"\"\n", | ||
| " Arguments:\n", | ||
| " box1 -- first box, list object with coordinates (x1, y1, x2, y2)\n", | ||
| " box2 -- second box, list object with coordinates (x1, y1, x2, y2)\n", | ||
| " \"\"\" \n", | ||
| " ix = tf.subtract(tf.minimum(box1[2], box2[2]), tf.maximum(box1[0], box2[0]))\n", | ||
| " iy = tf.subtract(tf.minimum(box1[3], box2[3]), tf.maximum(box1[1], box2[1]))\n", | ||
| " \n", | ||
| " intersect = tf.multiply(tf.maximum(ix, 0), tf.maximum(iy, 0))\n", | ||
| " b1 = tf.multiply(tf.subtract(box1[2], box1[0]), tf.subtract(box1[3], box1[1]))\n", | ||
| " b2 = tf.multiply(tf.subtract(box2[2], box2[0]), tf.subtract(box2[3], box2[1]))\n", | ||
| " union = tf.subtract(tf.add(b1, b2), intersect)\n", | ||
| " \n", | ||
| " return tf.divide(intersect, union)\n", | ||
| " \n", | ||
| " \n", | ||
| "def non_max_suppression(scores, boxes, classes, max_boxes=10, iou_threshold=0.5):\n", | ||
| " \"\"\"\n", | ||
| " Applies Non-max suppression (NMS) to set of boxes\n", | ||
| " \n", | ||
| " Arguments:\n", | ||
| " scores -- tensor of shape (None,), output of yolo_filter_boxes()\n", | ||
| " boxes -- tensor of shape (None, 4), output of yolo_filter_boxes() that have been scaled to the image size (see later)\n", | ||
| " classes -- tensor of shape (None,), output of yolo_filter_boxes()\n", | ||
| " max_boxes -- integer, maximum number of predicted boxes you'd like\n", | ||
| " iou_threshold -- real value, \"intersection over union\" threshold used for NMS filtering\n", | ||
| " \n", | ||
| " Returns:\n", | ||
| " scores -- tensor of shape (, None), predicted score for each box\n", | ||
| " boxes -- tensor of shape (4, None), predicted box coordinates\n", | ||
| " classes -- tensor of shape (, None), predicted class for each box\n", | ||
| " \n", | ||
| " Note: The \"None\" dimension of the output tensors has obviously to be less than max_boxes. Note also that this\n", | ||
| " function will transpose the shapes of scores, boxes, classes. This is made for convenience.\n", | ||
| " \"\"\"\n", | ||
| " \n", | ||
| " nms_indices = np.zeros(max_boxes, dtype=int)\n", | ||
| " tmp = scores\n", | ||
| " for i in range(max_boxes):\n", | ||
| " maxidx = tf.argmax(tmp)\n", | ||
| " mask = tf.map_fn(lambda box: tf.less(iou(boxes[maxidx], box), iou_threshold), boxes, dtype=tf.bool)\n", | ||
| "\n", | ||
| " tmp = tf.boolean_mask(tmp, mask)\n", | ||
| " nms_indices[i] = maxidx.eval()\n", | ||
| " \n", | ||
| " scores = kb.gather(scores, nms_indices)\n", | ||
| " boxes = kb.gather(boxes, nms_indices)\n", | ||
| " classes = kb.gather(classes, nms_indices)\n", | ||
| " \n", | ||
| " return scores, boxes, classes" | ||
| ] | ||
| }, | ||
| { | ||
| "cell_type": "code", | ||
| "execution_count": 166, | ||
| "metadata": {}, | ||
| "outputs": [ | ||
| { | ||
| "name": "stdout", | ||
| "output_type": "stream", | ||
| "text": [ | ||
| "[12 3 6 49 33 40 37 40 27 23]\n", | ||
| "scores[2] = 6.0194\n", | ||
| "boxes[2] = [ 6.41169071 2.62523627 3.64737511 10.97169209]\n", | ||
| "classes[2] = 3.37129\n", | ||
| "scores.shape = (10,)\n", | ||
| "boxes.shape = (10, 4)\n", | ||
| "classes.shape = (10,)\n", | ||
| "[ -0.99814534 5.00565434 -1.99267793 -3.4164257 -1.0503962\n", | ||
| " 6.51658058 -0.27600873 6.51658058 -10.05641556 6.34522581]\n" | ||
| ] | ||
| } | ||
| ], | ||
| "source": [ | ||
| "with tf.Session() as test_b:\n", | ||
| " scores = tf.random_normal([54,], mean=1, stddev=4, seed = 1)\n", | ||
| " boxes = tf.random_normal([54, 4], mean=1, stddev=4, seed = 1)\n", | ||
| " classes = tf.random_normal([54,], mean=1, stddev=4, seed = 1)\n", | ||
| " scores, boxes, classes = non_max_suppression(scores, boxes, classes, max_boxes=10)\n", | ||
| " print(\"scores[2] = \" + str(scores[2].eval()))\n", | ||
| " print(\"boxes[2] = \" + str(boxes[2].eval()))\n", | ||
| " print(\"classes[2] = \" + str(classes[2].eval()))\n", | ||
| " print(\"scores.shape = \" + str(scores.eval().shape))\n", | ||
| " print(\"boxes.shape = \" + str(boxes.eval().shape))\n", | ||
| " print(\"classes.shape = \" + str(classes.eval().shape))\n" | ||
| ] | ||
| }, | ||
| { | ||
| "cell_type": "code", | ||
| "execution_count": 108, | ||
| "metadata": {}, | ||
| "outputs": [ | ||
| { | ||
| "name": "stdout", | ||
| "output_type": "stream", | ||
| "text": [ | ||
| "scores[2] = 10.7506\n", | ||
| "boxes[2] = [ 8.42653275 3.27136683 -0.53134358 -4.94137335]\n", | ||
| "classes[2] = 7\n", | ||
| "scores.shape = (?,)\n", | ||
| "boxes.shape = (?, 4)\n", | ||
| "classes.shape = (?,)\n", | ||
| "iou = 0.142857142857\n" | ||
| ] | ||
| } | ||
| ], | ||
| "source": [ | ||
| "S = 19\n", | ||
| "with tf.Session() as test_a:\n", | ||
| " box_confidence = tf.random_normal([S, S, 5, 1], mean=1, stddev=4, seed = 1)\n", | ||
| " boxes = tf.random_normal([S, S, 5, 4], mean=1, stddev=4, seed = 1)\n", | ||
| " box_class_probs = tf.random_normal([S, S, 5, 80], mean=1, stddev=4, seed = 1)\n", | ||
| " scores, boxes, classes = yolo_filter_boxes(box_confidence, boxes, box_class_probs, threshold = 0.5)\n", | ||
| " print(\"scores[2] = \" + str(scores[2].eval()))\n", | ||
| " print(\"boxes[2] = \" + str(boxes[2].eval()))\n", | ||
| " print(\"classes[2] = \" + str(classes[2].eval()))\n", | ||
| " print(\"scores.shape = \" + str(scores.shape))\n", | ||
| " print(\"boxes.shape = \" + str(boxes.shape))\n", | ||
| " print(\"classes.shape = \" + str(classes.shape))\n", | ||
| "\n", | ||
| " # scores, boxes, classes = non_max_suppression(scores, boxes, classes, 10)\n", | ||
| " \n", | ||
| " box1 = (2, 1, 4, 3)\n", | ||
| " box2 = (1, 2, 3, 4) \n", | ||
| " print(\"iou = \" + str(iou(box1, box2).eval()))" | ||
| ] | ||
| }, | ||
| { | ||
| "cell_type": "code", | ||
| "execution_count": 103, | ||
| "metadata": {}, | ||
| "outputs": [ | ||
| { | ||
| "name": "stdout", | ||
| "output_type": "stream", | ||
| "text": [ | ||
| "[ True False]\n" | ||
| ] | ||
| } | ||
| ], | ||
| "source": [ | ||
| "def dist(a, b):\n", | ||
| " return tf.add(tf.multiply(a[0], b[0]), tf.multiply(a[1], b[1]))\n", | ||
| "\n", | ||
| "with tf.Session() as test_a:\n", | ||
| " a = np.array([[1, 1], [2, 2]])\n", | ||
| " aa = np.array([1, 4])\n", | ||
| " #a = np.array([1, 1, 2, 2])\n", | ||
| " b = tf.map_fn(lambda x: tf.equal(dist(x, aa), 5), a, dtype=tf.bool)\n", | ||
| " print(b.eval())" | ||
| ] | ||
| }, | ||
| { | ||
| "cell_type": "code", | ||
| "execution_count": null, | ||
| "metadata": { | ||
| "collapsed": true | ||
| }, | ||
| "outputs": [], | ||
| "source": [] | ||
| } | ||
| ], | ||
| "metadata": { | ||
| "kernelspec": { | ||
| "display_name": "Python 3", | ||
| "language": "python", | ||
| "name": "python3" | ||
| }, | ||
| "language_info": { | ||
| "codemirror_mode": { | ||
| "name": "ipython", | ||
| "version": 3 | ||
| }, | ||
| "file_extension": ".py", | ||
| "mimetype": "text/x-python", | ||
| "name": "python", | ||
| "nbconvert_exporter": "python", | ||
| "pygments_lexer": "ipython3", | ||
| "version": "3.6.3" | ||
| } | ||
| }, | ||
| "nbformat": 4, | ||
| "nbformat_minor": 2 | ||
| } |