| @@ -0,0 +1,402 @@ | ||
| { | ||
| "cells": [ | ||
| { | ||
| "metadata": { | ||
| "collapsed": true, | ||
| "_uuid": "6d83a1815b19019b63290af1a21834be59701070", | ||
| "_cell_guid": "e1f0f9ef-b2c2-4fe0-8098-24bd8b6cbbf9" | ||
| }, | ||
| "cell_type": "markdown", | ||
| "source": "#Intro\nThis piece of work is inspired by a [youtube video lecture](https://www.youtube.com/watch?v=vaL1I2BD_xY) by Martin Gorner, a Google developer \nwho put forth this idea of combining YOLO grid with SqueezeNet (instead of DarkNet - original stack for [YOLO](https://arxiv.org/pdf/1506.02640.pdf))." | ||
| }, | ||
| { | ||
| "metadata": { | ||
| "collapsed": true, | ||
| "_uuid": "ace99be745c98fc8919e877a991adf5bde701ac3", | ||
| "_cell_guid": "04e6a10f-9963-48e1-9f87-def2cf0c7616", | ||
| "trusted": false | ||
| }, | ||
| "cell_type": "code", | ||
| "source": "import os\nimport sys\nimport numpy as np\nimport tensorflow as tf\nimport random\nimport math\nimport warnings\nimport pandas as pd\nimport cv2\nimport matplotlib.pyplot as plt\nimport matplotlib.cm as cm\nimport matplotlib.ticker as ticker\n\nfrom tqdm import tqdm\nfrom itertools import chain\nfrom skimage.io import imread, imshow, imread_collection, concatenate_images\nfrom skimage.transform import resize\nfrom skimage.morphology import label\n\nwarnings.filterwarnings('ignore', category=UserWarning, module='skimage')\nseed = 19\nrandom.seed = seed\nnp.random.seed = seed", | ||
| "execution_count": null, | ||
| "outputs": [] | ||
| }, | ||
| { | ||
| "metadata": { | ||
| "collapsed": true, | ||
| "_uuid": "a9e074a502adde1fffb5be9139934ffeb700dfdf", | ||
| "_cell_guid": "53fa742a-7b4c-4490-837c-016b3c34c3d5", | ||
| "trusted": false | ||
| }, | ||
| "cell_type": "code", | ||
| "source": "IMG_WIDTH = 384\nIMG_HEIGHT = 384\nIMG_CHANNELS = 3\n\nTRAIN_PATH = '../input/data-science-bowl-2018-2/stage1_train'\nTEST_PATH = '../input/data-science-bowl-2018-2/stage1_test'", | ||
| "execution_count": null, | ||
| "outputs": [] | ||
| }, | ||
| { | ||
| "metadata": { | ||
| "collapsed": true, | ||
| "_uuid": "a7f29a280bd557e3d42de110d60ec60bf254a21d", | ||
| "_cell_guid": "c7cadfa9-4b8c-4a72-8042-38b82b57e6c8", | ||
| "trusted": false | ||
| }, | ||
| "cell_type": "code", | ||
| "source": "train_ids = next(os.walk(TRAIN_PATH))[1]\ntest_ids = next(os.walk(TEST_PATH))[1]\n\ntrain_images = np.zeros((len(train_ids), IMG_HEIGHT, IMG_WIDTH, IMG_CHANNELS), dtype=np.uint8)\ntest_images = np.zeros((len(test_ids), IMG_HEIGHT, IMG_WIDTH, IMG_CHANNELS), dtype=np.uint8)", | ||
| "execution_count": null, | ||
| "outputs": [] | ||
| }, | ||
| { | ||
| "metadata": { | ||
| "collapsed": true, | ||
| "scrolled": true, | ||
| "_uuid": "cb2c127840f51e9f63d22b7d4bc691433ed3812c", | ||
| "_cell_guid": "61bd8439-48fa-4ff8-b308-2c06e736e55e", | ||
| "trusted": false | ||
| }, | ||
| "cell_type": "code", | ||
| "source": "# Resize train images.\nprint('resize train images... ')\nsys.stdout.flush()\n\nfor n, id_ in tqdm(enumerate(train_ids), total=len(train_ids)):\n path = TRAIN_PATH + \"/\" + id_\n img = imread(path + '/images/' + id_ + '.png')[:,:,:IMG_CHANNELS]\n img = resize(img, (IMG_HEIGHT, IMG_WIDTH), mode='constant', preserve_range=True)\n \n train_images[n] = img\n \n# Get and resize test images\nsizes_test = []\nprint('resize test images ... ')\nsys.stdout.flush()\n\nfor n, id_ in tqdm(enumerate(test_ids), total=len(test_ids)):\n path = TEST_PATH + \"/\" + id_\n img = imread(path + '/images/' + id_ + '.png')[:,:,:IMG_CHANNELS]\n sizes_test.append([img.shape[0], img.shape[1]])\n img = resize(img, (IMG_HEIGHT, IMG_WIDTH), mode='constant', preserve_range=True)\n test_images[n] = img\n\nprint('Done!')", | ||
| "execution_count": null, | ||
| "outputs": [] | ||
| }, | ||
| { | ||
| "metadata": { | ||
| "collapsed": true, | ||
| "_uuid": "05c2163cc42f5cfbc0064f4bcf2ee542ee3d10b6", | ||
| "_cell_guid": "f323fcc1-f5ad-49da-ae66-9114f02d318b", | ||
| "trusted": false | ||
| }, | ||
| "cell_type": "code", | ||
| "source": "def store_bounding_boxes(img, train_id, mask_id, rotby_90):\n ret, thresh = cv2.threshold(img, 127, 255, 0)\n contours = cv2.findContours(thresh.astype(np.uint8), 1, 2)\n cnt = contours[0]\n \n x, y, w, h = cv2.boundingRect(cnt) \n \n x = x * (IMG_WIDTH/img.shape[1])\n w = w * (IMG_WIDTH/img.shape[1])\n y = y * (IMG_WIDTH/img.shape[0])\n h = h * (IMG_WIDTH/img.shape[0])\n \n if(x > IMG_WIDTH-1):\n x = IMG_WIDTH-1\n if(y > IMG_HEIGHT-1):\n y = IMG_HEIGHT-1\n if(x+w > IMG_WIDTH-1):\n w = IMG_WIDTH-1 - x\n if(y+h > IMG_HEIGHT-1):\n h = IMG_HEIGHT-1 - y\n \n bbdict = { \"train_id\": train_id, \"mask_id\": mask_id, \"rotby_90\": rotby_90, \"x\": x, \"y\": y, \"w\": w, \"h\": h}\n return bbdict", | ||
| "execution_count": null, | ||
| "outputs": [] | ||
| }, | ||
| { | ||
| "metadata": { | ||
| "scrolled": true, | ||
| "collapsed": true, | ||
| "_uuid": "e7e51d42966cbfb60988aaae2c97d27ed036e16e", | ||
| "_cell_guid": "8bf9d677-d161-4965-98f1-1898235bfb3e", | ||
| "trusted": false | ||
| }, | ||
| "cell_type": "code", | ||
| "source": "path_bboxes_csv = \"../input/data-science-bowl-2018-1/bboxes.csv\"\nif not os.path.isfile(path_bboxes_csv):\n bboxes = pd.DataFrame(columns=[\"train_id\", \"mask_id\", \"rotby_90\", \"x\", \"y\", \"w\", \"h\"])\n row_count = 1\n for n, id_ in tqdm(enumerate(train_ids), total=len(train_ids)):\n path = TRAIN_PATH + \"/\" + id_\n for mask_id, mask_file in enumerate(next(os.walk(path + '/masks/'))[2]):\n mask_ = imread(path + '/masks/' + mask_file)\n for r in range(4):\n bboxes.loc[row_count] = store_bounding_boxes(np.rot90(mask_, r), id_, mask_id, r)\n row_count += 1\n bboxes.to_csv(path_bboxes_csv, index=False)\nelse:\n bboxes = pd.read_csv(path_bboxes_csv)", | ||
| "execution_count": null, | ||
| "outputs": [] | ||
| }, | ||
| { | ||
| "metadata": { | ||
| "collapsed": true, | ||
| "_uuid": "b9fe4667636d82dfb581816be128ed701e050721", | ||
| "_cell_guid": "3fe09645-5de6-42cf-ac58-a27cd8f07842", | ||
| "trusted": false | ||
| }, | ||
| "cell_type": "code", | ||
| "source": "GRID_DIM = 12\nGRID_PIX = IMG_WIDTH//GRID_DIM\nBATCH_SIZE = 14", | ||
| "execution_count": null, | ||
| "outputs": [] | ||
| }, | ||
| { | ||
| "metadata": { | ||
| "collapsed": true, | ||
| "_uuid": "34821e885175fe4fc94c03f5eebd57d056dbb7a8", | ||
| "_cell_guid": "4539f816-97c4-4076-a736-89b8f83f0f10", | ||
| "trusted": false | ||
| }, | ||
| "cell_type": "code", | ||
| "source": "train_ids_df = pd.DataFrame(columns=[\"idx\", \"id_\"])\ncnt = 0\nfor n, id_ in tqdm(enumerate(train_ids), total=len(train_ids)):\n train_ids_df.loc[cnt] = { \"idx\": n, \"id_\": id_}\n cnt += 1\n\ntrain_ids_df = train_ids_df.set_index(['idx'])", | ||
| "execution_count": null, | ||
| "outputs": [] | ||
| }, | ||
| { | ||
| "metadata": { | ||
| "scrolled": true, | ||
| "collapsed": true, | ||
| "_uuid": "34f67397be5647147d935e46c1d4c2676a2b329f", | ||
| "_cell_guid": "57f6e469-ff8f-4640-9c49-066230d172b4", | ||
| "trusted": false | ||
| }, | ||
| "cell_type": "code", | ||
| "source": "bboxes['grid_row'] = bboxes['y']//GRID_PIX\nbboxes['grid_column'] = bboxes['x']//GRID_PIX\n\nbboxes['grid_center_x'] = bboxes['grid_column'] * GRID_PIX + GRID_PIX/2\nbboxes['grid_center_y'] = bboxes['grid_row'] * GRID_PIX + GRID_PIX/2\n\nbboxes['box_center_x'] = bboxes.x + bboxes['w']/2\nbboxes['box_center_y'] = bboxes.y + bboxes['h']/2\n\nbboxes['new_x'] = (bboxes.box_center_x - bboxes.grid_center_x)/(IMG_WIDTH)\nbboxes['new_y'] = (bboxes.box_center_y - bboxes.grid_center_y)/(IMG_HEIGHT)\n\nbboxes['new_w'] = np.sqrt(bboxes.w/(IMG_WIDTH))\nbboxes['new_h'] = np.sqrt(bboxes.h/(IMG_WIDTH))\n\nbboxes['confidence'] = 1\n\nbboxes['box_area'] = bboxes.new_w*bboxes.new_h", | ||
| "execution_count": null, | ||
| "outputs": [] | ||
| }, | ||
| { | ||
| "metadata": { | ||
| "scrolled": true, | ||
| "collapsed": true, | ||
| "_uuid": "fc694c7985f0fdf7f9afcc957a12599d2a700310", | ||
| "_cell_guid": "393aa0a8-1460-435b-b138-126358f7c71f", | ||
| "trusted": false | ||
| }, | ||
| "cell_type": "code", | ||
| "source": "mask_count = 1\n#Set maximum bounding boxes allowed per grid cell\nMAX_BB_CNT = 2", | ||
| "execution_count": null, | ||
| "outputs": [] | ||
| }, | ||
| { | ||
| "metadata": { | ||
| "collapsed": true, | ||
| "_uuid": "d98d67b7a65591259de624a4e684d8c66146e2d1", | ||
| "_cell_guid": "bebafd3e-3c9e-4553-8951-12bc50c9275a", | ||
| "trusted": false | ||
| }, | ||
| "cell_type": "code", | ||
| "source": "def get_grid_info(tr_id, rotby_90):\n df = bboxes.loc[(bboxes.train_id == tr_id) & (bboxes.rotby_90 == rotby_90), 'grid_row':'box_area']\n df.drop(['grid_center_x', 'grid_center_y','box_center_x', 'box_center_y',], axis = 1, inplace=True)\n df = df.sort_values(['grid_column', 'grid_row', 'box_area'], ascending=False)\n #print(len(df))\n global mask_count\n mask_count += len(df)\n label_info = np.zeros(shape=(GRID_DIM, GRID_DIM, MAX_BB_CNT, 5), dtype=np.float32) + 0.000001\n \n for ind, row in df.iterrows():\n i = int(row[0])\n j = int(row[1])\n for b in range(MAX_BB_CNT):\n if(label_info[i, j, b][4] != 1.0):\n label_info[i, j, b] = np.array(row[2:7])\n break\n return label_info", | ||
| "execution_count": null, | ||
| "outputs": [] | ||
| }, | ||
| { | ||
| "metadata": { | ||
| "collapsed": true, | ||
| "_uuid": "7092a0c1755366ab2dc327872e5a8b98525a9c71", | ||
| "_cell_guid": "8041f7f4-1bc8-40a5-9537-787047df5805", | ||
| "trusted": false | ||
| }, | ||
| "cell_type": "code", | ||
| "source": "def get_labels(counts, rotations):\n grid_info = np.zeros(shape=(BATCH_SIZE, GRID_DIM, GRID_DIM, MAX_BB_CNT, 5), dtype=np.float32)\n for i, c in enumerate(counts):\n tr_id = train_ids_df.loc[c, 'id_']\n grid_info[i] = get_grid_info(tr_id, rotations[i])\n grid_info = np.reshape(grid_info, newshape=[BATCH_SIZE, GRID_DIM, GRID_DIM, MAX_BB_CNT, 5])\n return grid_info", | ||
| "execution_count": null, | ||
| "outputs": [] | ||
| }, | ||
| { | ||
| "metadata": { | ||
| "collapsed": true, | ||
| "_uuid": "a2e45eed12bd78ae8b0f182cfa555eea4906295d", | ||
| "_cell_guid": "86f0c93a-4cb8-4f30-bd7c-d62e0cd574f9", | ||
| "trusted": false | ||
| }, | ||
| "cell_type": "code", | ||
| "source": "def get_images(counts, rotations):\n images = np.zeros(shape=(BATCH_SIZE, IMG_WIDTH, IMG_HEIGHT, IMG_CHANNELS), dtype=np.uint8)\n for i, c in enumerate(counts):\n images[i] = np.rot90(train_images[c], rotations[i])\n return images", | ||
| "execution_count": null, | ||
| "outputs": [] | ||
| }, | ||
| { | ||
| "metadata": { | ||
| "collapsed": true, | ||
| "_uuid": "24fb3490eda2918e60cd27c980768e01987e3b35", | ||
| "_cell_guid": "55aa9b6c-6039-482f-837d-459ef775640b", | ||
| "trusted": false | ||
| }, | ||
| "cell_type": "code", | ||
| "source": "def next_batch():\n rotations = []\n rand_counts = []\n for i in range(BATCH_SIZE):\n rotations.append(random.randint(0, 3))\n rand_counts.append(random.randint(0, 669))\n return get_images(rand_counts, rotations), get_labels(rand_counts, rotations)", | ||
| "execution_count": null, | ||
| "outputs": [] | ||
| }, | ||
| { | ||
| "metadata": { | ||
| "collapsed": true, | ||
| "_uuid": "012a319735ee860225612b3db531dcc7de34dce7", | ||
| "_cell_guid": "4d109332-ee00-4ce5-b928-2981ebccbe1a", | ||
| "trusted": false | ||
| }, | ||
| "cell_type": "code", | ||
| "source": "tf.reset_default_graph()\nX = tf.placeholder(tf.float32, [None, IMG_WIDTH, IMG_HEIGHT, 3])\nY_ = tf.placeholder(tf.float32, [None, GRID_DIM, GRID_DIM, MAX_BB_CNT, 5])\nlr = tf.placeholder(tf.float32)", | ||
| "execution_count": null, | ||
| "outputs": [] | ||
| }, | ||
| { | ||
| "metadata": { | ||
| "collapsed": true, | ||
| "_uuid": "18e2016bfe77e41ee5b80937ce854761341283e2", | ||
| "_cell_guid": "31e600d9-4b19-4f17-84df-7e22559b6ff1", | ||
| "trusted": false | ||
| }, | ||
| "cell_type": "code", | ||
| "source": "def process_logits(logits, name=None):\n net = tf.reshape(logits, [-1, GRID_DIM*1, GRID_DIM*1, MAX_BB_CNT*5*16, 1])\n net = tf.layers.average_pooling3d(net, [1, 1, 16], [1, 1, 16], padding=\"valid\")\n\n net = tf.reshape(net, [-1, GRID_DIM*GRID_DIM*MAX_BB_CNT, 5]) #GRID_DIM = 64\n net = tf.transpose(net, [1, 2, 0]) \n\n logits_tensor = tf.map_fn(lambda x:\n tf.stack([\n tf.tanh(x[0]),\n tf.tanh(x[1]),\n tf.sqrt(tf.sigmoid(x[2])),\n tf.sqrt(tf.sigmoid(x[3])),\n tf.sigmoid(x[4])\n ]), net)\n\n logits_tensor = tf.transpose(logits_tensor, [2, 0, 1])\n logits_tensor = tf.reshape(logits_tensor, [-1, GRID_DIM, GRID_DIM, MAX_BB_CNT, 5])\n\n return logits_tensor", | ||
| "execution_count": null, | ||
| "outputs": [] | ||
| }, | ||
| { | ||
| "metadata": { | ||
| "collapsed": true, | ||
| "_uuid": "39ae1bf5322d8a9590d6c1cde641a8feb35d6132", | ||
| "_cell_guid": "92ac7464-bb2f-41ea-93f7-88308379d664", | ||
| "trusted": false | ||
| }, | ||
| "cell_type": "code", | ||
| "source": "def normalize_yolo_loss(processed_logits, lambda_coords, lambda_noobj):\n yolo_loss = tf.reduce_sum(tf.squared_difference(labels, processed_logits), axis=0)\n yolo_loss = tf.reduce_sum(yolo_loss, axis=0)\n yolo_loss = tf.reduce_sum(yolo_loss, axis=0)\n yolo_loss = tf.reduce_sum(yolo_loss, axis=0)\n\n yolo_loss = tf.stack([tf.multiply(lambda_coords, yolo_loss[0]), \n tf.multiply(lambda_coords, yolo_loss[1]),\n yolo_loss[2],\n yolo_loss[3],\n tf.multiply(lambda_noobj,yolo_loss[4])])\n yolo_loss = tf.reduce_sum(yolo_loss)\n\n return yolo_loss", | ||
| "execution_count": null, | ||
| "outputs": [] | ||
| }, | ||
| { | ||
| "metadata": { | ||
| "collapsed": true, | ||
| "_uuid": "d3a09342ef22164571512d2ea5a8fc0a5ecc83be", | ||
| "_cell_guid": "011d9e8d-f2f5-4e8f-b70f-5eaeaeac009c", | ||
| "trusted": false | ||
| }, | ||
| "cell_type": "code", | ||
| "source": "def l_relu(features):\n return tf.nn.leaky_relu(features, 0.1)", | ||
| "execution_count": null, | ||
| "outputs": [] | ||
| }, | ||
| { | ||
| "metadata": { | ||
| "collapsed": true, | ||
| "_uuid": "4fdbf5b535ea64baef0b9968c26c958902efdc3e", | ||
| "_cell_guid": "46b61ca1-c078-4a51-8fb9-5b80497e15e5", | ||
| "trusted": false | ||
| }, | ||
| "cell_type": "code", | ||
| "source": "def squeeze_module(x, dim, idx):\n name = 'conv_' + idx + '_sq'\n return tf.layers.conv2d(x, filters=dim, kernel_size=1, strides=1, padding=\"same\", \n activation=l_relu, name=name)\n\ndef expand_module(x, dim, idx):\n name = 'conv_' + idx + '_ex_' + '0'\n net1 = tf.layers.conv2d(x, filters=dim, kernel_size=1, strides=1, padding=\"same\", \n activation=l_relu, name=name)\n name = 'conv_' + idx + '_ex_' + '1'\n net2 = tf.layers.conv2d(x, filters=dim, kernel_size=3, strides=1, padding=\"same\", \n activation=l_relu, name=name) \n return tf.concat([net1, net2], 3)", | ||
| "execution_count": null, | ||
| "outputs": [] | ||
| }, | ||
| { | ||
| "metadata": { | ||
| "collapsed": true, | ||
| "_uuid": "2bda31db511f59fd3b8a6d2716f1f4ad9d2ffef3", | ||
| "_cell_guid": "1f40a34a-cd31-4e2c-bdbe-02af767b2b36", | ||
| "trusted": false | ||
| }, | ||
| "cell_type": "code", | ||
| "source": "def fire_module(input_tensor, squeeze_dim, expand_dim, idx):\n net = squeeze_module(input_tensor, squeeze_dim, idx)\n net = expand_module(net, expand_dim, idx)\n return net", | ||
| "execution_count": null, | ||
| "outputs": [] | ||
| }, | ||
| { | ||
| "metadata": { | ||
| "scrolled": false, | ||
| "collapsed": true, | ||
| "_uuid": "1f8dbd627374389c081c93622e2526b6d30d5ace", | ||
| "_cell_guid": "0d5e7f2a-d015-4733-b6a4-0070778c5a07", | ||
| "trusted": false | ||
| }, | ||
| "cell_type": "code", | ||
| "source": "net = tf.layers.conv2d(X, filters=32, kernel_size=1, strides=1, padding=\"same\", \n activation=l_relu, name='conv0') #384 \n\nnet = tf.layers.max_pooling2d(net, pool_size=2, strides=2, padding=\"same\") #192\n\nnet = fire_module(net, 32, 64, '0')\nnet = fire_module(net, 32, 64, '1')\nnet = fire_module(net, 32, 64, '2')\n\nnet = tf.layers.max_pooling2d(net, pool_size=2, strides=2, padding=\"same\") #96\n\n\nnet = fire_module(net, 64, 96, '3')\nnet = fire_module(net, 64, 96, '4')\nnet = fire_module(net, 64, 96, '5')\n\n\nnet = tf.layers.max_pooling2d(net, pool_size=2, strides=2, padding=\"same\") #48\n\nnet = fire_module(net, 128, 160, '6')\nnet = fire_module(net, 128, 160, '7') \nnet = fire_module(net, 128, 128, '8')\n\n\nnet = tf.layers.max_pooling2d(net, pool_size=2, strides=2, padding=\"same\") #24\n\nnet = fire_module(net, 256, 512, '9')\nnet = fire_module(net, 256, 512, '10')\nnet = fire_module(net, 256, 512, '11')\n\nnet = tf.layers.max_pooling2d(net, pool_size=2, strides=2, padding=\"same\") #12\n\nnet = fire_module(net, 512, 1024, '12')\nnet = fire_module(net, 512, 1024, '13')\nnet = fire_module(net, 512, 1024, '14')\n\nlogits = tf.layers.conv2d(net, filters=MAX_BB_CNT*5*16, kernel_size=1, strides=1, padding=\"same\",\n activation=None, name='conv40') #12\n\nprocessed_logits = process_logits(logits)\n\nlabels = Y_\n\nlambda_coords = tf.constant(5.0)\nlambda_noobj = tf.constant(0.5)\n\nyolo_loss = normalize_yolo_loss(processed_logits, lambda_coords, lambda_noobj)\n\ntrain_op = tf.train.AdamOptimizer(lr).minimize(yolo_loss)", | ||
| "execution_count": null, | ||
| "outputs": [] | ||
| }, | ||
| { | ||
| "metadata": { | ||
| "collapsed": true, | ||
| "_uuid": "d6462feb2c5ef1a87da69cd7b3b98532bfd6297b", | ||
| "_cell_guid": "18c4df73-668b-4e2e-a354-35bdb7a1bb14", | ||
| "trusted": false | ||
| }, | ||
| "cell_type": "code", | ||
| "source": "start_training = False #using pretrained model as Kaggle server doesn't allow training time more than 6 hrs.\nif not start_training:\n test_image_id = 1\n saver = tf.train.Saver()\n with tf.Session() as sess:\n saver.restore(sess, \"../input/pre-trained-model/model.ckpt\")\n result = sess.run([processed_logits], {X: np.reshape(test_images[test_image_id], [1, 384, 384, 3])})", | ||
| "execution_count": null, | ||
| "outputs": [] | ||
| }, | ||
| { | ||
| "metadata": { | ||
| "scrolled": true, | ||
| "collapsed": true, | ||
| "_uuid": "23704a1844a9d88838b34c9de1227aeb9b467f68", | ||
| "_cell_guid": "fa3439f2-c429-4a60-8aed-ed681e22a68f", | ||
| "trusted": false | ||
| }, | ||
| "cell_type": "code", | ||
| "source": "if(start_training):\n # initialize\n init = tf.global_variables_initializer()\n sess = tf.Session()\n sess.run(init)\n\n batch_count = 0\n display_count = 1\n global mask_count\n\n for i in range(33000):\n batch_X, batch_Y = next_batch()\n batch_count += 1\n train_loss, _ = sess.run([yolo_loss, train_op], {X: batch_X, Y_: batch_Y, lr: 0.0001})\n\n if(i % 100 == 0):\n print(str(display_count) + \" training loss(\" + str(mask_count) + \"): \" + str(train_loss))\n display_count +=1\n mask_count = 0\n print(\"Done!\")", | ||
| "execution_count": null, | ||
| "outputs": [] | ||
| }, | ||
| { | ||
| "metadata": { | ||
| "collapsed": true, | ||
| "_uuid": "fe08560286895963e8b86df3cb4aaf247d34423f", | ||
| "_cell_guid": "14d0e4d2-3be6-4a80-9123-60f016751501", | ||
| "trusted": false | ||
| }, | ||
| "cell_type": "code", | ||
| "source": "if(start_training):\n test_image_id = 13\n result = sess.run([processed_logits], {X: np.reshape(test_images[test_image_id], [1, 384, 384, 3])})", | ||
| "execution_count": null, | ||
| "outputs": [] | ||
| }, | ||
| { | ||
| "metadata": { | ||
| "collapsed": true, | ||
| "scrolled": true, | ||
| "_uuid": "0cd95adbb0630b80a3108cd1167ad5a7c040c33a", | ||
| "_cell_guid": "e7bd9300-d395-4094-9a68-7f7ccd88801f", | ||
| "trusted": false | ||
| }, | ||
| "cell_type": "code", | ||
| "source": "boxes = result[0]\nprint(boxes.shape)\nboxes = np.reshape(boxes, newshape=[GRID_DIM, GRID_DIM, MAX_BB_CNT, 5])\nbbs = []\n\nfor i in range(GRID_DIM):\n for j in range(GRID_DIM):\n for b in range(MAX_BB_CNT):\n if(boxes[i][j][b][4]> 0.1):\n grid_center_x = ((j+0)*GRID_PIX + GRID_PIX/2)\n grid_center_y = ((i+0)*GRID_PIX + GRID_PIX/2)\n \n new_box_center_x = boxes[i][j][b][0] * IMG_WIDTH + grid_center_x\n new_box_center_y = boxes[i][j][b][1] * IMG_HEIGHT + grid_center_y\n\n new_w = np.square(boxes[i][j][b][2]) * IMG_WIDTH\n new_h = np.square(boxes[i][j][b][3]) * IMG_HEIGHT\n \n x1 = new_box_center_x - new_w/2\n y1 = new_box_center_y - new_h/2\n\n x2 = new_box_center_x + new_w/2\n y2 = new_box_center_y + new_h/2\n\n bbs.append((math.floor(x1), math.floor(y1), math.ceil(x2), math.ceil(y2)))", | ||
| "execution_count": null, | ||
| "outputs": [] | ||
| }, | ||
| { | ||
| "metadata": { | ||
| "scrolled": false, | ||
| "collapsed": true, | ||
| "_uuid": "7056ebe41cc55fd18cc4a4da8419528f392def4a", | ||
| "_cell_guid": "eb81ca6f-f513-4d9d-a07e-2137765342bf", | ||
| "trusted": false | ||
| }, | ||
| "cell_type": "code", | ||
| "source": "img = test_images[test_image_id]\nf, axs = plt.subplots(1,2)\naxs[0].imshow(img)\n\nimshow(img)\nfor i, b in enumerate(bbs):\n cv2.rectangle(img,(b[0], b[1]),(b[2], b[3]),(0,255,0),2)\n\naxs[1].imshow(img)", | ||
| "execution_count": null, | ||
| "outputs": [] | ||
| }, | ||
| { | ||
| "metadata": { | ||
| "collapsed": true, | ||
| "_uuid": "8e52fb9970793915faa10c96fd731a0e863d323e", | ||
| "_cell_guid": "f73445c0-39b3-4de4-9d36-8335125454a2", | ||
| "trusted": false | ||
| }, | ||
| "cell_type": "code", | ||
| "source": "", | ||
| "execution_count": null, | ||
| "outputs": [] | ||
| }, | ||
| { | ||
| "metadata": { | ||
| "collapsed": true, | ||
| "_uuid": "f19a362b06e23fb09a4a660759b6a9bd19852f82", | ||
| "_cell_guid": "05bdd5e4-09c6-4f3b-848a-315c76087f0a", | ||
| "trusted": false | ||
| }, | ||
| "cell_type": "code", | ||
| "source": "", | ||
| "execution_count": null, | ||
| "outputs": [] | ||
| }, | ||
| { | ||
| "metadata": { | ||
| "collapsed": true, | ||
| "_uuid": "936ab01273eb13ed1796a64b1f2594bcc49b1628", | ||
| "_cell_guid": "e92b1fc7-514c-4efd-a097-86c2c16710f6", | ||
| "trusted": false | ||
| }, | ||
| "cell_type": "code", | ||
| "source": "", | ||
| "execution_count": null, | ||
| "outputs": [] | ||
| }, | ||
| { | ||
| "metadata": { | ||
| "collapsed": true, | ||
| "_uuid": "b14b3400efae5b713180e546cda031f5e7fd6c75", | ||
| "_cell_guid": "8b4bf39c-7399-4b56-a108-321941f4e5ac", | ||
| "trusted": false | ||
| }, | ||
| "cell_type": "code", | ||
| "source": "", | ||
| "execution_count": null, | ||
| "outputs": [] | ||
| } | ||
| ], | ||
| "metadata": { | ||
| "language_info": { | ||
| "file_extension": ".py", | ||
| "pygments_lexer": "ipython3", | ||
| "name": "python", | ||
| "codemirror_mode": { | ||
| "name": "ipython", | ||
| "version": 3 | ||
| }, | ||
| "version": "3.6.4", | ||
| "mimetype": "text/x-python", | ||
| "nbconvert_exporter": "python" | ||
| }, | ||
| "kernelspec": { | ||
| "display_name": "Python 3", | ||
| "language": "python", | ||
| "name": "python3" | ||
| } | ||
| }, | ||
| "nbformat": 4, | ||
| "nbformat_minor": 1 | ||
| } |