In [1]:
%matplotlib inline
from pycocotools.coco import COCO
import numpy as np
import random
import skimage.io as io
import matplotlib.pyplot as plt
import matplotlib.patches as patches
import pylab
import tensorflow as tf
import tensorflow.contrib.slim as slim
import tensorflow.contrib.slim.nets

# import cv2
# Default figure size, as a (width, height) pair, applied to every plot drawn in this notebook
pylab.rcParams['figure.figsize'] = (10.0, 8.0)

In [2]:
# Build the annotation-file and image-folder paths for each dataset split
dataDir='/Users/kyle/Repositories/coco'
trainData='person_keypoints_train2014'
valData='person_keypoints_val2014'
testData='image_info_test-dev2015'

annTrainFile='{}/annotations/{}.json'.format(dataDir,trainData)
annValFile='{}/annotations/{}.json'.format(dataDir,valData)
annTestFile='{}/annotations/{}.json'.format(dataDir,testData)

# NOTE(review): these image folders are named after the annotation files, while the
# training pipeline further down builds its own image path from 'train2014' --
# confirm these are really the intended directories before relying on them.
imageTrainFolder='{}/images/{}'.format(dataDir,trainData)
imageValFolder='{}/images/{}'.format(dataDir,valData)
imageTestFolder='{}/images/{}'.format(dataDir,testData)

print(annTrainFile)
print(annValFile)
print(annTestFile)
print(imageTrainFolder)
print(imageValFolder)
print(imageTestFolder)

# initialize a coco object from the training annotations
coco = COCO(annTrainFile)

# get all images containing the 'person' category
catIds = coco.getCatIds(catNms=['person'])
imgIds = coco.getImgIds(catIds=catIds)

# Only the first 30 images are stored on this machine (not necessary when working
# with the whole dataset). catIds is deliberately left alone: it holds category ids,
# and must not be overwritten with a slice of image ids.
imgIds = imgIds[0:30]

/Users/kyle/Repositories/coco/annotations/person_keypoints_train2014.json
/Users/kyle/Repositories/coco/annotations/person_keypoints_val2014.json
/Users/kyle/Repositories/coco/annotations/image_info_test-dev2015.json
/Users/kyle/Repositories/coco/images/person_keypoints_train2014
/Users/kyle/Repositories/coco/images/person_keypoints_val2014
/Users/kyle/Repositories/coco/images/image_info_test-dev2015
loading annotations into memory...
Done (t=8.57s)
creating index...
index created!


In [3]:
# Initialize the inputs of train_dataset: one image path and one image id per example

baseDir = '/Users/kyle/Repositories/coco'
imageTrainDir = 'train2014'
# Training annotations, to match the train2014 images and the annotations the
# coco object was loaded from (this previously named the val2014 annotation set).
annTrainDir = 'person_keypoints_train2014'
imagePath = '{}/images/{}'.format(baseDir,imageTrainDir)
annPath = '{}/annotations/{}'.format(baseDir,annTrainDir)

# The file name embeds the image id, left-padded with zeros to 12 characters
filenames = tf.constant(['{}/COCO_train2014_{:0>12}.jpg'.format(imagePath,img_ID) for img_ID in imgIds])
imgID_tensor = tf.constant(imgIds)

precision = tf.float32

In [4]:
def extract_annotations(filename, imgID, coco=coco):
    """Fetch the annotation of the largest category-1 instance in one image.

    Intended to be wrapped in ``tf.py_func``; the integer outputs are forced to
    int64 so they match the ``tf.int64`` output types declared by the caller on
    every platform (NumPy's default integer is 32-bit on some systems).

    Args:
        filename: Passed through unchanged so later pipeline stages can load the image.
        imgID: COCO image id whose annotations are looked up.
        coco: COCO API object to query; defaults to the notebook-level ``coco``.

    Returns:
        A tuple ``(filename, bbox, keypoints, mask)`` where ``bbox`` is the
        floored ``ann['bbox']`` as an int64 array, ``keypoints`` is
        ``ann['keypoints']`` reshaped to rows of 3 values as an int64 array, and
        ``mask`` is the result of ``coco.annToMask`` for the chosen annotation.

    Raises:
        ValueError: If the image has no annotation with category id 1.
    """
    anns = coco.loadAnns(coco.getAnnIds(imgID, catIds=[1], iscrowd=None))
    if not anns:
        # max() on an empty list raises an opaque ValueError; say what went wrong instead
        raise ValueError('No category-1 annotations found for image id {}'.format(imgID))

    # Keep only the instance covering the largest area
    ann = max(anns, key=lambda item: item['area'])
    bbox = np.floor(ann['bbox']).astype(np.int64)
    keypoints = np.reshape(ann['keypoints'], (-1, 3)).astype(np.int64)
    mask = coco.annToMask(ann)

    return filename, bbox, keypoints, mask

In [5]:
def resize_image_tf(filename, bbox_tensor, keypoints_tensor, mask, D = tf.constant(224.0)):
    """Crop a square around the bounding box, pad it, and resize it to 224x224.

    The image is read from ``filename`` and, together with ``mask``, cropped to a
    square whose side is the longer bounding-box edge, centred on the box and
    clamped to the image borders. Whatever the clamping cut off is added back as
    padding split across both sides, and the result is resized with
    nearest-neighbour interpolation. Keypoints get the same shift and scale.

    Args:
        filename: Scalar string tensor with the path of a JPEG image.
        bbox_tensor: Bounding box, indexed here as [x, y, width, height].
        keypoints_tensor: Keypoints with 3 columns; the first two are treated as
            x/y coordinates and the third is returned as the label.
        mask: 2-D mask aligned with the image (a channel axis is appended here).
        D: Side length, in pixels, that keypoint coordinates are scaled to.
            NOTE: the image/mask resize below is fixed at 224x224, so D must stay
            at 224 unless that size is changed as well.

    Returns:
        ``(resized_image, resized_mask, pts, labels)``: the float32 image, the
        mask, the transformed keypoint coordinates (points that fall outside the
        open interval (0, D) in either coordinate are set to (0, 0)), and the
        keypoint labels as floats.
    """
    image_string = tf.read_file(filename)
    image_decoded = tf.image.decode_jpeg(image_string, channels=3)
    image = tf.cast(image_decoded, tf.float32)

    # Append a channel axis to the mask so the tf.image ops accept it
    mask = tf.transpose([mask],[1,2,0])
    bbox_tensor = tf.to_float(bbox_tensor)
    keypoints_tensor = tf.to_float(keypoints_tensor)

    # Side of the square crop = longer edge of the bounding box
    sideLength = tf.reduce_max(bbox_tensor[2:],axis=0)
    halfSide = tf.divide(sideLength,tf.constant(2.0))
    centerX = tf.floor(bbox_tensor[0] + tf.divide(bbox_tensor[2],tf.constant(2.0)))
    centerY = tf.floor(bbox_tensor[1] + tf.divide(bbox_tensor[3],tf.constant(2.0)))
    center = tf.stack([centerX,centerY])

    # Image extent as [width, height], matching the [x, y] order of `center`
    imageSize = tf.reverse(tf.to_float(tf.shape(image)[:2]),tf.constant([0]))

    # Opposite corners of the crop, clamped so they stay inside the image
    corner1 = tf.to_int32(tf.minimum(tf.maximum(tf.subtract(center, halfSide),0), imageSize))
    corner2 = tf.to_int32(tf.minimum(tf.maximum(tf.add(center, halfSide),0), imageSize))
    i_shape = tf.subtract(corner2,corner1)                  # size actually cropped, [x, y]
    d_shape = tf.subtract(tf.to_int32(sideLength),i_shape)  # pixels lost to clamping, [x, y]

    scale = tf.divide(D, sideLength)

    cropped_image = tf.image.crop_to_bounding_box(image,corner1[1],corner1[0],
                                                  i_shape[1],i_shape[0])
    cropped_mask = tf.image.crop_to_bounding_box(mask,corner1[1],corner1[0],
                                                 i_shape[1],i_shape[0])

    # Padding offset: half of the missing pixels go before the crop on each axis
    dX = tf.floor(tf.divide(d_shape,tf.constant(2)))

    pts, labels = tf.split(keypoints_tensor,[2,1],axis=1)
    pts = tf.subtract(pts,tf.to_float(corner1)) # shift keypoints into the crop
    pts = tf.add(pts,tf.to_float(dX)) # shift keypoints by the padding offset
    pts = tf.multiply(pts,scale) # scale keypoints

    # Set invalid pts to 0. A point is valid only if BOTH coordinates are in
    # bounds, so the check is reduced per point; masking each coordinate on its
    # own would leave half-zeroed points sitting on the image border.
    inbounds = tf.logical_and(tf.greater(pts,0), tf.less(pts,D))
    valid = tf.expand_dims(tf.reduce_all(inbounds,axis=1),1)
    pts = tf.multiply(pts,tf.to_float(valid))

    padded_image = tf.image.pad_to_bounding_box(cropped_image,tf.to_int32(dX[1]),tf.to_int32(dX[0]),
                                                tf.to_int32(sideLength),tf.to_int32(sideLength))
    padded_mask = tf.image.pad_to_bounding_box(cropped_mask,tf.to_int32(dX[1]),tf.to_int32(dX[0]),
                                                tf.to_int32(sideLength),tf.to_int32(sideLength))

    # The size is kept as a literal constant so the static output shape stays known
    resized_image = tf.image.resize_images(padded_image,tf.constant([224,224]),tf.image.ResizeMethod.NEAREST_NEIGHBOR)
    resized_mask = tf.image.resize_images(padded_mask,tf.constant([224,224]),tf.image.ResizeMethod.NEAREST_NEIGHBOR)

    return resized_image , resized_mask, pts, labels


In [6]:
def _load_annotations(filename, imgID):
    """Run extract_annotations (plain Python, via the coco interface) as a graph op."""
    return tf.py_func(extract_annotations, [filename, imgID],
                      [filename.dtype, tf.int64, tf.int64, tf.uint8])


# (filename, image id) pairs -> annotations looked up in Python -> all remaining
# preprocessing done with TensorFlow ops
train_dataset = (
    tf.contrib.data.Dataset
    .from_tensor_slices((filenames, imgID_tensor))
    .map(_load_annotations)
    .map(resize_image_tf)
)
train_dataset

<MapDataset shapes: ((224, 224, 3), (224, 224, ?), <unknown>, <unknown>), types: (tf.float32, tf.uint8, tf.float32, tf.float32)>

In [7]:
# Shuffle the examples, then group them into batches of 10
# (batching requires that the images were resized to a common shape beforehand)
train_dataset = train_dataset.shuffle(buffer_size=10000).batch(10)

# The iterator is defined by element structure only, so it can be pointed at any
# dataset with matching types and shapes (e.g. a validation set) via an initializer op
iterator = tf.contrib.data.Iterator.from_structure(
    train_dataset.output_types,
    train_dataset.output_shapes,
)
images, masks, pts, labels = iterator.get_next()

# Running this op (re)starts the iterator on the training data
train_init_op = iterator.make_initializer(train_dataset)
train_dataset

<BatchDataset shapes: ((?, 224, 224, 3), (?, 224, 224, ?), <unknown>, <unknown>), types: (tf.float32, tf.uint8, tf.float32, tf.float32)>

In [None]:
# Open a session and point the iterator at the training dataset
sess = tf.Session()
sess.run(train_init_op)

In [None]:
# Fetch one batch and draw its first example: image, mask overlay and keypoints

try:
    I, M, P, L = sess.run([images, masks, pts, labels])
    # The pipeline yields float32 pixels in the 0-255 range, but imshow reads
    # float RGB data as 0-1 and clips the rest; cast to uint8 for true colours.
    plt.imshow(I[0].astype(np.uint8))
    plt.imshow(M[0][:,:,0],alpha=0.5)
    # Plot only keypoints whose label is 2 (COCO visibility flag: labelled and visible)
    visible = np.reshape(L[0],-1)==2
    plt.scatter(P[0][visible,0],P[0][visible,1],c="r")
except tf.errors.OutOfRangeError:
    # The dataset is exhausted: restart the iterator so the cell can be run again
    sess.run(train_init_op)
    print("Reinitialized Dataset Iterator...")
    

In [8]:
# Boolean placeholder fed at run time and handed to the network as its is_training flag
is_training = tf.placeholder(tf.bool)

vgg = tf.contrib.slim.nets.vgg
# NOTE(review): weight_decay=.9 looks very large for an L2 coefficient (slim's
# vgg_arg_scope presumably defaults to a much smaller value) -- confirm it is intended.
with slim.arg_scope(vgg.vgg_arg_scope(weight_decay=.9)):
    # Build VGG-16 on the batched images; the second return value is discarded
    logits, _ = vgg.vgg_16(images, num_classes=50, is_training=is_training,dropout_keep_prob=.5)