In [1]:
%matplotlib inline
from pycocotools.coco import COCO
import os
import numpy as np
import random
import skimage.io as io
import matplotlib.pyplot as plt
import matplotlib.patches as patches
import pylab
import tensorflow as tf
import tensorflow.contrib.slim as slim
import tensorflow.contrib.slim.nets

import resnet_v2 as resnet
# import cv2
pylab.rcParams['figure.figsize'] = (10.0, 8.0)



# Initialize Dataset

In [2]:
def get_data(base_dir, image_dir, ann_file):
    """Build the image-directory and annotation-file paths for one COCO split.

    Args:
        base_dir: Root directory of the dataset.
        image_dir: Name of the sub-directory under ``images/``.
        ann_file: Annotation file name without the ``.json`` extension.

    Returns:
        Tuple ``(image_path, ann_path)`` of path strings.
    """
    # Format with the base_dir argument rather than the module-level baseDir,
    # so the function works for any root and does not depend on cell order.
    image_path = '{}/images/{}'.format(base_dir, image_dir)
    ann_path = '{}/annotations/{}.json'.format(base_dir, ann_file)

    return image_path, ann_path
    
# define the path to the annotation file corresponding to the images you want to work with
baseDir='/Users/kyle/Repositories/coco'

# annotation file names (without extension) for each split
trainData='person_keypoints_train2014'
valData='person_keypoints_val2014'
testData='image_info_test-dev2015'

# image sub-directory names for each split
imageTrainDir = 'train2014'
imageValDir = 'val2014'
imageTestDir = 'test2015'

train_img_path, train_ann_path = get_data(baseDir,imageTrainDir,trainData)
val_img_path, val_ann_path = get_data(baseDir,imageValDir,valData)

# initialize a coco object
coco = COCO(train_ann_path)

# get all images containing the 'person' category
catIds = coco.getCatIds(catNms=['person'])
imgIds = coco.getImgIds(catIds=catIds)

# Just for dealing with the images on my computer (not necessary when working with the whole dataset).
# Only the image ids are truncated: catIds keeps holding category ids instead of
# being overwritten with a slice of image ids.
imgIds = imgIds[0:30]
    
# Dedicated graph; everything inside the `with` block below is built in it.
graph = tf.Graph()
with graph.as_default():
    
    # Three per-channel constants reshaped to [1,1,3]; preprocess_image_tf subtracts
    # them from the resized image. Presumably the RGB channel means used for
    # VGG-style preprocessing (going by the name) -- confirm.
    VGG_MEAN = tf.reshape(tf.constant([123.68, 116.78, 103.94]),[1,1,3])
    
    def extract_annotations(filename, imgID, coco=coco):
        """Fetch the annotation of the largest instance in one image.

        Runs as plain Python (the dataset pipeline wraps it in tf.py_func), so the
        arrays returned here must match the dtypes declared at that call.

        Args:
            filename: Image path; passed through unchanged.
            imgID: COCO image id to look up.
            coco: COCO API object used for the lookup.

        Returns:
            Tuple (filename, bbox, keypoints, mask): bbox is the floored 'bbox'
            entry as int64, keypoints is the 'keypoints' entry reshaped to rows
            of three int64 values, and mask is the result of coco.annToMask.

        Raises:
            ValueError: If the image has no annotation with category id 1.
        """
        anns = coco.loadAnns(coco.getAnnIds(imgID,catIds=[1],iscrowd=None))
        if not anns:
            # max() would raise ValueError anyway; say which image caused it.
            raise ValueError('no annotations with category id 1 for image {}'.format(imgID))
        ann = max(anns, key=lambda item: item['area']) # extract annotation for biggest instance
        # Explicit int64: the py_func wrapper declares tf.int64 outputs, while a
        # plain `int` dtype is only 32 bits wide on some platforms.
        bbox = np.floor(ann['bbox']).astype(np.int64)
        keypoints = np.asarray(ann['keypoints'], dtype=np.int64).reshape(-1, 3)
        mask = coco.annToMask(ann)

        return filename, bbox, keypoints, mask
    
    
    def preprocess_image_tf(filename, bbox_tensor, keypoints_tensor, mask, D = tf.constant(224.0)):
        """Build the TF ops that turn one annotated example into a fixed-size sample.

        The image is decoded, cropped to a square window centred on the bounding
        box (clipped to the image borders), padded back to a full square, resized
        to 224x224 and shifted by VGG_MEAN. The mask receives the same
        crop/pad/resize, and the keypoints are shifted and scaled to match.

        Args:
            filename: Scalar string tensor, path of a JPEG file.
            bbox_tensor: Length-4 tensor indexed as [x, y, width, height].
            keypoints_tensor: Tensor with rows of (x, y, label).
            mask: 2-D mask tensor; a trailing channel axis is added here.
            D: Scalar float tensor, edge length the keypoints are scaled to.
                The image/mask resize is fixed at 224x224, so any other value
                puts the keypoints out of step with the pixels.

        Returns:
            Tuple (resized_image, resized_mask, pts, labels).
        """
        image_string = tf.read_file(filename)
        image_decoded = tf.image.decode_jpeg(image_string, channels=3)
        image = tf.cast(image_decoded, tf.float32)

        mask = tf.transpose([mask],[1,2,0])  # (rows, cols) -> (rows, cols, 1)
        bbox_tensor = tf.to_float(bbox_tensor)
        keypoints_tensor = tf.to_float(keypoints_tensor)

        # Square window: edge = longer bbox side, centred on the bbox centre.
        sideLength = tf.reduce_max(bbox_tensor[2:],axis=0)
        half_side = tf.divide(sideLength,tf.constant(2.0))
        centerX = tf.floor(bbox_tensor[0] + tf.divide(bbox_tensor[2],tf.constant(2.0)))
        centerY = tf.floor(bbox_tensor[1] + tf.divide(bbox_tensor[3],tf.constant(2.0)))
        center = tf.stack([centerX,centerY])

        # First two entries of the image shape, reversed so they line up with (x, y).
        image_wh = tf.reverse(tf.to_float(tf.shape(image)[:2]),tf.constant([0]))
        # Window corners in (x, y), clamped to the image.
        corner1 = tf.to_int32(tf.minimum(tf.maximum(tf.subtract(center, half_side),0), image_wh))
        corner2 = tf.to_int32(tf.minimum(tf.maximum(tf.add(center, half_side),0), image_wh))
        i_shape = tf.subtract(corner2,corner1)                  # size of the clipped window
        d_shape = tf.subtract(tf.to_int32(sideLength),i_shape)  # pixels lost to clipping

        scale = tf.divide(D, sideLength)

        cropped_image = tf.image.crop_to_bounding_box(image,corner1[1],corner1[0],i_shape[1],i_shape[0])
        cropped_mask = tf.image.crop_to_bounding_box(mask,corner1[1],corner1[0],i_shape[1],i_shape[0])

        # Half of the lost pixels (rounded down) are padded back before the crop;
        # the remainder ends up after it because the padded target is a full square.
        dX = tf.floor(tf.divide(d_shape,tf.constant(2)))

        pts, labels = tf.split(keypoints_tensor,[2,1],axis=1)
        pts = tf.subtract(pts,tf.to_float(corner1)) # shift keypoints into the crop
        pts = tf.add(pts,tf.to_float(dX)) # shift keypoints by the leading padding
        pts = tf.multiply(pts,scale) # scale keypoints

        # set invalid pts to 0 (each coordinate is tested on its own: 0 < value < D)
        inbounds = tf.less(pts,D)
        inbounds = tf.multiply(tf.to_int32(inbounds), tf.to_int32(tf.greater(pts,0)))
        pts = tf.multiply(pts,tf.to_float(inbounds))

        pad_top = tf.to_int32(dX[1])
        pad_left = tf.to_int32(dX[0])
        side_px = tf.to_int32(sideLength)
        padded_image = tf.image.pad_to_bounding_box(cropped_image,pad_top,pad_left,side_px,side_px)
        padded_mask = tf.image.pad_to_bounding_box(cropped_mask,pad_top,pad_left,side_px,side_px)

        resized_image = tf.image.resize_images(padded_image,tf.constant([224,224]),tf.image.ResizeMethod.NEAREST_NEIGHBOR)
        resized_image = resized_image - VGG_MEAN

        resized_mask = tf.image.resize_images(padded_mask,tf.constant([224,224]),tf.image.ResizeMethod.NEAREST_NEIGHBOR)

        return resized_image , resized_mask, pts, labels


    # Initialize train_dataset
    filenames = tf.constant([
        '{}/COCO_train2014_{:0>12}.jpg'.format(train_img_path,imgID)
        for imgID in imgIds
    ])
    imgID_tensor = tf.constant(imgIds)

    def _annotate(filename, imgID):
        """Run extract_annotations through tf.py_func for one dataset element."""
        return tf.py_func(extract_annotations, [filename, imgID],
                          [filename.dtype, tf.int64, tf.int64, tf.uint8])

    train_dataset = (
        tf.contrib.data.Dataset.from_tensor_slices((filenames,imgID_tensor))
        .map(_annotate)                # extract annotations via the coco interface
        .map(preprocess_image_tf)      # all other preprocessing in tensorflow
        .shuffle(buffer_size=10000)
        .batch(10)                     # must resize images to make them match
    )

    # Iterator defined by structure only, then bound to the training dataset.
    iterator = tf.contrib.data.Iterator.from_structure(
        train_dataset.output_types,
        train_dataset.output_shapes,
    )
    images, masks, pts, labels = iterator.get_next()
    train_init_op = iterator.make_initializer(train_dataset)

    
    # Define model
    net, endpoints = resnet.resnet_v2_50(
        images,
        num_classes=10,
        is_training=True,
        global_pool=False,  # dense prediction
        output_stride=None,
        reuse=None,
        scope='resnet_v2_50',
    )

    with tf.variable_scope("KyleScope"):
        # Branch off one of the ResNet endpoints and stack two 3x3 ReLU convolutions on it.
        branch_layer = endpoints['resnet_v2_50/block4/unit_1/bottleneck_v2/conv1']
        with tf.variable_scope("level_1_Kyle"):
            branch_layer = tf.layers.conv2d(inputs=branch_layer, filters=10, kernel_size=(3,3), strides=(1,1),
                                            padding='same', activation=tf.nn.relu, name="Conv1")
            branch_layer = tf.layers.conv2d(inputs=branch_layer, filters=10, kernel_size=(3,3), strides=(1,1),
                                            padding='same', activation=tf.nn.relu, name="Conv2")
    
    # Smoke test: initialise everything and push batches through both heads.
    with tf.Session(graph=graph) as sess:
        file_writer = tf.summary.FileWriter('.', sess.graph)
        try:
            sess.run(tf.global_variables_initializer())
            sess.run(train_init_op)
            # Each run() re-evaluates the iterator's get_next op, so these two
            # calls are computed on different batches.
            sess.run(net)
            sess.run(branch_layer)
        finally:
            # Flush the graph event to disk and release the file handle.
            file_writer.close()
    

loading annotations into memory...
Done (t=9.96s)
creating index...
index created!
Type is unsupported, or the types of the items don't match field type in CollectionDef.
'dict' object has no attribute 'name'


Type is unsupported, or the types of the items don't match field type in CollectionDef.
'dict' object has no attribute 'name'


In [None]:
def get_data(base_dir, image_dir, ann_file):
    """Build the image-directory and annotation-file paths for one COCO split.

    Args:
        base_dir: Root directory of the dataset.
        image_dir: Name of the sub-directory under ``images/``.
        ann_file: Annotation file name without the ``.json`` extension.

    Returns:
        Tuple ``(image_path, ann_path)`` of path strings.
    """
    # Format with the base_dir argument rather than the module-level baseDir,
    # so the function works for any root and does not depend on cell order.
    image_path = '{}/images/{}'.format(base_dir, image_dir)
    ann_path = '{}/annotations/{}.json'.format(base_dir, ann_file)

    return image_path, ann_path

In [None]:
# define the path to the annotation file corresponding to the images you want to work with
baseDir='/Users/kyle/Repositories/coco'

# annotation file names (without extension) for each split
trainData='person_keypoints_train2014'
valData='person_keypoints_val2014'
testData='image_info_test-dev2015'

# image sub-directory names for each split
imageTrainDir = 'train2014'
imageValDir = 'val2014'
imageTestDir = 'test2015'

train_img_path, train_ann_path = get_data(baseDir,imageTrainDir,trainData)
val_img_path, val_ann_path = get_data(baseDir,imageValDir,valData)

# initialize a coco object
coco = COCO(train_ann_path)

# get all images containing the 'person' category
catIds = coco.getCatIds(catNms=['person'])
imgIds = coco.getImgIds(catIds=catIds)

# Just for dealing with the images on my computer (not necessary when working with the whole dataset).
# Only the image ids are truncated: catIds keeps holding category ids instead of
# being overwritten with a slice of image ids.
imgIds = imgIds[0:30]

# Initialize train_dataset

# One file path per image id (id zero-padded to 12 digits), plus the ids themselves as a tensor.
filenames = tf.constant(['{}/COCO_train2014_{:0>12}.jpg'.format(train_img_path,imgID) for imgID in imgIds])
imgID_tensor = tf.constant(imgIds)

In [None]:
def extract_annotations(filename, imgID, coco=coco):
    """Fetch the annotation of the largest instance in one image.

    Runs as plain Python (the dataset pipeline wraps it in tf.py_func), so the
    arrays returned here must match the dtypes declared at that call.

    Args:
        filename: Image path; passed through unchanged.
        imgID: COCO image id to look up.
        coco: COCO API object used for the lookup.

    Returns:
        Tuple (filename, bbox, keypoints, mask): bbox is the floored 'bbox'
        entry as int64, keypoints is the 'keypoints' entry reshaped to rows
        of three int64 values, and mask is the result of coco.annToMask.

    Raises:
        ValueError: If the image has no annotation with category id 1.
    """
    anns = coco.loadAnns(coco.getAnnIds(imgID,catIds=[1],iscrowd=None))
    if not anns:
        # max() would raise ValueError anyway; say which image caused it.
        raise ValueError('no annotations with category id 1 for image {}'.format(imgID))
    ann = max(anns, key=lambda item: item['area']) # extract annotation for biggest instance
    # Explicit int64: the py_func wrapper declares tf.int64 outputs, while a
    # plain `int` dtype is only 32 bits wide on some platforms.
    bbox = np.floor(ann['bbox']).astype(np.int64)
    keypoints = np.asarray(ann['keypoints'], dtype=np.int64).reshape(-1, 3)
    mask = coco.annToMask(ann)

    return filename, bbox, keypoints, mask

In [None]:
def preprocess_image_tf(filename, bbox_tensor, keypoints_tensor, mask, D = tf.constant(224.0)):
    """Build the TF ops that turn one annotated example into a fixed-size sample.

    The image is decoded, cropped to a square window centred on the bounding
    box (clipped to the image borders), padded back to a full square, resized
    to 224x224 and shifted by VGG_MEAN. The mask receives the same
    crop/pad/resize, and the keypoints are shifted and scaled to match.

    Args:
        filename: Scalar string tensor, path of a JPEG file.
        bbox_tensor: Length-4 tensor indexed as [x, y, width, height].
        keypoints_tensor: Tensor with rows of (x, y, label).
        mask: 2-D mask tensor; a trailing channel axis is added here.
        D: Scalar float tensor, edge length the keypoints are scaled to.
            The image/mask resize is fixed at 224x224, so any other value
            puts the keypoints out of step with the pixels.

    Returns:
        Tuple (resized_image, resized_mask, pts, labels).
    """
    image_string = tf.read_file(filename)
    image_decoded = tf.image.decode_jpeg(image_string, channels=3)
    image = tf.cast(image_decoded, tf.float32)

    mask = tf.transpose([mask],[1,2,0])  # (rows, cols) -> (rows, cols, 1)
    bbox_tensor = tf.to_float(bbox_tensor)
    keypoints_tensor = tf.to_float(keypoints_tensor)

    # Square window: edge = longer bbox side, centred on the bbox centre.
    sideLength = tf.reduce_max(bbox_tensor[2:],axis=0)
    half_side = tf.divide(sideLength,tf.constant(2.0))
    centerX = tf.floor(bbox_tensor[0] + tf.divide(bbox_tensor[2],tf.constant(2.0)))
    centerY = tf.floor(bbox_tensor[1] + tf.divide(bbox_tensor[3],tf.constant(2.0)))
    center = tf.stack([centerX,centerY])

    # First two entries of the image shape, reversed so they line up with (x, y).
    image_wh = tf.reverse(tf.to_float(tf.shape(image)[:2]),tf.constant([0]))
    # Window corners in (x, y), clamped to the image.
    corner1 = tf.to_int32(tf.minimum(tf.maximum(tf.subtract(center, half_side),0), image_wh))
    corner2 = tf.to_int32(tf.minimum(tf.maximum(tf.add(center, half_side),0), image_wh))
    i_shape = tf.subtract(corner2,corner1)                  # size of the clipped window
    d_shape = tf.subtract(tf.to_int32(sideLength),i_shape)  # pixels lost to clipping

    scale = tf.divide(D, sideLength)

    cropped_image = tf.image.crop_to_bounding_box(image,corner1[1],corner1[0],i_shape[1],i_shape[0])
    cropped_mask = tf.image.crop_to_bounding_box(mask,corner1[1],corner1[0],i_shape[1],i_shape[0])

    # Half of the lost pixels (rounded down) are padded back before the crop;
    # the remainder ends up after it because the padded target is a full square.
    dX = tf.floor(tf.divide(d_shape,tf.constant(2)))

    pts, labels = tf.split(keypoints_tensor,[2,1],axis=1)
    pts = tf.subtract(pts,tf.to_float(corner1)) # shift keypoints into the crop
    pts = tf.add(pts,tf.to_float(dX)) # shift keypoints by the leading padding
    pts = tf.multiply(pts,scale) # scale keypoints

    # set invalid pts to 0 (each coordinate is tested on its own: 0 < value < D)
    inbounds = tf.less(pts,D)
    inbounds = tf.multiply(tf.to_int32(inbounds), tf.to_int32(tf.greater(pts,0)))
    pts = tf.multiply(pts,tf.to_float(inbounds))

    pad_top = tf.to_int32(dX[1])
    pad_left = tf.to_int32(dX[0])
    side_px = tf.to_int32(sideLength)
    padded_image = tf.image.pad_to_bounding_box(cropped_image,pad_top,pad_left,side_px,side_px)
    padded_mask = tf.image.pad_to_bounding_box(cropped_mask,pad_top,pad_left,side_px,side_px)

    resized_image = tf.image.resize_images(padded_image,tf.constant([224,224]),tf.image.ResizeMethod.NEAREST_NEIGHBOR)
    resized_image = resized_image - VGG_MEAN

    resized_mask = tf.image.resize_images(padded_mask,tf.constant([224,224]),tf.image.ResizeMethod.NEAREST_NEIGHBOR)

    return resized_image , resized_mask, pts, labels


In [None]:
def _annotate(filename, imgID):
    """Run extract_annotations through tf.py_func for one dataset element."""
    return tf.py_func(extract_annotations, [filename, imgID],
                      [filename.dtype, tf.int64, tf.int64, tf.uint8])

train_dataset = (
    tf.contrib.data.Dataset.from_tensor_slices((filenames,imgID_tensor))
    .map(_annotate)              # Extract Annotations via coco interface
    .map(preprocess_image_tf)    # All other preprocessing in tensorflow
)
train_dataset

In [None]:
# BATCH
# Shuffle, then group into batches of 10 (must resize images to make them match).
train_dataset = train_dataset.shuffle(buffer_size=10000).batch(10)

# Iterator defined by structure only, so it can be bound to any dataset with the same structure.
iterator = tf.contrib.data.Iterator.from_structure(
    train_dataset.output_types,
    train_dataset.output_shapes,
)
images, masks, pts, labels = iterator.get_next()

# Op that points the iterator at the training data.
train_init_op = iterator.make_initializer(train_dataset)
# val_init_op = iterator.make_initializer(val_dataset)
train_dataset

# Other Initialization Details

In [None]:
# Boolean placeholder supplied at run time: it is passed as `is_training` to
# vgg_16 and fed as True when a batch is fetched for plotting.
is_training = tf.placeholder(tf.bool,name='is_training')

# Play around with ResNet

In [None]:
# Build the network in the graph that already holds the input pipeline: ops cannot
# mix tensors from different graphs, so a brand-new tf.Graph() here cannot consume
# `images`, and a session on it cannot run `train_init_op`.
graph = images.graph
with graph.as_default():

    net, endpoints = resnet.resnet_v2_50(images,
                     num_classes=10,
                     is_training=True,
                     global_pool=False, # dense prediction
                     output_stride=None,
                     reuse=None,
                     scope='resnet_v2_50')
    branch_layer = endpoints['resnet_v2_50/block4/unit_1/bottleneck_v2/conv1']
    # Session and writer are left open on purpose for interactive use in later cells.
    sess = tf.Session(graph=graph)

    file_writer = tf.summary.FileWriter('.', sess.graph)
    sess.run(train_init_op)

In [None]:
# Look up one named entry in the endpoints dict returned by resnet.resnet_v2_50.
branch_layer = endpoints['resnet_v2_50/block4/unit_1/bottleneck_v2/conv1']

In [None]:
# New session (no graph argument given) and a summary writer that logs the
# session's graph to the current directory.
sess = tf.Session()
file_writer = tf.summary.FileWriter('.', sess.graph)

In [None]:
# Build VGG-16 on the batched images under slim's VGG arg scope.
# NOTE(review): weight_decay=.9 looks unusually large for a regularisation
# coefficient -- confirm it is intended.
vgg = tf.contrib.slim.nets.vgg
with slim.arg_scope(vgg.vgg_arg_scope(weight_decay=.9)):
    logits, _ = vgg.vgg_16(images, num_classes=50, is_training=is_training,dropout_keep_prob=.5)
    
# Checkpoint holding the pretrained weights; stop here if it is missing.
model_path = '/Users/kyle/Documents/SCHOOL/Stanford/STANFORD_2016_to_2017/3rd_quarter/CS231N/Project/ExampleCode/vgg_16.ckpt'
assert(os.path.isfile(model_path))

# Restore everything except the three fully connected scopes (fc6, fc7, fc8).
variables_to_restore = tf.contrib.framework.get_variables_to_restore(exclude=['vgg_16/fc8','vgg_16/fc7','vgg_16/fc6'])
# init_fn is called later with a session to load those variables from the checkpoint.
init_fn = tf.contrib.framework.assign_from_checkpoint_fn(model_path, variables_to_restore)
variables_to_restore

In [None]:
# Last entry of the restore list built above.
# NOTE(review): that list presumably holds variables (weights/biases), not layer
# activations -- confirm this is really the tensor meant to feed the conv layer
# in the next cell.
vgg_head = variables_to_restore[-1]

In [None]:
# 3x3, stride-1, 'same'-padded convolution with 10 filters applied to vgg_head.
# This rebinds the name `net`.
net = tf.layers.conv2d(vgg_head, filters=10,kernel_size=(3,3),strides=(1,1),padding='same')

In [None]:
# Non frozen layer
fc8_variables = tf.contrib.framework.get_variables('vgg_16/fc8')
fc8_init = tf.variables_initializer(fc8_variables)
fc8_variables

In [None]:
# New session: load the checkpoint weights, then point the iterator at the training data.
# NOTE(review): fc8_init is commented out and fc6/fc7/fc8 are excluded from the
# restore list, so those variables appear to stay uninitialized in this session --
# confirm before fetching anything that depends on them.
sess = tf.Session()
init_fn(sess)  # load the pretrained weights
# sess.run(fc8_init)  # initialize the new fc8 layer
sess.run(train_init_op)

In [None]:
# Fetch one batch and overlay the mask and selected keypoints on its first image.
# When the iterator is exhausted, rewind it instead of plotting.

try:
    I, M, P, L = sess.run([images, masks, pts, labels], {is_training: True})
    # The pipeline subtracts a per-channel mean, leaving floats outside the range
    # imshow can display; add the mean back and convert to uint8 before drawing.
    channel_mean = np.array([123.68, 116.78, 103.94])
    plt.imshow(np.clip(np.rint(I[0] + channel_mean), 0, 255).astype(np.uint8))
    plt.imshow(M[0][:,:,0],alpha=0.5)
    # Draw only the keypoints whose label equals 2.
    selected = np.reshape(L[0],-1)==2
    plt.scatter(P[0][selected,0],P[0][selected,1],c="r")
except tf.errors.OutOfRangeError:
    sess.run(train_init_op)
    print("Reinitialized Dataset Iterator...")
    