In [1]:
import warnings
warnings.simplefilter("ignore")
import cv2
#import utils
import numpy as np
import math
import tensorflow as tf
import time
import random
import os
import json
from pprint import pprint
import Detector as dt
from config import FLAGS

  from ._conv import register_converters as _register_converters


In [2]:
# Second argument handed to dt.make_2d_gaussian_map when joint heatmaps are built.
# NOTE(review): presumably a spread in pixels of the box_size map -- confirm in Detector.
gaussian_radius = 2
# Verbosity flag; not read by any cell visible in this part of the notebook.
SHOW_INFO = False

In [4]:
# Dataset layout: every input lives under a single root directory.
base_dir = "../hand_pose"
image_dir = os.path.join(base_dir, 'Color')            # folder scanned for image files
csv_file = os.path.join(base_dir, 'hand_labels.csv')   # path only; not opened in this cell
json_file = os.path.join(base_dir, 'annotation.json')  # joint annotations

# Parse the annotation file once. Later cells look entries up with keys of the
# form "<image name without extension>_R" / "..._L"; each entry is indexed as a
# sequence of points whose first two values are x and y.
with open(json_file) as annotation_fh:
    content = json.loads(annotation_fh.read())

In [5]:
# os.listdir returns entries in arbitrary order; sorting makes the positional
# indices used by the train/test split refer to the same files on every run.
img_list = sorted(os.listdir(image_dir))
# Annotation keys are built from the file name without its extension.
# splitext copes with extensions of any length (".jpg", ".jpeg", ...), whereas
# slicing off the last four characters only works for three-letter extensions.
img_name = [os.path.splitext(img)[0] for img in img_list]
# Fraction of the images assigned to the training set.
split_ratio = 0.9

In [6]:
# Seeded, local random generator so the train/test partition is identical after
# every "Restart & Run All" (an unseeded split silently changes which images
# are held out each time the notebook is executed).
SPLIT_SEED = 0
split_rng = np.random.RandomState(SPLIT_SEED)

n_images = len(img_list)
# Positions (into img_list / img_name) of the training images, drawn without replacement.
train_index = split_rng.choice(n_images, size=int(n_images * split_ratio), replace=False)
# Every remaining position goes to the test set; setdiff1d returns them sorted and unique.
test_index = np.setdiff1d(np.arange(n_images), train_index)

In [7]:
# Sanity check of the split: sizes of both index sets and the total image
# count on the first line, then the raw index arrays themselves.
split_summary = (train_index.shape, test_index.shape, len(img_list))
print(*split_summary)
print(train_index, test_index)

(99,) (12,) 111
[ 72   7  63  16  87  29 106  94  59  56  18  36  99 104  62   5  69  96
  84  98  68  37  39  12  71 109  93  91  61  60  70  50  78  66  14  44
  79 107  45  27  92  34  81  41  80  42 101  49  89  35  86  24  65  46
  95  19   3  31   4  83 102  74  67  57  76  75   9  28  13  58  10  53
   6  43  32  48 103 110 108  15  97 100  77  33  17   1   8  88  26  90
  25  40  30  54  55  51  85 105  20] [ 0  2 11 21 22 23 38 47 52 64 73 82]


In [8]:
# Sanity check: compare the sorted unique values of test_index element-wise with
# the sorted indices. An all-True array (displayed as the cell output) indicates
# that test_index contains no repeated entries.
np.unique(test_index) == sorted(test_index)

array([ True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True])

In [9]:
#later change this to import from prediction
def record_boundary(index, direction, padding, height, width):
    """Return the adjusted bounding box of one annotated hand.

    The joints stored in the global `content` under the key
    ``img_name[index] + direction`` are reduced to their extreme x / y values,
    which are forwarded together with `padding`, `height` and `width` to
    `dt.adjust_bound_box`. That helper's result is returned unchanged.

    Args:
        index: position of the image in `img_name`.
        direction: key suffix selecting the hand (callers pass "_R" or "_L").
        padding: forwarded to `dt.adjust_bound_box`.
        height: image height, forwarded to `dt.adjust_bound_box`.
        width: image width, forwarded to `dt.adjust_bound_box`.

    Raises:
        KeyError: if no annotation exists for that image / hand combination.
    """
    joints = np.array(content[img_name[index] + direction])
    # Column 0 holds the x coordinates, column 1 the y coordinates.
    xs, ys = joints[:, 0], joints[:, 1]
    return dt.adjust_bound_box(min(xs), min(ys), max(xs), max(ys), padding, height, width)

def draw_hand(canvas, joint):
    """Draw each joint on `canvas` (modified in place) as a filled circle of radius 4.

    Args:
        canvas: image passed to cv2.circle as the drawing target.
        joint: sequence of points; the first two values of each point are
            truncated to int and used as the circle centre (x, y).

    The colour of the i-th joint is taken from FLAGS.joint_color_code[i].
    """
    for joint_id, point in enumerate(joint):
        center = (int(point[0]), int(point[1]))
        # thickness=-1 asks OpenCV for a filled circle instead of an outline.
        cv2.circle(canvas, center, 4, FLAGS.joint_color_code[joint_id], thickness=-1)

In [10]:
# Test-set TFRecord generator.
#
# For every image position in `test_index`, each annotated hand ("_R" / "_L"
# key in `content`) is cropped to the box returned by record_boundary(),
# rescaled so that the crop width equals FLAGS.box_size, and written to
# 'test.tfrecords' as a tf.train.Example with two features:
#   'image' : raw uint8 bytes of the resized crop
#   'joint' : interleaved [x0, y0, x1, y1, ...] joint coordinates in crop pixels
# Depending on the running image counter `cnt`, at most one augmented copy
# (illumination change, 90-degree rotation or horizontal mirror) is written
# right after the original sample.
tfr_file = 'test.tfrecords'
t1 = time.time()  # start of the current 500-image progress window
t0 = time.time()  # start of the whole run

cnt = 0
# Context manager: the record file is closed even if an image fails mid-run.
with tf.python_io.TFRecordWriter(tfr_file) as writer:
    for idx in test_index:
        img = img_list[idx]
        if (cnt + 1) % 500 == 0:
            print(cnt + 1, " finished", time.time() - t1)
            t1 = time.time()
        cnt += 1

        cur_img = cv2.imread(os.path.join(image_dir, img))
        if cur_img is None:
            # cv2.imread returns None (no exception) for unreadable / non-image files.
            print("Skipping unreadable image:", img)
            continue
        height, width, _ = cur_img.shape

        # Collect the annotated hands of this image, right hand first.
        hands = [(content[img_name[idx] + side], side)
                 for side in ("_R", "_L")
                 if img_name[idx] + side in content]

        for line, direction in hands:
            bbox = record_boundary(idx, direction, FLAGS.padding, height, width)
            # bbox is used as (x_min, y_min, x_max, y_max).
            cur_img_new = cur_img[bbox[1]:bbox[3], bbox[0]:bbox[2], :]

            # One scale factor (derived from the crop width) is applied to both
            # axes, so the aspect ratio of the crop is preserved.
            scale = FLAGS.box_size / (cur_img_new.shape[1] * 1.0)
            # Joint coordinates: shift into the crop frame, then rescale.
            joints_x = (np.array([float(p[0]) for p in line]) - bbox[0]) * scale
            joints_y = (np.array([float(p[1]) for p in line]) - bbox[1]) * scale

            output_image = cv2.resize(cur_img_new, (0, 0), fx=scale, fy=scale,
                                      interpolation=cv2.INTER_LANCZOS4)

            # Per-joint Gaussian heatmaps are not stored in the records, so they
            # are not computed here.

            # (image, x coordinates, y coordinates) of every sample to write for
            # this hand; the unmodified crop always comes first.
            samples = [(output_image, joints_x, joints_y)]

            # NOTE(review): the rotation / mirror formulas below use
            # FLAGS.box_size for both axes, which presumes the resized crop is
            # square -- confirm that dt.adjust_bound_box guarantees this.
            if (cnt + 1) % 10 == 0:  # illumination
                if ((cnt + 1) // 10) % 2 == 0:  # even
                    res = np.uint8(np.clip((0.2 * output_image + 100), 0, 255))
                else:  # odd
                    res = np.uint8(np.clip((0.25 * output_image + 10), 0, 255))
                samples.append((res, joints_x, joints_y))
            if (cnt + 2) % 10 == 0:  # rotation
                if ((cnt + 2) // 10) % 2 == 0:  # clockwise
                    res = np.rot90(output_image, 3)
                    samples.append((res, FLAGS.box_size - joints_y, joints_x))
                else:  # counter-clockwise
                    res = np.rot90(output_image)
                    samples.append((res, joints_y, FLAGS.box_size - joints_x))
            if (cnt + 3) % 20 == 0:  # mirror
                res = cv2.flip(output_image, 1)
                samples.append((res, FLAGS.box_size - joints_x, joints_y))

            for sample_img, xs, ys in samples:
                # A fresh array per sample; assigning into the fixed-size buffer
                # also fails loudly if the joint count differs from
                # FLAGS.total_joints.
                output_coords = np.zeros(FLAGS.total_joints * 2)
                output_coords[::2] = xs
                output_coords[1::2] = ys
                single_sample = tf.train.Example(features=tf.train.Features(feature={
                    'image': tf.train.Feature(bytes_list=tf.train.BytesList(
                        value=[sample_img.astype(np.uint8).tobytes()])),
                    'joint': tf.train.Feature(float_list=tf.train.FloatList(
                        value=output_coords.tolist()))
                }))
                writer.write(single_sample.SerializeToString())

print("Test records have been generated", time.time() - t0)

All finished 1.246161937713623


In [11]:
# Training-set TFRecord generator.
#
# For every image position in `train_index`, each annotated hand ("_R" / "_L"
# key in `content`) is cropped to the box returned by record_boundary(),
# rescaled so that the crop width equals FLAGS.box_size, and written to
# 'train.tfrecords' as a tf.train.Example with two features:
#   'image' : raw uint8 bytes of the resized crop
#   'joint' : interleaved [x0, y0, x1, y1, ...] joint coordinates in crop pixels
# Depending on the running image counter `cnt`, at most one augmented copy
# (illumination change, 90-degree rotation or horizontal mirror) is written
# right after the original sample.
tfr_file = 'train.tfrecords'
t1 = time.time()  # start of the current 500-image progress window
t0 = time.time()  # start of the whole run

cnt = 0
# Context manager: the record file is closed even if an image fails mid-run.
with tf.python_io.TFRecordWriter(tfr_file) as writer:
    # NOTE: this loop must walk train_index. Iterating test_index here would
    # fill train.tfrecords with the held-out test images.
    for idx in train_index:
        img = img_list[idx]
        if (cnt + 1) % 500 == 0:
            print(cnt + 1, " finished", time.time() - t1)
            t1 = time.time()
        cnt += 1

        cur_img = cv2.imread(os.path.join(image_dir, img))
        if cur_img is None:
            # cv2.imread returns None (no exception) for unreadable / non-image files.
            print("Skipping unreadable image:", img)
            continue
        height, width, _ = cur_img.shape

        # Collect the annotated hands of this image, right hand first.
        hands = [(content[img_name[idx] + side], side)
                 for side in ("_R", "_L")
                 if img_name[idx] + side in content]

        for line, direction in hands:
            bbox = record_boundary(idx, direction, FLAGS.padding, height, width)
            # bbox is used as (x_min, y_min, x_max, y_max).
            cur_img_new = cur_img[bbox[1]:bbox[3], bbox[0]:bbox[2], :]

            # One scale factor (derived from the crop width) is applied to both
            # axes, so the aspect ratio of the crop is preserved.
            scale = FLAGS.box_size / (cur_img_new.shape[1] * 1.0)
            # Joint coordinates: shift into the crop frame, then rescale.
            joints_x = (np.array([float(p[0]) for p in line]) - bbox[0]) * scale
            joints_y = (np.array([float(p[1]) for p in line]) - bbox[1]) * scale

            output_image = cv2.resize(cur_img_new, (0, 0), fx=scale, fy=scale,
                                      interpolation=cv2.INTER_LANCZOS4)

            # Per-joint Gaussian heatmaps are not stored in the records, so they
            # are not computed here.

            # (image, x coordinates, y coordinates) of every sample to write for
            # this hand; the unmodified crop always comes first.
            samples = [(output_image, joints_x, joints_y)]

            # NOTE(review): the rotation / mirror formulas below use
            # FLAGS.box_size for both axes, which presumes the resized crop is
            # square -- confirm that dt.adjust_bound_box guarantees this.
            if (cnt + 1) % 10 == 0:  # illumination
                if ((cnt + 1) // 10) % 2 == 0:  # even
                    res = np.uint8(np.clip((0.2 * output_image + 100), 0, 255))
                else:  # odd
                    res = np.uint8(np.clip((0.25 * output_image + 10), 0, 255))
                samples.append((res, joints_x, joints_y))
            if (cnt + 2) % 10 == 0:  # rotation
                if ((cnt + 2) // 10) % 2 == 0:  # clockwise
                    res = np.rot90(output_image, 3)
                    samples.append((res, FLAGS.box_size - joints_y, joints_x))
                else:  # counter-clockwise
                    res = np.rot90(output_image)
                    samples.append((res, joints_y, FLAGS.box_size - joints_x))
            if (cnt + 3) % 20 == 0:  # mirror
                res = cv2.flip(output_image, 1)
                samples.append((res, FLAGS.box_size - joints_x, joints_y))

            for sample_img, xs, ys in samples:
                # A fresh array per sample; assigning into the fixed-size buffer
                # also fails loudly if the joint count differs from
                # FLAGS.total_joints.
                output_coords = np.zeros(FLAGS.total_joints * 2)
                output_coords[::2] = xs
                output_coords[1::2] = ys
                single_sample = tf.train.Example(features=tf.train.Features(feature={
                    'image': tf.train.Feature(bytes_list=tf.train.BytesList(
                        value=[sample_img.astype(np.uint8).tobytes()])),
                    'joint': tf.train.Feature(float_list=tf.train.FloatList(
                        value=output_coords.tolist()))
                }))
                writer.write(single_sample.SerializeToString())

print("Training records have been generated", time.time() - t0)