In [38]:
import numpy as np
import tensorflow as tf
from PIL import Image
import sys
import os
import glob
from scipy.misc import imresize
import multiprocessing

# Dataset locations. Set the DAVIS_TRAIN_ROOT / DAVIS_TEST_ROOT environment
# variables to point at a different checkout; when they are unset the
# original local paths are used, so existing behaviour is unchanged.
train_root = os.environ.get('DAVIS_TRAIN_ROOT', '/Users/WY/Downloads/DAVIS17_train_val')
test_root = os.environ.get('DAVIS_TEST_ROOT', '/Users/WY/Downloads/DAVIS17_test_dev')
# Worker-process count; not referenced by any of the visible cells.
NUM_PROCESSES = 3

def get_file_list(train_root, test_root):
    """Collect sorted frame paths and pair every training frame with its annotation.

    Images are searched under ``<root>/JPEGImages/480p/*/*.jpg`` for both
    roots; annotations under ``<train_root>/Annotations/480p/*/*.png``.

    Returns:
        (train_file_list, files_test_img) where ``train_file_list`` is a list
        of ``[image_path, annotation_path]`` pairs matched by sorted position
        and ``files_test_img`` is the sorted list of test image paths.

    Calls ``sys.exit`` when the number of training images differs from the
    number of annotations. Only the counts are compared; file names are not
    cross-checked.
    """
    def _sorted_matches(root, subdir, pattern):
        # Sorted glob of <root>/<subdir>/480p/<sequence>/<pattern>
        return sorted(glob.glob(os.path.join(root, subdir, "480p", "*", pattern)))

    train_images = _sorted_matches(train_root, "JPEGImages", "*.jpg")
    train_labels = _sorted_matches(train_root, "Annotations", "*.png")
    test_images = _sorted_matches(test_root, "JPEGImages", "*.jpg")

    n_train_images = len(train_images)
    if n_train_images != len(train_labels):
        sys.exit('Length of train/val files do not match!')
    print('Got {0} train/gt files. {1} test files'.format(n_train_images, len(test_images)))

    # Group train/gt pairs by position in the two sorted lists
    train_file_list = [[img_path, gt_path] for img_path, gt_path in zip(train_images, train_labels)]

    return train_file_list, test_images

def load_img(file_pair, mode):
    """Load an image (and, in train mode, its binarised annotation) as uint8 arrays.

    Input:
        file_pair: ``[img_name, gt_name]`` when ``mode == 'train'``;
                   a single image path when ``mode == 'test'``.
        mode:      ``'train'`` or ``'test'``.
    Return: ``(image, gt_label_bin)``
        image:        the decoded image as a uint8 array.
        gt_label_bin: uint8 mask that is 1 where the annotation is > 0 and
                      0 elsewhere; ``None`` in test mode.
        Any other ``mode`` yields ``(None, None)``.
    """
    # Unknown mode: nothing is opened
    if mode != 'train' and mode != 'test':
        return None, None

    if mode == 'test':
        test_image = np.array(Image.open(file_pair), dtype=np.uint8)
        return test_image, None

    # Train mode: image first, then its annotation
    image = np.array(Image.open(file_pair[0]), dtype=np.uint8)
    raw_label = np.array(Image.open(file_pair[1]), dtype=np.uint8)
    # Collapse every non-zero label to a single foreground class
    gt_label_bin = (raw_label > 0).astype(np.uint8)

    return image, gt_label_bin

def resize_pair(img, gt, scale):
    """Rescale an image and its label map with the same ``imresize`` argument.

    Input:
        img, gt: full-size image and label arrays.
        scale:   passed straight through as ``imresize``'s size argument
                 (e.g. 0.5, 0.8, 1.0).
    Return: ``(new_img, new_gt)``, both converted to uint8 arrays.
    """
    # Resize both inputs first, then convert, keeping the original call order
    scaled_img, scaled_gt = [imresize(arr, scale) for arr in (img, gt)]

    return np.array(scaled_img, dtype=np.uint8), np.array(scaled_gt, dtype=np.uint8)

def _int64_feature(value):
    """Wrap an iterable of integers in a ``tf.train.Feature`` holding an Int64List."""
    int64_list = tf.train.Int64List(value=value)
    return tf.train.Feature(int64_list=int64_list)

def wrap_a_data_dict(file_pair):
    """Load one training pair and return it as a dict.

    Input:  ``[img_name, gt_name]``
    Return: ``{'img': image_array, 'gt': label_array}`` as produced by
            ``load_img(file_pair, 'train')``.
    """
    image, label = load_img(file_pair, 'train')
    return {'img': image, 'gt': label}

def write_single_record(record_writer, data_dict):
    """Serialise one sample as a ``tf.train.Example`` and write it out.

    Input:
        record_writer: object with a ``write(bytes)`` method.
        data_dict:     dict with array values under the keys 'img' and 'gt';
                       each array is flattened and stored as an int64 list.
    Return: nothing.
    """
    feature_map = {}
    for key in ('img', 'gt'):
        feature_map[key] = _int64_feature(data_dict[key].flatten())

    features = tf.train.Features(feature=feature_map)
    example = tf.train.Example(features=features)
    record_writer.write(example.SerializeToString())
    
def generate_tfrecords(files_list, record_writer):
    """Write every ``[img_name, gt_name]`` pair as one record, then flush and close.

    Input:
        files_list:    iterable of ``[img_name, gt_name]`` pairs.
        record_writer: writer exposing ``write``, ``flush`` and ``close``.
    Return: nothing.

    The writer is always closed before this function returns or raises: this
    function already owns the close on success, so it also closes on failure
    instead of leaving the underlying file open. Any exception from loading
    or writing still propagates to the caller.
    """
    try:
        for example_file_list in files_list:
            # Get all necessary data
            data_dict = wrap_a_data_dict(example_file_list)
            # Write a single record/example to .tfrecord
            write_single_record(record_writer, data_dict)
            example_name = example_file_list[0].replace('.jpg', '')
            print('Write example: {}'.format(example_name))
        record_writer.flush()
    finally:
        # Runs on both the success and the error path
        record_writer.close()
    

In [36]:
# Per-channel RGB mean over DAVIS17_train_val and DAVIS17_test_dev.
# Each image contributes its own channel mean; the reported value is the
# unweighted average of those per-image means.
train_pair_list, test_list = get_file_list(train_root, test_root)

len_train, len_test = len(train_pair_list), len(test_list)

mean_r, mean_g, mean_b = 0.0, 0.0, 0.0
std = 1.0

mean_r_all, mean_g_all, mean_b_all = [], [], []

# Visit the train pairs first, then the test images (same order as before)
samples = [(pair, 'train') for pair in train_pair_list]
samples += [(path, 'test') for path in test_list]

for sample, mode in samples:
    image, gt = load_img(sample, mode)
    for channel, per_image_means in enumerate((mean_r_all, mean_g_all, mean_b_all)):
        per_image_means.append(np.mean(image[:, :, channel]))

mean_r, mean_g, mean_b = [
    np.mean(np.array(per_image_means))
    for per_image_means in (mean_r_all, mean_g_all, mean_b_all)
]

print('R:{0}, G:{1}, B:{2}'.format(mean_r, mean_g, mean_b))

Got 6208 train/gt files. 2086 test files
R:115.412473189, G:115.199299201, B:108.512946611


In [37]:
# Per-channel RGB standard deviation over DAVIS17_train_val and DAVIS17_test_dev.
# For every image the mean squared deviation from a fixed channel mean is
# recorded; the result is the square root of the average of those values.
train_pair_list, test_list = get_file_list(train_root, test_root)
len_train, len_test = len(train_pair_list), len(test_list)

std_r, std_g, std_b = 0.0, 0.0, 0.0

std_r_all, std_g_all, std_b_all = [], [], []

# Hard-coded (R, G, B) channel means used as the centre of the deviation
RGB_MEAN = (115.412473189, 115.199299201, 108.512946611)

# Visit the train pairs first, then the test images (same order as before)
samples = [(pair, 'train') for pair in train_pair_list]
samples += [(path, 'test') for path in test_list]

for sample, mode in samples:
    image, gt = load_img(sample, mode)
    per_channel = zip(RGB_MEAN, (std_r_all, std_g_all, std_b_all))
    for channel, (channel_mean, sq_devs) in enumerate(per_channel):
        sq_devs.append(np.mean(np.square(image[:, :, channel] - channel_mean)))

std_r, std_g, std_b = [
    np.sqrt(np.mean(np.array(sq_devs)))
    for sq_devs in (std_r_all, std_g_all, std_b_all)
]

print('R:{0}, G:{1}, B:{2}'.format(std_r, std_g, std_b))

Got 6208 train/gt files. 2086 test files
R:64.3412719996, G:62.1317242839, B:66.9034415066
