### TODO
Real-time augmentation: the current solution is simply to write all resized images to disk and load all augmented data into host RAM. This is feasible for small-scale datasets such as MSRCv2 or Corel5k, but it will not be for larger datasets.

In [34]:
import os, glob
import skimage.io as sio
import skimage.transform
import numpy as np

In [2]:
# data_path:    dataset root; its "Images" and "GroundTruth" sub-directories
#               are walked by the cells below.
# resized_path: directory the resized copies of the images are written to.
data_path, resized_path = (
    '../datasets/msrcv2/MSRC_ObjCategImageDatabase_v2/',
    '../datasets/msrcv2/resized/',
)

In [3]:
def resize_img(im, min_length=256):
    '''
    Resize image `im` so that its shorter edge is `min_length` pixels long.

    The longer edge is scaled by the same factor and rounded down to a whole
    pixel, so the aspect ratio is preserved up to that rounding.

    Parameters
    ----------
    im: 2D- or 3D-numpy-array
        Assumed [h x w] or [h x w x c]
    min_length: int
        Target length of the shorter edge in pixels (default 256)

    Returns
    -------
    numpy-array
        The resized image cast to uint8. Only the first two axes are resized.
    '''
    # Only the two leading axes are needed, so grayscale [h x w] input works
    # as well as [h x w x c].
    h, w = im.shape[:2]

    # Floor division keeps the target shape integral on both Python 2 and
    # Python 3; true division would hand a float size to `resize`.
    if h < w:
        new_shape = (min_length, w * min_length // h)
    else:
        new_shape = (h * min_length // w, min_length)

    # preserve_range=True keeps the original value range, so the result can be
    # cast straight back to uint8.
    im = skimage.transform.resize(im, new_shape, preserve_range=True)
    return im.astype("uint8")


In [31]:
from os import walk

# Batch resize images: every .bmp found under <data_path>/Images is resized
# with `resize_img` and saved under the same file name in `resized_path`.

# Make sure the output directory exists up front; otherwise every save would
# fail and only show up as a printed file name in the loop below.
if not os.path.isdir(resized_path):
    os.makedirs(resized_path)

for (dirpath, dirnames, filenames) in walk(os.path.join(data_path, "Images")):
    for filename in filenames:
        if not filename.endswith(".bmp"):
            continue
        try:
            im = sio.imread(os.path.join(dirpath, filename))
            im = resize_img(im)
            sio.imsave(os.path.join(resized_path, filename), im)
        except (ValueError, IOError):
            # Best effort: report the file that could not be processed and
            # carry on with the remaining images.
            print(filename)
    
    


In [92]:
# File names of the resized images and of the ground-truth bitmaps.
# Both lists are created before walking, so they exist even when a directory
# is missing or empty, and they accumulate over every directory visited
# instead of keeping only the last one.
im_names = []
for (dirpath, dirnames, filenames) in walk(resized_path):
    im_names.extend(filenames)

GT_names = []
for (dirpath, dirnames, filenames) in walk(os.path.join(data_path, "GroundTruth")):
    GT_names.extend(name for name in filenames if name.endswith('bmp'))
    

In [102]:
# Extract labels i.e. unique colors from GT images
# Result: im_labels maps a trimmed file name to the set of distinct
# (r, g, b, ...) pixel tuples found in that ground-truth image.
im_labels = dict()  # created once so entries from every walked directory are kept
for (dirpath, dirnames, filenames) in walk(os.path.join(data_path, "GroundTruth")):
    for filename in filenames:
        try:
            im = sio.imread(os.path.join(dirpath, filename))
            h, w, c = im.shape
            # One row per pixel; tolist() yields plain Python ints, so the
            # stored tuples do not carry NumPy scalar objects.
            pixels = im.reshape((h * w, c)).tolist()
        except (ValueError, IOError):
            # Unreadable files and images that are not [h x w x c] are
            # reported and skipped instead of aborting the whole run.
            print('Error ' + filename)
            continue

        # The key drops the last 7 characters of the file name
        # (presumably the "_GT.bmp" suffix -- confirm against the dataset).
        im_labels[filename[:-7]] = {tuple(px) for px in pixels}

im_labels

Error Thumbs.db


{'10_10_s': {(0, 0, 0), (64, 128, 128)},
 '10_11_s': {(0, 0, 0), (64, 128, 128)},
 '10_12_s': {(0, 0, 0), (64, 128, 128)},
 '10_13_s': {(0, 0, 0), (64, 128, 128)},
 '10_14_s': {(0, 0, 0), (64, 128, 128)},
 '10_15_s': {(0, 0, 0), (64, 128, 128)},
 '10_16_s': {(0, 0, 0), (64, 128, 128)},
 '10_17_s': {(0, 0, 0), (64, 128, 128)},
 '10_18_s': {(0, 0, 0), (64, 128, 128)},
 '10_19_s': {(0, 0, 0), (64, 128, 128)},
 '10_1_s': {(0, 0, 0), (64, 128, 128)},
 '10_20_s': {(0, 0, 0), (64, 128, 128)},
 '10_21_s': {(0, 0, 0), (64, 128, 128)},
 '10_22_s': {(0, 0, 0), (64, 128, 128)},
 '10_23_s': {(0, 0, 0), (64, 128, 128)},
 '10_24_s': {(0, 0, 0), (64, 128, 128)},
 '10_25_s': {(0, 0, 0), (64, 128, 128)},
 '10_26_s': {(0, 0, 0), (0, 128, 0), (64, 128, 128)},
 '10_27_s': {(0, 0, 0), (64, 128, 128)},
 '10_28_s': {(0, 0, 0), (64, 128, 128)},
 '10_29_s': {(0, 0, 0), (64, 128, 128)},
 '10_2_s': {(0, 0, 0), (64, 128, 128)},
 '10_30_s': {(0, 0, 0), (64, 128, 128)},
 '10_31_s': {(0, 0, 0), (64, 128, 128)},
 '10_

In [104]:
try:
    import cPickle as pickle  # Python 2: C implementation of pickle
except ImportError:
    import pickle  # Python 3: there is no separate cPickle module

# Persist the per-image label colors. Pickle files are opened in binary mode,
# and `with` closes the file even if dumping fails part-way.
with open('../datasets/msrcv2/im_labels.pkl', 'wb') as f:
    pickle.dump(im_labels, f)

In [106]:
# Number of entries collected in im_labels (one per distinct key).
len(im_labels)

591

In [120]:
from natsort import natsorted

# store label indices and color codes
# Initially: class name -> (r, g, b) color code.
label_color = {
    'void': (0, 0, 0),
    'building': (128, 0, 0),
    'grass': (0, 128, 0),
    'tree': (128, 128, 0),
    'cow': (0, 0, 128),
    'horse': (128, 0, 128),
    'sheep': (0, 128, 128),
    'sky': (128, 128, 128),
    'mountain': (64, 0, 0),
    'aeroplane': (192, 0, 0),
    'water': (64, 128, 0),
    'face': (192, 128, 0),
    'car': (64, 0, 128),
    'bicycle': (192, 0, 128),
    'flower': (64, 128, 128),
    'sign': (192, 128, 128),
    'bird': (0, 64, 0),
    'book': (128, 64, 0),
    'chair': (0, 192, 0),
    'road': (128, 64, 128),
    'cat': (0, 192, 128),
    'dog': (128, 192, 128),
    'body': (64, 64, 0),
    'boat': (192, 64, 0),
}

# Replace every color with [index, color]. Indices are 1-based and follow the
# natural sort order of the (name, color) pairs. `natsorted` returns a new
# list, so reassigning values while looping is safe; `.items()` works on both
# Python 2 and Python 3.
for i, (k, v) in enumerate(natsorted(label_color.items()), 1):
    label_color[k] = [i, v]
label_color

{'aeroplane': [1, (192, 0, 0)],
 'bicycle': [2, (192, 0, 128)],
 'bird': [3, (0, 64, 0)],
 'boat': [4, (192, 64, 0)],
 'body': [5, (64, 64, 0)],
 'book': [6, (128, 64, 0)],
 'building': [7, (128, 0, 0)],
 'car': [8, (64, 0, 128)],
 'cat': [9, (0, 192, 128)],
 'chair': [10, (0, 192, 0)],
 'cow': [11, (0, 0, 128)],
 'dog': [12, (128, 192, 128)],
 'face': [13, (192, 128, 0)],
 'flower': [14, (64, 128, 128)],
 'grass': [15, (0, 128, 0)],
 'horse': [16, (128, 0, 128)],
 'mountain': [17, (64, 0, 0)],
 'road': [18, (128, 64, 128)],
 'sheep': [19, (0, 128, 128)],
 'sign': [20, (192, 128, 128)],
 'sky': [21, (128, 128, 128)],
 'tree': [22, (128, 128, 0)],
 'void': [23, (0, 0, 0)],
 'water': [24, (64, 128, 0)]}

In [121]:
# Persist the label table. Pickle files are opened in binary mode, and `with`
# closes the file even if dumping fails part-way.
with open('../datasets/msrcv2/label_color.pkl', 'wb') as f:
    pickle.dump(label_color, f)

In [122]:
# Read the label table back as a round-trip check. Pickle files are opened in
# binary mode, and `with` closes the file even if loading fails.
with open('../datasets/msrcv2/label_color.pkl', 'rb') as f:
    test = pickle.load(f)

test

{'aeroplane': [1, (192, 0, 0)],
 'bicycle': [2, (192, 0, 128)],
 'bird': [3, (0, 64, 0)],
 'boat': [4, (192, 64, 0)],
 'body': [5, (64, 64, 0)],
 'book': [6, (128, 64, 0)],
 'building': [7, (128, 0, 0)],
 'car': [8, (64, 0, 128)],
 'cat': [9, (0, 192, 128)],
 'chair': [10, (0, 192, 0)],
 'cow': [11, (0, 0, 128)],
 'dog': [12, (128, 192, 128)],
 'face': [13, (192, 128, 0)],
 'flower': [14, (64, 128, 128)],
 'grass': [15, (0, 128, 0)],
 'horse': [16, (128, 0, 128)],
 'mountain': [17, (64, 0, 0)],
 'road': [18, (128, 64, 128)],
 'sheep': [19, (0, 128, 128)],
 'sign': [20, (192, 128, 128)],
 'sky': [21, (128, 128, 128)],
 'tree': [22, (128, 128, 0)],
 'void': [23, (0, 0, 0)],
 'water': [24, (64, 128, 0)]}