In [1]:
import sys, os, time
import numpy as np
import cv2
import scipy.io

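## Preprocess Images

Resize each validation image so that its shorter side is 256 px, center-crop it to 227×227, convert it from BGR to RGB, and save the results to `.npy` files in batches of 10,000 images.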

In [2]:
# Root folder of the dataset; the cells below build all their paths from it.
path = 'ILSVRC2012'

# Full paths of every entry in the validation image folder, ordered by
# file name (presumably so that image i lines up with line i of the
# ground-truth file read further down -- confirm).
val_dir = path + '/ILSVRC2012_img_val'
fns = [val_dir + '/' + entry for entry in sorted(os.listdir(val_dir))]

In [3]:
%%time
# Preprocess every validation image and write the results to disk in chunks.
#
# For each file in `fns`: load it with OpenCV (BGR channel order), resize so
# that the shorter side is RESIZE_SHORT_SIDE pixels, take a centered
# CROP_SIZE x CROP_SIZE crop, flip the channel axis to RGB and store it as
# float32. After every CHUNK_SIZE images (and once more for a trailing partial
# chunk) the buffer is saved as '<path>/val_<number of images processed>.npy'.
CHUNK_SIZE = 10000        # images per saved .npy file
CROP_SIZE = 227           # side length of the square center crop
RESIZE_SHORT_SIDE = 256   # length of the shorter image side after resizing

# One buffer is allocated up front and reused for every chunk. Its size is
# tied to CHUNK_SIZE rather than to len(fns), so the indexing below is valid
# for any number of input files. np.save has finished writing by the time it
# returns, so the rows can safely be overwritten by the next chunk.
x_val = np.zeros((CHUNK_SIZE, CROP_SIZE, CROP_SIZE, 3), dtype=np.float32)
for i in range(len(fns)):

    # Load (as BGR)
    img = cv2.imread(fns[i])
    if img is None:
        # cv2.imread signals an unreadable/non-image file by returning None;
        # fail with the offending path instead of an AttributeError on .shape.
        raise IOError("Could not read image: %s" % fns[i])

    # Resize (shorter side -> RESIZE_SHORT_SIDE, aspect ratio preserved up to
    # integer rounding). Note that cv2.resize takes the size as (width, height).
    height, width = img.shape[:2]
    short_side = min(height, width)
    new_height = height * RESIZE_SHORT_SIDE // short_side
    new_width = width * RESIZE_SHORT_SIDE // short_side
    img = cv2.resize(img, (new_width, new_height), interpolation=cv2.INTER_CUBIC)

    # Crop (centered CROP_SIZE x CROP_SIZE window)
    height, width = img.shape[:2]
    startx = width//2 - (CROP_SIZE//2)
    starty = height//2 - (CROP_SIZE//2)
    img = img[starty:starty+CROP_SIZE, startx:startx+CROP_SIZE]

    # Reverse the channel axis (BGR -> RGB) and store at this image's slot
    # within the current chunk.
    slot = i % CHUNK_SIZE
    x_val[slot] = img[:, :, ::-1]

    n_done = i + 1
    if n_done % CHUNK_SIZE == 0 or n_done == len(fns):
        # Save only the rows filled in this chunk: a full chunk saves the whole
        # buffer, a trailing partial chunk saves just its own images (and never
        # stale rows left over from the previous chunk).
        np.save(path+'/'+'val_'+str(n_done)+'.npy', x_val[:slot+1])
        print("%d/%d" % (n_done, len(fns)))

10000/50000
20000/50000
30000/50000
40000/50000
50000/50000
CPU times: user 13min 53s, sys: 1min 59s, total: 15min 52s
Wall time: 41min 56s


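## Mapping labels

Convert the ground-truth class IDs of the validation set from the original ILSVRC2012 numbering to the class indices given by the line order of `synset_words.txt`, and save them as `y_val.npy`.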

In [4]:
# Load the MATLAB metadata file; its "synsets" entry is what the label-mapping
# code reads.
meta_file = path + '/data/meta.mat'
meta = scipy.io.loadmat(meta_file)

In [5]:
# Lookup tables built from the first 1000 rows of meta["synsets"]:
#   original_idx_to_synset : integer ILSVRC2012 id -> synset string
#   synset_to_name         : synset string         -> name string
original_idx_to_synset = {}
synset_to_name = {}

for i in range(1000):
    record = meta["synsets"][i,0]
    # Fields 0, 1 and 2 of a record hold the id, the synset and the name;
    # each one is indexed down further to reach the actual value.
    id_field, synset_field, name_field = record[0], record[1], record[2]
    ilsvrc2012_id = int(id_field[0][0])
    synset = synset_field[0]
    name = name_field[0]
    synset_to_name[synset] = name
    original_idx_to_synset[ilsvrc2012_id] = synset

# Lookup tables for the target class ordering, built from synset_words.txt.
# Each non-blank line has the form "<synset> <name>"; the zero-based position
# of the line among the non-blank lines becomes the class index.
#   synset_to_keras_idx : synset string -> class index
#   keras_idx_to_name   : class index   -> name string
synset_to_keras_idx = {}
keras_idx_to_name = {}
idx = 0
# `with` closes the file even if an error occurs while it is being read.
with open(path+"/data/synset_words.txt","r") as f:
    for line in f:
        # Remove the line terminator so that names do not end in "\n".
        line = line.rstrip("\r\n")
        if not line.strip():
            # Skip blank lines (e.g. a trailing empty line) rather than
            # registering a bogus class and shifting later indices.
            continue
        # Split at the first space only: the synset comes first and the rest
        # of the line (which may itself contain spaces) is the name.
        synset_id, _sep, class_name = line.partition(" ")
        synset_to_keras_idx[synset_id] = idx
        keras_idx_to_name[idx] = class_name
        idx += 1

def convert_original_idx_to_keras_idx(idx):
    """Translate an original ILSVRC2012 class id into its index in synset_words.txt order.

    The id is first resolved to its synset via ``original_idx_to_synset`` and
    the synset is then resolved to a class index via ``synset_to_keras_idx``.

    Raises:
        KeyError: if ``idx`` or its synset is missing from the lookup tables.
    """
    synset = original_idx_to_synset[idx]
    return synset_to_keras_idx[synset]

In [6]:
# Read the validation ground truth (one integer ILSVRC2012 class id per line)
# and convert every id to its index in synset_words.txt order.
#
# The file is read inside a `with` block so the handle is closed before the
# conversion runs, and is released even if parsing a line raises.
with open(path+"/data/ILSVRC2012_validation_ground_truth.txt","r") as f:
    # Blank lines (such as a trailing newline) are skipped instead of
    # crashing int() on an empty string.
    original_ids = [int(line) for line in f if line.strip()]

# Kept separate from `original_ids` so that `y_val` only ever holds the final
# array of converted labels.
y_val = np.array([convert_original_idx_to_keras_idx(idx) for idx in original_ids])

In [7]:
# Persist the converted validation labels in the dataset root folder.
labels_file = path + "/y_val.npy"
np.save(labels_file, y_val)