In [1]:
from __future__ import print_function
import matplotlib.pyplot as plt
import numpy as np
import os
import sys
import tarfile
import tensorflow as tf
from IPython.display import display, Image
from PIL import Image
from scipy import ndimage
from six.moves.urllib.request import urlretrieve
from six.moves import cPickle as pickle
import scipy.ndimage
import scipy.misc
%matplotlib inline

In [2]:
# Root directory of the SVHN download (the notebook reads its 'test', 'train'
# and 'extra' sub-folders). Set the SVHN_DATASET_LOCATION environment variable
# to point somewhere else instead of editing this cell.
# os.path.join(..., '') guarantees a trailing separator, so paths built from
# this value by plain string concatenation stay valid.
dataset_location = os.path.join(
    os.environ.get('SVHN_DATASET_LOCATION',
                   '/home/carnd/Google-Street-View-House-Numbers/'),
    '')

In [3]:
import h5py

# DigitStructFile is a thin wrapper around the h5py data. It holds references to:
#    inf:              The input h5 matlab file
#    digitStructName:  The h5 ref to all the file names
#    digitStructBbox:  The h5 ref to all struct data
class DigitStructFile:
    """Read-only accessor for a ``digitStruct.mat`` (HDF5) annotation file.

    The instance keeps the file open; call ``close()`` or use the object as a
    context manager to release the handle once the data has been extracted.

    Attributes:
        inf: the open ``h5py.File``.
        digitStructName: the ``digitStruct/name`` dataset (references to the
            file-name strings).
        digitStructBbox: the ``digitStruct/bbox`` dataset (references to the
            bounding-box groups).
    """

    def __init__(self, inf):
        """Open the annotation file at path ``inf`` in read-only mode."""
        self.inf = h5py.File(inf, 'r')
        self.digitStructName = self.inf['digitStruct']['name']
        self.digitStructBbox = self.inf['digitStruct']['bbox']

    def close(self):
        """Close the underlying HDF5 file."""
        self.inf.close()

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        # Returns None, so exceptions raised inside the ``with`` body propagate.
        self.close()

    def getName(self, n):
        """Return the 'name' string of the n-th digitStruct entry."""
        # ``dataset[()]`` reads the full dataset; it replaces the ``.value``
        # attribute, which newer h5py releases no longer provide.
        chars = self.inf[self.digitStructName[n][0]][()]
        return ''.join(chr(c[0]) for c in chars)

    def bboxHelper(self, attr):
        """Return the values of one bbox attribute as a list.

        Handles the coding difference between a single bbox (``attr`` stores
        the value directly) and several bboxes (``attr`` stores one reference
        per box, each of which has to be dereferenced through the file).
        """
        if len(attr) > 1:
            refs = attr[()]  # read the reference array once, not once per box
            return [self.inf[refs[j].item()][()][0][0]
                    for j in range(len(refs))]
        return [attr[()][0][0]]

    def getBbox(self, n):
        """Return a dict of attribute lists for the n-th bbox entry.

        Keys are 'height', 'label', 'left', 'top' and 'width'; each maps to a
        list with one value per box.
        """
        group = self.inf[self.digitStructBbox[n].item()]
        return {key: self.bboxHelper(group[key])
                for key in ('height', 'label', 'left', 'top', 'width')}

    def getDigitStructure(self, n):
        """Return the bbox dict of entry ``n`` with its 'name' added."""
        s = self.getBbox(n)
        s['name'] = self.getName(n)
        return s

    def getAllDigitStructure(self):
        """Return the digitStruct dicts of every entry in the input file."""
        return [self.getDigitStructure(i)
                for i in range(len(self.digitStructName))]

    def getAllDigitStructure_ByDigit(self):
        """Return a restructured version of the dataset (one structure per boxed digit).

        Returns a list of dicts:
            'filename' : filename of the sample
            'boxes'    : list of dicts (one per digit):
                'label'           : 1 to 9 for the corresponding digits,
                                    10 for digit '0' in the image
                'left', 'top'     : position of the bounding box
                'width', 'height' : dimensions of the bounding box

        Note: this may be turned into a generator if memory issues arise.
        """
        result = []
        for entry in self.getAllDigitStructure():
            boxes = [
                {
                    'height': entry['height'][j],
                    'label': entry['label'][j],
                    'left': entry['left'][j],
                    'top': entry['top'][j],
                    'width': entry['width'][j],
                }
                for j in range(len(entry['height']))
            ]
            result.append({'filename': entry['name'], 'boxes': boxes})
        return result

In [4]:
# Parse the annotations of the 'test' split. The HDF5 handle is closed as soon
# as parsing is done: the returned structures hold plain in-memory values and
# do not need the file any more.
fin = os.path.join(dataset_location + 'test', 'digitStruct.mat')
dsf = DigitStructFile(fin)
try:
    test_data = dsf.getAllDigitStructure_ByDigit()
finally:
    dsf.inf.close()

In [5]:
# Number of entries parsed from the 'test' split (one per digitStruct record).
len(test_data)

13068

In [6]:
# Parse the annotations of the 'train' split. The HDF5 handle is closed as soon
# as parsing is done: the returned structures hold plain in-memory values and
# do not need the file any more.
fin = os.path.join(dataset_location + 'train', 'digitStruct.mat')
dsf = DigitStructFile(fin)
try:
    train_data = dsf.getAllDigitStructure_ByDigit()
finally:
    dsf.inf.close()

In [7]:
# Number of entries parsed from the 'train' split (one per digitStruct record).
len(train_data)

33402

In [8]:
# Parse the annotations of the 'extra' split. The HDF5 handle is closed as soon
# as parsing is done: the returned structures hold plain in-memory values and
# do not need the file any more.
fin = os.path.join(dataset_location + 'extra', 'digitStruct.mat')
dsf = DigitStructFile(fin)
try:
    extra_data = dsf.getAllDigitStructure_ByDigit()
finally:
    dsf.inf.close()

In [9]:
# Number of entries parsed from the 'extra' split (one per digitStruct record).
len(extra_data)

202353

In [10]:
def load_image(file_name, folder_name) :
    """Load one dataset image from disk as an int32 NumPy array.

    Args:
        file_name: name of the image file inside the split folder.
        folder_name: sub-folder of ``dataset_location`` holding the image
            (the notebook passes 'test', 'train' or 'extra').

    Returns:
        The pixel data converted with ``np.asarray(img, dtype="int32")``.
    """
    image_name = os.path.join(dataset_location, folder_name, file_name)
    # The context manager closes the underlying file handle; the original left
    # it open, which adds up over the number of files this notebook loads.
    with Image.open(image_name) as img:
        img.load()
        # The array is built before the image is closed.
        return np.asarray(img, dtype="int32")

In [11]:
def plot_img(image):
    """Draw ``image`` with ``plt.imshow`` and display the figure with ``plt.show()``."""
    plt.imshow(image)
    plt.show()

In [12]:
pickle_file = 'SVHN_new_data_struct.pickle'

# Persist the parsed annotation structures of all three splits in one pickle.
# The ``with`` block closes the file even when pickle.dump raises; the
# original only closed it on success.
try:
    with open(pickle_file, 'wb') as f:
        save = {
            'train_data': train_data,
            'extra_data': extra_data,
            'test_data': test_data,
        }
        pickle.dump(save, f, pickle.HIGHEST_PROTOCOL)
except Exception as e:
    print('Unable to save data to', pickle_file, ':', e)
    raise

# Report the size on disk. No compression is applied when writing the file,
# despite the wording of the message.
statinfo = os.stat(pickle_file)
print('Compressed pickle size:', statinfo.st_size)

Compressed pickle size: 74750515


In [13]:
# Build the 'test' arrays: every image is resized (target size (32, 32, 3))
# and paired with the label of its first annotated box.
folder_name = 'test'

l = len(test_data)

# Results are written into buffers allocated once. The previous np.append call
# per iteration copied the whole accumulated array every time, which made the
# loop quadratic in the number of images.
test_images = np.array([])   # stays an empty array when there are no samples
test_labels = np.zeros(l)    # float64, the dtype np.append produced before

for i in range(l) :
    image = test_data[i]
    im = load_image(image['filename'], folder_name)
    # NOTE(review): scipy.misc.imresize was removed in SciPy 1.3. It is kept
    # here because a replacement would change the resulting pixel values.
    im = scipy.misc.imresize(im, (32, 32, 3))
    label = image['boxes'][0]['label']

    if i == 0:
        # Size the buffer from the first resized image so that shape and dtype
        # match what stacking the images one by one would have produced.
        test_images = np.zeros((l,) + im.shape, dtype=im.dtype)
    elif im.shape != test_images.shape[1:]:
        # np.append raised ValueError on a shape mismatch; do not let the
        # assignment below broadcast silently instead.
        raise ValueError('image %r has shape %r, expected %r'
                         % (image['filename'], im.shape, test_images.shape[1:]))

    test_images[i] = im
    test_labels[i] = label

    if i % 500 == 0:
        print((i/l)*100)

0.0
3.8261401897765537
7.652280379553107
11.47842056932966
15.304560759106215
19.13070094888277
22.95684113865932
26.782981328435874
30.60912151821243
34.43526170798898
38.26140189776554
42.08754208754209
45.91368227731864
49.739822467095195
53.56596265687175
57.3921028466483
61.21824303642486
65.0443832262014
68.87052341597796
72.69666360575452
76.52280379553108
80.34894398530761
84.17508417508418
88.00122436486073
91.82736455463728
95.65350474441384
99.47964493419039


In [None]:
# Build the 'train' arrays: every image is resized (target size (32, 32, 3))
# and paired with the label of its first annotated box.
folder_name = 'train'

l = len(train_data)

# Results are written into buffers allocated once. The previous np.append call
# per iteration copied the whole accumulated array every time, which made the
# loop quadratic in the number of images.
train_images = np.array([])   # stays an empty array when there are no samples
train_labels = np.zeros(l)    # float64, the dtype np.append produced before

for i in range(l) :
    image = train_data[i]
    im = load_image(image['filename'], folder_name)
    # NOTE(review): scipy.misc.imresize was removed in SciPy 1.3. It is kept
    # here because a replacement would change the resulting pixel values.
    im = scipy.misc.imresize(im, (32, 32, 3))
    label = image['boxes'][0]['label']

    if i == 0:
        # Size the buffer from the first resized image so that shape and dtype
        # match what stacking the images one by one would have produced.
        train_images = np.zeros((l,) + im.shape, dtype=im.dtype)
    elif im.shape != train_images.shape[1:]:
        # np.append raised ValueError on a shape mismatch; do not let the
        # assignment below broadcast silently instead.
        raise ValueError('image %r has shape %r, expected %r'
                         % (image['filename'], im.shape, train_images.shape[1:]))

    train_images[i] = im
    train_labels[i] = label

    if i % 1000 == 0:
        print((i/l)*100)

0.0
1.4969163523142326
2.993832704628465
4.490749056942698
5.98766540925693
7.484581761571163
8.981498113885396
10.478414466199629
11.97533081851386
13.472247170828094
14.969163523142326
16.46607987545656
17.96299622777079
19.459912580085025
20.956828932399258
22.453745284713488
23.95066163702772
25.44757798934196
26.94449434165619
28.441410693970422
29.938327046284652
31.43524339859889
32.93215975091312
34.42907610322735
35.92599245554158
37.42290880785581
38.91982516017005
40.416741512484286
41.913657864798516
43.410574217112746
44.907490569426976
46.40440692174121
47.90132327405544
49.39823962636968
50.89515597868392
52.39207233099814
53.88898868331238
55.385905035626614
56.882821387940844
58.37973774025508
59.876654092569304
61.37357044488354
62.87048679719778
64.367403149512
65.86431950182624
67.36123585414046
68.8581522064547
70.35506855876893
71.85198491108316
73.34890126339741
74.84581761571162
76.34273396802587
77.8396503203401
79.33656667265433
80.83348302496857
82.3303993772

In [None]:
# Build the 'extra' arrays: every image is resized (target size (32, 32, 3))
# and paired with the label of its first annotated box.
folder_name = 'extra'

l = len(extra_data)

# Results are written into buffers allocated once. The previous np.append call
# per iteration copied the whole accumulated array every time, which made the
# loop quadratic in the number of images.
extra_images = np.array([])   # stays an empty array when there are no samples
extra_labels = np.zeros(l)    # float64, the dtype np.append produced before

for i in range(l) :
    image = extra_data[i]
    im = load_image(image['filename'], folder_name)
    # NOTE(review): scipy.misc.imresize was removed in SciPy 1.3. It is kept
    # here because a replacement would change the resulting pixel values.
    im = scipy.misc.imresize(im, (32, 32, 3))
    label = image['boxes'][0]['label']

    if i == 0:
        # Size the buffer from the first resized image so that shape and dtype
        # match what stacking the images one by one would have produced.
        extra_images = np.zeros((l,) + im.shape, dtype=im.dtype)
    elif im.shape != extra_images.shape[1:]:
        # np.append raised ValueError on a shape mismatch; do not let the
        # assignment below broadcast silently instead.
        raise ValueError('image %r has shape %r, expected %r'
                         % (image['filename'], im.shape, extra_images.shape[1:]))

    extra_images[i] = im
    extra_labels[i] = label

    if i % 2000 == 0:
        print((i/l)*100)

0.0
0.24709295142646762
0.49418590285293523
0.7412788542794029
0.9883718057058705
1.235464757132338
1.4825577085588058
1.7296506599852732
1.976743611411741
2.2238365628382084
2.470929514264676
2.718022465691144
2.9651154171176115
3.212208368544079
3.4593013199705465
3.706394271397014
3.953487222823482
4.200580174249949
4.447673125676417
4.6947660771028845
4.941859028529352
5.18895197995582
5.436044931382288
5.683137882808755
5.930230834235223
6.177323785661691
6.424416737088158
6.671509688514625
6.918602639941093
7.165695591367561
7.412788542794028
7.659881494220495
7.906974445646964
8.154067397073431
8.401160348499898
8.648253299926365
8.895346251352834
9.1424392027793
9.389532154205769
9.636625105632236
9.883718057058704
10.130811008485171
10.37790395991164
10.624996911338107
10.872089862764575
11.119182814191042
11.36627576561751
11.613368717043977
11.860461668470446
12.107554619896913
12.354647571323381
12.60174052274985
12.848833474176317
13.095926425602784
13.34301937702925
13.59

In [None]:
# Sanity check: shape of the stacked 'train' images (first axis indexes the images).
train_images.shape

In [None]:
# Sanity check: shape of the 'train' label array (one label is appended per image).
train_labels.shape

In [None]:
# Sanity check: shape of the stacked 'extra' images (first axis indexes the images).
extra_images.shape

In [None]:
# Sanity check: shape of the 'extra' label array (one label is appended per image).
extra_labels.shape

In [None]:
# Sanity check: shape of the stacked 'test' images (first axis indexes the images).
test_images.shape

In [None]:
# Sanity check: shape of the 'test' label array (one label is appended per image).
test_labels.shape

In [None]:
pickle_file = 'SVHN_new_interm.pickle'

# Persist the image and label arrays of all three splits in one pickle.
# The ``with`` block closes the file even when pickle.dump raises; the
# original only closed it on success.
try:
    with open(pickle_file, 'wb') as f:
        save = {
            'train_images': train_images,
            'train_labels': train_labels,
            'extra_images': extra_images,
            'extra_labels': extra_labels,
            'test_images': test_images,
            'test_labels': test_labels,
        }
        pickle.dump(save, f, pickle.HIGHEST_PROTOCOL)
except Exception as e:
    print('Unable to save data to', pickle_file, ':', e)
    raise

# Report the size on disk. No compression is applied when writing the file,
# despite the wording of the message.
statinfo = os.stat(pickle_file)
print('Compressed pickle size:', statinfo.st_size)