In [None]:
import numpy as np  # required by the atom dtypes below; must be imported in this cell so a fresh kernel can run it
import tables
# Resources for PyTables:
# https://kastnerkyle.github.io/posts/using-pytables-for-larger-than-ram-data-processing/
# http://www.pytables.org/usersguide/libref/homogenous_storage.html#the-earray-class

# Output HDF5 file holding the augmented training set.
trainingDataPath = "trainingData.hdf5"
# mode='w' creates a new file (an existing file of the same name is replaced).
file = tables.open_file(trainingDataPath, mode='w')
# Compression settings shared by both arrays.
filters = tables.Filters(complevel=5, complib='blosc')
# Extendable array of float32 images; rows are appended along the first axis.
images = file.create_earray(file.root, 'images',
                            tables.Atom.from_dtype(np.dtype(np.float32)),
                            shape=(0, 3, 224, 224),  # 0 is the extendable dimension
                            filters=filters,
                            # sizing hint only; 7020 = 26 * 270 -- presumably 26 source
                            # images x 270 augmentations each (TODO confirm)
                            expectedrows=7020)
# Extendable 1-D array of int32 labels, one entry per row of `images`.
labels = file.create_earray(file.root, 'labels',
                            tables.Atom.from_dtype(np.dtype(np.int32)),
                            shape=(0,),
                            filters=filters,
                            expectedrows=7020)

In [None]:
import cPickle
# SECURITY NOTE: unpickling can execute arbitrary code -- only load 'vgg16_2.pkl'
# if it comes from a trusted source.
# Open in binary mode (pickle data is bytes, text mode can corrupt it on some
# platforms) and use a context manager so the file handle is always closed.
with open('vgg16_2.pkl', 'rb') as pickleFile:
    d = cPickle.load(pickleFile)

In [None]:
import os
import numpy as np
import cv2
# Mean value taken from the loaded pickle, with two trailing singleton axes added
# (None is np.newaxis) so it can broadcast against channel-first image arrays.
# NOTE(review): presumably a 1-D per-channel mean, giving shape (3, 1, 1) -- confirm.
IMAGE_MEAN = d['mean value'][:, None, None]

# this function resizes the raw input image to 224x224 (no grayscale conversion is done)
def resize(img):
    """Return `img` rescaled to 224x224 pixels using OpenCV's INTER_AREA interpolation."""
    targetSize = (224, 224)
    return cv2.resize(img, targetSize, interpolation=cv2.INTER_AREA)

def perCentreMean(img):
    """Move the last axis of a 3-D image to the front and subtract IMAGE_MEAN.

    The third axis (index 2) becomes the first one, e.g. an array indexed as
    (row, col, channel) becomes (channel, row, col). IMAGE_MEAN is then
    subtracted by broadcasting and the resulting new array is returned.
    """
    # rolling axis 2 to position 0 yields the same view as swapping axes (1, 2) then (0, 1)
    channelsFirst = np.rollaxis(img, 2)
    return channelsFirst - IMAGE_MEAN

# for efficient memory storage:
# since the raw pixel values are between 0 and 255, numpy's int16 datatype could be used;
# secondly, the image array could be flattened to store it efficiently as row-major,
# for which we would use numpy's ravel function (it avoids a copy whenever possible).
# NOTE: the function below does neither -- it appends unflattened, mean-subtracted float arrays.
def generateDataForImages(sources=(('500 Testing/New Real 4/', 1),
                                   ('500 Testing/New Fake 4/', 0))):
    """Augment every image in the source directories and append the results to the
    module-level `images` / `labels` arrays.

    For each readable image (resized to 224x224 first) this appends:
      * 169 translations  (13 x 13 offsets, -30..30 px in steps of 5 on both axes)
      * 41 rotations      (-10..10 degrees in steps of 0.5 about the image centre)
      * 30 zoom-in and 30 zoom-out perspective warps
    i.e. 270 samples per source image, each mean-subtracted via `perCentreMean`.

    Parameters
    ----------
    sources : iterable of (directory, label) pairs
        Directories to read and the integer label stored for every sample derived
        from them. The default processes the real notes (label 1) first and then
        the fake ones (label 0), so the stored rows are grouped by class.

    Files that OpenCV cannot decode are reported and skipped.
    """
    noOfRows = 224
    noOfCols = 224
    frameSize = (noOfCols, noOfRows)  # OpenCV expects (width, height)
    centre = (noOfCols / 2.0, noOfRows / 2.0)
    fullFrame = _cornerPoints(0, noOfCols, noOfRows)
    # Integer-driven steps: np.arange with a float step may yield 30 or 31 values
    # depending on rounding, so build exactly 30 steps (0.001 .. 0.030) explicitly.
    # NOTE(review): _cornerPoints truncates to whole pixels, so several neighbouring
    # steps map to the same corners and therefore produce identical images.
    zoomSteps = [k * 0.001 for k in range(1, 31)]

    for directory, label in sources:
        # sorted() makes the order of the stored rows reproducible across runs
        for filename in sorted(os.listdir(directory)):
            print(filename)
            img = cv2.imread(os.path.join(directory, filename))
            if img is None:
                # imread returns None for unreadable / non-image files
                print('skipping unreadable file: ' + filename)
                continue
            img = resize(img)

            # TRANSLATIONS
            # (6 + 6 + 1) x (6 + 6 + 1) = 169 images,
            # stride of 5 pixels along both axes in all 4 directions
            for x in range(30, -35, -5):
                for y in range(30, -35, -5):
                    translationMatrix = np.float32([[1, 0, x], [0, 1, y]])
                    _storeSample(cv2.warpAffine(img, translationMatrix, frameSize), label)

            # ROTATIONS
            # 41 different angles in the range -10 to 10 degrees, step 0.5
            for angle in range(20, -21, -1):
                rotationMatrix = cv2.getRotationMatrix2D(centre, angle / 2.0, 1)
                _storeSample(cv2.warpAffine(img, rotationMatrix, frameSize), label)

            # PROJECTIVE TRANSFORMATIONS for ZOOMING IN AND ZOOMING OUT
            # (30 + 30) images
            # 1ST ZOOMING IN: stretch an inset rectangle onto the full frame
            for step in zoomSteps:
                zoomMatrix = cv2.getPerspectiveTransform(
                    _cornerPoints(step, noOfCols, noOfRows), fullFrame)
                _storeSample(cv2.warpPerspective(img, zoomMatrix, frameSize), label)
            # 2ND ZOOMING OUT: shrink the full frame onto an inset rectangle
            for step in zoomSteps:
                zoomMatrix = cv2.getPerspectiveTransform(
                    fullFrame, _cornerPoints(step, noOfCols, noOfRows))
                _storeSample(cv2.warpPerspective(img, zoomMatrix, frameSize), label)


def _cornerPoints(inset, noOfCols, noOfRows):
    """Return the float32 corners (top-left, top-right, bottom-left, bottom-right) of the
    image frame pulled inwards by the fraction `inset` on every side (0 = full frame)."""
    left = int(inset * (noOfCols - 1))
    top = int(inset * (noOfRows - 1))
    right = int((1 - inset) * (noOfCols - 1))
    bottom = int((1 - inset) * (noOfRows - 1))
    return np.float32([[left, top], [right, top], [left, bottom], [right, bottom]])


def _storeSample(img, label):
    """Mean-subtract `img` and append it, with `label`, to the `images` / `labels` arrays."""
    sample = perCentreMean(img)
    # Explicit float32 cast (the `images` array is created with a float32 atom);
    # the previously used `floatX` helper is not defined or imported in this notebook.
    images.append(np.asarray(sample[np.newaxis], dtype=np.float32))
    labels.append(np.int32([label]))
In [None]:
# Run the augmentation; it appends to the `images` and `labels` arrays of the open HDF5 file.
generateDataForImages()

In [None]:
# Close the HDF5 file opened in the first cell once all data has been appended.
file.close()