In [2]:
import keras
from keras.preprocessing.image import ImageDataGenerator
from keras.layers import Conv2D, MaxPooling2D, Dense, BatchNormalization, Dropout, Flatten, Activation, Lambda, Input
from keras.layers.convolutional import ZeroPadding2D
from keras.models import Sequential
from keras.optimizers import Adam
from keras.regularizers import l2
from keras.layers.core import Lambda
from keras import initializers
from keras.utils import get_file
import tensorflow as tf
import numpy as np
import os

Using TensorFlow backend.


In [1]:
import math
import os
import numpy as np
from keras.preprocessing.image import Iterator
from keras.preprocessing.image import img_to_array, load_img
from keras.preprocessing.image import apply_transform, transform_matrix_offset_center
import keras.backend as K
from keras.preprocessing.image import random_rotation, random_shear, random_shift, random_zoom
from skimage import exposure
# Hard-coded absolute dataset path; nothing else in the visible cells reads it.
# NOTE(review): presumably this (or a sub-directory of it) is what later cells pass
# as `directory` / `map_file` to ClassDataGen -- confirm, and consider making it
# configurable instead of tying the notebook to one machine's filesystem layout.
data_path = '/work/04381/ymarathe/maverick/yearbook/'

class ClassDataGen:
    """Batch generator producing images with one-hot year (and optional gender) labels.

    On construction the generator collects every ``.png`` file below ``directory``
    and reads ``map_file``, a tab-separated file whose lines look like
    ``<gender>/<image file>\\t<year>`` with ``<gender>`` being ``M`` or ``F``.

    When ``trainValidSplit`` is given, the file list is treated as a ring and a
    contiguous window of ``numValidIdx`` positions starting at ``validIdx1`` (and
    ending just before ``validIdx2``) is reserved for validation.  A training
    generator picks the window at random; a validation generator must receive
    the same window through ``setBounds(*train_gen.getBounds())``.

    Parameters
    ----------
    directory : str
        Root directory that is searched recursively for ``.png`` files.
    map_file : str
        Path of the tab-separated image-to-year mapping file.
    target_size : tuple
        Shape of one sample in a batch; the first two entries are also used as
        the size images are loaded at.
    class_weights_train : mapping or None
        Optional mapping from a year (as stored in ``map_file``) to a sample
        weight.  When given, ``next`` also returns per-sample weights.
    multi_output : bool
        When true, ``next`` returns dictionaries keyed ``'out_year'`` and
        ``'out_gender'`` instead of a single label array.
    do_augmentation : bool
        Apply random shift / rotation / shear / zoom to every sample.
    samplewise_center, samplewise_std_deviation : bool
        Subtract each sample's mean / divide each sample by its standard deviation.
    year2idx : mapping
        Maps a year (as stored in ``map_file``) to its class index.  Required
        by ``next`` even though it defaults to ``None``.
    data_aug_factor : number
        Multiplier applied to the number of training steps.
    trainValidSplit : float or None
        Fraction of the files used for training.  ``None`` disables the split,
        in which case the generator serves every file.
    isTraining : bool
        Whether this generator serves the training or the validation partition.
    batch_size, shuffle, seed
        Forwarded to the Keras ``Iterator`` that produces index batches.
    """

    def __init__(self, directory, map_file, 
                 target_size = (171, 186, 3), 
                 class_weights_train = None, 
                 multi_output=False, 
                 do_augmentation=True, 
                 samplewise_center = True,
                 samplewise_std_deviation = True,
                 year2idx = None,
                 data_aug_factor = 1,
                 trainValidSplit = None,
                 isTraining = False,
                 batch_size = 32,
                 shuffle = True,
                 seed = 42,
                ):
        self.directory = directory
        self.map_file = map_file
        self.filenames = []
        self.map = {}
        self.fnameToGender = {}
        self.target_size = target_size
        self.populate_filenames()
        self.populate_mapping()

        # Per-sample normalisation switches.
        self.samplewise_center = samplewise_center
        self.samplewise_std_deviation = samplewise_std_deviation

        # Fixed augmentation settings consumed by augment_data().
        self.height_shift_range = 0.2
        self.width_shift_range = 0.2
        self.max_rotation = 45
        self.shear = 0.785398
        self.zoom_range = (0.5, 0.5)
        self.do_augmentation = do_augmentation

        self.class_weights_train = class_weights_train
        self.equalizehist = False
        self.multi_output = multi_output
        self.lastN = []  # file names of the most recently produced batch
        self.year2idx = year2idx
        self.batch_size = batch_size
        self.data_aug_factor = data_aug_factor
        self.regressIter = Iterator(len(self.filenames), batch_size = self.batch_size,
                                    shuffle = shuffle, seed = seed)
        self.trainValidSplit = trainValidSplit
        self.isTraining = isTraining

        num_files = len(self.filenames)
        if trainValidSplit is None:
            # No split requested: the whole file list belongs to this generator.
            self.numTrainingIdx = num_files if isTraining else 0
        else:
            self.numTrainingIdx = int(trainValidSplit * num_files)
        self.numValidIdx = num_files - self.numTrainingIdx

        # Always defined so getBounds() never raises AttributeError; a validation
        # generator keeps None until setBounds() is called.
        self.validIdx1 = None
        self.validIdx2 = None
        self.train_test_split()

        # Float division keeps the partial last batch under Python 2 as well.
        if self.isTraining:
            self.steps = int(math.ceil(self.numTrainingIdx / float(batch_size))) * self.data_aug_factor
        else:
            self.steps = int(math.ceil(self.numValidIdx / float(batch_size)))

    def train_test_split(self):
        """Pick a random validation window (training generators only)."""
        if self.isTraining and self.filenames:
            num_files = len(self.filenames)
            self.validIdx1 = np.random.randint(0, num_files)
            self.validIdx2 = (self.validIdx1 + self.numValidIdx) % num_files

    def getBounds(self):
        """Return ``(validIdx1, validIdx2)``; both are ``None`` until they are set."""
        return (self.validIdx1, self.validIdx2)

    def setBounds(self, validIdx1, validIdx2):
        """Adopt a validation window; ignored on training generators."""
        if not self.isTraining:
            self.validIdx1 = validIdx1
            self.validIdx2 = validIdx2

    def _recursive_list(self, subpath):
        """Return the ``os.walk`` tuples below ``subpath`` sorted by directory path."""
        return sorted(
            os.walk(subpath, followlinks=False), key=lambda tpl: tpl[0])

    def populate_mapping(self):
        """Fill ``self.map`` (image -> year) and ``self.fnameToGender`` (image -> 0/1).

        Raises
        ------
        ValueError
            If a line is malformed or its gender prefix is neither ``M`` nor ``F``.
        """
        gender_codes = {'M': 1, 'F': 0}

        # The context manager closes the file even if a line fails to parse.
        with open(self.map_file, 'r') as f:
            for line in f:
                line = line.rstrip()
                if not line:
                    continue  # tolerate blank lines, e.g. at the end of the file
                image, year = line.split("\t")
                gender, _ = image.split("/")
                if gender not in gender_codes:
                    # Previously an unknown prefix silently reused the value of
                    # the preceding line (or raised NameError on the first one).
                    raise ValueError(
                        "Unknown gender prefix %r in mapping entry %r" % (gender, image))
                self.fnameToGender[image] = gender_codes[gender]
                self.map[image] = year

    def populate_filenames(self):
        """Append the path (relative to ``self.directory``) of every ``.png`` file."""
        base_dir = self.directory
        for root, _, files in self._recursive_list(base_dir):
            for fname in files:
                if fname.lower().endswith('.png'):
                    self.filenames.append(os.path.relpath(os.path.join(root, fname), base_dir))

    def preprocess(self, x):
        """Apply histogram equalisation when ``self.equalizehist`` is set."""
        if self.equalizehist:
            x = exposure.equalize_hist(x)

        return x

    def augment_data(self, x):
        """Return ``x`` after random shift, rotation, shear and zoom (channels last)."""
        x = random_shift(x, self.width_shift_range, self.height_shift_range, 
                         row_axis=0, col_axis = 1, channel_axis = 2)
        x = random_rotation(x, self.max_rotation, 
                            row_axis = 0, col_axis = 1, channel_axis = 2)
        x = random_shear(x, self.shear, row_axis = 0, col_axis = 1, channel_axis = 2)
        x = random_zoom(x, self.zoom_range, row_axis = 0, col_axis = 1, channel_axis = 2)

        return x

    def oneHotEncodeYear(self, year):
        """Return a one-hot list of length ``len(self.year2idx)`` for ``year``."""
        oneHotVec = [0] * len(self.year2idx)
        oneHotVec[self.year2idx[year]] = 1
        return oneHotVec

    def oneHotEncodeGender(self, gender):
        """Return ``[1, 0]`` for gender code 0 and ``[0, 1]`` for gender code 1."""
        oneHotVec = [0, 0]
        oneHotVec[gender] = 1
        return oneHotVec

    def _valid_span(self):
        """Return ``(start, width)`` of the circular validation window."""
        if self.validIdx1 is None or self.validIdx2 is None:
            raise ValueError(
                "Validation bounds are not set; call setBounds() with the "
                "training generator's getBounds() first")
        num_files = len(self.filenames)
        width = (self.validIdx2 - self.validIdx1) % num_files
        if width == 0 and self.numValidIdx == num_files:
            # Equal bounds mean "empty" or "everything"; numValidIdx disambiguates.
            width = num_files
        return self.validIdx1, width

    def modifyIdxArray(self, idx_array):
        """Replace, in place, indices that belong to the other partition.

        The validation partition is the circular half-open window
        ``[validIdx1, validIdx2)``; everything else is training data.  Every
        index outside this generator's own partition is swapped for a random
        index inside it.  The (mutated) array is also returned.

        Raises
        ------
        ValueError
            If the bounds are unset or this generator's partition is empty
            while an index needs to be replaced.
        """
        num_files = len(self.filenames)
        if num_files == 0:
            return idx_array

        start, width = self._valid_span()
        if self.isTraining:
            # Training data is the complement of the window, starting right after it.
            pool_start, pool_size = (start + width) % num_files, num_files - width
        else:
            pool_start, pool_size = start, width

        for i, elem in enumerate(idx_array):
            in_valid = (elem - start) % num_files < width
            belongs_here = (not in_valid) if self.isTraining else in_valid
            if belongs_here:
                continue
            if pool_size == 0:
                raise ValueError("No samples available in this generator's partition")
            idx_array[i] = (pool_start + np.random.randint(0, pool_size)) % num_files

        return idx_array

    def next(self, *args, **kwargs):
        """Produce the next batch.

        Returns ``(batch_x, batch_y)`` or, with class weights,
        ``(batch_x, batch_y, sample_weights)``.  With ``multi_output`` the
        labels (and weights) are dictionaries keyed ``'out_year'`` and
        ``'out_gender'``.  ``self.lastN`` lists the files used for the batch.
        """
        self.lastN = []

        idx_array, _, _ = next(self.regressIter.index_generator)

        if self.trainValidSplit is not None:
            idx_array = self.modifyIdxArray(idx_array)

        batch_len = len(idx_array)
        use_weights = self.class_weights_train is not None

        batch_x = np.zeros((batch_len,) + tuple(self.target_size), dtype=K.floatx())
        batch_y = np.zeros((batch_len, len(self.year2idx)), dtype=K.floatx())

        if self.multi_output:
            batch_y_gender = np.zeros((batch_len, 2), dtype=K.floatx())

        if use_weights:
            sample_weights = np.ones((batch_len,), dtype=K.floatx())

        for i, j in enumerate(idx_array):
            fname = self.filenames[j]
            self.lastN.append(fname)
            img = load_img(
                  os.path.join(self.directory, fname),
                  grayscale = True,
                  # load_img only needs (height, width); drop the channel entry.
                  target_size = tuple(self.target_size[:2]))
            x = np.array(img_to_array(img, data_format='channels_last'))
            x = self.preprocess(x)
            # NOTE(review): the image is loaded as grayscale; with a multi-channel
            # target_size this assignment presumably broadcasts the single channel.
            batch_x[i] = x
            batch_y[i, :] = self.oneHotEncodeYear(self.map[fname])

            if self.multi_output:
                batch_y_gender[i, :] = self.oneHotEncodeGender(self.fnameToGender[fname])

            if use_weights:
                # Weights are keyed by year in both single- and multi-output mode.
                sample_weights[i] = self.class_weights_train[self.map[fname]]

        if self.samplewise_center:
            for x in batch_x:
                x -= np.mean(x)

        if self.samplewise_std_deviation:
            for x in batch_x:
                # Epsilon keeps constant images from turning into NaN/inf.
                x /= (np.std(x) + K.epsilon())

        if self.do_augmentation:
            # Write the result back: rebinding a loop variable would leave
            # batch_x untouched and silently disable augmentation.
            for i in range(batch_len):
                batch_x[i] = self.augment_data(batch_x[i])

        if self.multi_output:
            labels = {'out_year' : batch_y, 'out_gender': batch_y_gender}
            if use_weights:
                return batch_x, labels, {'out_year' : sample_weights, 'out_gender' : sample_weights}
            return batch_x, labels

        if use_weights:
            return (batch_x, batch_y, sample_weights)
        return (batch_x, batch_y)

    def __next__(self, *args, **kwargs):
        """Python 3 iterator protocol; delegates to ``next``."""
        return self.next(*args, **kwargs)

    def __iter__(self):
        """The generator is its own iterator."""
        return self

Using TensorFlow backend.
