In [1]:
import HelperFuncs as hfuncs
import numpy as np
from sklearn.model_selection import train_test_split
from keras.utils.data_utils import Sequence

Using TensorFlow backend.


We're going to first try training a CNN on the individual images.
We will be using binary cross entropy across the 17 regions.

In [18]:
BATCH_SIZE = 20
FINAL_WIDTH = 400
FINAL_HEIGHT = 600
CHANNELS = 1
ZONES = 17
BUCKET_NAME = 'miscdatastorage'
DATA_DIR = 'DHSData/'
TEMP_DIR = 'temp'
LABELS_DIR = r'stage1_labels.csv'
EXTENSION = '.a3daps'
np.random.seed(0)

#Define a generator function
class myGenerator:
    #AWS and Directory information 
    bucketName = BUCKET_NAME
    dataDir = DATA_DIR
    temp_dir = TEMP_DIR
    labels_dir = LABELS_DIR
    #Connect to AWS
    key_id, secret_key = hfuncs.GetAWSCredentials()
    client = hfuncs.GetAWSClient(key_id,secret_key)
    bucket = client.Bucket(bucketName)
    extension = EXTENSION
    #labels and keys
    labels_dict = hfuncs.GetLabelsDict(labels_dir)
    key_ary = None
    #Batch information
    n_samples = 0
    batch_size = 0
    #Requester
    batch_requester = None
    #Initialize required parameters
    def __init__(self,keys,n_samples,batch_size=BATCH_SIZE):
        self.key_ary = keys
        self.n_samples = n_samples
        self.batch_size = batch_size
        #Initialize AWS Batch Requester
        self.batchrequester = hfuncs.BatchRequester(self.bucket,self.key_ary,self.labels_dict,self.dataDir,self.temp_dir,self.extension)
    def GenerateSamples(self):
        '''Returns generator that retireves n_sample scans at a time,
        mixes each scan-slice image into a meta-batch, and returns mini-batches of 
        BATCH_SIZE'''
        #While there is data left, yield batch
        while self.batchrequester.DoItemsRemain():
            #Request data
            print("Retrieving data..")
            X,y = self.batchrequester.NextBatch(self.n_samples)
            n_angles = X.shape[3] #num angles (64)

            #Create efficient mapping for mixing and indexing batch data
            indexing_dict = {}
            order = np.arange(X.shape[0]*n_angles)
            np.random.shuffle(order)
            k = 0
            for s in range(X.shape[0]):
                for a in range(n_angles):
                    indexing_dict[order[k]]=[s,a]
                    k+=1

            print("Data retrieved and indexing computed.")

            #Initialize output arrays
            print("Initializing arrays...")
            X_train = np.zeros((X.shape[0]*n_angles,FINAL_WIDTH,FINAL_HEIGHT,CHANNELS))
            y_train = np.zeros((X.shape[0]*n_angles,ZONES))
            print("Arrays initialized")

            #Set counter to 0, channel to 1
            chan = 0 #No need to iterate here
            i = 0
            #Clean each image and store it in output
            while i < X.shape[0] * n_angles:
                j = i
                while j < i+BATCH_SIZE:
                    s,a = indexing_dict[j]                    
                    X_train[j,:,:,chan] = hfuncs.CropCleanResize(X[s,:,:,a],FINAL_WIDTH,FINAL_HEIGHT)
                    y_train[j,:] = y[s,:]
                    j += 1

                yield X_train[i:i+BATCH_SIZE,:,:,:],y_train[i:i+BATCH_SIZE]
                i += BATCH_SIZE
class Sequencer(Sequence):
    generatorInstance = None
    
    def __init__(self,keys,n_samples,batch_size=BATCH_SIZE):
        self.keys = keys
        self.generatorInstance = myGenerator(keys,n_samples)
        self.batch_size = batch_size
        self.n_samples = n_samples
        #Request data
        print("Retrieving data..")
        self.X,self.y = self.generatorInstance.batchrequester.NextBatch(self.n_samples)
        self.n_angles = self.X.shape[3] #num angles (64)

        #Create efficient mapping for mixing and indexing batch data
        self.indexing_dict = {}
        order = np.arange(self.X.shape[0]*self.n_angles)
        np.random.shuffle(order)
        k = 0
        for s in range(self.X.shape[0]):
            for a in range(self.n_angles):
                self.indexing_dict[order[k]]=[s,a]
                k+=1

        print("Data retrieved and indexing computed.")

        #Initialize output arrays
        print("Initializing arrays...")
        self.X_train = np.zeros((self.X.shape[0]*self.n_angles,FINAL_WIDTH,FINAL_HEIGHT,CHANNELS))
        self.y_train = np.zeros((self.X.shape[0]*self.n_angles,ZONES))
        print("Arrays initialized")
        
        #Channel to 1
        self.chan = 0 #No need to iterate here
        
    def __len__(self):
        return (self.X.shape[0]*self.n_angles)//self.batch_size
    def __getitem__(self,idx):
        j = idx*self.batch_size
        while j < (idx+1)*self.batch_size:
            s,a = self.indexing_dict[j]
            self.X_train[j,:,:,self.chan] = hfuncs.CropCleanResize(self.X[s,:,:,a],FINAL_WIDTH,FINAL_HEIGHT)
            self.y_train[j,:] = self.y[s,:]
            j += 1
        return self.X_train[idx*self.batch_size:(idx+1)*self.batch_size,:,:,:],self.y_train[idx*self.batch_size:(idx+1)*self.batch_size]
        

def CleanKeyAry(key_ary,labels_dict,dataDir,extension):
    '''Taken from the BatchRequester class'''
    key_ary_new=[]
    for key in key_ary:
        img_id = key.strip().replace(dataDir,'').replace(extension,'')
        if img_id in labels_dict.keys():
            key_ary_new.append(key)
        else:
            continue
    return key_ary_new

def getTrainTestValData(labels_dir=LABELS_DIR,extension=EXTENSION,dataDir=DATA_DIR,bucketName=BUCKET_NAME):
    '''Retrieves all samples that have corresponding labels 
    and splits data into a train, test, val set. '''
    #Labels        
    labels_dict = hfuncs.GetLabelsDict(labels_dir)
    
    #AWS Bucket
    key_id, secret_key = hfuncs.GetAWSCredentials()
    client = hfuncs.GetAWSClient(key_id,secret_key)
    bucket = client.Bucket(bucketName)
    
    #Get shuffled keys and separate into train,test,and validation
    key_ary = hfuncs.GetShuffledKeys(bucket)
    key_ary = CleanKeyAry(key_ary,labels_dict,dataDir,extension)
    K_train,K_test = train_test_split(key_ary,test_size=0.20,random_state=0)
    K_train,K_val = train_test_split(K_train,test_size=0.25,random_state=0) #0.80*0.25 = 0.20 validation 
    
    return K_train,K_test,K_val     
    

In [19]:
K_train,K_test,K_val = getTrainTestValData()
sequencer=ValidationSequencer(K_train,5,batch_size=10)
X,Y = sequencer.__getitem__(2)

Retrieving data..
Data retrieved and indexing computed.
Initializing arrays...
Arrays initialized


In [34]:
from keras.layers import Input, Dense, Conv2D, MaxPooling2D , AveragePooling2D,Flatten
from keras.models import Model
from keras.layers.core import Dropout
from keras.layers.normalization import BatchNormalization
import keras

#Build Basic model
input_img = Input(shape=(FINAL_WIDTH,FINAL_HEIGHT,CHANNELS))
input_norm = BatchNormalization(axis=3)(input_img)

pooling_1 = MaxPooling2D((2,2),padding='same')(input_img)

tower_1 = Conv2D(64, (1, 1), padding='same', activation='relu')(pooling_1)
tower_1 = Conv2D(64, (3, 3), padding='same', activation='relu')(tower_1)

tower_2 = Conv2D(64, (1, 1), padding='same', activation='relu')(pooling_1)
tower_2 = Conv2D(64, (5, 5), padding='same', activation='relu')(tower_2)

tower_3 = MaxPooling2D((2, 2), strides=(1, 1), padding='same')(pooling_1)
tower_3 = Conv2D(64, (1, 1), padding='same', activation='relu')(tower_3)

output_inception = keras.layers.concatenate([tower_1, tower_2, tower_3], axis=1)

pooling_2 = MaxPooling2D((3,2),padding='same')(output_inception)
#pooling_2 = BatchNormalization(axis=3)(pooling_2)

tower_1_2 = Conv2D(128, (1, 1), padding='same', activation='relu')(pooling_2)
tower_1_2 = Conv2D(128, (3, 3), padding='same', activation='relu')(tower_1_2)

tower_2_2 = Conv2D(128, (1, 1), padding='same', activation='relu')(pooling_2)
tower_2_2 = Conv2D(128, (5, 5), padding='same', activation='relu')(tower_2_2)

tower_3_2 = MaxPooling2D((2, 2), strides=(1, 1), padding='same')(pooling_2)
tower_3_2 = Conv2D(128, (1, 1), padding='same', activation='relu')(tower_3_2)

output_inception_2 = keras.layers.concatenate([tower_1_2, tower_2_2, tower_3_2], axis=1)

#output_inception_2 = Dropout(0.10)(output_inception_2)
output_inception_2 = MaxPooling2D((2, 1),strides=(2,1), padding='same')(output_inception_2)

conv_3 = Conv2D(256, (1, 1), padding='same', activation='relu')(output_inception_2)
last = Flatten()(conv_3)

#List of independent guesses for each zone
output_nodes = []
for i in range(ZONES):
    output_nodes.append(Dense(1,activation='sigmoid')(last))

out = keras.layers.concatenate(output_nodes)

multi_label_model = Model(input_img, out)


In [None]:
from datetime import datetime
from keras.callbacks import TensorBoard
from keras.optimizers import SGD
from keras import metrics
from keras import optimizers

#Now we create generators for training,testing,and validation
K_train,K_test,K_val = getTrainTestValData()

s = 20
train_gen = myGenerator(K_train,s).GenerateSamples()
test_gen = myGenerator(K_test,s).GenerateSamples()
val_gen = myGenerator(K_val,s).GenerateSamples()

#Set up learning
x = datetime.today()
stamp = "{}-{}-{}_{}:{}:{}".format(x.year,x.month,x.day,x.hour,x.minute,x.second)
tensorboard = TensorBoard(log_dir="logs/{}".format(stamp))

adam_op = optimizers.Adam(lr=0.001,beta_1=0.9,beta_2=0.999)
multi_label_model.compile(optimizer=optimizers.Adam,
                          metrics=[metrics.binary_accuracy],
                         loss= 'binary_crossentropy')
train_gen = Sequencer(K_train,n_samples=len(K_train))
val_seq = Sequencer(K_val,n_samples = len(K_val))
max_val_steps = val_seq.__len__()
hist = multi_label_model.fit_generator(train_gen,validation_data=val_seq,validation_steps=max_val_steps,
                                       steps_per_epoch=20,
                                       epochs=20,callbacks=[tensorboard],
                                      use_multiprocessing =True,workers=4)


Epoch 1/5
Retrieving data..
Data retrieved
Initializing arrays...
Arrays initialized
Processing image 0
(400, 600)
Processing image 1
(400, 600)
Processing image 2
(400, 600)
Processing image 3
(400, 600)
Processing image 4
(400, 600)
Processing image 5
(400, 600)
Processing image 6
(400, 600)
Processing image 7
(400, 600)
Processing image 8
(400, 600)
Processing image 9
(400, 600)
Processing image 10
(400, 600)
Processing image 11
(400, 600)
Processing image 12
(400, 600)
Processing image 13
(400, 600)
Processing image 14
(400, 600)
Processing image 15
(400, 600)
Processing image 16
(400, 600)
Processing image 17
(400, 600)
Processing image 18
(400, 600)
Processing image 19
(400, 600)
Processing image 0
(400, 600)
Processing image 1
(400, 600)
Processing image 2
(400, 600)
Processing image 3
(400, 600)
Processing image 4
(400, 600)
Processing image 5
(400, 600)
Processing image 6
(400, 600)
Processing image 7
(400, 600)
Processing image 8
(400, 600)
Processing image 9
(400, 600)
Proce

In [None]:
np.random.set_state