We're going to first try training a CNN on the individual images.
We will be using binary cross entropy across the 17 regions.

In [3]:
import HelperFuncs as hfuncs
import numpy as np
from sklearn.model_selection import train_test_split
from keras.utils.data_utils import Sequence
import h5py
import os
import gc 

# Run a garbage-collection pass before (re)building this cell's state
gc.collect()

# Batch size, image geometry and label layout shared by the cells below
BATCH_SIZE, CHANNELS = 1, 1
FINAL_WIDTH, FINAL_HEIGHT = 400, 600
ZONES, ANGLES = 17, 16

# The sequencers download batch files into these local directories,
# so create any that are missing
for scan_dir in ('temp/train_scan/', 'temp/test_scan/', 'temp/val_scan/'):
    if os.path.isdir(scan_dir):
        continue
    print("Created directory: {}".format(scan_dir))
    os.makedirs(scan_dir)
        
class ScanSequencer(Sequence):
    """Keras ``Sequence`` that streams full-scan batches from an S3 bucket.

    Every item is assembled by downloading ``<mode>/batch_<i>.hdf5`` into
    ``temp/<mode>/``, reading its ``/image`` and ``/labels`` datasets and
    deleting the local copy again.
    """
    # Not used by this class; kept so existing references to the attribute still resolve.
    idx_dict={}

    def __init__(self,num_batches,bucket_name,mode="train_scan"):
        """
        Args:
            num_batches: value reported by ``len(self)``.
            bucket_name: name of the S3 bucket that holds the batch files.
            mode: key prefix in the bucket and sub-directory of ``temp/``.
        """
        self.num_batches = num_batches
        self.bucket_name = bucket_name
        self.mode = mode
        self.key_id, self.secret_key = hfuncs.GetAWSCredentials()
        # ANGLES evenly spaced indices taken from range(64)
        self.angles = np.arange(0,64,64//ANGLES)

    def __len__(self):
        """Number of batches in the sequence."""
        return self.num_batches

    def on_epoch_end(self):
        """Nothing to do between epochs."""
        pass

    def __getitem__(self,idx):
        """Download and return batch ``idx``.

        Returns:
            ``(X, y)`` where ``X`` has shape
            ``(BATCH_SIZE, ANGLES, FINAL_WIDTH, FINAL_HEIGHT, CHANNELS)`` and
            ``y`` has shape ``(BATCH_SIZE, ZONES)``.
        """
        # A client is created on every call, as in the original code
        # (presumably so worker processes never share one -- TODO confirm).
        client = hfuncs.GetAWSClient(self.key_id,self.secret_key)
        bucket = client.Bucket(self.bucket_name)

        X_train = np.zeros((BATCH_SIZE,ANGLES,FINAL_WIDTH,FINAL_HEIGHT,CHANNELS))
        y_train = np.zeros((BATCH_SIZE,ZONES))

        for j, i in enumerate(range(idx*BATCH_SIZE,(idx+1)*BATCH_SIZE)):
            path = "temp/{}/batch_{}.hdf5".format(self.mode,i)
            key = "{}/batch_{}.hdf5".format(self.mode,i)
            try:
                bucket.download_file(Key=key,Filename=path)
                with h5py.File(path,"r") as f:
                    # dataset[()] replaces the deprecated Dataset.value accessor
                    X_train[j,:,:,:,:] = f['/image'][()][self.angles,:,:,:]
                    y_train[j,:] = f['/labels'][()]
            finally:
                # Always drop the local copy, including when the file
                # could not be opened or read.
                if os.path.exists(path):
                    os.remove(path)
        return X_train, y_train
class LegScanSequencer(Sequence):
    """Keras ``Sequence`` yielding cropped scans with a single binary leg label.

    Every item is assembled by downloading ``<mode>/batch_<i>.hdf5`` into
    ``temp/<mode>/``, keeping the first ``FINAL_HEIGHT*3//5`` entries of the
    image's third axis, and collapsing the selected label entries into one
    value with ``max``. The local copy is deleted afterwards.
    """
    # Not used by this class; kept so existing references to the attribute still resolve.
    idx_dict={}

    # Label indices reduced into the single target value.
    # NOTE(review): the original local was called ``r_leg`` but spans 7..15 --
    # confirm which zones these indices correspond to.
    _LEG_LABEL_IDX = [7,8,9,10,11,12,13,14,15]

    def __init__(self,num_batches,bucket_name,mode="train_scan",batch_size=BATCH_SIZE):
        """
        Args:
            num_batches: value reported by ``len(self)``.
            bucket_name: name of the S3 bucket that holds the batch files.
            mode: key prefix in the bucket and sub-directory of ``temp/``.
            batch_size: number of batch files combined into one item.
        """
        self.num_batches = num_batches
        self.bucket_name = bucket_name
        self.mode = mode
        self.key_id, self.secret_key = hfuncs.GetAWSCredentials()
        # ANGLES evenly spaced indices taken from range(64)
        self.angles = np.arange(0,64,64//ANGLES)
        self.batch_size = batch_size

    def __len__(self):
        """Number of batches in the sequence."""
        return self.num_batches

    def on_epoch_end(self):
        """Nothing to do between epochs."""
        pass

    def __getitem__(self,idx):
        """Download and return batch ``idx``.

        Returns:
            ``(X, y, sample_weights)`` with shapes
            ``(batch_size, ANGLES, FINAL_WIDTH, FINAL_HEIGHT*3//5, CHANNELS)``,
            ``(batch_size, 1)`` and ``(batch_size,)``.
        """
        # A client is created on every call, as in the original code
        # (presumably so worker processes never share one -- TODO confirm).
        client = hfuncs.GetAWSClient(self.key_id,self.secret_key)
        bucket = client.Bucket(self.bucket_name)

        crop = FINAL_HEIGHT*3//5
        X_train = np.zeros((self.batch_size,ANGLES,FINAL_WIDTH,crop,CHANNELS))
        y_train = np.zeros((self.batch_size,1))
        s_weights = np.zeros((self.batch_size))

        for j, i in enumerate(range(idx*self.batch_size,(idx+1)*self.batch_size)):
            path = "temp/{}/batch_{}.hdf5".format(self.mode,i)
            key = "{}/batch_{}.hdf5".format(self.mode,i)
            try:
                bucket.download_file(Key=key,Filename=path)
                with h5py.File(path,"r") as f:
                    # dataset[()] replaces the deprecated Dataset.value accessor
                    X_train[j,:,:,:,:] = f['/image'][()][self.angles,:,:crop,:]
                    r_y = np.amax(f['/labels'][()][self._LEG_LABEL_IDX])
                y_train[j,:] = r_y
                # Evaluates to 1 for any finite label, i.e. uniform sample
                # weights; the expression is kept so results are unchanged.
                s_weights[j] = np.squeeze(r_y*0 + 1)
            finally:
                # Always drop the local copy, including when the file
                # could not be opened or read.
                if os.path.exists(path):
                    os.remove(path)
        return X_train, y_train,s_weights



In [4]:
from twilio.rest import Client
import configparser
from keras.callbacks import Callback

class SMSNotifier(Callback):
    """Keras callback that texts the training/validation loss through Twilio.

    Twilio credentials are read from the ``AccountID`` and ``AuthToken``
    entries of the ``DEFAULT`` section of ``config_path`` each time a
    message is sent.
    """

    def __init__(self, to_number="+16178884129", from_number="+18572142288",
                 config_path='twilio.conf', every=2):
        """
        Args:
            to_number: recipient phone number.
            from_number: sender phone number.
            config_path: ini file holding the Twilio credentials.
            every: send a message on epochs whose index is a multiple of
                this value (epoch indices as passed in by Keras).

        Raises:
            ValueError: if ``every`` is smaller than 1.
        """
        super(SMSNotifier, self).__init__()
        if every < 1:
            raise ValueError("every must be >= 1, got {}".format(every))
        # NOTE(review): the default numbers are kept only for backward
        # compatibility; prefer passing them in from configuration.
        self.to_number = to_number
        self.from_number = from_number
        self.config_path = config_path
        self.every = every

    def on_epoch_end(self,epoch,logs=None):
        """Send an SMS with the epoch's losses when the epoch index qualifies."""
        if epoch % self.every != 0:
            return

        #Get config credentials
        config = configparser.ConfigParser()
        config.read(self.config_path)
        account_sid = config['DEFAULT']['AccountID']
        auth_token = config['DEFAULT']['AuthToken']

        #Create message
        if logs is not None:
            body = "Epoch {} complete. Loss: {} Val_loss: {} ".format(epoch,
                                                                      logs.get('loss'),
                                                                      logs.get('val_loss'))
        else:
            body = "Epoch {} complete. No loss data available.".format(epoch)

        #Send message
        client = Client(account_sid, auth_token)
        client.messages.create(to=self.to_number,
                               from_=self.from_number,
                               body=body)

In [5]:
#Build pre-trained V2 model
import numpy as np
from keras.layers import Input,Flatten,Dense,Concatenate,Dropout,concatenate,GlobalMaxPool2D,GlobalAveragePooling1D
from keras.models import Model
from datetime import datetime
from keras.callbacks import TensorBoard,EarlyStopping,ModelCheckpoint,ReduceLROnPlateau
from keras.optimizers import Adam,Adadelta,SGD
from keras.metrics import binary_accuracy
from keras.layers.wrappers import TimeDistributed
from keras.layers.recurrent import GRUCell,GRU,LSTM
from keras.losses import binary_crossentropy
from keras.applications.inception_v3 import InceptionV3,preprocess_input
from keras.layers.core import Lambda
import tensorflow as tf
from keras import backend as K
from keras.initializers import RandomUniform,glorot_uniform,Orthogonal
from keras.layers.normalization import BatchNormalization
#from keras.applications.vgg19 import VGG19

def ScaleTanh(x):
    """Scaled hyperbolic tangent: ``1.7159 * tanh(2/3 * x)``."""
    gain = 1.7159
    slope = 2./3
    return gain * K.tanh(slope * x)
    
#K.set_learning_phase(1)
def ToRGB(x):
    """Rescale ``x`` so its minimum maps to 127 and its maximum to 255.

    The minimum and maximum are taken over the whole tensor and the
    rescaling uses floor division. If every element of ``x`` is equal the
    divisor ``max - min`` is zero.
    """
    max_v = tf.reduce_max(x)
    min_v = tf.reduce_min(x)
    upper = tf.constant(255,dtype=x.dtype)
    lower = tf.constant(255//2,dtype=x.dtype)

    shifted = tf.subtract(x,min_v)
    out_span = tf.subtract(upper,lower)
    scaled = tf.multiply(shifted,out_span)
    in_span = tf.subtract(max_v,min_v)
    return tf.add(tf.floordiv(scaled,in_span),lower)
def ToGreyScale(x):
    """Tile ``x`` three times along its last axis, leaving other axes as is.

    Assumes a channels-last layout, so a single-channel input comes out with
    three identical channels.
    """
    # One repetition for every leading axis, three for the final one
    multiples = [1 for _ in x.get_shape()[:-1]]
    multiples.append(3)
    return tf.tile(x,multiples)
def ToNewShape(x):
    """Swap the two axes preceding the last one, then flip along axis -3.

    Accepts 4-D or 5-D tensors.

    Raises:
        ValueError: for any other number of dimensions.
    """
    # Axis permutation to apply for each supported rank
    permutations = {
        5: [0,1,3,2,4],
        4: [0,2,1,3],
    }
    rank = len(x.shape)
    if rank not in permutations:
        raise ValueError("Unexpected number of dims!")
    swapped = tf.transpose(x,permutations[rank])
    return tf.reverse(swapped,[-3])
def ReduceTimeDist(x):
    """Reshape ``x`` to 2-D: (dynamic first dim, product of the last two static dims)."""
    static_shape = x.get_shape()
    flat_size = int(static_shape[-1] * static_shape[-2])
    leading = tf.shape(x)[0]
    return tf.reshape(x,[leading,flat_size])
def PrintActivation(x):
    """Return ``[mean, min, max]`` reduction tensors of ``x``.

    Despite the name, nothing is printed.
    """
    stats = [tf.reduce_mean(x), tf.reduce_min(x), tf.reduce_max(x)]
    return stats
def getSingleLegModel(lstm_dim=10):
    """Build a frozen InceptionV3 feature extractor applied to every angle of a scan.

    Each single-channel image is rescaled (``ToRGB``), tiled to three
    channels (``ToGreyScale``), run through Inception's ``preprocess_input``
    and re-oriented (``ToNewShape``). The outputs of Inception blocks
    ``mixed0`` .. ``mixed9`` are global-max-pooled and concatenated, the
    resulting per-image model is wrapped in ``TimeDistributed`` over the
    angle axis, and the per-angle vectors are flattened into one vector.

    Args:
        lstm_dim: unused; kept so existing callers keep working.

    Returns:
        A Keras ``Model`` taking input of shape
        ``(ANGLES, FINAL_WIDTH, FINAL_HEIGHT*3//5, CHANNELS)`` and returning
        one flat feature vector per scan.
    """
    leg_height = FINAL_HEIGHT*3//5

    #Single image input
    input_img = Input(shape=(FINAL_WIDTH,leg_height,CHANNELS))

    #Preprocess and expand to three channels
    input_img_pp = Lambda(ToRGB)(input_img)
    input_img_pp = Lambda(ToGreyScale)(input_img_pp)
    input_img_pp = Lambda(preprocess_input)(input_img_pp)
    input_img_pp = Lambda(ToNewShape)(input_img_pp)

    #Load InceptionV3 without its classifier head and freeze every layer
    incep = InceptionV3(include_top=False,
                        weights='imagenet',
                        input_tensor=None,
                        input_shape=(leg_height,FINAL_WIDTH,3),
                        pooling=None)  # was the string 'None'; no pooling is intended
    for l in incep.layers:
        l.trainable=False

    #Expose features at several depths: the outputs of mixed0 .. mixed9
    tap_outputs = [incep.get_layer('mixed{}'.format(n)).output for n in range(10)]
    reduced_net = Model(incep.input,tap_outputs)

    #Max-pool each tap and concatenate into one vector per image
    pooled = [GlobalMaxPool2D()(o) for o in reduced_net(input_img_pp)]
    output = Concatenate()(pooled)
    intermediate_model = Model(input_img,output)

    #Apply the per-image model to every angle of a scan
    input_scan = Input(shape=(ANGLES,FINAL_WIDTH,leg_height,CHANNELS))
    sequenced_model = TimeDistributed(intermediate_model)(input_scan)
    # Mark the tensor as learning-phase dependent (presumably so Keras feeds
    # the learning phase to the wrapped model -- TODO confirm).
    sequenced_model._uses_learning_phase = True

    #Flatten the (angle, feature) axes into a single feature axis
    out = Lambda(ReduceTimeDist)(sequenced_model)

    return Model(input_scan,out)
        



In [6]:
# Build the per-scan feature extractor. Despite the variable name, the model
# returned by getSingleLegModel contains no recurrent layer.
rnn_model = getSingleLegModel()


In [7]:
# One line per top-level layer: name, trainable flag and output shape
for layer in rnn_model.layers:
    print(layer.name, layer.trainable, layer.output_shape)



input_3 False (None, 16, 400, 360, 1)
time_distributed_1 True (None, 16, 8000)
lambda_5 True (None, 128000)


In [19]:
#Use the model as a feature extractor and use traditional ML to determine whether the features have any predictive power
import h5py
from keras import backend as K

TEMP_DIR = 'temp'
#Bucket with clean data
UPLOAD_BUCKET = 'cleandhsdata' #bucket where clean data was stored
key_id, secret_key = hfuncs.GetAWSCredentials()
client = hfuncs.GetAWSClient(key_id,secret_key)
bucket = client.Bucket(UPLOAD_BUCKET)

def _count_batches(bucket, mode, batch_size=BATCH_SIZE):
    """Number of whole batches stored under the ``<mode>/`` prefix of ``bucket``.

    One key is subtracted because the train/test/val root directories have
    their own keys. The listing is filtered by prefix on the S3 side, so it
    assumes batch keys start with ``<mode>/``. The result is clamped at 0 so
    an empty prefix cannot yield a negative count.
    """
    prefix = "{}/".format(mode)
    n_keys = sum(1 for _ in bucket.objects.filter(Prefix=prefix))
    return max(0, (n_keys - 1) // batch_size)

#Initialize train sequencer
mode ="train_scan"
num_batches_train = _count_batches(bucket, mode)
train_seq = LegScanSequencer(num_batches_train,UPLOAD_BUCKET,mode=mode)
#Initialize validation sequencer
mode = "val_scan"
num_batches_val = _count_batches(bucket, mode)
val_seq = LegScanSequencer(num_batches_val,UPLOAD_BUCKET,mode=mode)


#Create function that creates data set for given layer
def CreateFeatureDataSet(model,dir_name = 'fullfeatureextraction',max_batches=1200,modes=('val',)):
    """Run ``model`` over the scan sequencers and store the features in S3.

    For every requested mode, each item of the matching sequencer is passed
    through ``model.predict``; the flattened prediction and the item's label
    are collected, written to ``<TEMP_DIR>/<mode>_data.hdf5`` (datasets
    ``features`` and ``labels``) and uploaded to the module-level ``bucket``
    as ``<dir_name>/<mode>_data.hdf5``. The local file is left in place.

    Each sequencer item must hold exactly one scan: the flattened prediction
    is stored as a single row and only ``y[0,0]`` is kept as its label.

    Args:
        model: Keras model used as feature extractor; ``output_shape[1]``
            gives the feature length.
        dir_name: key prefix for the uploaded files.
        max_batches: upper bound on the number of items processed per mode.
        modes: which data sets to process, any of ``'train'`` and ``'val'``.
            A single string is accepted as well. Defaults to validation only.

    Raises:
        ValueError: if ``modes`` names an unknown data set.
    """
    if isinstance(modes, str):
        modes = (modes,)

    #Sequencer and batch count available for every mode
    sources = {'train': (num_batches_train, train_seq),
               'val': (num_batches_val, val_seq)}
    unknown = [m for m in modes if m not in sources]
    if unknown:
        raise ValueError("Unknown mode(s): {}".format(unknown))

    #Get output size
    output_size = model.output_shape[1]

    for mode in modes:
        num_b, gen = sources[mode]
        n_items = min(num_b,max_batches)

        #Initialize dataset arrays
        X_d = np.zeros((n_items,output_size))
        y_d = np.zeros((n_items))

        #Transform every item of the generator and store it in the dataset arrays
        for i in range(n_items):
            print("Storing {} in {} set...".format(i,mode))
            X, y, _ = gen[i]
            X_d[i,:] = model.predict(X).flatten()
            y_d[i] = y[0,0]

        #Store data set in s3
        key_suffix = "{}_data.hdf5".format(mode)
        filename = os.path.join(TEMP_DIR,key_suffix)
        key = "{}/{}".format(dir_name,key_suffix)

        #Save in local hdf5 file
        with h5py.File(filename,"w") as f:
            f.create_dataset('features',data=X_d)
            f.create_dataset('labels',data=y_d)

        #Upload file to bucket
        try:
            bucket.upload_file(Filename=filename,Key=key)
            print("Completed {} upload".format(mode))
        finally:
            print("Done")

        #Delete arrays to save memory before the next mode
        del X_d,y_d


In [20]:
# Extract features for the validation scans and upload them to S3.
# Long-running: every item triggers an S3 download and a forward pass.
CreateFeatureDataSet(rnn_model)

Storing 0 in val set...
Storing 1 in val set...
Storing 2 in val set...
Storing 3 in val set...
Storing 4 in val set...
Storing 5 in val set...
Storing 6 in val set...
Storing 7 in val set...
Storing 8 in val set...
Storing 9 in val set...
Storing 10 in val set...
Storing 11 in val set...
Storing 12 in val set...
Storing 13 in val set...
Storing 14 in val set...
Storing 15 in val set...
Storing 16 in val set...
Storing 17 in val set...
Storing 18 in val set...
Storing 19 in val set...
Storing 20 in val set...
Storing 21 in val set...
Storing 22 in val set...
Storing 23 in val set...
Storing 24 in val set...
Storing 25 in val set...
Storing 26 in val set...
Storing 27 in val set...
Storing 28 in val set...
Storing 29 in val set...
Storing 30 in val set...
Storing 31 in val set...
Storing 32 in val set...
Storing 33 in val set...
Storing 34 in val set...
Storing 35 in val set...
Storing 36 in val set...
Storing 37 in val set...
Storing 38 in val set...
Storing 39 in val set...
Storing 40

In [14]:
# NOTE(review): scratch arithmetic with unexplained constants, and its
# execution count (14) is out of order with the cells above -- document what
# it estimates or remove it.
1200/5*5020

1204800.0