In [None]:
'''
A CNN model builder for analyzing inputs of both Optical Flow and Depth Flow by creating a 'stacked' image,
which is a 4 channel image that is created by stacking each optical flow on top of a depth flow image. Model has not been
run yet, and so changes may have to be made to the structure of the model. Nonetheless, this basic model is ready to be run
and will hopefully learn something.
'''

#Import from the Keras library
from keras import models
from keras.layers import Dense, Dropout, Flatten
from keras.layers import Conv2D,  MaxPooling2D
from keras import optimizers 
from keras import utils
from keras.models import load_model
from keras.preprocessing.image import ImageDataGenerator
from keras.utils.vis_utils import plot_model
from keras.callbacks import EarlyStopping
from keras.callbacks import ModelCheckpoint
from secret import credentials

#This allows for Keras models to be saved. 
import h5py
#Other import statements 
import h5py
import random
import numpy as np
import pandas as pd
import pathlib
import cv2
import pymysql
import os

from ModelTesting import tests

from matplotlib import pyplot

# Optional SQL suffix appended to the frame-selection query in import_data.
# Use it to restrict a run to a subset of the data while testing (see the
# commented example below); leave it as an empty string to apply no limit.
#limit = "ORDER BY RAND() LIMIT 1000"
limit = ''
# Training settings handed to runTest in the final cell.
epochs = 2
batch_size = 1

# Directory where images fetched from the database are cached as PNG files.
cache_path = 'RecordingCache'

# File name the statistics are saved in. Must include .txt at the end.
statistics_output_file = 'statistics.output.txt'

# Base name for the saved model; runTest appends '.h5' to it when checkpointing.
# The h5py package must be installed or the model will not save.
model_file_name = 'model.output'

In [None]:
def connect():
    """Open a new connection to the MySQL database described by `credentials`.

    The host, port, database name, user name and password are read from the
    'db_host', 'db_port', 'db_name', 'db_username' and 'db_password' entries
    of the `credentials` mapping imported from the `secret` module.

    Returns:
        A new pymysql connection. The caller is responsible for closing it.
    """
    # Every argument is passed by keyword: PyMySQL 1.0 made connect()
    # keyword-only, and `passwd` / `db` are deprecated aliases of
    # `password` / `database`.
    return pymysql.connect(
        host=credentials['db_host'],
        port=credentials['db_port'],
        user=credentials['db_username'],
        password=credentials['db_password'],
        database=credentials['db_name'],
    )

In [None]:
#imports either dflow or oflow from database
def import_data(cache_path, img_type, conn=None):
    """Load one flow image per raw frame, using an on-disk PNG cache.

    The frame ids, recording ids and isCSGM values are selected from the
    database (the module-level `limit` string is appended to that query).
    For every frame the image is read from `cache_path`; when the cached file
    is missing, the `img_type` column is fetched from Video_Generated, decoded
    and written to the cache first. Each image is then scaled to 30% of its
    original width and height.

    Args:
        cache_path: Directory holding the cached PNG files. Created if missing.
        img_type: Name of the Video_Generated column to load. The value
            'RGB_Optical_Flow' is cached as '<raw_id>.oflow.png' and read with
            cv2's default flags; any other value is cached as
            '<raw_id>.dflow.png' and read with cv2.IMREAD_ANYDEPTH. The name is
            concatenated into the SQL text (column names cannot be bound as
            parameters), so it must come from trusted code, never user input.
        conn: An open database connection. When omitted, a connection is
            opened with connect() for this call and closed before returning.

    Returns:
        A tuple (recording_ids, xy):
        recording_ids maps each recording id to the list of its raw frame ids;
        xy maps each raw frame id to [image, isCSGM value].

    Raises:
        IOError: If an image can be neither read from the cache nor fetched
            from the database.
        Exception: Database errors are printed and re-raised.
    """
    # Connect lazily. A `conn=connect()` default would be evaluated once, when
    # the function is defined, opening a connection as a side effect of
    # running the cell and sharing it between all later calls.
    owns_conn = conn is None
    if owns_conn:
        conn = connect()

    #Create the cache directory if it doesn't exist
    os.makedirs(cache_path, exist_ok=True)

    is_optical_flow = (img_type == 'RGB_Optical_Flow')
    suffix = ".oflow.png" if is_optical_flow else ".dflow.png"
    scale_percent = 30

    cursor = conn.cursor()

    try:
        recording_ids = {}
        xy = {}

        image_query = "SELECT r.id, r.recording_id, r.isCSGM FROM nicu.Video_Raw AS r JOIN nicu.Video_Generated AS g ON r.id=g.raw_id  WHERE (r.recording_id>1) AND (g.RGB_Optical_Flow IS NOT NULL) AND (g.D_Depth_Flow IS NOT NULL) " +limit
        try:
            cursor.execute(image_query)
            for row in cursor.fetchall():
                raw_id = row[0]
                rec_id = row[1]
                csgm = row[2]
                recording_ids.setdefault(rec_id, []).append(raw_id)
                xy[raw_id] = [csgm]
        except Exception as e:
            print("Error retrieving ID's", e)
            conn.rollback()
            raise e

        print("Collecting images for processing (o = source image in cache, ⇣ = source image fetched from db, x = source image not in db)")
        for rec_id, raw_id_list in recording_ids.items():
            print("")
            print("Analyzing recording_id:",rec_id,": ",end="")
            for raw_id in raw_id_list:
                current_input = cache_path+'/'+str(raw_id)+suffix
                if not os.path.exists(current_input):
                    cursor2 = conn.cursor()
                    try:
                        frame_query = "SELECT "+img_type+" from Video_Generated WHERE (raw_id=%s)"
                        # The trailing comma makes this a real 1-tuple of parameters.
                        cursor2.execute(frame_query, (str(raw_id),))
                        for row in cursor2.fetchall():
                            db_img = row[0]
                            if db_img is not None:
                                img=cv2.imdecode(np.asarray(bytearray(db_img),dtype=np.uint8),cv2.IMREAD_UNCHANGED)
                                cv2.imwrite(current_input,img)
                                print("⇣",end="",flush=True)
                            else:
                                print("x",end="",flush=True)
                    except Exception as e:
                        print("Error retrieving Optical Flow frame",e)
                        raise e
                    finally:
                        cursor2.close()
                else:
                    print("o",end="",flush=True)

                #Resizing the image
                if is_optical_flow:
                    img = cv2.imread(current_input)
                else:
                    img = cv2.imread(current_input, cv2.IMREAD_ANYDEPTH)
                if img is None:
                    # cv2.imread signals failure by returning None; fail with a
                    # clear message instead of an AttributeError on `.shape`.
                    raise IOError("Could not load image for raw_id %s from %s" % (raw_id, current_input))

                width = int(img.shape[1] * scale_percent / 100)
                height = int(img.shape[0] * scale_percent / 100)
                # cv2.resize returns a new array; the result must be kept or
                # the full-size image is what ends up in the dataset.
                img = cv2.resize(img,(width,height), interpolation=cv2.INTER_CUBIC)

                # Rebuild the entry as [image, label]. The label is always the
                # last element, so this stays correct even if the same raw_id
                # is visited more than once.
                xy[raw_id] = [img, xy[raw_id][-1]]
        print("")
        return recording_ids, xy
    finally:
        cursor.close()
        if owns_conn:
            conn.close()

In [None]:
#creates an array for input into model
def create_array(raw_ids, xy):
    """Build shuffled, class-balanced image and label arrays for the model.

    Args:
        raw_ids: Iterable of frame ids to include. It is not modified.
        xy: Mapping from frame id to a sequence whose element 0 is the image
            and element 1 is the label. A label equal to 0 is counted as
            non-CSGM; any other value is counted as CSGM.

    Returns:
        A tuple (x, y) of numpy arrays holding the images and the labels in
        matching order. If one class has fewer samples than the other, samples
        of the smaller class are duplicated (cycling through them in shuffled
        order and inserting each copy at a random position) until both classes
        have the same count. If the smaller class has no samples at all there
        is nothing to duplicate and the data is returned unbalanced.
    """
    # Shuffle a copy so the caller's list (for example one stored in the
    # recording-id dictionary) is not reordered as a side effect.
    order = list(raw_ids)
    random.shuffle(order)

    image_list = [xy[i][0] for i in order]
    csgm_list = [xy[i][1] for i in order]

    no_csgm = sum(1 for label in csgm_list if label == 0)
    csgm = len(csgm_list) - no_csgm

    #if there are an uneven number of CSGM and non-CSGM images, duplicate frames until there is an even number
    difference = csgm - no_csgm
    # difference > 0: CSGM is the majority, so non-CSGM (label 0) is duplicated;
    # difference < 0: the CSGM samples are the ones to duplicate.
    duplicate_zero_label = difference > 0
    minority = [
        (image, label)
        for image, label in zip(image_list, csgm_list)
        if (label == 0) == duplicate_zero_label
    ]

    # Snapshot the minority samples before inserting, so insertions cannot
    # shift which sample is copied, and add exactly |difference| copies.
    if difference != 0 and minority:
        for k in range(abs(difference)):
            image, label = minority[k % len(minority)]
            # randint is inclusive on both ends; len(list) inserts at the end.
            position = random.randint(0, len(csgm_list))
            csgm_list.insert(position, label)
            image_list.insert(position, image)

    x = np.array(image_list)
    y = np.array(csgm_list)
    return x, y

In [None]:
#creates a CNN
def create_cnn(x_train, filter_info={0:[32,3]}, dropout={0:0.25}, pooling={0:2}, activation='relu', loss='mean_squared_error', final_activation='sigmoid'):
    """Build and compile a sequential CNN with a single-unit output layer.

    One Conv2D layer is added per entry of `filter_info`, in the mapping's
    iteration order. After each convolution a MaxPooling2D layer and/or a
    Dropout layer is added when `pooling` / `dropout` contain the same key.
    The stack always ends with Flatten, Dense(256), Dropout(0.5) and a
    Dense(1) output layer. The model is compiled with SGD (lr=1e-4) and the
    'acc' metric.

    Args:
        x_train: Training array; x_train.shape[1:] is used as the input shape.
        filter_info: Mapping of layer key -> [number of filters, kernel size].
        dropout: Mapping of layer key -> dropout rate applied after that layer.
        pooling: Mapping of layer key -> max-pooling window size after that layer.
        activation: Activation for the convolution layers and the Dense(256) layer.
        loss: Loss passed to model.compile.
        final_activation: Activation of the single-unit output layer.

    Returns:
        A tuple (model, str_model): the compiled Keras model and a text
        description of its architecture (which is also printed).
    """
    # The default mappings are only read, never modified, so sharing them
    # between calls is safe.
    model = models.Sequential()
    str_model = "Overview of Model Architecture: \n"

    #loops through inputs to create layers
    for position, i in enumerate(filter_info):
        num_filters = filter_info[i][0]
        filter_size = filter_info[i][1]

        # Honour the requested activation in every convolution layer, so the
        # model matches the description recorded in str_model.
        conv_kwargs = {'activation': activation}
        if position == 0:
            # The first layer needs the input shape whatever its key is called.
            conv_kwargs['input_shape'] = x_train.shape[1:]
        model.add(Conv2D(num_filters, (filter_size, filter_size), **conv_kwargs))

        str_model += ("2D Convolution Layer with %d filters the size of (%d,%d) and %s activation \n" %(num_filters, filter_size, filter_size, activation))

        if i in pooling:
            pool_filter_size = pooling[i]
            model.add(MaxPooling2D(pool_size=(pool_filter_size, pool_filter_size)))
            str_model += ('2D Pooling Max Pooling Layer with filter size (%d,%d)\n' %(pool_filter_size,pool_filter_size))

        if i in dropout:
            drop_rate = dropout[i]
            model.add(Dropout(drop_rate))
            str_model += ('Droput Layer with with a rate of %f \n' %(drop_rate))

    #These will be added to the end of every model no matter what
    model.add(Flatten())
    str_model += ('Flatten\n')
    model.add(Dense(256,activation=activation))
    str_model += ('Dense layer with %s activation\n' %(activation))
    model.add(Dropout(0.5))
    str_model += ('Droput Layer with with a rate of 0.5 \n')

    #A single output unit is used because the output is binary
    #(sigmoid is the default final activation).
    model.add(Dense(1, activation=final_activation))
    str_model += ('Dense layer with %s activation\n' %(final_activation))

    model.compile(loss=loss,
              optimizer=optimizers.SGD(lr=1e-4),
              metrics=['acc'])
    str_model += ('Loss: %s' %(loss))

    print(str_model)

    return model, str_model

In [None]:
def confusion_matrix(exp_values, predicted_values):
    """
    Build a 2x2 confusion matrix and the accuracy for binary predictions.

    exp_values is a sequence of expected labels (0 or 1). predicted_values is
    expected in the format of the output of Keras's model.predict(); it is
    thresholded to 0/1 with convert_predictions and must contain at least as
    many entries as exp_values.

    The output is a tuple (matrix, score). matrix is a pandas DataFrame whose
    rows 'P0'/'P1' are the predicted class and whose columns 'E0'/'E1' are the
    expected class. score is the fraction of correct predictions, or NaN when
    exp_values is empty.
    """
    predicted_values = convert_predictions(predicted_values)

    #Creates a DataFrame of zeros
    matrix = pd.DataFrame(np.zeros((2, 2)), index=['P0', 'P1'], columns=['E0', 'E1'])

    #Counts every sample in the cell [predicted row, expected column]; correct
    #predictions land on the diagonal without needing a separate branch.
    for i, expected in enumerate(exp_values):
        # int() lets float or numpy labels be used as positional indices.
        matrix.iloc[predicted_values[i], int(expected)] += 1

    total = len(exp_values)
    if total == 0:
        # Accuracy is undefined without samples; avoid a ZeroDivisionError.
        return matrix, float('nan')

    # Purely positional access; chaining iloc[i][i] depends on the deprecated
    # integer fallback of Series indexing.
    diagonal_sum = matrix.iloc[0, 0] + matrix.iloc[1, 1]
    score = diagonal_sum / total

    return matrix, score

In [None]:
def convert_predictions(predictions):
    """
    Threshold the raw outputs of a keras model into hard class labels.

    Every element of predictions that is at least 0.5 becomes 1 and every
    other element becomes 0. The labels are returned as a plain list in the
    same order as the input.
    """
    return [1 if value >= 0.5 else 0 for value in predictions]

In [None]:
def runTest(pooling, dropout, filter_info, loss, activation, final_activation, file_name='model.txt', model_name='model', save_model=False, epochs=5, batch_size=32, stacked=False):
    """Train and evaluate the CNN once per recording, holding that recording out.

    Images are loaded with import_data from the module-level `cache_path`.
    For every recording id a fresh model is built with create_cnn, trained on
    the frames of all other recordings and evaluated on the held-out
    recording. The confusion matrix, accuracy and Keras evaluation scores of
    each round are printed and finally written to `file_name`.

    Args:
        pooling, dropout, filter_info, loss, activation, final_activation:
            Forwarded to create_cnn.
        file_name: Text file the per-recording statistics are written to.
        model_name: Base name for saved models. The best checkpoint of each
            round is written to model_name + '.h5', so later rounds overwrite
            earlier ones.
        save_model: When true, the model of the last round is additionally
            saved to model_name + '.final.h5' after all rounds have finished.
        epochs: Number of training epochs per round.
        batch_size: Batch size used for training.
        stacked: When true, each optical-flow image is stacked with the
            matching depth-flow image along the channel axis.
    """
    conn = connect()
    try:
        #imports appropriate images
        recording_ids_dict, xy = import_data(cache_path,'RGB_Optical_Flow',conn)
        #if stacked parameter is true, combine each dflow and oflow into a single 4 channel image
        if (stacked):
            rec_idDF, xyDF = import_data(cache_path,'D_Depth_Flow',conn)
            for i in xy:
                xy[i][0]=np.dstack((xy[i][0], xyDF[i][0]))
    finally:
        conn.close()

    matrices = {}
    scores = {}
    model_scores = {}
    str_model =''
    # Stays None when there are no recordings, so the save step can be skipped.
    model = None

    #iterate through the recordings, using each one as a test and training on the rest
    for i in recording_ids_dict:
        print('Testing on ' + str(i))

        # Build fresh lists so nothing downstream can reorder the id lists
        # stored in recording_ids_dict.
        test_ids = list(recording_ids_dict[i])
        train_ids = [raw_id
                     for j, ids in recording_ids_dict.items() if j != i
                     for raw_id in ids]

        #create corresponding arrays for input
        x_train, y_train = create_array(train_ids, xy)
        x_test, y_test = create_array(test_ids, xy)

        #Scaling the values by dividing by 255
        x_train = x_train.astype('float32')
        x_test = x_test.astype('float32')
        x_train /= 255
        x_test /= 255

        #create the model
        model,str_model = create_cnn(x_train,
                                     filter_info=filter_info,
                                     dropout=dropout,
                                     pooling=pooling,
                                     loss=loss,
                                     final_activation=final_activation,
                                     activation=activation)
        #optionally prints the structure of the model into the console and saves a png of the structure
        #print(model.summary())
        #plot_model(model, to_file='model_plot'+str(i)+'.png', show_shapes=True, show_layer_names=True)

        #creates an early stopping callback that will end the training process
        #if the validation loss has not improved after 100 epochs, and prints the last epoch
        es = EarlyStopping(monitor='val_loss', mode='min', verbose=1, patience=100)
        #creates a callback that will save the model with the best accuracy, and prints that epoch number
        mc = ModelCheckpoint(model_name+'.h5', monitor='val_acc', mode='max', verbose=1, save_best_only=True)

        #Fit the model. batch_size is forwarded so the caller's setting is
        #honoured instead of silently falling back to the Keras default.
        history = model.fit(x_train, y_train,
                            validation_data=(x_test, y_test),
                            epochs=epochs,
                            batch_size=batch_size,
                            callbacks=[es, mc])

        #basic fitting choice if no callbacks are needed
        #history = model.fit(x_train, y_train, epochs = epochs)

        #Create predictions and evaluate to find loss and accuracy
        model_score = model.evaluate(x_test, y_test)

        #optional line graph of the training and testing loss
        #pyplot.plot(history.history['loss'], label='train')
        #pyplot.plot(history.history['val_loss'], label='test')
        #pyplot.legend()
        #pyplot.show()
        #pyplot.savefig(model_name+'.png')

        predict = model.predict(x_test)
        print('Model was ' + str(model_score[1]*100) + '% accurate and exhibited an average loss of ' + str(model_score[0]) + '.')

        matrix,score = confusion_matrix(y_test, predict)

        matrices.update({i : matrix})
        print(str(matrix) + '\n')
        scores.update({i: score})
        print(str(score) + '\n')
        model_scores.update({i:model_score})

    with open(file_name, 'w') as f:
        for key in matrices:
            f.write("Baby %s\n" % key)
            f.write("%s\n" % str_model)
            f.write("%s\n" % matrices[key])
            f.write("%s\n" % scores[key])
            f.write("%s\n" % model_scores[key])

    if save_model and model is not None:
        # model.save expects a file path, not the model itself. A separate
        # name keeps the best checkpoint written by ModelCheckpoint intact.
        model.save(model_name + '.final.h5')

In [None]:
#a standard filter template
# Three convolution stages, keyed by stage index: [number of filters, kernel size].
filter_info = {
    0: [32, 3],
    1: [64, 3],
    2: [128, 3],
}
# Dropout rate applied after each stage.
dropout = {
    0: 0.25,
    1: 0.5,
    2: 0.5,
}
# Max-pooling window size applied after each stage.
pooling = {
    0: 2,
    1: 2,
    2: 2,
}

# Run the evaluation on stacked (optical flow + depth flow) images using the
# settings from the configuration cell.
runTest(
    pooling=pooling,
    dropout=dropout,
    filter_info=filter_info,
    loss='mean_squared_error',
    activation='relu',
    final_activation='sigmoid',
    file_name=statistics_output_file,
    model_name=model_file_name,
    save_model=False,
    epochs=epochs,
    batch_size=batch_size,
    stacked=True,
)
    