In [3]:
#Packages required for code functions
from scipy.io import loadmat
from scipy.io import savemat
import pandas as pd
import numpy as np
import transformations  as tr
import random as rand
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
import tensorflow as tf 
from tensorflow import keras
from tensorflow.keras import layers

In [4]:
def generatePairs(frames):
    """Build every frame pair whose members are at least three frames apart.

    Args:
        frames: sequence of equally shaped 2-D arrays, one per frame.

    Returns:
        A list of ``[firstNumber, secondNumber, stacked]`` entries, where the
        frame numbers are 1-based and ``stacked`` is the two frames joined
        along a new last axis. Entries are ordered by first frame, then by
        second frame.
    """
    frameCount = len(frames)
    minGap = 3  # closest allowed distance between the two frames of a pair

    return [
        [first + 1, second + 1, np.stack((frames[first], frames[second]), axis=-1)]
        for first in range(frameCount)
        for second in range(first + minGap, frameCount)
    ]

In [5]:
#Generating translated pairs to increase the amount of good scans to train the model with
def generateGoodData(goodData):
    """Expand each good pair into its translated variants.

    Args:
        goodData: list of entries laid out as
            ``[patient, firstFrame, secondFrame, stackedData, ...]``.

    Returns:
        One flat list holding, in input order, everything ``translatePair``
        returns for each entry (called with ``steps=1`` and ``label=1``).
    """
    extraGood = []
    for patient, firstFrame, secondFrame, stacked, *_ in goodData:
        extraGood.extend(translatePair(firstFrame, secondFrame, patient, stacked, 1, 1))
    return extraGood

In [6]:
#Translates frame pairs in order to generate additional data
def translatePair(frame1,frame2,patient,data,steps,label):
    """Re-crop one frame pair at shifted offsets to simulate translation.

    Reads the uncropped volume from the module-level ``rawData`` list, indexed
    by ``patient``.

    Args:
        frame1: 1-based number of the first frame in the pair.
        frame2: 1-based number of the second frame in the pair.
        patient: position of the patient's entry in ``rawData``.
        data: existing cropped pair; only its first two dimensions are used,
            to size the new crops.
        steps: number of offsets on each side of the base crop, per axis.
        label: label attached to every generated pair.

    Returns:
        A list of ``[patient, frame1 - 1, frame2 - 1, stacked, label]``
        entries, ``(2 * steps + 1) ** 2`` of them for a positive ``steps``.
        Note that the stored frame indices are zero-based.
    """
    firstIdx = frame1 - 1 #zero-based position of the first frame
    secondIdx = frame2 - 1 #zero-based position of the second frame
    rowSpan = data.shape[0] #crop height, kept equal to the existing pair
    colSpan = data.shape[1] #crop width, kept equal to the existing pair

    baseCut = 0.1 #same base cut as the original frame generation
    step = baseCut/steps #largest step that stays inside the base cut
    offsets = range(-steps, steps+1)

    pairs = []
    for x in offsets:
        lowerI = int((baseCut+x*step)*256)
        upperI = lowerI+colSpan
        for y in offsets:
            lowerJ = int((baseCut+y*step)*2064)
            upperJ = lowerJ+rowSpan

            volume = rawData[patient][1]
            shifted = np.stack(
                (
                    volume[lowerJ:upperJ,lowerI:upperI,firstIdx],
                    volume[lowerJ:upperJ,lowerI:upperI,secondIdx],
                ),
                axis=-1,
            )
            pairs.append([patient,firstIdx,secondIdx,shifted,label])
    return pairs

In [7]:
#Predicts frame pair fitness given a target number
def predictLabel(number):
    """Run the model on one held-out sample.

    Reads the module-level ``x_test`` and ``model``.

    Args:
        number: index into ``x_test``.

    Returns:
        The first row of ``model.predict`` for a batch containing only that
        sample.
    """
    withChannel = np.expand_dims(x_test[number], axis=3) #add trailing channel axis
    singleBatch = np.expand_dims(withChannel, axis=0) #add leading batch axis
    return model.predict(singleBatch)[0]

In [8]:
#Directory containing the RF scan data
# NOTE(review): dataFolder is not referenced by any loadmat call in this cell; the
# files below are opened by bare name, so they resolve against the working directory.
# The value is also an absolute, machine-specific path.
dataFolder = 'C:/Users/Lucas/OneDrive - The University of Western Ontario/Documents/GitHub/SE4450-project-code/Preprocessing/'

#Vector to hold all training data sets
# Each entry is [numeric id, RF array]. translatePair indexes rawData by that id,
# so the id must equal the entry's position in this list.
rawData = []

#Loading in the matlab file information
rawMatFile = loadmat('P39-W2-S4')
#Contains just the RF data pulled from the .mat file
P39W2S4 = rawMatFile['rf1']
rawData.append([0,P39W2S4])

#repeat above steps for other RF files
rawMatFile = loadmat('P39-W4-S6')
P39W4S6 = rawMatFile['rf1']
rawData.append([1,P39W4S6])
# P82-W0-S2 is loaded but deliberately left out of rawData (append disabled below).
rawMatFile = loadmat('P82-W0-S2')
P82W0S2 = rawMatFile['rf1']
# rawData.append(['P82W0S2',P82W0S2])
rawMatFile = loadmat('P87-W0-S3')
P87W0S3 = rawMatFile['rf1']
rawData.append([2,P87W0S3])
rawMatFile = loadmat('P87-W2-S4')
P87W2S4 = rawMatFile['rf1']
rawData.append([3,P87W2S4])
rawMatFile = loadmat('P90-W0-S4')
P90W0S4 = rawMatFile['rf1']
rawData.append([4,P90W0S4])
rawMatFile = loadmat('P94-W1-S3')
P94W1S3 = rawMatFile['rf1']
rawData.append([5,P94W1S3])

In [9]:
#Fraction trimmed from every edge of a frame, leaving a margin that translated crops can move into
cut = 0.1

#Crop bounds in pixels (I indexes the 256-wide axis, J the 2064-long axis)
lowerI, upperI = int(cut*256), int((1-cut)*256)
lowerJ, upperJ = int(cut*2064), int((1-cut)*2064)

#One [patient id, list of cropped frames] entry per data set
allFrames = []
for patientId, volume in rawData:
    frames = [
        volume[lowerJ:upperJ,lowerI:upperI,frameIdx] #inner subset of the frame
        for frameIdx in range(volume.shape[2]) #third axis counts the frames
    ]
    allFrames.append([patientId,frames])

#One [patient id, generated pairs] entry per data set
pairs = [[patientId,generatePairs(frames)] for patientId, frames in allFrames]
#print(pairs[5][1][0][2].shape)

#PAIRS VARIABLE GUIDE (indexing: pairs[a][b][c][d])
#a = which patient set is being referred to
#b = 0 for the patient set id, 1 for the list of generated pairs for that set
#c = which pair within the patient set
#d = 0 for the first frame number, 1 for the second frame number, 2 for the stacked frame data
#pairs[0][1][0][2].shape <- shape of the stacked frame data of the first pair in the first patient set

In [10]:
#Flatten the per-patient pair lists into one list of
#[patient id, first frame number, second frame number, stacked frames] for easy labeling
data = [
    [patientId, firstFrame, secondFrame, stacked]
    for patientId, patientPairs in pairs
    for firstFrame, secondFrame, stacked in patientPairs
]

print(len(data))
#Sanity check: show the first two entries
for idx in range(2):
    patientId, firstFrame, secondFrame, stacked = data[idx]
    print(patientId,firstFrame,secondFrame,stacked.shape)

998
0 1 4 (1651, 205, 2)
0 1 5 (1651, 205, 2)


In [11]:
#Read in label data
# All sheets are requested in one call so Labels.xlsx is opened and parsed once
# instead of once per sheet; read_excel returns a dict keyed by sheet name.
labelSheets = pd.read_excel(
    'Labels.xlsx',
    sheet_name=['P39-W2-S4', 'P39-W4-S6', 'P87-W0-S3', 'P87-W2-S4', 'P90-W0-S4', 'P94-W1-S3'],
    engine='openpyxl',
)
DFP39W2S4 = labelSheets['P39-W2-S4']
DFP39W4S6 = labelSheets['P39-W4-S6']
DFP87W0S3 = labelSheets['P87-W0-S3']
DFP87W2S4 = labelSheets['P87-W2-S4']
DFP90W0S4 = labelSheets['P90-W0-S4']
DFP94W1S3 = labelSheets['P94-W1-S3']

  warn(msg)


In [12]:
# + DFP82W0S2['Label axial'].tolist() wrong shape?
#create labels for training data: the 'Label axial' column of each sheet, joined in
#the same patient order that the pairs were generated in
labels = [
    label
    for sheet in (DFP39W2S4, DFP39W4S6, DFP87W0S3, DFP87W2S4, DFP90W0S4, DFP94W1S3)
    for label in sheet['Label axial'].tolist()
]
print(len(labels))

998


In [13]:
#Applying imported labels to imported data
# Labels are attached purely by position, so the two lists must line up one-to-one.
# A shorter label list would otherwise silently drop the trailing pairs.
# NOTE(review): this also assumes the spreadsheet rows are in the same order that
# generatePairs emits pairs — confirm against Labels.xlsx.
if len(labels) != len(data):
    raise ValueError(
        "Label count (%d) does not match frame pair count (%d)" % (len(labels), len(data))
    )

# Each entry: [patient id, first frame number, second frame number, stacked frames, label]
labeledData = [
    [entry[0], entry[1], entry[2], entry[3], label]
    for entry, label in zip(data, labels)
]

In [14]:
# Split the labeled pairs into good (label > 0) and bad (everything else).
goodData = []
badData = []
for sample in labeledData:
    #Setting meh labels to 0
    if sample[4] == 0.5:
        sample[4] = 0

    if sample[4] > 0:
        goodData.append(sample)
    else:
        badData.append(sample)
print(len(goodData))
print(len(badData))

# Replace the good pairs with their translated variants, then draw exactly as many
# as there are bad pairs so the two classes are balanced.
goodData = generateGoodData(goodData) #generate more good data
# A dedicated, seeded generator makes the draw repeatable across kernel restarts
# (same seed value as the train/test split) without touching the global random state.
goodData = rand.Random(42).sample(goodData,len(badData)) #sample from generated good data to get exactly even split
print(len(goodData))
print(len(badData))
# NOTE(review): only the good class is built from translated crops, and variants of
# one source pair can land on both sides of the later train/test split — worth
# confirming this does not inflate the reported accuracy.
balancedData = goodData+badData
print(len(balancedData))

159
839
839
839
1678


In [15]:
def train_preprocessing(data, label):
    """Give a training sample the dimensions the model expects.

    Inserts a new axis at position 3 of ``data`` and passes ``label`` through
    unchanged.

    Returns:
        ``(expanded_data, label)``.
    """
    return tf.expand_dims(data, axis=3), label
    
def validation_preprocessing(data, label):
    """Give a validation sample the dimensions the model expects.

    Inserts a new axis at position 3 of ``data`` and passes ``label`` through
    unchanged.

    Returns:
        ``(expanded_data, label)``.
    """
    return tf.expand_dims(data, axis=3), label

In [16]:
# Hold out 20% of the balanced pairs for validation; the rest is used for training.
# Element 3 of each entry is the stacked frame pair, element 4 is its label.
x_train, x_test, y_train, y_test = train_test_split(
    [sample[3] for sample in balancedData],
    [sample[4] for sample in balancedData],
    test_size=0.20,
    random_state=42,
)

# print(len(y_train))

In [17]:
#Replace every sample in both splits, in place, with a float32 tensor for input into model
for split in (x_train, x_test):
    for idx, sample in enumerate(split):
        split[idx] = tf.convert_to_tensor(sample, np.float32)

In [18]:
# Wrap each split in a tf.data source of (sample, label) elements.
train_loader = tf.data.Dataset.from_tensor_slices((x_train, y_train))
validation_loader = tf.data.Dataset.from_tensor_slices((x_test, y_test))

batch_size = 2

# Training pipeline: shuffle over the whole split, add the channel axis, batch, prefetch.
train_dataset = train_loader.shuffle(len(x_train))
train_dataset = train_dataset.map(train_preprocessing)
train_dataset = train_dataset.batch(batch_size)
train_dataset = train_dataset.prefetch(2)

# Validation pipeline: same stages, using the validation preprocessing function.
validation_dataset = validation_loader.shuffle(len(x_test))
validation_dataset = validation_dataset.map(validation_preprocessing)
validation_dataset = validation_dataset.batch(batch_size)
validation_dataset = validation_dataset.prefetch(2)

In [19]:
#Constructing the 3D convolutional neural network
def get_model(width, height, depth):
    """Build the 3-D CNN that scores a stacked frame pair.

    Args:
        width: size of the first input axis.
        height: size of the second input axis.
        depth: size of the third input axis (the two frames of a pair).

    Returns:
        An uncompiled ``keras.Model`` named ``"3dcnn"`` taking inputs of shape
        ``(width, height, depth, 1)`` and producing one sigmoid output.
    """
    inputs = keras.Input((width, height, depth, 1))

    # (filters, kernel size) for each convolutional stage. Only the first kernel
    # spans the pair axis; every later stage works within the first two axes.
    convStages = (
        (64, (3,3,2)),
        (64, (3,3,1)),
        (128, (3,3,1)),
        (256, (3,3,1)),
        (512, (3,3,1)),
    )

    x = inputs
    for filterCount, kernelSize in convStages:
        x = layers.Conv3D(filters=filterCount, kernel_size=kernelSize, activation="relu")(x) #learned feature detector
        x = layers.MaxPool3D(pool_size=(2,2,1))(x) #halve the first two axes to cut later processing cost
        x = layers.BatchNormalization()(x) #normalize stage output to speed up training

    # Classification head.
    x = layers.GlobalAveragePooling3D()(x) #average each feature map down to one value
    x = layers.Dense(units=512, activation="relu")(x) #fully connected layer
    x = layers.Dropout(0.3)(x) #helps to prevent overfitting
    outputs = layers.Dense(units=1, activation="sigmoid")(x) #final neuron for classification

    return keras.Model(inputs, outputs, name="3dcnn")


# Build model.
# The dimensions match the (1651, 205, 2) stacked-pair shape printed when the
# pair list was assembled; `model` is read as a global by the prediction helpers.
model = get_model(width=1651, height=205, depth=2)
model.summary()


Model: "3dcnn"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(None, 1651, 205, 2, 1)] 0         
_________________________________________________________________
conv3d (Conv3D)              (None, 1649, 203, 1, 64)  1216      
_________________________________________________________________
max_pooling3d (MaxPooling3D) (None, 824, 101, 1, 64)   0         
_________________________________________________________________
batch_normalization (BatchNo (None, 824, 101, 1, 64)   256       
_________________________________________________________________
conv3d_1 (Conv3D)            (None, 822, 99, 1, 64)    36928     
_________________________________________________________________
max_pooling3d_1 (MaxPooling3 (None, 411, 49, 1, 64)    0         
_________________________________________________________________
batch_normalization_1 (Batch (None, 411, 49, 1, 64)    256   

In [22]:
# Compile model.
initial_learning_rate = 0.0001
# NOTE(review): with staircase=True the rate only drops once every decay_steps
# (100000) optimizer steps — confirm the run is long enough for the decay to ever apply.
lr_schedule = keras.optimizers.schedules.ExponentialDecay(
    initial_learning_rate, decay_steps=100000, decay_rate=0.96, staircase=True
)
# Binary cross-entropy pairs with the model's single sigmoid output. The metric is
# named "acc", which is why the early-stopping callback watches "val_acc".
model.compile(
    loss="binary_crossentropy",
    optimizer=keras.optimizers.Adam(learning_rate=lr_schedule),
    metrics=["acc"],
)

# Define callbacks.
# The checkpoint keeps only the best model seen so far. No `monitor` is passed, so
# the Keras default is used. NOTE(review): that default is presumably val_loss,
# which differs from the val_acc watched by early stopping — confirm this is intended.
checkpoint_cb = keras.callbacks.ModelCheckpoint(
    "3d_image_classification.h5", save_best_only=True
)
# Stop once val_acc has gone 15 epochs without improving.
early_stopping_cb = keras.callbacks.EarlyStopping(monitor="val_acc", patience=15)

# Train the model, doing validation at the end of each epoch
epochs = 50
# NOTE(review): Keras documents `shuffle` as ignored for tf.data.Dataset inputs;
# shuffling here presumably comes only from the dataset pipeline — verify.
model.fit(
    train_dataset,
    validation_data=validation_dataset,
    epochs=epochs,
    shuffle=True,
    verbose=2,
    callbacks=[checkpoint_cb, early_stopping_cb],
)


In [24]:
#Spot-check the trained network on a single held-out frame pair
model.load_weights("3d_image_classification.h5")
toTest = 85 #index of the pair in x_test / y_test to check

#predictLabel adds the channel and batch axes and returns the first prediction row
prediction = predictLabel(toTest)
class_names = ["a bad pair", "an optimal pair"]
scores = [1 - prediction[0], prediction[0]] #same order as class_names

#print prediction in easy to view format
for score, name in zip(scores, class_names):
    message = "This model is %.2f percent confident that frame pair is %s" % ((100 * score), name)
    print(message)

#print actual system label for comparison
print(y_test[toTest])

This model is 0.00 percent confident that frame pair is a bad pair
This model is 100.00 percent confident that frame pair is an optimal pair
1


In [25]:
#Predict every held-out pair and threshold the score at 0.5 to test model accuracy
predictions = []
print(len(y_test))
for x in range(len(y_test)):
    if not x % 25: #progress marker every 25 samples
        print(x)
    predictions.append(1 if predictLabel(x) > 0.5 else 0)

336
0
25
50
75
100
125
150
175
200
225
250
275
300
325


In [26]:
# Unpack the 2x2 confusion matrix into its four counts.
tn, fp, fn, tp = confusion_matrix(y_test, predictions).ravel()
print(tn, fp, fn, tp) #print prediction results for confusion matrix
# Divide by the actual number of test samples rather than a hard-coded count, so the
# accuracy stays correct if the data or the split size changes.
print((tn+tp)/len(y_test)) #print overall accuracy

159 5 14 158
0.9434523809523809


In [27]:
# Show the same counts as a 2x2 array; the rows read [tn, fp] then [fn, tp],
# matching the unpacking order used for the accuracy calculation.
confusion_matrix(y_test, predictions) #print above results in traditional confusion matrix format

array([[159,   5],
       [ 14, 158]], dtype=int64)

In [None]:
##BELOW ARE FUNCTIONS FOR FINAL PREDICTOR CLASS
##WRITTEN HERE FIRST FOR SIMPLICITY
##COMMENTED VERSIONS IN Predictor.py

In [28]:
##FUNCTION 1 for final predictions
def preprocessData(data, targetHeight=1651, targetWidth=205):
    """Center-crop every frame of a raw scan and build the frame pairs to score.

    Args:
        data: 3-D array laid out as (height, width, frame).
        targetHeight: crop height; defaults to the size the model was built for.
        targetWidth: crop width; defaults to the size the model was built for.

    Returns:
        The list produced by ``generatePairs`` for the cropped frames, with the
        stacked data of every pair converted to a float32 tensor.

    Raises:
        ValueError: if ``data`` is smaller than the requested crop. Without this
            check the crop offsets go negative and the slices silently select
            the wrong region.
    """
    height = data.shape[0]
    width = data.shape[1]
    if height < targetHeight or width < targetWidth:
        raise ValueError(
            "Scan of size %dx%d is smaller than the required crop %dx%d"
            % (height, width, targetHeight, targetWidth)
        )

    # Center the crop window inside the scan.
    lowerH = (height - targetHeight) // 2
    upperH = lowerH + targetHeight
    lowerW = (width - targetWidth) // 2
    upperW = lowerW + targetWidth

    # The third axis indexes frames; crop each one to the size expected by the model.
    frames = [data[lowerH:upperH, lowerW:upperW, frameIdx] for frameIdx in range(data.shape[2])]

    pairs = generatePairs(frames)
    for pair in pairs:
        pair[2] = tf.convert_to_tensor(pair[2], np.float32)
    return pairs

In [29]:
##FUNCTION 2 for final predictions
def predict(processed):
    """Score every preprocessed frame pair with the module-level ``model``.

    Prints the number of pairs first, then a progress marker every 50 pairs.

    Args:
        processed: list of entries whose element 2 is the stacked pair data.

    Returns:
        One ``[label, probability]`` entry per pair, in input order, where
        ``label`` is 1 when the probability exceeds 0.5 and 0 otherwise, and
        ``probability`` is the first row returned by ``model.predict``.
    """
    predictions = []
    print(len(processed))
    for x, entry in enumerate(processed):
        withChannel = np.expand_dims(entry[2], axis=3) #add trailing channel axis
        singleBatch = np.expand_dims(withChannel, axis=0) #add leading batch axis
        probability = model.predict(singleBatch)[0]
        if not x % 50:
            print(x)
        predictions.append([1 if probability > 0.5 else 0, probability])
    return predictions

In [30]:
##FUNCTION 3 for final predictions
def formatOutput(processed,predictions):
    """Combine frame numbers with their scores, best score first.

    Args:
        processed: list of entries whose elements 0 and 1 are the two frame
            numbers of a pair.
        predictions: list of ``[label, probability]`` entries aligned with
            ``processed``; element 0 of ``probability`` is the score.

    Returns:
        A list of ``[firstFrame, secondFrame, score]`` rows sorted by score in
        descending order; rows with equal scores keep their input order.
    """
    rows = [
        [processed[idx][0], processed[idx][1], predictions[idx][1][0]]
        for idx in range(len(predictions))
    ]
    return sorted(rows, key=lambda row: row[2], reverse=True)

In [31]:
##FINAL PREDICTION
# End-to-end run of the three predictor functions on one scan.
# NOTE(review): these imports repeat the ones in the first cell, and savemat is
# not used anywhere in this cell.
from scipy.io import loadmat
from scipy.io import savemat
# NOTE(review): P39-W2-S4 is also loaded into rawData as training set 0, so the
# scores below are for a scan the model was trained on.
rawMatFile = loadmat('P39-W2-S4')
P39W2S4 = rawMatFile['rf1']
# Restore the checkpointed weights saved during training.
model.load_weights("3d_image_classification.h5")
processed = preprocessData(P39W2S4) # crop frames and build candidate pairs
predictions = predict(processed) # score every pair
output = formatOutput(processed,predictions) # [frame1, frame2, score] rows, best first

210
0
50
100
150
200


In [33]:
##Outputs top five frame pairs most likely to be good and how likely
for rank in range(5):
    print(output[rank])

[17, 22, 0.7949978]
[13, 22, 0.70975673]
[15, 23, 0.7079406]
[14, 23, 0.64001644]
[16, 22, 0.63846636]
