# Training_v05 

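**TODO:** complete this description. In brief, this notebook reads its parameters from `config.yaml`, optionally rebuilds and checks the dataset, trains a binary CNN image classifier with Keras, evaluates a selected checkpoint on the test (`tstDir`) and remaining (`remDir`) image directories, and plots activation heat maps for sample test images.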

In [1]:
import yaml
import os
import subprocess
import datetime
import numpy
import sklearn.metrics
import tensorflow

# Report which TensorFlow build is loaded and how many GPUs it can see.
print("INFO> TensorFlow version: %s" % tensorflow.__version__)
gpuDevices = tensorflow.config.experimental.list_physical_devices('GPU')
print("INFO> Num GPUs Available: ", len(gpuDevices))

INFO> TensorFlow version: 1.14.0
INFO> Num GPUs Available:  0


In [2]:
# Read parameters from the local config.yaml file, and update corresponding Python variables
currentDir = os.getcwd()
print("INFO> Reading file config.yaml from directory: %s" % currentDir)

# The context manager guarantees the file handle is closed even if parsing fails.
# NOTE(review): yaml.Loader can construct arbitrary Python objects. That is only acceptable
# while config.yaml is a trusted, locally maintained file; consider yaml.safe_load otherwise.
with open('config.yaml', 'r') as yamlFile:
    yamlData = yaml.load(yamlFile, Loader=yaml.Loader)

# Echo every parameter (sorted by name) so the run is self-documenting.
for key in sorted(yamlData):
    print("INFO> %-15s: %s" % (key, yamlData[key]))

# Image geometry fed to the network and to the data generators.
imageWidth, imageHeight = yamlData['imageWidth'], yamlData['imageHeight']

# Dataset directories.
tmpDir = yamlData['tmpDir']
trnDir = yamlData['trnDir']
valDir = yamlData['valDir']
tstDir = yamlData['tstDir']
remDir = yamlData['remDir']

# Sample counts per split.
nTrnSamples = yamlData['nTrnSamples']
nValSamples = yamlData['nValSamples']
nTstSamples = yamlData['nTstSamples']

# Training hyper-parameters.
nEpochs = yamlData['nEpochs']
batchSize = yamlData['batchSize']
# Explicit float() conversion: the raw YAML value is not guaranteed to be a float already.
learningRate = float(yamlData['learningRate'])

# Output locations for checkpoints and TensorBoard logs.
checkpointDir = yamlData['checkpointDir']
logDir = yamlData['logDir']

# Flags controlling the optional dataset creation / verification steps.
createDataset = yamlData['createDataset']
checkDataset = yamlData['checkDataset']

INFO> Reading file config.yam from directory: /raid5/disk1/mlproj10/classification
INFO> checkDataset   : True
INFO> imageHeight    : 720
INFO> trnDir         : /home/jmv/data/mlproj10/dataset/trn
INFO> nValSamples    : 671
INFO> imageWidth     : 1280
INFO> nTrnSamples    : 2000
INFO> learningRate   : 1e-6
INFO> createDataset  : True
INFO> batchSize      : 16
INFO> logDir         : /home/jmv/data/mlproj10/log/
INFO> tstDir         : /home/jmv/data/mlproj10/dataset/tst
INFO> nTstSamples    : 671
INFO> nEpochs        : 1000
INFO> tmpDir         : /home/jmv/data/mlproj10/dataset/tmp
INFO> remDir         : /home/jmv/data/mlproj10/dataset/rem
INFO> checkpointDir  : /home/jmv/data/mlproj10/tmp/
INFO> valDir         : /home/jmv/data/mlproj10/dataset/val


In [None]:
# Optionally run scripts to create dataset from golden data set & check the newly created dataset
if createDataset:
    cmd = 'rm -r -f dataset'
    !{cmd}
    cmd = './create_dataset.sh'
    !{cmd}
    %run randomize_and_copy_dataset_v04.ipynb

if checkDataset:
    cmd = './check_dataset.sh'
    !{cmd}

In [None]:
# Build the CNN: six (Conv2D -> BatchNormalization -> MaxPooling2D) stages followed by a
# small dense head that ends in a single sigmoid unit.
myModelInput = tensorflow.keras.layers.Input(shape=(imageHeight,imageWidth,3))

x = myModelInput
# Number of 3x3 filters used by each successive convolutional stage.
for nFilters in (64, 128, 192, 192, 192, 128):
    x = tensorflow.keras.layers.Conv2D(nFilters, (3,3), activation="relu")(x)
    x = tensorflow.keras.layers.BatchNormalization()(x)
    x = tensorflow.keras.layers.MaxPooling2D((2,2))(x)

# Classification head.
x = tensorflow.keras.layers.Flatten()(x)
x = tensorflow.keras.layers.Dense(128, activation="relu")(x)
x = tensorflow.keras.layers.BatchNormalization()(x)

x = tensorflow.keras.layers.Dropout(0.25)(x)
myModelOutput = tensorflow.keras.layers.Dense(1, activation="sigmoid")(x)

model = tensorflow.keras.models.Model(inputs=myModelInput, outputs=myModelOutput)


model.summary()

# Keep a reference to the optimizer and hand the instance to compile(). Previously the
# configured optimizer was created and discarded, and compile() received the string
# 'rmsprop', so training ran with the default learning rate instead of learningRate.
optimizer = tensorflow.keras.optimizers.RMSprop(lr=learningRate)

model.compile(loss=tensorflow.keras.losses.BinaryCrossentropy(),
              optimizer=optimizer,
              metrics=['acc']) # should be accuracy in TF2.0

In [None]:
# JMV need to review code below - December 12, 2019
# Training images are rescaled to [0, 1] and augmented (shear, zoom, horizontal flip);
# validation images are only rescaled.
trnDataGen = tensorflow.keras.preprocessing.image.ImageDataGenerator(
    rescale=1. / 255,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True)

valDataGen = tensorflow.keras.preprocessing.image.ImageDataGenerator(rescale=1. / 255)

trnGenerator = trnDataGen.flow_from_directory(
    trnDir,
    target_size=(imageHeight,imageWidth),
    batch_size=batchSize,
    class_mode='binary')

valGenerator = valDataGen.flow_from_directory(
    valDir,
    target_size=(imageHeight,imageWidth),
    batch_size=batchSize,
    class_mode='binary')

# One time-stamped sub-directory per run, for both checkpoints and TensorBoard logs.
timeNow = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
# os.path.join works whether or not the configured directories end with a separator;
# the trailing "" keeps the trailing separator that this variable has always carried.
fullCheckpointDir = os.path.join(checkpointDir, timeNow, "")
# makedirs also creates missing parent directories (os.mkdir would fail in that case).
os.makedirs(fullCheckpointDir, exist_ok=True)
# need to replace acc by accuracy below when moving to TF2.0
filePath = os.path.join(fullCheckpointDir, "{epoch:05d}_{loss:.6f}_{acc:.6f}_{val_loss:.6f}_{val_acc:.6f}.h5")
# save_best_only=False: a full model file is written after every epoch, with the
# metrics encoded in the file name.
checkpoint = tensorflow.keras.callbacks.ModelCheckpoint(filePath, monitor='val_loss', verbose=0, save_best_only=False, save_weights_only=False, mode='auto', save_freq='epoch')

# profile_batch=0 required to solve a bug w/ Tensorboard according to 
#   https://github.com/tensorflow/tensorboard/issues/2412
fullLogDir = os.path.join(logDir, timeNow)
tensorboardCallback = tensorflow.keras.callbacks.TensorBoard(log_dir=fullLogDir,profile_batch=0)

# Step counts come from the configured sample counts, not from the generators.
history = model.fit_generator(
    trnGenerator,
    steps_per_epoch=nTrnSamples // batchSize,
    epochs=nEpochs,
    validation_data=valGenerator,
    validation_steps=nValSamples // batchSize,
    callbacks=[tensorboardCallback,checkpoint])

In [None]:
# Inspect the tmp directory and pick the best checkpoint candidate based on
# train/val loss & accuracy, then load that file as the model used below.
# compile=False was added for TF1.14; it is not needed for TF2.
bestModelPath = '/home/jmv/data/mlproj1_new/tmp/20200304-232855/00062_0.104636_0.969000_0.155657_0.948171.h5'
model = tensorflow.keras.models.load_model(bestModelPath, compile=False)

In [None]:
# Evaluate the loaded model on the test directory and print its confusion matrix.
tstDataGen = tensorflow.keras.preprocessing.image.ImageDataGenerator(rescale=1. / 255)

# shuffle=False keeps predictions in the same order as .filenames / .classes.
tstGenerator = tstDataGen.flow_from_directory(
    directory=tstDir,
    target_size=(imageHeight,imageWidth),
    batch_size=1,
    class_mode=None,
    shuffle=False)

# Confusion matrix
# Integer ceiling division: number of batches needed to see every sample once
# (avoids numpy.math, which is only an accidental alias of the stdlib math module).
nTstSteps = (tstGenerator.samples + tstGenerator.batch_size - 1) // tstGenerator.batch_size
predictions = model.predict_generator(tstGenerator, nTstSteps)
images = tstGenerator.filenames
trueClasses = tstGenerator.classes
if predictions.ndim == 2 and predictions.shape[1] > 1:
    # One score per class: the predicted class is the best-scoring column.
    predictedClasses = numpy.argmax(predictions, axis=1)
else:
    # Single sigmoid output: argmax over one column would always return 0, so
    # threshold the score at 0.5 to obtain the class index instead.
    predictedClasses = (predictions.ravel() > 0.5).astype(int)

report = sklearn.metrics.confusion_matrix(trueClasses, predictedClasses)

print(tstGenerator.class_indices)
print(report)

# Request from Signify on Feb. 7, 2020
# List images which have a different predicted class vs. true class
for image, trueClass, predictedClass in zip(images,trueClasses,predictedClasses):
    if trueClass!=predictedClass:
        print("Image: %s, True Class: %d, Predicted Class: %d" % (image, trueClass, predictedClass))

In [None]:
# Evaluate the loaded model on the "rem" directory and print its confusion matrix.
remDataGen = tensorflow.keras.preprocessing.image.ImageDataGenerator(rescale=1. / 255)

# shuffle=False keeps predictions in the same order as .filenames / .classes.
remGenerator = remDataGen.flow_from_directory(
    directory=remDir,
    target_size=(imageHeight,imageWidth),
    batch_size=1,
    class_mode=None,
    shuffle=False)

# Confusion matrix
# Integer ceiling division: number of batches needed to see every sample once
# (avoids numpy.math, which is only an accidental alias of the stdlib math module).
nRemSteps = (remGenerator.samples + remGenerator.batch_size - 1) // remGenerator.batch_size
predictions = model.predict_generator(remGenerator, nRemSteps)
images = remGenerator.filenames
trueClasses = remGenerator.classes
if predictions.ndim == 2 and predictions.shape[1] > 1:
    # One score per class: the predicted class is the best-scoring column.
    predictedClasses = numpy.argmax(predictions, axis=1)
else:
    # Single sigmoid output: argmax over one column would always return 0, so
    # threshold the score at 0.5 to obtain the class index instead.
    predictedClasses = (predictions.ravel() > 0.5).astype(int)

report = sklearn.metrics.confusion_matrix(trueClasses, predictedClasses)

print(remGenerator.class_indices)
print(report)

# List images which have a different predicted class vs. true class
for image, trueClass, predictedClass in zip(images,trueClasses,predictedClasses):
    if trueClass!=predictedClass:
        print("Image: %s, True Class: %d, Predicted Class: %d" % (image, trueClass, predictedClass))

In [None]:
# From book "Deep Learning w/ Python" by François Chollet
# From https://stackoverflow.com/questions/58322147/how-to-generate-cnn-heatmaps-using-built-in-keras-in-tf2-0-tf-keras
import matplotlib.pyplot
import cv2
import PIL

def plot_activation(imagePath, convLayerName=None,
                    outputPath='/home/jmv/data/mlproj8/myresultingimage.jpg'):
    """Compute a gradient-weighted activation heat map for one image and save an overlay.

    The image is run through the notebook-level ``model``; the gradient of the selected
    model output with respect to a convolutional layer's output is averaged per channel
    and used to weight that layer's activations. The resulting heat map is shown with
    ``matshow`` and written, blended on top of the original image, to ``outputPath``.

    Args:
        imagePath: Path of the image file to analyse.
        convLayerName: Name of the layer whose activations are visualised. When None
            (default), the last Conv2D layer found in ``model.layers`` is used.
        outputPath: File the superimposed image is written to. The default is the
            location this function has always written to.

    Raises:
        ValueError: If no layer name is given and the model has no Conv2D layer, or if
            ``convLayerName`` does not exist in the model.
        IOError: If OpenCV cannot read ``imagePath``.
    """
    # Loads an image into PIL format
    myImage = tensorflow.keras.preprocessing.image.load_img(imagePath,target_size=(imageHeight,imageWidth))
    # Converts the PIL image into a Numpy array
    myImageAsArray = tensorflow.keras.preprocessing.image.img_to_array(myImage)
    # Adds a leading batch axis so the array is a batch containing a single image
    myImageAsArray = numpy.expand_dims(myImageAsArray,axis=0)
    # Scales the image in the same way as what we did before the training
    myImageAsArray /= 255.0
    # Gets the result of the model
    myPrediction = model.predict(myImageAsArray)
    # NOTE(review): if the model has a single output unit this index is always 0, i.e.
    # the gradient is taken with respect to that sole output - confirm this is intended.
    myPredictedClass = numpy.argmax(myPrediction, axis=1)

    if convLayerName is None:
        # The previously hard-coded name "block5_conv3" only exists in models that define
        # a layer with that name; picking the last Conv2D layer works for any CNN.
        convLayers = [layer for layer in model.layers
                      if isinstance(layer, tensorflow.keras.layers.Conv2D)]
        if not convLayers:
            raise ValueError("plot_activation: the model contains no Conv2D layer")
        convLayer = convLayers[-1]
    else:
        convLayer = model.get_layer(convLayerName)

    modelOutput = model.output[:,myPredictedClass[0]]
    # gradients() returns a list with one tensor per variable; only one was requested.
    grads = tensorflow.keras.backend.gradients(modelOutput,convLayer.output)[0]
    # Average the gradient over every axis except the channel axis.
    pooledGrads = tensorflow.keras.backend.mean(grads,axis=(0,1,2))
    iterate = tensorflow.keras.backend.function([model.input],[pooledGrads,convLayer.output[0]])
    pooledGradsValue, convLayerOutputValue = iterate([myImageAsArray])

    # Weight each channel by its pooled gradient (broadcast over the last axis), then
    # average the channels and keep only the positive contributions.
    heatMap = numpy.mean(convLayerOutputValue * pooledGradsValue, axis=-1)
    heatMap = numpy.maximum(heatMap,0)
    # Normalise to [0, 1]; skip when the map is all zeros to avoid dividing by zero.
    maxHeat = numpy.max(heatMap)
    if maxHeat > 0:
        heatMap /= maxHeat
    matplotlib.pyplot.matshow(heatMap)

    img = cv2.imread(imagePath)
    if img is None:
        # cv2.imread signals failure by returning None rather than raising.
        raise IOError("plot_activation: OpenCV could not read image %s" % imagePath)
    # cv2.resize expects (width, height)
    heatMap = cv2.resize(heatMap,(img.shape[1],img.shape[0]))
    heatMap = numpy.uint8(255*heatMap)
    heatMap = cv2.applyColorMap(heatMap,cv2.COLORMAP_JET)
    # Blend the coloured heat map at 40% intensity on top of the original image.
    superImposedImg = heatMap*0.4+img
    cv2.imwrite(outputPath,superImposedImg)

In [None]:
# For each class sub-directory, plot the activation heat map of the first few images.
desiredNumberOfImagesToDisplay = 10
dirToDisplayFrom = tstDir
print(f"DBG> Desired number of images: {desiredNumberOfImagesToDisplay}")
print(f"DBG> Directory to display images from: {dirToDisplayFrom}")

for myClass in sorted(os.listdir(dirToDisplayFrom)):
    print(f"DBG> Class={myClass}")
    # Always build paths from dirToDisplayFrom, so that pointing it at another directory
    # lists and opens images from that directory as well.
    classDir = os.path.join(dirToDisplayFrom,myClass)
    # Files whose name contains "copy" are skipped.
    listOfImages = [image for image in sorted(os.listdir(classDir)) if "copy" not in image]
    actualNumberOfImagesToDisplay = len(listOfImages)
    print(f"DBG> Actual number of images: {actualNumberOfImagesToDisplay}")
    # Slicing already stops at the end of the list when fewer images are available.
    for image in listOfImages[:desiredNumberOfImagesToDisplay]:
        print(f"DBG> Image: {image}")
        selectImage = os.path.join(classDir,image)
        plot_activation(selectImage)
        # plot_activation writes its superimposed result to this fixed location.
        pil_img = PIL.Image.open('/home/jmv/data/mlproj8/myresultingimage.jpg')
        myImShow = matplotlib.pyplot.imshow(pil_img)
        matplotlib.pyplot.title(selectImage,pad=30)
        