# Classification - Analyze

In [6]:
import yaml
import os
import shutil
import subprocess
import datetime
import numpy
import random
import sklearn.metrics
import tensorflow
import matplotlib.pyplot
import tf_explain
import PIL

# Environment report: framework version first, then the number of GPUs TensorFlow lists
print("INFO> TensorFlow version : %s" % (tensorflow.__version__,))
print(
    "INFO> # of GPUs available: %d"
    % (len(tensorflow.config.experimental.list_physical_devices('GPU')),)
)

INFO> TensorFlow version : 2.1.0
INFO> # of GPUs available: 1


In [7]:
# Read the parameters stored in the local config.yaml file into the yamlData dictionary
currentDir = os.getcwd()
print("INFO> Reading file config.yaml from directory: %s" % currentDir)

# A context manager releases the file handle as soon as parsing is finished.
# NOTE(review): yaml.Loader is able to construct arbitrary Python objects; that is only
# acceptable while config.yaml is a trusted local file - consider yaml.safe_load otherwise.
with open('config.yaml','r') as yamlFile:
    yamlData = yaml.load(yamlFile,Loader=yaml.Loader)

# Echo every parameter, sorted by name, so the recorded output documents the run
for key in sorted(yamlData):
    print("INFO> %-15s: %s" % (key,yamlData[key]))

INFO> Reading file config.yam from directory: /raid5/disk1/mlproj11/classification
INFO> batchSize      : 8
INFO> checkDataset   : True
INFO> checkpointDir  : /home/jmv/data/mlproj11/tmp/
INFO> createDataset  : True
INFO> datasetDir     : /home/jmv/data/mlproj11/dataset
INFO> goldenDataset  : /home/jmv/data/mlproj11/SignifyGolden20191212/dataset
INFO> imageHeight    : 500
INFO> imageWidth     : 768
INFO> learningRate   : 1e-3
INFO> logDir         : /home/jmv/data/mlproj11/log/
INFO> nEpochs        : 1000
INFO> trnDir         : /home/jmv/data/mlproj11/dataset/trn
INFO> tstDir         : /home/jmv/data/mlproj11/dataset/tst
INFO> valDir         : /home/jmv/data/mlproj11/dataset/val


In [11]:
# Look in the checkpoint directory and select best model candidate based on train/val loss & accuracy.
# The base directory comes from config.yaml (checkpointDir) rather than a second hard-coded
# absolute path; only the run folder and the checkpoint file name are chosen by hand here.
checkpointRun = '20200412-231005'
checkpointFile = '00011_0.538722_0.904431_0.539206_0.904341.h5'
nestedModel = tensorflow.keras.models.load_model(
    os.path.join(yamlData['checkpointDir'], checkpointRun, checkpointFile)
)

# Flatten the model - this is required in order to use tf_explain
# From https://stackoverflow.com/questions/54648296/how-to-flatten-a-nested-model-keras-functional-api
def flatten_model(nestedModel):
    """Return a Sequential model built from the leaf layers of ``nestedModel``.

    Every entry that exposes a ``layers`` attribute is expanded recursively;
    anything else is kept as a leaf, in traversal order. The layer objects
    themselves are handed to the new model - nothing is copied here.
    """
    def collect_leaves(entries, leaves):
        for entry in entries:
            try:
                # Containers expose .layers: replace them by their flattened content
                leaves.extend(collect_leaves(entry.layers, []))
            except AttributeError:
                # No .layers attribute: keep the entry itself
                leaves.append(entry)
        return leaves

    leafLayers = collect_leaves(nestedModel.layers, [])
    return tensorflow.keras.models.Sequential(leafLayers)

# Print the original (nested) architecture, then build its flat counterpart and print that too
nestedModel.summary()

model = flatten_model(nestedModel)
model.summary()

Model: "model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(None, 500, 768, 3)]     0         
_________________________________________________________________
vgg16 (Model)                (None, 15, 24, 512)       14714688  
_________________________________________________________________
flatten (Flatten)            (None, 184320)            0         
_________________________________________________________________
dense (Dense)                (None, 256)               47186176  
_________________________________________________________________
dropout (Dropout)            (None, 256)               0         
_________________________________________________________________
dense_1 (Dense)              (None, 12)                3084      
Total params: 61,903,948
Trainable params: 61,903,948
Non-trainable params: 0
_________________________________________________

In [9]:
%%time
# Test images are only scaled by 1/255 - no augmentation is configured for evaluation
tstDataGen = tensorflow.keras.preprocessing.image.ImageDataGenerator(rescale=1.0/255.0)

# shuffle=False so that the prediction order lines up with tstGenerator.filenames / .classes below
tstGenerator = tstDataGen.flow_from_directory(
    directory=yamlData['tstDir'],
    target_size=(yamlData['imageHeight'],yamlData['imageWidth']),
    batch_size=1,
    class_mode='categorical',
    shuffle=False)

# Confusion matrix
predictions = model.predict(tstGenerator)
images = tstGenerator.filenames
trueClasses = tstGenerator.classes
predictedClasses = numpy.argmax(predictions, axis=1)

# Pin the label set to the generator's class indices: without labels= the matrix only covers
# classes that occur in trueClasses or predictedClasses, so its rows/columns could silently
# stop matching the class_indices mapping printed next to it.
classLabels = sorted(tstGenerator.class_indices.values())
report = sklearn.metrics.confusion_matrix(trueClasses, predictedClasses, labels=classLabels)

print(tstGenerator.class_indices)
print(report)

# List images which have a different predicted class vs. true class
#for image, trueClass, predictedClass in zip(images,trueClasses,predictedClasses):
#    if trueClass!=predictedClass:
#        print("Image: %s, True Class: %d, Predicted Class: %d" % (image, trueClass, predictedClass))

Found 9173 images belonging to 12 classes.
{'defect-01': 0, 'defect-02': 1, 'defect-03': 2, 'defect-04': 3, 'defect-05': 4, 'defect-06': 5, 'defect-07': 6, 'defect-08': 7, 'defect-09': 8, 'defect-10': 9, 'defect-11': 10, 'ok': 11}
[[   0    0    0    0    0    0    0    0    0    0    0  205]
 [   0    0    0    0    0    0    0    0    0    0    0   17]
 [   0    0    0    0    0    0    0    0    0    0    0  103]
 [   0    0    0    0    0    0    0    0    0    0    0   76]
 [   0    0    0    0    0    0    0    0    0    0    0   77]
 [   0    0    0    0    0    0    0    0    0    0    0   38]
 [   0    0    0    0    0    0    0    0    0    0    0   50]
 [   0    0    0    0    0    0    0    0    0    0    0   81]
 [   0    0    0    0    0    0    0    0    0    0    0   67]
 [   0    0    0    0    0    0    0    0    0    0    0   87]
 [   0    0    0    0    0    0    0    0    0    0    0   76]
 [   0    0    0    0    0    0    0    0    0    0    0 8296]]
CPU times: u

In [12]:
model.summary()

desiredNumberOfImagesToDisplay = 1
dirToDisplayFrom = yamlData['tstDir']
# NOTE(review): Grad-CAM is always computed for this single class index, whatever class folder
# the image comes from - confirm this is intended rather than the image's own class.
gradCamClassIndex = 4
# Layer whose activations are explained
gradCamLayerName = "block5_conv3"
print(f"DBG> Desired number of images: {desiredNumberOfImagesToDisplay}")
print(f"DBG> Directory to display images from: {dirToDisplayFrom}")

# This line forces the size of the figures
matplotlib.pyplot.rcParams['figure.figsize'] = [20, 10]

explainer = tf_explain.core.grad_cam.GradCAM()


def _show_image_file(imagePath, title):
    """Open the image stored at imagePath and draw it in a new figure under the given title."""
    # The context manager closes the underlying file once the pixels have been handed to imshow
    with PIL.Image.open(imagePath) as pil_img:
        matplotlib.pyplot.figure()
        matplotlib.pyplot.imshow(pil_img)
        matplotlib.pyplot.title(title,pad=30)


for myClass in sorted(os.listdir(dirToDisplayFrom)):
    classDir = os.path.join(dirToDisplayFrom,myClass)
    if not os.path.isdir(classDir):
        # A stray file next to the class folders would make os.listdir() below fail
        continue
    print(f"DBG> Class={myClass}")

    # Candidate images of this class, skipping any file whose name contains "copy".
    # NOTE(review): random is not seeded in this cell, so the selection differs between runs.
    listOfImages = [image for image in sorted(os.listdir(classDir)) if "copy" not in image]
    random.shuffle(listOfImages)

    # Slicing already copes with folders that hold fewer images than requested
    for image in listOfImages[:desiredNumberOfImagesToDisplay]:
        selectImage = os.path.join(classDir,image)
        print(f"DBG> Full path is {selectImage}")

        # Loads an image into PIL format, converts the PIL image into a Numpy array
        # Finally, scale the image
        myImage = tensorflow.keras.preprocessing.image.load_img(selectImage,target_size=(yamlData['imageHeight'],yamlData['imageWidth']))
        myImageAsArray = tensorflow.keras.preprocessing.image.img_to_array(myImage)
        myImageAsArray /= 255.0

        # Call the explainer and save result
        # NOTE(review): the recorded run of this cell stops in explain() with
        # "ValueError: Graph disconnected". Presumably this is because the flattened model
        # reuses the layer objects of nestedModel instead of copies - to be confirmed.
        data = ([myImageAsArray], None)
        grid = explainer.explain(validation_data=data, model=model, layer_name=gradCamLayerName, class_index=gradCamClassIndex)
        explainer.save(grid, ".", "grad_cam.png")

        # Original image first, then the Grad-CAM rendering that was just written to disk
        _show_image_file(selectImage, selectImage)
        _show_image_file('./grad_cam.png', selectImage)

Model: "sequential_3"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_2 (InputLayer)         multiple                  0         
_________________________________________________________________
block1_conv1 (Conv2D)        (None, 500, 768, 64)      1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, 500, 768, 64)      36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, 250, 384, 64)      0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, 250, 384, 128)     73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, 250, 384, 128)     147584    
_________________________________________________________________
block2_pool (MaxPooling2D)   (None, 125, 192, 128)    

ValueError: in converted code:

    /raid5/disk1/tensorflow2/lib/python3.6/site-packages/tf_explain/core/grad_cam.py:106 get_gradients_and_filters  *
        grad_model = tf.keras.models.Model(
    /raid5/disk1/tensorflow2/lib/python3.6/site-packages/tensorflow_core/python/keras/engine/training.py:146 __init__
        super(Model, self).__init__(*args, **kwargs)
    /raid5/disk1/tensorflow2/lib/python3.6/site-packages/tensorflow_core/python/keras/engine/network.py:169 __init__
        self._init_graph_network(*args, **kwargs)
    /raid5/disk1/tensorflow2/lib/python3.6/site-packages/tensorflow_core/python/training/tracking/base.py:457 _method_wrapper
        result = method(self, *args, **kwargs)
    /raid5/disk1/tensorflow2/lib/python3.6/site-packages/tensorflow_core/python/keras/engine/network.py:324 _init_graph_network
        self.inputs, self.outputs)
    /raid5/disk1/tensorflow2/lib/python3.6/site-packages/tensorflow_core/python/keras/engine/network.py:1676 _map_graph_network
        str(layers_with_complete_input))

    ValueError: Graph disconnected: cannot obtain value for tensor Tensor("input_2_6:0", shape=(None, 500, 768, 3), dtype=float32) at layer "input_2". The following previous layers were accessed without issue: ['input_1', 'input_2']


In [None]:
# Development aid: print the built-in help for the explainer's explain() method
help(explainer.explain)