In [1]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import os
import pathlib
import PIL
import matplotlib.pyplot as plt
from sklearn.utils import shuffle
import cv2
import tensorflow as tf
from tensorflow import keras
from keras.layers import Input, Conv2D, MaxPool2D, add
from keras.layers import Dense, BatchNormalization, GlobalAveragePooling2D
from keras import Model

#######################
# Runtime Configuration
#######################

# When true, may do extra calculations and print more information
# (predict_with_confidence prints the raw model outputs when this is set).
DEBUG = False

# Whether or not to omit files specified in "exclude" from the 
# training data. Some files are marked as exclude due to having
# characteristics that contradict a normal example of their 
# labeled class (counterfeit or authentic)
# NOTE(review): not read by any of the visible cells - presumably consumed by
# the training notebook; confirm before relying on it here.
USE_EXCLUDE = True

# When true, will export processed images to the file system
EXPORT_PROCESSED = False

# The folder to write processed images to, if EXPORT_PROCESSED is True
EXPORT_DIR = "/kaggle/working/"

# Inputs will be cropped to omit any rows / columns whose grayscale value is
# always at or above this (find_img_bounds compares with >=).
FILTER_BRIGHTNESS = 250

# Size of images that will be used to train the CNN.
# Passed to cv2.resize as its target size in process_img.
IMAGE_SIZE = (325, 325)

# Keras Model Configuration
# NOTE(review): none of these are read by the visible cells (the model is
# loaded from disk). If METRICS is handed to Model.compile, the capitalised
# 'Accuracy' likely selects the exact-match Accuracy metric rather than the
# binary accuracy that lowercase 'accuracy' resolves to - confirm.
METRICS = ['Accuracy'] 
LOSS = 'binary_crossentropy'
OPTIMIZER = 'Adam'
BATCH_SIZE = 10
EPOCHS = 20

# Output Filter - Confidence Threshold
# A sample is labelled genuine (0) only when the model output is <= CONF_T;
# anything above CONF_T is labelled counterfeit (1). Lower values therefore
# demand more confidence before the genuine label is applied.
# interesting values to set to: 0.15, 0.2, 0.25, 0.35
CONF_T = 0.15


####################
# Paths, Class Names
####################
# The list order defines the integer labels: genuine -> 0, counterfeit -> 1.
class_names = ['genuine', 'counterfeit']
class_names_label = {class_name: i for i, class_name in enumerate(class_names)}

# Training data: expects "counterfeit" and "genuine" sub-folders.
training_path = "../input/host-23/phase1-workspace"
data_dir = pathlib.Path(training_path)
counterfeit = list(data_dir.glob('counterfeit/*'))
genuine = list(data_dir.glob('genuine/*'))

# Test data: expects "counterfeit_test" and "genuine_test" sub-folders.
test_path = "../input/host-23/"
test_dir = pathlib.Path(test_path)
t_counterfeit = list(test_dir.glob('counterfeit_test/*'))
t_genuine = list(test_dir.glob('genuine_test/*'))

# Unlabelled images to predict on (loaded with load_predict).
holdout_path = "../input/host-23/Holdout_data"

#######################
# Global Functions
#######################

def find_img_bounds( img_path ):
    """Locate the rows and columns of an image that are not pure whitespace.

    The image is read as grayscale. A row (or column) counts as whitespace
    when every pixel in it is >= FILTER_BRIGHTNESS.

    Args:
        img_path: path of the image file to inspect.

    Returns:
        (rows, cols): two boolean masks, True for each row / column that
        contains at least one pixel darker than FILTER_BRIGHTNESS.
    """
    gray = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
    # Evaluate the brightness test once and reduce it along each axis.
    is_bright = gray >= FILTER_BRIGHTNESS
    keep_rows = ~is_bright.all(axis=1)
    keep_cols = ~is_bright.all(axis=0)
    return (keep_rows, keep_cols)

def trim_img(image, rows, cols, is3d = False):
    """Crop an image down to the selected rows and columns.

    Args:
        image: array indexed as [row, col] or, when is3d is True,
            [row, col, channel].
        rows:  boolean mask (or index array) of the rows to keep.
        cols:  boolean mask (or index array) of the columns to keep.
        is3d:  set to True when `image` carries a trailing channel axis.

    Returns:
        A new array holding only the selected rows and columns.
    """
    # `not` rather than `~`: bitwise NOT of a Python bool is -1 or -2, which
    # is always truthy, so `~is3d` could never select the 3-D branch.
    if not is3d:
        kept_rows = image[rows, :]
        return kept_rows[:, cols]

    kept_rows = image[rows, :, :]
    return kept_rows[:, cols, :]

def process_img(img_path):
    """Turn an image file into the 3-channel edge-feature image fed to the CNN.

    Steps: crop away whitespace borders (see find_img_bounds), resize to
    IMAGE_SIZE, then compute Laplacian and Sobel x / y responses on the
    grayscale result. No Gaussian blur is applied before the filters.

    Args:
        img_path: path of the image file to process.

    Returns:
        The three filter responses (computed as cv2.CV_64F) merged into one
        image with channel order:
            channel 0: Laplacian
            channel 1: Sobel, x-derivative
            channel 2: Sobel, y-derivative
        Under OpenCV's BGR convention (e.g. when the result is written with
        cv2.imwrite) that is blue / green / red respectively.
    """
    (rows, cols) = find_img_bounds(img_path)

    # Kernel size for the Sobel filters
    sobel_ksize = 5

    # Work with gray image. The colour read followed by an explicit conversion
    # is kept as-is so the pixel values fed to the filters do not change.
    gray = cv2.imread(img_path)
    gray = cv2.cvtColor(gray, cv2.COLOR_BGR2GRAY)

    # Trim, then resize the grayscale image
    image = trim_img(gray, rows, cols)
    image = cv2.resize(image, IMAGE_SIZE)

    # Extract features with laplacian, sobel (x), sobel (y)
    laplace = cv2.Laplacian(image, cv2.CV_64F)
    sobelx  = cv2.Sobel(image, cv2.CV_64F, 1, 0, ksize=sobel_ksize)
    sobely  = cv2.Sobel(image, cv2.CV_64F, 0, 1, ksize=sobel_ksize)

    return cv2.merge([laplace, sobelx, sobely])

def predict_with_confidence( m, ds ):
    """Predict labels for a data set, then apply the confidence threshold.

    Args:
        m:  model; must provide predict(x=...) (a Keras Model in this notebook).
        ds: data set (model input).

    Returns:
        A list with one int per prediction: 0 when the model output is
        <= CONF_T, otherwise 1 (indices into class_names).
    """
    # Use the model that was passed in, not the notebook-global `model`, so
    # the function evaluates whichever model the caller asked for.
    predictions = m.predict(x=ds)
    if DEBUG:
        print(predictions)
    return [0 if pred <= CONF_T else 1 for pred in predictions]

def predict_with_confidence_knownvals( m, ds, dl ):
    """Predict with the confidence threshold and score against known labels.

    Prints the number of correct predictions, the total, the accuracy and the
    confusion matrix. Nothing is returned.

    Args:
        m:  model       (Keras Model)
        ds: data set    (Input)
        dl: data labels (known label for each entry of ds)
    """
    filtered_preds = predict_with_confidence(m, ds)
    total = len(dl)
    # Count the positions where the thresholded prediction matches the label.
    hits = sum(1 for k in range(total) if filtered_preds[k] == dl[k])

    print ("Correct Predictions: ", hits)
    print ("Total Predictions: ", total)
    print ("Accuracy: ", hits/total)

    print ("Confusion: ")
    print (tf.math.confusion_matrix(labels=dl, predictions=filtered_preds))

#################
# load_data() fn
#################
def load_data( DIRECTORY, CATEGORY, DIRS = None ):
    """Load and process the labelled images of each category.

    Args:
        DIRECTORY: root folder holding one sub-folder per category.
        CATEGORY:  list of class names (keys of class_names_label), in the
                   order the results should be returned.
        DIRS:      optional list of sub-folder names, parallel to CATEGORY.
                   When omitted or empty, the category names are used.

    Returns:
        A list with one (images, labels) tuple per category, in CATEGORY
        order. `images` is a float32 array of process_img outputs and
        `labels` an int32 array of the category's label.

    Files that OpenCV cannot read are reported and skipped. Files are visited
    in sorted name order so the output order is reproducible across runs.
    """
    if DIRS is None or len(DIRS) == 0:
        print ("Setting DIRS to CATEGORY...")
        DIRS = CATEGORY
    assert (len(DIRS) == len(CATEGORY)), "Length of CATEGORY and DIRS arguments must be equal, if DIRS is specified"

    output = []
    # Pair each category with its folder positionally; a lookup by value
    # would pick the wrong folder if a category name were repeated.
    for category, subdir in zip(CATEGORY, DIRS):
        folder = os.path.join(DIRECTORY, subdir)
        label = class_names_label[category]
        images = []
        labels = []

        print("Loading {}".format(category))
        print ("Applying label: ", label)

        for file in sorted(os.listdir(folder)):
            img_path = os.path.join(folder, file)

            # Probe read: skip anything OpenCV cannot decode.
            if cv2.imread(img_path) is None:
                print("Error: Unable to read file '", img_path, "'. Skipping.'")
                continue

            # Process the Image, optionally export so the processed variant can
            # be inspected for correctness.
            img = process_img(img_path)
            if EXPORT_PROCESSED:
                # splitext removes only the extension; str.rstrip(".png")
                # strips any trailing '.', 'p', 'n', 'g' characters and so
                # mangles names such as "img.jpg".
                stem = os.path.splitext(file)[0]
                procPath = os.path.join(EXPORT_DIR, stem + "_TEST" + ".png")
                print("Writing processed image to: " + procPath)
                cv2.imwrite(procPath, img)

            # Append the processed image to the output, with the associated label.
            images.append(img)
            labels.append(label)

        images = np.array(images, dtype = 'float32')
        labels = np.array(labels, dtype = 'int32')
        output.append((images, labels))
    return output
    
    
def load_predict( folder, export = True ):
    """Load and process every readable image in a folder of unlabelled data.

    Args:
        folder: directory containing the images to predict on.
        export: when True (the default, matching the previous hard-coded
                behaviour) each processed image is also written to
                EXPORT_DIR as "<name>_proc.png". This is independent of
                EXPORT_PROCESSED.

    Returns:
        (images, image_paths): a float32 array of process_img outputs and the
        list of file names they came from, both in sorted file-name order.

    Files that OpenCV cannot read are reported and skipped.
    """
    images = []
    image_paths = []
    for file in sorted(os.listdir(folder)):
        img_path = os.path.join(folder, file)

        # Probe read: skip anything OpenCV cannot decode.
        if cv2.imread(img_path) is None:
            print("Warning: Unable to read image at '", img_path, "'")
            continue

        image = process_img(img_path)

        if export:
            # splitext removes only the extension; str.rstrip(".png") strips
            # any trailing '.', 'p', 'n', 'g' characters instead of a suffix.
            stem = os.path.splitext(file)[0]
            procPath = os.path.join(EXPORT_DIR, stem + "_proc" + ".png")
            print("Writing processed file to: " + procPath)
            cv2.imwrite(procPath, image)

        images.append(image)
        image_paths.append(file)

    images = np.array(images, dtype = 'float32')
    return (images, image_paths)

In [2]:
# Load every image in the training folders - including those that were
# excluded from the training set when the model was built - so all are tested.
(ci_all, cl_all), (gi_all, gl_all) = load_data(
    training_path,
    ["counterfeit", "genuine"],
)
cf_iset = np.concatenate((ci_all, gi_all), axis=0) # Confidence Filter - Image Set
cf_lset = np.concatenate((cl_all, gl_all), axis=0) # Confidence Filter - Label Set

# Labelled test images live in differently named sub-folders, so DIRS is given.
(t_counterfeit_images, t_counterfeit_labels), (t_genuine_images, t_genuine_labels) = load_data(
    DIRECTORY=test_path,
    CATEGORY=["counterfeit", "genuine"],
    DIRS=["counterfeit_test", "genuine_test"],
)
test_images = np.concatenate((t_counterfeit_images, t_genuine_images), axis=0)
test_labels = np.concatenate((t_counterfeit_labels, t_genuine_labels), axis=0)

# Unlabelled holdout images, plus the file name each one came from.
(holdout_images, image_paths) = load_predict(holdout_path)

Setting DIRS to CATEGORY...
Loading counterfeit
Applying label:  1
Loading genuine
Applying label:  0
Error: Unable to read file ' ../input/host-23/phase1-workspace/genuine/A-D-64QFP-29F-SM.psd '. Skipping.'
Loading counterfeit
Applying label:  1
Loading genuine
Applying label:  0
Writing processed file to: /kaggle/working/sample_1_proc.png
Writing processed file to: /kaggle/working/sample_10_proc.png
Writing processed file to: /kaggle/working/sample_2_proc.png
Writing processed file to: /kaggle/working/sample_3_proc.png
Writing processed file to: /kaggle/working/sample_4_proc.png
Writing processed file to: /kaggle/working/sample_5_proc.png
Writing processed file to: /kaggle/working/sample_6_proc.png
Writing processed file to: /kaggle/working/sample_7_proc.png
Writing processed file to: /kaggle/working/sample_8_proc.png
Writing processed file to: /kaggle/working/sample_9_proc.png


In [3]:
# Overrides the DEBUG value set in the configuration cell: from here on
# predict_with_confidence prints the raw model outputs before thresholding.
DEBUG = True

In [4]:
# Name of the saved model to evaluate (file "<model_name>.hdf5" in the input
# data set). For interactive selection use: input("Model Name: ").strip()
model_name = "t77_v75_nogauss"
model_file = "../input/host-23/" + model_name + ".hdf5"
model = tf.keras.models.load_model(model_file)

In [5]:
# Score the loaded model on every image found in the training folders.
predict_with_confidence_knownvals(m=model, ds=cf_iset, dl=cf_lset)

[[0.05268467]
 [0.01517572]
 [0.07578192]
 [0.08604568]
 [0.03985855]
 [0.9633875 ]
 [0.17900229]
 [0.00935574]
 [0.02327668]
 [0.22659656]
 [0.30195436]
 [0.00857277]
 [0.01869953]
 [0.04834282]
 [0.03681515]
 [0.20504706]
 [0.17182437]
 [0.03965925]
 [0.07633705]
 [0.13290493]
 [0.11151869]
 [0.12686957]
 [0.24086297]
 [0.10073646]
 [0.01891858]
 [0.44969577]
 [0.31466994]
 [0.07134417]
 [0.20416124]
 [0.23301795]
 [0.6061756 ]
 [0.9236058 ]
 [0.06630076]
 [0.49930322]
 [0.60501206]
 [0.03169438]
 [0.03014238]
 [0.5389378 ]
 [0.24418446]
 [0.1119597 ]
 [0.00476042]
 [0.00209022]
 [0.0018956 ]
 [0.0010479 ]
 [0.00208197]
 [0.01041059]
 [0.0033407 ]
 [0.00715908]
 [0.00922277]
 [0.00985756]
 [0.01118588]
 [0.00433046]
 [0.00675506]
 [0.00570369]
 [0.00781068]
 [0.00572255]
 [0.01197834]
 [0.00480869]
 [0.00161758]
 [0.00701089]
 [0.00715067]
 [0.00524811]
 [0.00382777]
 [0.00306052]
 [0.0010561 ]
 [0.00324163]
 [0.00571154]
 [0.00710247]
 [0.00805068]
 [0.00198008]
 [0.00862137]
 [0.00

In [6]:
# Score the loaded model on the labelled test images.
predict_with_confidence_knownvals(m=model, ds=test_images, dl=test_labels)

[[0.39769036]
 [0.25290582]
 [0.43629503]
 [0.33627614]
 [0.00928503]
 [0.5760095 ]
 [0.66957843]
 [0.00746998]
 [0.00533286]
 [0.0017077 ]
 [0.2598209 ]
 [0.102336  ]
 [0.00332981]
 [0.00311431]
 [0.00321391]
 [0.00380579]
 [0.07598548]
 [0.00733923]
 [0.01162222]
 [0.14334548]]
Correct Predictions:  15
Total Predictions:  20
Accuracy:  0.75
Confusion: 
tf.Tensor(
[[9 1]
 [4 6]], shape=(2, 2), dtype=int32)


In [7]:
h_preds = predict_with_confidence(model, holdout_images)

# Pretty-print one line per holdout image: file name, label, class name.
# Use these to build sample.csv for the chosen best model
for idx in range(len(holdout_images)):
    predicted = h_preds[idx]
    print(image_paths[idx], ": ", predicted, " (", class_names[predicted], ")")

[[0.32146606]
 [0.49421912]
 [0.02969579]
 [0.00649106]
 [0.00896103]
 [0.96106434]
 [0.01982641]
 [0.01157281]
 [0.977569  ]
 [0.5929422 ]]
sample_1.png :  1  ( counterfeit )
sample_10.png :  1  ( counterfeit )
sample_2.png :  0  ( genuine )
sample_3.png :  0  ( genuine )
sample_4.png :  0  ( genuine )
sample_5.png :  1  ( counterfeit )
sample_6.png :  0  ( genuine )
sample_7.png :  0  ( genuine )
sample_8.png :  1  ( counterfeit )
sample_9.png :  1  ( counterfeit )
