In [1]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import os
import pathlib
import PIL
import matplotlib.pyplot as plt
from sklearn.utils import shuffle
import cv2
import tensorflow as tf
from tensorflow import keras
from keras.layers import Input, Conv2D, MaxPool2D, add
from keras.layers import Dense, BatchNormalization, GlobalAveragePooling2D
from keras import Model

#######################
# Runtime Configuration
#######################

# When True, later cells print extra information (array shapes, raw arrays,
# raw model predictions)
DEBUG = False

# Whether or not to omit files specified in "exclude" from the 
# training data. Some files are marked as exclude due to having
# characteristics that contradict a normal example of their 
# labeled class (counterfeit or authentic).
# load_data() reads this flag each time it is called, and a later cell
# reassigns it to False.
USE_EXCLUDE = True

# When True, load_data() writes every processed image to the file system
EXPORT_PROCESSED = False

# The folder to write processed images to, if EXPORT_PROCESSED is True.
# Paths are built by plain string concatenation, so keep the trailing slash.
EXPORT_DIR = "/kaggle/working/"

# Inputs will be cropped to omit any rows / columns whose grayscale value is
# always at or above this threshold
FILTER_BRIGHTNESS = 250

# Size that every image is resized to before it is used to train the CNN
IMAGE_SIZE = (325, 325)

# Keras Model Configuration
# NOTE: the training-history keys read later ('Accuracy', 'val_Accuracy')
# follow the spelling used in METRICS.
METRICS = ['Accuracy'] 
LOSS = 'binary_crossentropy'
OPTIMIZER = 'Adam'
BATCH_SIZE = 10
EPOCHS = 20

# Output Filter - Confidence Threshold
# A prediction is labelled genuine (0) only when the model output is <= CONF_T,
# i.e. the genuine confidence is at least (1-CONF_T); anything higher is
# labelled counterfeit (1).
CONF_T = 0.15


####################
# Paths, Class Names
####################
# Label encoding used throughout the notebook: genuine -> 0, counterfeit -> 1
class_names = ['genuine', 'counterfeit']
class_names_label = {class_name: i for i, class_name in enumerate(class_names)}

# Training images live in the "counterfeit" and "genuine" sub-folders of this path
training_path = "../input/host-23/phase1-workspace"
data_dir = pathlib.Path(training_path)
# File listings of the two training folders
# NOTE(review): not referenced by any cell visible in this notebook - confirm before relying on them
counterfeit = list(data_dir.glob('counterfeit/*'))
genuine = list(data_dir.glob('genuine/*'))

# Test images live in the "counterfeit_test" and "genuine_test" sub-folders of this path
test_path = "../input/host-23/"
test_dir = pathlib.Path(test_path)
# File listings of the two test folders (same caveat as the training listings)
t_counterfeit = list(test_dir.glob('counterfeit_test/*'))
t_genuine = list(test_dir.glob('genuine_test/*'))

# Location of the hold-out data
# NOTE(review): not read by any cell visible in this notebook
holdout_path = "../input/host-23/Holdout_data"

#######################
# Global Functions
#######################

# Find which rows and columns of an image hold something other than whitespace.
# A row / column is whitespace when all of its grayscale pixels are at or
# above FILTER_BRIGHTNESS.
def find_img_bounds( img_path ):
    """Return boolean masks of the non-whitespace rows and columns.

    Args:
        img_path: path of the image file; it is read as grayscale.

    Returns:
        (rows, cols): two boolean arrays, True where the row / column has at
        least one pixel darker than FILTER_BRIGHTNESS.
    """
    grayscale = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
    is_bright = grayscale >= FILTER_BRIGHTNESS
    keep_rows = np.logical_not(is_bright.all(axis=1))
    keep_cols = np.logical_not(is_bright.all(axis=0))
    return keep_rows, keep_cols

# Trims an image down to the given rows and columns
def trim_img(image, rows, cols, is3d = False):
    """Keep only the selected rows and columns of an image array.

    Args:
        image: NumPy array indexed as [row, col] or [row, col, channel].
        rows:  boolean mask (or index array) of the rows to keep.
        cols:  boolean mask (or index array) of the columns to keep.
        is3d:  accepted for backward compatibility only. The previous
               `if (~is3d)` test was a bitwise NOT on a bool (-1 or -2, both
               truthy), so the 2-D branch always ran; the expression below
               gives the same result for 2-D and 3-D arrays without the flag.

    Returns:
        The cropped array; any trailing channel axis is left untouched.
    """
    # Select rows first, then columns: indexing both axes at once with two
    # masks would pair them element-wise instead of taking the cross product.
    return image[rows][:, cols]

def process_img(img_path):
    """Turn one image file into a 3-channel edge-feature image.

    The image is cropped to its non-whitespace bounds, resized to IMAGE_SIZE
    and passed through three derivative filters.

    Args:
        img_path: path of the image file to process.

    Returns:
        The result of cv2.merge over three CV_64F planes, in this order:
          channel 0: Laplacian
          channel 1: Sobel, first derivative in x
          channel 2: Sobel, first derivative in y
    """
    # Kernel size for the Sobel filters
    sobel_ksize = 5

    keep_rows, keep_cols = find_img_bounds(img_path)

    # Read the file in colour, then collapse it to a single gray channel
    colour = cv2.imread(img_path)
    gray = cv2.cvtColor(colour, cv2.COLOR_BGR2GRAY)

    # Trim first, then resize, so the whitespace border never reaches the network
    cropped = trim_img(gray, keep_rows, keep_cols)
    resized = cv2.resize(cropped, IMAGE_SIZE)

    # No smoothing is applied before the filters (a Gaussian blur was tried
    # here previously and is currently disabled)
    feature_planes = [
        cv2.Laplacian(resized, cv2.CV_64F),
        cv2.Sobel(resized, cv2.CV_64F, 1, 0, ksize=sobel_ksize),
        cv2.Sobel(resized, cv2.CV_64F, 0, 1, ksize=sobel_ksize),
    ]
    return cv2.merge(feature_planes)

# Predict labels of a data set, then apply a confidence threshold. 
# -----
# Args:
#  m:    model       (Keras Model)
#  ds:   data set    (Input)
# Returns:
#  list with one label per prediction: 0 when the model output is <= CONF_T,
#  1 otherwise
def predict_with_confidence( m, ds ): 
    # Use the model that was passed in; the previous version silently used the
    # notebook-global `model` and ignored `m`.
    predictions = m.predict(x=ds)
    if DEBUG:
        print(predictions)
    return [0 if pred <= CONF_T else 1 for pred in predictions]

# Predict labels of a data set with the confidence threshold applied, then
# score those predictions against the known labels and print a report
# (correct count, total, accuracy, confusion matrix, per-class counts).
# -----
# Args:
#  m:    model       (Keras Model)
#  ds:   data set    (Input)
#  dl:   data labels 
def predict_with_confidence_knownvals( m, ds, dl ):
    filtered_preds = predict_with_confidence(m, ds)
    total = len(dl)

    # Only the first len(dl) predictions are scored; indexing (rather than
    # zip) keeps the IndexError if there are fewer predictions than labels.
    scored = [filtered_preds[i] for i in range(total)]
    n_correct = sum(1 for i in range(total) if scored[i] == dl[i])
    n_authentic = sum(1 for p in scored if p == 0)
    n_counterfeit = total - n_authentic

    print ("Correct Predictions: ", n_correct)
    print ("Total Predictions: ", total)
    print ("Accuracy: ", n_correct/total)

    print ("Confusion: ")
    print (tf.math.confusion_matrix(labels=dl, predictions=filtered_preds))
    print ("Authentics Predicted: ", n_authentic)
    print ("Counterfeits Predicted: ", n_counterfeit)

In [2]:
# Files to exclude -- 
#  These are images of either counterfeit IC components that look too authentic,
#  or authentic ICs that look like counterfeits. 
#  Entries are file names without the ".png" extension; load_data() skips a
#  file whose stripped name appears here while USE_EXCLUDE is True.
exclude = [
     "A-D-64QFP-43B-D"  # large scratch on surface
    ,"A-O-08DIP-31B-D"  # irregular left edge
    ,"A-O-08DIP-33B-D"  # irregular left edge
    ,"A-O-08DIP-33F-D"  # irregular right edge
    ,"A-O-08DIP-40B-D"  # irregular right edge
    ,"A-O-08DIP-40F-D"  # heavily worn
    ,"A-D-64QFP-01B-SM" # gunk on pins
    ,"A-D-64QFP-01F-SM" # front side of the previous item
    ,"A-D-64QFP-05B-SM" # bent pin
    ,"A-O-08DIP-21B-SM" # irregular left edge
    ,"A-O-08DIP-24F-SM" # irregular surface, silkscreen
    ,"A-O-08DIP-25F-SM" # heavily worn
    ,"C-T-64QFP-14B-SM" # cleaner than many authentic items
    ,"C-O-08DIP-16B-SM" # too clean
]

#################
# load_data() fn
#################
# Loads the images of each requested category from its folder, processes them
# with process_img() and pairs them with the category's numeric label.
def load_data( DIRECTORY, CATEGORY, DIRS = None ):
    """Load and process the images of every requested category.

    Args:
        DIRECTORY: parent folder that contains one sub-folder per category.
        CATEGORY:  list of class names; each must be a key of class_names_label.
        DIRS:      optional list of sub-folder names, parallel to CATEGORY.
                   When omitted (or empty) the category names are used as the
                   sub-folder names.

    Returns:
        A list with one (images, labels) tuple per entry of CATEGORY, in the
        same order. `images` is a float32 array of processed images and
        `labels` an int32 array holding the category label once per image.

    Raises:
        AssertionError: if DIRS is given with a different length than CATEGORY.

    Reads the notebook globals USE_EXCLUDE, exclude, EXPORT_PROCESSED and
    EXPORT_DIR at call time.
    """
    if not DIRS:
        print ("Setting DIRS to CATEGORY...")
        DIRS = CATEGORY
    assert (len(DIRS) == len(CATEGORY)), "Length of CATEGORY and DIRS arguments must be equal, if DIRS is specified"

    def _png_stem(filename):
        # Remove exactly one ".png" suffix. str.rstrip(".png") is not a suffix
        # strip: it removes ANY trailing '.', 'p', 'n' or 'g' characters, so
        # "chip.png" would become "chi" and never match an exclude entry.
        suffix = ".png"
        return filename[:-len(suffix)] if filename.endswith(suffix) else filename

    output = []
    # zip keeps each category paired with its own folder, even if a category
    # name is repeated (list.index would always return the first match)
    for category, subdir in zip(CATEGORY, DIRS):
        folder = os.path.join(DIRECTORY, subdir)
        label = class_names_label[category]
        images = []
        labels = []

        print("Loading {}".format(category))
        print ("Applying label: ", label)

        for file in os.listdir(folder):
            stem = _png_stem(file)

            # Optionally skip some images that were selected to be
            # excluded from the model's training data
            if USE_EXCLUDE and stem in exclude:
                print ("Skipping " + file + " (specified exclude)")
                continue

            # Skip anything OpenCV cannot decode (e.g. non-image files)
            img_path = os.path.join(folder, file)
            if cv2.imread(img_path) is None:
                print("Error: Unable to read file '", img_path, "'. Skipping.'")
                continue

            # Process the image, optionally export so the processed variant can
            # be inspected for correctness.
            img = process_img(img_path)
            if EXPORT_PROCESSED:
                procPath = EXPORT_DIR + stem + "_TEST" + ".png"
                print("Writing processed image to: " + procPath)
                cv2.imwrite(procPath, img)

            # Append the processed image to the output, with the associated label.
            images.append(img)
            labels.append(label)

        images = np.array(images, dtype = 'float32')
        labels = np.array(labels, dtype = 'int32')
        output.append((images, labels))
    return output

## Training Data
Here, we'll build the training data by reading from the `genuine` and `counterfeit` directories in `phase1-workspace`.

In [3]:
# Load and process the training images; load_data() returns one
# (images, labels) pair per category, in the order the categories are listed
(counterfeit_images, counterfeit_labels), (genuine_images, genuine_labels) = load_data(
    DIRECTORY=training_path,
    CATEGORY=["counterfeit", "genuine"],
)

Setting DIRS to CATEGORY...
Loading counterfeit
Applying label:  1
Skipping C-O-08DIP-16B-SM.png (specified exclude)
Skipping C-T-64QFP-14B-SM.png (specified exclude)
Loading genuine
Applying label:  0
Error: Unable to read file ' ../input/host-23/phase1-workspace/genuine/A-D-64QFP-29F-SM.psd '. Skipping.'
Skipping A-O-08DIP-21B-SM.png (specified exclude)
Skipping A-O-08DIP-25F-SM.png (specified exclude)
Skipping A-O-08DIP-33F-D.png (specified exclude)
Skipping A-O-08DIP-40F-D.png (specified exclude)
Skipping A-D-64QFP-43B-D.png (specified exclude)
Skipping A-D-64QFP-01B-SM.png (specified exclude)
Skipping A-O-08DIP-24F-SM.png (specified exclude)
Skipping A-O-08DIP-33B-D.png (specified exclude)
Skipping A-D-64QFP-05B-SM.png (specified exclude)
Skipping A-D-64QFP-01F-SM.png (specified exclude)
Skipping A-O-08DIP-31B-D.png (specified exclude)
Skipping A-O-08DIP-40B-D.png (specified exclude)


In [4]:
# Build the training set by stacking the processed counterfeit images
# (and labels) ahead of the genuine ones along the first axis
train_images = np.concatenate((counterfeit_images, genuine_images), axis=0)
train_labels = np.concatenate((counterfeit_labels, genuine_labels), axis=0)

if DEBUG:
    print(train_images.shape, train_labels.shape, train_images, sep="\n")

## Test Data
Next, we'll build the test set by reading from the `genuine_test` and `counterfeit_test` directories in the top level of `host-23`.

In [5]:
# Load and process the test images. The class names stay "counterfeit" /
# "genuine" (they select the label), while DIRS points at the *_test folders.
(t_counterfeit_images, t_counterfeit_labels), (t_genuine_images, t_genuine_labels) = load_data(
    DIRECTORY=test_path,
    CATEGORY=["counterfeit", "genuine"],
    DIRS=["counterfeit_test", "genuine_test"],
)

Loading counterfeit
Applying label:  1
Loading genuine
Applying label:  0


In [6]:
# Build the test set: counterfeit samples first, genuine samples after them
test_images = np.concatenate((t_counterfeit_images, t_genuine_images), axis=0)
test_labels = np.concatenate((t_counterfeit_labels, t_genuine_labels), axis=0)

if DEBUG:
    print(test_images.shape, test_labels.shape, test_images, sep="\n")

## Build Model
The following cells build the model, using the Keras Model class.

In [7]:
# Network input: one processed image of IMAGE_SIZE pixels with 3 feature channels
inputs = Input(shape=(IMAGE_SIZE + (3,)))

# Base number of convolution filters; the layers below use multiples of it
n = 8
# Activation of the convolutions in blocks 1-3 (all three currently share "elu")
b1a = "elu"
b2a = b1a
b3a = b1a


# Block 1: two 3x3 convolutions with batch normalisation in between,
# followed by 3x3 max pooling
x = Conv2D(n, (3, 3), activation = b1a)(inputs)
x = BatchNormalization()(x)
x = Conv2D(n*2, (3, 3), activation = b1a)(x)
block_1_output = MaxPool2D(pool_size=(3, 3))(x)

# Block 2: residual block. padding='same' and n*2 filters keep the tensor the
# same shape as block_1_output, so the two can be added element-wise.
x = Conv2D(n*2, (3, 3), activation = b2a, padding = 'same')(block_1_output)
x = BatchNormalization()(x)
x = Conv2D(n*2, (3, 3), activation = b2a, padding = 'same')(x)
block_2_output = add([x, block_1_output])

# Block 3: second residual block, same layout as block 2
x = Conv2D(n*2, (3, 3), activation = b3a, padding = 'same')(block_2_output)
x = BatchNormalization()(x)
x = Conv2D(n*2, (3, 3), activation = b3a, padding = 'same')(x)
block_3_output = add([x, block_2_output])

# Head: one more convolution, 2x2 max pooling, global average pooling and a
# dense layer of n*8 units
x = Conv2D(n*2*2, (3, 3), activation = 'elu')(block_3_output)
x = MaxPool2D(pool_size = (2, 2))(x)
x = GlobalAveragePooling2D()(x)
x = Dense(n*2*2*2, activation = 'elu')(x)

# Single sigmoid unit: a score in [0, 1]. With the label encoding used in this
# notebook (genuine = 0, counterfeit = 1), higher values mean "counterfeit".
output = Dense(1, activation = 'sigmoid')(x)

if DEBUG:
    print(inputs.shape)

In [8]:
# Wrap the layer graph built above into a trainable Keras model
model = Model(inputs=inputs, outputs=output)

In [9]:
# Compile with the optimizer, loss and metrics chosen in the configuration
# cell, then print the layer-by-layer summary
model.compile(
    optimizer=OPTIMIZER,
    loss=LOSS,
    metrics=METRICS,
)
model.summary()

Model: "model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_1 (InputLayer)           [(None, 325, 325, 3  0           []                               
                                )]                                                                
                                                                                                  
 conv2d (Conv2D)                (None, 323, 323, 8)  224         ['input_1[0][0]']                
                                                                                                  
 batch_normalization (BatchNorm  (None, 323, 323, 8)  32         ['conv2d[0][0]']                 
 alization)                                                                                       
                                                                                              

In [10]:
# Train the model. The test set doubles as the validation set here, so the
# val_* values in `history` are measured on test_images / test_labels.
history = model.fit(
    x=train_images,
    y=train_labels,
    validation_data=(test_images, test_labels),
    epochs=EPOCHS,
    batch_size=BATCH_SIZE,
    shuffle=True,
)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [11]:
def hist_pretty( h, field ):
    """Print the per-epoch values of one training-history field.

    Args:
        h:     object with a `history` dict (as returned by model.fit).
        field: key of `h.history` to print, e.g. 'loss'.

    Prints a header line with the field name, then one line per epoch,
    numbered from 1.
    """
    print("\n", field, ":")
    for epoch, value in enumerate(h.history[field], start=1):
        print ("\tE", epoch, ":\t", value)

# Print the training curves first, then the validation curves
for f in ('loss', 'Accuracy', 'val_loss', 'val_Accuracy'):
    hist_pretty(history, f)



 loss :
	E 1 :	 0.7719523310661316
	E 2 :	 0.6696892380714417
	E 3 :	 0.6269795894622803
	E 4 :	 0.5999024510383606
	E 5 :	 0.5750945210456848
	E 6 :	 0.5684803128242493
	E 7 :	 0.5028275847434998
	E 8 :	 0.6155943870544434
	E 9 :	 0.5112050175666809
	E 10 :	 0.4508495330810547
	E 11 :	 0.48825064301490784
	E 12 :	 0.39627736806869507
	E 13 :	 0.41030359268188477
	E 14 :	 0.42224037647247314
	E 15 :	 0.40978267788887024
	E 16 :	 0.36944475769996643
	E 17 :	 0.29871684312820435
	E 18 :	 0.4788856506347656
	E 19 :	 0.3177022933959961
	E 20 :	 0.2808331847190857

 Accuracy :
	E 1 :	 0.4767441749572754
	E 2 :	 0.5813953280448914
	E 3 :	 0.6511628031730652
	E 4 :	 0.6976743936538696
	E 5 :	 0.6744186282157898
	E 6 :	 0.7558139562606812
	E 7 :	 0.8139534592628479
	E 8 :	 0.6744186282157898
	E 9 :	 0.7790697813034058
	E 10 :	 0.8139534592628479
	E 11 :	 0.8139534592628479
	E 12 :	 0.8604651093482971
	E 13 :	 0.8488371968269348
	E 14 :	 0.8488371968269348
	E 15 :	 0.8604651093482971
	E 16 :	 

## Test Confidence Filter on Training Data
Here, we'll apply our extra post-processing (the `CONF_T` confidence threshold) to the predictions and see how accurate our model is when predicting labels of the training set, including the images that were excluded while training.


In [12]:
# Re-load the training folders with the exclude list switched off, so that the
# images left out while fitting the model are scored as well.
# NOTE: USE_EXCLUDE stays False for every load_data() call made after this cell.
USE_EXCLUDE = False
(ci_all, cl_all), (gi_all, gl_all) = load_data(
    DIRECTORY=training_path,
    CATEGORY=["counterfeit", "genuine"],
)
cf_iset = np.concatenate((ci_all, gi_all), axis=0) # Confidence Filter - Image Set
cf_lset = np.concatenate((cl_all, gl_all), axis=0) # Confidence Filter - Label Set

Setting DIRS to CATEGORY...
Loading counterfeit
Applying label:  1
Loading genuine
Applying label:  0
Error: Unable to read file ' ../input/host-23/phase1-workspace/genuine/A-D-64QFP-29F-SM.psd '. Skipping.'


In [13]:
# Score the thresholded predictions on the full training folders
predict_with_confidence_knownvals(m=model, ds=cf_iset, dl=cf_lset)

Correct Predictions:  75
Total Predictions:  100
Accuracy:  0.75
Confusion: 
tf.Tensor(
[[60  0]
 [25 15]], shape=(2, 2), dtype=int32)
Authentics Predicted:  85
Counterfeits Predicted:  15


In [14]:
# Score the thresholded predictions on the test set
predict_with_confidence_knownvals(m=model, ds=test_images, dl=test_labels)

Correct Predictions:  13
Total Predictions:  20
Accuracy:  0.65
Confusion: 
tf.Tensor(
[[10  0]
 [ 7  3]], shape=(2, 2), dtype=int32)
Authentics Predicted:  17
Counterfeits Predicted:  3


In [15]:
# Always keep a copy of the trained model under the name "Image_Classification"
model.save("Image_Classification")

# Optionally save a second, user-named copy as <EXPORT_DIR><name>.hdf5
save_model = input("Do you wish to save this model [y/n]: ").strip().lower()

if save_model in ('y', 'yes'):
    model_name = input("Model Name: ").strip()
    try:
        tf.keras.models.save_model(model, EXPORT_DIR + model_name + ".hdf5")
    except Exception as err:
        # Catch Exception rather than using a bare except, so that interrupting
        # the kernel still works, and report why the save failed.
        print("Saving failed...", err)

Do you wish to save this model [y/n]:  y
Model Name:  inferior
