In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import os
import sys
import copy
import time
import tabulate
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import GradientBoostingClassifier, ExtraTreesClassifier
sys.path.append(os.path.join(os.path.abspath(""), ".."))

In [3]:
from app import model_features
from app.models import Classifier
from app.model_features import get_samples, get_data_handler
from app.imager import ImageLoader, DefectViewer, Show, Exposure

## Setup

In [4]:
# Complimentary:
# If True: Split the data as Category and all other
# If False: Split the data as Category and None
complimentary = True

# Maximum number of samples to choose for defects
# The other class is 2X this number
num_samples = 2000

# Seed for plotting
seed = 1234

In [5]:
# Analyzing which defect types:
model_defect_classes = [('FrontGridInterruption', 'NearSolderPad'), 'Closed', 'Isolated', 
                        'BrightSpot', 'Corrosion', 'Resistive']

In [6]:
# Analyzing which defect
model_params = {('FrontGridInterruption', 'NearSolderPad'):
            {'class': GradientBoostingClassifier, 'n_estimators': 600, 'max_depth': 4,
             'learning_rate': 0.05, 'pca_dims': min(250, num_samples)},
            'Closed': {'class': LogisticRegression, 'penalty': 'l2', 'pca_dims': min(200, num_samples)},
            'Isolated': {'class': GradientBoostingClassifier, 'n_estimators': 300, 'max_depth': 4,
                         'learning_rate': 0.1, 'pca_dims': min(160, num_samples)},
            'BrightSpot': {'class': LogisticRegression, 'penalty': 'l2', 'pca_dims': None},
            'Corrosion': {'class': LogisticRegression, 'penalty': 'l2', 'pca_dims': None},
            'Resistive':  {'class': ExtraTreesClassifier, 'max_features': 0.1, 'min_samples_split': 8,
                           'random_state': 32}}

## Main loop for creating models and running evaluations

In [7]:
model_objects = []
model_classes = []
model_data_handlers = []

In [8]:
# For each defect class, create the DataSet
for cnt, defect_classes in enumerate(model_defect_classes):
    if len(model_objects) >= cnt + 1:
        continue
        
    print(f'Working on {defect_classes}')
    start = time.perf_counter()
    model_param = model_params[defect_classes]
    
    # Get the samples for the model
    if isinstance(defect_classes, tuple):
        classes = list(defect_classes)
    else:
        classes = defect_classes
    
    # Get the data for modeling
    defect, not_defect = get_samples(classes, num_samples, complimentary=complimentary)
    
    # Get the data handler 
    data_handler = get_data_handler(defect_classes)
    
    # Get the pre processed data for this 
    defect_ = data_handler(defect, num_jobs=20)
    not_defect_ = data_handler(not_defect, num_jobs=20)
    print(not_defect_.category)
    
    # Show the pre and post processed images
    # _ = Show(num_images=2, seed=seed) << (defect, defect_) + (not_defect, not_defect_)
    
    # Get the parameter for this classifier
    this_param = copy.deepcopy(model_param)
    model_class = this_param['class']
    del this_param['class']
    
#     # Train the classifier 
#     print(defect_classes)
#     cla = Classifier(defect_, not_defect_, model_class, None)
#     score = cla.fit_cv(**this_param)
    
#     # Misclassified
#     print(score)
#     conf, out = cla.misclassified()
#     print(tabulate.tabulate([['True 0', conf[0, 0], conf[0, 1]], ['True 1', conf[1, 0], conf[1, 1]]], headers=['', 'Pred 0', 'Pred 1']))
    
    # Train the classifier 
    cla = Classifier(defect_, not_defect_, model_class, None)
    model = cla.fit(**this_param)
    
    model_objects.append(model)
    model_classes.append(defect_classes)
    model_data_handlers.append(data_handler)
    
    print(f'Completed {defect_classes} in {time.perf_counter()-start}s')

Working on ('FrontGridInterruption', 'NearSolderPad')
model_features.grid_interruption
Others
 GridInterruption - Preprocessed
Completed ('FrontGridInterruption', 'NearSolderPad') in 331.5556781030027s
Working on Closed
model_features.closed
0 images were rejected


  out_img = (in_imgs - all_min) / (all_max - all_min)


Failed on count 367
Failed on count 1614
2 images were rejected
Others
 Closed - Preprocessed


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression


Completed Closed in 255.12294465501327s
Working on Isolated
model_features.isolated
0 images were rejected
Failed on count 626
1 images were rejected
Others
 Closed - Preprocessed
Completed Isolated in 38.4559089189861s
Working on BrightSpot
model_features.brightspots
0 were rejected
0 were rejected
Others
 Brightspots - Gaussian Blur - Fourier Transform


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression


Completed BrightSpot in 23.122973824007204s
Working on Corrosion
model_features.generic_return
Others
 processed


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression


Completed Corrosion in 6.766703513014363s
Working on Resistive
model_features.resistive


  magnitude = np.log10(np.abs(transformed))


Others
 ResistiveCrack - Preprocessed
Completed Resistive in 784.4858371959999s


## Combine the models together and calculate score

In [9]:
img = ImageLoader(defect_class=None, do_train=False)
filename_df = img.get(n=1000)
filename_df = DefectViewer(row_chop=15, col_chop=15).get(filename_df)

In [10]:
from app.models import VectorClassifier
vc = VectorClassifier(model_objects=model_objects, model_classes=model_classes, 
                      model_data_handlers=model_data_handlers, defect_classes=img.defect_classes.tolist())

[('FrontGridInterruption', 'NearSolderPad'), ('Closed',), ('Isolated',), ('BrightSpot',), ('Corrosion',), ('Resistive',)]


In [11]:
results = vc.test(filename_df)

0 images were rejected
0 images were rejected
0 were rejected
('FrontGridInterruption', 'NearSolderPad') 0.6480444842513808 0.7031527093596059
('Closed',) 0.7033777905043007 0.7191937765205092
('Isolated',) 0.7459655917599843 0.8077220077220078
('BrightSpot',) 0.9663991975927784 0.9942857142857142
('Corrosion',) 0.9929789368104314 1.0
('Resistive',) 0.7625099681020733 0.8362662337662338
{'Overall': (0.7530570401957816, 0.7856605463502015), ('FrontGridInterruption', 'NearSolderPad'): (0.6480444842513808, 0.7031527093596059), ('Closed',): (0.7033777905043007, 0.7191937765205092), ('Isolated',): (0.7459655917599843, 0.8077220077220078), ('BrightSpot',): (0.9663991975927784, 0.9942857142857142), ('Corrosion',): (0.9929789368104314, 1.0), ('Resistive',): (0.7625099681020733, 0.8362662337662338)}


In [12]:
for key, value in results.items():
    print(f'{key}, {value[0]}, {value[1]}')

Overall, 0.7530570401957816, 0.7856605463502015
('FrontGridInterruption', 'NearSolderPad'), 0.6480444842513808, 0.7031527093596059
('Closed',), 0.7033777905043007, 0.7191937765205092
('Isolated',), 0.7459655917599843, 0.8077220077220078
('BrightSpot',), 0.9663991975927784, 0.9942857142857142
('Corrosion',), 0.9929789368104314, 1.0
('Resistive',), 0.7625099681020733, 0.8362662337662338
