In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import os
import sys
import copy
import time
import tabulate
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import GradientBoostingClassifier
sys.path.append(os.path.join(os.path.abspath(""), ".."))

In [3]:
from app import model_features
from app.models import Classifier
from app.model_features import get_samples
from app.imager import ImageLoader, DefectViewer, Show, Exposure

## Setup

In [4]:
# Complimentary:
# If True: Split the data as Category and all other
# If False: Split the data as Category and None
complimentary = True

# Maximum number of samples to choose for defects
# The other class is 2X this number
num_samples = 2000

# Seed for plotting
seed = 1234

In [5]:
# Analyzing which defect types:
model_defect_classes = [('FrontGridInterruption', 'NearSolderPad'), 'Closed', 'Isolated' , 'BrightSpot', 'Corrosion']

In [6]:
# These are the model parameters to model with
model_params = {('FrontGridInterruption', 'NearSolderPad'): {'class': GradientBoostingClassifier, 'n_estimators': 600, 'max_depth': 4, 'learning_rate': 0.05, 'pca_dims': min(250, num_samples)},
                 'Closed': {'class': LogisticRegression,'penalty': 'l2', 'pca_dims': min(200, num_samples)}, 
                 'Isolated': {'class': GradientBoostingClassifier,'n_estimators': 300, 'max_depth': 4, 'learning_rate': 0.1, 'pca_dims': min(160, num_samples)},
                 'BrightSpot': {'class': LogisticRegression,'penalty': 'l2', 'pca_dims': None},
                 'Corrosion': {'class': LogisticRegression,'penalty': 'l2', 'pca_dims': None}}

## Main loop for creating models and running evaluations

In [7]:
def get_data_handler(defect_classes):

    if 'FrontGridInterruption' in defect_classes:
        data_handler = model_features.grid_interruption
    elif 'Closed' in defect_classes:
        data_handler = model_features.closed
    elif 'Isolated' in defect_classes:
        data_handler = model_features.isolated
    elif 'BrightSpot' in defect_classes or 'Corrosion' in defect_classes:
        data_handler = model_features.generic_return
    else:
        raise KeyError('Unsupported model type')
    
    return data_handler

In [8]:
model_objects = []
model_classes = []
model_data_handlers = []

# For each defect class, create the DataSet
for defect_classes in model_defect_classes:
    print(f'Working on {defect_classes}')
    start = time.perf_counter()
    model_param = model_params[defect_classes]
    
    # Get the samples for the model
    if isinstance(defect_classes, tuple):
        classes = list(defect_classes)
    else:
        classes = defect_classes
    
    # Get the data for modeling
    defect, not_defect = get_samples(classes, num_samples, complimentary=False)
    
    # Get the data handler 
    data_handler = get_data_handler(defect_classes)
    
    # Get the pre processed data for this 
    defect_ = data_handler(defect, num_jobs=20)
    not_defect_ = data_handler(not_defect, num_jobs=20)
    
    # Show the pre and post processed images
    # _ = Show(num_images=2, seed=seed) << (defect, defect_) + (not_defect, not_defect_)
    
    # Get the parameter for this classifier
    this_param = copy.deepcopy(model_param)
    model_class = this_param['class']
    del this_param['class']
    
    # Train the classifier 
    cla = Classifier(defect_, not_defect_, model_class, None)
    model = cla.fit(**this_param)
    
    model_objects.append(model)
    model_classes.append(defect_classes)
    model_data_handlers.append(data_handler)
    
    print(f'Completed {defect_classes} in {time.perf_counter()-start}s')

Working on ('FrontGridInterruption', 'NearSolderPad')
Completed ('FrontGridInterruption', 'NearSolderPad') in 426.36338469199836s
Working on Closed


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression


Completed Closed in 57.55345582499285s
Working on Isolated
Completed Isolated in 87.74450323199562s
Working on BrightSpot


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression


Completed BrightSpot in 9.311503201999585s
Working on Corrosion
Completed Corrosion in 7.005957040993962s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression


## Combine the models together and calculate score

In [9]:
img = ImageLoader(defect_class=None, do_train=False)
filename_df = img.get(n = 50)
filename_df = DefectViewer(row_chop=15, col_chop=15).get(filename_df)

In [10]:
from app.models import VectorClassifier
vc = VectorClassifier(model_objects=model_objects, model_classes=model_classes,  model_data_handlers=model_data_handlers, defect_classes=img.defect_classes.tolist())

[('FrontGridInterruption', 'NearSolderPad'), ('Closed',), ('Isolated',), ('BrightSpot',), ('Corrosion',)]


In [11]:
vc.test(filename_df)

Overall 0.6928571428571428
('FrontGridInterruption', 'NearSolderPad') 0.5338345864661653
('Closed',) 0.8214285714285714
('Isolated',) 0.7888888888888889
('BrightSpot',) 0.8936170212765957
('Corrosion',) 0.02127659574468085




{'Overall': 0.6928571428571428,
 ('FrontGridInterruption', 'NearSolderPad'): 0.5338345864661653,
 ('Closed',): 0.8214285714285714,
 ('Isolated',): 0.7888888888888889,
 ('BrightSpot',): 0.8936170212765957,
 ('Corrosion',): 0.02127659574468085}