In [1]:
# Load IPython's autoreload extension in mode 2 so that edits to imported
# modules (e.g. the local `app` package) are picked up without a kernel restart.
%load_ext autoreload
%autoreload 2

In [2]:
import os
import sys
import copy
import time
import tabulate
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import GradientBoostingClassifier
# Make the parent of the current working directory importable; the `app`
# package imported below is resolved from there (presumably the repo root).
# The path is normalised and only added once so that re-running this cell
# does not keep growing sys.path with duplicate "<cwd>/.." entries.
project_root = os.path.abspath(os.path.join(os.path.abspath(""), ".."))
if project_root not in sys.path:
    sys.path.append(project_root)

In [3]:
from app import model_features
from app.models import Classifier
from app.model_features import get_samples, get_data_handler
from app.imager import ImageLoader, DefectViewer, Show, Exposure

## Setup

In [4]:
# Negative-class selection (the spelling matches the `complimentary`
# keyword accepted by get_samples):
#   True  -> split the data as Category versus all other categories
#   False -> split the data as Category versus None
complimentary: bool = False

# Upper bound on the number of defect samples to choose.
# The other class is 2X this number.
num_samples: int = 2000

# Seed for plotting
seed: int = 1234

In [5]:
# Defect types to analyse. A tuple groups several defect types into a
# single model; a plain string is one defect type per model.
model_defect_classes = [
    ('FrontGridInterruption', 'NearSolderPad'),
    'Closed',
    'Isolated',
    'BrightSpot',
    'Corrosion',
]

In [6]:
# Model configuration for each entry of model_defect_classes (same keys).
#   'class'    - estimator class handed to Classifier in the main loop
#   'pca_dims' - capped at num_samples where set; None for two of the models
#                (presumably means "no PCA" -- confirm in app.models)
#   All keys except 'class' are forwarded as keyword arguments to
#   Classifier.fit_cv / Classifier.fit in the main loop.
model_params = {
    ('FrontGridInterruption', 'NearSolderPad'): {
        'class': GradientBoostingClassifier,
        'n_estimators': 600,
        'max_depth': 4,
        'learning_rate': 0.05,
        'pca_dims': min(250, num_samples),
    },
    'Closed': {
        'class': LogisticRegression,
        'penalty': 'l2',
        'pca_dims': min(200, num_samples),
    },
    'Isolated': {
        'class': GradientBoostingClassifier,
        'n_estimators': 300,
        'max_depth': 4,
        'learning_rate': 0.1,
        'pca_dims': min(160, num_samples),
    },
    'BrightSpot': {
        'class': LogisticRegression,
        'penalty': 'l2',
        'pca_dims': None,
    },
    'Corrosion': {
        'class': LogisticRegression,
        'penalty': 'l2',
        'pca_dims': None,
    },
}

## Main loop for creating models and running evaluations

In [8]:
# Parallel lists consumed later by VectorClassifier: entry i of each list
# belongs to the same defect model.
model_objects = []
model_classes = []
model_data_handlers = []

# Value passed as `num_jobs` to the data handlers
# (presumably the number of parallel workers -- confirm in app.model_features).
num_jobs = 20

# For each defect class: sample data, pre-process it, score a classifier,
# then fit the model that is kept for the combined classifier.
for defect_classes in model_defect_classes:
    print(f'Working on {defect_classes}')
    start = time.perf_counter()
    model_param = model_params[defect_classes]

    # get_samples receives a list when several defect types are grouped
    # into one model (tuple entry), otherwise the single class name.
    classes = list(defect_classes) if isinstance(defect_classes, tuple) else defect_classes

    # Get the data for modeling. Use the notebook-level `complimentary`
    # setting instead of a hard-coded False so the config cell takes effect.
    defect, not_defect = get_samples(classes, num_samples, complimentary=complimentary)

    # Get the data handler (pre-processing) registered for this defect class
    data_handler = get_data_handler(defect_classes)

    # Get the pre-processed data for both classes
    defect_ = data_handler(defect, num_jobs=num_jobs)
    not_defect_ = data_handler(not_defect, num_jobs=num_jobs)
    # Show which category ended up as the "other" class
    print(not_defect_.category)

    # Optional: show the pre and post processed images
    # _ = Show(num_images=2, seed=seed) << (defect, defect_) + (not_defect, not_defect_)

    # Split the configuration into the estimator class and the keyword
    # arguments for fitting, without mutating the shared model_params entry.
    model_class = model_param['class']
    this_param = {key: value for key, value in model_param.items() if key != 'class'}

    # Score the classifier (fit_cv presumably cross-validates -- see app.models)
    cla = Classifier(defect_, not_defect_, model_class, None)
    score = cla.fit_cv(**this_param)

    # Report the score and the confusion matrix of the scored classifier
    print(score)
    conf, out = cla.misclassified()
    confusion_rows = [
        ['True 0', conf[0, 0], conf[0, 1]],
        ['True 1', conf[1, 0], conf[1, 1]],
    ]
    print(tabulate.tabulate(confusion_rows, headers=['', 'Pred 0', 'Pred 1']))

    # Fit the model that is kept, using a fresh Classifier so it does not
    # carry over state from the scoring run above.
    cla = Classifier(defect_, not_defect_, model_class, None)
    model = cla.fit(**this_param)

    model_objects.append(model)
    model_classes.append(defect_classes)
    model_data_handlers.append(data_handler)

    print(f'Completed {defect_classes} in {time.perf_counter()-start}s')

Working on ('FrontGridInterruption', 'NearSolderPad')
None
 GridInterruption - Preprocessed
('FrontGridInterruption', 'NearSolderPad')
0.7663281584374149
          Pred 0    Pred 1
------  --------  --------
True 0       618        41
True 1       158       232


KeyboardInterrupt: 

## Combine the models together and calculate score

In [None]:
# Image source for evaluation: no defect-class filter and do_train=False
# (presumably selects the non-training split -- confirm in app.imager).
img = ImageLoader(defect_class=None, do_train=False)

# Fetch 50 entries, then pass them through a DefectViewer that chops
# 15 rows / 15 columns (see app.imager for the exact chopping semantics).
filename_df = img.get(n=50)
viewer = DefectViewer(row_chop=15, col_chop=15)
filename_df = viewer.get(filename_df)

In [None]:
from app.models import VectorClassifier

# Bundle the per-defect models with their class labels and data handlers
# (parallel lists built by the main loop) into one combined classifier.
vc = VectorClassifier(
    model_objects=model_objects,
    model_classes=model_classes,
    model_data_handlers=model_data_handlers,
    defect_classes=img.defect_classes.tolist(),
)

In [None]:
# Run the combined classifier's test routine on the sampled files; the value
# it returns is displayed as this cell's output (see app.models for details).
vc.test(filename_df)