In [None]:
# Enable IPython's autoreload extension in mode 2 so imported modules are
# reloaded before each cell executes (edits to the app package take effect
# without restarting the kernel).
%load_ext autoreload
%autoreload 2

In [None]:
import os
import sys
import copy
import time
import tabulate
import numpy as np
# Add the parent of the current working directory to the import path
# (presumably where the `app` package imported below lives — confirm repo layout).
sys.path.append(os.path.join(os.path.abspath(""), ".."))

In [None]:
from app.models import Classifier
from app.utils import ImageWrapper 
from app.transforms import FFT, IFFT, CreateOnesMask
from app.filters import CreateKernel, Convolve, Canny, HOG
from app.imager import ImageLoader, DefectViewer, Show, Exposure
from app.custom import RemoveBusBars, Orient, HighlightFrontGrid
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import GradientBoostingClassifier

In [None]:
# Fixed seed passed to Show so the same images are displayed on every run
# whenever the num_images option is set
seed = 1234

# Running record of each model's score and its matching plot label
scores, legends = [], []

In [None]:
# Defect classes that make up the positive class in this analysis
defect_class = ['FrontGridInterruption', 'NearSolderPad']
# Number of images requested from the loader for the positive class
n_samples = 2000
# True  -> negative class is loaded with is_not=True on defect_class
# False -> negative class is loaded from the 'None' class only
compliment = True

In [None]:
# Request n_samples images of the classes in defect_class.
# `category` acts like a title for the images.
defect = (DefectViewer(row_chop=15, col_chop=15) << (ImageLoader(defect_class=defect_class) << n_samples))
defect.category = 'GridInterruption'

# Number of defect images actually loaded
num_samples = len(defect)

# Build the negative class, requesting twice as many images as for the defect class:
#   compliment False -> images of the 'None' class only
#   compliment True  -> images loaded with is_not=True on defect_class
if not compliment:
    not_defect = (DefectViewer(row_chop=15, col_chop=15) << (ImageLoader(defect_class='None', is_not=False) << n_samples*2))
    not_defect.category = 'No defects'
else:
    not_defect = (DefectViewer(row_chop=15, col_chop=15) << (ImageLoader(defect_class=defect_class, is_not=True) << n_samples*2))
    not_defect.category = 'Other'

# Keep an untouched copy of the defect set: `defect` is rebound on the next line,
# and the original is still needed to filter not_defect
defect_ = defect.copy()

# Eliminate any not-defect images that are in defect
defect = defect - not_defect

# Eliminate any defect images that are in not-defect
not_defect = not_defect - defect_

# View both classes. A tuple is passed because defect and not_defect are ImageWrapper objects.
# num_images=2 picks a random subset; the fixed seed keeps that subset the same on every run.
_ = Show(num_images=2, seed=seed) << (defect, not_defect)

In [None]:
# Report how many images remain in each class after the overlap removal
for image_set in (defect, not_defect):
    print(len(image_set))

In [None]:
# 1. Baseline: L2-penalised logistic regression on the raw images, no PCA
model_params = dict(penalty='l2', seed=14376, pca_dims=None)
cla = Classifier(defect, not_defect, LogisticRegression, None)
score = cla.fit_cv(**model_params)
print(score)

# Confusion matrix (rows: true class, columns: predicted class)
conf, out = cla.misclassified()
confusion_rows = [[f'True {k}', conf[k, 0], conf[k, 1]] for k in (0, 1)]
print(tabulate.tabulate(confusion_rows, headers=['', 'Pred 0', 'Pred 1']))

# Record the result for the progression plot
scores.append(score)
legends.append('Baseline Logistic Regression model')

In [None]:
# 2. Logistic regression on the raw images with pca_dims set
pca_dims = 800
model_params = dict(penalty='l2', seed=14376, pca_dims=pca_dims)
cla = Classifier(defect, not_defect, LogisticRegression, None)
score = cla.fit_cv(**model_params)
print(score)

# Confusion matrix (rows: true class, columns: predicted class)
conf, out = cla.misclassified()
confusion_rows = [[f'True {k}', conf[k, 0], conf[k, 1]] for k in (0, 1)]
print(tabulate.tabulate(confusion_rows, headers=['', 'Pred 0', 'Pred 1']))

# Record the result for the progression plot
scores.append(score)
legends.append(f'Logitc regression with {pca_dims} dimensions')

In [None]:
# 3. Gradient boosted classifier on the raw images with pca_dims set
pca_dims = 200
params = dict(seed=14376, n_estimators=600, max_depth=4, learning_rate=0.05, pca_dims=pca_dims)
cla = Classifier(defect, not_defect, GradientBoostingClassifier, None)

# fit_cv hands back the score tracked below
score = cla.fit_cv(**params)
print(score)

# Confusion matrix (rows: true class, columns: predicted class)
conf, out = cla.misclassified()
confusion_rows = [[f'True {k}', conf[k, 0], conf[k, 1]] for k in (0, 1)]
print(tabulate.tabulate(confusion_rows, headers=['', 'Pred 0', 'Pred 1']))

# Record the result for the progression plot
scores.append(score)
legends.append(f'Gradient Boosted Classifier model with {pca_dims} dimensions')

In [None]:
# Run the Orient transform over both image sets and print the elapsed seconds
orient_kwargs = dict(num_jobs=20, do_debug=True, do_eliminate=False)
start = time.perf_counter()
oriented_defect = Orient(**orient_kwargs) << defect
oriented_not_defect = Orient(**orient_kwargs) << not_defect
print(time.perf_counter() - start)

# Preview both classes. The two results are tuples, so they are joined
# with + before being handed to Show
_ = Show(num_images=10, seed=seed) << (oriented_defect + oriented_not_defect)

In [None]:
# Add Sobel features.
# Step 1: attach a 'sobel' kernel (axis=0) to each oriented image set.
defect_kernel = CreateKernel(kernel='sobel', axis=0) << oriented_defect
not_defect_kernel = CreateKernel(kernel='sobel', axis=0) << oriented_not_defect

# Step 2: run Convolve on the output of the kernel step.
sobel_defect = Convolve() << defect_kernel
sobel_not_defect = Convolve() << not_defect_kernel

In [None]:
# Combine the last element of each oriented result with the last element of its
# Sobel result using the `&` operator (presumably a feature concatenation — TODO confirm).
# NOTE(review): this rebinds defect_, which earlier held the untouched copy of `defect`.
defect_ = oriented_defect[-1] & sobel_defect[-1]
not_defect_ = oriented_not_defect[-1] & sobel_not_defect[-1]

In [None]:
# Gradient boosted classifier on the combined oriented + Sobel sets with pca_dims set
pca_dims = 100
params = dict(seed=14376, n_estimators=600, max_depth=4, learning_rate=0.05, pca_dims=pca_dims)
cla = Classifier(defect_, not_defect_, GradientBoostingClassifier, None)

# fit_cv hands back the score tracked below
score = cla.fit_cv(**params)
print(score)

# Confusion matrix (rows: true class, columns: predicted class)
conf, out = cla.misclassified()
confusion_rows = [[f'True {k}', conf[k, 0], conf[k, 1]] for k in (0, 1)]
print(tabulate.tabulate(confusion_rows, headers=['', 'Pred 0', 'Pred 1']))

# Record the result for the progression plot
scores.append(score)
legends.append(f'Gradient Boosted Classifier model with {pca_dims} dimensions and Sobel images')

In [None]:
from app.models import ModelNN

lr = 0.00005
pca_dims = 1000

# Hidden-layer widths: fixed sizes when PCA is off, otherwise derived from pca_dims
# (first layer pca_dims wide, followed by four layers of half that width)
if pca_dims is None:
    dense_layers = (2000, 1000, 300, 300, 300)
else:
    half_dims = int(pca_dims/2)
    dense_layers = (pca_dims, half_dims, half_dims, half_dims, half_dims)

optimizer_params = {'name': 'sgd', 'lr': lr, 'nesterov': False, 'momentum': 0.9}
# Scheduler's minimum learning rate is 1/100 of the starting rate
scheduler_params = {'lr_min': lr/100, 't_mul': 2}
# Fix: pass the dense_layers computed above. The original hard-coded
# (2000, 1000, 300, 300, 300) here, so the pca_dims-dependent sizing was never used.
model_params = {'num_output_classes': 2, 'dense_layers': dense_layers, 'dense_activation': 'relu',
                'pca_dims': pca_dims, 'dropout': 0.2}

# Train the dense network on the combined feature sets held in defect_ / not_defect_
model = ModelNN(defect_, not_defect_, model_params, optimizer_params, scheduler_params, model_type='dnn')
score = model.fit(num_epochs=30)

print(score)
scores.append(score)
legends.append(f'DNN with {pca_dims} dims on enhanced images')

In [None]:
from app.models import ModelNN

# CNN trained directly on the oriented images (no Sobel features)
lr = 0.0001
optimizer_params = dict(name='sgd', lr=lr, nesterov=True, momentum=0.9)
# Scheduler's minimum learning rate is 1/100 of the starting rate
scheduler_params = dict(lr_min=lr/100, t_mul=3)
model_params = dict(
    num_output_classes=2,
    channels=((1, 5), (10, 3), (10, 3), (10, 3), (10, 3)),
    dense_layers=(1000, 500, 500),
)

model = ModelNN(oriented_defect[-1], oriented_not_defect[-1], model_params,
                optimizer_params, scheduler_params, model_type='cnn')
score = model.fit(num_epochs=30)
print(score)

# Record the result for the progression plot
scores.append(score)
legends.append('CNN on oriented images')

In [None]:
import pickle
import random
import numpy as np
import matplotlib.pyplot as plt

# Pickle (scores, legends) to a file named with a random integer tag, so each
# run of this cell writes to a different file name
tag = random.randint(0, 2**32)
results_path = f'results_{tag}'
with open(results_path, 'wb') as outfi:
    pickle.dump((scores, legends), outfi)

In [None]:
# Line plot of every recorded score in the order the models were run;
# each x position is labelled with the matching legend, rotated to stay readable.
x = np.arange(len(scores))
y = np.array(scores)
# Figure is three times matplotlib's default 6.4 x 4.8 inch size
plt.figure(figsize=(6.4*3, 4.8*3))
plt.xticks(x, legends, rotation=90, fontsize=20)
plt.plot(x, y, '-')
plt.title('Model progression on CV set', fontsize=20)
plt.ylabel('Balanced accuracy score', fontsize=20)
plt.show()

In [None]:
# Presumably a deliberate stop: the exception halts "Run All" here so the
# cells below this point are not executed automatically.
raise KeyError('Ended')

## End of completed analysis

In [None]:
# 2.a Logistic regression on the raw images with pca_dims set
pca_dims = 100
model_params = dict(penalty='l2', seed=14376, pca_dims=pca_dims)
cla = Classifier(defect, not_defect, LogisticRegression, None)
score = cla.fit_cv(**model_params)
print(score)

# Confusion matrix (rows: true class, columns: predicted class)
conf, out = cla.misclassified()
confusion_rows = [[f'True {k}', conf[k, 0], conf[k, 1]] for k in (0, 1)]
print(tabulate.tabulate(confusion_rows, headers=['', 'Pred 0', 'Pred 1']))

# Record the result for the progression plot
scores.append(score)
legends.append(f'Logitc regression with {pca_dims} dimensions')

In [None]:
# 2.b Logistic regression on the raw images with pca_dims set
pca_dims = 200
model_params = dict(penalty='l2', seed=14376, pca_dims=pca_dims)
cla = Classifier(defect, not_defect, LogisticRegression, None)
score = cla.fit_cv(**model_params)
print(score)

# Confusion matrix (rows: true class, columns: predicted class)
conf, out = cla.misclassified()
confusion_rows = [[f'True {k}', conf[k, 0], conf[k, 1]] for k in (0, 1)]
print(tabulate.tabulate(confusion_rows, headers=['', 'Pred 0', 'Pred 1']))

# Record the result for the progression plot
scores.append(score)
legends.append(f'Logitc regression with {pca_dims} dimensions')

In [None]:
# 2.c Logistic regression on the raw images with pca_dims set
pca_dims = 400
model_params = dict(penalty='l2', seed=14376, pca_dims=pca_dims)
cla = Classifier(defect, not_defect, LogisticRegression, None)
score = cla.fit_cv(**model_params)
print(score)

# Confusion matrix (rows: true class, columns: predicted class)
conf, out = cla.misclassified()
confusion_rows = [[f'True {k}', conf[k, 0], conf[k, 1]] for k in (0, 1)]
print(tabulate.tabulate(confusion_rows, headers=['', 'Pred 0', 'Pred 1']))

# Record the result for the progression plot
scores.append(score)
legends.append(f'Logitc regression with {pca_dims} dimensions')

In [None]:
# 2.d Logistic regression on the raw images with pca_dims set
pca_dims = 800
model_params = dict(penalty='l2', seed=14376, pca_dims=pca_dims)
cla = Classifier(defect, not_defect, LogisticRegression, None)
score = cla.fit_cv(**model_params)
print(score)

# Confusion matrix (rows: true class, columns: predicted class)
conf, out = cla.misclassified()
confusion_rows = [[f'True {k}', conf[k, 0], conf[k, 1]] for k in (0, 1)]
print(tabulate.tabulate(confusion_rows, headers=['', 'Pred 0', 'Pred 1']))

# Record the result for the progression plot
scores.append(score)
legends.append(f'Logitc regression with {pca_dims} dimensions')

In [None]:
# 2.e Logistic regression on the raw images with pca_dims set
pca_dims = 2000
model_params = dict(penalty='l2', seed=14376, pca_dims=pca_dims)
cla = Classifier(defect, not_defect, LogisticRegression, None)
score = cla.fit_cv(**model_params)
print(score)

# Confusion matrix (rows: true class, columns: predicted class)
conf, out = cla.misclassified()
confusion_rows = [[f'True {k}', conf[k, 0], conf[k, 1]] for k in (0, 1)]
print(tabulate.tabulate(confusion_rows, headers=['', 'Pred 0', 'Pred 1']))

# Record the result for the progression plot
scores.append(score)
legends.append(f'Logitc regression with {pca_dims} dimensions')

In [None]:
# 3.a Gradient boosted classifier on the raw images with pca_dims set
pca_dims = 100
params = dict(seed=14376, n_estimators=600, max_depth=4, learning_rate=0.05, pca_dims=pca_dims)
cla = Classifier(defect, not_defect, GradientBoostingClassifier, None)

# fit_cv hands back the score tracked below
score = cla.fit_cv(**params)
print(score)

# Confusion matrix (rows: true class, columns: predicted class)
conf, out = cla.misclassified()
confusion_rows = [[f'True {k}', conf[k, 0], conf[k, 1]] for k in (0, 1)]
print(tabulate.tabulate(confusion_rows, headers=['', 'Pred 0', 'Pred 1']))

# Record the result for the progression plot
scores.append(score)
legends.append(f'Gradient Boosted Classifier model with {pca_dims} dimensions')

In [None]:
# 3.b Gradient boosted classifier on the raw images with pca_dims set
pca_dims = 200
params = dict(seed=14376, n_estimators=600, max_depth=4, learning_rate=0.05, pca_dims=pca_dims)
cla = Classifier(defect, not_defect, GradientBoostingClassifier, None)

# fit_cv hands back the score tracked below
score = cla.fit_cv(**params)
print(score)

# Confusion matrix (rows: true class, columns: predicted class)
conf, out = cla.misclassified()
confusion_rows = [[f'True {k}', conf[k, 0], conf[k, 1]] for k in (0, 1)]
print(tabulate.tabulate(confusion_rows, headers=['', 'Pred 0', 'Pred 1']))

# Record the result for the progression plot
scores.append(score)
legends.append(f'Gradient Boosted Classifier model with {pca_dims} dimensions')

In [None]:
# 3.c Gradient boosted classifier on the raw images with pca_dims set
pca_dims = 400
params = dict(seed=14376, n_estimators=600, max_depth=4, learning_rate=0.05, pca_dims=pca_dims)
cla = Classifier(defect, not_defect, GradientBoostingClassifier, None)

# fit_cv hands back the score tracked below
score = cla.fit_cv(**params)
print(score)

# Confusion matrix (rows: true class, columns: predicted class)
conf, out = cla.misclassified()
confusion_rows = [[f'True {k}', conf[k, 0], conf[k, 1]] for k in (0, 1)]
print(tabulate.tabulate(confusion_rows, headers=['', 'Pred 0', 'Pred 1']))

# Record the result for the progression plot
scores.append(score)
legends.append(f'Gradient Boosted Classifier model with {pca_dims} dimensions')

## Fix orientation of images

In [None]:
from app.custom import Orient
# Run the Orient transform over both image sets and print the elapsed seconds
orient_kwargs = dict(num_jobs=20, do_debug=True, do_eliminate=False)
start = time.perf_counter()
oriented_defect = Orient(**orient_kwargs) << defect
oriented_not_defect = Orient(**orient_kwargs) << not_defect
print(time.perf_counter() - start)

# Preview both classes. The two results are tuples, so they are joined
# with + before being handed to Show
_ = Show(num_images=10, seed=seed) << (oriented_defect + oriented_not_defect)

In [None]:
# 5. Gradient boosted classifier on the re-oriented images with pca_dims set
pca_dims = 200
params = dict(seed=14376, n_estimators=600, max_depth=4, learning_rate=0.05, pca_dims=pca_dims)
cla = Classifier(oriented_defect[-1], oriented_not_defect[-1], GradientBoostingClassifier, None)

# fit_cv hands back the score tracked below
score = cla.fit_cv(**params)
print(score)

# Confusion matrix (rows: true class, columns: predicted class)
conf, out = cla.misclassified()
confusion_rows = [[f'True {k}', conf[k, 0], conf[k, 1]] for k in (0, 1)]
print(tabulate.tabulate(confusion_rows, headers=['', 'Pred 0', 'Pred 1']))

# Record the result for the progression plot
scores.append(score)
legends.append(f'Gradient Boosted Classifier model with {pca_dims} dimensions and re-oriented images')

In [None]:
# For every entry returned by the last cla.misclassified() call, display its
# final element with Show (num_images=10, fixed seed).
for misclass in out:
    _ = Show(num_images=10, seed=seed) << misclass[-1]

## Sobel filter

In [None]:
# Step 1: attach a 'sobel' kernel (axis=0) to each oriented image set.
defect_kernel = CreateKernel(kernel='sobel', axis=0) << oriented_defect
not_defect_kernel = CreateKernel(kernel='sobel', axis=0) << oriented_not_defect

# Step 2: run Convolve on the output of the kernel step.
sobel_defect = Convolve() << defect_kernel
sobel_not_defect = Convolve() << not_defect_kernel

In [None]:
# Gradient boosted classifier on oriented & Sobel features with pca_dims set
pca_dims = 100
params = dict(seed=14376, n_estimators=600, max_depth=4, learning_rate=0.05, pca_dims=pca_dims)
cla = Classifier(
    oriented_defect[-1] & sobel_defect[-1],
    oriented_not_defect[-1] & sobel_not_defect[-1],
    GradientBoostingClassifier,
    None,
)

# fit_cv hands back the score tracked below
score = cla.fit_cv(**params)
print(score)

# Confusion matrix (rows: true class, columns: predicted class)
conf, out = cla.misclassified()
confusion_rows = [[f'True {k}', conf[k, 0], conf[k, 1]] for k in (0, 1)]
print(tabulate.tabulate(confusion_rows, headers=['', 'Pred 0', 'Pred 1']))

# Record the result for the progression plot
scores.append(score)
legends.append(f'Gradient Boosted Classifier model with {pca_dims} dimensions and Sobel images')

In [None]:
# Gradient boosted classifier on oriented & Sobel features with pca_dims set
pca_dims = 200
params = dict(seed=14376, n_estimators=600, max_depth=4, learning_rate=0.05, pca_dims=pca_dims)
cla = Classifier(
    oriented_defect[-1] & sobel_defect[-1],
    oriented_not_defect[-1] & sobel_not_defect[-1],
    GradientBoostingClassifier,
    None,
)

# fit_cv hands back the score tracked below
score = cla.fit_cv(**params)
print(score)

# Confusion matrix (rows: true class, columns: predicted class)
conf, out = cla.misclassified()
confusion_rows = [[f'True {k}', conf[k, 0], conf[k, 1]] for k in (0, 1)]
print(tabulate.tabulate(confusion_rows, headers=['', 'Pred 0', 'Pred 1']))

# Record the result for the progression plot
scores.append(score)
legends.append(f'Gradient Boosted Classifier model with {pca_dims} dimensions and Sobel images')

In [None]:
# Gradient boosted classifier on oriented & Sobel features with pca_dims set
pca_dims = 400
params = dict(seed=14376, n_estimators=600, max_depth=4, learning_rate=0.05, pca_dims=pca_dims)
cla = Classifier(
    oriented_defect[-1] & sobel_defect[-1],
    oriented_not_defect[-1] & sobel_not_defect[-1],
    GradientBoostingClassifier,
    None,
)

# fit_cv hands back the score tracked below
score = cla.fit_cv(**params)
print(score)

# Confusion matrix (rows: true class, columns: predicted class)
conf, out = cla.misclassified()
confusion_rows = [[f'True {k}', conf[k, 0], conf[k, 1]] for k in (0, 1)]
print(tabulate.tabulate(confusion_rows, headers=['', 'Pred 0', 'Pred 1']))

# Record the result for the progression plot
scores.append(score)
legends.append(f'Gradient Boosted Classifier model with {pca_dims} dimensions and Sobel images')

In [None]:
# For every entry returned by the last cla.misclassified() call, display its
# final element with Show (num_images=40, fixed seed).
for misclass in out:
    _ = Show(num_images=40, seed=seed) << misclass[-1]

## Highlight the front grid

In [None]:
# Apply HighlightFrontGrid to both oriented image sets with one shared parameter set.
# (No bus-bar removal happens in this cell, despite what the earlier comment said.)
front_grid_params = {'finger_width': 5, 'finger_height': 5, 'side_padding': 2, 
                     'top_padding': 0, 'bottom_padding': 0, 'finger_mult': 1, 
                     'flipped': False, 'num_jobs': 40}
grid_oriented_defect = HighlightFrontGrid(**front_grid_params) << oriented_defect
grid_oriented_not_defect = HighlightFrontGrid(**front_grid_params) << oriented_not_defect

In [None]:
# Combine oriented, front-grid and Sobel results for each class with the `&`
# operator (presumably a feature concatenation — TODO confirm).
# NOTE(review): rebinds defect_ / not_defect_ used by the classifier cells that follow.
defect_ = oriented_defect[-1]  & grid_oriented_defect[-1] & sobel_defect[-1]
not_defect_ = oriented_not_defect[-1] & grid_oriented_not_defect[-1] & sobel_not_defect[-1]

In [None]:
# Gradient boosted classifier on the combined sets in defect_ / not_defect_ with pca_dims set
pca_dims = 70
params = dict(seed=14376, n_estimators=600, max_depth=4, learning_rate=0.05, pca_dims=pca_dims)
cla = Classifier(defect_, not_defect_, GradientBoostingClassifier, None)

# fit_cv hands back the score tracked below
score = cla.fit_cv(**params)
print(score)

# Confusion matrix (rows: true class, columns: predicted class)
conf, out = cla.misclassified()
confusion_rows = [[f'True {k}', conf[k, 0], conf[k, 1]] for k in (0, 1)]
print(tabulate.tabulate(confusion_rows, headers=['', 'Pred 0', 'Pred 1']))

# Record the result for the progression plot
scores.append(score)
legends.append(f'Gradient Boosted Classifier model with {pca_dims} dimensions and sobel + custom kernel_3')

In [None]:
# Ad-hoc inspection: print all legends, then display the fifth-from-last score
# as the cell's output value.
print(legends)
scores[-5]

In [None]:
# Gradient boosted classifier on the combined sets in defect_ / not_defect_ with pca_dims set
pca_dims = 100
params = dict(seed=14376, n_estimators=600, max_depth=4, learning_rate=0.05, pca_dims=pca_dims)
cla = Classifier(defect_, not_defect_, GradientBoostingClassifier, None)

# fit_cv hands back the score tracked below
score = cla.fit_cv(**params)
print(score)

# Confusion matrix (rows: true class, columns: predicted class)
conf, out = cla.misclassified()
confusion_rows = [[f'True {k}', conf[k, 0], conf[k, 1]] for k in (0, 1)]
print(tabulate.tabulate(confusion_rows, headers=['', 'Pred 0', 'Pred 1']))

# Record the result for the progression plot
scores.append(score)
legends.append(f'Gradient Boosted Classifier model with {pca_dims} dimensions and sobel + custom kernel')

In [None]:
# Gradient boosted classifier on the combined sets in defect_ / not_defect_ with pca_dims set
pca_dims = 200
params = dict(seed=14376, n_estimators=600, max_depth=4, learning_rate=0.05, pca_dims=pca_dims)
cla = Classifier(defect_, not_defect_, GradientBoostingClassifier, None)

# fit_cv hands back the score tracked below
score = cla.fit_cv(**params)
print(score)

# Confusion matrix (rows: true class, columns: predicted class)
conf, out = cla.misclassified()
confusion_rows = [[f'True {k}', conf[k, 0], conf[k, 1]] for k in (0, 1)]
print(tabulate.tabulate(confusion_rows, headers=['', 'Pred 0', 'Pred 1']))

# Record the result for the progression plot
scores.append(score)
legends.append(f'Gradient Boosted Classifier model with {pca_dims} dimensions and sobel + custom kernel')

In [None]:
# Gradient boosted classifier on the combined sets in defect_ / not_defect_ with pca_dims set
pca_dims = 400
params = dict(seed=14376, n_estimators=600, max_depth=4, learning_rate=0.05, pca_dims=pca_dims)
cla = Classifier(defect_, not_defect_, GradientBoostingClassifier, None)

# fit_cv hands back the score tracked below
score = cla.fit_cv(**params)
print(score)

# Confusion matrix (rows: true class, columns: predicted class)
conf, out = cla.misclassified()
confusion_rows = [[f'True {k}', conf[k, 0], conf[k, 1]] for k in (0, 1)]
print(tabulate.tabulate(confusion_rows, headers=['', 'Pred 0', 'Pred 1']))

# Record the result for the progression plot
scores.append(score)
legends.append(f'Gradient Boosted Classifier model with {pca_dims} dimensions and sobel + custom kernel')

## Deep Neural net models

In [None]:
# Inputs for the dense network: oriented results combined with Sobel results via `&`
# (presumably a feature concatenation — TODO confirm).
defect_dnn = oriented_defect[-1] & sobel_defect[-1]
not_defect_dnn = oriented_not_defect[-1] & sobel_not_defect[-1]

In [None]:
from app.models import ModelNN

lr = 0.00005
pca_dims = 1000

# Hidden-layer widths: fixed sizes when PCA is off, otherwise derived from pca_dims
# (first layer pca_dims wide, followed by four layers of half that width)
if pca_dims is None:
    dense_layers = (2000, 1000, 300, 300, 300)
else:
    half_dims = int(pca_dims/2)
    dense_layers = (pca_dims, half_dims, half_dims, half_dims, half_dims)

optimizer_params = {'name': 'sgd', 'lr': lr, 'nesterov': False, 'momentum': 0.9}
# Scheduler's minimum learning rate is 1/100 of the starting rate
scheduler_params = {'lr_min': lr/100, 't_mul': 2}
# Fix: pass the dense_layers computed above. The original hard-coded
# (2000, 1000, 300, 300, 300) here, so the pca_dims-dependent sizing was never used.
model_params = {'num_output_classes': 2, 'dense_layers': dense_layers, 'dense_activation': 'relu',
                'pca_dims': pca_dims, 'dropout': 0.2}

# Train the dense network on the oriented & Sobel feature sets
model = ModelNN(defect_dnn, not_defect_dnn, model_params, optimizer_params, scheduler_params, model_type='dnn')
score = model.fit(num_epochs=30)

print(score)
scores.append(score)
legends.append(f'DNN with {pca_dims} dims on enhanced images')

## CNN on the original images

In [None]:
from app.models import ModelNN

# CNN trained directly on the oriented images (no Sobel features)
lr = 0.0002
optimizer_params = dict(name='sgd', lr=lr, nesterov=True, momentum=0.9)
# Scheduler's minimum learning rate is 1/100 of the starting rate
scheduler_params = dict(lr_min=lr/100, t_mul=3)
model_params = dict(
    num_output_classes=2,
    channels=((1, 5), (10, 3), (10, 3), (10, 3), (10, 3)),
    dense_layers=(1000, 500, 500),
)

model = ModelNN(oriented_defect[-1], oriented_not_defect[-1], model_params,
                optimizer_params, scheduler_params, model_type='cnn')
score = model.fit(num_epochs=30)
print(score)

# Record the result for the progression plot
scores.append(score)
legends.append('CNN on oriented images')

In [None]:
import pickle
import random
import numpy as np
import matplotlib.pyplot as plt

# Pickle (scores, legends) to a file named with a random integer tag, so each
# run of this cell writes to a different file name
tag = random.randint(0, 2**32)
results_path = f'results_{tag}'
with open(results_path, 'wb') as outfi:
    pickle.dump((scores, legends), outfi)

In [None]:


# Line plot of every recorded score in the order the models were run;
# each x position is labelled with the matching legend.
x = np.arange(len(scores))
y = np.array(scores)
# Figure is three times matplotlib's default 6.4 x 4.8 inch size
plt.figure(figsize=(6.4*3, 4.8*3))
plt.xticks(x, legends, rotation=90, fontsize=20)
plt.plot(x, y, '-')
# Title and y-label added so this figure is labelled like the notebook's
# other score-progression plots
plt.title('Model progression on CV set', fontsize=20)
plt.ylabel('Balanced accuracy score', fontsize=20)
plt.show()

In [None]:
import pickle

# Reload the results saved under the current `tag`. The context manager closes
# the file handle, which the previous bare open() call left open.
# NOTE: pickle.load can execute arbitrary code; only load files this notebook wrote.
with open(f'results_{tag}', 'rb') as infi:
    scores, legends = pickle.load(infi)

In [None]:
# Hand-picked positions in scores/legends to carry into the summary plot;
# negative values count from the end of the lists
indices = [0, 5, 6, -1, -6, -5]
new_legends = [legends[index] for index in indices]
new_scores = [scores[index] for index in indices]

In [None]:
# Show the scores selected for the summary plot
print(new_scores)

In [None]:
import matplotlib.pyplot as plt
import numpy as np

# Line plot of the hand-picked subset of scores, one tick per selected legend
x = np.arange(len(new_scores))
y = np.array(new_scores)
# Figure is three times matplotlib's default 6.4 x 4.8 inch size
plt.figure(figsize=(6.4*3, 4.8*3))
plt.xticks(x, new_legends, rotation=90, fontsize=20)
plt.plot(x, y, '-')
plt.title('Model progression on CV set', fontsize=20)
plt.ylabel('Balanced accuracy score', fontsize=20)
plt.show()

In [None]:
# Presumably a deliberate stop: the exception halts "Run All" here so the
# cells below this point are not executed automatically.
raise KeyError("Ended")

In [None]:
# Apply the `~` operator to the last oriented result; the outcome exposes .shape
# and is passed to CreateOnesMask below (presumably the raw image array — TODO confirm).
images = ~oriented_defect[-1]

In [None]:
# Display the shape of `images` as the cell output
images.shape

In [None]:
# Build the mask later passed to IFFT: one minus the result of
# CreateOnesMask(...).vertical_from_center(...) with val=0.
# Alternative tried earlier (horizontal band):
# mask = 1 - CreateOnesMask(images).horizontal_from_center(left_width=96, right_width=96, height=10, val=0)
mask = 1 - CreateOnesMask(images).vertical_from_center(top_height=96, bottom_height=96, width=10, val=0)

In [None]:
# Pipe the oriented defect images through FFT (no window) and then IFFT with the mask
masked_images = (IFFT(mask=mask) << (FFT(window=None) << oriented_defect))

In [None]:
# Display the oriented images alongside their FFT-masked counterparts
_ = Show(num_images=10, seed=seed) << (oriented_defect[-1], masked_images[-1])

## Classifier on oriented + front-grid features

In [None]:
# Gradient boosted classifier on oriented & front-grid results for both classes.
# NOTE(review): this cell calls cla.fit, whereas the completed analysis above uses
# cla.fit_cv — confirm which is intended.
params = {'seed': 14376,'n_estimators': 300, 'max_depth': 4, 'learning_rate': 0.05, 'pca_dims': 250}
# Earlier variant, kept for reference:
# cla = Classifier(grid_defect[-1] & defect, grid_not_defect[-1] & not_defect, GradientBoostingClassifier, None)
cla = Classifier(oriented_defect[-1] & grid_oriented_defect[-1], oriented_not_defect[-1] & grid_oriented_not_defect[-1], GradientBoostingClassifier, None)
     
# fit returns the score, which is printed below
score = cla.fit(**params)
print(score)

In [None]:
# Load images of the 'None' class (10000 requested), orient them, apply
# HighlightFrontGrid, and print the elapsed seconds for the whole pipeline.
start = time.perf_counter()
# NOTE(review): finger_width is 4 and num_jobs is 20 here; the front-grid cell
# that builds grid_oriented_defect uses 5 and 40.
front_grid_params = {'finger_width': 4, 'finger_height': 5, 'side_padding': 2, 'top_padding': 0, 'bottom_padding': 0, 'finger_mult': 1, 'flipped': False, 'num_jobs': 20}
clean = (DefectViewer(row_chop=15, col_chop=15) << (ImageLoader(defect_class='None', is_not=False) << 10000))
oriented_clean= Orient(num_jobs=20) << clean
oriented_grid_clean = HighlightFrontGrid(**front_grid_params) << oriented_clean
print(time.perf_counter()-start)

In [None]:
# Combine oriented and front-grid results for the clean images; the labelling loop
# below reads .image_labels and .images from this object.
concat_clean = oriented_clean[-1] & oriented_grid_clean[-1]

In [None]:
# Predict on the clean set built in the previous cell. Reusing concat_clean
# (instead of rebuilding the same `&` combination) means the predictions come from
# exactly the object whose image_labels/images the labelling loop iterates over.
y_vals = cla.predict(concat_clean)

In [None]:
import cv2
import pandas as pd
import matplotlib.pyplot as plt
from IPython.display import clear_output

In [None]:
# Load the segmentation table and index it by filename, so rows can be read
# and updated with segmented.at[filename, column].
segmented = pd.read_csv('../data/andi_segmented.csv').set_index('filename')

In [None]:
def display_get_input(img):
    """
    Display an image in grayscale and return one line of keyboard input.

    The figure is closed and the cell output cleared after the input is
    read, so successive calls do not stack plots in the output area.

    Parameters
    ----------
    img : array-like
        Image data accepted by ``plt.imshow``.

    Returns
    -------
    str
        The text entered at the ``input()`` prompt.
    """
    figure_size = (6.4*1.5, 4.8*1.5)
    plt.figure(figsize=figure_size)
    plt.imshow(img, cmap='gray')
    plt.show()

    response = input()

    # Tidy up before the next image is shown
    plt.close()
    clear_output()
    return response

In [None]:
# Interactive review of clean images that the classifier predicted as positive.
# count   - images the user confirmed ('y') and that were flipped to clean=False
# y_count - positive predictions whose 'clean' entry is already falsy
count = 0
y_count = 0
for y, filename, image  in zip(y_vals, concat_clean.image_labels, concat_clean.images):
    # Only ask about positive predictions that are still marked clean
    if y and np.any(segmented.at[filename, 'clean']):
        
        a = display_get_input(image)
        if a == 'y':
            # User confirmed: mark the file as not clean
            segmented.at[filename, 'clean'] = False
            count += 1
            print(f'Count is {count} y-count is {y_count}')
    elif y:
        y_count += 1

In [None]:
# Write the updated table back over the file it was loaded from
segmented.to_csv('../data/andi_segmented.csv')

## Clean up None

In [None]:
# Step through every image in out[0][0] (taken from the last cla.misclassified()
# result) and let the user flag it; entering 't' marks the file as not clean.
review_set = out[0][0]
for i in range(review_set.images.shape[0]):
    image = review_set.images[i, :, :]
    filename = review_set.image_labels[i]
    a = display_get_input(image)
    if a == 't':
        segmented.at[filename, 'clean'] = False

In [None]:
# Write the updated table back over the file it was loaded from
segmented.to_csv('../data/andi_segmented.csv')

## Cleanup front grid

In [None]:
# Filenames to keep; used below to filter annotations_df.
# NOTE(review): nothing in the visible cells appends to this list — confirm how it is filled.
keep_front_grid = []

In [None]:
import cv2
def display_get_input(filename):
    """
    Load an image by file name, show it in grayscale and return the user's input.

    The file is looked up in ``../data/images/train`` first; if it does not
    exist there, ``../data/images/test`` is used instead.

    Note: this definition replaces the earlier ``display_get_input(img)`` in this
    notebook, which takes image data rather than a file name.

    Parameters
    ----------
    filename : str
        Name of the image file, without its directory.

    Returns
    -------
    str
        The text entered at the ``input()`` prompt.

    Raises
    ------
    FileNotFoundError
        If OpenCV cannot read an image from the resolved path.
    """
    filepath = f'../data/images/train/{filename}'
    if not os.path.exists(filepath):
        filepath = f'../data/images/test/{filename}'

    # cv2.imread reports failure by returning None instead of raising, which would
    # otherwise surface as an opaque error inside cvtColor
    raw = cv2.imread(filepath)
    if raw is None:
        raise FileNotFoundError(f'Could not read image: {filepath}')
    image = cv2.cvtColor(raw, cv2.COLOR_BGR2GRAY)

    plt.figure(figsize=(6.4*1.5, 4.8*1.5))
    plt.imshow(image, cmap='gray')
    plt.show()
    a = input()

    # Tidy up before the next image is shown
    plt.close()
    clear_output()

    return a

In [None]:

# FrontGridInterruption annotations only, first row per filename, rows shuffled.
# NOTE(review): sample() has no random_state, so the order differs on every run.
annotations_df = (
    pd.read_csv("../data/processed_annotations.csv")
    .loc[lambda frame: frame['defect_class'] == 'FrontGridInterruption']
    .groupby('filename')
    .head(1)
    .sample(frac=1)
)

In [None]:
# Keep only the annotation rows whose filename is listed in keep_front_grid
frontgrid_df = annotations_df[annotations_df['filename'].isin(keep_front_grid)]

In [None]:
# frontgrid_df.to_csv('../data/front_grid.csv')

## Previous work

In [None]:
# Remove the BusBars
nobus_defect = RemoveBusBars() << oriented_defect
nobus_clean = RemoveBusBars() << oriented_clean

_ = Show(num_images=5, seed=seed) << nobus_defect + nobus_clean

In [None]:
# Apply HighlightFrontGrid with its default parameters to the bus-bar-removed sets
front_grid_defect = HighlightFrontGrid() << nobus_defect
front_grid_clean = HighlightFrontGrid() << nobus_clean

# Preview both results; they are joined with + before being passed to Show
_ = Show(num_images=15, seed=seed) << front_grid_defect + front_grid_clean

In [None]:
# Apply the 'dynamic_sigmoid' Exposure (cutoff=0.6, gain=50) to the front-grid results
sigmoid_defect = Exposure('dynamic_sigmoid', cutoff=0.6, gain=50) << front_grid_defect
sigmoid_clean = Exposure('dynamic_sigmoid', cutoff=0.6, gain=50) << front_grid_clean

# Preview the original sets next to the processed ones; defect and clean are wrapped
# in 1-tuples so they can be joined with the other results using +
_ = Show(num_images=10, seed=seed) << (defect, ) + sigmoid_defect + (clean,) + sigmoid_clean

In [None]:
# Apply HOG (5x5 pixels per cell, 20 jobs) to the sigmoid-exposed results
hog_front_defect = HOG(pixels_per_cell=(5, 5), num_jobs=20) << sigmoid_defect
hog_front_clean = HOG(pixels_per_cell=(5, 5), num_jobs=20) << sigmoid_clean

# Preview the original sets next to the HOG results; defect and clean are wrapped
# in 1-tuples so they can be joined with the other results using +
_ = Show(num_images=10, seed=seed) << (defect, ) + hog_front_defect + (clean,) + hog_front_clean

## Model 

In [None]:
from sklearn.linear_model import LogisticRegression

In [None]:
# 1. Logistic regression on the raw defect vs clean images with pca_dims=100
model_params = dict(penalty='l2', seed=14376, pca_dims=100)
cla = Classifier(defect, clean, LogisticRegression, None)
score = cla.fit(**model_params)
print(score)

# Confusion matrix (rows: true class, columns: predicted class)
conf, out = cla.misclassified()
confusion_rows = [[f'True {k}', conf[k, 0], conf[k, 1]] for k in (0, 1)]
print(tabulate.tabulate(confusion_rows, headers=['', 'Pred 0', 'Pred 1']))

In [None]:
# 2. Logistic regression with HOG passed to Classifier, on the raw defect vs clean images
model_params = dict(penalty='l2', seed=14376, pca_dims=100, num_jobs=20)
cla = Classifier(defect, clean, LogisticRegression, HOG)
score = cla.fit(**model_params)
print(score)

# Confusion matrix (rows: true class, columns: predicted class)
conf, out = cla.misclassified()
confusion_rows = [[f'True {k}', conf[k, 0], conf[k, 1]] for k in (0, 1)]
print(tabulate.tabulate(confusion_rows, headers=['', 'Pred 0', 'Pred 1']))

In [None]:
# For every entry of the last misclassified() result: print how many images its
# final element holds, then display the entry with Show (num_images=20).
for imw in out:
    print(imw[-1].images.shape[0])
    Show(num_images=20) << imw

In [None]:
# 3. Logistic regression with HOG passed to Classifier, on the oriented defect vs clean images
model_params = dict(penalty='l2', seed=14376, pca_dims=100, num_jobs=20)
cla = Classifier(oriented_defect[-1], oriented_clean[-1], LogisticRegression, HOG)
score = cla.fit(**model_params)
print(score)

# Confusion matrix (rows: true class, columns: predicted class)
conf, out = cla.misclassified()
confusion_rows = [[f'True {k}', conf[k, 0], conf[k, 1]] for k in (0, 1)]
print(tabulate.tabulate(confusion_rows, headers=['', 'Pred 0', 'Pred 1']))

In [None]:
# 4. Logistic regression with HighlightFrontGrid passed to Classifier, on the oriented images
model_params = dict(penalty='l2', seed=14376, pca_dims=100, num_jobs=20, reduce_max=1, finger_mult=1)
cla = Classifier(oriented_defect[-1], oriented_clean[-1], LogisticRegression, HighlightFrontGrid)
score = cla.fit(**model_params)
print(score)

# Confusion matrix (rows: true class, columns: predicted class)
conf, out = cla.misclassified()
confusion_rows = [[f'True {k}', conf[k, 0], conf[k, 1]] for k in (0, 1)]
print(tabulate.tabulate(confusion_rows, headers=['', 'Pred 0', 'Pred 1']))

In [None]:
# For every entry of the last misclassified() result: print how many images its
# final element holds, then display the entry with Show (num_images=20).
for imw in out:
    print(imw[-1].images.shape[0])
    Show(num_images=20) << imw

## End of completed work

In [None]:
# Logistic regression with HighlightFrontGrid passed to Classifier.
# Parameter set tried earlier, kept for reference:
#   pca_dims=238, finger_mult=52.28, padding_mult=17, max_finger_width=3, finger_height=43
#   (seed, penalty, num_jobs and reduce_max as below)
params = dict(seed=14376, penalty='l2', pca_dims=200, num_jobs=20, reduce_max=1,
              finger_mult=50, padding_mult=4, max_finger_width=4, finger_height=20)
# NOTE(review): the positive class here is the un-oriented `defect`, while the
# negative class is the oriented clean set — confirm this pairing is intended.
cla = Classifier(defect, oriented_clean[-1], LogisticRegression, HighlightFrontGrid)

# fit returns the score, which is printed below
score = cla.fit(**params)
print(score)

# Confusion matrix (rows: true class, columns: predicted class)
conf, out = cla.misclassified()
confusion_rows = [[f'True {k}', conf[k, 0], conf[k, 1]] for k in (0, 1)]
print(tabulate.tabulate(confusion_rows, headers=['', 'Pred 0', 'Pred 1']))

# Print the image count of each misclassified entry, then display it
for imw in out:
    print(imw[-1].images.shape[0])
    Show(num_images=20) << imw