# AOI Course (part 1) by Hsueh-Ting Chu, Asia University, Jan., 2020
* 這個教程使用工研院AIdea人工智慧共創平台的AOI資料集做為練習的標的。
* 介紹撰寫深度學習的程式來進行自動光學檢查的瑕疵分類。
* 本notebook程式可以在雲端使用Google Colab或使用個人電腦上的Jupyter執行。

AIdea人工智慧共創平台
https://aidea-web.tw/topic/76f9ec46-cb90-4aa8-82f2-ebfed54cecfb

朱學亭老師
* EMAIL: htchu.taiwan@gmail.com
* FB: https://www.facebook.com/htchu.taiwan


# Tutorial 1: Training a full CNN model for AOI
![alt text](https://lh3.googleusercontent.com/eDcGpCgIjxaE-hcQwxbR1GuOWV6i6Yv7NYmQDc9OF46IOEVoDL68yuTtguLX2nUb8u6Qfe01l2jvFgNDHr-pfL7tFIdykYr8nmNliUfiMx_V76Y9rxAZPbZhhoPrIfmrek6cESEqKCBrZXVhjewoQ9LSJSVCeoBLlKIaY0lANzysHFOEia0_JtPD00X3Yd-1uYGgCKiVlZE8WZdZvil8jxMSE_kVM8QIQSOHYudLmZROLgdsFOoE3nWQ9zYrecuQWiq_C79d-qXi0DDM_XPMIqhKrzW-NsoPoI_Bxj4KGyzNnP2xITfnmt2Y6jqma13v8nhRPt7KI6mGiwgFEREgnjruUoDGi4G5yxMfHFOSg9aRg5TXYjxhFvWUGBW7UxxKHCcOjye61BNrQhLeKdntC6W-kT1c3cOOBlnvJRxfdcYekCkSuBg-9PpfQcvvP0LhU0vuzYgVT20VB-OWAivnlIqa59snAzSsmFoxzAxpHd-P2itJ-G4WVd0ASNfBw9FxISBioWnPkEQTU6I65cBSo1FOUiIWqlp41VJnkW2jj8zDuEAVtRmI9E1-__gz44TxBmzEa5b3EO3ujoX903yuj9YPhD8xJbtJQKKyh-08Gq2rP8bUMXN2DybXFcYE81I-oZGhqUuUuk8yO9HGfUTEW4nNXOqfvcS8qLm9Rlbv4AsL1I-xK45HvZM=w1920-h291-no)

## (A) Use TF 2.0 (Optional)
### Step 1: Choose tensorflow_version


In [None]:
# Request TensorFlow 2.x when running on Colab. On other runtimes the magic
# is unavailable; the resulting error is deliberately ignored so that the
# locally installed TensorFlow is used instead.
try:
  # %tensorflow_version only exists in Colab.
  %tensorflow_version 2.x
except Exception:
  pass

import tensorflow as tf
# Show which TensorFlow version was actually loaded.
print(tf.__version__)

## (B) Setup Colab and mount the AOI data folder

### Step 2: Add the data folder to your Google drive
Click https://drive.google.com/open?id=15tGIHAPAatgdB8iZh_m80jCBPa-CrI_P
![alt text](https://lh3.googleusercontent.com/r-gU6GT4MshdPDeaTjSSUIQP63ExT1ufshFMFGAjDBhu00tu9jMbMwbnjfvjSO4Q6js31X6SfOmSmQxSBS02Uj2x5HAfNvI6lyWdbgIazAlB3OQfB7d3-Cte6GrbarZzHCi_XYeufeTw-eS3GGNeOdQra9mXofIwVNBS6116XCjkVOt0zUdzJsvo_7B6HuTZLenuqJW0cTsnPdjJ_NpYcc3pqLVNlgxc7pdVeNmLhpm4AQKH5bsc6CGwpiAluRMtlPw-MmVVEtLbqCysICOHfB9xyJsc-sG6NiuLxNU4B52FFA-3xA4kVK5_xFRLYQU3ZDM9N2uSFzMszNqoSuyQaLqCcEnJHuH3IbimQEoo613MJtXuGDNMBo0nyQpWMlrRcaorrcn1eOf9IfIwAdStEMNyk-yt3PbajKS1TUnOKPXrVrS2AZZECXR3I2Mu_HQb99MapFgAG5TEJBbp0Qacer5ksM70njl3exIlWR9CosN5FXFQnlW2pEVObL-i9buIt2OtEhl5SeURzNC1vPfI-q_Knb_3VEN5OIQIO8A_chs6OJgvKxqVddw6T0vcPrGitkiyudQ1kZSel7xrSzApGuNBmUzfDYVRhMIjNF3RL6O05-daZaCTbRXtY9gulVDCaoDV7TAgRpLa3n6Kvx54QnAHtj7wuowiv9_lreRRwy9qfzTzip0MVKE=w736-h172-no)

### Step 3: Mount the AOI folder
If mounting fails, check the permissions granted to your Google account at https://myaccount.google.com/u/2/permissions

In [None]:
#Step 3a: Mount your Google Drive
# Colab only: makes the Drive contents available under /content/drive.
from google.colab import drive
# force_remount=True is passed so the mount is redone even if one already exists.
drive.mount("/content/drive", force_remount=True)

In [None]:
#Step 3b: List the folder
# Shell command: lists the top level of the mounted Drive to confirm the mount worked.
!ls "/content/drive/My Drive"

### Step 4: Check the AOI data path 

In [None]:
data_path = "  "
!ls "/content/drive/My Drive/aidea-aoi2/data/"

In [None]:
#alternative data path for local computer
# Run this cell instead of the Colab cell above when working on a local
# Jupyter installation; it overwrites data_path with a relative folder.
data_path = "../data/"
#!ls "../data/"
# Windows directory listing (use the ls line above on Linux/macOS).
!dir/w "../data/"

## (C) Input training data

### Step 5: Read the labels of the training set

In [None]:
import pandas as pd

# Load the training index (image file name + class label per row).
# NOTE(review): the file name was left blank; "train.csv" is assumed here -
# confirm against the listing of the data folder.
df_train = pd.read_csv(data_path + "train.csv")
print(df_train.shape)

In [None]:
# Preview the first rows of the training index.
df_train.head()

### Step 6: Build the lists of training images and labels from the dataframe

In [None]:
# Limit the number of training images so the class exercise runs quickly;
# replace 480 with df_train.shape[0] to use the whole training set.
train_num = min(480, df_train.shape[0])
# Column 0 is used as the image file name, column 1 as the class label
# (same layout as the test dataframe handled later in this notebook).
train_files = df_train.iloc[:train_num, 0].values
train_labels = df_train.iloc[:train_num, 1].values

print(train_labels[:20])

### Step 7: read images of the training set

In [None]:
from tensorflow.keras.preprocessing import image

# Load every selected training image as a single-channel (grayscale) PIL
# image, matching the (512, 512, 1) input shape the CNN is defined with.
train_path = data_path + "train_images/"
train_images = []
for file_name in train_files:
    img = image.load_img(train_path + file_name, color_mode="grayscale")
    train_images.append(img)
    # Progress indicator: one dot per 100 images loaded.
    if len(train_images) % 100 == 0:
        print('.', end='')
print(len(train_images))

### Step 8: show AOI images of the classes: 
0 (normal), 1 (void), 2 (horizontal defect), 3 (vertical defect), 4 (edge defect), 5 (particle)

In [None]:
import matplotlib.pyplot as plt
# IPython magic: render figures inside the notebook output cells.
%matplotlib inline

In [None]:
import random

# Show one randomly chosen training image for each of the 6 classes (0-5)
# on a 2 x 3 grid, in class order from left to right, top to bottom.
fig, ax = plt.subplots(2, 3)
fig.set_size_inches(10, 10)
for curclass, panel in enumerate(ax.flat):
    # Collect every index of the current class first, so the pick can never
    # run past the end of the list or loop forever when a class is missing
    # from the (possibly truncated) training subset.
    candidates = [k for k in range(train_num) if train_labels[k] == curclass]
    if not candidates:
        panel.set_title('Label:{} (no sample) '.format(curclass))
        panel.axis('off')
        continue
    sel = random.choice(candidates)
    panel.imshow(train_images[sel], cmap='gray')
    panel.set_title('No. {} Label:{} '.format(sel, train_labels[sel]))
plt.tight_layout()

### Step 9: Show statistics of training images in the 6 classes

In [None]:
import numpy as np

# Distinct class labels in the training subset and how often each occurs.
labels, counts = np.unique(train_labels, return_counts=True)
print(labels, counts)

### Step 10: Plot the counts 

In [None]:
# Bar chart of how many training images fall into each class.
fig = plt.figure(figsize=(8, 5))
plt.bar(labels, counts, width=0.7, align='center')
plt.title("Label Distribution")
plt.xlabel('Label')
plt.ylabel('Count')
plt.xticks(labels)
# Keep the original 0-120 range for small subsets, but grow it when a class
# has more samples so bars and their count labels are not clipped.
plt.ylim(0, max(120, int(max(counts, default=0) * 1.1)))

# Write the exact count above each bar.
for a, b in zip(labels, counts):
    plt.text(a, b, '%d' % b, ha='center', va='bottom', fontsize=10)
plt.show()

### Step 11: Check the shape of single image 

In [None]:
from tensorflow.keras.preprocessing.image import img_to_array

# Convert only the first image to a NumPy array and report its shape; the
# temporary array is not bound to a name, so nothing needs to be deleted.
print(img_to_array(train_images[0]).shape)

### Step 12: Convert each training image into a numpy array and collect

In [None]:
from tensorflow.keras.preprocessing.image import img_to_array

# Convert each PIL image to a float array and scale the 0-255 pixel values
# down to the unit range, then stack everything into one training tensor.
# Building the tensor directly avoids keeping a second full copy of the data
# alive in an intermediate list.
X_train = np.array([img_to_array(img) / 255.0 for img in train_images])
print(X_train.shape)

In [None]:
# The pixel value in [0,1)
# Sanity check: print the first pixel of the first training image to verify
# that the data has been normalised.
print(X_train[0, 0 , 0 , 0])

### Step 13: One-hot encoding for labels

In [None]:
from tensorflow.keras.utils import to_categorical

# One-hot encode the integer class labels so they match the 6-way softmax
# output of the network.
num_classes = 6
y_train = to_categorical(train_labels, num_classes)
print(y_train)

## (D) Model training and inference, including: 
定義（define）, 編譯（compile）, 訓練（fit）, 評估（evaluate）, 預測（prediction） 

### Step 14: define the CNN model

In [None]:
from tensorflow.keras import Sequential
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, Activation, Flatten
from tensorflow.keras.layers import Input
from tensorflow.keras.layers import Dropout, Flatten, Activation
from tensorflow.keras.layers import Conv2D, MaxPooling2D
from tensorflow.keras.optimizers import Adam,SGD,Adagrad,Adadelta,RMSprop

In [None]:
# CNN classifier: four convolution + max-pooling stages followed by a dense
# head. Input is a 512 x 512 single-channel image; output is a 6-way softmax.
model = Sequential([
    # Stage 1: 5x5 convolution on the raw image.
    Conv2D(filters=32, kernel_size=(5, 5), activation='relu',
           input_shape=(512, 512, 1)),
    MaxPooling2D(pool_size=(2, 2)),
    # Stages 2-4: 3x3 convolutions, each followed by 2x2 pooling.
    Conv2D(filters=64, kernel_size=(3, 3), padding='Same', activation='relu'),
    MaxPooling2D(pool_size=(2, 2), strides=(2, 2)),
    Conv2D(filters=96, kernel_size=(3, 3), padding='Same', activation='relu'),
    MaxPooling2D(pool_size=(2, 2), strides=(2, 2)),
    Conv2D(filters=96, kernel_size=(3, 3), padding='Same', activation='relu'),
    MaxPooling2D(pool_size=(2, 2), strides=(2, 2)),
    # Classification head.
    Flatten(),
    Dense(512),
    Activation('relu'),
    Dense(6, activation="softmax"),
])

In [None]:
# Print the layer-by-layer structure and parameter counts of the model.
model.summary()

### Step 15: compile the model

In [None]:
# Compile the model, using accuracy to measure model performance.
# Categorical cross-entropy matches the one-hot labels and softmax output.
# NOTE(review): the optimizer was left blank in the template; Adam is one of
# the optimizers imported above - swap in another if the course intends it.
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

### Step 16: fit the model

In [None]:
# Train the model on the normalised images and one-hot labels; the returned
# history object holds the per-epoch metrics plotted in the next step.
hist = model.fit(X_train, y_train, batch_size=20, epochs=20)

### Step 17: evaluate the model

In [None]:
# Training loss per epoch, as recorded by model.fit.
plt.plot(hist.history['loss'])
plt.title('Model Loss')
plt.ylabel('Loss')
plt.xlabel('Epochs')
plt.legend(['train'])
plt.show()

In [None]:
# Training accuracy per epoch. The history key is 'accuracy' in TF 2.x and
# 'acc' in older releases, so pick whichever one was recorded.
acc_key = 'accuracy' if 'accuracy' in hist.history else 'acc'
plt.plot(hist.history[acc_key])
plt.title('Model Accuracy')
plt.ylabel('Accuracy')
plt.xlabel('Epochs')
plt.legend(['train'])
plt.show()

In [None]:
# Legacy variant for TF 1.x, where the accuracy metric is stored under 'acc'.
# Checking the recorded keys is more reliable than comparing version strings
# and avoids a KeyError when this cell is run under TF 2.x.
if 'acc' in hist.history:
    plt.plot(hist.history['acc'])
    plt.title('Model Accuracy')
    plt.ylabel('Accuracy')
    plt.xlabel('Epochs')
    plt.legend(['train'])
    plt.show()

### Step 18: predict with the model for the training set

In [None]:
# Class probabilities for every training image (one row of 6 values each).
y_prediction = model.predict(X_train, batch_size=20)
print(y_prediction[:2])

In [None]:
# Predicted class = index of the largest probability in each row.
predict = np.argmax(y_prediction, axis=1)
print(predict[0:10])

In [None]:
# True labels of the same first ten images, for comparison with the predictions.
print(train_labels[:10])

### Step 19: Compute confusion matrix (混淆矩陣)

In [None]:
from sklearn.metrics import confusion_matrix

# Rows are the true labels, columns the predicted labels.
confusion = confusion_matrix(train_labels, predict)
print(confusion)

### Step 20: Plot the confusion matrix

In [None]:
import seaborn as sn

# Heat map of the confusion matrix; fmt="d" prints the cell counts as plain
# integers instead of the default abbreviated float format.
df_cm = pd.DataFrame(confusion)
plt.figure(figsize = (10,7))
sn.heatmap(df_cm, annot=True, fmt="d", cmap="Blues")

### Step 21: List overkills and underkills

In [None]:
# Overkill: a normal sample (label 0) that the model flags as some defect.
overkill = [i for i in range(train_num)
            if train_labels[i] == 0 and predict[i] != 0]
# Underkill: a defective sample (label != 0) that the model passes as normal.
underkill = [i for i in range(train_num)
             if train_labels[i] != 0 and predict[i] == 0]
print('# of overkill= {}; # of underkill= {} '.format(len(overkill), len(underkill)))

### Step 22: Check overkills

In [None]:
#overkill
# Display up to six overkill samples on a 2 x 3 grid, titled with the
# (wrong) label the model predicted for each.
overkill_num = len(overkill)
fig, ax = plt.subplots(2, 3)
fig.set_size_inches(10, 10)
# zip stops as soon as either the panels or the overkill list run out, so
# panels without a sample are simply left empty.
for panel, sel in zip(ax.flat, overkill):
    panel.imshow(train_images[sel], cmap='gray')
    panel.set_title('No. {} Predicted Label:{} '.format(sel, predict[sel]))
plt.tight_layout()

### Step 23: Check underkills

In [None]:
#underkill
# Display up to six underkill samples on a 2 x 3 grid, titled with the true
# defect label that the model missed.
underkill_num = len(underkill)
fig, ax = plt.subplots(2, 3)
fig.set_size_inches(10, 10)
# zip stops as soon as either the panels or the underkill list run out, so
# panels without a sample are simply left empty.
for panel, sel in zip(ax.flat, underkill):
    panel.imshow(train_images[sel], cmap='gray')
    panel.set_title('No. {} train_labels:{} '.format(sel, train_labels[sel]))
plt.tight_layout()

### Step 24: Save the model

In [None]:
# Persist the trained model to an .h5 file in the working directory.
# NOTE(review): the file name says "10epochs" while Step 16 trains for 20 epochs.
model.save("AOICNN_10epochs-2020.h5")

In [None]:
# Reload the model from the file written in the previous cell and print its
# structure to confirm the round trip.
model = tf.keras.models.load_model('AOICNN_10epochs-2020.h5')
model.summary()

### Step 25: Delete training data in memory

In [None]:
import gc

# Drop the references to the training images and tensor, then run the
# garbage collector so the memory is available for the test set.
del train_images, X_train
gc.collect()

## (E) Output test result
* 讀入測試資料

### Step 26: Read the labels of the test set

In [None]:
# Load the test index (one row per test image).
# NOTE(review): the file name was left blank; "test.csv" is assumed here -
# confirm against the listing of the data folder.
df_test = pd.read_csv(data_path + "test.csv")
print(df_test.shape)

In [None]:
# Preview the first rows of the test index.
df_test.head()

### Step 27: Build the lists of test images and labels from the dataframe

In [None]:
# Limit the number of test images so the class exercise runs quickly;
# replace 480 with df_test.shape[0] to process the whole test set.
# NOTE(review): the intended limit was left blank; 480 mirrors train_num - confirm.
test_num = min(480, df_test.shape[0])
# Column 0 is used as the image file name, column 1 as the label column.
test_files = df_test.iloc[:test_num, 0].values
test_labels = df_test.iloc[:test_num, 1].values
print(test_labels[:10])

### Step 28: read images of the test set

In [None]:
# Shell command: list the test image folder on the mounted Drive.
# The path is hard-coded, so this only works on Colab.
!ls '/content/drive/My Drive/aidea-aoi2/data/test_images/'

In [None]:
from tensorflow.keras.preprocessing import image

# Load every selected test image as a single-channel (grayscale) PIL image,
# matching the (512, 512, 1) input shape the CNN is defined with.
test_path = data_path + "test_images/"
test_images = []
for file_name in test_files:
    img = image.load_img(test_path + file_name, color_mode="grayscale")
    test_images.append(img)
    # Progress indicator: one dot per 100 images loaded.
    if len(test_images) % 100 == 0:
        print('.', end='')
print(len(test_images))

### Step 29: show AOI test images: 

In [None]:
import random

# Show six randomly chosen test images on a 2 x 3 grid; no label is
# displayed for them (the title prints "Nan").
fig, ax = plt.subplots(2, 3)
fig.set_size_inches(10, 10)
for i in range(2):
    for j in range(3):
        # randrange excludes its upper bound, so sel is always a valid index
        # (randint(0, test_num) could return test_num and raise IndexError).
        sel = random.randrange(len(test_images))
        ax[i, j].imshow(test_images[sel], cmap='gray')
        ax[i, j].set_title('No. {} Label:Nan '.format(sel))
plt.tight_layout()

### Step 30: Convert each test image into a numpy array and collect

In [None]:
from tensorflow.keras.preprocessing import image

# Convert each PIL test image to a float array and scale the 0-255 pixel
# values down to the unit range, so the test data is normalised the same
# way the model input is expected to be, then stack into one tensor.
X_test = np.array([image.img_to_array(img) / 255.0 for img in test_images])
print(X_test.shape)

In [None]:
# Sanity check: print the first pixel of the first test image.
print(X_test[0, 0 , 0 , 0])

### Step 31: predict with the model for the test set

In [None]:
# Class probabilities for every test image, then the most likely class
# (index of the largest probability) for each one.
y_prediction = model.predict(X_test, batch_size=20)
predict = np.argmax(y_prediction, axis=1)
print(predict[:20])

### Step 32: show predictions 

In [None]:
import random

# Show sixteen randomly chosen test images on a 4 x 4 grid, each titled with
# the class the model predicted for it.
fig, ax = plt.subplots(4, 4)
fig.set_size_inches(10, 10)
for i in range(4):
    for j in range(4):
        # randrange excludes its upper bound, so sel is always a valid index
        # (randint(0, len(...)) could return len(...) and raise IndexError).
        sel = random.randrange(len(test_images))
        ax[i, j].imshow(test_images[sel], cmap='gray')
        ax[i, j].set_title('No. {} Predicted Label:{} '.format(sel, predict[sel]))
plt.tight_layout()

### Step 33: output predictions

In [None]:
# Build the submission table from the rows that were actually predicted:
# take an independent copy of those rows, fill the 'Label' column with the
# predicted classes, and write the result without the index column.
df_out = df_test.iloc[:test_num].copy()
df_out = df_out.assign(Label=predict)
df_out.to_csv("submission-20200114A.csv", index=False)