Team 12

Members: Ankan Roy, Anthony Cheng, Eric Huang, and Viraj Boreda

Mentor: Daphney Chery

The first step is to install and import the necessary Python libraries, and add the Google Drive folder to Colab.

In [None]:
# !pip install any necessary libraries

In [None]:
import cv2
import numpy as np
from matplotlib import pyplot as plt
from google.colab import drive
from google.colab.patches import cv2_imshow
import os
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
import joblib

In [None]:
# Mount Google Drive at /gdrive; the dataset and model paths used below all
# live under /gdrive/MyDrive.
drive.mount('/gdrive')

Next, we add the files to Google Drive, then point to the training image folders and list their contents so we can access them in Colab.

In [None]:
# Google Drive folders holding the training images, one folder per class.
defective_dir = '/gdrive/MyDrive/InnovationHackWeek/TRAIN_def_front'
ok_dir = '/gdrive/MyDrive/InnovationHackWeek/TRAIN_ok_front'

# File names (not full paths) found in each training folder.
defective_files, ok_files = os.listdir(defective_dir), os.listdir(ok_dir)

Then, we use OpenCV to write a function to remove shadows from the images.

In [None]:
def remove_shadow(img):
    """Flatten the background of each channel of ``img``.

    Every channel returned by ``cv2.split`` is processed independently:
    a background estimate is built by dilating the channel with a 7x7
    kernel and median-blurring the result (aperture 21), and the channel
    is replaced by ``255 - absdiff(channel, background)``.

    Args:
        img: Image array accepted by ``cv2.split`` (presumably a BGR image
            as returned by ``cv2.imread`` - confirm against callers).

    Returns:
        A ``(result, result_norm)`` tuple. ``result`` merges the
        background-subtracted channels; ``result_norm`` merges the same
        channels after min-max normalisation to the 0-255 range.
    """
    kernel = np.ones((7, 7), np.uint8)
    flattened_channels = []
    normalized_channels = []

    for channel in cv2.split(img):
        # Dilation followed by a median blur gives the background estimate.
        background = cv2.medianBlur(cv2.dilate(channel, kernel), 21)
        flattened = 255 - cv2.absdiff(channel, background)
        flattened_channels.append(flattened)
        normalized_channels.append(
            cv2.normalize(
                flattened,
                None,
                alpha=0,
                beta=255,
                norm_type=cv2.NORM_MINMAX,
                dtype=cv2.CV_8UC1,
            )
        )

    return cv2.merge(flattened_channels), cv2.merge(normalized_channels)

After that, we use OpenCV to clean up the defective and OK images. We get rid of shadows, convert the images to grayscale, and then threshold them. Finally, we detect contours and write their areas into a list.

In [None]:
# One entry per defective training image: the list of that image's contour areas.
defective_contour_list = []

for filename in defective_files:
    image_path = os.path.join(defective_dir, filename)
    img = cv2.imread(image_path)
    if img is None:
        # cv2.imread returns None instead of raising when a file cannot be
        # decoded; fail here with the offending path rather than deeper in OpenCV.
        raise ValueError(f"Could not read image: {image_path}")

    # Get rid of shadows
    # NOTE(review): the shadow-free images are computed but not used below;
    # grayscale is taken from the raw image. Switching to result1/result2 would
    # change the features, so every train/test cell (and any saved model) would
    # have to change together.
    result1, result2 = remove_shadow(img)

    # Detect the contours on the binary image using cv2.CHAIN_APPROX_NONE
    img_gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    ret, thresh = cv2.threshold(img_gray, 50, 255, cv2.THRESH_BINARY)
    contours, hierarchy = cv2.findContours(image=thresh, mode=cv2.RETR_TREE, method=cv2.CHAIN_APPROX_NONE)

    # Record the area of every contour, in the order findContours returned them
    defective_contour_list.append([cv2.contourArea(contour) for contour in contours])

In [None]:
# One entry per ok training image: the list of that image's contour areas.
ok_contour_list = []

for filename in ok_files:
    image_path = os.path.join(ok_dir, filename)
    img = cv2.imread(image_path)
    if img is None:
        # cv2.imread returns None instead of raising when a file cannot be
        # decoded; fail here with the offending path rather than deeper in OpenCV.
        raise ValueError(f"Could not read image: {image_path}")

    # Get rid of shadows
    # NOTE(review): the shadow-free images are computed but not used below;
    # grayscale is taken from the raw image. Switching to result1/result2 would
    # change the features, so every train/test cell (and any saved model) would
    # have to change together.
    result1, result2 = remove_shadow(img)

    # Detect the contours on the binary image using cv2.CHAIN_APPROX_NONE
    img_gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    ret, thresh = cv2.threshold(img_gray, 50, 255, cv2.THRESH_BINARY)
    contours, hierarchy = cv2.findContours(image=thresh, mode=cv2.RETR_TREE, method=cv2.CHAIN_APPROX_NONE)

    # Record the area of every contour, in the order findContours returned them
    ok_contour_list.append([cv2.contourArea(contour) for contour in contours])

Next, we created the training x and y inputs. We combined the defective and ok image contour area lists into x train, and created a y train list that is 1 for all defective images and 0 for all ok images. 0 and 1 are our output labels for ok and defective respectively.

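We only take the first 265 contour areas as features for the ML classifier, as additional ones are not necessary. Images with fewer than 265 contours are padded with zeros so that every feature vector has the same length.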

In [None]:
# Samples are ordered defective first, then ok; the labels below follow that order.
x_train_temp = defective_contour_list + ok_contour_list

# Labels: 1 = defective, 0 = ok. The counts come from the lists themselves so
# the labels always line up with the samples, whatever the folders contain.
y_train = [1] * len(defective_contour_list) + [0] * len(ok_contour_list)

# Fixed-length feature vectors: zero-pad short lists to 265 entries and
# truncate longer ones (a negative pad count yields an empty pad list).
x_train = [(xt + [0] * (265 - len(xt)))[:265] for xt in x_train_temp]

Finally, we create our Random Forest Classifier using scikit-learn and fit it on the x-train and y-train data. We have displayed the training accuracy below.

In [None]:
# Build a 50-tree random forest (depth capped at 8) and fit it in one step;
# fit() returns the fitted estimator itself.
model = RandomForestClassifier(n_estimators=50, max_depth=8).fit(x_train, y_train)

# Mean accuracy on the same samples the model was fitted on (training accuracy).
accuracy = model.score(x_train, y_train)
print(accuracy)

The code below saves the model to a file.

In [None]:
# Persist the fitted model to Google Drive.
# NOTE: running this cell overwrites the file at this path; skip the cell to
# keep the previously saved model.
model_path = '/gdrive/MyDrive/InnovationHackWeek/quality_control_model.joblib'
joblib.dump(model, model_path)

['/gdrive/MyDrive/InnovationHackWeek/quality_control_model.joblib']

The code below loads the model from joblib, as well as the testing data from Google Drive.

In [None]:
# Restore the classifier saved earlier.
# NOTE: joblib.load unpickles the file, so only load model files you trust.
model = joblib.load('/gdrive/MyDrive/InnovationHackWeek/quality_control_model.joblib')

# Google Drive folders holding the test images, one folder per class.
test_dir_ok = '/gdrive/MyDrive/InnovationHackWeek/TEST_ok_front'
test_dir_def = '/gdrive/MyDrive/InnovationHackWeek/TEST_def_front'

# File names (not full paths) in each test folder. ok_files is rebound here
# and now refers to the test set rather than the training set.
ok_files, def_files = os.listdir(test_dir_ok), os.listdir(test_dir_def)

The following code uses OpenCV to turn the test images into arrays of numbers so they can be used to make predictions.

In [None]:
# One entry per defective test image: the list of that image's contour areas.
defective_contour_list = []

for filename in def_files:
    image_path = os.path.join(test_dir_def, filename)
    img = cv2.imread(image_path)
    if img is None:
        # cv2.imread returns None instead of raising when a file cannot be
        # decoded; fail here with the offending path rather than deeper in OpenCV.
        raise ValueError(f"Could not read image: {image_path}")

    # Get rid of shadows
    # NOTE(review): the shadow-free images are computed but not used below;
    # grayscale is taken from the raw image. Switching to result1/result2 would
    # change the features, so every train/test cell (and any saved model) would
    # have to change together.
    result1, result2 = remove_shadow(img)

    # Detect the contours on the binary image using cv2.CHAIN_APPROX_NONE
    img_gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    ret, thresh = cv2.threshold(img_gray, 50, 255, cv2.THRESH_BINARY)
    contours, hierarchy = cv2.findContours(image=thresh, mode=cv2.RETR_TREE, method=cv2.CHAIN_APPROX_NONE)

    # Record the area of every contour, in the order findContours returned them
    defective_contour_list.append([cv2.contourArea(contour) for contour in contours])

In [None]:
# One entry per ok test image: the list of that image's contour areas.
ok_contour_list = []

for filename in ok_files:
    image_path = os.path.join(test_dir_ok, filename)
    img = cv2.imread(image_path)
    if img is None:
        # cv2.imread returns None instead of raising when a file cannot be
        # decoded; fail here with the offending path rather than deeper in OpenCV.
        raise ValueError(f"Could not read image: {image_path}")

    # Get rid of shadows
    # NOTE(review): the shadow-free images are computed but not used below;
    # grayscale is taken from the raw image. Switching to result1/result2 would
    # change the features, so every train/test cell (and any saved model) would
    # have to change together.
    result1, result2 = remove_shadow(img)

    # Detect the contours on the binary image using cv2.CHAIN_APPROX_NONE
    img_gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    ret, thresh = cv2.threshold(img_gray, 50, 255, cv2.THRESH_BINARY)
    contours, hierarchy = cv2.findContours(image=thresh, mode=cv2.RETR_TREE, method=cv2.CHAIN_APPROX_NONE)

    # Record the area of every contour, in the order findContours returned them
    ok_contour_list.append([cv2.contourArea(contour) for contour in contours])

In [None]:
# Test samples are ordered defective first, then ok; labels follow that order
# (1 = defective, 0 = ok).
x_test_temp = defective_contour_list + ok_contour_list
y_test = [1] * len(defective_contour_list) + [0] * len(ok_contour_list)

# Fixed-length feature vectors: zero-pad short lists to 265 entries and
# truncate longer ones (a negative pad count yields an empty pad list).
x_test = [(areas + [0] * (265 - len(areas)))[:265] for areas in x_test_temp]

Finally, we make predictions using the loaded RandomForestClassifier, and output the prediction accuracy.

In [None]:
  predictions = model.predict(x_test)

correct_count = 0
for i in range(len(predictions)):
  if predictions[i] == y_test[i]:
    correct_count += 1
print("Number of predictions: " + str(len(y_test)))
print("Number of correct predictions: " + str(correct_count))
print("Percentage of correct predictions: " + str(correct_count / len(y_test)))

print(predictions)

Number of predictions: 88
Number of correct predictions: 70
Percentage of correct predictions: 0.7954545454545454
[1 1 1 1 0 1 1 1 0 0 1 0 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 0 1 1 1 1 1 1 1 1 1
 1 0 1 1 1 1 1 1 1 1 1 0 1 0 1 1 1 1 1 1 1 1 1 0 1 1 1 1 1 1 1 1 1 0 1 1 1
 1 1 0 1 1 0 1 1 0 1 0 0 0 1]


Here are some additional statistics about the model's predictions on the test data.

In [None]:
# These counts come from the model's predicted labels (1 = defective), not from
# the true labels in y_test, so the printed text says "predicted" explicitly.
defective_count = sum(predictions)
non_defective_count = len(predictions) - defective_count

print(f"Items predicted defective in the test data: {defective_count}")
print(f"Items predicted non-defective in the test data: {non_defective_count}")

Defective items in the test data: 71
Non-defective items in the test data: 17
