In [32]:
import time
import sqlite3
import os
import random
import numpy as np
from PIL import Image

In [33]:
# Import all images from camera 1, plus the full labels table
conn = sqlite3.connect(os.path.realpath('../Project/data/defects.db'))
try:
    cur = conn.cursor()
    # Image rows mix integers and text, so NumPy stores this table as an array of strings
    cur.execute('SELECT * from images WHERE camera_number = 1')
    images = np.array(cur.fetchall())
    # Label rows are all integers, so this table becomes an integer array
    cur.execute('SELECT * from labels')
    labels = np.array(cur.fetchall())
finally:
    # Both tables are already copied into NumPy arrays; release the database file handle
    conn.close()

In [34]:
# Show the first row and the row count of each table.
# images columns: ['id', 'scan_id', 'file_location', 'camera_number', 'image_set_number']
# labels columns: ['id', 'scan_id', 'defect_id']
for caption, value in (
    ('Image Table content: ', images[0]),
    ('Dataset Size: ', len(images)),  # Each gear id has 22 images
    ('Label Table content: ', labels[0]),
    ('Number of labeled gears: ', len(labels)),  # Each gear id has a defect
):
    print(caption, value)

Image Table content:  ['2' '1' './data/images/2019-12-05/1_132205_1_0.png' '1' '0']
Dataset Size:  14336
Label Table content:  [ 1  1 35]
Number of labeled gears:  711


In [35]:
# Identify the defects of interest (values of the defect_id column in the labels table).
# The order matters: the position of each id in this list becomes its class index
# when the labels are remapped through data_dict further down.
gear_defects = [0, 35, 76, 77]

In [36]:
# Count the labelled gears for each defect of interest (each gear has 22 images)
class_sizes = [int(np.count_nonzero(labels[:, 2] == defect)) for defect in gear_defects]
# The rarest defect gives the smallest number of samples
sample_size = np.min(class_sizes)
limit = 100

In [37]:
# Expected number of images per class, i.e. gears per class (the list shown below) x 22 images,
# assuming every gear has exactly 22 images (TODO confirm against the images table):
# Class 0 -> 990 samples
# Class 1 -> 836 samples
# Class 2 -> 7898 samples
# Class 3 -> 418 samples
class_sizes

[45, 38, 359, 19]

In [38]:
# Collect the gear ids (scan_id column) of every labelled gear whose defect is of interest,
# together with a parallel array holding the defect id of each collected gear
gear_id = np.array([])
gear_label_table = np.array([])
for defect in gear_defects:
    # scan ids of all label rows carrying this defect
    matching_gears = labels[labels[:, 2] == defect, 1]
    # Extend the running list of gear ids
    gear_id = np.append(gear_id, matching_gears)
    # One copy of the defect id per matching gear
    gear_label_table = np.append(gear_label_table,
                                 np.full(matching_gears.size, defect, dtype=float))

In [39]:
# Inspect the defect id stored for each collected gear (grouped in gear_defects order)
gear_label_table

array([ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
        0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
        0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
        0.,  0.,  0.,  0.,  0.,  0., 35., 35., 35., 35., 35., 35., 35.,
       35., 35., 35., 35., 35., 35., 35., 35., 35., 35., 35., 35., 35.,
       35., 35., 35., 35., 35., 35., 35., 35., 35., 35., 35., 35., 35.,
       35., 35., 35., 35., 35., 76., 76., 76., 76., 76., 76., 76., 76.,
       76., 76., 76., 76., 76., 76., 76., 76., 76., 76., 76., 76., 76.,
       76., 76., 76., 76., 76., 76., 76., 76., 76., 76., 76., 76., 76.,
       76., 76., 76., 76., 76., 76., 76., 76., 76., 76., 76., 76., 76.,
       76., 76., 76., 76., 76., 76., 76., 76., 76., 76., 76., 76., 76.,
       76., 76., 76., 76., 76., 76., 76., 76., 76., 76., 76., 76., 76.,
       76., 76., 76., 76., 76., 76., 76., 76., 76., 76., 76., 76., 76.,
       76., 76., 76., 76., 76., 76., 76., 76., 76., 76., 76., 76

In [40]:
# Build one row per gear: column 0 is the gear id, column 1 is the defect id
shuffle_list = np.column_stack((gear_id, gear_label_table)).astype(int)
# Shuffle the gears (rows) in place with a fixed seed so the train/test split is reproducible.
# A dedicated RandomState keeps the global NumPy random state untouched.
shuffle_seed = 42
np.random.RandomState(shuffle_seed).shuffle(shuffle_list)

In [41]:
# Inspect the shuffled (gear id, defect id) pairs
shuffle_list

array([[383,  76],
       [608,   0],
       [307,  76],
       [513,  35],
       [194,  76],
       [ 43,  76],
       [228,  76],
       [ 77,  76],
       [ 36,  76],
       [226,  76],
       [201,  76],
       [292,  76],
       [ 79,  76],
       [470,  77],
       [ 84,  76],
       [328,  76],
       [448,  76],
       [ 83,  76],
       [377,  76],
       [  6,  35],
       [303,  76],
       [382,  76],
       [501,  76],
       [344,  76],
       [202,  76],
       [100,  76],
       [374,  76],
       [109,  76],
       [218,  76],
       [126,  76],
       [207,  76],
       [293,  76],
       [242,  76],
       [ 47,  76],
       [160,  76],
       [399,  76],
       [427,  76],
       [128,  76],
       [398,  76],
       [255,  76],
       [369,  76],
       [516,  35],
       [ 23,  35],
       [368,  76],
       [236,  76],
       [290,  76],
       [ 93,  76],
       [391,  76],
       [165,  76],
       [ 50,  76],
       [ 16,  35],
       [286,  76],
       [270,

In [42]:
# Split at gear level (not image level): the first 80% of the shuffled gears go to
# training and the remaining gears go to testing
split_perc = 0.8
train_size = round(split_perc * len(shuffle_list))
train_list, test_list = shuffle_list[:train_size], shuffle_list[train_size:]

In [43]:
# gear_table holds, for every row of the images table, the gear id (scan_id column) as an integer
gear_table = images[:, 1].astype(int)

In [44]:
# Define function to find the images of each gear, and assign the gear's label to all of them
def find_ind_label(gear_list, scan_ids=None, image_rows=None):
    """Return the defect label and file location of every image of the given gears.

    Parameters
    ----------
    gear_list : array-like of shape (n_gears, 2)
        Column 0 is the gear id, column 1 is the defect id of that gear.
    scan_ids : 1-D array, optional
        Gear id of every row in ``image_rows``. Defaults to the notebook-level
        ``gear_table``.
    image_rows : 2-D array, optional
        Image table whose column 2 is the file location. Defaults to the
        notebook-level ``images``.

    Returns
    -------
    gear_label : numpy.ndarray of float
        One defect id per image found, ordered gear by gear as in ``gear_list``.
    img_dir : numpy.ndarray
        Column 2 of ``image_rows`` for the same images, in the same order.
    """
    # Fall back to the notebook globals so existing one-argument calls keep working
    if scan_ids is None:
        scan_ids = gear_table
    if image_rows is None:
        image_rows = images
    scan_ids = np.asarray(scan_ids)
    image_rows = np.asarray(image_rows)

    # Seed both lists with an empty chunk so that an empty gear_list still
    # yields well-typed, empty results instead of a concatenate error
    index_chunks = [np.empty(0, dtype=np.intp)]
    label_chunks = [np.empty(0, dtype=float)]
    for gear, defect in np.asarray(gear_list):
        # Row indexes of all images that belong to this gear
        rows = np.flatnonzero(scan_ids == gear)
        index_chunks.append(rows)
        # Every image of the gear receives the gear's defect id
        label_chunks.append(np.full(rows.size, defect, dtype=float))

    # Concatenate once instead of re-allocating the arrays on every iteration
    img_ind = np.concatenate(index_chunks)
    gear_label = np.concatenate(label_chunks)
    # Find the file location of each image
    img_dir = image_rows[img_ind, 2]
    # Return gear labels and file locations
    return gear_label, img_dir

In [45]:
# Call find_ind_label to obtain the labels and image file locations for the train and test gears.
# NOTE: despite the y_ prefix, y_*_dir hold image file locations (they are passed to
# import_images later on), while y_*_label hold the original defect ids.
y_train_label, y_train_dir = find_ind_label(train_list)
y_test_label, y_test_dir = find_ind_label(test_list)

In [49]:
# Number of training images per original defect id
np.unique(y_train_label, return_counts=True)

(array([ 0., 35., 76., 77.]), array([ 886,  624, 6462,  378], dtype=int64))

In [50]:
# Number of test images per original defect id
np.unique(y_test_label, return_counts=True)

(array([ 0., 35., 76., 77.]), array([ 140,  220, 1700,   44], dtype=int64))

In [46]:
# Inspect the file locations of the test images
y_test_dir

array(['./data/images/2020-02-11/324_102436_1_0.png',
       './data/images/2020-02-11/324_102436_1_1.png',
       './data/images/2020-02-11/324_102436_1_2.png', ...,
       './data/images/2020-02-06/294_111130_1_19.png',
       './data/images/2020-02-06/294_111130_1_20.png',
       './data/images/2020-02-06/294_111130_1_21.png'], dtype='<U44')

In [52]:
# Map each defect id to its position in gear_defects (0 .. number of defects - 1)
data_dict = {defect: class_index for class_index, defect in enumerate(gear_defects)}
# Translate the per-image defect ids into those consecutive class indexes (this will be used for tensorflow)
y_train = np.array([data_dict[int(defect)] for defect in y_train_label])
y_test = np.array([data_dict[int(defect)] for defect in y_test_label])

In [54]:
# Number of training images per remapped class index
np.unique(y_train, return_counts=True)

(array([0, 1, 2, 3]), array([ 886,  624, 6462,  378], dtype=int64))

In [55]:
# Re-scale pixel size of image: target size as [height, width]
# (import_images allocates arrays of shape (n, size[0], size[1]) and resizes to width=size[1], height=size[0])
size = [300, 400]

In [56]:
# Start timestamp; the end of this cell prints the elapsed seconds relative to it
time_now = time.time()

# Import image function
def import_images(size, location, root='../Project/'):
    """Load images from disk, resize them and stack them into one uint8 array.

    Parameters
    ----------
    size : sequence of two ints
        Target size as ``[height, width]`` in pixels.
    location : sequence of str
        File location of each image; every entry is appended to ``root``.
    root : str, optional
        Prefix prepended to every entry of ``location``. Defaults to
        ``'../Project/'``.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(len(location), size[0], size[1])`` and dtype uint8.
    """
    data = np.zeros((len(location), size[0], size[1]), dtype='uint8')
    for i, rel_path in enumerate(location):
        # Open image using PIL library; the context manager closes the file
        # as soon as its pixels have been copied into the output array
        with Image.open(os.path.realpath(root + str(rel_path))) as img:
            # Re-size image to given size. PIL expects (width, height).
            # Image.LANCZOS is the same filter as the former Image.ANTIALIAS
            # alias, which was removed in Pillow 10.
            resized = img.resize((size[1], size[0]), Image.LANCZOS)
            # Convert image to array
            data[i] = np.array(resized, dtype='uint8')
    return data

# Load the training and testing images and append a trailing channel axis
# (4-D tensor for tensorflow)
X_train = np.expand_dims(import_images(size, y_train_dir), axis=-1)
X_test = np.expand_dims(import_images(size, y_test_dir), axis=-1)

# Report the elapsed time in seconds
time_after = time.time()
print(time_after - time_now)

735.2338042259216


In [57]:
# Shape of the training tensor
X_train.shape

(8350, 300, 400, 1)

In [58]:
# Shape of the testing tensor
X_test.shape

(2104, 300, 400, 1)

In [17]:
# Save the image tensors and their class labels as npy files in the working directory
for filename, array in (
    ('gears_train_300x400_0,76,77,35.npy', X_train),
    ('gears_test_300x400_0,76,77,35.npy', X_test),
    ('gears_ytrain_300x400_0,76,77,35.npy', y_train),
    ('gears_ytest_300x400_0,76,77,35.npy', y_test),
):
    np.save(filename, array)

In [3]:
# Loading code snippet: read the four saved npy files back and time the operation
time_now = time.time()
X_train, X_test, y_train, y_test = [
    np.load(filename)
    for filename in (
        'gears_train_300x400_0,76,77,35.npy',
        'gears_test_300x400_0,76,77,35.npy',
        'gears_ytrain_300x400_0,76,77,35.npy',
        'gears_ytest_300x400_0,76,77,35.npy',
    )
]
time_after = time.time()
print(time_after - time_now)

0.7027792930603027
