# ENGR 418 Project - Stage 2
## Sorting lego blocks using image data

- Group #20
- Jesse Alele 82807728
- Zach Kelly 41637836

-------------------

### Import Statements

In [23]:
import numpy as np 
import pandas as pd
import matplotlib.pyplot as plt
import os
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import confusion_matrix, accuracy_score
from skimage import feature
from skimage.io import imread, imshow
from skimage.transform import resize

### Feature Extraction

In [16]:
def feature_extractor(folder, im_size=100):
    """Extract three edge-based shape features from every image in ``folder``.

    Each file is read as grayscale, resized to ``im_size`` x ``im_size`` and
    passed through a Canny edge detector. The boolean edge map is then scanned
    row by row; the "width" of a row is the distance in pixels between its
    first and last edge pixel (0 when the row has fewer than two distinct edge
    columns).

    Features, one row per image:
        0. area           -- sum of the row widths.
        1. min_width      -- width of the LAST row whose width is non-zero
                             (despite the name, not the true minimum).
        2. max_diff_width -- largest absolute change in width between
                             consecutive rows; the first row is compared
                             against a width of 0.

    Parameters
    ----------
    folder : str
        Directory containing the image files. Every entry returned by
        ``os.listdir`` is read, so the folder must contain only images.
    im_size : int, optional
        Side length in pixels of the square the image is resized to before
        edge detection (default 100). The Canny parameters are fixed and do
        not scale with this value.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(n_images, 3)``; rows follow ``os.listdir`` order.
        An empty folder yields an array of shape ``(0, 3)``.
    """
    # feature rows, one [area, min_width, max_diff_width] entry per image
    x = []

    # iterate through all images in the specified folder
    for file_name in os.listdir(folder):
        path = os.path.join(folder, file_name)
        # as_gray=True collapses the colour channels into a single 2D array
        im = imread(path, as_gray=True)
        im = resize(im, (im_size, im_size))
        # boolean 2D array: True where the Canny algorithm found an edge
        edges = feature.canny(image=im, sigma=5, low_threshold=0.001, high_threshold=0.07)

        # re-initialize the per-image accumulators
        width = 0
        min_width = 0
        max_diff_width = 0
        area = 0

        # iterate row by row through the 2D edge map
        for edge_row in edges:
            # remember the previous row's width before overwriting it
            prev_width = width

            # column indices of every edge pixel in this row, ascending
            edge_cols = np.flatnonzero(edge_row)
            if edge_cols.size:
                width = int(edge_cols[-1] - edge_cols[0])
            else:
                width = 0

            # feature 1: add up all the one-pixel-high slices of area
            area += width

            # feature 2: keep the width of the most recent row with a non-zero
            # width (a width can never reach the image width, so only the
            # zero case needs excluding)
            if width:
                min_width = width

            # feature 3: largest change in width between successive rows;
            # for rectangles the first and last rows give a large change
            diff_width = abs(width - prev_width)
            if diff_width > max_diff_width:
                max_diff_width = diff_width

        x.append([area, min_width, max_diff_width])

    # reshape so that an empty folder still yields a 2D (0, 3) array
    return np.array(x).reshape(-1, 3)

### Notes:

- features 2 and 3 will be most helpful in classifying circles
- feature 1 will be most helpful in distinguishing between squares and rectangles 
- possibly extract another feature to help distinguish between squares and rectangles

### Label Extraction

In [20]:
# Training image folder used for the label sanity check at the end of this cell.
# NOTE(review): machine-specific absolute path -- must be edited to run elsewhere.
folder = r"C:\Users\zachk\Documents\ubco\year5\ubc_term1\engr418\project\stage2\lego_dataset_2\training"

def label_extractor(folder):
    """Derive integer class labels from the file names in ``folder``.

    A file is labelled by the first of these substrings found in its name:
    ``'cir'`` -> 0 (circle), ``'rec'`` -> 1 (rectangle), ``'squ'`` -> 2
    (square). Labels are emitted in ``os.listdir`` order, which is the same
    order ``feature_extractor`` uses for its feature rows.

    Parameters
    ----------
    folder : str
        Directory containing the image files.

    Returns
    -------
    y : numpy.ndarray
        1D integer array with one label per file.
    cir, rec, squ : int
        Number of files labelled circle, rectangle and square respectively.

    Raises
    ------
    ValueError
        If a file name contains none of the class substrings. Skipping such a
        file would leave the labels misaligned with the rows produced by
        ``feature_extractor``, which processes every file in the folder.
    """
    # substring -> class label; insertion order sets the matching precedence
    class_labels = {'cir': 0, 'rec': 1, 'squ': 2}
    # number of files seen per class
    counts = dict.fromkeys(class_labels, 0)
    # class label of each file, in listing order
    y = []

    # iterate through the files in the specified folder
    for file_name in os.listdir(folder):
        for key, label in class_labels.items():
            if key in file_name:
                y.append(label)
                counts[key] += 1
                break
        else:
            # no class substring matched this file name
            raise ValueError(
                f"cannot determine class of {file_name!r}: expected one of "
                f"{list(class_labels)} in the file name"
            )

    # explicit dtype keeps the result integer-typed even for an empty folder
    return np.array(y, dtype=int), counts['cir'], counts['rec'], counts['squ']

# sanity check: as the cell's last expression, this displays the label array
# and the per-class counts (cir, rec, squ) for the folder defined above
label_extractor(folder)

(array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2]),
 27,
 27,
 27)

### Model Training

In [22]:
# NOTE(review): machine-specific absolute path -- must be edited to run elsewhere.
train_folder = r"C:\Users\zachk\Documents\ubco\year5\ubc_term1\engr418\project\stage2\lego_dataset_2\training"

# build the feature matrix and the label vector from the training images;
# cir, rec, squ receive the per-class image counts and are not used in this cell
x_train = feature_extractor(train_folder)
y_train, cir, rec, squ = label_extractor(train_folder)

# initialize and fit a K Nearest Neighbours model
# (n_neighbors=1: a prediction takes the label of the single closest training sample)
knn = KNeighborsClassifier(n_neighbors=1)
knn.fit(x_train,y_train)


### Testing Function

In [47]:
# Evaluate the trained KNN model (`knn`) on the held-out test images and
# report per-class accuracy, overall accuracy and the confusion matrix.
# NOTE(review): machine-specific absolute path -- must be edited to run elsewhere.
test_folder = r"C:\Users\zachk\Documents\ubco\year5\ubc_term1\engr418\project\stage2\lego_dataset_2\testing"

# get data
x = feature_extractor(test_folder)
y, cir, rec, squ = label_extractor(test_folder)

# predict using trained model
y_pred = knn.predict(x)

# define list containing the lego shapes; the list index is the class label
shapes = ['circles','rectangles','squares']
class_ids = range(len(shapes))

# assess prediction using pre-defined metrics
accuracy = accuracy_score(y,y_pred)
# rows are true classes, columns are predicted classes; reindex so that every
# class has a row and a column even if it never occurs in y or y_pred
# (otherwise confusion.loc[i,i] below raises KeyError)
confusion = pd.crosstab(y,y_pred).reindex(index=class_ids, columns=class_ids, fill_value=0)

for i in class_ids:

    # number of test images whose true class is i
    total = confusion.loc[i,:].sum()
    # number of those that were also predicted as class i
    shape_count = confusion.loc[i,i]
    # a class with no test images has no defined score
    score = shape_count/total if total else float('nan')

    # single print: the original nested print(print(...)) also emitted "None"
    print(f'The accuracy score on the {shapes[i]} is: {score*100:.2f}%')

print(f'The overall accuracy score is: {accuracy*100:.2f}%')
print('The confusion matrix is:')
display(confusion)

The accuracy score on the circles is: 92.59%
None
The accuracy score on the rectangles is: 88.89%
None
The accuracy score on the squares is: 96.30%
None
The overall accuracy score is: 92.59%
The confusion matrix is:


col_0,0,1,2
row_0,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,25,0,2
1,0,24,3
2,1,0,26


### Accuracy on Training Set

### Accuracy on Test Set

In [None]:
# NOTE(review): repeats the test_folder assignment from the "Testing Function"
# cell; this cell contains no further code and has not been executed.
test_folder = r"C:\Users\zachk\Documents\ubco\year5\ubc_term1\engr418\project\stage2\lego_dataset_2\testing"
