In [1]:
# Group: 41
# Members: Jonathan Lake - 43265818, Logan Isaak - 22484174

# python library references
from PIL import Image
import numpy as np
from sklearn.multiclass import OneVsRestClassifier
from sklearn.metrics import accuracy_score
import pandas as pd
from sklearn import svm
from skimage.feature import hog
import os

In [2]:
def preprocessingImages(img):
    """Extract a flattened histogram-of-oriented-gradients (HOG) feature array.

    Parameters
    ----------
    img : array-like
        Image pixels: either 2-D (single channel) or 3-D with the colour
        channels on the last axis.

    Returns
    -------
    The feature array returned by ``skimage.feature.hog`` for 9 orientations,
    8x8-pixel cells and 2x2-cell blocks.
    """
    # Only a 3-D array carries a channel axis. Declaring channels on a 2-D
    # greyscale image makes hog() fail, so decide per image.
    has_channels = np.ndim(img) == 3
    hog_settings = dict(orientations=9, pixels_per_cell=(8, 8), cells_per_block=(2, 2))

    try:
        # Current scikit-image API: the channel position is given by `channel_axis`.
        return hog(img, channel_axis=-1 if has_channels else None, **hog_settings)
    except TypeError:
        # Older scikit-image releases do not know `channel_axis` and expect the
        # boolean `multichannel` flag instead.
        return hog(img, multichannel=has_channels, **hog_settings)

In [3]:
def training(folder):
    """Fit a one-vs-rest polynomial SVM on the labelled images in ``folder``.

    The class of each image is taken from its file name: 'cir' -> 1 (circle),
    'rec' -> 2 (rectangle), 'squ' -> 3 (square). Files whose name contains none
    of these tokens are skipped, so the feature and label arrays always stay
    aligned.

    Parameters
    ----------
    folder : str
        Path of the directory holding the training images.

    Returns
    -------
    The fitted ``OneVsRestClassifier``.

    Raises
    ------
    ValueError
        If ``folder`` contains no file with a recognised class token.

    Side effects
    ------------
    Prints the actual labels, the labels the fitted model predicts for the
    training images, their confusion matrix and the training accuracy.
    """
    # classification legend, tested in this order: circle - 1, rectangle - 2, square - 3
    class_tokens = (("cir", 1), ("rec", 2), ("squ", 3))

    # feature array and class of every labelled training image
    x_train = []
    y_train = []

    # sorted() because os.listdir() returns entries in arbitrary order
    for filename in sorted(os.listdir(folder)):
        # first matching token decides the class; None means "not a labelled image"
        label = next((value for token, value in class_tokens if token in filename), None)
        if label is None:
            continue

        # read in image, resize to 512 wide by 256 high, convert to array;
        # the with-block closes the file handle once the pixels are copied
        with Image.open(os.path.join(folder, filename)) as picture:
            img = np.array(picture.resize((512, 256)))

        # one feature array and exactly one label per image
        x_train.append(preprocessingImages(img))
        y_train.append(label)

    if not x_train:
        raise ValueError("no training images with 'cir', 'rec' or 'squ' in their name found in: " + str(folder))

    # float labels keep the printed output format (1. 2. 3.) of the original notebook
    y_train = np.array(y_train, dtype=float)
    x_train = np.array(x_train)

    # pass the training features to the one vs rest classifier, using non-linear/polynomial SVC
    classifier = OneVsRestClassifier(svm.SVC(kernel='poly', degree=5), n_jobs=3).fit(x_train, y_train)

    # evaluate the fitted model on the training images; comparing y_train with
    # itself would always report a perfect score
    y_fitted = classifier.predict(x_train)

    # Output to be printed
    print("Actual Labels:")
    print(y_train)
    print("Training Labels:")
    print(y_fitted)
    # print confusion matrix for training set (rows: actual, columns: predicted)
    print("Confusion Matrix:")
    print(pd.crosstab(y_train, y_fitted, rownames=['Actual Y'], colnames=['Predicted Y']))
    # print accuracy for training set
    print("Accuracy:", accuracy_score(y_train, y_fitted)*100, "%")

    # return the fitted classifier
    return classifier

In [4]:
def testing(folder, classifier):
    """Evaluate a fitted classifier on the labelled images in ``folder``.

    The true class of each image is taken from its file name: 'cir' -> 1
    (circle), 'rec' -> 2 (rectangle), 'squ' -> 3 (square). Files whose name
    contains none of these tokens are skipped, so every feature array has a
    matching true label.

    Parameters
    ----------
    folder : str
        Path of the directory holding the test images.
    classifier
        Fitted model exposing ``predict`` (as returned by ``training``).

    Returns
    -------
    tuple
        ``(actual, predicted)``: the labels derived from the file names and
        the labels predicted by ``classifier``, in sorted file-name order.

    Raises
    ------
    ValueError
        If ``folder`` contains no file with a recognised class token.

    Side effects
    ------------
    Prints both label arrays, their confusion matrix and the test accuracy.
    """
    # classification legend, tested in this order: circle - 1, rectangle - 2, square - 3
    class_tokens = (("cir", 1), ("rec", 2), ("squ", 3))

    # feature array and true class of every labelled test image
    x_test = []
    actual = []

    # sorted() because os.listdir() returns entries in arbitrary order
    for filename in sorted(os.listdir(folder)):
        # first matching token decides the class; None means "not a labelled image"
        label = next((value for token, value in class_tokens if token in filename), None)
        if label is None:
            continue

        # read in image, resize to 512 wide by 256 high, convert to array;
        # the with-block closes the file handle once the pixels are copied
        with Image.open(os.path.join(folder, filename)) as picture:
            img = np.array(picture.resize((512, 256)))

        # one feature array and exactly one label per image
        x_test.append(preprocessingImages(img))
        actual.append(label)

    if not x_test:
        raise ValueError("no test images with 'cir', 'rec' or 'squ' in their name found in: " + str(folder))

    # float labels keep the dtype the original np.append-built array had
    actual = np.array(actual, dtype=float)
    x_test = np.array(x_test)

    # evaluate the test set with the fitted model
    predicted = classifier.predict(x_test)

    print("Actual Labels:")
    print(actual)
    print("Testing Labels:")
    print(predicted)
    # print confusion matrix for test set (rows: actual, columns: predicted)
    print("Confusion Matrix:")
    print(pd.crosstab(actual, predicted, rownames=['Actual Y'], colnames=['Predicted Y']))
    # print accuracy for test set
    print("Accuracy:", accuracy_score(actual, predicted)*100, "%")

    return actual, predicted # return actual and predicted values back to the caller

In [5]:
# Folder holding the training images. To run the evaluation on another machine,
# set the LEGO_TRAINING_DIR environment variable (or edit the default below);
# without it the original location is used.
trainingPath = os.environ.get(
    'LEGO_TRAINING_DIR',
    'C:/Users/Jonny Lake/Documents/418/Lego_dataset_2/Lego_dataset_2/training',
)
# call training function and keep the fitted model in variable 'classifier'
classifier = training(trainingPath)

Actual Labels:
[1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
 1. 1. 1. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2.
 2. 2. 2. 2. 2. 2. 3. 3. 3. 3. 3. 3. 3. 3. 3. 3. 3. 3. 3. 3. 3. 3. 3. 3.
 3. 3. 3. 3. 3. 3. 3. 3. 3.]
Training Labels:
[1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
 1. 1. 1. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2.
 2. 2. 2. 2. 2. 2. 3. 3. 3. 3. 3. 3. 3. 3. 3. 3. 3. 3. 3. 3. 3. 3. 3. 3.
 3. 3. 3. 3. 3. 3. 3. 3. 3.]
Confusion Matrix:
Training Y   1.0  2.0  3.0
Predicted Y               
1.0           27    0    0
2.0            0   27    0
3.0            0    0   27
Accuracy: 100.0 %


In [6]:
# Folder holding the testing images. To run the evaluation on another machine,
# set the LEGO_TESTING_DIR environment variable (or edit the default below);
# without it the original location is used.
testingPath = os.environ.get(
    'LEGO_TESTING_DIR',
    'C:/Users/Jonny Lake/Documents/418/Lego_dataset_2/Lego_dataset_2/testing',
)
# call testing function and assign the outputs (actual labels, predicted labels) to variable 'test'
test = testing(testingPath, classifier)

Actual Labels:
[1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
 1. 1. 1. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2.
 2. 2. 2. 2. 2. 2. 3. 3. 3. 3. 3. 3. 3. 3. 3. 3. 3. 3. 3. 3. 3. 3. 3. 3.
 3. 3. 3. 3. 3. 3. 3. 3. 3.]
Testing Labels:
[1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 3. 1. 1.
 1. 3. 3. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2.
 2. 2. 3. 2. 2. 2. 3. 3. 3. 1. 3. 3. 3. 3. 3. 3. 3. 3. 3. 3. 3. 3. 2. 1.
 1. 2. 3. 3. 3. 1. 3. 3. 3.]
Confusion Matrix:
Test Y       1.0  2.0  3.0
Predicted Y               
1.0           24    0    3
2.0            0   26    1
3.0            4    2   21
Accuracy: 87.65432098765432 %
