In [4]:
# python library references
from PIL import Image
import numpy as np
from sklearn.multiclass import OneVsRestClassifier
from sklearn.svm import LinearSVC
from sklearn.metrics import accuracy_score
import pandas as pd
import os

In [5]:
def training(folder):
    """Train the shape classifier on the images found in ``folder``.

    Every file whose name contains one of the class tags is opened, converted
    to greyscale, resized to 512x512 and flattened into one feature row.
    Classification legend: circle ('cir') - 1, rectangle ('rec') - 2,
    square ('squ') - 3.  When a name contains several tags the first one in
    that order wins; files matching no tag are skipped so that images and
    labels always stay aligned.

    The confusion matrix and accuracy printed here are computed from the
    classifier's predictions on the training images (not from the labels
    compared with themselves), so they reflect how well the model fits.

    Parameters
    ----------
    folder : str
        Path of the directory holding the training images.

    Returns
    -------
    tuple
        ``(x_train, y_train, classifier)``: the flattened image array of shape
        (nsamples, 512*512), the float array of class labels, and the fitted
        OneVsRestClassifier.

    Raises
    ------
    ValueError
        If ``folder`` contains no file whose name carries a class tag.
    """
    # classification legend: circle - 1, rectangle - 2, square - 3
    class_tags = (("cir", 1), ("rec", 2), ("squ", 3))

    # training set greyscale images and their classes, kept in lockstep
    images = []
    labels = []

    # sorted() makes the sample order (and therefore the fit) reproducible,
    # os.listdir() alone returns entries in arbitrary order
    for filename in sorted(os.listdir(folder)):
        # exactly one label per file: first matching tag, or None
        label = next((cls for tag, cls in class_tags if tag in filename), None)
        if label is None:
            # not a labelled training image; skipping keeps x and y aligned
            continue

        # read in image, convert to greyscale, resize, convert to array;
        # the context manager closes the file handle straight away
        with Image.open(os.path.join(folder, filename)) as picture:
            img = np.array(picture.convert('L').resize((512, 512)))

        images.append(img)
        labels.append(label)

    if not images:
        raise ValueError(
            "no labelled images found in %r: expected file names containing "
            "'cir', 'rec' or 'squ'" % (folder,)
        )

    # float labels, matching what repeated np.append on an empty list produced
    y_train = np.array(labels, dtype=float)

    # stack the images and read the number of samples and pixel dimensions
    x_train = np.array(images)
    nsamples, nx, ny = x_train.shape
    # reshape and flatten the training image array to one row per image
    x_train = x_train.reshape((nsamples, nx*ny))

    # fit the classifier using the training image array and the training class array
    classifier = OneVsRestClassifier(LinearSVC(random_state=0,C=10000,class_weight="balanced", max_iter=250),n_jobs=3).fit(x_train, y_train)

    # predict the training images back so the report measures the model
    y_fit = classifier.predict(x_train)

    # print confusion matrix for training set (rows: predicted, columns: actual)
    print("Confusion Matrix:")
    print(pd.crosstab(y_fit,y_train,rownames=['Predicted Y'],colnames=['Training Y']))
    # print accuracy for training set
    print("Accuracy:", accuracy_score(y_train,y_fit)*100, "%")

    # return x_train, y_train and the fitted classifier
    return x_train, y_train, classifier


In [6]:
def testing(folder, classifier): # function for testing the model
        # initialize test set greyscale image array
        x_test = []
        # initialize test set image classification array
        y_pred = []

        # for each image in the specified 'folder path', convert to greyscale and read in training classification
        for filename in os.listdir(folder):
            # read in image, convert to array, convert to greyscale
            img = np.array(Image.open(os.path.join(folder,filename)).convert('L').resize((512,512)))

            # if filename includes 'cir' append classification 1 to the training class array
            if "cir" in filename:
                y_pred = np.append(y_pred, 1)

            # if filename includes 'rec' append classification 2 to the training class array
            elif "rec" in filename:
                y_pred = np.append(y_pred, 2)
            
            # if filename includes 'squ' append classification 3 to the training class array
            elif "squ" in filename:
                y_pred = np.append(y_pred, 3)

            # append each greyscale image into an array of arrays
            if img is not None:
                x_test.append(img)

        # initialize testing image array as a numpy array
        x_test = np.array(x_test)
        # read the number of samples and dimensions of pixels from the shape of the testing image array 
        nsamples2, nx2, ny2 = x_test.shape
        # reshape and flatten the testing image array
        x_test = x_test.reshape((nsamples2,nx2*ny2))
        # evaluate the test set and create test image classification array, y_test
        y_test = classifier.predict(x_test)
        # print confusion matrix for test set
        print("Confusion Matrix:")
        print(pd.crosstab(y_pred,y_test,rownames=['Predicted Y'],colnames=['Test Y']))
        # print accuracy for test set
        print("Accuracy:", accuracy_score(y_pred,y_test)*100, "%")
    
        return y_pred, y_test

In [7]:
# set folder path for training images, CHANGE THIS PATH ONLY TO RUN FOR EVALUATION
trainingPath = 'D:/Fourth Year/ENGR 418/project/Lego_dataset_2/Lego_dataset_2/training'
# fit the model on the training folder; 'train' keeps the complete result tuple
train = training(trainingPath)
# unpack the tuple: flattened images, their classes, and the fitted classifier
x_train, y_train, classifier = train

Confusion Matrix:
Training Y   1.0  2.0  3.0
Predicted Y               
1.0           18    0    0
2.0            0   18    0
3.0            0    0   18
Accuracy: 100.0 %


In [8]:
# set folder path for testing images, CHANGE THIS PATH ONLY TO RUN FOR EVALUATION
testingPath = 'D:/Fourth Year/ENGR 418/project/Lego_dataset_2/Lego_dataset_2/testing'
# evaluate the classifier fitted above on the testing folder;
# 'test' holds the pair of arrays returned by testing()
test = testing(folder=testingPath, classifier=classifier)

Confusion Matrix:
Test Y       1.0  2.0  3.0
Predicted Y               
1.0           11    0    7
2.0            0   17    1
3.0            1    0   17
Accuracy: 83.33333333333334 %
