Imports

In [145]:
import os
import numpy as np
from numpy import asarray
from sklearn.linear_model import LogisticRegression
import pandas as pd
from sklearn.metrics import confusion_matrix, accuracy_score
from PIL import Image

Training Dataset

In [146]:
'''This block of code gets the training data set ready for the model.
This is done by first setting up a dataframe where each image is assigned to a class. The images are then processed by being grayscaled, resized and
turned into an array form.'''
path_testing = 'testing/'
path_training = 'training/'
im_width = 16
def train_function(path,im_width):
    file_names = os.listdir(path) #list all files in the path
    labels = [] #empty array to store classes 
    for file_name in file_names:
        shape = file_name.split('_')[0] #splitting the data on the underscore and taking the first element
        if shape == 'cir':
            labels.append(0) #if shape is circle, add 0 to the label array
        elif shape == 'rec':
            labels.append(1) #if shape is rectangle, add 1 to the label array
        else:
            labels.append(2) #if shape is square, add 2 to the label array

    dict = {
        'image':file_names, 'class':labels #dictionary initializzation to assign filename and label array to image and class
    }
    df_train = pd.DataFrame.from_dict(dict, orient='index').T #contructs a dataframe form a dictionary of arrays 
    n_samples = len(df_train) #number of images in the folder
    classes = [i for i in range(n_samples)]
    x_train = np.empty((n_samples,im_width**2))
    y_train = np.empty((n_samples,1))
    for i in range(n_samples):
        file = path + str(df_train['image'][i])
        im = Image.open(file).convert('L') #convert image to gray scale
        im = im.resize((im_width,im_width)) #resize the image
        im_array = asarray(im) #make image into an array
        x_train[i,:] = im_array.reshape(1,-1)
        y_train[i,0] = classes[df_train['class'][i]]
    return x_train, y_train

Creating model and Performance of Training set

In [147]:
'''This block of code is used to build our model using x_train/y_train from above through logistic regression.
It also displays the performace of the training set.'''
model = LogisticRegression()
x_train, y_train = train_function(path_training,im_width) #calling function to build training data
model.fit(x_train,y_train) 

y_pred = model.predict(x_train) 
print(accuracy_score(y_train,y_pred)) #calulate accuracy score
print(confusion_matrix(y_train,y_pred)) #evaluate confusion matrix

1.0
[[36  0  0]
 [ 0 36  0]
 [ 0  0 36]]


  y = column_or_1d(y, warn=True)
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


Test Funciton and perfromance of Testing set

In [148]:
'''The following block of code repeats the steps we took to create a dataframe in the training section of the project. 
Following which we resize and greyscale the images before testing the model against the testing dataset which is populated in the dataframe df_test.
We have also wrapped the testing portion of the code in a function as requested.'''

def test_function(path,im_width):
    file_names = os.listdir(path)
    labels = []
    for file_name in file_names:
        shape = file_name.split('_')[0]
        if shape == 'cir':
            labels.append(0)
        elif shape == 'rec':
            labels.append(1)
        else:
            labels.append(2)

    dict = {
        'image':file_names, 'class':labels
    }
    df_test = pd.DataFrame.from_dict(dict, orient='index').T
    n_samples = len(df_test)
    classes = [i for i in range(n_samples)]
    x_test = np.empty((n_samples,im_width**2))
    y_test = np.empty((n_samples,1))
    for i in range(n_samples):
        file = path + str(df_test['image'][i])
        im = Image.open(file).convert('L')
        im = im.resize((im_width,im_width))
        im_array = asarray(im)
        x_test[i,:] = im_array.reshape(1,-1)
        y_test[i,0] = classes[df_test['class'][i]]

    y_pred = model.predict(x_test)
    acc = accuracy_score(y_test,y_pred)
    conf = confusion_matrix(y_test,y_pred)
    weights = model.coef_.shape[0]*model.coef_.shape[1]
    return acc, conf, weights


In [149]:
'''The test_function is called which returns the accuracy, confusion matrix and the weights that we have used'''
acc, conf, weights = test_function(path_testing,im_width) #calling function to test the model
print(acc)
print(conf)
print(weights)

0.9629629629629629
[[18  0  0]
 [ 0 18  0]
 [ 2  0 16]]
768
