In [76]:
import os
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import numpy as np
from numpy import asarray
from sklearn.linear_model import LogisticRegression
import pandas as pd
from sklearn.metrics import confusion_matrix, accuracy_score
from PIL import Image

In [77]:
path_training = 'training/'
file_names = os.listdir(path_training)
labels = []
for file_name in file_names:
    shape = file_name.split('_')[0]
    if shape == 'cir':
        labels.append(0)
    elif shape == 'rec':
        labels.append(1)
    else:
        labels.append(2)

dict = {
    'image':file_names, 'class':labels
}
df_train = pd.DataFrame.from_dict(dict, orient='index').T
print(df_train)
#img = mpimg.imread('training/'+ str(df_train['image'][0]))
#imgplot = plt.imshow(img)
#plt.show()

          image class
0     cir_1.png     0
1    cir_11.png     0
2    cir_12.png     0
3    cir_13.png     0
4    cir_14.png     0
..          ...   ...
103   squ_5.png     2
104   squ_6.png     2
105   squ_7.png     2
106   squ_8.png     2
107   squ_9.png     2

[108 rows x 2 columns]


In [78]:
def train_data(folder,n_samples,im_width,data):
    classes = [i for i in range(n_samples)]
    x = np.empty((n_samples,im_width**2))
    y = np.empty((n_samples,1))
    for i in range(n_samples):
        path = folder + str(data['image'][i])
        im = Image.open(path).convert('L')
        im = im.resize((im_width,im_width))
        im_array = asarray(im)
        x[i,:] = im_array.reshape(1,-1)
        y[i,0] = classes[df_train['class'][i]]
    return x,y

In [79]:
im_width = 16
num_images = len(df_train)
x_train = np.empty((num_images*3,im_width**2))
y_train = np.empty((num_images*3,1))

In [80]:
for i in range(num_images):
  x_train, y_train = train_data(path_training,num_images,im_width,df_train)
print(x_train.shape,y_train.shape)


(108, 256) (108, 1)


In [81]:
model = LogisticRegression()
model.fit(x_train,y_train)
y_pred = model.predict(x_train)
print(accuracy_score(y_train,y_pred))
print(confusion_matrix(y_train,y_pred))

1.0
[[36  0  0]
 [ 0 36  0]
 [ 0  0 36]]


  y = column_or_1d(y, warn=True)
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


In [82]:
def test_function(path,im_width):
    file_names = os.listdir(path)
    labels = []
    for file_name in file_names:
        shape = file_name.split('_')[0]
        if shape == 'cir':
            labels.append(0)
        elif shape == 'rec':
            labels.append(1)
        else:
            labels.append(2)

    dict = {
        'image':file_names, 'class':labels
    }
    df_test = pd.DataFrame.from_dict(dict, orient='index').T
    n_samples = len(df_test)
    classes = [i for i in range(n_samples)]
    x_test = np.empty((n_samples,im_width**2))
    y_test = np.empty((n_samples,1))
    for i in range(n_samples):
        file = path + str(df_test['image'][i])
        im = Image.open(file).convert('L')
        im = im.resize((im_width,im_width))
        im_array = asarray(im)
        x_test[i,:] = im_array.reshape(1,-1)
        y_test[i,0] = classes[df_test['class'][i]]

    y_pred = model.predict(x_test)
    acc = accuracy_score(y_test,y_pred)
    conf = confusion_matrix(y_test,y_pred)
    weights = model.coef_.shape[0]*model.coef_.shape[1]
    return acc, conf, weights


In [83]:
path_testing = 'testing/'
acc, conf, weights = test_function(path_testing,im_width)
print(acc)
print(conf)
print(weights)

0.9629629629629629
[[18  0  0]
 [ 0 18  0]
 [ 2  0 16]]
768
