In [1]:
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
import cv2
import os

#Original dataset path
ORIGINAL = 'original'
IMG_SIZE = 128
size_images = dict()

def label_img(img):
    word_label = img.split('.')[0][-2]
    return word_label

#create training dataset which includes images and labels

def create_train_data():
    training_data = []
    for img in os.listdir(ORIGINAL):
        label = label_img(img)
        path = os.path.join(ORIGINAL, img)
        img = cv2.imread(path, cv2.IMREAD_GRAYSCALE)
        if(img is not None):
            heigth, width = img.shape[:2]
            size_images[path] = {width, heigth}
            #reshape the images to the same size
            img = cv2.resize(img, (IMG_SIZE, IMG_SIZE))
            training_data.append([img, label])
    return training_data
train_data = create_train_data()

col_names = ['img', 'label']
df = pd.DataFrame(train_data, columns=col_names)
train_set = df.values
X = train_set[:, 0]
#set the dataTyp of array to int8 
X = np.array(list(X), dtype=np.uint8)
#Reshape the array to 2D array
X = X.reshape(X.shape[0],IMG_SIZE*IMG_SIZE)
y = train_set[:, 1]

print(X[0:10])

[[255 255 255 ... 246 245 253]
 [ 24  28  27 ... 163 159 167]
 [255 255 255 ... 227 231 229]
 ...
 [ 68  72  69 ...  45  51  55]
 [255 255 255 ... 246 250 249]
 [ 61  63  65 ...  50  55  54]]


In [2]:
from sklearn.model_selection import train_test_split
test_size = 0.20
seed = 7
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_size, random_state=seed)
print(X_train.dtype)


uint8


In [3]:
#Processing the data

#from sklearn.preprocessing import StandardScaler
#scaler = StandardScaler()
# Fit only to the training data
#scaler.fit(X_train)

In [4]:
#apply the transformations to the data:
#X_train = scaler.transform(X_train)
#X_test = scaler.transform(X_test)

In [5]:
#Training the model
from sklearn.neural_network import MLPClassifier

mlp = MLPClassifier(hidden_layer_sizes=(128,128), solver="sgd")
mlp.activation = "logistic"
mlp.fit(X_train,y_train)

MLPClassifier(activation='logistic', alpha=0.0001, batch_size='auto',
       beta_1=0.9, beta_2=0.999, early_stopping=False, epsilon=1e-08,
       hidden_layer_sizes=(128, 128), learning_rate='constant',
       learning_rate_init=0.001, max_iter=200, momentum=0.9,
       nesterovs_momentum=True, power_t=0.5, random_state=None,
       shuffle=True, solver='sgd', tol=0.0001, validation_fraction=0.1,
       verbose=False, warm_start=False)

In [6]:
predictions = mlp.predict(X_test)
from sklearn.metrics import classification_report,confusion_matrix
print(confusion_matrix(y_test,predictions))

[[ 0  1  0  8  0  7  3  0  0  0]
 [ 0  4  0  3  0  3  2  0  0  0]
 [ 0  0  0 12  0  4  5  0  0  0]
 [ 0  2  0  6  0  0  2  0  0  0]
 [ 0  0  0  5  0  4  3  0  0  0]
 [ 0  0  0  4  0  5  1  0  0  0]
 [ 0  2  0  4  0  0  4  0  0  0]
 [ 0  0  0  9  0  2  6  0  0  0]
 [ 0  0  0  7  0  3  7  0  0  0]
 [ 0  0  0  9  0  1  6  0  0  0]]


In [7]:
print(classification_report(y_test,predictions))

             precision    recall  f1-score   support

          a       0.00      0.00      0.00        19
          b       0.44      0.33      0.38        12
          c       0.00      0.00      0.00        21
          d       0.09      0.60      0.16        10
          g       0.00      0.00      0.00        12
          h       0.17      0.50      0.26        10
          i       0.10      0.40      0.16        10
          l       0.00      0.00      0.00        17
          v       0.00      0.00      0.00        17
          y       0.00      0.00      0.00        16

avg / total       0.06      0.13      0.07       144



  'precision', 'predicted', average, warn_for)


In [8]:
len(mlp.coefs_[0])

16384

In [9]:
"""
import snips as snp
snp.prettyplot(matplotlib)
fig, ax = snp.newfig()
ax.plot(classifier.loss_curve_)
snp.labs("number of steps", "loss function", "Loss During GD (Rate=0.001)")
"""

'\nimport snips as snp\nsnp.prettyplot(matplotlib)\nfig, ax = snp.newfig()\nax.plot(classifier.loss_curve_)\nsnp.labs("number of steps", "loss function", "Loss During GD (Rate=0.001)")\n'

In [10]:
mlp.learning_rate_init = 1
mlp.fit(X_train, y_train)

MLPClassifier(activation='logistic', alpha=0.0001, batch_size='auto',
       beta_1=0.9, beta_2=0.999, early_stopping=False, epsilon=1e-08,
       hidden_layer_sizes=(128, 128), learning_rate='constant',
       learning_rate_init=1, max_iter=200, momentum=0.9,
       nesterovs_momentum=True, power_t=0.5, random_state=None,
       shuffle=True, solver='sgd', tol=0.0001, validation_fraction=0.1,
       verbose=False, warm_start=False)

In [11]:
print(classification_report(y_test,predictions))

             precision    recall  f1-score   support

          a       0.00      0.00      0.00        19
          b       0.44      0.33      0.38        12
          c       0.00      0.00      0.00        21
          d       0.09      0.60      0.16        10
          g       0.00      0.00      0.00        12
          h       0.17      0.50      0.26        10
          i       0.10      0.40      0.16        10
          l       0.00      0.00      0.00        17
          v       0.00      0.00      0.00        17
          y       0.00      0.00      0.00        16

avg / total       0.06      0.13      0.07       144



  'precision', 'predicted', average, warn_for)
