In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import scipy
import cv2
import os
from scipy.ndimage import convolve
%matplotlib inline

In [2]:
#get data
folders = ['triangle', 'star', 'square', 'circle']
labels = []
images = []
for folder in folders:
    for path in os.listdir('shapes/'+folder):
        img = cv2.imread('shapes/'+folder+'/'+path,0)
        images.append(cv2.resize(img, (60, 60)))
        labels.append(folders.index(folder))

In [3]:
#break data into training and test sets
to_train= 0
train_images, test_images, train_labels, test_labels = [],[],[],[]
for image, label in zip(images, labels):
    if to_train<5:
        train_images.append(image)
        train_labels.append(label)
        to_train+=1
    else:
        test_images.append(image)
        test_labels.append(label)
        to_train = 0
        
print('Number of training images: ', len(train_images))
print('Number of testimg images: ', len(test_images))

Number of training images:  12475
Number of testimg images:  2495


In [4]:
#function to flatten data
def flatten(dimData, images):
    images = np.array(images)
    images = images.reshape(len(images), dimData)
    images = images.astype('float32')
    images /=255
    return images

In [5]:
#flatten data
dataDim = np.prod(images[0].shape)
train_data  = flatten(dataDim, train_images)
test_data = flatten(dataDim, test_images)

In [6]:
from sklearn.neural_network import BernoulliRBM
from sklearn.pipeline import Pipeline
from sklearn import linear_model, datasets, metrics

# Models we will use
logr = linear_model.LogisticRegression()
rbm = BernoulliRBM(verbose=True)
classifier = Pipeline(steps=[('rbm', rbm), ('logistic', logr)])

In [10]:
rbm.n_components = 300
logr.C = 6000

# Training RBM-Logistic Pipeline
classifier.fit(train_data, train_labels)

# Training Logistic regression
logistic_classifier = linear_model.LogisticRegression(C=500)
logistic_classifier.fit(train_data, train_labels)

# Evaluation
print("Logistic regression using RBM features:\n%s\n" % (
    metrics.classification_report(
        test_labels,
        classifier.predict(test_data))))

print("Logistic regression using raw pixel features:\n%s\n" % (
    metrics.classification_report(
        test_labels,
        logistic_classifier.predict(test_data))))

[BernoulliRBM] Iteration 1, pseudo-likelihood = -2529.89, time = 38.06s
[BernoulliRBM] Iteration 2, pseudo-likelihood = -888.15, time = 39.05s
[BernoulliRBM] Iteration 3, pseudo-likelihood = -857.41, time = 39.27s
[BernoulliRBM] Iteration 4, pseudo-likelihood = -909.72, time = 39.21s
[BernoulliRBM] Iteration 5, pseudo-likelihood = -935.57, time = 39.13s
[BernoulliRBM] Iteration 6, pseudo-likelihood = -929.72, time = 39.32s
[BernoulliRBM] Iteration 7, pseudo-likelihood = -895.33, time = 39.16s
[BernoulliRBM] Iteration 8, pseudo-likelihood = -919.56, time = 39.00s
[BernoulliRBM] Iteration 9, pseudo-likelihood = -943.11, time = 39.02s
[BernoulliRBM] Iteration 10, pseudo-likelihood = -900.73, time = 38.98s
Logistic regression using RBM features:
             precision    recall  f1-score   support

          0       0.00      0.00      0.00       620
          1       0.25      1.00      0.40       627
          2       0.00      0.00      0.00       628
          3       0.00      0.00   

  'precision', 'predicted', average, warn_for)


In [11]:
from keras.utils import to_categorical
#change labels to categorical
train_labels = np.array(train_labels)
test_labels = np.array(test_labels)
train_labels_one_hot = to_categorical(train_labels)
test_labels_one_hot = to_categorical(test_labels)

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [12]:
#determine the number of classes
classes = np.unique(train_labels)
nClasses  = len(classes)

In [13]:
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Dropout

#three layers
#activation function: both
#neurons: 256
model = Sequential()
model.add(Dense(256, activation = 'tanh', input_shape = (dataDim,)))
model.add(Dropout(0.2))
model.add(Dense(256, activation='tanh'))
model.add(Dropout(0.2))
model.add(Dense(256, activation='relu'))
model.add(Dropout(0.2))
model.add(Dense(nClasses, activation='softmax'))

model.compile(optimizer='rmsprop', loss='categorical_crossentropy', metrics=['accuracy'])
history = model.fit(train_data, train_labels_one_hot, batch_size = 256, epochs=10, verbose=1,
                    validation_data=(test_data, test_labels_one_hot))

Train on 12475 samples, validate on 2495 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [14]:
#test model
[test_loss, test_acc] = model.evaluate(test_data, test_labels_one_hot)
print("Evaluation result on Test Data : Loss = {}, accuracy = {}".format(test_loss, test_acc))

Evaluation result on Test Data : Loss = 0.04192777874353221, accuracy = 0.9919839679358717
