In [26]:
import sklearn as sk
import numpy as np
import pandas as pd
import time
from sklearn.cross_validation import KFold
from sklearn import tree
from sklearn import ensemble
from sklearn import datasets
import theano.tensor as th
from scipy import misc
import copy
from numpy.random import uniform
from numpy.random import normal
from sklearn.datasets import load_iris
from sklearn.utils import shuffle
from sklearn.metrics import accuracy_score
from sklearn.metrics import mean_squared_error
from sklearn.datasets import make_classification
from math import copysign
from numpy.random import normal

import matplotlib.pylab as plt
%matplotlib inline




In [65]:
class autoencoder:
    """Fully connected autoencoder trained with mini-batch gradient descent.

    Every weight matrix has one extra leading row for the bias: the input of
    each layer is extended with a constant ``-1`` in column 0 before it is
    multiplied by the layer's weights.

    Parameters
    ----------
    layers_list : list of int
        Layer sizes, input layer first. Only its length is used here (to
        derive the number of weight layers); the actual sizes are fixed by
        the shapes in ``weight_list``.
    learning_rate : float
        Gradient-descent step size (divided by the batch size at update time).
    alpha : float
        Slope of the sigmoid.
    activation_functions : list of str
        One entry per layer including the input layer. ``'sigmoid'`` applies
        the sigmoid; any other value leaves the layer linear.
    cost_func : str
        Only ``'square'`` is supported.
    epochs_count : int
        Number of passes over the training part of the data.
    batch_size : int
        Number of rows per mini-batch.
    sparse_num : int
        1-based index of the layer whose activations ``predict`` returns as
        the encoded representation.
    weight_list : list of ndarray
        Initial weights, one ``(n_in + 1, n_out)`` array per layer.
    """

    def __init__(self, layers_list, learning_rate, alpha, activation_functions, cost_func,
                 epochs_count, batch_size,
                 sparse_num, weight_list):
        self.layers_count = len(layers_list) - 1
        self.learning_rate = learning_rate
        self.layers_list = layers_list
        self.alpha = alpha
        self.cost_func = cost_func
        self.activation_functions = activation_functions
        self.batch_size = batch_size
        self.epochs_count = epochs_count
        self.sparse_num = sparse_num
        # Own list so weights stay indexable whatever sequence type was passed.
        self.weight_list = list(weight_list)

    def sigmoid(self, x):
        """Sigmoid with slope ``alpha``, clipped to [0.001, 0.999]."""
        m = 1. / (1. + np.exp(-x * self.alpha))
        m = np.fmax(m, 0.001)
        m = np.fmin(m, 0.999)
        return m

    def der_sigmoid(self, x):
        """Derivative of ``sigmoid`` with respect to its pre-activation ``x``."""
        s = self.sigmoid(x)
        return (1. - s) * s * self.alpha

    def square_cost_vec(self, y_true, y_pred):
        """Element-wise half squared error."""
        return 0.5 * ((y_true - y_pred) ** 2)

    def der_square_cost_vec(self, y_true, y_pred):
        """Derivative of ``square_cost_vec`` with respect to ``y_pred``."""
        return (y_pred - y_true)

    def _add_bias(self, values):
        """Prepend the constant -1 bias column to a 2-D array."""
        return np.insert(values, 0, -1, axis=1)

    def _forward(self, data):
        """Run a bias-augmented batch through the network.

        Returns ``(activations, sums)``. ``activations[k]`` is the input fed
        to weight layer ``k`` (bias column included) and ``activations[-1]``
        is the network output; ``sums[k]`` is the pre-activation of layer
        ``k`` (``sums[0]`` is the raw input without its bias column).
        """
        activations = [data]
        sums = [data[:, 1:]]
        for num_layer in range(self.layers_count):
            s = np.dot(activations[-1], self.weight_list[num_layer])
            sums.append(s)
            if self.activation_functions[num_layer + 1] == 'sigmoid':
                s = self.sigmoid(s)
            if num_layer != self.layers_count - 1:
                s = self._add_bias(s)
            activations.append(s)
        return activations, sums

    def _backward(self, activations, sums, y_true):
        """Back-propagate the square cost; returns one summed gradient per weight layer."""
        delta = self.der_square_cost_vec(y_true, activations[-1])
        # A sigmoid output layer contributes its own derivative to the delta.
        if self.activation_functions[self.layers_count] == 'sigmoid':
            delta = delta * self.der_sigmoid(sums[-1])

        grads = []
        for layer_num in range(self.layers_count - 1, -1, -1):
            # Sum over the batch of the per-sample outer products, as one matmul.
            grads.insert(0, np.dot(activations[layer_num].T, delta))
            if layer_num == 0:
                break  # nothing to propagate into the input layer
            # Drop column 0: the bias unit has no incoming weights.
            delta = np.dot(delta, self.weight_list[layer_num].T)[:, 1:]
            # Linear layers have derivative 1, so only sigmoid layers rescale.
            if self.activation_functions[layer_num] == 'sigmoid':
                delta = delta * self.der_sigmoid(sums[layer_num])
        return grads

    def predict(self, x):
        """Forward pass.

        ``x`` is a 2-D array that may be given either raw (``n_inputs``
        columns) or already extended with the leading ``-1`` bias column
        (``n_inputs + 1`` columns); the bias column is added when missing.

        Returns ``(sparse_decode, output)``: the activations of layer
        ``sparse_num`` (``None`` if no layer has that index) and the
        activations of the last layer.
        """
        neuron_out = np.asarray(x)
        if neuron_out.shape[1] == np.shape(self.weight_list[0])[0] - 1:
            neuron_out = self._add_bias(neuron_out)

        sparse_decode = None
        output = None
        for num_layer in range(self.layers_count):
            neuron_out = np.dot(neuron_out, self.weight_list[num_layer])

            if self.activation_functions[num_layer + 1] == 'sigmoid':
                neuron_out = self.sigmoid(neuron_out)

            if num_layer + 1 == self.sparse_num:
                sparse_decode = neuron_out

            if num_layer == self.layers_count - 1:
                output = neuron_out
            else:
                neuron_out = self._add_bias(neuron_out)

        return sparse_decode, output

    def get_error(self, x, y):
        """Mean squared reconstruction error of ``predict(x)`` against ``y``."""
        sparse, out = self.predict(x)
        # Normalise by the number of predicted values, not by the input size
        # (which may include the bias column).
        return np.sum((out - y) ** 2) / float(out.size)

    def fit(self, x, y):
        """Train on inputs ``x`` (raw, without bias column) and targets ``y``.

        The rows are shuffled once, the last 5% are held out and their error
        is printed at the start of every epoch; the remaining rows are
        reshuffled each epoch and consumed in mini-batches of ``batch_size``.

        Raises
        ------
        ValueError
            If ``cost_func`` is not ``'square'``.
        """
        if self.cost_func != 'square':
            raise ValueError("unsupported cost_func: %r" % (self.cost_func,))

        x = self._add_bias(np.asarray(x))
        y = np.asarray(y)

        # Shuffle inputs and targets with the same permutation so pairs stay aligned.
        order = np.random.permutation(x.shape[0])
        x, y = x[order], y[order]

        split = int(0.95 * x.shape[0])  # slice bounds must be integers
        x_test, y_test = x[split:], y[split:]
        x, y = x[:split], y[:split]

        for num_epoch in range(self.epochs_count):
            # Single-string prints give the same text on Python 2 and 3.
            print("Epoch num is:  %s" % num_epoch)
            print("err is: %s" % self.get_error(x_test, y_test))

            order = np.random.permutation(x.shape[0])
            x, y = x[order], y[order]

            for start in range(0, x.shape[0], self.batch_size):
                data = x[start:start + self.batch_size]
                data_true = y[start:start + self.batch_size]

                activations, sums = self._forward(data)
                grads = self._backward(activations, sums, data_true)

                # Average over the rows actually present (last batch may be short).
                step = self.learning_rate / float(data.shape[0])
                self.weight_list = [w - g * step
                                    for w, g in zip(self.weight_list, grads)]
    

In [66]:
# Dataset geometry: number of letter classes iterated over when loading, and
# the side length (in pixels) used to flatten each square bitmap.
alphabet_size = 26
im_size = 29

# Probe-load a single sample image as greyscale.
im = misc.imread('data/big_alphabet_29x29/mutant-0-0-0.bmp', flatten='grey')

In [67]:
# Training images: nine "mutant" bitmaps (indices 0-8) per letter, each read as
# greyscale, flattened to a vector of im_size * im_size pixels and scaled by 1/255.
x = []
for letter in range(alphabet_size):
    for i in range(9):
        path = "data/big_alphabet_29x29/mutant-" + str(letter) + "-" + str(i) + "-0.bmp"
        im = misc.imread(path, flatten='grey')
        flat = im.reshape(im_size * im_size)
        flat /= 255.0  # in place, as before
        x.append(flat)

# Test images: the single "class" bitmap of every letter, prepared the same way.
x_test = []
for letter in range(alphabet_size):
    path = "data/big_alphabet_29x29/class-" + str(letter) + ".bmp"
    im = misc.imread(path, flatten='grey')
    flat = im.reshape(im_size * im_size)
    flat /= 255.
    x_test.append(flat)

# Autoencoder targets are copies of the inputs.
x = np.asarray(x)
x_test = np.asarray(x_test)
y = x.copy()
y_test = x_test.copy()

In [68]:
# Sanity check: inputs and targets must have the same shape.
# Single-string form prints identical text on Python 2 and Python 3.
print("%s %s" % (x.shape, y.shape))

(234, 841) (234, 841)


In [69]:
# Layer sizes of the autoencoder, input first. Each weight matrix is
# (n_in + 1, n_out): the extra leading row holds the bias weights.
# Deriving the shapes from the sizes keeps them consistent with the
# architecture instead of repeating every dimension by hand.
layer_sizes = [841, 500, 500, 32, 500, 500, 841]
weight_list = [normal(0, 0.1, (n_in + 1, n_out))
               for n_in, n_out in zip(layer_sizes[:-1], layer_sizes[1:])]

In [70]:
# NOTE(review): sparse_num = 1 makes predict return the first 500-unit hidden
# layer as the code; the 32-unit bottleneck is layer 3 -- confirm which is intended.
auto = autoencoder(layers_list = [841, 500, 500, 32, 500, 500, 841], learning_rate = 0.001, alpha = 1.0,
                   activation_functions = ['x', 'sigmoid', 'sigmoid', 'sigmoid', 'sigmoid', 'sigmoid', 'x'],
                   cost_func = 'square', epochs_count = 1000, batch_size = 25,
                   sparse_num = 1, weight_list = weight_list)

# Denoising setup: train on inputs corrupted with unit-variance Gaussian noise,
# with the clean images as targets.
auto.fit(x + normal(0, 1, (x.shape[0], x.shape[1])), x)

# The first weight matrix has 842 rows (841 pixels + bias), so the input needs
# the same leading -1 bias column that fit prepends to its training data.
auto.predict(np.insert(x, 0, -1, axis = 1))



Epoch num is:  0
err is: 2.26342333424
Epoch num is:  1
err is: 0.173681498649
Epoch num is:  2
err is: 0.0665532443165
Epoch num is:  3
err is: 0.0587958522808
Epoch num is:  4
err is: 0.0581776926403
Epoch num is:  5
err is: 0.0581143600701
Epoch num is:  6
err is: 0.0581051337722
Epoch num is:  7
err is: 0.0581033150693
Epoch num is:  8
err is: 0.0581028897088
Epoch num is:  9
err is: 0.058102782468
Epoch num is:  10
err is: 0.0581027551824
Epoch num is:  11
err is: 0.058102749016


KeyboardInterrupt: 