In [49]:
import sklearn as sk
import numpy as np
import pandas as pd
import time
from sklearn.cross_validation import KFold
from sklearn import tree
from sklearn import ensemble
from sklearn import datasets
import theano.tensor as th
from scipy import misc
import copy
from numpy.random import uniform
from numpy.random import normal
from sklearn.datasets import load_iris
from sklearn.utils import shuffle
from sklearn.metrics import accuracy_score
from sklearn.metrics import mean_squared_error
from sklearn.datasets import make_classification
from math import copysign

import matplotlib.pylab as plt
%matplotlib inline

# autoencoder + rmb

In [50]:
class rbm:
    def __init__(self, learning_rate, mu_moment, batch_size, num_epochs, hidden_count, mode):
        self.learning_rate = learning_rate
        self.mu_moment = mu_moment
        self.batch_size = batch_size
        self.num_epochs = num_epochs
        self.hidden_count = hidden_count
        self.mode = mode
        
    def sigmoid(self, matrix):
        return 1. / (1. + np.exp(-matrix))
    
    def init_weights(self, vis_count):
        self.w_vh = np.random.normal(0, 0.1, (vis_count, self.hidden_count))
        self.w_v = np.zeros(vis_count)
        self.w_h = np.zeros(self.hidden_count)
        
        self.wu_vh = np.zeros((vis_count, self.hidden_count))
        self.wu_v = np.zeros(vis_count)
        self.wu_h = np.zeros(self.hidden_count)
    
    def fit(self, x):
        self.init_weights(x.shape[1])
        sample_size = x.shape[0]
        shuffle(x)
        
        test_data = x[:(sample_size * 0.1)]
        x = x[(sample_size * 0.1):]
        
        batch_count = (x.shape[0]) // self.batch_size
        start_time = time.time()
        for epoch in range(self.num_epochs):
            print "Epoch num:", epoch+1
            err = []
            shuffle(x)
            for batch in range(batch_count):
                v = x[batch * self.batch_size : (batch + 1) * self.batch_size]
                v_true = v.copy()
                
                self.wu_vh *= self.mu_moment
                self.wu_v *= self.mu_moment
                self.wu_h *= self.mu_moment
                
                #positive phase
                h = self.sigmoid(np.dot(v, self.w_vh) + self.w_h)
                
                self.wu_vh += np.dot(v.T, h)
                self.wu_v += np.sum(v, axis = 0)
                self.wu_h += np.sum(h, axis = 0)
                    
                #sampling
                h = 1. * (np.random.uniform(0, 1, (self.batch_size, self.hidden_count)) < h)
                
                #negative phase 
                if self.mode == 'gaussian':
                    v = np.dot(h, self.w_vh.T) + self.w_v
                elif self.mode == 'bernoulli':
                    v = self.sigmoid(np.dot(h, self.w_vh.T) + self.w_v)
                    
                h = self.sigmoid(np.dot(v, self.w_vh) + self.w_h)
                
                #update weights
                self.wu_vh -= np.dot(v.T, h)
                self.wu_v -= np.sum(v, axis = 0)
                self.wu_h -= np.sum(h, axis = 0)
                
                self.w_vh += self.wu_vh * self.learning_rate / self.batch_size
                self.w_v += self.wu_v * self.learning_rate / self.batch_size
                self.w_h += self.wu_h * self.learning_rate / self.batch_size
                
                err.append(np.mean((v - v_true) ** 2))
                
            print 'Time is:', time.time() - start_time
            print 'Error is:', np.mean(err)
            h, pred = self.predict(test_data)
            print 'Error for test data:', np.mean((test_data - pred) ** 2)
                
    def predict(self, x):        
        h = np.dot(x, self.w_vh)
        h += np.tile(self.w_h, (x.shape[0], 1))
        h = self.sigmoid(h)
        h_ret = h.copy()
        h = (np.random.uniform(0, 1, (x.shape[0], self.hidden_count)) < h) * 1.
        
        pred = np.dot(h, self.w_vh.T)
        pred += np.tile(self.w_v, (x.shape[0], 1))
        pred = self.sigmoid(pred)
        return h_ret, pred

In [91]:
class autoencoder:
    def __init__(self, layers_list, learning_rate, alpha, activation_functions, cost_func, \
                 epochs_count, batch_size,\
                  sparse_num, weight_list):
        
        self.layers_count = len(layers_list) -1
        self.weight_list = []
        self.learning_rate = learning_rate
        self.layers_list = layers_list
        self.alpha = alpha
        self.cost_func = cost_func
        self.activation_functions = activation_functions
        self.batch_size = batch_size
        self.epochs_count = epochs_count
        self.sparse_num = sparse_num
        self.weight_list = weight_list
            
    def sigmoid(self, x):
        m = 1. / (1. + np.exp(-x * self.alpha))
        m = np.fmax(m, 0.001)
        m = np.fmin(m, 0.999)
        return m
    
    def der_sigmoid(self, x):
        return (1. - self.sigmoid(x)) * self.sigmoid(x) * self.alpha
    
    def square_cost_vec(self, y_true, y_pred):
        return 0.5 * ((y_true - y_pred) ** 2)
    
    def der_square_cost_vec(self, y_true, y_pred):
        return (y_pred - y_true)
    
    def predict(self, x):
        sparse_decode = None
        output = None
        neuron_out = x
        for num_layer in range(self.layers_count):
            neuron_out = np.dot(neuron_out, self.weight_list[num_layer])
            
            if (self.activation_functions[num_layer + 1] == 'sigmoid'):
                neuron_out = self.sigmoid(neuron_out)

            if (num_layer + 1 == self.sparse_num):
                sparse_decode = neuron_out
                
            if (num_layer == self.layers_count - 1):
                output = neuron_out
        
            neuron_out = np.insert(neuron_out, 0, -1, axis = 1)
            
        return sparse_decode, output
    
    def get_error(self, x, y):
        sparse, out = self.predict(x)
        y_err = np.sum((out - y) ** 2) / (x.shape[0] * x.shape[1])
        return y_err
    
    
    def fit(self, x, y):
        batch_count = x.shape[0] // self.batch_size

        x = np.insert(x, 0, -1, axis = 1)
        y = np.insert(y, 0, -1, axis = 1)
        shuffle(x, y)
        x_test = x[0.95 * x.shape[0]:]
        x = x[:0.95 * x.shape[0]]
        
        y_test = y[0.95 * y.shape[0]:]
        y = y[:0.95 * y.shape[0]]
        
        for num_epoch in range(self.epochs_count):
            print "Epoch num is: ", num_epoch
            print "err is:", self.get_error(x_test, y_test)
            shuffle(x, y)
            for batch_num in range(batch_count):
                data = x[batch_num * self.batch_size:(batch_num + 1) * self.batch_size]
                data_true = y[batch_num * self.batch_size:(batch_num + 1) * self.batch_size]
                
                neuron_out = [data]
                neuron_sum = [data[:, 1:]]
                w_u = []
                for num_layer in range(self.layers_count):
                    s = np.dot(neuron_out[-1], self.weight_list[num_layer])
                    neuron_sum.append(s)
                    if (self.activation_functions[num_layer + 1] == 'sigmoid'):
                        s = self.sigmoid(s)
                    if (num_layer != self.layers_count - 1):
                        s = np.insert(s, 0, -1, axis = 1)
                    neuron_out.append(s)

                if (self.cost_func == 'square'):
                    der_neuron = self.der_square_cost_vec(data_true[:, 1:], neuron_out[-1])

                for layer_num in range(self.layers_count - 1, -1, -1):
                    w_u.insert(0, np.sum( map(lambda x, y: np.dot(x.reshape(-1, 1), y.reshape(1, -1)), neuron_out[layer_num], der_neuron), axis = 0))
                    if self.activation_functions[layer_num] == 'sigmoid':
                        der_neuron = np.dot(der_neuron, self.weight_list[layer_num].T)[:, 1:] * self.der_sigmoid(neuron_sum[layer_num])
                    
                self.weight_list = map(lambda x, y: x - y * (self.learning_rate / self.batch_size), self.weight_list, w_u)
    




# Read data

In [89]:
data = pd.read_csv('mnist.csv', sep = ',')
data = np.asarray(data)
target = data[:, 1]
data = data[:, 1:] / 255.
print data.shape

(42000, 784)


# generate first layer

In [56]:
rbm_first_layer = rbm(learning_rate = 0.005, mu_moment = 0.9, batch_size = 200, num_epochs = 2, hidden_count = 500,\
           mode= 'bernoulli')
rbm_first_layer.fit(data)
h, pred = rbm_first_layer.predict(data)
w_vh1 = rbm_first_layer.w_vh
w_v1 = rbm_first_layer.w_v
w_h1 = rbm_first_layer.w_h



Epoch num: 1
Time is: 218.540069103
Error is: 0.0397568469087
Error for test data: 0.0259837256758
Epoch num: 2
Time is: 452.471276045
Error is: 0.0226902799886
Error for test data: 0.0206547065126


# Generate second layer

In [57]:
rbm_second_layer = rbm(learning_rate = 0.005, mu_moment = 0.9, batch_size = 200, num_epochs = 2, hidden_count = 500,\
           mode= 'bernoulli')
rbm_second_layer.fit(h)
h, pred = rbm_second_layer.predict(h)
w_vh2 = rbm_second_layer.w_vh
w_v2 = rbm_second_layer.w_v
w_h2 = rbm_second_layer.w_h



Epoch num: 1
Time is: 115.405921936
Error is: 0.0517354420641
Error for test data: 0.0325000583121
Epoch num: 2
Time is: 222.030552864
Error is: 0.0287600820124
Error for test data: 0.0263532871586


# generate third layer

In [58]:
rbm_spy = rbm(learning_rate = 0.005, mu_moment = 0.9, batch_size = 200, num_epochs = 2, hidden_count = 32,\
           mode= 'bernoulli')
rbm_spy.fit(h)
h, pred = rbm_spy.predict(h)
w_vh3 = rbm_spy.w_vh
w_v3 = rbm_spy.w_v
w_h3 = rbm_spy.w_h



Epoch num: 1
Time is: 7.38730192184
Error is: 0.0603864900101
Error for test data: 0.0389987361711
Epoch num: 2
Time is: 16.4116020203
Error is: 0.0336365695202
Error for test data: 0.0300338805671
Epoch num: 3
Time is: 24.0292189121
Error is: 0.0277019223726
Error for test data: 0.0260544694137
Epoch num: 4
Time is: 31.9193639755
Error is: 0.0246212304056
Error for test data: 0.02373476232
Epoch num: 5
Time is: 40.2244138718
Error is: 0.0227092014048
Error for test data: 0.0222051797573


In [75]:
first_layer = np.insert(w_vh1, 0, w_h1, axis = 0)
second_layer = np.insert(w_vh2, 0, w_h2, axis = 0)
third_layer = np.insert(w_vh3, 0, w_h3, axis = 0)
a = w_vh3.T
b = w_vh2.T
c = w_vh1.T
fouth_layer = np.insert(a, 0, w_v3, axis = 0)
fith_layer = np.insert(b, 0, w_v2, axis = 0)
sixth_layer = np.insert(c, 0, w_v1, axis = 0)

weight_list = [first_layer, second_layer, third_layer, fouth_layer, fith_layer, sixth_layer]

In [92]:
encoder = autoencoder(layers_list = [784, 500, 500, 32, 500, 500, 784], learning_rate = 0.001, alpha = 1.0,\
                          activation_functions = ['x', 'sigmoid', 'sigmoid', 'sigmoid', 'sigmoid', 'sigmoid', 'x'],\
                         cost_func = 'square', epochs_count = 15, batch_size = 200,\
                         sparse_num = 3, weight_list = weight_list)
encoder.fit(data + normal(0, 1, (data.shape[0], data.shape[1])), data)

decode, real = encoder.predict(np.insert(data, 0, -1, axis = 1))
print np.mean((x_test - real) ** 2)



Epoch num is:  0
err is:

ValueError: shapes (2100,785) and (786,500) not aligned: 785 (dim 1) != 786 (dim 0)

# TSNE

In [None]:
from sklearn.manifold import TSNE
tsne = TSNE(n_components=2, perplexity=30.0, early_exaggeration=4.0,\
            learning_rate=1000.0, n_iter=1000, n_iter_without_progress=30, min_grad_norm=1e-07,\
            metric='euclidean', init='random', verbose=0, random_state=None, method='barnes_hut', angle=0.5)

x = tsne.fit_transform(data)

plt.figure(figsize = (8, 6), dpi = 80)
plt.scatter(x[:, 0], x[:, 1])
plt.show()