In [1]:
import numpy as np
from scipy import optimize
from __future__ import division

In [437]:
class SigmoidActivationFunction:
    """Logistic sigmoid activation and its first derivative.

    Both members are static so the class can be passed around as a plain
    namespace exposing ``value`` and ``derivative``.
    """

    @staticmethod
    def value(z):
        """Return ``1 / (1 + exp(-z))``, evaluated element-wise by NumPy.

        Parameters
        ----------
        z : scalar or numpy.ndarray
            Pre-activation input.

        Returns
        -------
        Same shape as ``z``, with values in ``[0, 1]``.
        """
        # For large negative z, exp(-z) overflows to +inf and the quotient
        # becomes exactly 0.0, which is the correct limit. Only the spurious
        # overflow RuntimeWarning is silenced; the computed value is unchanged.
        with np.errstate(over='ignore'):
            return 1 / (1 + np.exp(-z))

    @staticmethod
    def derivative(z):
        """Return the sigmoid derivative ``s * (1 - s)`` with ``s = value(z)``."""
        sig = SigmoidActivationFunction.value(z)
        return sig * (1 - sig)
    
class WeightPacking:
    """Convert between per-layer (weights, biases) lists and one flat vector.

    Flat layout, layer after layer: a ``(n_out, n_in + 1)`` matrix whose
    column 0 is the bias and whose remaining columns are the transposed
    weights, flattened row by row.
    """

    @staticmethod
    def pack(weights, biases):
        """Flatten per-layer parameters into a single 1-D array.

        ``weights[i]`` is ``(n_in, n_out)`` and ``biases[i]`` is ``(1, n_out)``.
        """
        flat_layers = []
        for layer_w, layer_b in zip(weights, biases):
            # Bias becomes the leading column of the (n_out, n_in + 1) matrix.
            layer_matrix = np.concatenate((layer_b.T, layer_w.T), 1)
            flat_layers.append(layer_matrix.reshape(-1))
        return np.concatenate(flat_layers)

    @staticmethod
    def unpack(thetas, layers):
        """Split a flat parameter vector back into per-layer arrays.

        ``layers`` lists the unit count of every layer, input first.
        Returns ``(weights, biases)`` where ``weights[i]`` is ``(n_in, n_out)``
        and ``biases[i]`` is ``(1, n_out)``.
        """
        weights, biases = [], []
        offset = 0
        for n_in, n_out in zip(layers[:-1], layers[1:]):
            n_params = n_out * (n_in + 1)
            stop = offset + n_params
            layer_matrix = thetas[offset:stop].reshape((n_out, n_in + 1))
            offset = stop
            # Column 0 holds the biases, the remaining columns the weights.
            weights.append(layer_matrix[:, 1:].T)
            biases.append(layer_matrix[:, 0][np.newaxis])
        return weights, biases
    
    
class CrossEntropyCostFunction:
    """Binary cross-entropy cost summed over every element, plus its output delta."""

    @staticmethod
    def cost(actual, predicted):
        """Return the summed cross-entropy between ``actual`` and ``predicted``.

        Each element contributes ``-a*log(p) - (1-a)*log(1-p)``. Element-wise
        results are passed through ``np.nan_to_num`` before summing, so a NaN
        produced by ``0 * log(0)`` contributes 0.
        """
        positive_part = -actual * np.log(predicted)
        negative_part = (1 - actual) * np.log(1 - predicted)
        per_element = np.nan_to_num(positive_part - negative_part)
        return np.sum(per_element)

    @staticmethod
    def delta(actual, predicted, activationFunction):
        """Return the output-layer error ``predicted - actual``.

        ``activationFunction`` is accepted but not used by this cost.
        """
        error = predicted - actual
        return error


In [446]:
class NN_1HL(object):
    """Feed-forward classifier with one hidden layer.

    Parameters are fitted by ``scipy.optimize.minimize`` on a flat parameter
    vector; ``WeightPacking`` converts between that vector and the per-layer
    weight / bias arrays.

    Parameters
    ----------
    reg_lambda : float
        L2 regularisation strength applied to the weights (not the biases).
    epsilon_init : float
        Initial parameters are drawn uniformly from
        ``[-epsilon_init, epsilon_init)``.
    hidden_layer_size : int
        Number of hidden units.
    opti_method : str
        ``method`` argument forwarded to ``scipy.optimize.minimize``.
    maxiter : int
        ``maxiter`` option forwarded to the optimiser.
    ActivationFunction : class
        Must expose static ``value(z)`` and ``derivative(z)``.
    CostFunction : class
        Must expose static ``cost(actual, predicted)`` and ``delta(...)``.

    Attributes set by ``fit``
    -------------------------
    weights, biases : lists of numpy.ndarray, one entry per layer transition.
    classes_ : numpy.ndarray of the sorted distinct labels seen in ``y``.
    """

    def __init__(self, reg_lambda=0, epsilon_init=0.12,
                 hidden_layer_size=25, opti_method='TNC', maxiter=500,
                 ActivationFunction = SigmoidActivationFunction, CostFunction = CrossEntropyCostFunction):
        self.reg_lambda = reg_lambda
        self.epsilon_init = epsilon_init
        self.hidden_layer_size = hidden_layer_size
        self.activation_func = ActivationFunction.value
        self.activation_func_prime = ActivationFunction.derivative
        self.method = opti_method
        self.maxiter = maxiter
        self._CostFunction = CostFunction.cost
        # NOTE(review): stored but not called by `function`, which hard-codes
        # the output error as (a3 - Y). Confirm the intended third argument of
        # `delta` before wiring it in.
        self._CostFunctionDelta = CostFunction.delta

    def rand_init(self, l_in, l_out):
        """Return a ``(l_out, l_in + 1)`` array drawn uniformly from
        ``[-epsilon_init, epsilon_init)``; column 0 is used as the bias."""
        return np.random.rand(l_out, l_in + 1) * 2 * self.epsilon_init - self.epsilon_init

    def predict(self, X):
        """Return the predicted label for every row of ``X``.

        After ``fit`` the winning output index is mapped back through
        ``classes_``; for labels that already are ``0..K-1`` this equals the
        raw index. If ``classes_`` is absent (weights assigned by hand), the
        raw index is returned.
        """
        best = self.predict_proba(X).argmax(0)
        classes = getattr(self, 'classes_', None)
        if classes is None:
            return best
        return classes[best]

    def predict_proba(self, X):
        """Return the output activations transposed, i.e. shaped
        ``(n_labels, n_samples)`` for an ``(n_samples, n_features)`` input."""
        _, _, _, _, h = self._forward(X, self.weights, self.biases)
        return h.T

    def _forward(self, X, weights, biases):
        """Run one forward pass through both layers.

        Returns ``(a1, z2, a2, z3, a3)``: the input, hidden pre-activation,
        hidden activation, output pre-activation and output activation.
        """
        a1 = X
        z2 = np.dot(a1, weights[0]) + biases[0]
        a2 = self.activation_func(z2)
        z3 = np.dot(a2, weights[1]) + biases[1]
        a3 = self.activation_func(z3)
        return a1, z2, a2, z3, a3

    def fit(self, X, y):
        """Train on ``X`` (``(n_samples, n_features)``) and 1-D labels ``y``.

        Labels may be any values ``np.unique`` can sort; they are encoded to
        ``0..K-1`` internally and remembered in ``classes_``. Sets
        ``weights``, ``biases`` and ``classes_`` and returns ``self``.
        """
        input_layer_size = X.shape[1]

        # Encode labels as indices into the sorted distinct labels, so the
        # one-hot lookup in `function` is valid for any label set.
        self.classes_, y_idx = np.unique(y, return_inverse=True)
        y_idx = y_idx.reshape(-1)
        num_labels = len(self.classes_)

        # Draw order (hidden layer first, then output layer) is kept so a
        # given seed yields the same initial parameters as before.
        theta1_0 = self.rand_init(input_layer_size, self.hidden_layer_size)
        theta2_0 = self.rand_init(self.hidden_layer_size, num_labels)

        layers = [input_layer_size, self.hidden_layer_size, num_labels]

        # Column 0 of each initial matrix is the bias, the rest the weights.
        init_weights = [theta1_0[:, 1:].T, theta2_0[:, 1:].T]
        init_biases = [theta1_0[:, 0][np.newaxis], theta2_0[:, 0][np.newaxis]]
        thetas0 = WeightPacking.pack(init_weights, init_biases)

        options = {'maxiter': self.maxiter}
        # jac=True: `function` returns (cost, gradient) in a single call.
        _res = optimize.minimize(self.function, thetas0, jac=True, method=self.method,
                                 args=(layers, X, y_idx, self.reg_lambda), options=options)

        self.weights, self.biases = WeightPacking.unpack(_res.x, layers)
        return self

    def function(self, thetas, layers, X, y, reg_lambda):
        """Return ``(cost, gradient)`` for the flat parameter vector ``thetas``.

        ``y`` must hold integer class indices in ``0..layers[-1]-1``; it is
        expanded to one-hot rows. The gradient is packed in the same layout
        as ``thetas``. Regularisation uses the ``reg_lambda`` argument for
        both the cost and the gradient.
        """
        m = X.shape[0]

        weights, biases = WeightPacking.unpack(thetas, layers)
        t1w, t2w = weights

        # One-hot encode the targets: row i is the indicator of class y[i].
        Y = np.eye(layers[-1])[y]

        # Single forward pass shared by the cost and the backward pass.
        a1, z2, a2, z3, a3 = self._forward(X, weights, biases)

        J = self._CostFunction(Y, a3) / m

        # Backpropagation: output error, then hidden-layer error.
        D3 = a3 - Y
        D2 = np.dot(D3, t2w.T) * self.activation_func_prime(z2)

        ThetaGradW1 = np.dot(a1.T, D2) / m
        ThetaGradB1 = np.mean(D2, 0)[np.newaxis]

        ThetaGradW2 = np.dot(a2.T, D3) / m
        ThetaGradB2 = np.mean(D3, 0)[np.newaxis]

        if reg_lambda != 0:
            # 2.0 keeps the division floating-point even without
            # `from __future__ import division`.
            J += reg_lambda / (2.0 * m) * (np.sum(t1w**2) + np.sum(t2w**2))
            ThetaGradW1 += (reg_lambda / m) * t1w
            ThetaGradW2 += (reg_lambda / m) * t2w

        return (J, WeightPacking.pack([ThetaGradW1, ThetaGradW2],
                                      [ThetaGradB1, ThetaGradB2]))


In [447]:
# Iris demo: train the one-hidden-layer network and score it on a held-out split.
import sklearn.datasets as datasets
from sklearn.metrics import accuracy_score
try:
    # scikit-learn >= 0.18 provides train_test_split in `model_selection`;
    # the old `cross_validation` module no longer exists in current releases.
    # The module is bound to the old name so later cells that call
    # `cross_validation.train_test_split` keep working.
    from sklearn import model_selection as cross_validation
except ImportError:
    from sklearn import cross_validation

# Print floats with five decimals.
np.set_printoptions(formatter={'float': lambda x: "{0:0.5f}".format(x)})
# Seed NumPy's global RNG (used by NN_1HL.rand_init) for repeatable runs.
np.random.seed(40)

iris = datasets.load_iris()
X = iris.data
y = iris.target

# Hold out 40% of the samples for testing.
X_train, X_test, y_train, y_test = cross_validation.train_test_split(X, y, test_size=0.4)

nn = NN_1HL(reg_lambda = 2.1, maxiter=500)#maxiter=0
nn.fit(X_train, y_train)

accuracy_score(y_test, nn.predict(X_test))

0.96666666666666667

In [5]:
0.96666666666666667

0.9666666666666667

In [448]:
# Digits demo on 'ex3data1.mat', with NumPy's global RNG reseeded first.
np.random.seed(40)
from scipy.io import loadmat

data = loadmat('ex3data1.mat')
X = data['X']
# One label per row of X, shifted down by one so labels can be used as
# zero-based indices (presumably the file stores labels starting at 1 - verify).
# TODO: Automatically handle this inside the class.
y = data['y'].reshape(X.shape[0], ) - 1

# Hold out 40% of the samples for testing.
X_train, X_test, y_train, y_test = cross_validation.train_test_split(
    X, y, test_size=0.4)

nn = NN_1HL(reg_lambda=2.1, maxiter=50)
nn.fit(X_train, y_train)

accuracy_score(y_test, nn.predict(X_test))



0.876

In [30]:
0.876

0.876

In [449]:
# Digits demo on 'ex3data1.mat'; this cell does not reseed the RNG itself.
from scipy.io import loadmat

data = loadmat('ex3data1.mat')
X = data['X']
# One label per row of X, shifted down by one so labels can be used as
# zero-based indices (presumably the file stores labels starting at 1 - verify).
# TODO: Automatically handle this inside the class.
y = data['y'].reshape(X.shape[0], ) - 1

# Hold out 40% of the samples for testing.
X_train, X_test, y_train, y_test = cross_validation.train_test_split(
    X, y, test_size=0.4)

nn = NN_1HL(reg_lambda=2.1, maxiter=50)
nn.fit(X_train, y_train)

accuracy_score(y_test, nn.predict(X_test))


0.875

In [1]:
import pymf

ImportError: No module named 'cvxopt'