In [1]:
import numpy as np
from scipy import optimize
from __future__ import division

In [2]:
class SigmoidActivationFunction:
    """Logistic sigmoid activation together with its first derivative."""

    @staticmethod
    def value(z):
        """Return the element-wise logistic sigmoid ``1 / (1 + exp(-z))``."""
        denominator = 1 + np.exp(-z)
        return 1 / denominator

    @staticmethod
    def derivative(z):
        """Return the sigmoid's slope at ``z``, i.e. ``s * (1 - s)`` with ``s = value(z)``."""
        activation = SigmoidActivationFunction.value(z)
        complement = 1 - activation
        return activation * complement
    
class WeightPacking:
    """Convert between per-layer (weights, biases) lists and one flat parameter vector."""

    @staticmethod
    def pack(weights, biases):
        """Flatten every layer into a single 1-D array.

        For each layer, the transposed bias ``b.T`` is placed in column 0 in
        front of the transposed weights ``w.T``; that matrix is flattened row
        by row and the per-layer vectors are concatenated in layer order.
        This is the layout that ``unpack`` reads back.
        """
        flat_layers = []
        for w, b in zip(weights, biases):
            layer_matrix = np.concatenate((b.T, w.T), 1)
            flat_layers.append(layer_matrix.reshape(-1))
        return np.concatenate(flat_layers)

    @staticmethod
    def unpack(thetas, layers):
        """Split the flat vector ``thetas`` back into per-layer weights and biases.

        ``layers`` lists the unit count of every layer, input first. For each
        consecutive pair ``(n_in, n_out)`` a slice of ``n_out * (n_in + 1)``
        values is reshaped to ``(n_out, n_in + 1)``; column 0 becomes the bias
        row of shape ``(1, n_out)`` and the remaining columns, transposed,
        become the ``(n_in, n_out)`` weight matrix.

        Returns the tuple ``(weights, biases)``, two lists in layer order.
        """
        weights, biases = [], []
        offset = 0
        for n_in, n_out in zip(layers[:-1], layers[1:]):
            size = n_out * (n_in + 1)
            block = thetas[offset:offset + size].reshape((n_out, n_in + 1))
            offset += size
            biases.append(block[:, 0][np.newaxis])
            weights.append(block[:, 1:].T)
        return weights, biases
    
    
class CrossEntropyCostFunction:
    """Cross-entropy cost and the matching output-layer error term."""

    @staticmethod
    def cost(actual, predicted):
        """Return the total cross-entropy between ``actual`` and ``predicted``.

        The element-wise terms are summed (not averaged). ``np.nan_to_num`` is
        applied before summing, so NaN terms such as ``0 * log(0)`` count as 0
        and infinite terms become large finite numbers.
        """
        positive_term = -actual * np.log(predicted)
        negative_term = (1 - actual) * np.log(1 - predicted)
        return np.sum(np.nan_to_num(positive_term - negative_term))

    @staticmethod
    def delta(actual, predicted, activationFunction):
        """Return the output-layer error ``predicted - actual``.

        ``activationFunction`` is accepted but not used by this cost function.
        """
        error = predicted - actual
        return error
    
class WeightInitializationRandom:
    """Draw initial layer parameters uniformly from ``[-eps, eps)``."""

    def __init__(self, epsilon_init):
        """Remember ``epsilon_init`` as the half-width of the sampling interval."""
        self.eps = epsilon_init

    def Initialize(self, l_in, l_out):
        """Sample parameters for a layer with ``l_in`` inputs and ``l_out`` outputs.

        One ``(l_out, l_in + 1)`` matrix is drawn from NumPy's global random
        generator. Column 0 becomes the bias row of shape ``(1, l_out)``; the
        remaining columns, transposed, become the ``(l_in, l_out)`` weights.

        Returns the tuple ``(weights, bias)``.
        """
        draws = np.random.rand(l_out, l_in + 1)
        combined = draws * 2 * self.eps - self.eps
        bias_row = combined[:, 0][np.newaxis]
        weight_matrix = combined[:, 1:].T
        return (weight_matrix, bias_row)

In [95]:
class NN_1HL(object):
    """Fully connected feed-forward classifier trained with ``scipy.optimize.minimize``.

    Despite the historical name, the network may have any number of hidden
    layers; ``hidden_layer_size`` lists the unit count of each one.

    Parameters
    ----------
    WeightInitialization : object
        Provides ``Initialize(l_in, l_out)`` returning ``(weights, bias)``.
    reg_lambda : float
        L2 regularisation strength applied to the weights (never the biases).
    hidden_layer_size : sequence of int
        Unit count of every hidden layer, in order.
    opti_method : str
        ``method`` argument forwarded to ``scipy.optimize.minimize``.
    maxiter : int
        Iteration limit passed to the optimiser via ``options``.
    ActivationFunction : class
        Provides static ``value(z)`` and ``derivative(z)``.
    CostFunction : class
        Provides static ``cost(actual, predicted)`` and
        ``delta(actual, predicted, activationFunction)``.
    """

    def __init__(self, WeightInitialization, reg_lambda=0,
                 hidden_layer_size=[30, 20], opti_method='TNC', maxiter=500,
                 ActivationFunction = SigmoidActivationFunction, CostFunction = CrossEntropyCostFunction):
        self.reg_lambda = reg_lambda
        # Copy so the shared default list (or a caller's list) is never aliased.
        self.hidden_layer_size = list(hidden_layer_size)
        self._ActivationFunction = ActivationFunction
        self.activation_func = ActivationFunction.value
        self.activation_func_prime = ActivationFunction.derivative
        self.method = opti_method
        self.maxiter = maxiter
        self._CostFunction = CostFunction.cost
        self._CostFunctionDelta = CostFunction.delta
        self._WeightInitialization = WeightInitialization

    def predict(self, X):
        """Return the most probable label for every row of ``X``.

        After ``fit`` the winning output index is translated back to the
        original label values; if no label table exists the raw index is
        returned.
        """
        best = self.predict_proba(X).argmax(0)
        classes = getattr(self, 'classes_', None)
        return best if classes is None else classes[best]

    def predict_proba(self, X):
        """Return the output-layer activations, transposed to ``(n_labels, n_samples)``."""
        a, z = self._forward(X, self.weights, self.biases)
        return a[-1].T

    def _forward(self, X, weights, biases):
        """Run a forward pass.

        Returns ``(a, z)``: ``a`` holds the input followed by each layer's
        activation, ``z`` holds each layer's pre-activation.
        """
        a = [X]
        z = []
        for w, b in zip(weights, biases):
            z.append(np.dot(a[-1], w) + b)
            a.append(self.activation_func(z[-1]))
        return a, z

    def fit(self, X, y):
        """Train on samples ``X`` (one row per sample) with labels ``y``.

        Labels may be arbitrary sortable values; they are mapped to the
        indices ``0..k-1`` internally and the sorted unique labels are kept in
        ``self.classes_``. The learned parameters are stored in
        ``self.weights`` and ``self.biases``.
        """
        input_layer_size = X.shape[1]

        # Map labels to contiguous indices so the one-hot encoding in
        # ``function`` also works for labels that are not already 0..k-1.
        self.classes_, y_index = np.unique(np.asarray(y).reshape(-1), return_inverse=True)
        num_labels = len(self.classes_)

        layers = [input_layer_size] + list(self.hidden_layer_size) + [num_labels]
        tw = []
        tb = []
        for l in range(len(layers) - 1):
            w, b = self._WeightInitialization.Initialize(layers[l], layers[l + 1])
            tw.append(w)
            tb.append(b)

        thetas0 = WeightPacking.pack(tw, tb)

        options = {'maxiter': self.maxiter}
        # jac=True: ``function`` returns the cost and its gradient together.
        _res = optimize.minimize(self.function, thetas0, jac=True, method=self.method,
                                 args=(layers, X, y_index, self.reg_lambda), options=options)

        self.weights, self.biases = WeightPacking.unpack(_res.x, layers)

    def function(self, thetas, layers, X, y, reg_lambda):
        """Return ``(cost, gradient)`` for the flat parameter vector ``thetas``.

        ``layers`` lists the unit count of every layer (input first), ``y``
        holds integer class indices in ``0..layers[-1]-1`` and ``reg_lambda``
        is the L2 strength. The gradient is packed in the same layout as
        ``thetas``. Works for any number of layers.
        """
        # Variable setup
        m = X.shape[0]
        Y = np.eye(layers[-1])[y]  # one-hot targets
        wk, bk = WeightPacking.unpack(thetas, layers)

        # Forward
        a, z = self._forward(X, wk, bk)

        # Cost
        J = self._CostFunction(Y, a[-1]) / m

        # Backward: start from the cost function's output error and walk the
        # layers from last to first.
        n_layers = len(wk)
        grad_w = [None] * n_layers
        grad_b = [None] * n_layers
        delta = self._CostFunctionDelta(Y, a[-1], self._ActivationFunction)
        for l in range(n_layers - 1, -1, -1):
            grad_w[l] = np.dot(a[l].T, delta) / m
            grad_b[l] = np.mean(delta, 0)[np.newaxis]
            if l > 0:
                delta = np.dot(delta, wk[l].T) * self.activation_func_prime(z[l - 1])

        if reg_lambda != 0:
            # Use the argument, not self.reg_lambda, so that cost and gradient
            # are always regularised with the same strength.
            J += reg_lambda / (2 * m) * np.sum([np.sum(w**2) for w in wk])
            for l in range(n_layers):
                grad_w[l] += (reg_lambda / m) * wk[l]
        return (J, WeightPacking.pack(grad_w, grad_b))



In [96]:
import sklearn.datasets as datasets
try:
    # sklearn.cross_validation was removed in scikit-learn 0.20; its
    # replacement is bound to the old name so that the later cells calling
    # cross_validation.train_test_split keep working.
    from sklearn import model_selection as cross_validation
except ImportError:
    from sklearn import cross_validation
from sklearn.metrics import accuracy_score

# Print floats with five decimals and fix NumPy's global random seed.
np.set_printoptions(formatter={'float': lambda x: "{0:0.5f}".format(x)})
np.random.seed(40)

iris = datasets.load_iris()
X = iris.data
y = iris.target

# Hold out 40% of the samples for testing.
X_train, X_test, y_train, y_test = cross_validation.train_test_split(X, y, test_size=0.4)

winit = WeightInitializationRandom(0.12)
nn = NN_1HL(WeightInitialization = winit, reg_lambda = 0.1, maxiter=500)
nn.fit(X_train, y_train)

# Last expression of the cell: accuracy on the held-out split.
accuracy_score(y_test, nn.predict(X_test))

0.93333333333333335

In [90]:
0.96666666666666667 # NOTE(review): hand-recorded result; "[25], reg = 2.1" presumably means hidden_layer_size=[25] and reg_lambda=2.1 — confirm

0.9666666666666667

In [5]:
# Fix NumPy's global random seed before splitting and drawing initial weights.
np.random.seed(40)
from scipy.io import loadmat

data = loadmat('ex3data1.mat')
X = data['X']
# Flatten the labels to one value per row of X and shift them down by one
# (presumably they are stored 1-based in the file — confirm).
# TODO: do this shift automatically inside the class.
y = data['y'].reshape(X.shape[0]) - 1

# Hold out 40% of the samples for testing.
X_train, X_test, y_train, y_test = cross_validation.train_test_split(X, y, test_size=0.4)

winit = WeightInitializationRandom(0.12)
nn = NN_1HL(WeightInitialization=winit, reg_lambda=2.1, maxiter=50)
nn.fit(X_train, y_train)

# Last expression of the cell: accuracy on the held-out split.
accuracy_score(y_test, nn.predict(X_test))




0.876

In [8]:
0.876  # NOTE(review): scratch cell holding a bare literal; it matches the 0.876 displayed by the preceding cell

0.876

In [195]:
from sklearn import preprocessing
from scipy.io import loadmat

data = loadmat('ex3data1.mat')
X = data['X']

# Input-scaling variants that were tried; all are currently disabled.
#X += np.abs(np.min(X))
#min_max_scaler = preprocessing.MinMaxScaler()
#X = min_max_scaler.fit_transform(X)
#X = (X + np.abs(np.min(X))) / (np.max(X) + np.abs(np.min(X)))
#print(np.min(X), np.max(X))

# Flatten the labels to one value per row of X and shift them down by one.
# TODO: do this shift automatically inside the class.
y = data['y'].reshape(X.shape[0]) - 1

# Repeat split -> train -> score 200 times and report the mean test accuracy.
winit = WeightInitializationRandom(0.12)
trialsList = []
for trials in range(200):
    X_train, X_test, y_train, y_test = cross_validation.train_test_split(X, y, test_size=0.4)
    nn = NN_1HL(WeightInitialization=winit, reg_lambda=2.1, maxiter=50)
    nn.fit(X_train, y_train)
    trialsList.append(accuracy_score(y_test, nn.predict(X_test)))
print(np.mean(trialsList))
    

0.8536125


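Results for each input-scaling variant (presumably the mean accuracy printed by the 200-trial cell above):
no min-max scaling: 0.860855
min-max scaling (MinMaxScaler): 0.803995
min-max scaling over the whole matrix: 0.8357325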

In [196]:
# Factorise a shifted copy of X_train with a 4-component NMF and print the
# reconstruction W.H.
# NOTE(review): the toy array assigned to X below is not used by any live
# statement in this cell (only by the commented-out model.fit line), yet it
# rebinds the notebook-level X. X_train is not defined in this cell; it must
# already exist from an earlier cell.
X = np.array([[1,1], [2, 1], [3, 1.2], [4, 1], [5, 0.8], [6, 1]])
from sklearn.decomposition import NMF
model = NMF(n_components=4, init='nndsvdar', random_state=0)
#model.fit(X).gcomponents_
#print(X_train + np.abs(np.min(X_train)))
# Every entry is shifted up by |min(X_train)| before fitting — presumably so
# the input is non-negative for NMF; confirm.
W = model.fit_transform(X_train + np.abs(np.min(X_train)))
H = model.components_

#print(W.shape)
#print(H.T)
#print(np.max(X_train))

# Product of the two factors, i.e. the reconstruction of the shifted matrix.
print(np.dot(W, H))

[[0.16531 0.16531 0.16531 ..., 0.16532 0.16531 0.16531]
 [0.12314 0.12314 0.12314 ..., 0.12314 0.12314 0.12314]
 [0.10187 0.10187 0.10187 ..., 0.10188 0.10187 0.10187]
 ..., 
 [0.14059 0.14059 0.14059 ..., 0.14060 0.14059 0.14059]
 [0.14721 0.14721 0.14721 ..., 0.14721 0.14721 0.14721]
 [0.10188 0.10188 0.10188 ..., 0.10189 0.10188 0.10188]]


array([[0.50000, 0.00000, 1.00000],
       [1.00000, 0.50000, 0.40000],
       [0.00000, 1.00000, 0.00000]])