In [57]:
import numpy as np
import matplotlib.pyplot as pl
from ipywidgets import interact, widgets
from matplotlib import animation
import matplotlib.pyplot as plt
import pandas as pd

class Activation(object):
    def __tanh(self, x):
        return np.tanh(x)

    def __tanh_deriv(self, a):
        # a = np.tanh(x)   
        return 1.0 - a**2
    def __logistic(self, x):
        return (1.0 / (1.0 + np.exp(-x)))

    def __logistic_deriv(self, a):
        # a = logistic(x) 
        return  (a * (1 - a ))
    
    def __softmax(self, x):
        #return np.exp(x)/(np.sum(np.exp(x),axis=1)[:,None])
        return (np.exp(x)/(np.sum(np.exp(x))))
    
    def __softmax_deriv(self, a):
        #a = softmax(x)
        return a * (1 - a)
    
    def __init__(self,activation='tanh'):
        if activation == 'logistic':
            self.f = self.__logistic
            self.f_deriv = self.__logistic_deriv
        elif activation == 'tanh':
            self.f = self.__tanh
            self.f_deriv = self.__tanh_deriv
        elif activation == 'softmax':
            self.f = self.__softmax
            self.f_deriv = self.__logistic_deriv
            
class HiddenLayer(object):    
    def __init__(self,n_in, n_out,
                 activation_last_layer='tanh',activation='tanh', W=None, b=None):
        """
        Typical hidden layer of a MLP: units are fully-connected and have
        sigmoidal activation function. Weight matrix W is of shape (n_in,n_out)
        and the bias vector b is of shape (n_out,).

        NOTE : The nonlinearity used here is tanh

        Hidden unit activation is given by: tanh(dot(input,W) + b)

        :type n_in: int
        :param n_in: dimensionality of input

        :type n_out: int
        :param n_out: number of hidden units

        :type activation: string
        :param activation: Non linearity to be applied in the hidden
                           layer
        """
        self.input=None
        self.activation=Activation(activation).f
        
        # activation deriv of last layer
        self.activation_deriv=None
        if activation_last_layer:
            self.activation_deriv=Activation(activation_last_layer).f_deriv

        self.W = np.random.uniform(
                low=-np.sqrt(6. / (n_in + n_out)),
                high=np.sqrt(6. / (n_in + n_out)),
                size=(n_in, n_out)
        )
        if activation == 'logistic':
            self.W *= 4

        self.b = np.zeros(n_out,)
        
        self.grad_W = np.zeros(self.W.shape)
        self.grad_b = np.zeros(self.b.shape)
        
    def forward(self, input):
        '''
        :type input: numpy.array
        :param input: a symbolic tensor of shape (n_in,)
        '''
        lin_output = np.dot(input, self.W) + self.b
        self.output = (
            lin_output if self.activation is None
            else self.activation(lin_output)
        )
        self.input=input
        return self.output
    
    def backward(self, delta, output_layer=False):         
        self.grad_W = np.atleast_2d(self.input).T.dot(np.atleast_2d(delta))
        self.grad_b = delta
        if self.activation_deriv:
            delta = delta.dot(self.W.T) * self.activation_deriv(self.input)
        return delta

class MLP:
    """
    """      
    def __init__(self, layers, activation=[None,'tanh','tanh']):
        """
        :param layers: A list containing the number of units in each layer.
        Should be at least two values
        :param activation: The activation function to be used. Can be
        "logistic" or "tanh"
        """        
        ### initialize layers
        self.layers=[]
        self.params=[]
        
        self.activation=activation
        for i in range(len(layers)-1):
            self.layers.append(HiddenLayer(layers[i],layers[i+1],activation[i],activation[i+1]))
    def forward(self,input):
        for layer in self.layers:
            output=layer.forward(input)
            input=output
        return output
    def criterion_MSE(self,y,y_hat):
        activation_deriv=Activation(self.activation[-1]).f_deriv
        # MSE
        error = y-y_hat
        loss=error**2
        # calculate the delta of the output layer
        delta=-error*activation_deriv(y_hat)    
        # return loss and delta
        return loss,delta
    
    def criterion_CELoss(self,y,y_hat):
        error = y*np.log(y_hat)
        loss = -np.sum(error)
        delta = (y_hat-y)
        return loss,delta
        
    def backward(self,delta):
        delta=self.layers[-1].backward(delta,output_layer=True)
        for layer in reversed(self.layers[:-1]):
            delta=layer.backward(delta)
            
    def update(self,lr):
        for layer in self.layers:
            layer.W -= lr * layer.grad_W
            layer.b -= lr * layer.grad_b

    def fit(self,X,y,learning_rate=0.1, epochs=10):
        """
        Online learning.
        :param X: Input data or features
        :param y: Input targets
        :param learning_rate: parameters defining the speed of learning
        :param epochs: number of times the dataset is presented to the network for learning
        """ 
        X=np.array(X)
        y=np.array(y)
        to_return = np.zeros(epochs)
        
        for k in range(epochs):
            loss=np.zeros(X.shape[0])
            for it in range(X.shape[0]):
                i=np.random.randint(X.shape[0])
                
                # forward pass
                y_hat = self.forward(X[i])
                
                # backward pass
                #loss[it],delta=self.criterion_MSE(y[i],y_hat)
                loss[it],delta=self.criterion_CELoss(y[i],y_hat)
                self.backward(delta)

                # update
                self.update(learning_rate)
            to_return[k] = np.mean(loss)
        return to_return

    def predict(self, x):
        x = np.array(x)
        output = np.zeros(x.shape[0])
        for i in np.arange(x.shape[0]):
            output[i] = self.forward(x[i,:])
        return output

In [58]:
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

In [78]:
np.argmax(yhat_train,axis=1).shape

(45000,)

In [141]:
import h5py
with h5py.File('data/train_128.h5','r') as H:
    data = np.copy(H['data'])
with h5py.File('data/train_label.h5','r') as H:
    label = np.copy(H['label'])
    
scaler = StandardScaler()
    
label_dummies = pd.get_dummies(label)
X_train, X_test, y_train, y_test = train_test_split(data, label_dummies, shuffle=True)

In [142]:
X_train.shape

(45000, 128)

In [143]:
X_test.shape

(15000, 128)

In [128]:
from sklearn.preprocessing import OneHotEncoder

In [140]:
y_test

Unnamed: 0,0,1,2,3,4,5,6,7,8,9
29335,0,0,0,0,0,1,0,0,0,0
39259,0,0,0,0,0,0,0,1,0,0
26544,0,1,0,0,0,0,0,0,0,0
15463,0,1,0,0,0,0,0,0,0,0
53788,1,0,0,0,0,0,0,0,0,0
16169,0,0,0,0,1,0,0,0,0,0
15255,0,0,1,0,0,0,0,0,0,0
30470,0,0,0,0,0,0,0,0,0,1
11138,0,1,0,0,0,0,0,0,0,0
32140,1,0,0,0,0,0,0,0,0,0


In [110]:
import h5py
with h5py.File('data/train_128.h5','r') as H:
    data = np.copy(H['data'])
with h5py.File('data/train_label.h5','r') as H:
    label = np.copy(H['label'])
    
scaler = StandardScaler()
    
label_dummies = pd.get_dummies(label)
X_train, X_test, y_train, y_test = train_test_split(data, label_dummies, shuffle=True)
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)
accuracies_train = []
accuracies_test = []
for lr in [0.001,0.005]:
    mlp = MLP([128,32,10],[None,'logistic','softmax'])
    mlp.fit(X_train,y_train, learning_rate=lr, epochs=5)
    yhat_train = mlp.forward(X_train)
    yhat_test = mlp.forward(X_test)
    accuracy_train = (np.sum(np.argmax(y_train,axis=1)==np.argmax(yhat_train,axis=1)))/(y_train.shape[0])
    accuracy_test = (np.sum(np.argmax(y_test,axis=1)==np.argmax(yhat_test,axis=1)))/(y_test.shape[0])
    accuracies_train.append(accuracy_train)
    accuracies_test.append(accuracy_test)

ValueError: Shape of passed values is (1, 45000), indices imply (10, 45000)

In [109]:
X_train

array([[ 1.08287475, -0.63201033, -0.49005811, ...,  0.23009907,
        -0.2907446 , -0.88188981],
       [ 1.21669634,  1.1563025 ,  1.15015936, ...,  0.52184171,
         0.27489335, -1.50881343],
       [-0.36562876, -0.24440547,  1.61032483, ...,  1.16730681,
        -1.57731943, -0.11127073],
       ...,
       [-0.65231198, -0.13767049,  1.63858702, ...,  0.93457216,
         0.87405054, -0.5001044 ],
       [ 1.35244295,  1.02493048,  1.15641305, ...,  1.13078227,
        -0.77769052,  0.24904707],
       [-0.78221948,  0.57616119,  0.59690547, ...,  0.27912781,
        -0.67139356,  0.81602886]])