In [120]:
import numpy as np
import matplotlib.pyplot as pl
from ipywidgets import interact, widgets
from matplotlib import animation
import matplotlib.pyplot as plt
import pandas as pd

class Activation(object):
    def __tanh(self, x):
        return np.tanh(x)

    def __tanh_deriv(self, a):
        # a = np.tanh(x)   
        return 1.0 - a**2
    def __logistic(self, x):
        return 1.0 / (1.0 + np.exp(-x))

    def __logistic_deriv(self, a):
        # a = logistic(x) 
        return  a * (1 - a )
    
    def __softmax(self, x):
        print(x)
        print(type(x))
        return np.exp(x)/(np.sum(np.exp(x)))
    
    #def __softmax_deriv(self,x):
    
    def __init__(self,activation='tanh'):
        if activation == 'logistic':
            self.f = self.__logistic
            self.f_deriv = self.__logistic_deriv
        elif activation == 'tanh':
            self.f = self.__tanh
            self.f_deriv = self.__tanh_deriv
        elif activation == 'softmax':
            self.f = self.__softmax
            self.f_deriv = self.__softmax_deriv
            
class HiddenLayer(object):    
    def __init__(self,n_in, n_out,
                 activation_last_layer='tanh',activation='tanh', W=None, b=None):
        """
        Typical hidden layer of a MLP: units are fully-connected and have
        sigmoidal activation function. Weight matrix W is of shape (n_in,n_out)
        and the bias vector b is of shape (n_out,).

        NOTE : The nonlinearity used here is tanh

        Hidden unit activation is given by: tanh(dot(input,W) + b)

        :type n_in: int
        :param n_in: dimensionality of input

        :type n_out: int
        :param n_out: number of hidden units

        :type activation: string
        :param activation: Non linearity to be applied in the hidden
                           layer
        """
        self.input=None
        self.activation=Activation(activation).f
        
        # activation deriv of last layer
        self.activation_deriv=None
        if activation_last_layer:
            self.activation_deriv=Activation(activation_last_layer).f_deriv

        self.W = np.random.uniform(
                low=-np.sqrt(6. / (n_in + n_out)),
                high=np.sqrt(6. / (n_in + n_out)),
                size=(n_in, n_out)
        )
        if activation == 'logistic':
            self.W *= 4

        self.b = np.zeros(n_out,)
        
        self.grad_W = np.zeros(self.W.shape)
        self.grad_b = np.zeros(self.b.shape)
        
    def forward(self, input):
        '''
        :type input: numpy.array
        :param input: a symbolic tensor of shape (n_in,)
        '''
        lin_output = np.dot(input, self.W) + self.b
        self.output = (
            lin_output if self.activation is None
            else self.activation(lin_output)
        )
        self.input=input
        return self.output
    
    def backward(self, delta, output_layer=False):         
        self.grad_W = np.atleast_2d(self.input).T.dot(np.atleast_2d(delta))
        self.grad_b = delta
        if self.activation_deriv:
            delta = delta.dot(self.W.T) * self.activation_deriv(self.input)
        return delta

class MLP:
    """
    """      
    def __init__(self, layers, activation=[None,'tanh','tanh']):
        """
        :param layers: A list containing the number of units in each layer.
        Should be at least two values
        :param activation: The activation function to be used. Can be
        "logistic" or "tanh"
        """        
        ### initialize layers
        self.layers=[]
        self.params=[]
        
        self.activation=activation
        for i in range(len(layers)-1):
            self.layers.append(HiddenLayer(layers[i],layers[i+1],activation[i],activation[i+1]))
    def forward(self,input):
        for layer in self.layers:
            output=layer.forward(input)
            input=output
        return output
    def criterion_MSE(self,y,y_hat):
        activation_deriv=Activation(self.activation[-1]).f_deriv
        # MSE
        error = y-y_hat
        loss=error**2
        # calculate the delta of the output layer
        delta=-error*activation_deriv(y_hat)
        # return loss and delta
        return loss,delta
    
    def criterion_CELOSS(self,y,yhat):
        activation_deriv=Activation(self.activation[-1]).f_deriv
        #CrossEntropyLoss
        loss = -np.sum(y*np.log(yhat))
        delta = y - yhat
        return loss,delta
        
    def backward(self,delta):
        delta=self.layers[-1].backward(delta,output_layer=True)
        for layer in reversed(self.layers[:-1]):
            delta=layer.backward(delta)
            
    def update(self,lr):
        for layer in self.layers:
            layer.W -= lr * layer.grad_W
            layer.b -= lr * layer.grad_b

    def fit(self,X,y,learning_rate=0.1, epochs=100):
        """
        Online learning.
        :param X: Input data or features
        :param y: Input targets
        :param learning_rate: parameters defining the speed of learning
        :param epochs: number of times the dataset is presented to the network for learning
        """ 
        X=np.array(X)
        y=np.array(y)
        to_return = np.zeros(epochs)
        
        for k in range(epochs):
            loss=np.zeros(X.shape[0])
            for it in range(X.shape[0]):
                i=np.random.randint(X.shape[0])
                
                # forward pass
                y_hat = self.forward(X[i])
                
                # backward pass
                #loss[it],delta=self.criterion_MSE(y[i],y_hat)
                loss[it],delta=self.criterion_CELOSS(y[i],y_hat)
                self.backward(delta)
                # update
                self.update(learning_rate)
            to_return[k] = np.mean(loss)
        return to_return

    def predict(self, x):
        x = np.array(x)
        output = np.zeros(x.shape[0])
        for i in np.arange(x.shape[0]):
            output[i] = nn.forward(x[i,:])
        return output

In [121]:
import h5py
with h5py.File('data/train_128.h5','r') as H:
    data = np.copy(H['data'])
with h5py.File('data/train_label.h5','r') as H:
    label = np.copy(H['label'])

In [122]:
label_dummies = np.array(pd.get_dummies(label))

In [123]:
label_dummies

array([[0, 0, 0, ..., 0, 0, 1],
       [1, 0, 0, ..., 0, 0, 0],
       [1, 0, 0, ..., 0, 0, 0],
       ...,
       [0, 0, 0, ..., 0, 0, 0],
       [1, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0]], dtype=uint8)

In [124]:
mlp = MLP([128,32,10],[None,'logistic','softmax'])

AttributeError: 'Activation' object has no attribute '_Activation__softmax_deriv'

In [125]:
mlp.fit(data,label_dummies,epochs=10)

[-1.36634319  0.29170513 -0.84839109  0.06290723  0.88189672  0.18696782
 -0.98511052  0.08004667  0.07726893 -0.2255777 ]
<class 'numpy.ndarray'>


AttributeError: 'Activation' object has no attribute '_Activation__softmax_deriv'

In [83]:
np.sum(np.exp(x),axis=1)[:,None])

SyntaxError: invalid syntax (<ipython-input-83-3fecbee6a973>, line 1)

In [86]:
np.exp(x)/(np.sum(np.exp(x),axis=1)[:,None])

array([[0.0853, 0.0853, 0.0853, ..., 0.0853, 0.0853, 0.232 ],
       [0.232 , 0.0853, 0.0853, ..., 0.0853, 0.0853, 0.0853],
       [0.232 , 0.0853, 0.0853, ..., 0.0853, 0.0853, 0.0853],
       ...,
       [0.0853, 0.0853, 0.0853, ..., 0.0853, 0.0853, 0.0853],
       [0.232 , 0.0853, 0.0853, ..., 0.0853, 0.0853, 0.0853],
       [0.0853, 0.0853, 0.0853, ..., 0.0853, 0.0853, 0.0853]],
      dtype=float16)

In [85]:
x

array([[0, 0, 0, ..., 0, 0, 1],
       [1, 0, 0, ..., 0, 0, 0],
       [1, 0, 0, ..., 0, 0, 0],
       ...,
       [0, 0, 0, ..., 0, 0, 0],
       [1, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0]], dtype=uint8)

In [79]:
np.exp(x)/(np.sum(np.exp(x),axis=1)[:,None])

array([[0.0853, 0.0853, 0.0853, ..., 0.0853, 0.0853, 0.232 ],
       [0.232 , 0.0853, 0.0853, ..., 0.0853, 0.0853, 0.0853],
       [0.232 , 0.0853, 0.0853, ..., 0.0853, 0.0853, 0.0853],
       ...,
       [0.0853, 0.0853, 0.0853, ..., 0.0853, 0.0853, 0.0853],
       [0.232 , 0.0853, 0.0853, ..., 0.0853, 0.0853, 0.0853],
       [0.0853, 0.0853, 0.0853, ..., 0.0853, 0.0853, 0.0853]],
      dtype=float16)

In [14]:
predictions = pd.get_dummies(output.argmax(axis=1),columns=label_dummies.columns)

In [17]:
predictions

Unnamed: 0,0,1,2,3,4,5,6,7,8,9
0,0,1,0,0,0,0,0,0,0,0
1,0,1,0,0,0,0,0,0,0,0
2,0,0,0,0,1,0,0,0,0,0
3,0,0,0,0,0,1,0,0,0,0
4,0,0,0,0,0,1,0,0,0,0
5,0,0,0,0,0,0,0,1,0,0
6,0,1,0,0,0,0,0,0,0,0
7,0,0,0,1,0,0,0,0,0,0
8,0,0,0,0,1,0,0,0,0,0
9,0,0,0,0,0,0,0,1,0,0


In [16]:
label_dummies

Unnamed: 0,0,1,2,3,4,5,6,7,8,9
0,0,0,0,0,0,0,0,0,0,1
1,1,0,0,0,0,0,0,0,0,0
2,1,0,0,0,0,0,0,0,0,0
3,0,0,0,1,0,0,0,0,0,0
4,1,0,0,0,0,0,0,0,0,0
5,0,0,1,0,0,0,0,0,0,0
6,0,0,0,0,0,0,0,1,0,0
7,0,0,1,0,0,0,0,0,0,0
8,0,0,0,0,0,1,0,0,0,0
9,0,0,0,0,0,1,0,0,0,0
