In [220]:
import numpy as np
import matplotlib.pyplot as pl
from ipywidgets import interact, widgets
from matplotlib import animation
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.preprocessing import StandardScaler,MinMaxScaler,Normalizer
from sklearn.model_selection import train_test_split
import h5py

class Activation(object):
    def __tanh(self, x):
        return np.tanh(x)

    def __tanh_deriv(self, a):
        # a = np.tanh(x)   
        return 1.0 - a**2
    def __logistic(self, x):
        return (1.0 / (1.0 + np.exp(-x)))

    def __logistic_deriv(self, a):
        # a = logistic(x) 
        return  (a * (1 - a ))
    
    def __softmax(self, x):
        #return np.exp(x)/(np.sum(np.exp(x),axis=1)[:,None])
        return (np.exp(x)/(np.sum(np.exp(x))))
    
    def __softmax_deriv(self, a):
        #a = softmax(x)
        return (a * (1 - a))
    
    def __ReLU(self,x):
        return np.vectorize(lambda x:x if x>0 else 0)(x)
    
    def __ReLU_deriv(self,a):
        #a = ReLU()
        return np.vectorize(lambda x:1 if x>0 else 0)(a)
    
    def __init__(self,activation='tanh'):
        if activation == 'logistic':
            self.f = self.__logistic
            self.f_deriv = self.__logistic_deriv
        elif activation == 'tanh':
            self.f = self.__tanh
            self.f_deriv = self.__tanh_deriv
        elif activation == 'softmax':
            self.f = self.__softmax
            self.f_deriv = self.__logistic_deriv
        elif activation == 'ReLU':
            self.f = self.__ReLU
            self.f_deriv = self.__ReLU_deriv
            
class HiddenLayer(object):    
    def __init__(self,n_in, n_out,
                 activation_last_layer='tanh',activation='tanh', dropout=None, W=None, b=None):
        """
        Typical hidden layer of a MLP: units are fully-connected and have
        sigmoidal activation function. Weight matrix W is of shape (n_in,n_out)
        and the bias vector b is of shape (n_out,).

        NOTE : The nonlinearity used here is tanh

        Hidden unit activation is given by: tanh(dot(input,W) + b)

        :type n_in: int
        :param n_in: dimensionality of input

        :type n_out: int
        :param n_out: number of hidden units

        :type activation: string
        :param activation: Non linearity to be applied in the hidden
                           layer
        """
        self.input=None
        self.activation=Activation(activation).f
        self.dropout=dropout
        self.dropout_vector = None
        
        # activation deriv of last layer
        self.activation_deriv=None
        if activation_last_layer:
            self.activation_deriv=Activation(activation_last_layer).f_deriv

        self.W = np.random.uniform(
                low=-np.sqrt(6. / (n_in + n_out)),
                high=np.sqrt(6. / (n_in + n_out)),
                size=(n_in, n_out)
        )
        if activation == 'logistic':
            self.W *= 4

        self.b = np.zeros(n_out,)
        
        self.grad_W = np.zeros(self.W.shape)
        self.grad_b = np.zeros(self.b.shape)
        
    def forward(self, input, mode):
        '''
        :type input: numpy.array
        :param input: a symbolic tensor of shape (n_in,)
        '''
        if (mode=='train' and self.dropout>0):
            self.dropout_vector = np.random.binomial(1, 1-self.dropout, size=input.shape)/(1-self.dropout)
            lin_output = np.dot(self.dropout_vector*input, self.W) + self.b
            self.output = (
                lin_output if self.activation is None
                else self.activation(lin_output)
            )

        lin_output = np.dot(input, self.W) + self.b
        self.output = (
            lin_output if self.activation is None
            else self.activation(lin_output)
        )
        print(input)
        print('selfinput',self.input)
        print(self.dropout_vector)
        self.input=input
        print(self.input)
    
    def backward(self, delta, output_layer=False):
        self.mode='train'
        if self.dropout > 0:
            delta *= self.dropout_vector
        
        self.grad_W = np.atleast_2d(self.input).T.dot(np.atleast_2d(delta))
        self.grad_b = delta
        
        if self.activation_deriv:
            delta = delta.dot(self.W.T) * self.activation_deriv(self.input)
        return delta

class MLP:
    """
    """      
    def __init__(self, layers, activation=[None,'tanh','tanh'], dropout=None):
        """
        :param layers: A list containing the number of units in each layer.
        Should be at least two values
        :param activation: The activation function to be used. Can be
        "logistic" or "tanh"
        """        
        ### initialize layers
        self.layers=[]
        self.params=[]
        self.mode = 'train'
        self.activation=activation
        self.dropout=dropout
        
        for i in range(len(layers)-1):
            self.layers.append(HiddenLayer(layers[i],layers[i+1],activation[i],activation[i+1],self.dropout[i]))
            
    def train(self):
        self.mode = 'train'
    
    def test(self):
        self.mode = 'test'

    def forward(self,input):
        for layer in self.layers:
            output=layer.forward(input=input, mode=self.mode)
            input=output
        return output

    def criterion_MSE(self,y,y_hat):
        activation_deriv=Activation(self.activation[-1]).f_deriv
        # MSE
        error = y-y_hat
        loss=error**2
        # calculate the delta of the output layer
        delta=-error*activation_deriv(y_hat)    
        # return loss and delta
        return loss,delta
    
    def criterion_CELoss(self,y,y_hat):
        error = y*np.log(y_hat)
        loss = -np.sum(error)
        delta = (y_hat-y)
        return loss,delta
        
    def backward(self,delta):
        delta=self.layers[-1].backward(delta,output_layer=True)
        for layer in reversed(self.layers[:-1]):
            delta=layer.backward(delta)
            
    def update(self,lr):
        for layer in self.layers:
            layer.W -= lr * layer.grad_W
            layer.b -= lr * layer.grad_b

    def fit(self,X,y,learning_rate=0.1, epochs=10):
        """
        Online learning.
        :param X: Input data or features
        :param y: Input targets
        :param learning_rate: parameters defining the speed of learning
        :param epochs: number of times the dataset is presented to the network for learning
        """
        self.train()
        X=np.array(X)
        y=np.array(y)
        to_return = np.zeros(epochs)
        
        for k in range(epochs):
            loss=np.zeros(X.shape[0])
            for it in range(X.shape[0]):
                i=np.random.randint(X.shape[0])
                
                # forward pass
                y_hat = self.forward(X[i])
                
                # backward pass
                if self.activation[-1] == 'softmax':
                    loss[it],delta=self.criterion_CELoss(y[i],y_hat)
                else:
                    loss[it],delta=self.criterion_MSE(y[i],y_hat)
                
                self.backward(delta)

                # update
                self.update(learning_rate)
            to_return[k] = np.mean(loss)
        return to_return

    def predict(self, x):
        self.test()
        x = np.array(x)
        output = np.zeros(x.shape[0])
        for i in np.arange(x.shape[0]):
            output[i] = self.forward(x[i,:])
        return output
    
    def optimize(self, X, y, learning_rate=0.01, test_size=0.25, epochs=10, verbose=True):
        """
        Online learning.
        :param X: Input data or features
        :param y: Input targets
        :param learning_rate: parameters defining the speed of learning
        :param epochs: number of times the dataset is presented to the network for learning
        """
        X=np.array(X)
        y=np.array(y)
        y_dummies = np.array(pd.get_dummies(y))
        X_train, X_val, y_train, y_val = train_test_split(X, y_dummies, test_size=test_size, shuffle=True)
        scaler = StandardScaler()
        #scaler = Normalizer()
        #scaler = MinMaxScaler()
        X_train = scaler.fit_transform(X_train)
        X_val = scaler.transform(X_val)

        losses = np.zeros(epochs)
        accuracies_val = []
        accuracies_test = []
        
        for e in range(epochs):
            loss=np.zeros(X_train.shape[0])         
            
            #yhat_train = self.forward(X_train)
            #yhat_val = self.forward(X_val)
            
            # Calculate train and Test Accuracy
            #accuracy_train = (np.sum(np.argmax(np.array(y_train),axis=1)==np.argmax(yhat_train,axis=1)))/(y_train.shape[0])
            #accuracy_val = (np.sum(np.argmax(np.array(y_val),axis=1)==np.argmax(yhat_val,axis=1)))/(y_val.shape[0])
            
            for it in range(X_train.shape[0]):
                i=np.random.randint(X_train.shape[0])
                
                self.train()
                # forward pass
                y_hat = self.forward(X_train[i])

                # backward pass
                if self.activation[-1] == 'softmax':
                    loss[it],delta = self.criterion_CELoss(y_train[i],y_hat)
                else:
                    loss[it],delta=self.criterion_MSE(y_train[i],y_hat)
                
                self.backward(delta)

                # update
                self.update(learning_rate)
                
            #yhat_train = self.forward(X_train)
            #yhat_val = self.forward(X_val)

            accuracies_val.append(accuracy_train)
            accuracies_test.append(accuracy_val)
            
            self.train()
            
            if verbose:
                print('Epoch: {}..\ntrain Accuracy: {} \nValidation Accuracy: {} \nLoss: {} \n'.
                      format(e, accuracy_train, accuracy_val, np.mean(loss)))
            
            losses[e] = np.mean(loss)
        return losses, accuracies_val, accuracies_test

In [148]:
with h5py.File('data/train_128.h5','r') as H:
    data = np.copy(H['data'])
with h5py.File('data/train_label.h5','r') as H:
    label = np.copy(H['label'])
    
#mlp = MLP([128,32,10],activation=[None, 'logistic', 'softmax'])
mlp = MLP([128,32,10],activation=[None, 'ReLU', 'softmax'])

losses, accuracies_train, accuracies_test = mlp.optimize(data, label, learning_rate=0.01,epochs=10)

plt.plot(accuracies_train, label='train')
plt.plot(accuracies_test, label='test')
plt.tight_layout()
plt.legend()
plt.savefig('accuracy_sigmoid.png')

TypeError: 'NoneType' object is not subscriptable

In [225]:
with h5py.File('data/train_128.h5','r') as H:
    data = np.copy(H['data'])
with h5py.File('data/train_label.h5','r') as H:
    label = np.copy(H['label'])
    
mlp = MLP([128,32,10],activation=[None, 'ReLU', 'softmax'], dropout=[0.5, 0.5, None])

losses, accuracies_train, accuracies_test = mlp.optimize(data, label, learning_rate=0.01,epochs=20)

plt.plot(accuracies_train, label='train')
plt.plot(accuracies_test, label='test')
plt.tight_layout()
plt.legend()
plt.savefig('accuracy_sigmoid.png')

[ 1.05778516  0.473979    1.30842024 -0.4751532  -0.57902824 -1.26005558
 -1.04704671  0.48388604  0.37575033  0.66095703  1.17493792  0.57578042
 -0.29310547 -0.62536469  0.2442653  -0.73312024 -0.52946692  0.18123304
  1.68951533 -0.71163625 -0.2034722   0.22573052 -0.75961942  0.65940552
  0.91735877  0.51433631  0.42766449 -0.88326779 -0.15511747 -1.00288678
 -1.39356599 -0.29283443 -0.90515907 -0.16275315  0.63808467 -0.43505649
  0.76398826 -2.26756638  0.72358976  0.33745921  0.22530707 -0.65888664
  1.14345481  0.29907087  0.38691209  1.40419022  0.4453685   1.21220925
 -0.51607733 -1.29167199 -0.39479709 -0.40576701  0.8253675  -0.73374311
  1.15314528 -0.65281253 -0.14220278  0.29070868 -0.35013578  2.48518638
 -1.06700246  1.32632607  0.34402067  1.22272509 -0.96487919 -1.70813368
  0.54510194 -0.74637    -0.05374896 -0.49385284 -0.10574678  1.15076542
  1.43349247  1.03318065  0.19081451 -0.49215808 -0.20377494  0.12594166
  0.42243633 -0.96857765  0.26784373 -0.83710436  0

AttributeError: 'NoneType' object has no attribute 'shape'