<h1> Feedforward Fully-connected Neural Network </h1>


<h3>Proto</h3>
단순하게 학습 정도만 구현할 것임. 버전 관리하면서 기능을 추가해 볼 것.  

batch_size : size of mini-batch INT  
n_iteration : number of iterations INT  
n_neurons : number of neurons per hidden layer INT    
n_layers : number of hidden layers INT    
learning_rate : learning rate FLOAT    
activation_function : activation function. [ 'sigmoid', 'relu' ]   
output_activation : activation function for last layer. [ 'sigmoid', 'relu', 'softmax']  
cost_function : cost function [ 'crossentropy', 'mse' ]  
optimizer : gradient descent algorithm. [ 'minibatchSGD' ] 

In [1]:
import pandas as pd
import numpy as np
from collections import Counter
from operator import attrgetter

In [2]:
# Preprocess: download the Iris dataset and split it into features and labels.

s = 'https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data'
# header=None: the file is read without a header row, so columns are numbered.
df = pd.read_csv(s,
                 header=None,
                 encoding='utf-8')

# The first four columns are used as features; the fifth column holds the
# species name that is turned into an integer label further down.
X, y = df.iloc[:,:4], df.iloc[:,4]

def label_encoding(x):
    """Map an Iris species name to its integer class label.

    'Iris-setosa' maps to 1 and 'Iris-virginica' maps to 3; every other
    value falls through to 2.
    """
    known_species = (('Iris-setosa', 1), ('Iris-virginica', 3))
    for species, code in known_species:
        if x == species:
            return code
    # Anything that is not one of the two names above is labelled 2.
    return 2
    
# Replace every species name in y with its integer class label.
y = [label_encoding(species) for species in y]

In [288]:
class FullyconnectedNN(object):
    """Feedforward fully-connected neural network trained by mini-batch SGD.

    Parameters
    ----------
    n_iteration : int
        Number of passes over the training data.
    shuffle : bool
        Whether the sample order is reshuffled before every pass.
    batch_size : int
        Number of samples per mini-batch (the last batch may be smaller).
    n_neurons : int
        Number of neurons in each hidden layer.
    n_layers : int
        Number of hidden layers (at least 1).
    learning_rate : float
        Step size of the gradient-descent update.
    activation_function : {'sigmoid', 'relu'}
        Activation used by every hidden layer.
    output_activation : {'sigmoid', 'relu', 'softmax'}
        Activation used by the output layer. 'softmax' requires
        ``cost_function='crossentropy'``.
    cost_function : {'crossentropy', 'mse'}
        'crossentropy' builds one output unit per distinct label and trains
        against one-hot targets; 'mse' builds a single output unit and
        trains against the numeric value of ``y``.
    optimizer : str
        Stored for reference; mini-batch SGD is the only implemented method.
    random_state : int
        Seed for weight initialisation and shuffling.

    Attributes
    ----------
    w_, b_ : list of ndarray
        Weights and biases per layer (hidden layers first, output last).
    cost_ : list of float
        Mean of the mini-batch costs for every pass.
    classes_ : ndarray
        Sorted distinct labels; only set when training with 'crossentropy'.
    """

    # Names accepted for the corresponding constructor arguments.
    _HIDDEN_ACTIVATIONS = ('sigmoid', 'relu')
    _OUTPUT_ACTIVATIONS = ('sigmoid', 'relu', 'softmax')
    _COST_FUNCTIONS = ('crossentropy', 'mse')
    # Added inside log() so that a prediction of exactly 0 stays finite.
    _LOG_DELTA = 1e-7

    def __init__(self, n_iteration=1000, shuffle=True, batch_size=32,
                 n_neurons=2, n_layers=2, learning_rate=0.1,
                 activation_function='relu', output_activation='relu',
                 cost_function='mse', optimizer='minibatchSGD',
                 random_state=42):
        self.n_iteration = n_iteration
        self.shuffle = shuffle
        self.batch_size = batch_size
        self.n_layers = n_layers
        self.n_neurons = n_neurons
        self.activation_function = activation_function
        self.learning_rate = learning_rate
        self.output_activation = output_activation
        self.cost_function_ = cost_function
        self.optimizer = optimizer
        self.random_state = random_state

    @staticmethod
    def _sigmoid(z):
        """Return the element-wise logistic function of z."""
        return 1 / (1 + np.exp(-z))

    @staticmethod
    def _softmax(z):
        """Return the row-wise softmax of a 2-D array."""
        if z.ndim != 2:
            raise ValueError("softmax expects a 2-dimensional array.")
        # Subtracting the row maximum avoids overflow in exp().
        e_x = np.exp(z - np.max(z, axis=1, keepdims=True))
        return e_x / np.sum(e_x, axis=1, keepdims=True)

    @staticmethod
    def _mse(y_hat, y):
        """Return half the summed squared error of a batch."""
        return np.sum(np.power(y_hat - y, 2)) / 2

    def _cross_entropy(self, y_hat, y):
        """Return the summed cross-entropy of a batch of one-hot targets."""
        return -np.sum(y * np.log(y_hat + self._LOG_DELTA))

    def _activate(self, z, name):
        """Return (activation, derivative) of the element-wise function `name`."""
        if name == 'relu':
            return np.maximum(z, 0), np.heaviside(z, 0)
        if name == 'sigmoid':
            s = self._sigmoid(z)
            return s, s * (1 - s)
        raise ValueError("Unknown activation function: %r" % (name,))

    def _check_hyperparameters(self):
        """Raise ValueError when the configuration cannot be trained."""
        if self.activation_function not in self._HIDDEN_ACTIVATIONS:
            raise ValueError("activation_function must be one of %r."
                             % (self._HIDDEN_ACTIVATIONS,))
        if self.output_activation not in self._OUTPUT_ACTIVATIONS:
            raise ValueError("output_activation must be one of %r."
                             % (self._OUTPUT_ACTIVATIONS,))
        if self.cost_function_ not in self._COST_FUNCTIONS:
            raise ValueError("cost_function must be one of %r."
                             % (self._COST_FUNCTIONS,))
        if (self.output_activation == 'softmax'
                and self.cost_function_ != 'crossentropy'):
            # 'mse' builds a single output unit, for which softmax is
            # constant and therefore meaningless.
            raise ValueError("softmax output requires the crossentropy cost.")
        if self.n_layers < 1:
            raise ValueError("n_layers must be at least 1.")
        if self.batch_size < 1:
            raise ValueError("batch_size must be at least 1.")

    def _init_parameters(self, rgen, n_features, n_classes):
        """Draw small random weights and biases for every layer.

        `n_classes` is None for the 'mse' cost (single output unit).
        """
        def draw(shape):
            return rgen.normal(loc=.0, scale=.01, size=shape)

        self.w_ = [draw((n_features, self.n_neurons))]
        self.b_ = [draw(self.n_neurons)]
        for _ in range(self.n_layers - 1):
            self.w_.append(draw((self.n_neurons, self.n_neurons)))
            self.b_.append(draw(self.n_neurons))
        if n_classes is None:
            # Single output unit: 1-D weights give a 1-D prediction vector.
            self.w_.append(draw(self.n_neurons))
            self.b_.append(draw(1))
        else:
            # (inputs, outputs) so that hidden_output @ w is well defined.
            self.w_.append(draw((self.n_neurons, n_classes)))
            self.b_.append(draw(n_classes))

    def _create_minibatches(self, n_samples, rgen):
        """Return a list of index arrays that together cover all samples."""
        indices = np.arange(n_samples)
        if self.shuffle:
            rgen.shuffle(indices)
        return [indices[start:start + self.batch_size]
                for start in range(0, n_samples, self.batch_size)]

    def _backward(self, X_batch, y_batch, y_hat):
        """Apply one gradient-descent step for a mini-batch.

        Relies on `self._o` / `self._o_prime` as left behind by the
        `predict` call that produced `y_hat`. Gradients are averaged over
        the batch so the step size does not scale with the batch size.
        """
        n_samples = X_batch.shape[0]

        # Derivative of the cost with respect to the network output.
        if self.cost_function_ == 'crossentropy':
            grad_out = -y_batch / (y_hat + self._LOG_DELTA)
        else:
            grad_out = y_hat - y_batch

        # Derivative with respect to the output layer's pre-activation.
        if self.output_activation == 'softmax':
            # Softmax Jacobian-vector product, computed row by row.
            weighted = np.sum(grad_out * y_hat, axis=1, keepdims=True)
            delta = y_hat * (grad_out - weighted)
        else:
            delta = grad_out * self._o_prime[-1]
        # The single-output ('mse') case is 1-D; treat it as one column.
        delta = delta.reshape(n_samples, -1)

        layer_inputs = [X_batch] + self._o
        for i in range(self.n_layers, -1, -1):
            w = self.w_[i]
            b = self.b_[i]
            w_2d = w.reshape(w.shape[0], -1)
            grad_w = np.matmul(layer_inputs[i].T, delta) / n_samples
            grad_b = np.sum(delta, axis=0) / n_samples
            if i > 0:
                # Propagate with the weights used in the forward pass,
                # i.e. before this layer is updated.
                delta = np.matmul(delta, w_2d.T) * self._o_prime[i - 1]
            self.w_[i] = w - self.learning_rate * grad_w.reshape(w.shape)
            self.b_[i] = b - self.learning_rate * grad_b.reshape(b.shape)

    def fit(self, X, y):
        """Train the network on X (n_samples, n_features) and labels y.

        Returns
        -------
        self

        Raises
        ------
        ValueError
            If X and y disagree in length, X is empty or not 2-D, or the
            hyperparameters are invalid.
        """
        if isinstance(X, pd.DataFrame):
            X = X.to_numpy()
        X = np.asarray(X, dtype=float)
        y = np.asarray(y)
        if X.ndim != 2:
            raise ValueError("X must be 2-dimensional.")
        if X.shape[0] != len(y):
            raise ValueError("X and y don't match.")
        if X.shape[0] == 0:
            raise ValueError("X must contain at least one sample.")
        self._check_hyperparameters()

        rgen = np.random.RandomState(self.random_state)

        if self.cost_function_ == 'crossentropy':
            # One-hot encode the labels, one column per distinct class.
            self.classes_ = np.unique(y)
            targets = (y[:, np.newaxis]
                       == self.classes_[np.newaxis, :]).astype(float)
            cost_fn = self._cross_entropy
            self._init_parameters(rgen, X.shape[1], len(self.classes_))
        else:
            targets = y.astype(float)
            cost_fn = self._mse
            self._init_parameters(rgen, X.shape[1], None)

        self.cost_ = []
        for _ in range(self.n_iteration):
            batch_costs = []
            for batch in self._create_minibatches(X.shape[0], rgen):
                X_batch = X[batch]
                y_batch = targets[batch]
                # Forward pass; also caches activations for the update.
                y_hat = self.predict(X_batch)
                batch_costs.append(cost_fn(y_hat, y_batch))
                self._backward(X_batch, y_batch, y_hat)
            self.cost_.append(np.mean(batch_costs))
        return self

    def predict(self, X):
        """Run a forward pass and return the output-layer activations.

        The hidden activations are stored in `self._o` and the activation
        derivatives in `self._o_prime` (hidden layers first, followed by the
        output layer unless it is softmax). The return value is the raw
        network output, not a class label.
        """
        if isinstance(X, pd.DataFrame):
            X = X.to_numpy()
        layer_input = np.asarray(X, dtype=float)

        self._o = []
        self._o_prime = []
        for i in range(self.n_layers):
            z = np.matmul(layer_input, self.w_[i]) + self.b_[i]
            layer_input, slope = self._activate(z, self.activation_function)
            self._o.append(layer_input)
            self._o_prime.append(slope)

        z = (np.matmul(layer_input, self.w_[self.n_layers])
             + self.b_[self.n_layers])
        if self.output_activation == 'softmax':
            return self._softmax(z)
        y_hat, slope = self._activate(z, self.output_activation)
        self._o_prime.append(slope)
        return y_hat
        

In [289]:
# Build a network with the default hyperparameters.
nn = FullyconnectedNN()

In [290]:
# Train on the Iris features and the integer-encoded labels.
nn.fit(X,y)

<__main__.FullyconnectedNN at 0x2b18da6c100>

In [291]:
# Output of the last hidden layer from the most recent forward pass.
nn._o[-1]

array([[0., 0.],
       [0., 0.],
       [0., 0.],
       [0., 0.],
       [0., 0.],
       [0., 0.],
       [0., 0.],
       [0., 0.],
       [0., 0.],
       [0., 0.],
       [0., 0.],
       [0., 0.],
       [0., 0.],
       [0., 0.],
       [0., 0.],
       [0., 0.],
       [0., 0.],
       [0., 0.],
       [0., 0.],
       [0., 0.],
       [0., 0.],
       [0., 0.]])

In [292]:
# Last stored activation derivative from the most recent forward pass.
nn._o_prime[-1]

array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0.])

In [293]:
# Weights of the output layer.
nn.w_[-1]

array([-0.01012831,  0.00314247])

In [294]:
# Weights of the output layer.
# NOTE(review): same expression as the cell before it; possibly nn.b_[-1] was intended - confirm.
nn.w_[-1]

array([-0.01012831,  0.00314247])

In [295]:
# Mean mini-batch cost recorded for each training iteration.
nn.cost_

[70.0,
 70.0,
 70.0,
 70.0,
 70.0,
 70.0,
 70.0,
 70.0,
 70.0,
 70.0,
 70.0,
 70.0,
 70.0,
 70.0,
 70.0,
 70.0,
 70.0,
 70.0,
 70.0,
 70.0,
 70.0,
 70.0,
 70.0,
 70.0,
 70.0,
 70.0,
 70.0,
 70.0,
 70.0,
 70.0,
 70.0,
 70.0,
 70.0,
 70.0,
 70.0,
 70.0,
 70.0,
 70.0,
 70.0,
 70.0,
 70.0,
 70.0,
 70.0,
 70.0,
 70.0,
 70.0,
 70.0,
 70.0,
 70.0,
 70.0,
 70.0,
 70.0,
 70.0,
 70.0,
 70.0,
 70.0,
 70.0,
 70.0,
 70.0,
 70.0,
 70.0,
 70.0,
 70.0,
 70.0,
 70.0,
 70.0,
 70.0,
 70.0,
 70.0,
 70.0,
 70.0,
 70.0,
 70.0,
 70.0,
 70.0,
 70.0,
 70.0,
 70.0,
 70.0,
 70.0,
 70.0,
 70.0,
 70.0,
 70.0,
 70.0,
 70.0,
 70.0,
 70.0,
 70.0,
 70.0,
 70.0,
 70.0,
 70.0,
 70.0,
 70.0,
 70.0,
 70.0,
 70.0,
 70.0,
 70.0,
 70.0,
 70.0,
 70.0,
 70.0,
 70.0,
 70.0,
 70.0,
 70.0,
 70.0,
 70.0,
 70.0,
 70.0,
 70.0,
 70.0,
 70.0,
 70.0,
 70.0,
 70.0,
 70.0,
 70.0,
 70.0,
 70.0,
 70.0,
 70.0,
 70.0,
 70.0,
 70.0,
 70.0,
 70.0,
 70.0,
 70.0,
 70.0,
 70.0,
 70.0,
 70.0,
 70.0,
 70.0,
 70.0,
 70.0,
 70.0,
 70.0,
 70.0,
 70.0,