In [1]:
from random import seed
from random import random
from math import exp
import pandas as pd
import numpy as np
from sklearn.preprocessing import OneHotEncoder
from sklearn.datasets import load_iris


In [2]:
class Network:
  #constructor
  def __init__(self, n_inputs, n_hidden, n_outputs=3, bias=1, learning_rate=0.1):
    self.n_inputs = n_inputs # number of input unit
    self.n_hidden = n_hidden # number of hidden unit
    self.n_outputs = n_outputs # number of output unit
    self.bias = bias # bias parameter
    self.learning_rate = learning_rate
    
    # parameters of weight on input to hidden layer 
    self.weights_ItoH = np.random.uniform(-1, 1, (n_inputs+1, n_hidden)) 
    self.dweights_ItoH = np.zeros((n_inputs+1, n_hidden))
    
    # parameters of weight on hidden to output layer 
    self.weights_HtoO = np.random.uniform(-1, 1, (n_hidden+1, n_outputs))
    self.dweights_HtoO = np.zeros((n_hidden+1, n_outputs))
    
    # output value and error of hidden layer
    self.pre_activation_H = np.zeros(n_hidden)
    self.post_activation_H = np.zeros(n_hidden)
    self.error_H = np.zeros(n_hidden)
    
    # output value and error of output layer
    self.pre_activation_O = np.zeros(n_outputs)
    self.post_activation_O = np.zeros(n_outputs)
    self.error_O = np.zeros(n_outputs)
  
  # Net calculation method
  ## Calculate net for an input
  def calculate_net_ItoH(self, sample, node):
    input_plus_bias = np.append(self.data[sample,:], self.bias)
    return np.dot(input_plus_bias, self.weights_ItoH[:, node])
  ## Calculate net for a hidden unit
  def calculate_net_HtoO(self, node):
    hidden_plus_bias =  np.append(self.post_activation_H, self.bias)
    return np.dot(hidden_plus_bias, self.weights_HtoO[:, node])

  # activation function
  def activation(self, x):
  	return 1.0/(1.0 + np.exp(-x))

  def one_hot_encode(self, target):
    encoder = OneHotEncoder(sparse=False)
    new_target = target.reshape(len(target), 1)
    target_encode = encoder.fit_transform(new_target)
    return target_encode

  #fit the network to the data
  def fit(self, data, target, epoch_limit=100, mini_batch_limit=10):
    self.data = data
    self.target = self.one_hot_encode(target)
    self.epoch_limit = epoch_limit

    len_data = len(data)

    # iterate each epoch
    for epoch in range(epoch_limit):

      #iterate each instance
      mini_batch_count = 0
      for instance in range(len_data):
            
        # From input layer to hidden layer
        ## iterate every hidden layer to fill the values
        for hidden_unit in range(self.n_hidden):
          ### calculate the net input
          self.pre_activation_H[hidden_unit] = self.calculate_net_ItoH(instance, hidden_unit)
          ### calculate the activated value
          self.post_activation_H[hidden_unit] = self.activation(self.pre_activation_H[hidden_unit])

        # From hidden layer to output layer
        for output_unit in range(self.n_outputs):
          ### calculate the net input
          self.pre_activation_O[output_unit] = self.calculate_net_HtoO(output_unit)
          ### calculate the activated value
          self.post_activation_O[output_unit] = self.activation(self.pre_activation_O[output_unit])

        # Backpropagation
        ## if already at minibatch limit or at the last instance, update the weight 
        if((mini_batch_count == mini_batch_limit) or (instance == len_data - 1)):
          
          #update weight - input to hidden
          self.weights_ItoH = np.add(self.weights_ItoH, self.dweights_ItoH)
          #update weight - hidden to output
          self.weights_HtoO = np.add(self.weights_HtoO, self.dweights_HtoO)

          #reset delta weight to zero
          self.dweights_ItoH = np.zeros((self.n_inputs+1, self.n_hidden))
          self.dweights_HtoO = np.zeros((self.n_hidden+1, self.n_outputs))

          #reset iterator
          mini_batch_count = 0
        
        ## if below minibatch limit, update delta-weight
        else:
          ### update delta-weight from output
          for hidden_unit in range(self.n_hidden + 1): # (+1 accomodating bias)
            for output_unit in range(self.n_outputs):
              #### (Minus sign merged). Formula: (target_ok - out_ok) * out_ok * (1 - out_ok) * out_hj
              target_o = self.target[instance][output_unit]
              out_o = self.post_activation_O[output_unit]
              
              ##### calculating weight of bias
              if (hidden_unit == self.n_hidden): 
                out_h = self.bias
              ##### calculating weight of activated hidden unit
              else:
                out_h = self.post_activation_H[hidden_unit]

              self.error_O[output_unit] = (target_o - out_o) * out_o * (1 - out_o) 
              self.dweights_HtoO[hidden_unit][output_unit] += self.error_O[output_unit] * out_h * self.learning_rate

          ### update delta-weight from hidden layer
          for input_unit in range(self.n_inputs + 1): # (+1 accomodating bias)
            for hidden_unit in range(self.n_hidden):
              #### Formula: sigma_ok(error_o * w_ho) * out_hj * (1 - out_hj) * input_i
              sigma_err_output = np.dot(self.error_O, self.weights_HtoO[hidden_unit,:])
              out_h = self.post_activation_H[hidden_unit]

              ##### calculating weight of bias
              if(input_unit == self.n_inputs): 
                input_i = self.bias
              ##### calculating weight of input unit
              else:
                input_i = self.data[instance, input_unit] 
              
              self.error_H[hidden_unit] = sigma_err_output * out_h * (1 - out_h) 
              self.dweights_ItoH[input_unit][hidden_unit] += self.error_H[hidden_unit] * input_i * self.learning_rate
          
          #increment iterator
          mini_batch_count += 1
        
        

  def predict(self, data):
    self.data = data
    result = []
    #iterate each instance
    for instance in range(len(data)):      
      ## iterate every hidden layer to fill the values
      for hidden_unit in range(self.n_hidden):
        ### calculate the net input
        self.pre_activation_H[hidden_unit] = self.calculate_net_ItoH(instance, hidden_unit)
        ### calculate the activated value
        self.post_activation_H[hidden_unit] = self.activation(self.pre_activation_H[hidden_unit])

      max_value = 0
      max_index = -1 
      # From hidden layer to output layer
      for output_unit in range(self.n_outputs):
        ### calculate the net input
        self.pre_activation_O[output_unit] = self.calculate_net_HtoO(output_unit)
        ### calculate the activated value
        self.post_activation_O[output_unit] = self.activation(self.pre_activation_O[output_unit])
        if(self.post_activation_O[output_unit] >= max_value ):
          max_value = self.post_activation_O[output_unit]
          max_index = output_unit
      
      print(self.post_activation_O)
      print('instance no:', instance, 'prediction result:', max_index)
      result = np.append(result, max_index)
    
    return result

  #print weight  
  def print_w_ItoH(self):
    index=[]
    for n in range(self.n_inputs+1):
      index.append('WInput'+str(n))
    column=[]
    for n in range(self.n_hidden):
      column.append('Hidden'+str(n))
    return pd.DataFrame(net.weights_ItoH,index,column)
  #print weight  
  def print_w_HtoO(self):
    index=[]
    for n in range(self.n_hidden+1):
      index.append('WHidden'+str(n))
    column=[]
    for n in range(self.n_outputs):
      column.append('Output'+str(n))
    return pd.DataFrame(net.weights_HtoO,index,column)


In [3]:
# Training
print('Data Iris')
load, target = load_iris(return_X_y=True)
iris_data = pd.DataFrame(load, columns=['sepal_length', 'sepal_width', 'petal_length', 'petal_width'])
iris_data['label'] = pd.Series(target)

shuffled_data = iris_data.copy().sample(frac=1)
train_X = shuffled_data.drop('label',axis=1,inplace=False).values
train_y = shuffled_data['label'].values

net = Network(4, 4)
net.fit(load, target, epoch_limit=100)

Data Iris


In [4]:
iris_data

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,label
0,5.1,3.5,1.4,0.2,0
1,4.9,3.0,1.4,0.2,0
2,4.7,3.2,1.3,0.2,0
3,4.6,3.1,1.5,0.2,0
4,5.0,3.6,1.4,0.2,0
...,...,...,...,...,...
145,6.7,3.0,5.2,2.3,2
146,6.3,2.5,5.0,1.9,2
147,6.5,3.0,5.2,2.0,2
148,6.2,3.4,5.4,2.3,2


In [5]:
#Testing
shuffled_data = iris_data.sample(n=20)
test_X = shuffled_data.drop('label',axis=1,inplace=False).values
test_y = shuffled_data['label'].values

result = net.predict(test_X)
result

[0.85089215 0.22041045 0.0087021 ]
instance no: 0 prediction result: 0
[0.01039844 0.26995498 0.68094265]
instance no: 1 prediction result: 2
[0.02741    0.26169502 0.47401506]
instance no: 2 prediction result: 2
[0.01396629 0.26783536 0.62273039]
instance no: 3 prediction result: 2
[0.00675952 0.27364501 0.75725736]
instance no: 4 prediction result: 2
[0.8533757  0.21872283 0.00836771]
instance no: 5 prediction result: 0
[0.0066824  0.27352676 0.7587341 ]
instance no: 6 prediction result: 2
[0.00861628 0.27216878 0.71704794]
instance no: 7 prediction result: 2
[0.00729269 0.27306774 0.7449072 ]
instance no: 8 prediction result: 2
[0.00694608 0.27343777 0.75287653]
instance no: 9 prediction result: 2
[0.01498327 0.2667378  0.60678616]
instance no: 10 prediction result: 2
[0.0076801  0.27279344 0.73646983]
instance no: 11 prediction result: 2
[0.0107637  0.27049323 0.67610803]
instance no: 12 prediction result: 2
[0.85337264 0.21826389 0.00831561]
instance no: 13 prediction result: 0
[0

array([0., 2., 2., 2., 2., 0., 2., 2., 2., 2., 2., 2., 2., 0., 2., 2., 2.,
       2., 0., 2.])

In [6]:
net.print_w_ItoH()

Unnamed: 0,Hidden0,Hidden1,Hidden2,Hidden3
WInput0,-1.466713,0.455637,-0.487798,-1.173691
WInput1,0.308435,0.561372,-0.665985,-1.856365
WInput2,-0.859972,0.647075,0.043949,3.06773
WInput3,-0.157834,-0.075064,-0.1269,1.145672
WInput4,0.425976,0.994432,-0.009026,-0.089883


In [7]:
net.print_w_HtoO()

Unnamed: 0,Output0,Output1,Output2
WHidden0,-0.721286,0.565732,-0.163582
WHidden1,0.294909,-0.141371,-2.520354
WHidden2,0.779879,0.512019,0.284924
WHidden3,-6.768946,0.30101,5.936865
WHidden4,1.464148,-1.137096,-2.265323


In [8]:
index=[]
for n in range(4):
    index.append('WInput'+str(n))
index

['WInput0', 'WInput1', 'WInput2', 'WInput3']