In [1]:
import numpy as np
import random
import math
import matplotlib.pyplot as plt
import sys

In [2]:
def read_file(data):
  """Load a comma-separated text file of numbers into a float32 array.

  Each non-blank line is split on ',' and every field is converted with
  np.float32. Blank (whitespace-only) lines are skipped, so a trailing empty
  line at the end of the file does not abort the load.

  Args:
    data: path of the file to read (anything accepted by open()).

  Returns:
    np.ndarray of dtype float32 with one row per non-blank line.

  Raises:
    OSError: if the file cannot be opened.
    ValueError: if a field cannot be parsed as a number.
  """
  res_list = []
  # The context manager closes the handle even when a field fails to parse.
  with open(data, 'r') as file:
    for row in file:
      row = row.strip()
      if not row:
        continue  # tolerate blank lines instead of failing on np.float32('')
      res_list.append([np.float32(value) for value in row.split(',')])
  return np.asarray(res_list, dtype=np.float32)

In [3]:
class BackPropagation:
  """Feed-forward network with one hidden layer, trained by back-propagation.

  Hidden nodes use a tanh activation and the output layer uses softmax. All
  weights, biases and node values are kept in float32 NumPy arrays.

  Data passed to train(), accuracy() and mean_squared_error() is indexed as
  data[row, col]; the first n_input columns of a row are read as inputs and
  the next n_output columns as target values.
  """

  def __init__(self, n_input, n_hidden, n_output, seed):
    """Allocate the network and fill its weights with small random values.

    Args:
      n_input: number of input nodes.
      n_hidden: number of hidden nodes.
      n_output: number of output nodes.
      seed: seed for the private random.Random instance used for weight
        initialisation and for shuffling the training order.
    """
    self.n_input = n_input
    self.n_hidden = n_hidden
    self.n_output = n_output

    self.input_node = np.zeros(shape=[self.n_input], dtype=np.float32)
    self.hidden_node = np.zeros(shape=[self.n_hidden], dtype=np.float32)
    self.output_node = np.zeros(shape=[self.n_output], dtype=np.float32)

    self.i2h_weight = np.zeros(shape=[self.n_input, self.n_hidden], dtype=np.float32)
    self.h2o_weight = np.zeros(shape=[self.n_hidden, self.n_output], dtype=np.float32)

    self.hidden_bias = np.zeros(shape=[self.n_hidden], dtype=np.float32)
    self.output_bias = np.zeros(shape=[self.n_output], dtype=np.float32)

    self.rand = random.Random(seed)
    self.initial_weight()

  def set_weight(self, weight):
    """Load all weights and biases from a flat sequence.

    The expected layout is: input-to-hidden weights (row by row), hidden
    biases, hidden-to-output weights (row by row), output biases. Values
    beyond the required count are ignored.

    Args:
      weight: flat sequence holding at least total_weight(...) numbers.

    Raises:
      IndexError: if weight is too short. The check happens before any
        value is written, so the network is never left half-updated.
    """
    n_i2h = self.n_input * self.n_hidden
    n_h2o = self.n_hidden * self.n_output
    needed = self.total_weight(self.n_input, self.n_hidden, self.n_output)
    if len(weight) < needed:
      raise IndexError(
        "weight has %d values but the network needs %d" % (len(weight), needed))

    flat = np.asarray(weight[:needed], dtype=np.float32)
    start = 0

    # In-place slice assignment keeps the existing arrays (and references to them).
    self.i2h_weight[:, :] = flat[start:start + n_i2h].reshape(self.n_input, self.n_hidden)
    start += n_i2h

    self.hidden_bias[:] = flat[start:start + self.n_hidden]
    start += self.n_hidden

    self.h2o_weight[:, :] = flat[start:start + n_h2o].reshape(self.n_hidden, self.n_output)
    start += n_h2o

    self.output_bias[:] = flat[start:start + self.n_output]

  def get_weight(self):
    """Return a new flat float32 array of all weights and biases.

    The layout matches set_weight(): input-to-hidden weights, hidden biases,
    hidden-to-output weights, output biases.
    """
    # ravel() on these C-ordered arrays yields the same [i, j] row-major order
    # that set_weight() consumes.
    return np.concatenate([
      self.i2h_weight.ravel(),
      self.hidden_bias,
      self.h2o_weight.ravel(),
      self.output_bias,
    ])

  def initial_weight(self):
    """Set every weight and bias to a random value drawn from [-0.1, 0.1)."""
    n_weight = self.total_weight(self.n_input, self.n_hidden, self.n_output)
    weights = np.zeros(shape=[n_weight], dtype=np.float32)
    lo = -0.1
    hi = 0.1
    for index in range(len(weights)):
      weights[index] = (hi - lo) * self.rand.random() + lo
    self.set_weight(weights)

  def compute_output(self, x_value):
    """Run a forward pass and return the softmax outputs.

    The input, hidden and output node arrays on the instance are overwritten
    with the values of this pass; train() relies on them afterwards.

    Args:
      x_value: indexable holding at least n_input input values.

    Returns:
      A new float32 array of length n_output with the softmax outputs.
    """
    hidden_sum = np.zeros(shape=[self.n_hidden], dtype=np.float32)
    output_sum = np.zeros(shape=[self.n_output], dtype=np.float32)

    for i in range(self.n_input):
      self.input_node[i] = x_value[i]

    # Hidden layer: weighted sum of inputs, plus bias, through tanh.
    for j in range(self.n_hidden):
      for i in range(self.n_input):
        hidden_sum[j] += self.input_node[i] * self.i2h_weight[i, j]
      hidden_sum[j] += self.hidden_bias[j]
      self.hidden_node[j] = self.hypertan(hidden_sum[j])

    # Output layer: weighted sum of hidden nodes, plus bias, through softmax.
    for k in range(self.n_output):
      for j in range(self.n_hidden):
        output_sum[k] += self.hidden_node[j] * self.h2o_weight[j, k]
      output_sum[k] += self.output_bias[k]

    self.output_node[:] = self.softmax(output_sum)

    # Hand back a copy so callers cannot alter the stored node values.
    return self.output_node.copy()

  def _split_row(self, data, row, x_values, t_values):
    """Copy row `row` of data into x_values (inputs) and t_values (targets)."""
    for j in range(self.n_input):
      x_values[j] = data[row, j]
    for j in range(self.n_output):
      t_values[j] = data[row, j + self.n_input]

  def train(self, training_data, max_epoch, learning_rate, verbose=True):
    """Train the network with per-sample gradient descent.

    Every epoch visits all rows once in a freshly shuffled order and updates
    the weights after each row.

    Args:
      training_data: 2-D array; each row holds n_input inputs followed by
        n_output target values.
      max_epoch: number of passes over the training data.
      learning_rate: step size applied to every gradient.
      verbose: when True (the default) the mean squared error on
        training_data is computed after each epoch and printed as
        "<epoch>,<mse>". When False that evaluation is skipped.

    Returns:
      Flat float32 array of the trained weights (see get_weight()).
    """
    h2o_gradient = np.zeros(shape=[self.n_hidden, self.n_output], dtype=np.float32)  # hidden-to-output weight gradients
    outputbias_gradient = np.zeros(shape=[self.n_output], dtype=np.float32)  # output node bias gradients
    i2h_gradient = np.zeros(shape=[self.n_input, self.n_hidden], dtype=np.float32)  # input-to-hidden weight gradients
    hiddenbias_gradient = np.zeros(shape=[self.n_hidden], dtype=np.float32)  # hidden bias gradients

    output_signal = np.zeros(shape=[self.n_output], dtype=np.float32)  # output signals: gradients w/o assoc. input terms
    hidden_signal = np.zeros(shape=[self.n_hidden], dtype=np.float32)  # hidden signals: gradients w/o assoc. input terms

    epoch = 0
    x_values = np.zeros(shape=[self.n_input], dtype=np.float32)
    t_values = np.zeros(shape=[self.n_output], dtype=np.float32)
    n_training = len(training_data)
    indices = np.arange(n_training)

    while epoch < max_epoch:
      self.rand.shuffle(indices)  # visit the rows in a new random order
      for index in indices:
        self._split_row(training_data, index, x_values, t_values)
        self.compute_output(x_values)

        # 1. compute output node signals
        for k in range(self.n_output):
          derivative = (1 - self.output_node[k]) * self.output_node[k]  # softmax
          output_signal[k] = derivative * (self.output_node[k] - t_values[k])

        # 2. compute hidden-to-output weight gradients using output signals
        for j in range(self.n_hidden):
          for k in range(self.n_output):
            h2o_gradient[j, k] = output_signal[k] * self.hidden_node[j]

        # 3. compute output node bias gradients (the bias input is a constant 1)
        for k in range(self.n_output):
          outputbias_gradient[k] = output_signal[k]

        # 4. compute hidden node signals; this must use the hidden-to-output
        #    weights from before this sample's update
        for j in range(self.n_hidden):
          weighted_sum = 0.0
          for k in range(self.n_output):
            weighted_sum += output_signal[k] * self.h2o_weight[j, k]
          derivative = (1 - self.hidden_node[j]) * (1 + self.hidden_node[j])  # tanh activation
          hidden_signal[j] = derivative * weighted_sum

        # 5. compute input-to-hidden weight gradients using hidden signals
        for i in range(self.n_input):
          for j in range(self.n_hidden):
            i2h_gradient[i, j] = hidden_signal[j] * self.input_node[i]

        # 6. compute hidden node bias gradients (the bias input is a constant 1)
        for j in range(self.n_hidden):
          hiddenbias_gradient[j] = hidden_signal[j]

        # update weight and bias using the gradients

        # 1. update input-to-hidden weight
        for i in range(self.n_input):
          for j in range(self.n_hidden):
            delta = -1.0 * learning_rate * i2h_gradient[i, j]
            self.i2h_weight[i, j] += delta

        # 2. update hidden node biases
        for j in range(self.n_hidden):
          delta = -1.0 * learning_rate * hiddenbias_gradient[j]
          self.hidden_bias[j] += delta

        # 3. update hidden-to-output weight
        for j in range(self.n_hidden):
          for k in range(self.n_output):
            delta = -1.0 * learning_rate * h2o_gradient[j, k]
            self.h2o_weight[j, k] += delta

        # 4. update output node biases
        for k in range(self.n_output):
          delta = -1.0 * learning_rate * outputbias_gradient[k]
          self.output_bias[k] += delta

      epoch += 1

      if verbose:
        mse = self.mean_squared_error(training_data)
        print(str(epoch)+",%0.4f " % mse)

    return self.get_weight()

  def accuracy(self, data):
    """Return the fraction of rows whose strongest output matches the target.

    A row counts as correct when the target value at the index of the largest
    network output is within 1e-5 of 1.0.

    Args:
      data: 2-D array; each row holds n_input inputs followed by n_output
        target values.

    Returns:
      Accuracy in [0, 1]; 0.0 when data has no rows.
    """
    if len(data) == 0:
      return 0.0  # nothing to score; avoids dividing by zero

    num_correct = 0
    num_wrong = 0
    x_values = np.zeros(shape=[self.n_input], dtype=np.float32)
    t_values = np.zeros(shape=[self.n_output], dtype=np.float32)

    for i in range(len(data)):
      self._split_row(data, i, x_values, t_values)

      y_values = self.compute_output(x_values)
      max_index = np.argmax(y_values)

      if abs(t_values[max_index] - 1.0) < 1.0e-5:
        num_correct += 1
      else:
        num_wrong += 1

    return (num_correct * 1.0) / (num_correct + num_wrong)

  def mean_squared_error(self, data):
    """Return the squared output error summed per row and averaged over rows.

    Args:
      data: 2-D array; each row holds n_input inputs followed by n_output
        target values.

    Returns:
      Sum over all rows and outputs of (target - output)^2 divided by the
      number of rows; 0.0 when data has no rows.
    """
    if len(data) == 0:
      return 0.0  # nothing to evaluate; avoids dividing by zero

    sumSquaredError = 0.0
    x_values = np.zeros(shape=[self.n_input], dtype=np.float32)
    t_values = np.zeros(shape=[self.n_output], dtype=np.float32)

    for ii in range(len(data)):
      self._split_row(data, ii, x_values, t_values)

      y_values = self.compute_output(x_values)

      for j in range(self.n_output):
        err = t_values[j] - y_values[j]
        sumSquaredError += err * err

    return sumSquaredError / len(data)

  @staticmethod
  def hypertan(x):
    """Return tanh(x), short-circuiting to -1.0 / 1.0 when |x| exceeds 20."""
    if x < -20.0:
      return -1.0
    elif x > 20.0:
      return 1.0
    else:
      return math.tanh(x)

  @staticmethod
  def softmax(output_sum):
    """Return the softmax of output_sum as a float32 array.

    The maximum is subtracted before exponentiating so math.exp never sees a
    positive argument.
    """
    result = np.zeros(shape=[len(output_sum)], dtype=np.float32)
    m = max(output_sum)
    # Exponentiate once and reuse for both the divisor and the quotients.
    exps = [math.exp(output_sum[k] - m) for k in range(len(output_sum))]
    divisor = 0.0
    for value in exps:
      divisor += value
    for k in range(len(result)):
      result[k] = exps[k] / divisor
    return result

  @staticmethod
  def total_weight(n_input, n_hidden, n_output):
    """Return the total number of weights and biases for the given layer sizes."""
    total_w = (n_input * n_hidden) + (n_hidden * n_output) + n_hidden + n_output
    return total_w

In [4]:
def main(training_path="iris-trainingdata.csv",
         validation_path="iris-validationdata.csv",
         n_input=4, n_hidden=5, n_output=3,
         max_epoch=200, learning_rate=0.05, seed=3):
  """Train a BackPropagation network and print its accuracy on two data sets.

  All arguments default to the values the notebook originally hard-coded, so
  calling main() with no arguments behaves as before.

  Args:
    training_path: file passed to read_file() for the training data.
    validation_path: file passed to read_file() for the validation data.
    n_input: number of input nodes.
    n_hidden: number of hidden nodes.
    n_output: number of output nodes.
    max_epoch: number of training epochs.
    learning_rate: learning rate handed to BackPropagation.train().
    seed: seed handed to the BackPropagation constructor.
  """
  bp = BackPropagation(n_input, n_hidden, n_output, seed=seed)

  training_data = read_file(training_path)
  validation_data = read_file(validation_path)

  # train() prints one "<epoch>,<mse>" line per epoch.
  bp.train(training_data, max_epoch, learning_rate)

  training_accuracy = bp.accuracy(training_data)
  validation_accuracy = bp.accuracy(validation_data)

  print("\nTraining Accuracy   = %0.4f " % training_accuracy)
  print("Validation Accuracy   = %0.4f " % validation_accuracy)

if __name__ == "__main__":
  main()

1,0.6092 
2,0.4666 
3,0.3738 
4,0.3356 
5,0.3286 
6,0.3080 
7,0.2706 
8,0.2613 
9,0.2284 
10,0.1969 
11,0.1897 
12,0.1662 
13,0.1475 
14,0.2350 
15,0.1872 
16,0.1131 
17,0.1025 
18,0.1450 
19,0.0960 
20,0.0901 
21,0.0944 
22,0.0790 
23,0.1302 
24,0.1736 
25,0.1693 
26,0.2536 
27,0.1606 
28,0.0840 
29,0.1242 
30,0.0956 
31,0.0750 
32,0.1275 
33,0.0870 
34,0.0615 
35,0.0715 
36,0.0716 
37,0.0763 
38,0.1797 
39,0.0575 
40,0.0582 
41,0.0822 
42,0.0984 
43,0.0773 
44,0.1027 
45,0.0540 
46,0.0807 
47,0.0658 
48,0.0609 
49,0.0599 
50,0.0737 
51,0.0703 
52,0.0607 
53,0.0941 
54,0.1158 
55,0.0607 
56,0.1427 
57,0.0975 
58,0.1086 
59,0.0886 
60,0.0858 
61,0.0863 
62,0.0636 
63,0.1626 
64,0.0868 
65,0.0710 
66,0.0594 
67,0.0650 
68,0.1070 
69,0.0490 
70,0.0498 
71,0.1023 
72,0.0588 
73,0.1084 
74,0.0641 
75,0.0583 
76,0.0869 
77,0.0694 
78,0.0859 
79,0.0821 
80,0.0728 
81,0.0718 
82,0.0769 
83,0.0573 
84,0.1062 
85,0.0473 
86,0.0563 
87,0.0459 
88,0.1098 
89,0.0639 
90,0.0556 
91,0.0502 
92,0.046