<a href="https://colab.research.google.com/github/diputs03/AI-Studies/blob/main/Creating_network/dymamic_architect_rebuilt_with_adam_paralleltrain_graphically.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
#@title Aiming a Dynaimic Graph-structured NeuronNetwork
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
import random
from collections import deque
import multiprocessing as mp

In [6]:
"""
Activation function in this case in \tanh, thus
\dfrac{d\tanh(x)}{dx}=1-\tanh^2(x)
however, for other activation funtions
\dfrac{d\sigma(x)}{dx}=\sigma(x)\cdot\left\big(1-\sigma(x)\right\big)
\dfrac{d\mathop{\mathrm{ReLu}}(x)}{dx}=\begin{cases}1&x\ge0\\0&\text{else}\end{cases}
Loss is the Euclidean loss
\dfrac{d\L}
"""
class Model:
  def __init__(self, input_size, output_size):
    self.idcnt = 0
    self.prev, self.next = {}, {}
    self.neurons = set()

    self.Input_layer = [self.idcnt+i for i in range(input_size)]
    self.neurons.add(self.idcnt+i for i in range(input_size))
    self.idcnt+=input_size

    self.Output_layer = [self.idcnt+o for o in range(output_size)]
    self.neurons.add(self.idcnt+o for o in range(output_size))
    self.idcnt+=output_size

    for i in self.Input_layer: self.next[i], self.prev[i] = self.Output_layer, []
    for o in self.Output_layer: self.prev[o], self.next[o] = self.Input_layer, []

    self.weight = {}
    self.weight_gsum, self.weight_gsqr = {}, {}
    for u in self.Input_layer:
      for v in self.Output_layer:
        self.weight[(u,v)] = np.random.uniform(-0.1, 0.1)
        self.weight_gsum[(u,v)], self.weight_gsqr[(u,v)] = 0, 0

    self.bias = {}
    self.bias_gsum, self.bias_gsqr = {}, {}
    for i in self.neurons:
      self.bias[i] = np.random.uniform(-0.1, 0.1)
      self.bias_gsum[i], self.bias_gsqr[i] = 0, 0

  def forward(self, X, batch_size):
    assert X.shape == (batch_size,len(self.Input_layer))
    a = {q: np.zeros(batch_size) for q in self.neurons}

    for i, n in enumerate(self.Input_layer):
      a[n] = X[:, i]

    q = deque()
    for i in self.Input_layer:
      q.append(i)

    cnt = {q: 0 for q in self.neurons}

    while len(q) != 0:
      c = q.popleft()
      a[c] = np.tanh(a[c] + self.bias[c])
      for n in next[c]:
        a[n] = a[n] + a[c] * self.weight[(c,n)]
        cnt[n] += 1
        if cnt[n] == len(self.prev[n]):
          q.append(n)
    return a

  def evaluate(self, X):
    a = self.forward(X, len(X))
    return np.array([a[o] for o in self.Output_layer]).T

  def backward(self, X, Y, batch_size, learning_rate, dsum, dsqr):
    assert X.shape == (batch_size,len(self.Input_layer))
    assert Y.shape == (batch_size,len(self.Output_layer))
    a = self.forward(X, batch_size)

    delta_b, delta_w = {}, {}

    par_a = {q.i: np.zeros(batch_size) for q in self.neurons}
    for o, n in enumerate(self.Output_layer):
      par_a[n] = 2 * (a[n] - Y[:, o])

    q = deque()
    for o in self.Output_layer:
      q.append(o)

    cnt = {q: 0 for q in self.neurons}

    while len(q) != 0:
      c = q.popleft()
      par_b = par_a[c] * (1-a[c]**2)

      gbias = par_b
      self.bias_gsum[c] = (1-dsum)*np.sum(gbias)/batch_size + dsum*self.bias_gsum[c]
      self.bias_gsqr[c] = (1-dsqr)*np.sum(gbias**2)/batch_size + dsqr*self.bias_gsqr[c]
      delta_b[c] = -learning_rate * self.bias_gsum[c] / (self.bias_gsqr[c]**(1/2)+1)

      for p in self.prev[c]:
        par_a[p] += par_a[c] * (1-a[c]**2) * self.weight[(p,c)]
        gweight = par_a[c] * (1-a[c]**2) * a[p]
        self.weight_gsum[(p,c)] = \
         (1-dsum)*np.sum(gweight)/batch_size + dsum*self.weight_gsum[(p,c)]
        self.weight_gsqr[(p,c)] = \
         (1-dsqr)*np.sum(gweight**2)/batch_size + dsqr*self.weight_gsqr[(p,c)]
        delta_w[(p,c)] = \
         -learning_rate * self.weight_gsum[(p,c)] / (self.weight_gsqr[(p,c)]**(1/2)+1)

        cnt[p] += 1
        if cnt[p] == len(p.next):
          q.append(p)

    return delta_w, delta_b

  def update(self, X, Y, batch_size, learning_rate, dsum=0.9, dsqr=0.9):
    delta_w, delta_b = \
     self.backward(X, Y, batch_size, learning_rate, dsum, dsqr)
    for key in self.weight:
        self.weight[key] += delta_w[key]
    for p in self.neurons:
        p.bias += delta_b[p]

  def addLayer(self, mid_size, UP, DOWN):
    Mid_layer = [self.idcnt+m for m in range(mid_size)]
    self.neurons.add(self.idcnt+m for m in range(mid_size))
    self.idcnt+=mid_size

    for m in Mid_layer:
      self.bias[m] = np.random.uniform(-0.1, 0.1)
      self.bias_gsum[m], self.bias_gsqr[m] = 0, 0

      self.prev[m] = UP
      for u in UP:
        self.weight[(u,m)] = np.random.uniform(-0.1, 0.1)
        self.weight_gsum[(u,m)], self.weight_gsqr[(u,m)] = 0, 0

      self.next[m] = DOWN
      for v in DOWN:
        self.weight[(m,v)] = np.random.uniform(-0.1, 0.1)
        self.weight_gsum[(m,v)], self.weight_gsqr[(m,v)] = 0, 0

    for u in UP:
      self.next[u] = Mid_layer
    for v in DOWN:
      self.prev[v] = Mid_layer

    for u in UP:
      for v in DOWN:
        self.weight.pop((u,v))
        self.weight_gsum.pop((u,v))
        self.weight_gsqr.pop((u,v))
    return Mid_layer

  def train(self, X, Y, batch_size, epochs, learning_rate):
    assert len(X) == len(Y)
    l = len(X)
    for epoch in range(epochs):
      data=[(X[_], Y[_]) for _ in range(len(X))]
      random.shuffle(data)
      for _ in range(len(X)):
        X[_],Y[_]=data[_]
      loss = 0
      for batch in range(int(l / batch_size)):
        L, R = batch * batch_size, (batch + 1) * batch_size
        x_train, y_train = X[L:R], Y[L:R]
        self.update(x_train, y_train, batch_size, learning_rate)
        output = self.evaluate(x_train)
        loss += np.sum(((y_train-output) ** 2), axis=(0,1))
      loss = ((loss) ** 0.5) / (int(l / batch_size) * batch_size)
      print(f"Epoch {epoch}/{epochs}, Loss:{loss}")

  def parallel_train(self, X, Y, batch_size, epochs=10, learning_rate=0.1):
    assert len(X) == len(Y)
    l = len(X)
    for epoch in range(epochs):
      data=[(X[_], Y[_]) for _ in range(len(X))]
      random.shuffle(data)
      for _ in range(len(X)):
        X[_],Y[_]=data[_]

      k = int(l / batch_size)
      with mp.Pool(processes=k) as pool: results = pool.starmap(Model.backward, \
       [(self, X[b*batch_size:(b+1)*batch_size], Y[b*batch_size:(b+1)*batch_size],\
         batch_size, learning_rate, 0.9, 0.9) for b in range(k)])
      delta_w = {key: np.sum([res[0][key] for res in results], axis=0) / k for key in self.weight}
      delta_b = {n: np.sum([res[1][n] for res in results], axis=0) / k for n in self.neurons}
      for key in self.weight:
        self.weight[key] += delta_w[key]
      for p in self.neurons:
        self.bias[p] += delta_b[p]
      output = self.evaluate(X)
      loss = np.sum(((Y-output) ** 2), axis=(0,1))
      print(f"Epoch {epoch}/{epochs}, Loss:{loss}")

In [7]:
X=np.array([[0,0],[0,1],[1,0],[1,1]])
Y=np.array([[0],[1],[1],[0]])
mod=Model(2, 1)
mid1=mod.addLayer(4, mod.Input_layer, mod.Output_layer)
mod.addLayer(4, mid1, mod.Output_layer)
mod.evaluate(X)

KeyError: 0

In [None]:
mod.parallel_train(X, Y, 4, 500, 0.1)

Epoch 0/500, Loss:1.214644913626065
Epoch 1/500, Loss:2.358760289369366
Epoch 2/500, Loss:3.453697005546797
Epoch 3/500, Loss:3.384178873381895
Epoch 4/500, Loss:2.226237847718614
Epoch 5/500, Loss:3.266775820395009
Epoch 6/500, Loss:2.1474517377584372
Epoch 7/500, Loss:0.004669510085433876
Epoch 8/500, Loss:0.004485331557508367
Epoch 9/500, Loss:0.004308333954674422
Epoch 10/500, Loss:0.004138242415824734
Epoch 11/500, Loss:0.003974792285257439
Epoch 12/500, Loss:0.00381772875340271
Epoch 13/500, Loss:0.0036668065089516563
Epoch 14/500, Loss:0.003521789402107219
Epoch 15/500, Loss:0.0033824501186780157
Epoch 16/500, Loss:0.003248569864737628
Epoch 17/500, Loss:0.0031199380615739387
Epoch 18/500, Loss:0.002996352050655576
Epoch 19/500, Loss:0.002877616808345455
Epoch 20/500, Loss:0.002763544670094539
Epoch 21/500, Loss:0.0026539550638524784
Epoch 22/500, Loss:0.002548674252435519
Epoch 23/500, Loss:0.002447535084596108
Epoch 24/500, Loss:0.0023503767545427334
Epoch 25/500, Loss:0.00225

In [None]:
print(X, Y)
mod.evaluate(X)

[[1 1]
 [1 1]
 [1 1]
 [1 1]] [[0]
 [0]
 [0]
 [0]]


array([[-1.40259787e-06],
       [-1.40259787e-06],
       [-1.40259787e-06],
       [-1.40259787e-06]])

In [None]:
import tensorflow as tf
tf.random.set_seed(42)
# Load and preprocess the MNIST dataset
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()

# Flatten images to 1D vector of 784 features (28*28)
x_train = x_train.reshape(-1, 784).astype('float32') / 255.0
x_test = x_test.reshape(-1, 784).astype('float32') / 255.0

# One-hot encode the labels
y_train = tf.keras.utils.to_categorical(y_train, 10)
y_test = tf.keras.utils.to_categorical(y_test, 10)

def test(model, X, Y, batch_size):
  k = int(len(X)/batch_size)
  for i in range(k):
    Y_hat=model.evaluate(X[i*batch_size:(i+1)*batch_size])
    wrong=0
    for j in range(batch_size):
      max1,max2,id1,id2=-999,-999,-1,-1
      for l in range(10):
        if max1 < Y_hat[j][l]:
          max1,id1=Y_hat[j][l],l
        if max2 < Y[i*batch_size+j][l]:
          max2,id2=Y[i*batch_size+j][l],l
      if id1 != id2: wrong+=1
    print(f"batch: {i}, accuracy: {(batch_size-wrong)/batch_size*100}%")
mod2 = Model(784, 10)
mod2.addLayer(32, mod2.Input_layer, mod2.Output_layer)

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz
[1m11490434/11490434[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 0us/step


[<__main__.Model.Neuron at 0x7dd201290890>,
 <__main__.Model.Neuron at 0x7dd1fd893390>,
 <__main__.Model.Neuron at 0x7dd1fd8935d0>,
 <__main__.Model.Neuron at 0x7dd1fd893690>,
 <__main__.Model.Neuron at 0x7dd1fd893750>,
 <__main__.Model.Neuron at 0x7dd1fd893850>,
 <__main__.Model.Neuron at 0x7dd1fd893910>,
 <__main__.Model.Neuron at 0x7dd1fd8939d0>,
 <__main__.Model.Neuron at 0x7dd1fd893a90>,
 <__main__.Model.Neuron at 0x7dd1fd893810>,
 <__main__.Model.Neuron at 0x7dd1fd893bd0>,
 <__main__.Model.Neuron at 0x7dd1fd893c90>,
 <__main__.Model.Neuron at 0x7dd1fd893d50>,
 <__main__.Model.Neuron at 0x7dd1fd893e10>,
 <__main__.Model.Neuron at 0x7dd1fd893ed0>,
 <__main__.Model.Neuron at 0x7dd1fd893f90>,
 <__main__.Model.Neuron at 0x7dd1fd898090>,
 <__main__.Model.Neuron at 0x7dd1fd898150>,
 <__main__.Model.Neuron at 0x7dd1fd898210>,
 <__main__.Model.Neuron at 0x7dd1fd8982d0>,
 <__main__.Model.Neuron at 0x7dd1fd898390>,
 <__main__.Model.Neuron at 0x7dd1fd898450>,
 <__main__.Model.Neuron at 0x7dd

In [None]:
print(len(mod2.Input_layer))
mod2.parallel_train(x_train, y_train, 2048, 5, 0.01)
"""
for i in range(1,9):
  plt.subplot(330+i)
  plt.imshow(x_test[i].reshape(28, 28), cmap=plt.get_cmap('gray'))
print(mod2.evaluate(np.array([x_test[5]])))
print(y_test[5])
"""
test(mod2, x_test, y_test, 500)

784


KeyboardInterrupt: 

In [None]:
test(mod2, x_test, y_test, 5000)

batch: 0, accuracy: 69.34%
batch: 1, accuracy: 78.25999999999999%
