<a href="https://colab.research.google.com/github/diputs03/AI-Studies/blob/main/Creating_network/dymamic_architect_rebuilt_with_adam_paralleltrain_graphically.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
#@title Aiming a Dynaimic Graph-structured NeuronNetwork
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
import random
from collections import deque
from concurrent.futures import ThreadPoolExecutor

In [48]:
"""
Activation function in this case in \tanh, thus
\dfrac{d\tanh(x)}{dx}=1-\tanh^2(x)
however, for other activation funtions
\dfrac{d\sigma(x)}{dx}=\sigma(x)\cdot\left\big(1-\sigma(x)\right\big)
\dfrac{d\mathop{\mathrm{ReLu}}(x)}{dx}=\begin{cases}1&x\ge0\\0&\text{else}\end{cases}
Loss is the Euclidean loss
\dfrac{d\L}
"""

class Model:
  def __init__(self, input_size, output_size):
    self.idcnt = 0
    self.prev, self.next = {}, {}
    self.neurons = set()

    self.Input_layer = [self.idcnt+i for i in range(input_size)]
    self.neurons.update([self.idcnt+i for i in range(input_size)])
    self.idcnt+=input_size

    self.Output_layer = [self.idcnt+o for o in range(output_size)]
    self.neurons.update([self.idcnt+o for o in range(output_size)])
    self.idcnt+=output_size

    for i in self.Input_layer: self.next[i], self.prev[i] = self.Output_layer, []
    for o in self.Output_layer: self.prev[o], self.next[o] = self.Input_layer, []

    self.weight = {}
    self.weight_gsum, self.weight_gsqr = {}, {}
    for u in self.Input_layer:
      for v in self.Output_layer:
        self.weight[(u,v)] = np.random.uniform(-0.1, 0.1)
        self.weight_gsum[(u,v)], self.weight_gsqr[(u,v)] = 0, 0

    self.bias = {}
    self.bias_gsum, self.bias_gsqr = {}, {}
    for i in self.neurons:
      self.bias[i] = np.random.uniform(-0.1, 0.1)
      self.bias_gsum[i], self.bias_gsqr[i] = 0, 0

  def __forward(self, X, batch_size):
    assert X.shape == (batch_size,len(self.Input_layer)), \
      f"X.shape={X.shape}, where {(batch_size,len(self.Input_layer))} is expected"
    a = {q: np.zeros(batch_size) for q in self.neurons}

    for i, n in enumerate(self.Input_layer):
      a[n] = X[:, i].copy()

    q = deque()
    for i in self.Input_layer:
      q.append(i)

    cnt = {q: 0 for q in self.neurons}

    while len(q) != 0:
      c = q.popleft()
      a[c] = np.tanh(a[c] + self.bias[c])
      for n in self.next[c]:
        a[n] = a[n] + a[c] * self.weight[(c,n)]
        cnt[n] += 1
        if cnt[n] == len(self.prev[n]):
          q.append(n)
    return a

  def evaluate(self, X):
    a = self.__forward(X, len(X))
    return np.array([a[o] for o in self.Output_layer]).T

  def __backward(self, X, Y, batch_size, learning_rate, dsum, dsqr):
    assert X.shape == (batch_size,len(self.Input_layer)), \
      f"X.shape={X.shape}, where {(batch_size,len(self.Input_layer))} is expected"
    assert Y.shape == (batch_size,len(self.Output_layer)), \
      f"X.shape={Y.shape}, where {(batch_size,len(self.Output_layer))} is expected"
    a = self.__forward(X, batch_size)

    delta_b, delta_w = {}, {}

    par_a = {q: np.zeros(batch_size) for q in self.neurons}
    for o, n in enumerate(self.Output_layer):
      par_a[n] = 2 * (a[n] - Y[:, o])

    q = deque()
    for o in self.Output_layer:
      q.append(o)

    cnt = {q: 0 for q in self.neurons}

    while len(q) != 0:
      c = q.popleft()
      par_b = par_a[c] * (1-a[c]**2)

      gbias = par_b
      self.bias_gsum[c] = (1-dsum)*np.sum(gbias)/batch_size + dsum*self.bias_gsum[c]
      self.bias_gsqr[c] = (1-dsqr)*np.sum(gbias**2)/batch_size + dsqr*self.bias_gsqr[c]
      delta_b[c] = -learning_rate * self.bias_gsum[c] / (self.bias_gsqr[c]**(1/2)+1)

      for p in self.prev[c]:
        par_a[p] += par_a[c] * (1-a[c]**2) * self.weight[(p,c)]
        gweight = par_a[c] * (1-a[c]**2) * a[p]
        self.weight_gsum[(p,c)] = \
         (1-dsum)*np.sum(gweight)/batch_size + dsum*self.weight_gsum[(p,c)]
        self.weight_gsqr[(p,c)] = \
         (1-dsqr)*np.sum(gweight**2)/batch_size + dsqr*self.weight_gsqr[(p,c)]
        delta_w[(p,c)] = \
         -learning_rate * self.weight_gsum[(p,c)] / (self.weight_gsqr[(p,c)]**(1/2)+1)

        cnt[p] += 1
        if cnt[p] == len(self.next[p]):
          q.append(p)

    return delta_w, delta_b

  def update(self, X, Y, batch_size, learning_rate, dsum=0.9, dsqr=0.9):
    delta_w, delta_b = \
     self.__backward(X, Y, batch_size, learning_rate, dsum, dsqr)
    for w in self.weight:
        self.weight[w] += delta_w[w]
    for p in self.neurons:
        self.bias[p] += delta_b[p]

  def addLayer(self, mid_size, UP, DOWN):
    Mid_layer = [self.idcnt+m for m in range(mid_size)]
    self.neurons.update([self.idcnt+m for m in range(mid_size)])
    self.idcnt+=mid_size

    for m in Mid_layer:
      self.bias[m] = np.random.uniform(-0.1, 0.1)
      self.bias_gsum[m], self.bias_gsqr[m] = 0, 0

      self.prev[m] = UP
      for u in UP:
        self.weight[(u,m)] = np.random.uniform(-0.1, 0.1)
        self.weight_gsum[(u,m)], self.weight_gsqr[(u,m)] = 0, 0

      self.next[m] = DOWN
      for v in DOWN:
        self.weight[(m,v)] = np.random.uniform(-0.1, 0.1)
        self.weight_gsum[(m,v)], self.weight_gsqr[(m,v)] = 0, 0

    for u in UP:
      self.next[u] = Mid_layer
    for v in DOWN:
      self.prev[v] = Mid_layer

    for u in UP:
      for v in DOWN:
        self.weight.pop((u,v))
        self.weight_gsum.pop((u,v))
        self.weight_gsqr.pop((u,v))
    return Mid_layer

  def train(self, x, y, batch_size, epochs, learning_rate):
    assert len(x) == len(y)
    l = len(x)
    for epoch in range(epochs):
      X, Y = x.copy(), y.copy()
      data=[(X[_], Y[_]) for _ in range(l)]
      random.shuffle(data)
      for _ in range(l):
        X[_],Y[_]=data[_]
      loss = 0
      for batch in range(int(l / batch_size)):
        L, R = batch * batch_size, (batch + 1) * batch_size
        x_train, y_train = X[L:R], Y[L:R]
        self.update(x_train, y_train, batch_size, learning_rate)
        output = self.evaluate(x_train)
        loss += np.sum(((y_train-output) ** 2), axis=(0,1))
      loss = ((loss) ** 0.5) / (int(l / batch_size) * batch_size)
      print(f"Epoch {epoch}/{epochs}, Loss:{loss}")

  def parallel_train(self, x, y, proc, batch_size, epochs=10, learning_rate=0.1):
    assert len(x) == len(y)
    l = len(x)
    for epoch in range(epochs):
      X, Y = x.copy(), y.copy()
      data=[(X[_], Y[_]) for _ in range(l)]
      random.shuffle(data)
      for _ in range(l):
        X[_],Y[_]=data[_]

      k = int(l / batch_size)
      def train_proc(mod, X_split, Y_split):
        tdelta_w = {w: 0 for w in mod.weight}
        tdelta_b = {q: 0 for q in mod.neurons}
        nonlocal proc, k, batch_size, learning_rate
        for c in range(int(k / proc)):
          delta_w, delta_b = mod.__backward(
            X_split[c*batch_size:(c+1)*batch_size],
            Y_split[c*batch_size:(c+1)*batch_size],
            batch_size, learning_rate, 0.9, 0.9
          )
          for w in self.weight:
            self.weight[w] += delta_w[w]
            tdelta_w[w] += delta_w[w]
          for p in self.neurons:
            self.bias[p] += delta_b[p]
            tdelta_b[p] += delta_b[p]
        return tdelta_w, tdelta_b

      with ThreadPoolExecutor(max_workers=proc) as executor:
        handles = [
          executor.submit(train_proc, self,
            X[b*int(k/proc)*batch_size:(b+1)*int(k/proc)*batch_size],
            Y[b*int(k/proc)*batch_size:(b+1)*int(k/proc)*batch_size])
            for b in range(proc)
          ]

      results = [f.result() for f in handles]

      delta_w = {w: np.mean([res[0][w] for res in results], axis=0) for w in self.weight}
      delta_b = {n: np.mean([res[1][n] for res in results], axis=0) for n in self.neurons}
      for w in self.weight:
        self.weight[w] += delta_w[w]
      for p in self.neurons:
        self.bias[p] += delta_b[p]
      output = self.evaluate(X)
      loss = (np.sum(((Y-output)**2), axis=(0,1)) ** 0.5)\
       / (int(l / batch_size) * batch_size)
      print(f"Epoch {epoch}/{epochs}, Loss:{loss}")

In [49]:
X=np.array([[0,0],[0,1],[1,0],[1,1]])
Y=np.array([[0],[1],[1],[0]])
mod=Model(2, 1)
mid1=mod.addLayer(4, mod.Input_layer, mod.Output_layer)
mod.addLayer(4, mid1, mod.Output_layer)
mod.evaluate(X)

array([[0.08765865],
       [0.08768841],
       [0.08731752],
       [0.08734712]])

In [66]:
mod.parallel_train(X, Y, 1, 4, 500, 0.1)

Epoch 0/500, Loss:0.004844040913498024
Epoch 1/500, Loss:0.004839749161304804
Epoch 2/500, Loss:0.00030484857268106905
Epoch 3/500, Loss:0.007898054370048363
Epoch 4/500, Loss:0.006889159446078488
Epoch 5/500, Loss:0.005630083985322584
Epoch 6/500, Loss:0.003989998909459919
Epoch 7/500, Loss:0.005625429936492033
Epoch 8/500, Loss:0.006811195091298022
Epoch 9/500, Loss:0.006805980405899401
Epoch 10/500, Loss:0.00624102031133002
Epoch 11/500, Loss:0.005608761583371809
Epoch 12/500, Loss:0.00678880361414753
Epoch 13/500, Loss:0.007850590690224332
Epoch 14/500, Loss:0.0055911932090442484
Epoch 15/500, Loss:0.007834162866084437
Epoch 16/500, Loss:0.008275842114177897
Epoch 17/500, Loss:0.003940838834759267
Epoch 18/500, Loss:0.0010466885238167441
Epoch 19/500, Loss:0.007804279491340792
Epoch 20/500, Loss:0.006810893182419987
Epoch 21/500, Loss:0.0039292455905821705
Epoch 22/500, Loss:0.007307995952615289
Epoch 23/500, Loss:0.0055464511233130065
Epoch 24/500, Loss:0.0072933710182794325
Epoch

In [67]:
print(X, Y)
mod.evaluate(X)

[[0 0]
 [0 1]
 [1 0]
 [1 1]] [[0]
 [1]
 [1]
 [0]]


array([[2.76240638e-03],
       [9.84417306e-01],
       [9.87014795e-01],
       [5.63063583e-04]])

In [34]:
import tensorflow as tf
tf.random.set_seed(42)
# Load and preprocess the MNIST dataset
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()

# Flatten images to 1D vector of 784 features (28*28)
x_train = x_train.reshape(-1, 784).astype('float32') / 255.0
x_test = x_test.reshape(-1, 784).astype('float32') / 255.0

# One-hot encode the labels
y_train = tf.keras.utils.to_categorical(y_train, 10)
y_test = tf.keras.utils.to_categorical(y_test, 10)

def test(model, X, Y, batch_size):
  k = int(len(X)/batch_size)
  for i in range(k):
    Y_hat=model.evaluate(X[i*batch_size:(i+1)*batch_size])
    wrong=0
    for j in range(batch_size):
      max1,max2,id1,id2=-999,-999,-1,-1
      for l in range(10):
        if max1 < Y_hat[j][l]:
          max1,id1=Y_hat[j][l],l
        if max2 < Y[i*batch_size+j][l]:
          max2,id2=Y[i*batch_size+j][l],l
      if id1 != id2: wrong+=1
    print(f"batch: {i}, accuracy: {(batch_size-wrong)/batch_size*100}%")
mod2 = Model(784, 10)
#mod2.addLayer(32, mod2.Input_layer, mod2.Output_layer)

In [37]:
print(len(mod2.Input_layer))
mod2.parallel_train(x_train, y_train, 1, 512, 5, 0.01)
"""
for i in range(1,9):
  plt.subplot(330+i)
  plt.imshow(x_test[i].reshape(28, 28), cmap=plt.get_cmap('gray'))
print(mod2.evaluate(np.array([x_test[5]])))
print(y_test[5])
"""
test(mod2, x_test, y_test, 500)

784
Epoch 0/5, Loss:0.003301105296252907
Epoch 1/5, Loss:0.003130849243302807
Epoch 2/5, Loss:0.0031157990038879913
Epoch 3/5, Loss:0.0034394485536595166
Epoch 4/5, Loss:0.003034747843717671
batch: 0, accuracy: 77.0%
batch: 1, accuracy: 76.6%
batch: 2, accuracy: 70.8%
batch: 3, accuracy: 76.4%
batch: 4, accuracy: 73.2%
batch: 5, accuracy: 75.8%
batch: 6, accuracy: 77.8%
batch: 7, accuracy: 72.39999999999999%
batch: 8, accuracy: 71.8%
batch: 9, accuracy: 78.8%
batch: 10, accuracy: 89.60000000000001%
batch: 11, accuracy: 78.8%
batch: 12, accuracy: 88.2%
batch: 13, accuracy: 82.6%
batch: 14, accuracy: 85.39999999999999%
batch: 15, accuracy: 85.8%
batch: 16, accuracy: 86.6%
batch: 17, accuracy: 93.2%
batch: 18, accuracy: 84.8%
batch: 19, accuracy: 76.4%


In [38]:
test(mod2, x_test, y_test, 5000)

batch: 0, accuracy: 75.06%
batch: 1, accuracy: 85.14%
