<a href="https://colab.research.google.com/github/diputs03/AI-Studies/blob/main/Creating_network/test_lab.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [52]:
#@title Aiming a Dynaimic Graph-structured NeuronNetwork
import numpy as np
import matplotlib.pyplot as plt
import random
from collections import deque
from concurrent.futures import ThreadPoolExecutor

"""
Activation function in this case in \tanh, thus
\dfrac{d\tanh(x)}{dx}=1-\tanh^2(x)
however, for other activation funtions
\dfrac{d\sigma(x)}{dx}=\sigma(x)\cdot\left\big(1-\sigma(x)\right\big)
\dfrac{d\mathop{\mathrm{ReLu}}(x)}{dx}=\begin{cases}1&x\ge0\\0&\text{else}\end{cases}
Loss is the Euclidean loss
\dfrac{d\L}
"""

class Model:
  def __init__(self, input_size, output_size):
    self.idcnt = 0
    self.prev, self.next = {}, {}
    self.neurons = set()

    self.Input_layer = [self.idcnt+i for i in range(input_size)]
    self.neurons.update([self.idcnt+i for i in range(input_size)])
    self.idcnt+=input_size

    self.Output_layer = [self.idcnt+o for o in range(output_size)]
    self.neurons.update([self.idcnt+o for o in range(output_size)])
    self.idcnt+=output_size

    for i in self.Input_layer: self.next[i], self.prev[i] = self.Output_layer.copy(), []
    for o in self.Output_layer: self.prev[o], self.next[o] = self.Input_layer.copy(), []

    self.weight = {}
    self.weight_gsum, self.weight_gsqr = {}, {}
    for u in self.Input_layer:
      for v in self.Output_layer:
        self.weight[(u,v)] = np.random.uniform(-0.1, 0.1)
        self.weight_gsum[(u,v)], self.weight_gsqr[(u,v)] = 0, 0

    self.bias = {}
    self.bias_gsum, self.bias_gsqr = {}, {}
    for i in self.neurons:
      self.bias[i] = np.random.uniform(-0.1, 0.1)
      self.bias_gsum[i], self.bias_gsqr[i] = 0, 0

  def addLayer(self, mid_size, UP, DOWN):
    Mid_layer = [self.idcnt+m for m in range(mid_size)]
    self.neurons.update([self.idcnt+m for m in range(mid_size)])
    self.idcnt+=mid_size

    for m in Mid_layer:
      self.bias[m] = np.random.uniform(-0.1, 0.1)
      self.bias_gsum[m], self.bias_gsqr[m] = 0, 0

      self.prev[m] = UP.copy()
      for u in UP:
        self.weight[(u,m)] = np.random.uniform(-0.1, 0.1)
        self.weight_gsum[(u,m)], self.weight_gsqr[(u,m)] = 0, 0

      self.next[m] = DOWN.copy()
      for v in DOWN:
        self.weight[(m,v)] = np.random.uniform(-0.1, 0.1)
        self.weight_gsum[(m,v)], self.weight_gsqr[(m,v)] = 0, 0

    for u in UP:
      self.next[u] = Mid_layer.copy()
    for v in DOWN:
      self.prev[v] = Mid_layer.copy()

    for u in UP:
      for v in DOWN:
        self.weight.pop((u,v))
        self.weight_gsum.pop((u,v))
        self.weight_gsqr.pop((u,v))
    return Mid_layer

  def addNode(self, u, v):
    n = self.idcnt
    self.idcnt += 1
    self.neurons.add(n)
    self.next[n], self.prev[n] = [], []
    self.bias[n] = np.random.uniform(-0.1, 0.1)
    self.bias_gsum[n], self.bias_gsqr[n] = 0, 0

    self.next[u].append(n)
    self.prev[n].append(u)
    self.next[n].append(v)
    self.prev[v].append(n)
    self.weight[(u,n)] = np.random.uniform(-0.1, 0.1)
    self.weight_gsum[(u,n)], self.weight_gsqr[(u,n)] = 0, 0
    self.weight[(n,v)] = np.random.uniform(-0.1, 0.1)
    self.weight_gsum[(n,v)], self.weight_gsqr[(n,v)] = 0, 0

  def __forward(self, X, batch_size):
    assert X.shape == (batch_size,len(self.Input_layer)), \
      f"X.shape={X.shape}, where {(batch_size,len(self.Input_layer))} is expected"
    a = {q: np.zeros(batch_size) for q in self.neurons}

    for i, n in enumerate(self.Input_layer):
      a[n] = X[:, i].copy()

    q = deque()
    for i in self.Input_layer:
      q.append(i)

    cnt = {q: 0 for q in self.neurons}

    while len(q) != 0:
      c = q.popleft()
      a[c] = np.tanh(a[c] + self.bias[c])
      for n in self.next[c]:
        a[n] = a[n] + a[c] * self.weight[(c,n)]
        cnt[n] += 1
        if cnt[n] == len(self.prev[n]):
          q.append(n)
    return a

  def evaluate(self, X):
    a = self.__forward(X, len(X))
    return np.array([a[o] for o in self.Output_layer]).T

  def __backward(self, X, Y, batch_size, learning_rate, dsum, dsqr):
    assert X.shape == (batch_size,len(self.Input_layer)), \
      f"X.shape={X.shape}, where {(batch_size,len(self.Input_layer))} is expected"
    assert Y.shape == (batch_size,len(self.Output_layer)), \
      f"X.shape={Y.shape}, where {(batch_size,len(self.Output_layer))} is expected"
    a = self.__forward(X, batch_size)

    db, dw = {}, {}

    par_a = {q: np.zeros(batch_size) for q in self.neurons}
    for o, n in enumerate(self.Output_layer):
      par_a[n] = 2 * (a[n] - Y[:, o])

    q = deque()
    for o in self.Output_layer:
      q.append(o)

    cnt = {q: 0 for q in self.neurons}

    msg = []

    while len(q) != 0:
      c = q.popleft()
      par_b = par_a[c] * (1-a[c]**2)

      gbias = par_b
      self.bias_gsum[c] = (1-dsum)*np.sum(gbias)/batch_size + dsum*self.bias_gsum[c]
      self.bias_gsqr[c] = (1-dsqr)*np.sum(gbias**2)/batch_size + dsqr*self.bias_gsqr[c]
      db[c] = -learning_rate * self.bias_gsum[c] / (self.bias_gsqr[c]**(1/2)+1)

      for p in self.prev[c]:
        par_a[p] += par_a[c] * (1-a[c]**2) * self.weight[(p,c)]
        gweight = par_a[c] * (1-a[c]**2) * a[p]
        self.weight_gsum[(p,c)] = \
         (1-dsum)*np.sum(gweight)/batch_size + dsum*self.weight_gsum[(p,c)]
        self.weight_gsqr[(p,c)] = \
         (1-dsqr)*np.sum(gweight**2)/batch_size + dsqr*self.weight_gsqr[(p,c)]
        dw[(p,c)] = \
         -learning_rate * self.weight_gsum[(p,c)] / (self.weight_gsqr[(p,c)]**(1/2)+1)

        if np.std(gweight, axis=0)/np.mean(gweight, axis=0) > 1000 and len(self.neurons) < 20:
          msg.append((p,c))

        cnt[p] += 1
        if cnt[p] == len(self.next[p]):
          q.append(p)

    return dw, db, msg

  def train(self, x, y, batch_size, epochs, learning_rate):
    assert len(x) == len(y)
    l = len(x)
    for epoch in range(epochs):
      X, Y = x.copy(), y.copy()
      data=[(X[_], Y[_]) for _ in range(l)]
      random.shuffle(data)
      for _ in range(l):
        X[_],Y[_]=data[_]
      loss = 0
      for batch in range(int(l / batch_size)):
        L, R = batch * batch_size, (batch + 1) * batch_size
        x_split, y_split = X[L:R], Y[L:R]
        dw, db, msg = self.__backward(
          x_split, y_split, batch_size, learning_rate,
          .9, .9
        )
        for w in self.weight:
            self.weight[w] += dw[w]
        for p in self.neurons:
            self.bias[p] += db[p]
        for (u,v) in msg:
          self.addNode(u,v)
        output = self.evaluate(x_split)
        loss += np.sum(((y_split-output) ** 2), axis=(0,1))
      loss = ((loss) ** 0.5) / (int(l / batch_size) * batch_size)
      print(f"Epoch {epoch}/{epochs}, Loss:{loss}")

  def parallel_train(self, x, y, proc, batch_size, epochs=10, learning_rate=0.1):
    assert len(x) == len(y)
    l = len(x)
    for epoch in range(epochs):
      X, Y = x.copy(), y.copy()
      data=[(X[_], Y[_]) for _ in range(l)]
      random.shuffle(data)
      for _ in range(l):
        X[_],Y[_]=data[_]

      k = int(l / batch_size)
      def train_proc(mod, X_split, Y_split):
        tdw = {w: 0 for w in mod.weight}
        tdb = {q: 0 for q in mod.neurons}
        tmsg = []
        nonlocal proc, k, batch_size, learning_rate
        for c in range(int(k / proc)):
          dw, db, msg = mod.__backward(
            X_split[c*batch_size:(c+1)*batch_size],
            Y_split[c*batch_size:(c+1)*batch_size],
            batch_size, learning_rate, 0.9, 0.9
          )
          for w in self.weight:
            self.weight[w] += dw[w]
            tdw[w] += dw[w]
          for p in self.neurons:
            self.bias[p] += db[p]
            tdb[p] += db[p]
          tmsg.extend(msg)
        return tdw, tdb, tmsg

      with ThreadPoolExecutor(max_workers=proc) as executor:
        handles = [
          executor.submit(train_proc, self,
            X[b*int(k/proc)*batch_size:(b+1)*int(k/proc)*batch_size],
            Y[b*int(k/proc)*batch_size:(b+1)*int(k/proc)*batch_size])
            for b in range(proc)
          ]

      results = [f.result() for f in handles]

      dw = {w: np.mean([res[0][w] for res in results], axis=0) for w in self.weight}
      db = {n: np.mean([res[1][n] for res in results], axis=0) for n in self.neurons}
      dm = [m for res in results for m in res[2]]
      for w in self.weight:
        self.weight[w] += dw[w]
      for p in self.neurons:
        self.bias[p] += db[p]
      for (u,v) in dm:
        self.addNode(u,v)
        break
      output = self.evaluate(X)
      loss = (np.sum(((Y-output)**2), axis=(0,1)) ** 0.5)\
       / (int(l / batch_size) * batch_size)
      print(f"Epoch {epoch}/{epochs}, Loss:{loss}")


In [53]:
X=np.array([[0,0],[0,1],[1,0],[1,1]])
Y=np.array([[0],[1],[1],[0]])
mod=Model(2, 1)
mid1=mod.addLayer(4, mod.Input_layer, mod.Output_layer)
mod.addLayer(4, mid1, mod.Output_layer)
mod.evaluate(X)

array([[-0.04320133],
       [-0.04445075],
       [-0.04314593],
       [-0.04438858]])

In [58]:
mod.parallel_train(X, Y, 2, 4, 100000, 0.01)

Epoch 0/100000, Loss:0.45202009099669427
Epoch 1/100000, Loss:0.45183173455741105
Epoch 2/100000, Loss:0.022194288943420726
Epoch 3/100000, Loss:0.2614814148924571
Epoch 4/100000, Loss:0.2614938426786673
Epoch 5/100000, Loss:0.45183173455741105
Epoch 6/100000, Loss:0.36936326284947973
Epoch 7/100000, Loss:0.45220836898067335
Epoch 8/100000, Loss:0.2614814148924571
Epoch 9/100000, Loss:0.36914153526256993
Epoch 10/100000, Loss:0.022194288943420726
Epoch 11/100000, Loss:0.36913273172574773
Epoch 12/100000, Loss:0.2614938426786673
Epoch 13/100000, Loss:0.45220836898067335
Epoch 14/100000, Loss:0.45202009099669427
Epoch 15/100000, Loss:0.3693720608918712
Epoch 16/100000, Loss:0.36936326284947973
Epoch 17/100000, Loss:0.4523965686073026
Epoch 18/100000, Loss:0.3696024427395052
Epoch 19/100000, Loss:0.45183173455741105
Epoch 20/100000, Loss:0.36914153526256993
Epoch 21/100000, Loss:0.022047381636142734
Epoch 22/100000, Loss:0.2618067543927435
Epoch 23/100000, Loss:0.2614814148924571
Epoch 24

KeyboardInterrupt: 

In [59]:
print(X, Y)
mod.evaluate(X)

[[0 0]
 [0 1]
 [1 0]
 [1 1]] [[0]
 [1]
 [1]
 [0]]


array([[-0.04320133],
       [-0.04445075],
       [-0.04314593],
       [-0.04438858]])

In [None]:
import tensorflow as tf
tf.random.set_seed(42)
# Load and preprocess the MNIST dataset
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()

# Flatten images to 1D vector of 784 features (28*28)
x_train = x_train.reshape(-1, 784).astype('float32') / 255.0
x_test = x_test.reshape(-1, 784).astype('float32') / 255.0

# One-hot encode the labels
y_train = tf.keras.utils.to_categorical(y_train, 10)
y_test = tf.keras.utils.to_categorical(y_test, 10)

def test(model, X, Y, batch_size):
  k = int(len(X)/batch_size)
  for i in range(k):
    Y_hat=model.evaluate(X[i*batch_size:(i+1)*batch_size])
    wrong=0
    for j in range(batch_size):
      max1,max2,id1,id2=-999,-999,-1,-1
      for l in range(10):
        if max1 < Y_hat[j][l]:
          max1,id1=Y_hat[j][l],l
        if max2 < Y[i*batch_size+j][l]:
          max2,id2=Y[i*batch_size+j][l],l
      if id1 != id2: wrong+=1
    print(f"batch: {i}, accuracy: {(batch_size-wrong)/batch_size*100}%")
mod2 = Model(784, 10)
#mod2.addLayer(32, mod2.Input_layer, mod2.Output_layer)

In [None]:
print(len(mod2.Input_layer))
mod2.parallel_train(x_train, y_train, 1, 512, 5, 0.01)
"""
for i in range(1,9):
  plt.subplot(330+i)
  plt.imshow(x_test[i].reshape(28, 28), cmap=plt.get_cmap('gray'))
print(mod2.evaluate(np.array([x_test[5]])))
print(y_test[5])
"""
test(mod2, x_test, y_test, 500)

784
Epoch 0/5, Loss:0.003301105296252907
Epoch 1/5, Loss:0.003130849243302807
Epoch 2/5, Loss:0.0031157990038879913
Epoch 3/5, Loss:0.0034394485536595166
Epoch 4/5, Loss:0.003034747843717671
batch: 0, accuracy: 77.0%
batch: 1, accuracy: 76.6%
batch: 2, accuracy: 70.8%
batch: 3, accuracy: 76.4%
batch: 4, accuracy: 73.2%
batch: 5, accuracy: 75.8%
batch: 6, accuracy: 77.8%
batch: 7, accuracy: 72.39999999999999%
batch: 8, accuracy: 71.8%
batch: 9, accuracy: 78.8%
batch: 10, accuracy: 89.60000000000001%
batch: 11, accuracy: 78.8%
batch: 12, accuracy: 88.2%
batch: 13, accuracy: 82.6%
batch: 14, accuracy: 85.39999999999999%
batch: 15, accuracy: 85.8%
batch: 16, accuracy: 86.6%
batch: 17, accuracy: 93.2%
batch: 18, accuracy: 84.8%
batch: 19, accuracy: 76.4%


In [None]:
test(mod2, x_test, y_test, 5000)

batch: 0, accuracy: 75.06%
batch: 1, accuracy: 85.14%


In [None]:
import numpy as np

# Set seed such that we always get the same dataset
# (this is a good idea in general)
np.random.seed(42)

def generate_dataset(num_sequences=2**8):
    """
    Generates a number of sequences as our dataset.

    Args:
     `num_sequences`: the number of sequences to be generated.

    Returns a list of sequences.
    """
    samples = []

    for _ in range(num_sequences):
        num_tokens = np.random.randint(1, 12)
        sample = ['a'] * num_tokens + ['b'] * num_tokens + ['EOS']
        samples.append(sample)

    return samples


sequences = generate_dataset()

print('A single sample from the generated dataset:')
print(sequences[0])

A single sample from the generated dataset:
['a', 'a', 'a', 'a', 'a', 'a', 'a', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'EOS']


In [None]:
from collections import defaultdict

def sequences_to_dicts(sequences):
    """
    Creates word_to_idx and idx_to_word dictionaries for a list of sequences.
    """
    # A bit of Python-magic to flatten a nested list
    flatten = lambda l: [item for sublist in l for item in sublist]

    # Flatten the dataset
    all_words = flatten(sequences)

    # Count number of word occurences
    word_count = defaultdict(int)
    for word in flatten(sequences):
        word_count[word] += 1

    # Sort by frequency
    word_count = sorted(list(word_count.items()), key=lambda l: -l[1])

    # Create a list of all unique words
    unique_words = [item[0] for item in word_count]

    # Add UNK token to list of words
    unique_words.append('UNK')

    # Count number of sequences and number of unique words
    num_sentences, vocab_size = len(sequences), len(unique_words)

    # Create dictionaries so that we can go from word to index and back
    # If a word is not in our vocabulary, we assign it to token 'UNK'
    word_to_idx = defaultdict(lambda: vocab_size-1)
    idx_to_word = defaultdict(lambda: 'UNK')

    # Fill dictionaries
    for idx, word in enumerate(unique_words):
        # YOUR CODE HERE!
        word_to_idx[word] = idx
        idx_to_word[idx] = word

    return word_to_idx, idx_to_word, num_sentences, vocab_size


word_to_idx, idx_to_word, num_sequences, vocab_size = sequences_to_dicts(sequences)

print(f'We have {num_sequences} sentences and {len(word_to_idx)} unique tokens in our dataset (including UNK).\n')
print('The index of \'b\' is', word_to_idx['b'])
print(f'The word corresponding to index 2 is \'{idx_to_word[2]}\'')

assert idx_to_word[word_to_idx['b']] == 'b', \
    'Consistency error: something went wrong in the conversion.'

We have 256 sentences and 4 unique tokens in our dataset (including UNK).

The index of 'b' is 1
The word corresponding to index 2 is 'EOS'


In [1]:
mod3=Model(1,1)


NameError: name 'Model' is not defined