<a href="https://colab.research.google.com/github/h-g-gervais/autodiff-exp/blob/main/sequential.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [4]:
import numpy as np


In [5]:
# Seeded generator shared by the random draws in the cells below; the fixed
# seed makes a fresh top-to-bottom run repeatable.
rng = np.random.default_rng(seed=42)

In [3]:
# Peek at a 3x5 array of uniform samples. Note that this advances the state of
# the shared generator, so it affects every draw made after it.
rng.random((3, 5))

array([[0.77395605, 0.43887844, 0.85859792, 0.69736803, 0.09417735],
       [0.97562235, 0.7611397 , 0.78606431, 0.12811363, 0.45038594],
       [0.37079802, 0.92676499, 0.64386512, 0.82276161, 0.4434142 ]])

In [7]:
# Two separate 5x5 integer grids, each holding 0..24 in row-major order.
A = np.arange(25).reshape(5, 5)
B = np.arange(25).reshape(5, 5)

print(A, B, sep='\n')

# Element-wise product first, then three spellings of the matrix product.
print(np.multiply(A, B))
print(A @ B, np.dot(A, B), np.matmul(A, B), sep='\n')

# The output subscripts are 'ki' rather than 'ik', so this prints the
# transpose of the matrix product shown above.
print(np.einsum('ij,jk->ki', A, B))

[[ 0  1  2  3  4]
 [ 5  6  7  8  9]
 [10 11 12 13 14]
 [15 16 17 18 19]
 [20 21 22 23 24]]
[[ 0  1  2  3  4]
 [ 5  6  7  8  9]
 [10 11 12 13 14]
 [15 16 17 18 19]
 [20 21 22 23 24]]
[[  0   1   4   9  16]
 [ 25  36  49  64  81]
 [100 121 144 169 196]
 [225 256 289 324 361]
 [400 441 484 529 576]]
[[ 150  160  170  180  190]
 [ 400  435  470  505  540]
 [ 650  710  770  830  890]
 [ 900  985 1070 1155 1240]
 [1150 1260 1370 1480 1590]]
[[ 150  160  170  180  190]
 [ 400  435  470  505  540]
 [ 650  710  770  830  890]
 [ 900  985 1070 1155 1240]
 [1150 1260 1370 1480 1590]]
[[ 150  160  170  180  190]
 [ 400  435  470  505  540]
 [ 650  710  770  830  890]
 [ 900  985 1070 1155 1240]
 [1150 1260 1370 1480 1590]]
[[ 150  400  650  900 1150]
 [ 160  435  710  985 1260]
 [ 170  470  770 1070 1370]
 [ 180  505  830 1155 1480]
 [ 190  540  890 1240 1590]]


In [14]:
# A length-6 integer vector and its shape.
a = np.arange(6)
print(a, a.shape, sep='\n')

print()

# Outer product with a vector of ones: every row of the result is a copy of `a`
# (as floats, because np.ones is floating point).
b = np.einsum('i,j->ij', np.ones(6), a)
print(b, b.shape, sep='\n')

[0 1 2 3 4 5]
(6,)

[[0. 1. 2. 3. 4. 5.]
 [0. 1. 2. 3. 4. 5.]
 [0. 1. 2. 3. 4. 5.]
 [0. 1. 2. 3. 4. 5.]
 [0. 1. 2. 3. 4. 5.]
 [0. 1. 2. 3. 4. 5.]]
(6, 6)


In [16]:
# Operands for the contraction experiments: a 10x10 integer grid holding
# 0..99, a 10x10 matrix of ones and a length-10 vector of ones.
X = np.arange(100).reshape(10, 10)
W = np.ones((10, 10))
b = np.ones(10)

# Print each operand followed by its shape.
print(X, X.shape, sep='\n')
print(W, W.shape, sep='\n')
print(b, b.shape, sep='\n')

[[ 0  1  2  3  4  5  6  7  8  9]
 [10 11 12 13 14 15 16 17 18 19]
 [20 21 22 23 24 25 26 27 28 29]
 [30 31 32 33 34 35 36 37 38 39]
 [40 41 42 43 44 45 46 47 48 49]
 [50 51 52 53 54 55 56 57 58 59]
 [60 61 62 63 64 65 66 67 68 69]
 [70 71 72 73 74 75 76 77 78 79]
 [80 81 82 83 84 85 86 87 88 89]
 [90 91 92 93 94 95 96 97 98 99]]
(10, 10)
[[1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
 [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
 [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
 [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
 [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
 [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
 [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
 [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
 [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
 [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]]
(10, 10)
[1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
(10,)


In [25]:
# Contract the last axis of X with b in two equivalent ways: the matrix-vector
# product and an einsum whose ellipsis keeps any leading axes of X.
print(X @ b)
print(np.einsum('...j,j->...', X, b))

[ 45. 145. 245. 345. 445. 545. 645. 745. 845. 945.]
[ 45. 145. 245. 345. 445. 545. 645. 745. 845. 945.]


In [16]:
"""
Notation:
  Use Einstein summation convention
  1(r) = (1, 1, ..., 1) r times
  1(r,s) = 1 if r == s else 0
"""

class Affine:
  """
  Affine (fully connected) layer: T = X W + b.

  Notation (Einstein summation over repeated indices):
    1(r) is the all-ones vector, 1(r, s) is 1 if r == s else 0.

  Let:
    N = num_outputs
    M = num_inputs

  Parameters:
    W_ji = weight matrix, shape (M, N)
      j is the input feature index
      i is the output feature index
    b_i = bias vector, shape (N,)

  Input:
    X_rj = input matrix, shape (batch, M)
      r is the batch index
      j is the feature index

  Output:
    T_ri = output matrix, shape (batch, N)
      r = batch index
      i = output vector index

    T_ri = X_rj * W_ji + 1(r) * b_i

  Derivatives:
    d(T_ri) / d(X_sj) = 1(r, s) * W_ji
    d(T_ri) / d(W_jk) = X_rj * 1(i, k)
    d(T_ri) / d(b_j) = 1(r) * 1(i, j)

  The partial_* methods are evaluated at the inputs most recently handed to
  set_inputs() (evaluate() calls it for you).
  """
  def __init__(self, num_inputs, num_outputs):
    """Create a layer mapping num_inputs features to num_outputs features."""
    self.num_inputs = num_inputs
    self.num_outputs = num_outputs
    # Initial values are uniform samples in [0, 1) taken from the
    # notebook-level generator `rng` (weights first, then biases).
    self.W = rng.random((num_inputs, num_outputs))
    self.b = rng.random(num_outputs)
    # Prime the cache with a one-row batch so that every partial_* method is
    # usable (and correctly shaped) before the first call to evaluate().
    self.set_inputs(np.ones((1, self.num_inputs)))

  def set_inputs(self, X):
    """
    Cache the inputs at which the derivatives will be evaluated.

    A single 1-D sample of shape (M,) is stored as a one-row batch (1, M) so
    that batch_rows always counts rows and never features.
    """
    X = np.atleast_2d(X)
    self.batch_rows = X.shape[0]
    self.input_matrix = X

  def evaluate(self, X):
    """
    Return T = X W + b and remember X for the derivative methods.

    X may carry any number of leading batch axes (including none); the bias
    is added along the last axis through ordinary broadcasting.
    """
    self.set_inputs(X)
    return np.einsum('...j,ji->...i', X, self.W) + self.b

  def partial_inputs(self):
    """Return d(T_ri)/d(X_sj) as an array indexed [r, i, s, j]."""
    return np.einsum('rs,ji->risj', np.identity(self.batch_rows), self.W)

  def partial_params(self):
    """Return [dT/dW, dT/db], in that order."""
    return [self._partial_weights(), self._partial_biases()]

  def _partial_weights(self):
    """Return d(T_ri)/d(W_jk) as an array indexed [r, i, j, k]."""
    return np.einsum('rj,ik->rijk', self.input_matrix, np.identity(self.num_outputs))

  def _partial_biases(self):
    """Return d(T_ri)/d(b_j) as an array indexed [r, i, j]."""
    return np.einsum('r,ij->rij', np.ones(self.batch_rows), np.identity(self.num_outputs))


In [11]:
class ReLU:
  """Element-wise rectifier max(x, 0); the layer has no trainable parameters."""

  def __init__(self, num_inputs):
    """Record the feature count and prime the cache with a vector of ones."""
    self.num_inputs = num_inputs
    placeholder = np.ones(self.num_inputs)
    self.set_inputs(placeholder)

  def set_inputs(self, X):
    """Cache X together with the length of its first axis."""
    self.batch_rows, self.input_matrix = X.shape[0], X

  def evaluate(self, X):
    """Return X with negative entries replaced by 0, remembering X."""
    self.set_inputs(X)
    rectified = np.maximum(X, 0)
    return rectified

  def partial_inputs(self):
    """
    Return the element-wise derivative at the cached inputs.

    The result has the same shape as the cached input: 1 where the input is
    positive and 0 where it is negative or exactly zero.
    """
    step = np.heaviside(self.input_matrix, 0)
    return step

  def partial_params(self):
    """Return an empty list: there are no parameters to differentiate."""
    return list()

In [12]:
class Model:
  """
  Sequential network: one Affine layer per entry of layers_shapes, with a
  ReLU after every Affine layer except the last.

  Affine and ReLU are the layer classes defined in the cells above.
  """
  def __init__(self, layers_shapes=None):
    """
    Build the layer stack.

    layers_shapes: optional sequence of (num_inputs, num_outputs) pairs, one
      per Affine layer. Defaults to [(2, 5), (5, 5), (5, 1)].
    """
    if layers_shapes is None:
      layers_shapes = [(2, 5), (5, 5), (5, 1)]
    self.layers_shapes = [tuple(shape) for shape in layers_shapes]

    # Derive the operations from the shapes instead of repeating them by hand,
    # so the two can never drift apart.
    self.operations = []
    last = len(self.layers_shapes) - 1
    for index, (num_inputs, num_outputs) in enumerate(self.layers_shapes):
      self.operations.append(Affine(num_inputs, num_outputs))
      if index < last:
        self.operations.append(ReLU(num_outputs))

  def forward(self, X):
    """Feed X through every operation in order and return the final output."""
    output = X
    for op in self.operations:
      output = op.evaluate(output)
    return output

  def backward(self, X):
    """
    Backward (derivative) pass -- not written yet.

    Raises NotImplementedError so that callers fail loudly instead of
    silently receiving None.
    """
    # TODO: chain each operation's partial_inputs() / partial_params() here.
    raise NotImplementedError("Model.backward is not implemented yet")



In [13]:
# Batch of 100 two-feature rows sampled uniformly from [0, 1). This rebinds X,
# which an earlier cell used for the 10x10 integer grid.
X = rng.random((100, 2))

In [14]:
# Build the network; the Affine layers draw their initial weights and biases
# from the shared generator `rng` at construction time.
model = Model()

In [15]:
# Forward pass over the whole batch; the cell displays the full output array.
model.forward(X)

array([[4.93565687],
       [8.7695544 ],
       [7.2253518 ],
       [7.04200084],
       [7.35847868],
       [6.55937395],
       [6.74514956],
       [5.28317261],
       [6.10814253],
       [5.26149093],
       [7.81367065],
       [5.61136457],
       [6.1492497 ],
       [6.75833678],
       [9.49226596],
       [4.95962655],
       [6.3461909 ],
       [6.97526343],
       [8.79987249],
       [8.57324534],
       [5.56022799],
       [7.83710225],
       [8.22839603],
       [7.45162642],
       [7.17462516],
       [8.30556639],
       [7.10747251],
       [5.60383406],
       [5.23088876],
       [8.53487552],
       [6.52951101],
       [7.33674751],
       [9.61640506],
       [6.42092024],
       [7.22513467],
       [7.49524023],
       [6.89060973],
       [6.41174429],
       [5.09739294],
       [7.57907506],
       [6.49701917],
       [7.33257097],
       [9.30062719],
       [7.93140495],
       [7.00930378],
       [7.97941594],
       [6.13797819],
       [6.273