# Optimization using Auto-Differentiation

Purpose: To familiarize yourself with the `Mat` and `MatOperation` classes, which implement matrices and mathematical operations on them.

In [1]:
import numpy as np
from matad import *

## Using `Mat` and `MatOperation` objects

In [30]:
# Two 2x3 float matrices, each wrapped in a Mat, to experiment with.
x = Mat(
    np.array(
        [[1, 2, 3],
         [4, 5, 6]],
        dtype=float,
    )
)
a = Mat(
    np.array(
        [[-1, 2, -1],
         [2, -2, 4]],
        dtype=float,
    )
)

Perform a `MatOperation`. In this case, `x + a` actually computes `matad.plus(x, a)`.

In [31]:
# Perform an operation: add the two Mat objects element-wise.
z = x + a
# Printing the result and printing the value returned by calling it
# show the same 2x3 array in the output below.
print(z)
print(z())

[[ 0.  4.  2.]
 [ 6.  3. 10.]]
[[ 0.  4.  2.]
 [ 6.  3. 10.]]


In [32]:
# Reset grads to zero before any derivatives are propagated.
z.zero_grad()

In [35]:
# Inspect the gradient stored on `a`; it is all zeros at this point.
a.grad

array([[0., 0., 0.],
       [0., 0., 0.]])

In [4]:
# You can see where z came from: `creator` is the operation object
# that produced it (a matad.Plus here, per the output).
z.creator

<matad.Plus at 0x7f368401f358>

In [5]:
# And you can even reach further down the expression graph.
z.creator.args[0]   # returns the first arg of the operation that created z (prints the same values as x)

[[1. 2. 3.]
 [4. 5. 6.]]

Now for something nonlinear!

In [6]:
# Apply the logistic function to z; every entry of the printed
# result lies between 0 and 1.
y = logistic(z)
print(y)

[[0.5        0.98201379 0.88079708]
 [0.99752738 0.95257413 0.9999546 ]]


A scalar function caps the top of the expression graph.

In [7]:
# NOTE(review): `sum` here is presumably matad's sum pulled in by the
# wildcard import (shadowing the builtin) -- confirm.
l = sum(y)
print(l)           # prints as a 1x1 matrix
print(l.val[0,0])  # the underlying scalar, read out of l.val

[[5.31286697]]
5.312866973812887


## Automatic Differentiation

In [8]:
# Differentiate the scalar l with respect to the nodes beneath it.
l.zero_grad()  # reset grads to zero
l.backward()   # propagate derivatives down through graph
print(f'y.grad\n {y.grad}')  # dl/dy: all ones in the output, since l = sum(y)
print(f'z.grad\n {z.grad}')  # dl/dz
print(f'a.grad\n {a.grad}')  # dl/da: matches dl/dz in the output, since z = x + a

y.grad
 [[1. 1. 1.]
 [1. 1. 1.]]
z.grad
 [[2.50000000e-01 1.76627062e-02 1.04993585e-01]
 [2.46650929e-03 4.51766597e-02 4.53958077e-05]]
a.grad
 [[2.50000000e-01 1.76627062e-02 1.04993585e-01]
 [2.46650929e-03 4.51766597e-02 4.53958077e-05]]


In [27]:
# Plain-NumPy sanity check of a (2x3) @ (3x4) matrix product.
m1 = np.array(
    [[1, 2, 3],
     [1, 2, 4]]
)
m2 = np.ones((3, 4), dtype=int)  # 3x4 matrix of integer ones
m3 = np.matmul(m1, m2)           # each row of m3 repeats the row sum of m1

In [29]:
# Scale every entry of the product by 0.1; the displayed result is a float array.
0.1 * m3

array([[0.6, 0.6, 0.6, 0.6],
       [0.7, 0.7, 0.7, 0.7]])

In [22]:
# Operands for a small affine-style example: x is 2x3, W is 3x4 and
# b has 4 entries.
#
# All three are created with dtype=float, matching the Mat objects built
# earlier in this notebook. Presumably gradients are accumulated into arrays
# of the same dtype as the value, which integer arrays could not hold --
# TODO confirm against matad.
x = Mat(np.array([[1, 2, 3], [4, 5, 6]], dtype=float))
W = Mat(np.array([[1, 2, 3, 4], [1, 2, 3, 4], [1, 2, 3, 4]], dtype=float))
# NOTE(review): b is 1-D while every other Mat here is 2-D -- confirm that
# Mat accepts a 1-D array or whether b should be shaped (1, 4).
b = Mat(np.array([1, 1, 1, 1], dtype=float))

In [24]:
# NOTE(review): this cell raised NameError when last run -- `mul` is not a
# name made available by `from matad import *`. Presumably a matrix product
# of x (2x3) and W (3x4) was intended; confirm the actual operation name in
# matad and replace this call so the notebook runs top to bottom.
mul(x, W)

NameError: name 'mul' is not defined