In [9]:
from icecream import ic
from abc import ABC, abstractmethod
import numpy as np
from numpy import linalg as LA
import graphviz

## Libraries
- $\texttt{forward\_backward\_functions\_and\_nodes}$: this library contains three things: 
    - The mathematical operators are defined as classes
    - The nodes of a node tree are defined by their behavior: variables/endnodes are defined as instances of $\texttt{Expr\_end\_node()}$, while functions are defined as instances of $\texttt{Expr\_node()}$. 
    - ABC-Function: Each time a class is called, it is defined as an instance of $\texttt{Expr\_node()}$. This means that calling an operator class is enough for the propagation algorithm to understand what and where the nodes are. 
- $\texttt{print\_graph}$

In [10]:
from forward_backward_functions_and_nodes import * 
from print_graph import print_graph

## Propagation algorithms
The forward pass gives the computed value of the specified node. If the node is not an endnode, it recurses into its children until it reaches one. \
The backward propagation is a recursive function that records the argument of the outer derivative, computes the outer derivative, and multiplies it with the derivative of the argument. The derivative of the argument is computed the same way. The algorithm keeps doing this until it reaches an endnode, where it adds the total accumulated product to the gradient of that endnode. \
The possibility of multiple arguments is considered with the $\texttt{if len(node.childs) == 1}$ - clause.

In [11]:
def forward(node):
    """Evaluate the expression tree rooted at ``node`` and return its value.

    A node whose exact type is ``Expr_end_node`` yields its stored ``value``.
    Any other node evaluates every entry of ``node.childs`` recursively and
    passes the results, in order, to ``node.forward_func``.
    """
    if type(node) is Expr_end_node:
        return node.value
    child_results = (forward(child) for child in node.childs)
    return node.forward_func(*child_results)
    
def backward(node, value = np.float64(1)):
    if type(node) is not Expr_end_node:
        child_values = [forward(child) for child in node.childs] # computes the argument of the outer derivative. In other words: it computes g of f'(g)
        if len(node.childs) == 1:
            new_value = node.backward_func(*child_values) # computes the outer derivative f'(g)
            if value.ndim == 0 or new_value.ndim == 0:
                backward(node.childs[0], value * new_value)
            else: 
                backward(node.childs[0], new_value.T @ value) # @ is matrix product
        else:
            for child, new_value in zip(node.childs, node.backward_func(*child_values), strict=True):
                if value.ndim == 0 or new_value.ndim == 0:
                    backward(child, value * new_value)
                else: 
                    backward(child, new_value.T @ value)                 
    else:
        node.grad_value += value


## Example functions

The following cells are structured as follows: 
- Each cell contains one example function. \
For each function, we define the operators, parameters, and the function itself. The operators are defined as their respective operator classes. \
Example: 
| mathematical operator | operator class  |
| --- | --- |
| + | Add() |
| $\cdot$ | Multiply()|
| sin() | Sin() | 

- This function will then be depicted as a node tree via the $\texttt{print\_graph}$ function.
- Finally, we perform the forward and backward propagation. The values of the propagations will be compared with the values of the analytically derived function and derivative(s).

### Function 1: $f(x_1,x_2) = \log(x_1 \cdot x_2) \cdot \sin(x_2) $

In [12]:
# defining the function: operators are instances of their operator classes, parameters are end nodes
log = Log()
multiply = Multiply()
sin = Sin()
x1 = Expr_end_node(np.float64(.5))
x2 = Expr_end_node(np.float64(3.3))
func1 = multiply(log(multiply(x1, x2)), sin(x2))

# graphical depiction: draw the node tree left-to-right and render it below graph_out/tt
# (view=True additionally opens the rendered file in the default viewer)
graph1 = graphviz.Digraph('graph1', comment='test')
graph1.attr(rankdir="LR")
print_graph(func1, graph1)
graph1.render(directory='graph_out/tt', view=True)


# analytical function and its derivative(s)
mfunc1 = np.log(x1.value * x2.value) * np.sin(x2.value) # analytical function
mdfunc1dx1 = np.sin(x2.value) / x1.value # analytical derivative w.r.t. x1 
mdfunc1dx2 = np.sin(x2.value) / x2.value + np.log(x1.value * x2.value) * np.cos(x2.value) # analytical derivative w.r.t. x2

# comparison to analytical value
# comparison 1: forward propagation
ic(forward(func1)) # value of the function via forward propagation
ic(np.log(x1.value * x2.value) * np.sin(x2.value)) # value of the analytical function
np.testing.assert_allclose(forward(func1), mfunc1) # fail loudly instead of relying on a visual comparison

# comparison 2: backward propagation
# as we reuse the same parameter names for multiple functions, we set the derivatives w.r.t. the parameters to zero, before performing the derivatives.
# (backward() accumulates into grad_value, so the reset also keeps this cell re-runnable)
x1.grad_value=0
x2.grad_value=0
backward(func1) # performing the derivative via backward propagation
ic(x1.grad_value) # value of the derivative w.r.t. x1 via backward propagation
ic(np.sin(x2.value) / x1.value) # value of the analytical derivative w.r.t. x1 
ic(x2.grad_value) # value of the derivative w.r.t. x2 via backward propagation
ic(np.sin(x2.value) / x2.value + np.log(x1.value * x2.value) * np.cos(x2.value)) # value of the analytical derivative w.r.t. x2
np.testing.assert_allclose(x1.grad_value, mdfunc1dx1)
np.testing.assert_allclose(x2.grad_value, mdfunc1dx2)


# Result
print("function 1")
print("Calculus values of x1 derivative and x2 derivative:", mdfunc1dx1, mdfunc1dx2 )
print("Compared to derivatives through chain rule:        ", x1.grad_value, x2.grad_value)

ic| forward(func1): -0.07899514540154058
ic| np.log(x1.value * x2.value) * np.sin(x2.value): -0.07899514540154058
ic| x1.grad_value: -0.3154913882864964
ic| np.sin(x2.value) / x1.value: -0.3154913882864964
ic| x2.grad_value: -0.5423071915818244
ic| np.sin(x2.value) / x2.value + np.log(x1.value * x2.value) * np.cos(x2.value): -0.5423071915818244


function 1
Calculus values of x1 derivative and x2 derivative: -0.3154913882864964 -0.5423071915818244
Compared to derivatives through chain rule:         -0.3154913882864964 -0.5423071915818244


### Function 2: $g(x_1, x_2) = x_1 \cdot x_2 (x_1 + x_2) $ 

In [13]:
# defining the function: operators are instances of their operator classes, parameters are end nodes
multiply = Multiply()
add = Add()
x1 = Expr_end_node(np.float64(.5))
x2 = Expr_end_node(np.float64(3.3))
func2 = multiply(x1, multiply(x2, add(x1, x2)))

# graphical depiction: draw the node tree left-to-right and render it below graph_out/tt
# (view=True additionally opens the rendered file in the default viewer)
graph2 = graphviz.Digraph('graph2', comment='test')
graph2.attr(rankdir="LR")
print_graph(func2, graph2)
graph2.render(directory='graph_out/tt', view=True)

# analytical function and its derivative(s)
mfunc2 = x1.value * x2.value * (x1.value + x2.value)
mdfunc2dx1 = x2.value * (x1.value + x2.value) + x1.value * x2.value
mdfunc2dx2 = x1.value * (x1.value + x2.value) + x1.value * x2.value

# comparison 1: forward propagation
ic(forward(func2))
ic(x1.value * x2.value * (x1.value + x2.value))
np.testing.assert_allclose(forward(func2), mfunc2) # fail loudly instead of relying on a visual comparison

# comparison 2: backward propagation
# backward() accumulates into grad_value, so reset the gradients first
x1.grad_value=0
x2.grad_value=0
backward(func2)
ic(x1.grad_value)
ic(mdfunc2dx1)
ic(x2.grad_value)
ic(mdfunc2dx2)
np.testing.assert_allclose(x1.grad_value, mdfunc2dx1)
np.testing.assert_allclose(x2.grad_value, mdfunc2dx2)

# result
print("Function 2")
print("Calculus values of x1 derivative and x2 derivative:", mdfunc2dx1, mdfunc2dx2 )
print("Compared to derivatives through chain rule:        ", x1.grad_value, x2.grad_value)

ic| forward(func2): 6.27
ic| x1.value * x2.value * (x1.value + x2.value): 6.27
ic| x1.grad_value: 14.19
ic| mdfunc2dx1: 14.19
ic| x2.grad_value: 3.55
ic| mdfunc2dx2: 3.55


Function 2
Calculus values of x1 derivative and x2 derivative: 14.19 3.55
Compared to derivatives through chain rule:         14.19 3.55


### Function 3: $h(x) = 3x^2 + 4x + 2$

In [14]:
# defining the function: operators are instances of their operator classes, parameters are end nodes
add = Add()
multiply = Multiply() # defined here so the cell does not depend on an earlier cell's `multiply`
add_2 = Add_scalar(2)
x = Expr_end_node(np.float64(.5))
multiply_3 = Multiply_scalar(3)
multiply_4 = Multiply_scalar(4)
func3 = add_2( add( multiply_3(multiply(x,x)) , multiply_4(x) ))

# graphical depiction: draw the node tree left-to-right and render it below graph_out/tt
# (view=True additionally opens the rendered file in the default viewer)
graph3 = graphviz.Digraph('graph3', comment='test')
graph3.attr(rankdir="LR")
print_graph(func3, graph3)
graph3.render(directory='graph_out/tt', view=True)

# analytical function and its derivative
mfunc3 = 3*x.value**2 + 4 * x.value + 2
mdfunc3dx = 6 * x.value + 4

# comparison 1: forward propagation
ic(forward(func3))
ic(3*x.value**2 + 4 * x.value + 2)
np.testing.assert_allclose(forward(func3), mfunc3) # fail loudly instead of relying on a visual comparison

# comparison 2: backward propagation
# backward() accumulates into grad_value, so reset the gradient first
x.grad_value = 0
backward(func3)
ic(mdfunc3dx)
ic(x.grad_value)
np.testing.assert_allclose(x.grad_value, mdfunc3dx)


# Result
print("Function 3")
print("Calculus values of x derivative:                   ", mdfunc3dx)
print("Compared to derivatives through chain rule:        ", x.grad_value)

ic| forward(func3): 4.75
ic| 3*x.value**2 + 4 * x.value + 2: 4.75
ic| mdfunc3dx: 7.0
ic| x.grad_value: 7.0


Function 3
Calculus values of x derivative:                    7.0
Compared to derivatives through chain rule:         7.0


### Function 4: Neuron$(\vec x, w, \vec b) = \tanh(w\cdot \vec x + \vec b)$

In [15]:
# defining the function: operators are instances of their operator classes, parameters are end nodes
w = Expr_end_node(np.float64(2))
x = Expr_end_node(np.float64(.5))
b = Expr_end_node(np.float64(3.1))
tanh = Tanh()
add = Add()
multiply = Multiply()
func4 = tanh(add( multiply(w,x) , b))

# graphical depiction: draw the node tree left-to-right and render it below graph_out/tt
# (view=True additionally opens the rendered file in the default viewer)
graph4 = graphviz.Digraph('graph4', comment='test')
graph4.attr(rankdir="LR")
print_graph(func4, graph4)
graph4.render(directory='graph_out/tt', view=True)

# analytical function and its derivatives (d/dz tanh(z) = 1 / cosh(z)^2)
mfunc4 = np.tanh(w.value*x.value + b.value)
mdfunc4dw = 1 / np.cosh(w.value * x.value + b.value)**2 * x.value
mdfunc4dx = 1 / np.cosh(w.value * x.value + b.value)**2 * w.value
mdfunc4db = 1 / np.cosh(w.value * x.value + b.value)**2

# comparison 1: forward propagation
ic(forward(func4))
ic(np.tanh(w.value*x.value + b.value))
np.testing.assert_allclose(forward(func4), mfunc4) # fail loudly instead of relying on a visual comparison

# comparison 2: backward propagation
# backward() accumulates into grad_value, so reset the gradients first
w.grad_value = 0
x.grad_value = 0
b.grad_value = 0
backward(func4)
ic(w.grad_value)
ic(mdfunc4dw)
ic(x.grad_value)
ic(mdfunc4dx)
ic(b.grad_value)
ic(mdfunc4db)
# the two routes agree only up to floating-point rounding, hence a tolerance-based check
np.testing.assert_allclose(w.grad_value, mdfunc4dw)
np.testing.assert_allclose(x.grad_value, mdfunc4dx)
np.testing.assert_allclose(b.grad_value, mdfunc4db)


# Result
print("Function 4")
print("Calculus values of w,x and b derivative:           ", mdfunc4dw, mdfunc4dx, mdfunc4db)
print("Compared to derivatives through chain rule:        ", w.grad_value, x.grad_value, b.grad_value)

ic| forward(func4): 0.9994508436877974
ic| np.tanh(w.value*x.value + b.value): 0.9994508436877974
ic| w.grad_value: 0.000549005525875057
ic| mdfunc4dw: 0.0005490055258750498
ic| x.grad_value: 0.002196022103500228
ic| mdfunc4dx: 0.002196022103500199
ic| b.grad_value: 0.001098011051750114
ic| mdfunc4db: 0.0010980110517500995


Function 4
Calculus values of w,x and b derivative:            0.0005490055258750498 0.002196022103500199 0.0010980110517500995
Compared to derivatives through chain rule:         0.000549005525875057 0.002196022103500228 0.001098011051750114
