# Problem 2
Fill in the code for `def max` and `def min` below. Then run the Grad check.

In [None]:
import math
class ag: # AutoGrad
    """Minimal scalar reverse-mode automatic differentiation.

    The class is used as a namespace: the differentiable functions
    (``log``, ``exp``, ``relu``, ``max``, ``min``) are called as
    ``ag.log(x)`` etc., and ``ag.Scalar`` is the graph node type. Every
    operation returns a new ``ag.Scalar`` that records its operands in
    ``inputs`` and stores a ``_backward`` closure which adds the node's
    gradient contribution to those operands.
    """

    @staticmethod
    def log(input):
        """Return the natural logarithm of ``input`` as a new graph node."""
        output = ag.Scalar(math.log(input.value), inputs=[input], op="log")

        def _backward():
            # d/dx log(x) = 1 / x
            input.grad += output.grad / input.value

        output._backward = _backward
        return output

    @staticmethod
    def exp(input):
        """Return ``e ** input`` as a new graph node."""
        output = ag.Scalar(math.exp(input.value), inputs=[input], op="exp")

        def _backward():
            # d/dx exp(x) = exp(x), which is already stored in output.value
            input.grad += output.grad * output.value

        output._backward = _backward
        return output

    @staticmethod
    def relu(input):
        """Return ``max(0, input)`` as a new graph node."""
        # `max` here is the builtin: names defined in a class body are not
        # visible from inside the bodies of its functions.
        output = ag.Scalar(max(0, input.value), inputs=[input], op="relu")

        def _backward():
            # Gradient passes through only where the input is strictly positive.
            if input.value > 0:
                input.grad += output.grad

        output._backward = _backward
        return output

    @staticmethod
    def max(a, b):
        """Return the larger of the Scalars ``a`` and ``b`` as a new graph node.

        The whole upstream gradient is routed to the selected operand; on a
        tie (``a.value == b.value``) it goes to ``a``.
        """
        a_selected = a.value >= b.value
        # Both operands must be recorded as inputs: otherwise the topological
        # sort never reaches them and anything upstream of `a` or `b` would
        # receive no gradient.
        out = ag.Scalar(a.value if a_selected else b.value,
                        inputs=[a, b], op="max")

        def _backward():
            # d max(a, b)/da = 1 where a is selected, else d max(a, b)/db = 1
            if a_selected:
                a.grad += out.grad
            else:
                b.grad += out.grad

        out._backward = _backward
        return out

    @staticmethod
    def min(a, b):
        """Return the smaller of the Scalars ``a`` and ``b`` as a new graph node.

        The whole upstream gradient is routed to the selected operand; on a
        tie (``a.value == b.value``) it goes to ``a``.
        """
        # One selection shared by the forward and backward pass keeps the two
        # in agreement, including on ties.
        a_selected = a.value <= b.value
        # See `max`: the operands must be registered as inputs of the node.
        out = ag.Scalar(a.value if a_selected else b.value,
                        inputs=[a, b], op="min")

        def _backward():
            if a_selected:
                a.grad += out.grad
            else:
                b.grad += out.grad

        out._backward = _backward
        return out

    class Scalar: # Scalars with grads
        """A float value together with its accumulated gradient and graph links."""

        def __init__(self,  value, op="", _backward= lambda : None, inputs=None, label=""):
            """Create a graph node.

            value:     numeric value, stored as ``float(value)``.
            op:        name of the operation that produced this node.
            _backward: callable that pushes this node's grad to its inputs.
            inputs:    list of operand nodes; omitted/None means a leaf.
            label:     free-form display label.
            """
            self.value = float(value)
            self.grad = 0.0

            self._backward = _backward
            # A fresh list per node; a literal `[]` default would be one list
            # object shared by every leaf.
            self.inputs = [] if inputs is None else inputs

            self.op = op
            self.label = label

        def topological_sort(self):
            """Return every node reachable from ``self``, inputs before consumers."""
            topo_order = []
            visited = set()

            def dfs(node):
                if node not in visited:
                    visited.add(node)
                    for input in node.inputs:
                        dfs(input)
                    # Post-order append: a node lands after all of its inputs.
                    topo_order.append(node)

            dfs(self)
            return topo_order

        def backward(self):
            """Back-propagate from this node, accumulating into each ``grad``.

            Sets ``self.grad`` to 1.0 and runs every reachable node's
            ``_backward`` from the output towards the leaves. Gradients are
            added to, not reset; callers zero them beforehand if needed.
            """
            self.grad = 1.0

            topo_order = self.topological_sort()

            # Reversed order guarantees a node's grad is complete before it is
            # propagated to that node's inputs.
            for node in reversed(topo_order):
                node._backward()

        def __add__(self, other):
            """Return ``self + other``; a non-Scalar ``other`` becomes a constant node."""
            if not isinstance(other, ag.Scalar):
                other = ag.Scalar(other, label=f"{other}\nconst")

            output = ag.Scalar(self.value + other.value,
                               inputs=[self, other], op="add")

            def _backward():
                # Addition passes the upstream gradient unchanged to both operands.
                self.grad += output.grad
                other.grad += output.grad

            output._backward = _backward
            return output

        def __mul__(self, other):
            """Return ``self * other``; a non-Scalar ``other`` becomes a constant node."""
            if not isinstance(other, ag.Scalar):
                # Same constant handling as __add__.
                other = ag.Scalar(other, label=f"{other}\nconst")

            output = ag.Scalar(self.value * other.value, inputs=[self, other], op="mul")

            def _backward():
                # Product rule: each operand's gradient is scaled by the other's value.
                self.grad += other.value * output.grad
                other.grad += self.value * output.grad

            output._backward = _backward
            return output

        def __pow__(self, exponent): # exponent is just a python float
            """Return ``self ** exponent``; the exponent is a constant, not a graph node."""
            output = ag.Scalar(self.value ** exponent, inputs=[self], op=f"pow({exponent})")

            def _backward():
                # Power rule: d/dx x**n = n * x**(n - 1)
                self.grad += (exponent * self.value**(exponent-1)) * output.grad

            output._backward = _backward
            return output

        def __neg__(self):
            """Return ``-self`` as a new graph node."""
            output = ag.Scalar(-self.value, inputs=[self], op="neg")

            def _backward():
                self.grad += (-1) * output.grad

            output._backward = _backward
            return output

        def __repr__(self) -> str:
            return f"val:{self.value}, grad:{self.grad}"

In [6]:
def _get_values(scalars):
    """ 
    [a,b,c, ...] --> [a.value, b.value, c.value, ...]
    """
    return [s.value for s in scalars]

def _make_ag_scalars(vals, labels=None):
    """Build a new ag.Scalar for every raw value in ``vals``.

    When ``labels`` is given, the i-th scalar is labelled ``labels[i]``;
    otherwise every scalar gets an empty label.
    """
    return [
        ag.Scalar(raw, label="" if labels is None else labels[idx])
        for idx, raw in enumerate(vals)
    ]

def _zero_grads(output):
    for node in output.topological_sort():
        node.grad = 0.0

def gradcheck(f, inputs, eps=1e-6, atol=1e-6, rtol=1e-4, verbose=False):
    """
    Check back-propagated gradients of ``f`` against central finite differences.

    f       -- function taking ag.Scalar arguments (treated as leaves) and
               returning a single ag.Scalar loss
    inputs  -- list of ag.Scalar; only their values and labels are read,
               fresh scalars are created for every evaluation of ``f``
    eps     -- perturbation size for the finite difference
    atol    -- absolute error tolerance
    rtol    -- relative error tolerance (error divided by
               max(1, |auto|, |numerical|))
    verbose -- print the loss value and a PASS/FAIL line per input

    Returns True when every partial derivative is within ``atol`` or ``rtol``.
    """
    point = _get_values(inputs)

    # Gradients via backward (i.e., back-propagation) on fresh leaves.
    leaves = _make_ag_scalars(point, [x.label for x in inputs])
    loss = f(*leaves)
    loss_value = loss.value
    _zero_grads(loss)
    loss.backward()
    analytic = [leaf.grad for leaf in leaves]

    def central_difference(idx):
        # i-th partial derivative from two tiny, symmetric perturbations.
        shifted_up = point.copy()
        shifted_up[idx] += eps
        shifted_down = point.copy()
        shifted_down[idx] -= eps
        f_up = f(*_make_ag_scalars(shifted_up)).value
        f_down = f(*_make_ag_scalars(shifted_down)).value
        return (f_up - f_down) / (2.0 * eps)

    numeric = [central_difference(idx) for idx in range(len(inputs))]

    # Compare the two gradient estimates coordinate by coordinate.
    reports = []
    for idx, (g_auto, g_num) in enumerate(zip(analytic, numeric)):
        abs_err = abs(g_auto - g_num)
        rel_err = abs_err / max(1.0, abs(g_auto), abs(g_num))
        reports.append({
            "index": idx,
            "value": point[idx],
            "grads_auto": g_auto,
            "grads_numerical": g_num,
            "abs_err": abs_err,
            "rel_err": rel_err,
            "ok": (abs_err <= atol) or (rel_err <= rtol),
        })

    if verbose:
        print(f"f(x) = {loss_value}")
        for r in reports:
            status = "PASS" if r["ok"] else "FAIL"
            print(f"[{status}] x[{r['index']}]={r['value']:.6g} "
                  f"grads_auto={r['grads_auto']:.6g}  grads_numerical={r['grads_numerical']:.6g}  "
                  f"abs_err={r['abs_err']:.3e}  rel_err={r['rel_err']:.3e}")

    return all(r["ok"] for r in reports)



In [None]:
def test_func(a,b,c):
    """Return max(a, b)**2 + min(b, c)**2, built from ag operations."""
    max_squared = ag.max(a, b) ** 2
    min_squared = ag.min(b, c) ** 2
    return max_squared + min_squared

# Grad check

In [None]:
import numpy as np
import random

# Run the gradient check at 100 random points, each coordinate drawn with
# random.uniform(-1, 1).
for _ in range(100):
    inputs = [ag.Scalar(random.uniform(-1, 1)) for _coord in range(3)]
    a, b, c = inputs
    assert gradcheck(test_func, inputs, verbose=False)