In [23]:
import numpy as np
import math
import torch
import tensorflow as tf

In [160]:
class Value:
  
  def __init__(self, data, _children=(), _op='', label=''):
    self.data = data
    self.grad = 0.0
    self._backward = lambda: None
    self._prev = set(_children)
    self._op = _op
    self.label = label

  def __repr__(self):
    return f"Value(data={self.data})"
  
  def __add__(self, other):
    other = other if isinstance(other, Value) else Value(other)
    out = Value(self.data + other.data, (self, other), '+')
    
    def _backward():
      self.grad += 1.0 * out.grad
      other.grad += 1.0 * out.grad
    out._backward = _backward
    
    return out

  def __mul__(self, other):
    other = other if isinstance(other, Value) else Value(other)
    out = Value(self.data * other.data, (self, other), '*')
    
    def _backward():
      self.grad += other.data * out.grad
      other.grad += self.data * out.grad
    out._backward = _backward
      
    return out
  
  def __pow__(self, other):
    assert isinstance(other, (int, float)), "only supporting int/float powers for now"
    out = Value(self.data**other, (self,), f'**{other}')

    def _backward():
        self.grad += other * (self.data ** (other - 1)) * out.grad
    out._backward = _backward

    return out
  
  def __rmul__(self, other): # other * self
    return self * other

  def __truediv__(self, other): # self / other
    return self * other**-1

  def __neg__(self): # -self
    return self * -1

  def __sub__(self, other): # self - other
    return self + (-other)

  def __radd__(self, other): # other + self
    return self + other

  def tanh(self):
    x = self.data
    t = (math.exp(2*x) - 1)/(math.exp(2*x) + 1)
    out = Value(t, (self, ), 'tanh')
    
    def _backward():
      self.grad += (1 - t**2) * out.grad
    out._backward = _backward
    
    return out
  
  def exp(self):
    x = self.data
    out = Value(math.exp(x), (self, ), 'exp')
    
    def _backward():
      self.grad += out.data * out.grad # NOTE: in the video I incorrectly used = instead of +=. Fixed here.
    out._backward = _backward
    
    return out
  
  
  def backward(self):
    
    topo = []
    visited = set()
    def build_topo(v):
      if v not in visited:
        visited.add(v)
        for child in v._prev:
          build_topo(child)
        topo.append(v)
    build_topo(self)
    
    self.grad = 1.0
    for node in reversed(topo):
      node._backward()


c = Value(2.0)
a = Value(5.0)
d = a + c # a.__add__(c), en este caso, el objeto (a), se le suma otro objeto c
# pero para eso tienen que estar definidos como objetos.

# backprogration is the implementation of the chainrule
# to get the final gradient from each weight, so we can manipulate the data of them
# and get the result that we want

# el mecanismo de _backward es el siguiente:
# agarra y distribuye el gradiente del resultado, hacia sus hijos.
# lo hace dependiendo la operacion matematica.
# pero cuando llamamamos out._backward lo que pasa es que 
# ._backward busca los hijos de out, y calcula el gradiente y se los inserta.


In [161]:
# inputs x1, x2
x1 = Value(2.0)
x2= Value(0.0)
#weights w1 w2
w1 = Value(-3.0)
w2 = Value(1.0)
# bias of the neuron
b = Value(6.7)
# x1w1 +x2w2 + b, lo hace separado para tener a mano las variables desmenuzadas
x1w1 = x1*w1
x2w2 = x2*w2
x1w1x2w2 = x1w1 + x2w2
x2.data
x2
# the sum
n = x1w1x2w2 + b
# luego va a estar L
e = (2*n).exp()
L = (e-1)/(e+1)

In [162]:
x1 = torch.Tensor([2.0]).double()                ; x1.requires_grad = True
x2 = torch.Tensor([0.0]).double()                ; x2.requires_grad = True
w1 = torch.Tensor([-3.0]).double()               ; w1.requires_grad = True
w2 = torch.Tensor([1.0]).double()                ; w2.requires_grad = True
b = torch.Tensor([6.8813735870195432]).double()  ; b.requires_grad = True
n = x1*w1 + x2*w2 + b
o = torch.tanh(n)

print(o.data.item())
o.backward()

print('---')
print('x2', x2.grad.item())
print('w2', w2.grad.item())
print('x1', x1.grad.item())
print('w1', w1.grad.item())


0.7071066904050358
---
x2 0.5000001283844369
w2 0.0
x1 -1.5000003851533106
w1 1.0000002567688737


In [163]:
import random

class Neuron:
    def __init__(self, nin):
        self.w = [Value(random.uniform(-1,1)) for _ in range(nin)]# random.uniform(1,-1) devuelve un numero random entre esos valores, y el loop lo hace hasta la cantidad de nin (number of inputs)
        self.b = Value(random.uniform(-1,1)) # aca queremos un numero random entre 1 y -1 como bias, a los dos self.w, self.b los encerramos en Value
        # aca lo que va a pasar es que cuando tengamos una neurona, vamos a poder ver sus weights y su bias usando self.w y self.b
    # una neurona tiene nin weights y un solo bias, y la neurona es la suma del producto entre los inputs y los weight más el bias.
    def __call__(self, x): # call is a built in funtion, it returns the x
        act = sum((wi*xi for wi, xi in zip(self.w, x)), self.b)
        out = act.tanh()
        return out
    def parameters(self):
        return self.w + [self.b] # una lista de Values, con .grad y .data

class Layer:
    def __init__(self, nin, nout):
        self.neurons = [Neuron(nin) for _ in range(nout)]

    def __call__(self, x):
        outs = [n(x) for n in self.neurons] # aca puede estar el error con Neuron(x)
        return outs[0] if len(self.neurons) == 1 else outs

    def parameters(self):
        params = []
        for neuron in self.neurons:
            params.extend(neuron.parameters())
        return params
        # return [p for neuron in self.neurons for p in neuron.parameters()] es otra forma de hacerlo tambien.
        
class MLP:
    def __init__(self, nin, nouts):
        sz = [nin] + nouts
        self.layers  = [Layer(sz[i], sz[i+1]) for i in range(len(nouts))]
    def __call__(self, x):
        for layer in self.layers:
            x = layer(x)
        return x
    def parameters(self):
        return [p for layer in self.layers for p in layer.parameters()]

In [250]:
n = MLP(3,[4,4,1])

In [251]:
xs = [
    [1.0, 3.0 , -2.0],
    [2.0, -3.0 , -1.0],
    [-1.0, 4.0 , -1.0],
    [2.0, -1.0 , 2.0]
]
ys = [1.0, -1.0 , -1.0, 1.0]

In [252]:
for k in range(20):
    #foward pass
    ypred = [n(x) for x in xs] # son los 4 outputs individuales
    loss = sum((yout - ygt)**2 for ygt, yout in zip (ys,ypred))

    #backward pass. Osea vamos a calcular el gradiente de todos los weighs and biases con respecto al resultado final
    loss.backward()
    
    #gradient descent, changing the a little to improve results.
    # changin in direction of the negative gradient of the parameters with respect to the result
    for p in n.parameters():
        p.data += -0.05 * p.grad # mueve un poco a cada weight en direccion hacia el punto de menor loss, osea punto bajo.
    
    print(k ,loss.data)
    
    
    

    

0 6.96259472502544
1 2.902523465127536
2 0.38703656694649125
3 2.1251900437111844
4 0.17645565901959162
5 0.5475034783785049
6 0.15831778336241756
7 2.292336926009287
8 0.5049343882823522
9 0.022413440428784088
10 0.1685684637798484
11 0.44594039073880143
12 0.05689450931710563
13 0.02848399014780882
14 4.01577742222443
15 0.035853226640015994
16 0.0009118074075504345
17 0.00018034660238233082
18 3.6513845986803832
19 1.1950851266459446e-06


In [253]:
loss

Value(data=1.1950851266459446e-06)

In [254]:
ypred

[Value(data=0.999876161725799),
 Value(data=-0.9999999607160655),
 Value(data=-0.999939913229783),
 Value(data=0.998915500674509)]