In [2]:
import math
import os
os.environ['DEBUG'] = '4' # set this to 4 for maximum verbosity - as seen in `/docs/env_vars.md`

from typing import Tuple, Optional, cast
from tinygrad.helpers import argsort, DType
from tinygrad.ops import UnaryOps, BinaryOps, TernaryOps, ReduceOps
from tinygrad.tensor import Function
from tinygrad.lazy import LazyBuffer
from tinygrad.shape.symbolic import sint

In [3]:
# This file contains all the core ML ops (28, I think?) that tinygrad uses.
# My plan is to read and annotate this file before working:
#   - upwards to Tensor
#   - downwards to the actual ops (UnaryOps, BinaryOps, etc.)
# I might have to take a trip to the shape.symbolic thing that builds the shapetracker,
# but I'll cross that bridge when I get there. Let's go.

# Function is an abstract class that is the base class for all the ops.
# It's defined in tensor.py and keeps track of stuff like the device, requires_grad, etc.
# It has an `apply` method used to apply the function to a tensor.

In [None]:

# TODO - figure out why these are separate - maybe for functionality to be added later?
class Contiguous(Function):
    """Calls `contiguous()` on the input going forward; the gradient passes through unchanged."""

    def forward(self, x: LazyBuffer) -> LazyBuffer:
        # Delegate to the buffer's own `contiguous()` and hand back its result.
        contiguous_x = x.contiguous()
        return contiguous_x

    def backward(self, grad_output: LazyBuffer) -> LazyBuffer:
        # Identity: the incoming gradient is returned exactly as received.
        return grad_output

class ContiguousBackward(Function):
    """Mirror image of `Contiguous`: identity going forward, `contiguous()` on the gradient going backward."""

    def forward(self, x: LazyBuffer) -> LazyBuffer:
        # Identity: the input is returned exactly as received.
        return x

    def backward(self, grad_output: LazyBuffer) -> LazyBuffer:
        # Only the gradient gets the `contiguous()` call.
        contiguous_grad = grad_output.contiguous()
        return contiguous_grad

# This is a function to cast the input to a different dtype
class Cast(Function):
    """Casts the input to `dtype` going forward and casts the gradient back to the input's dtype going backward."""

    def forward(self, x: LazyBuffer, dtype: DType, bitcast: bool = False) -> LazyBuffer:
        # Remember where we came from: backward() reuses both the source dtype and the bitcast flag.
        self.input_dtype = x.dtype
        self.bitcast = bitcast
        # Cast to the requested *target* dtype.
        return x.cast(dtype, bitcast)

    def backward(self, grad_output: LazyBuffer) -> LazyBuffer:
        # Undo the forward cast on the gradient, using the values saved in forward().
        original_dtype = self.input_dtype
        return grad_output.cast(original_dtype, self.bitcast)

In [None]:

# ! Unary Ops ================================================================

class Zero(Function):
    """Replaces the input with the constant 0; the gradient is likewise replaced with the constant 0."""

    def forward(self, x: LazyBuffer) -> LazyBuffer:
        # `const` is called on the input buffer itself with the fill value 0.
        zeros = x.const(0)
        return zeros

    def backward(self, grad: LazyBuffer) -> LazyBuffer:
        # Same construction, applied to the incoming gradient.
        zero_grad = grad.const(0)
        return zero_grad

class Neg(Function):
    """Negation: forward is -x, backward is -grad."""

    def forward(self, x: LazyBuffer) -> LazyBuffer:
        # `e` applies an op to the buffer; here the unary NEG op.
        negated = x.e(UnaryOps.NEG)
        return negated

    def backward(self, grad: LazyBuffer) -> LazyBuffer:
        # d/dx (-x) = -1, so the gradient is simply negated too.
        negated_grad = grad.e(UnaryOps.NEG)
        return negated_grad
    
class Sin(Function):
    """Sine: forward is sin(x), backward is cos(x) * grad."""

    def forward(self, x: LazyBuffer) -> LazyBuffer:
        # Keep the input around; backward() needs it to evaluate the derivative.
        self.x = x
        sin_x = x.e(UnaryOps.SIN)
        return sin_x

    def backward(self, grad: LazyBuffer) -> LazyBuffer:
        # d/dx sin(x) = cos(x), written here as sin(pi/2 - x) using UnaryOps.SIN.
        half_pi = self.x.const(math.pi / 2)
        cos_x = half_pi.e(BinaryOps.SUB, self.x).e(UnaryOps.SIN)
        # Chain rule: scale the upstream gradient by the local derivative.
        return cos_x.e(BinaryOps.MUL, grad)

# NOTE: maximum(x, 0) behaves differently where x=0 
class ReLu(Function):
    """ReLU: forward is max(x, 0), backward lets the gradient through only where the output is > 0."""

    def forward(self, x: LazyBuffer) -> LazyBuffer:
        # ReLU is built from the *binary* MAX op against a zero constant.
        zeros = x.const(0)
        self.ret = x.e(BinaryOps.MAX, zeros)  # saved: backward() derives its mask from the output
        return self.ret

    def backward(self, grad_output: LazyBuffer) -> LazyBuffer:
        # Mask is (0 < ret), so positions where the output is exactly 0 get no gradient.
        positive_mask = self.ret.const(0).e(BinaryOps.CMPLT, self.ret)
        # Chain rule: mask * upstream gradient.
        return positive_mask.e(BinaryOps.MUL, grad_output)

class Log(Function):
    """Natural log: forward is ln(x) computed via LOG2, backward is grad / x."""

    def forward(self, x: LazyBuffer) -> LazyBuffer:
        # Keep the input; the derivative 1/x is evaluated from it in backward().
        self.x = x
        # Change of base: ln(x) = log2(x) * ln(2).
        log2_x = x.e(UnaryOps.LOG2)
        ln_2 = x.const(math.log(2))
        return log2_x.e(BinaryOps.MUL, ln_2)

    def backward(self, grad_output: LazyBuffer) -> LazyBuffer:
        # d/dx ln(x) = 1/x, so the upstream gradient is divided by the saved input.
        saved_input = self.x
        return grad_output.e(BinaryOps.DIV, saved_input)

class Exp(Function):
    """Exponential: forward is e**x computed via EXP2, backward is e**x * grad."""

    def forward(self, x: LazyBuffer) -> LazyBuffer:
        # Change of base: e**x = 2**(x / ln(2)).
        inv_ln_2 = x.const(1/math.log(2))
        scaled = x.e(BinaryOps.MUL, inv_ln_2)
        self.ret = scaled.e(UnaryOps.EXP2)  # saved: exp is its own derivative
        return self.ret

    def backward(self, grad_output: LazyBuffer) -> LazyBuffer:
        # d/dx e**x = e**x, which is exactly the output saved in forward().
        exp_x = self.ret
        return exp_x.e(BinaryOps.MUL, grad_output)

class Sqrt(Function):
    """Square root: forward is sqrt(x), backward is grad / (2 * sqrt(x))."""

    def forward(self, x: LazyBuffer) -> LazyBuffer:
        root = x.e(UnaryOps.SQRT)
        self.ret = root  # saved: the derivative is expressed in terms of the output
        return self.ret

    def backward(self, grad_output: LazyBuffer) -> LazyBuffer:
        # d/dx sqrt(x) = 1 / (2 * sqrt(x)); build the denominator from the saved output.
        two = self.ret.const(2)
        denominator = self.ret.e(BinaryOps.MUL, two)
        return grad_output.e(BinaryOps.DIV, denominator)
    
# NOTE: the implicit derivative of sigmoid is not stable
# https://towardsdatascience.com/derivative-of-the-sigmoid-function-536880cf918e
# TODO: have the backend automatically find this 
class Sigmoid(Function):
    """Sigmoid: forward is 1 / (1 + e**-x) computed via EXP2, backward is s * (1 - s) * grad."""

    def forward(self, x: LazyBuffer) -> LazyBuffer:
        numerator = x.const(1)
        one = x.const(1)
        # e**-x via change of base: 2**(x * (-1 / ln(2))).
        neg_inv_ln_2 = x.const(-1/math.log(2))
        exp_neg_x = x.e(BinaryOps.MUL, neg_inv_ln_2).e(UnaryOps.EXP2)
        denominator = one.e(BinaryOps.ADD, exp_neg_x)
        self.ret = numerator.e(BinaryOps.DIV, denominator)  # saved: backward() only needs the output
        return self.ret

    def backward(self, grad_output: LazyBuffer) -> LazyBuffer:
        # Local derivative in terms of the output s: s * (1 - s).
        one_minus_s = self.ret.const(1).e(BinaryOps.SUB, self.ret)
        local_grad = self.ret.e(BinaryOps.MUL, one_minus_s)
        # Chain rule: scale by the upstream gradient.
        return local_grad.e(BinaryOps.MUL, grad_output)

In [None]:

# ! Binary Ops ===============================================================

class Less(Function):
    """Elementwise comparison: forward computes `x < y`."""

    def forward(self, x: LazyBuffer, y: LazyBuffer) -> LazyBuffer:
        # Must be the compare-less-than op; BinaryOps.ADD here would compute x + y,
        # which contradicts the class name and makes `Less` an alias for addition.
        return x.e(BinaryOps.CMPLT, y)