<a href="https://colab.research.google.com/github/ishandahal/ml_projects/blob/main/nn.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
class NeuronModel():
    """
    Single layered neural network: one linear unit followed by a tanh
    activation, with hand-derived gradients for the mean squared error loss.
    """

    def __init__(self, num_features):
        """
        Creates the model with all weights and the bias set to zero.

        num_features: number of input features (columns of the input matrix)
        """
        self.num_features = num_features
        # Column vector, so torch.mm(x, weights) gives one net input per row of x.
        self.weights = torch.zeros(num_features, 1,
                                   dtype=torch.float)
        self.bias = torch.zeros(1, dtype=torch.float)

    def activation_func(self, x):
        """
        Calculates tanh activation function
        """

        # The explicit (e^x - e^-x) / (e^x + e^-x) form overflows to inf / inf = nan
        # for large |x|; torch.tanh saturates cleanly to +/-1 instead.
        return torch.tanh(x)

    def netinput_func(self, x, w, b):
        """
        Multiplies inputs with weights and adds the bias term

        x: 2-D input matrix (rows are examples), w: 2-D weight matrix,
        b: bias, broadcast over the rows of the product
        """

        return torch.add(torch.mm(x, w), b)

    def forward(self, x):
        """
        Forward pass through the network

        Returns the activations flattened to a 1-D tensor (one value per row of x).
        """

        netinputs = self.netinput_func(x, self.weights, self.bias)
        activations = self.activation_func(netinputs)
        return activations.view(-1)

    def backward(self, x, yhat, y):
        """
        Calculates gradients (partial derivatives of the mean squared error
        loss with respect to weights and bias) using the chain rule.

        x:    input matrix, same layout as passed to forward()
        yhat: the activations returned by forward(x)
        y:    targets, one per row of x (flattened internally)

        Returns a tuple (negative weight gradient, negative bias gradient).
        The sign is flipped so the results point in the descent direction;
        the weight gradient has the same shape as self.weights and the bias
        gradient is a scalar tensor.
        """

        netinputs = self.netinput_func(x, self.weights, self.bias)

        # Flatten to 1-D so every per-example quantity below has shape (n,).
        # Mixing (n,) with (n, 1) would silently broadcast to (n, n) and
        # corrupt the sums.
        activations = self.activation_func(netinputs).view(-1)

        # partial derivative of the loss with respect to the activation
        grad_loss_yhat = 2 * (yhat.reshape(-1) - y.reshape(-1))

        # partial derivative of the activation with respect to the net input:
        # d tanh(z) / dz = 1 - tanh(z)**2  (equivalently 4 / (e^z + e^-z)**2)
        grad_yhat_netinput = 1 - activations**2

        # chain rule (outer * inner), still one value per example
        grad_loss_netinput = grad_loss_yhat * grad_yhat_netinput
        num_examples = grad_loss_netinput.size(0)

        # d netinput / d weights = x  and  d netinput / d bias = 1,
        # averaged over the examples to match the mean in the loss
        grad_loss_weights = torch.mm(x.t(),
                                     grad_loss_netinput.view(-1, 1)) / num_examples
        grad_loss_bias = torch.sum(grad_loss_netinput) / num_examples

        return (-1)*grad_loss_weights, (-1)*grad_loss_bias

def loss(yhat, y):
    """
    Mean squared error between predictions and targets.

    Squares the element-wise difference of yhat and y and averages over
    all elements, returning a scalar tensor.
    """

    residuals = yhat - y
    return residuals.pow(2).mean()