# Assignment 1

In this assignment, you will investigate the precision issues in computing the gradient. You will also implement a simple linear regression model using the custom autograd engine.

## Task 1: Precision issues

In [3]:
def f(x):
    return 3 * x ** 2 - 4 * x + 5

def gradient(f, x, h=0.0001):
    return (f(x + h) - f(x)) / h

gradient(f, 2)

8.000300000023941

In [None]:
# What happens if we keep decreasing h?
gradient(f, 2, h=0.0000000000000001)

################################################################################
# TODO:                                                                        #
# Why is the gradient 0?                                                       #
# If you don't know, google it!                                                #
################################################################################
# *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
# ANSWER: 수학적으로는 h가 점점 감소함에 따라 gradient 값이 실제값에 가까워져야 하지만, 파이썬의 부동소수점 오차 때문에 일정 값 이하로 떨어지면 0이 된다.
# 즉 h가 일정 값 이하로 떨어지면 파이썬이 h값을 그냥 0으로 간주해버려 계산식의 분자(f(x + h) - f(x))가 0이 되기 때문에 최종 gradient가 0이 되는 것이다.
# *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****

0.0

## Task 2: Linear Regression

Let's review the training loop of a simple linear regression model.

### Importing Libraries

In [4]:
import math
import random

In [128]:
class Tensor:
    def __init__(self, data, _children=(), _operation=''):
        self.data = data
        self._prev = set(_children)
        self.gradient = 0
        self._backward = lambda: None

    def __repr__(self):
        return f"tensor=({self.data})"

    def __add__(self, other):  # self + other
        output = Tensor(self.data + other.data, (self, other), '+')
        def _backward():
            self.gradient = 1 * output.gradient
            other.gradient = 1 * output.gradient
        output._backward = _backward
        return output

    def __mul__(self, other):  # self * other
        output = Tensor(self.data * other.data, (self, other), '*')
        def _backward():
            self.gradient = other.data * output.gradient
            other.gradient = self.data * output.gradient
        output._backward = _backward
        return output

    def tanh(self):  # tanh(self)
        output = Tensor(math.tanh(self.data), (self,), 'tanh')
        def _backward():
            self.gradient = (1.0 - math.tanh(self.data) ** 2) * output.gradient
        output._backward = _backward
        return output

    def __pow__(self, power):  # self ** power
        assert isinstance(power, (int, float)), "Power must be an int or a float"
        output = Tensor(self.data ** power, (self,), f'**{power}')
        def _backward():
            self.gradient = power * (self.data ** (power - 1)) * output.gradient
        output._backward = _backward
        return output

    def backward(self):
        topo = []
        visited = set()
        def build_topo(v):
            if v not in visited:
                visited.add(v)
                for child in v._prev:
                    build_topo(child)
                topo.append(v)
        build_topo(self)
        self.gradient = 1
        for node in reversed(topo):
            node._backward()

    def __neg__(self): # -self
        return self * Tensor(-1.0)

    def __sub__(self, other): # self - other
        return self + (-other)

### Data

**GOAL: Find the best line that fits the following data.**

![Data](../../assets/linear.png)

(Image credit: MIT 18.06)

(1, 1), (2, 2), (3, 2)

In [121]:
# Input, Target data
input = [Tensor(1), Tensor(2), Tensor(3)]
target = [Tensor(1), Tensor(2), Tensor(2)]

### Model

In [122]:
# Linear regression model
class Linear:
    def __init__(self):
        self.a = Tensor(random.uniform(-1, 1))
        self.b = Tensor(random.uniform(-1, 1))

    def __call__(self, x):
        y = self.a * x + self.b
        return y

    def parameters(self):
        return self.a, self.b

In [169]:
# Initialize the model
model = Linear()

# Example forward pass
print(f"Output: {model(input[0])}")

Output: tensor=(1.3559944791146348)


### Training

Implement the training loop for the linear regression model.

Choose an appropriate learning rate.

In [170]:
# 2e-4
lr = 2e-3  # learning rate

# Training loop
for step in range(10):
    total_loss = Tensor(0)
    
    # Forward pass
    for x, y in zip(input, target):
        ################################################################################
        # TODO:                                                                        #
        # Implement the forward pass.                                                  #
        ################################################################################
        # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
        logits = model(x)
        loss = (logits - y) ** 2
        # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
        total_loss += loss

    # Backward pass
    ################################################################################
    # TODO:                                                                        #
    # Implement the backward pass.                                                 #
    ################################################################################
    # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
    for param in model.parameters():
        param.gradient = 0 
    loss.backward()
    # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****

    # Update weights
    ################################################################################
    # TODO:                                                                        #
    # Update the weights of the model using the gradients.                         #
    ################################################################################
    # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
    for param in model.parameters():
        param.data = param.data - lr * param.gradient
    # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
            
    print(f"Step: {step}, Loss: {total_loss.data}")

Step: 0, Loss: 0.36947214512163445
Step: 1, Loss: 0.34778128220387106
Step: 2, Loss: 0.3281329756159371
Step: 3, Loss: 0.31035340923449134
Step: 4, Loss: 0.294282941308545
Step: 5, Loss: 0.2797749713011397
Step: 6, Loss: 0.26669489644613614
Step: 7, Loss: 0.2549191509514317
Step: 8, Loss: 0.24433432133551386
Step: 9, Loss: 0.23483633189620493


In [171]:
# Print the final weights of the model
print(f"y = {model.a.data}*x + {model.b.data}")

y = 0.5162706695042661*x + 0.7745218598367809


## Extra Credit

Linear regression is the simplest form of neural networks. It actually does not require gradient descent to solve for the weights.

**Find a way to get the weights of the linear regression model without using gradient descent.**

In [None]:
################################################################################
# TODO:                                                                        #
# y = ax + b                                                                   #
# x, y = (1, 1), (2, 2), (3, 2)                                                #
# Find the values of a and b without using gradient descent.                   #
################################################################################
# *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
#
# 최소제곱법(Ordinary Least Squares)을 사용하여 풀 수 있다. 
# a = (n∑xy - ∑x∑y)/(n∑x^2 - (∑x)^2)
#   = (3*11 - 6*5)/3*14 - 6^2 = 0.5
# b = (∑y − a∑x)/n
#   = (5 − 0.5*6)/3 = 약 0.67
#
# a = 0.5
# b = 0.67
#
# *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****