In [None]:
# ```python
# # Pseudocode
# for epoch in range(num_epochs):
#     for batch in training_data:
#         # 1. Forward pass - make predictions
#         predictions = model(batch)
        
#         # 2. Calculate loss - how wrong are we?
#         loss = loss_function(predictions, targets)
        
#         # 3. Backward pass - calculate gradients
#         gradients = compute_gradients(loss)
        
#         # 4. Update weights - take a step downhill
#         for param in model.parameters():
#             param -= learning_rate * gradients[param]
# ```

# **That's it. That's gradient descent.**

# ---

# ### **The Update Rule**

# **Core equation:**
# ```
# w_new = w_old - α × ∂L/∂w
# ```

# Where:
# - **w** = weight
# - **α** (alpha) = learning rate (step size)
# - **∂L/∂w** = gradient (slope)

# **In plain English:**
# "Move each weight in the opposite direction of its gradient, by an amount proportional to how strongly that weight affects the error."

# ---

# ### **Learning Rate: The Most Important Hyperparameter**

# **Learning rate (α)** controls step size (the α values below are illustrative; the right scale depends on the model, data, and optimizer):
# ```
# Too small (α = 0.00001):
# - Safe, won't overshoot
# - But SLOW - takes forever
# - Gets stuck in local minima

# Too large (α = 0.1):
# - Fast initially
# - But OVERSHOOTS minimum
# - Loss explodes, model diverges

# Just right (α = 0.001):
# - Fast enough
# - Stable convergence
# - Goldilocks zone
# ```

# **Visual:**
# ```
# Loss
#   |     α too large
#   |    /\  /\  /\    (bouncing around)
#   |   /  \/  \/  \
#   |  
#   |     α just right
#   |    \
#   |     \___
#   |        \____    (smooth descent)
#   |            \___
#   |
#   |     α too small
#   |    \
#   |     \
#   |      \
#   |       \_________  (painfully slow)
#   |_________________ Iterations
# ```

In [2]:
import torch
from torch import nn

In [3]:
# Draw three standard-normal samples, then switch on gradient tracking
# in place so autograd records every operation applied to x.
x = torch.randn(3).requires_grad_()
print(x)

tensor([ 1.6695, -1.7944, -0.0315], requires_grad=True)


In [4]:
# Shift every element of x by 2; the result stays attached to the autograd graph.
y = x.add(2)

In [5]:
# Display y (x shifted by 2). Because y was computed from a tensor that
# requires grad, the printout carries a grad_fn for the addition.
print(y)

tensor([3.6695, 0.2056, 1.9685], grad_fn=<AddBackward0>)


In [6]:
# Element-wise 2 * y**2, evaluated in the same order as y * y * 2:
# square first, then scale by 2.
z = y.mul(y).mul(2)
print(z)

tensor([26.9299,  0.0846,  7.7497], grad_fn=<MulBackward0>)


In [8]:
# Collapse z to a single scalar (its mean) and rebind the name, so that
# backward() can later be called on it without an explicit gradient argument.
z = torch.mean(z)

In [9]:
# Backpropagate from the scalar z to the leaf tensor x.
# With z = mean(2 * (x + 2)**2) over 3 elements, this stores
# dz/dx = 4 * (x + 2) / 3 in x.grad.
z.backward() 

In [10]:
# z is the scalar mean at this point; calling backward() does not change
# its value, it only populates gradients on the leaf tensor.
print(z)

tensor(11.5881, grad_fn=<MeanBackward0>)


In [11]:
# Gradient of z with respect to x, filled in by z.backward():
# element-wise 4 * (x + 2) / 3 for z = mean(2 * (x + 2)**2).
print(x.grad)

tensor([4.8926, 0.2742, 2.6246])


In [None]:
# Fixed float32 vector with three components.
# NOTE(review): not used in the visible cells — presumably meant as the
# `gradient` argument for a backward() call on a non-scalar tensor; confirm.
v = torch.tensor([1e-1, 1.0, 1e-3], dtype=torch.float32)