In [1]:
import numpy as np
import torch

import utils

### (I) NumPy

In [2]:
# Fit the coefficients of a truncated Fourier series to a step function by
# gradient descent, with the gradients derived by hand (NumPy only).
NUM_FREQUENCIES = 2
TOTAL_STEPS = 4000
LEARNING_RATE = 1e-6
SEED = 0  # fixed so a fresh-kernel re-run starts from the same coefficients


def TARGET_FUNC(x):
    """Return 1 where ``x > 1`` and 0 elsewhere (element-wise step function)."""
    return 1 * (x > 1)


# Local generator: reproducible without touching NumPy's global random state.
rng = np.random.default_rng(SEED)

x = np.linspace(-np.pi, np.pi, 2000)
y = TARGET_FUNC(x)

# Basis terms cos(n*x) and sin(n*x); row n-1 holds frequency n.
x_cos = np.array([np.cos(n*x) for n in range(1, NUM_FREQUENCIES+1)])
x_sin = np.array([np.sin(n*x) for n in range(1, NUM_FREQUENCIES+1)])

# Randomly initialised coefficients: one scalar offset plus one weight per basis row.
a_0 = rng.standard_normal()
A_n = rng.standard_normal(NUM_FREQUENCIES)
B_n = rng.standard_normal(NUM_FREQUENCIES)

# Snapshots of the prediction and the coefficients, taken every 100 steps.
y_pred_list = []
coeffs_list = []

for step in range(TOTAL_STEPS):

    # compute `y_pred` using your coeffs, and the terms `x_cos`, `x_sin`
    y_pred = (0.5 * a_0) + (A_n @ x_cos) + (B_n @ x_sin)

    # compute `loss`, which is the sum of squared error between `y` and `y_pred`
    sq_error = (y - y_pred) ** 2
    loss = sq_error.sum()

    if step % 100 == 0:
        print(f"loss = {loss:.3f}")
        # Copy the arrays: A_n and B_n are updated in place further down.
        coeffs_list.append([a_0, A_n.copy(), B_n.copy()])
        y_pred_list.append(y_pred)

    # compute gradients of coeffs with respect to `loss`
    # (chain rule through loss = sum((y - y_pred) ** 2))
    dL_dy_pred = 2 * (y_pred - y)
    dL_da_0 = (0.5 * dL_dy_pred).sum()
    dL_dA_n = (dL_dy_pred * x_cos).sum(axis=1)
    dL_dB_n = (dL_dy_pred * x_sin).sum(axis=1)

    # update weights using gradient descent (using the parameter `LEARNING_RATE`)
    a_0 -= dL_da_0 * LEARNING_RATE
    A_n -= dL_dA_n * LEARNING_RATE
    B_n -= dL_dB_n * LEARNING_RATE

loss = 2339.975
loss = 1599.035
loss = 1100.922
loss = 765.791
loss = 540.099
loss = 387.933
loss = 285.196
loss = 215.717
loss = 168.634
loss = 136.653
loss = 114.866
loss = 99.975
loss = 89.757
loss = 82.713
loss = 77.832
loss = 74.428
loss = 72.039
loss = 70.349
loss = 69.144
loss = 68.276
loss = 67.646
loss = 67.184
loss = 66.841
loss = 66.585
loss = 66.391
loss = 66.243
loss = 66.129
loss = 66.041
loss = 65.972
loss = 65.917
loss = 65.874
loss = 65.840
loss = 65.812
loss = 65.790
loss = 65.773
loss = 65.758
loss = 65.747
loss = 65.737
loss = 65.730
loss = 65.723


In [50]:
# utils.visualise_fourier_coeff_convergence(x, y, y_pred_list, coeffs_list)

### (II) PyTorch & Tensors

In [4]:
# Same Fourier-coefficient fit as a hand-derived gradient descent, but with
# torch tensors in place of NumPy arrays (autograd is not used here).
NUM_FREQUENCIES = 2
TOTAL_STEPS = 4000
LEARNING_RATE = 1e-6
SEED = 0  # fixed so a fresh-kernel re-run starts from the same coefficients


def TARGET_FUNC(x):
    """Return 1 where ``x > 1`` and 0 elsewhere (element-wise step function)."""
    return 1 * (x > 1)


# Local generator: reproducible without touching torch's global random state.
generator = torch.Generator().manual_seed(SEED)

x = torch.linspace(-torch.pi, torch.pi, 2000)
y = TARGET_FUNC(x)

# Basis terms cos(n*x) and sin(n*x); row n-1 holds frequency n.
x_cos = torch.stack([torch.cos(n*x) for n in range(1, NUM_FREQUENCIES+1)])
x_sin = torch.stack([torch.sin(n*x) for n in range(1, NUM_FREQUENCIES+1)])

# Randomly initialised coefficients: one offset plus one weight per basis row.
a_0 = torch.randn(1, generator=generator)
A_n = torch.randn(NUM_FREQUENCIES, generator=generator)
B_n = torch.randn(NUM_FREQUENCIES, generator=generator)

# Snapshots of the prediction and the coefficients, taken every 100 steps.
y_pred_list = []
coeffs_list = []

for step in range(TOTAL_STEPS):

    # compute `y_pred` using your coeffs, and the terms `x_cos`, `x_sin`
    y_pred = (0.5 * a_0) + (A_n @ x_cos) + (B_n @ x_sin)

    # compute `loss`, which is the sum of squared error between `y` and `y_pred`
    sq_error = (y - y_pred) ** 2
    loss = sq_error.sum()

    if step % 100 == 0:
        print(f"loss = {loss:.3f}")
        # Clone before converting: A_n and B_n are updated in place further down.
        coeffs_list.append([a_0.item(), A_n.clone().numpy(), B_n.clone().numpy()])
        y_pred_list.append(y_pred)

    # compute gradients of coeffs with respect to `loss`
    # (chain rule through loss = sum((y - y_pred) ** 2))
    dL_dy_pred = 2 * (y_pred - y)
    dL_da_0 = (0.5 * dL_dy_pred).sum()
    dL_dA_n = (dL_dy_pred * x_cos).sum(dim=1)
    dL_dB_n = (dL_dy_pred * x_sin).sum(dim=1)

    # update weights using gradient descent (using the parameter `LEARNING_RATE`)
    a_0 -= dL_da_0 * LEARNING_RATE
    A_n -= dL_dA_n * LEARNING_RATE
    B_n -= dL_dB_n * LEARNING_RATE

loss = 2529.620
loss = 1742.727
loss = 1210.694
loss = 850.312
loss = 605.660
loss = 439.137
loss = 325.438
loss = 247.522
loss = 193.898
loss = 156.809
loss = 131.011
loss = 112.951
loss = 100.215
loss = 91.163
loss = 84.674
loss = 79.978
loss = 76.548
loss = 74.017
loss = 72.130
loss = 70.710
loss = 69.631
loss = 68.804
loss = 68.164
loss = 67.666
loss = 67.274
loss = 66.965
loss = 66.719
loss = 66.523
loss = 66.366
loss = 66.240
loss = 66.138
loss = 66.056
loss = 65.989
loss = 65.935
loss = 65.891
loss = 65.855
loss = 65.826
loss = 65.802
loss = 65.783
loss = 65.767


In [49]:
# utils.visualise_fourier_coeff_convergence(x, y, y_pred_list, coeffs_list)

### (III) Autograd

In [6]:
# Sanity-check autograd on a small scalar expression, Q = 3a^3 - b^2.
a = torch.tensor(2, dtype=torch.float, requires_grad=True)
b = torch.tensor(3, dtype=torch.float, requires_grad=True)

Q = 3 * a.pow(3) - b.pow(2)
Q.backward()

# Compare each leaf's gradient with the analytic derivative:
# dQ/da = 9a^2 and dQ/db = -2b.
for leaf, analytic in ((a, 9 * a.pow(2)), (b, -2 * b)):
    assert analytic == leaf.grad
print("Grads computed successfully!")

Grads computed successfully!


In [10]:
# Fourier-coefficient fit by gradient descent, with the gradients obtained from
# autograd (`loss.backward()`) instead of being derived by hand.
NUM_FREQUENCIES = 2
TOTAL_STEPS = 4000
LEARNING_RATE = 1e-6
SEED = 0  # fixed so a fresh-kernel re-run starts from the same coefficients


def TARGET_FUNC(x):
    """Return 1 where ``x > 1`` and 0 elsewhere (element-wise step function)."""
    return 1 * (x > 1)


# Local generator: reproducible without touching torch's global random state.
generator = torch.Generator().manual_seed(SEED)

x = torch.linspace(-torch.pi, torch.pi, 2000)
y = TARGET_FUNC(x)

# Basis terms cos(n*x) and sin(n*x); row n-1 holds frequency n.
x_cos = torch.stack([torch.cos(n*x) for n in range(1, NUM_FREQUENCIES+1)])
x_sin = torch.stack([torch.sin(n*x) for n in range(1, NUM_FREQUENCIES+1)])

# Randomly initialised leaf tensors; requires_grad=True makes autograd track them.
a_0 = torch.randn(1, generator=generator, requires_grad=True)
A_n = torch.randn(NUM_FREQUENCIES, generator=generator, requires_grad=True)
B_n = torch.randn(NUM_FREQUENCIES, generator=generator, requires_grad=True)

# Snapshots of the prediction and the coefficients, taken every 100 steps.
y_pred_list = []
coeffs_list = []

for step in range(TOTAL_STEPS):

    # compute `y_pred` using your coeffs, and the terms `x_cos`, `x_sin`
    y_pred = (0.5 * a_0) + (A_n @ x_cos) + (B_n @ x_sin)

    # compute `loss`, which is the sum of squared error between `y` and `y_pred`
    sq_error = (y - y_pred) ** 2
    loss = sq_error.sum()

    if step % 100 == 0:
        print(f"loss = {loss:.3f}")
        # Detach before converting: tensors that require grad cannot be turned
        # into NumPy arrays directly.
        coeffs_list.append([a_0.item(), A_n.clone().detach().numpy(), B_n.clone().detach().numpy()])
        y_pred_list.append(y_pred.detach().numpy())

    # compute gradients of coeffs with respect to `loss`
    loss.backward()

    # update weights using gradient descent (using the parameter `LEARNING_RATE`)
    # The in-place steps run under no_grad so autograd does not record them.
    with torch.no_grad():
        a_0 -= a_0.grad * LEARNING_RATE
        A_n -= A_n.grad * LEARNING_RATE
        B_n -= B_n.grad * LEARNING_RATE
        # Clear the gradients; backward() would otherwise accumulate into them.
        a_0.grad = None
        A_n.grad = None
        B_n.grad = None

loss = 5217.872
loss = 3609.214
loss = 2514.821
loss = 1767.991
loss = 1256.489
loss = 904.672
loss = 661.495
loss = 492.460
loss = 374.206
loss = 290.881
loss = 231.703
loss = 189.309
loss = 158.659
loss = 136.284
loss = 119.787
loss = 107.502
loss = 98.261
loss = 91.244
loss = 85.866
loss = 81.709
loss = 78.471
loss = 75.930
loss = 73.924
loss = 72.331
loss = 71.061
loss = 70.043
loss = 69.224
loss = 68.564
loss = 68.030
loss = 67.598
loss = 67.247
loss = 66.961
loss = 66.729
loss = 66.540
loss = 66.386
loss = 66.260
loss = 66.157
loss = 66.073
loss = 66.004
loss = 65.948


In [48]:
# utils.visualise_fourier_coeff_convergence(x, y, y_pred_list, coeffs_list)