In [508]:
import torch
import numpy as np
import torch.nn.functional as F

from toy_model import *
# np.random.seed(seed=1)
# torch.manual_seed(1)
G = G2Model()

# Part 1. Tensor basics (2p)

In [509]:
w = torch.tensor(1, dtype = torch.float32)
x = torch.tensor(2.0)
t = torch.tensor(np.float32(3))
b = torch.tensor(4, dtype = torch.float32)

w.requires_grad = True
a = x + b
y = torch.maximum(a*w,torch.tensor(0))
l = torch.pow(y - t, 2) + torch.pow(w,2)

print("Derivative of l w.r.t. y is ", torch.autograd.grad(l,y,retain_graph=True))

l.backward() # Compute derivative w.r.t all leaf variables
print("w.grad value is ",w.grad)

print("Initial w: ", w.data)
with torch.no_grad():
    w = w - 0.1*w.grad
print("Changed w: ", w.data)

Derivative of l w.r.t. y is  (tensor(6.),)
w.grad value is  tensor(38.)
Initial w:  tensor(1.)
Changed w:  tensor(-2.8000)


# Task 2

## Step 1

In [510]:
def nn_forward(x,y,w1,b1,w2,b2):
    # Calculating linear layers
    linear1 = torch.tanh(torch.matmul(x, w1.t()) + b1)
    linear2 = (torch.matmul(w2, linear1.t()) + b2).T

    # Calculating loss
    loss = torch.mean(F.logsigmoid(linear2 * y))

    return loss

N = 40

# Generate train/test data
train_data = G.generate_sample(N)
x_train,y_train = train_data

# Setting train data
x = torch.from_numpy(x_train)
x.float()
y = torch.from_numpy(y_train)
y = y.T

# Hyperparameters
hidden_size = 500

In [511]:
def generate_params(x,hidden_size, dtype):

    # Setting parameters
    w1 = torch.rand(hidden_size, x.shape[1],dtype=dtype) * 2 - 1
    b1 = torch.rand(hidden_size,dtype=dtype) * 2 - 1
    w2 = torch.rand(1, hidden_size,dtype=dtype) * 2 - 1
    b2 = torch.rand(1,dtype=dtype) * 2 - 1

    w1.requires_grad=True
    b1.requires_grad=True
    w2.requires_grad=True
    b2.requires_grad=True
    return w1,b1,w2,b2

w1,b1,w2,b2 = generate_params(x, hidden_size,dtype=torch.float32)
loss = nn_forward(x,y,w1,b1,w2,b2)
loss.backward()

RuntimeError: expected m1 and m2 to have the same dtype, but got: double != float

## Step 2

In [None]:
epsilon = 1e-4

def normalize_tensor(tensor):
  """Normalizes a tensor to have a norm of 1."""
  return tensor / tensor.norm()

def calc_grad(loss1,loss2,epsilon):
  return (loss1 - loss2) / (2*epsilon)


u_w1 = normalize_tensor(torch.rand(w1.shape,dtype=torch.float32) * 2 - 1)
u_b1 = normalize_tensor(torch.rand(b1.shape,dtype=torch.float32) * 2 - 1)
u_w2 = normalize_tensor(torch.rand(w2.shape,dtype=torch.float32) * 2 - 1)
u_b2 = normalize_tensor(torch.rand(b2.shape,dtype=torch.float32) * 2 - 1)

g_w1 = calc_grad(nn_forward(x,y,w1 + epsilon * u_w1,b1,w2,b2), nn_forward(x,y,w1 - epsilon * u_w1,b1,w2,b2), epsilon)
g_b1 = calc_grad(nn_forward(x,y,w1,b1 + epsilon * u_b1,w2,b2), nn_forward(x,y,w1,b1 - epsilon * u_b1,w2,b2), epsilon)
g_w2 = calc_grad(nn_forward(x,y,w1,b1,w2 + epsilon * u_w2,b2), nn_forward(x,y,w1,b1,w2 - epsilon * u_w2,b2), epsilon)
g_b2 = calc_grad(nn_forward(x,y,w1,b1,w2,b2 + epsilon * u_b2), nn_forward(x,y,w1,b1,w2,b2 - epsilon * u_b2), epsilon)

print(f"epsilon : {epsilon}, dtype: torch.float32")
print(f"Grad error in w1: {g_w1}")
print(f"Grad error in b1: {g_b1}")
print(f"Grad error in w2: {g_w2}")
print(f"Grad error in b2: {g_b2}")

epsilon : 0.0001, dtype: torch.float32
Grad error in w1: 0.06556510925292969
Grad error in b1: 0.013113021850585938
Grad error in w2: -0.035762786865234375
Grad error in b2: -0.30040740966796875
torch.Size([500, 2])
torch.Size([500, 2])


In [None]:
x = torch.from_numpy(x_train)
x = x.double()

# Setting parameters
w1 = torch.rand(hidden_size, x.shape[1],dtype=torch.float64) * 2 - 1
b1 = torch.rand(hidden_size,dtype=torch.float64) * 2 - 1
w2 = torch.rand(1, hidden_size,dtype=torch.float64) * 2 - 1
b2 = torch.rand(1,dtype=torch.float64) * 2 - 1

w1.requires_grad=True
b1.requires_grad=True
w2.requires_grad=True
b2.requires_grad=True

loss = nn_forward(x,y,w1,b1,w2,b2)

# Backward
loss.backward()


In [None]:
epsilon = 1e-5

def normalize_tensor(tensor):
  """Normalizes a tensor to have a norm of 1."""
  return tensor / tensor.norm()

def calc_grad(loss1,loss2,epsilon):
  return (loss1 - loss2) / (2*epsilon)


u_w1 = normalize_tensor(torch.rand(w1.shape,dtype=torch.float64) * 2 - 1)
u_b1 = normalize_tensor(torch.rand(b1.shape,dtype=torch.float64) * 2 - 1)
u_w2 = normalize_tensor(torch.rand(w2.shape,dtype=torch.float64) * 2 - 1)
u_b2 = normalize_tensor(torch.rand(b2.shape,dtype=torch.float64) * 2 - 1)

x.double()

g_w1 = calc_grad(nn_forward(x,y,w1 + epsilon * u_w1,b1,w2,b2), nn_forward(x,y,w1 - epsilon * u_w1,b1,w2,b2), epsilon)
g_b1 = calc_grad(nn_forward(x,y,w1,b1 + epsilon * u_b1,w2,b2), nn_forward(x,y,w1,b1 - epsilon * u_b1,w2,b2), epsilon)
g_w2 = calc_grad(nn_forward(x,y,w1,b1,w2 + epsilon * u_w2,b2), nn_forward(x,y,w1,b1,w2 - epsilon * u_w2,b2), epsilon)
g_b2 = calc_grad(nn_forward(x,y,w1,b1,w2,b2 + epsilon * u_b2), nn_forward(x,y,w1,b1,w2,b2 - epsilon * u_b2), epsilon)

print(f"epsilon : {epsilon}, dtype: torch.float64")
print(f"Grad error in w1: {g_w1}")
print(f"Grad error in b1: {g_b1}")
print(f"Grad error in w2: {g_w2}")
print(f"Grad error in b2: {g_b2}")

epsilon : 1e-05, dtype: torch.float64
Grad error in w1: 0.1279879019433494
Grad error in b1: 0.03840802764720763
Grad error in w2: 0.18297967032676607
Grad error in b2: 0.018187132155844665


## Step 3

In [None]:
N = 200

# Generate train/test data
train_data = G.generate_sample(N)
x_train,y_train = train_data

# Setting train data
x = torch.from_numpy(x_train)
y = torch.from_numpy(y_train)
y = y.T

def train(hidden_size,lr=0.1,epoch=1000):
    w1,b1,w2,b2 = generate_params(x,hidden_size,dtype=torch.float32)
    for ep in epoch:
        loss = nn_forward(x,y,w1,b1,w2,b2)
        loss.backward()
        with torch.no_grad():
            w1 = w1 - lr * w1.grad
            b1 = b1 - lr * b1.grad
            w2 = w2 - lr * w2.grad
            b2 = b2 - lr * b2.grad
    return w1, b1, w2, b2

w1, b1, w2, b2 = train(hidden_size=5)

# I don't know how to use it with my data, because I didn't know that I should use "main_template.py"
G2Model.plot_predictor()