# Worch vs Torch: Tests

In [1]:
import torch
import worch
from worch import nn
from worch import optim
from test_utils import generate_data
import matplotlib.pyplot as plt
torch.set_grad_enabled(False)
%load_ext autoreload
%autoreload 2

# Linear Forward (YES)

In [287]:
x = torch.empty((2, 2)).normal_()
y = torch.empty((2, 2)).normal_()

In [288]:
wfc = worch.nn.Linear(x.shape[1], 10)
tfc = torch.nn.Linear(x.shape[1], 10)
p = tfc.state_dict()
wfc.params[0].shape == p['weight'].shape

True

In [289]:
wfc.params[0] = p['weight']
wfc.params[1] = p['bias']

In [290]:
(wfc(x)).allclose(tfc(x))

True

In [291]:
torch.max(torch.abs(wfc(x)-tfc(x)))

tensor(0.)

# Linear Backward (YES)

In [292]:
x = torch.Tensor([[1, 2, 3],
                  [3, 4, 2]])
y_ = torch.ones((x.shape[0], 1))

In [293]:
wfc = worch.nn.Linear(x.shape[1], 1)
tfc = torch.nn.Linear(x.shape[1], 1)
p = tfc.state_dict()
wfc.params[0] = p['weight']
wfc.params[1] = p['bias']
wc = worch.nn.MSELoss()
wc.register_previous_module(wfc)
tc = torch.nn.MSELoss()

In [294]:
# Worch
y_worch = wfc(x)
loss_worch = wc(y_worch, y_)
wc.backward()
wfc.params[0].grad, wfc.params[1].grad, loss_worch

(tensor([[-0.8032, -1.0905, -0.6042]]), tensor([-0.2874]), tensor(0.1348))

In [295]:
# Torch
torch.set_grad_enabled(True)
x_torch = x.clone()
x_torch.requires_grad_(True)
y_torch = tfc(x_torch)
loss_torch = tc(y_torch, y_)
torch.set_grad_enabled(False)
loss_torch.backward()
tfc.weight.grad, tfc.bias.grad, loss_torch

(tensor([[-1.6064, -2.1811, -1.2083]]),
 tensor([-0.5747]),
 tensor(0.1348, grad_fn=<MseLossBackward>))

# ReLU Forward (YES)

In [296]:
x = torch.empty((2, 2)).normal_()
y = torch.empty((2, 2)).normal_()

In [297]:
(worch.nn.ReLU()(x)).allclose(torch.nn.ReLU()(x))

True

In [298]:
torch.max(torch.abs(worch.nn.ReLU()(x)-torch.nn.ReLU()(x)))

tensor(0.)

# ReLU Backward (YES)

In [299]:
x

tensor([[ 0.0582, -0.5564],
        [-1.2003, -0.5015]])

In [300]:
# Worch
wrelu = worch.nn.ReLU()
b = wrelu(x)
wrelu.backward(b)

tensor([[0.0582, 0.0000],
        [0.0000, 0.0000]])

In [301]:
# Torch
torch.set_grad_enabled(True)
trelu = torch.nn.ReLU()
x_torch = x.clone()
x_torch.requires_grad_(True)
b = trelu(x_torch)
b.backward(x_torch)
x_torch.grad

tensor([[0.0582, 0.0000],
        [0.0000, 0.0000]])

# Sigmoid Forward (YES)

In [302]:
x = torch.empty((2, 2)).normal_()
y = torch.empty((2, 2)).normal_()

In [109]:
(worch.nn.Sigmoid()(x)).allclose(torch.nn.Sigmoid()(x))

True

In [110]:
torch.max(torch.abs(worch.nn.Sigmoid()(x)-torch.nn.Sigmoid()(x)))

tensor(0.)

# Sigmoid Backward (NO)

In [111]:
x

tensor([[-0.3085,  0.7158],
        [-0.3074,  1.4279]])

In [112]:
# Worch
wsig = worch.nn.Sigmoid()
b = wsig(x)
wsig.backward(b)

tensor([[0.1034, 0.1481],
        [0.1035, 0.1258]])

In [113]:
# Torch
torch.set_grad_enabled(True)
tsig = torch.nn.Sigmoid()
x_torch = x.clone()
x_torch.requires_grad_(True)
b = tsig(x_torch)
b.backward(x_torch)
x_torch.grad

tensor([[-0.0753,  0.1579],
        [-0.0751,  0.2228]])

# Tanh Forward (YES)

In [114]:
x = torch.empty((2, 2)).normal_()
y = torch.empty((2, 2)).normal_()

In [115]:
(worch.nn.Tanh()(x)).allclose(torch.nn.Tanh()(x))

True

In [116]:
torch.max(torch.abs(worch.nn.Tanh()(x)-torch.nn.Tanh()(x)))

tensor(1.1921e-07)

# Tanh Backward (NO)

In [117]:
x

tensor([[ 1.4869, -1.6125],
        [ 1.0155, -0.9902]])

In [118]:
# Worch
wtgh = worch.nn.Tanh()
b = wtgh(x)
wtgh.backward(b)

tensor([[ 0.1670, -0.1358],
        [ 0.3150, -0.3229]])

In [119]:
# Torch
torch.set_grad_enabled(True)
tgh = torch.nn.Tanh()
x_torch = x.clone()
x_torch.requires_grad_(True)
b = tgh(x_torch)
b.backward(x_torch)
x_torch.grad

tensor([[ 0.2751, -0.2372],
        [ 0.4165, -0.4221]])

# MSE Forward (YES)
like pytorch for comparison `mse = mean((input-target)**2)`

In [120]:
x = torch.empty((2, 2)).normal_()
y = torch.empty((2, 2)).normal_()

In [121]:
(worch.nn.MSELoss()(x, y)).allclose(torch.nn.MSELoss()(x, y))

True

In [122]:
torch.max(worch.nn.MSELoss()(x, y)-torch.nn.MSELoss()(x, y))

tensor(0.)

# MSE Backward (YES)
like pytorch for comparison `g = (input-target)*0.5`

In [123]:
x, y

(tensor([[-0.2213,  1.2161],
         [ 0.3322, -0.7773]]),
 tensor([[ 0.4168, -0.2098],
         [ 1.5163, -0.3114]]))

In [124]:
# Worch
wc = worch.nn.MSELoss()
b = wc(x, y)
g = wc.backward()
g

tensor([[-0.3191,  0.7130],
        [-0.5921, -0.2330]])

In [125]:
# Torch
torch.set_grad_enabled(True)
tc = torch.nn.MSELoss()
x_torch = x.clone()
x_torch.requires_grad_(True)
b = tc(x_torch, y)
b.backward()
torch.set_grad_enabled(False)
x_torch.grad

tensor([[-0.3191,  0.7130],
        [-0.5921, -0.2330]])

# Sequential Forward (YES)

In [2]:
x = torch.empty((2, 2)).normal_()
y = torch.empty((2, 2)).normal_()

In [3]:
wfc1 = worch.nn.Linear(x.shape[1], 10)
tfc1 = torch.nn.Linear(x.shape[1], 10)
p = tfc1.state_dict()
wfc1.params[0] = p['weight'].clone()
wfc1.params[1]=p['bias'].clone()
wfc2 = worch.nn.Linear(10, y.shape[1])
tfc2 = torch.nn.Linear(10, y.shape[1])
p = tfc2.state_dict()
wfc2.params[0] = p['weight'].clone()
wfc2.params[1]=p['bias'].clone()

In [4]:
wnet =  worch.nn.Sequential(
    wfc1,
    worch.nn.ReLU(),
    wfc2,
)
tnet =  torch.nn.Sequential(
    tfc1,
    torch.nn.ReLU(),
    tfc2,
)

In [21]:
(wnet(x)).allclose(tnet(x))

True

In [6]:
torch.max(torch.abs(wnet(x)-tnet(x)))

tensor(0.)

# Sequential Backward (YES)
Cells must be run the exact same number of times because of accumulation.

In [53]:
x, y

(tensor([[-0.2701,  0.4139],
         [-0.5171,  0.9391]]),
 tensor([[ 0.6209,  0.6340],
         [ 1.3865, -0.7368]]))

In [68]:
# Worch
wc = worch.nn.MSELoss()
wc.register_previous_module(wnet)
y_worch = wnet(x)
loss_worch = wc(y_worch, y)
wgl = wc.backward()
wgw0 = wnet[0].params[0].grad
wgb0 = wnet[0].params[1].grad
wgw1 = wnet[2].params[0].grad
wgb1 = wnet[2].params[1].grad

In [69]:
# Torch
tc = torch.nn.MSELoss()
torch.set_grad_enabled(True)
x_torch = x.clone()
x_torch.requires_grad_(True)
y_torch = tnet(x_torch)
loss_torch = tc(y_torch, y)
torch.set_grad_enabled(False)
tgl = loss_torch.backward()
tgw0 = tnet[0].weight.grad
tgb0 = tnet[0].bias.grad
tgw1 = tnet[2].weight.grad
tgb1 = tnet[2].bias.grad

In [70]:
tgb1.allclose(wgb1)

True

In [71]:
tgw1.allclose(wgw1)

True

In [72]:
tgb0.allclose(wgb0)

True

In [73]:
tgw0.allclose(wgw0)

True

In [74]:
wgw0

tensor([[ 0.1050, -0.1609],
        [-1.6591,  2.8551],
        [ 0.0000,  0.0000],
        [ 0.0000,  0.0000],
        [ 0.0000,  0.0000],
        [ 0.0000,  0.0000],
        [ 0.4068, -0.7835],
        [ 0.0000,  0.0000],
        [ 0.8720, -1.4602],
        [ 0.0000,  0.0000]])

# SGD (YES)
Cells must be run the exact same number of times because of sgd updates.

In [156]:
x = torch.empty((2, 2)).normal_()
y = torch.empty((2, 2)).normal_()

In [157]:
# Reusing network above
wsgd = worch.optim.SGD(wnet.parameters(), lr=0.01)
tsgd = torch.optim.SGD(tnet.parameters(), lr=0.01)

In [158]:
# Worch
wc = worch.nn.MSELoss()
wc.register_previous_module(wnet)
y_worch = wnet(x)
loss_worch = wc(y_worch, y)
wsgd.zero_grad()
wc.backward()
wsgd.step()
ww0 = wnet[0].params[0]
wb0 = wnet[0].params[1]
ww1 = wnet[2].params[0]
wb1 = wnet[2].params[1]

In [159]:
# Torch
tc = torch.nn.MSELoss()
torch.set_grad_enabled(True)
x_torch = x.clone()
x_torch.requires_grad_(True)
y_torch = tnet(x_torch)
loss_torch = tc(y_torch, y)
tsgd.zero_grad()
loss_torch.backward()
tsgd.step()
torch.set_grad_enabled(False)
tw0 = tnet[0].weight
tb0 = tnet[0].bias
tw1 = tnet[2].weight
tb1 = tnet[2].bias

In [160]:
tb1.allclose(wb1)

True

In [161]:
tw1.allclose(ww1)

True

In [162]:
tw0.allclose(ww0)

True

In [163]:
tb0.allclose(wb0)

True