In [12]:
import os 
import sys 
sys.path.append("../src")

import torch
import torch.nn as nn 
import numpy as np

from utils import seed, copy_weights, forall
from GatedConv2d import GatedConv2d
from GatedLinear import GatedLinear

## Check that a fresh `GatedConv2d` gives the same output as `nn.Conv2d`


In [37]:
# Build a gated convolution and a plain nn.Conv2d with identical constructor
# arguments. `seed` comes from utils; presumably it resets the RNG state so that
# both layers draw the same initial weights -- TODO confirm against utils.seed.
seed(0)
gConv = GatedConv2d(16, 33, 3, bias=True)
seed(0)
rConv = nn.Conv2d(16, 33, 3, bias=True)

# One shared random input batch of shape (20, 16, 50, 100) fed to both layers.
randvec = torch.randn(20, 16, 50, 100)

# Run each layer exactly once; gradients are not needed for a pure output check.
with torch.no_grad():
    gated_out = gConv(randvec)
    ref_out = rConv(randvec)

# torch.allclose broadcasts its arguments, so compare the shapes explicitly first.
assert gated_out.shape == ref_out.shape, (
    f"shape mismatch: GatedConv2d {tuple(gated_out.shape)} "
    f"vs nn.Conv2d {tuple(ref_out.shape)}"
)
# torch.allclose returns a plain Python bool, so the assert never has to coerce
# a tensor to a truth value, and float round-off does not cause false failures.
assert torch.allclose(gated_out, ref_out), (
    f"outputs differ; max abs diff = {(gated_out - ref_out).abs().max().item():.3e}"
)

RuntimeError: Boolean value of Tensor with more than one value is ambiguous

## Check if `copy_weights` shares weights but not masks

In [6]:
# Construct two separate 100 -> 100 gated linear layers (with bias), one after the other.
glin1, glin2 = GatedLinear(100, 100, bias=True), GatedLinear(100, 100, bias=True)

In [7]:
# save their initial parameters
def save_W_params(model):
    """Snapshot the weight parameters of ``model`` as NumPy arrays.

    Reads ``model.WW`` and ``model.bW``, detaches each from autograd, converts
    it with ``.numpy()`` and copies the result, so the snapshot does not share
    memory with the live parameters.

    Args:
        model: any object exposing tensor attributes ``WW`` and ``bW``.

    Returns:
        list: ``[WW_copy, bW_copy]``, in that order.
    """
    weight_tensors = (model.WW, model.bW)
    return [tensor.detach().numpy().copy() for tensor in weight_tensors]

def save_M_params(model):
    """Snapshot the mask parameters of ``model`` as NumPy arrays.

    Reads ``model.WM`` and ``model.bM``, detaches each from autograd, converts
    it with ``.numpy()`` and copies the result, so the snapshot does not share
    memory with the live parameters.

    Args:
        model: any object exposing tensor attributes ``WM`` and ``bM``.

    Returns:
        list: ``[WM_copy, bM_copy]``, in that order.
    """
    mask_tensors = (model.WM, model.bM)
    return [tensor.detach().numpy().copy() for tensor in mask_tensors]

# Initial snapshots: masks first, then weights, for each layer in turn.
glin1_save_M = save_M_params(glin1)
glin1_save_W = save_W_params(glin1)
glin2_save_M = save_M_params(glin2)
glin2_save_W = save_W_params(glin2)

In [8]:
# NOTE(review): `forall` comes from utils; it is assumed to apply the predicate
# pairwise over the two lists and reduce the results to one truth value -- confirm.

# The two layers' weight snapshots are expected to differ.
weights_differ = forall(lambda lhs, rhs: lhs != rhs, glin1_save_W, glin2_save_W)
assert weights_differ

# The two layers' mask snapshots are expected to be identical.
masks_match = forall(lambda lhs, rhs: lhs == rhs, glin1_save_M, glin2_save_M)
assert masks_match

In [9]:
# Copy the weights only between the two layers, then refresh glin2's snapshots.
copy_weights(glin1, glin2)
glin2_save_M = save_M_params(glin2)
glin2_save_W = save_W_params(glin2)

# After the copy, the saved weights and the saved masks of both layers should match.
weights_match = forall(lambda lhs, rhs: lhs == rhs, glin1_save_W, glin2_save_W)
assert weights_match
masks_match = forall(lambda lhs, rhs: lhs == rhs, glin1_save_M, glin2_save_M)
assert masks_match

In [10]:
# Take three optimisation steps on both layers: same input, different targets
# (all-ones for glin1, all-zeros for glin2).
dummy_x = torch.ones(1, 100)
dummy_y1 = torch.ones(1, 100)
dummy_y2 = torch.zeros(1, 100)

# One Adam optimiser per layer, both with lr=1.
optim1 = torch.optim.Adam(glin1.parameters(), lr=1)
optim2 = torch.optim.Adam(glin2.parameters(), lr=1)
criterion = torch.nn.MSELoss()

for _ in range(3):
    # Clear the gradients of both optimisers before any backward pass.
    for optimizer in (optim1, optim2):
        optimizer.zero_grad()

    # Forward and backward for glin1 first, then glin2. Both backward passes
    # finish before either optimiser steps.
    for layer, target in ((glin1, dummy_y1), (glin2, dummy_y2)):
        y_hat = layer(dummy_x)
        loss = criterion(y_hat, target)
        loss.backward()

    # Apply the updates: optim1 first, then optim2.
    for optimizer in (optim1, optim2):
        optimizer.step()

In [11]:
# Snapshot both layers again after the optimisation steps.
glin1_new_M = save_M_params(glin1)
glin1_new_W = save_W_params(glin1)
glin2_new_M = save_M_params(glin2)
glin2_new_W = save_W_params(glin2)

# Every parameter (weights and masks) should have moved away from its saved value.
glin1_before, glin1_after = glin1_save_W + glin1_save_M, glin1_new_W + glin1_new_M
assert forall(lambda old, new: old != new, glin1_before, glin1_after)
glin2_before, glin2_after = glin2_save_W + glin2_save_M, glin2_new_W + glin2_new_M
assert forall(lambda old, new: old != new, glin2_before, glin2_after)

# Masks should now differ between the two layers...
assert forall(lambda lhs, rhs: lhs != rhs, glin1_new_M, glin2_new_M)
# ...while the weights should still be shared.
assert forall(lambda lhs, rhs: lhs == rhs, glin1_new_W, glin2_new_W)