In [19]:
# Sum of five products, each of the form a * b * k**2 * s**2.
# NOTE(review): the factors look like (in_channels * out_channels * kernel_size**2 * output_size**2)
# for five convolutional layers, i.e. a multiplication count — confirm against the problem statement.
3*64*11**2*55**2 + 64*192*5**2*27**2 + 192*384*3**2*13**2 + 384*256*3**2*13**2 + 256*256*3**2*13**2

655566528

In [20]:
# Sum of three products.
# NOTE(review): presumably the weight counts of three fully connected layers
# (256*6*6 -> 4096 -> 4096 -> 1000) — confirm against the problem statement.
256*6**2*4096 + 4096**2 + 4096*1000

58621952

Problem 3
======

In [73]:
import torch.nn as nn
from torch.utils.data import DataLoader
import torch
import torchvision
import torchvision.transforms as transforms


# Instantiate model with BN and load trained parameters
class smallNetTrain(nn.Module):
    """Small network with batch normalization after conv1, conv2 and fc1.

    Layout: two 3x3 convolutions (16 output channels, padding 1) followed by
    two fully connected layers. Every stage, including the last, ends in a ReLU.
    In each ``nn.Sequential`` the affine layer sits at index 0 and, where
    present, the batch-norm layer at index 1.

    Args:
        input_dim: Accepted for interface compatibility; none of the layers
            read it.
    """

    # CIFAR-10 data is 32*32 images with 3 RGB channels
    def __init__(self, input_dim=3*32*32):
        super().__init__()

        channels = 16
        flat_features = 16 * 32 * 32
        hidden_units = 32 * 32

        # Convolutional stages: kernel 3 with padding 1 keeps the spatial size.
        self.conv1 = nn.Sequential(
            nn.Conv2d(3, channels, kernel_size=3, padding=1),
            nn.BatchNorm2d(channels),
            nn.ReLU(),
        )
        self.conv2 = nn.Sequential(
            nn.Conv2d(channels, channels, kernel_size=3, padding=1),
            nn.BatchNorm2d(channels),
            nn.ReLU(),
        )

        # Fully connected stages; only the first one is batch-normalized.
        self.fc1 = nn.Sequential(
            nn.Linear(flat_features, hidden_units),
            nn.BatchNorm1d(hidden_units),
            nn.ReLU(),
        )
        self.fc2 = nn.Sequential(
            nn.Linear(hidden_units, 10),
            nn.ReLU(),
        )

    def forward(self, x):
        """Run both conv stages, reshape to rows of 16*32*32 values, then both fc stages."""
        feature_maps = self.conv2(self.conv1(x))
        flattened = feature_maps.float().view(-1, 16 * 32 * 32)
        return self.fc2(self.fc1(flattened))
    
# Build the BN network, then restore saved parameters from disk, mapped onto the CPU.
# NOTE(review): torch.load unpickles the file, and unpickling can execute arbitrary code;
# only load checkpoints from a trusted source.
# NOTE(review): the checkpoint path ends in ".html" — confirm this is the intended file.
model = smallNetTrain()
model.load_state_dict(torch.load("../hw7/smallNetSaved.html",map_location=torch.device('cpu')))


# Instantiate model without BN
class smallNetTest(nn.Module):
    """Same layer layout as ``smallNetTrain`` but without any batch-norm layers.

    Layout: two 3x3 convolutions (16 output channels, padding 1) followed by
    two fully connected layers, each stage ending in a ReLU. The affine layer
    of every ``nn.Sequential`` sits at index 0.

    Args:
        input_dim: Accepted for interface compatibility; none of the layers
            read it.
    """

    # CIFAR-10 data is 32*32 images with 3 RGB channels
    def __init__(self, input_dim=3*32*32):
        super().__init__()

        channels = 16
        flat_features = 16 * 32 * 32
        hidden_units = 32 * 32

        # Convolutional stages: kernel 3 with padding 1 keeps the spatial size.
        self.conv1 = nn.Sequential(
            nn.Conv2d(3, channels, kernel_size=3, padding=1),
            nn.ReLU(),
        )
        self.conv2 = nn.Sequential(
            nn.Conv2d(channels, channels, kernel_size=3, padding=1),
            nn.ReLU(),
        )

        # Fully connected stages.
        self.fc1 = nn.Sequential(
            nn.Linear(flat_features, hidden_units),
            nn.ReLU(),
        )
        self.fc2 = nn.Sequential(
            nn.Linear(hidden_units, 10),
            nn.ReLU(),
        )

    def forward(self, x):
        """Run both conv stages, reshape to rows of 16*32*32 values, then both fc stages."""
        feature_maps = self.conv2(self.conv1(x))
        flattened = feature_maps.float().view(-1, 16 * 32 * 32)
        return self.fc2(self.fc1(flattened))
    
model_test = smallNetTest()

# Initialize weights of model without BN


def _fold_bn_into(target, source, bn=None):
    """Copy ``source``'s weight and bias into ``target``, absorbing ``bn`` if given.

    With running statistics, batch norm maps a pre-activation ``z`` to
    ``gamma * (z - mean) / sqrt(var + eps) + beta``. For ``z = W x + b`` this
    equals ``(s * W) x + s * (b - mean) + beta`` with
    ``s = gamma / sqrt(var + eps)``, so the BN layer can be folded into the
    affine layer that precedes it.

    Args:
        target: Conv2d/Linear layer that receives the (folded) parameters.
        source: Trained Conv2d/Linear layer of the same shape as ``target``.
        bn: BatchNorm layer that follows ``source``, or ``None`` to copy the
            parameters unchanged.
    """
    # Pure parameter bookkeeping: no autograd graph is needed.
    with torch.no_grad():
        weight, bias = source.weight, source.bias
        if bn is not None:
            # Use the layer's own eps instead of a hard-coded constant.
            scale = bn.weight / torch.sqrt(bn.running_var + bn.eps)
            # One scale per output channel/unit; broadcast over the remaining dims
            # (conv weights are 4-D, linear weights are 2-D).
            weight = scale.view(-1, *([1] * (weight.dim() - 1))) * weight
            bias = scale * (bias - bn.running_mean) + bn.bias
        target.weight.copy_(weight)
        target.bias.copy_(bias)


# Conv1, Conv2 and FC1 are each followed by a BN layer at index 1 of their Sequential.
_fold_bn_into(model_test.conv1[0], model.conv1[0], model.conv1[1])
_fold_bn_into(model_test.conv2[0], model.conv2[0], model.conv2[1])
_fold_bn_into(model_test.fc1[0], model.fc1[0], model.fc1[1])

# FC2 has no BN layer, so its parameters are copied as they are.
_fold_bn_into(model_test.fc2[0], model.fc2[0])

# Verify difference between model and model_test

# Put the BN layers of `model` into inference mode (running statistics).
model.eval()
# model_test needs no eval() call: it has no BN or dropout layers.

test_dataset = torchvision.datasets.CIFAR10(
    root='./cifar_10data/',
    train=False,
    transform=transforms.ToTensor(),
    download=True,
)
test_loader = DataLoader(dataset=test_dataset, batch_size=100, shuffle=False)

# Squared L2 norm of the output gap, one entry per batch of 100 test images.
diff = []
with torch.no_grad():
    for images, _ in test_loader:
        output_gap = model(images) - model_test(images)
        diff.append(torch.norm(output_gap)**2)

# If less than 1e-08, you got the right answer.
# NOTE(review): the recorded run printed about 1.09e-07, which is above this
# threshold — presumably float32 rounding; confirm the intended tolerance.
print(max(diff))

Files already downloaded and verified
tensor(1.08654695907262066612e-07)


Problem 5
=========

In [24]:
import torch.nn as nn
import torch
import torchvision


class Net1(nn.Module):
    """Five unpadded convolutions followed by a three-layer fully connected classifier.

    The first classifier layer takes ``256 * 18 * 18`` inputs, which fixes the
    size of the flattened feature map the network accepts.

    Args:
        num_classes: Width of the final linear layer.
    """

    def __init__(self, num_classes=10):
        super().__init__()

        # (in_channels, out_channels, kernel_size) for each convolution, in order.
        conv_specs = [
            (3, 64, 7),
            (64, 192, 3),
            (192, 384, 3),
            (384, 256, 3),
            (256, 256, 3),
        ]
        conv_stack = []
        for c_in, c_out, kernel in conv_specs:
            conv_stack.append(nn.Conv2d(c_in, c_out, kernel_size=kernel, stride=1))
            conv_stack.append(nn.ReLU())
        # The last convolution is not followed by a ReLU.
        self.features = nn.Sequential(*conv_stack[:-1])

        hidden = 4096
        self.classifier = nn.Sequential(
            nn.Linear(256 * 18 * 18, hidden),
            nn.ReLU(),
            nn.Linear(hidden, hidden),
            nn.ReLU(),
            nn.Linear(hidden, num_classes),
        )

    def forward(self, x):
        """Apply the conv stack, flatten everything but the batch dim, then classify."""
        feature_maps = self.features(x)
        return self.classifier(feature_maps.flatten(start_dim=1))


class Net2(nn.Module):
    """Same convolutional features as ``Net1`` with a classifier made of convolutions.

    The classifier uses an 18x18 convolution followed by two 1x1 convolutions,
    so the network has no flatten step and returns a 4-D tensor.

    Args:
        num_classes: Number of output channels of the final convolution.
    """

    def __init__(self, num_classes=10):
        super().__init__()

        # (in_channels, out_channels, kernel_size) for each convolution, in order.
        conv_specs = [
            (3, 64, 7),
            (64, 192, 3),
            (192, 384, 3),
            (384, 256, 3),
            (256, 256, 3),
        ]
        conv_stack = []
        for c_in, c_out, kernel in conv_specs:
            conv_stack.append(nn.Conv2d(c_in, c_out, kernel_size=kernel, stride=1))
            conv_stack.append(nn.ReLU())
        # The last convolution is not followed by a ReLU.
        self.features = nn.Sequential(*conv_stack[:-1])

        hidden = 4096
        self.classifier = nn.Sequential(
            nn.Conv2d(256, hidden, kernel_size=18),
            nn.ReLU(),
            nn.Conv2d(hidden, hidden, kernel_size=1),
            nn.ReLU(),
            nn.Conv2d(hidden, num_classes, kernel_size=1),
        )

    def copy_weights_from(self, net1):
        """Copy all weights and biases of ``net1`` into this network, in place.

        Feature convolutions are copied as they are. Each classifier weight of
        ``net1`` is viewed with the shape of the matching convolution weight
        here before being copied.

        Args:
            net1: Network exposing ``features`` and ``classifier`` containers
                with parameterized layers at the same indices as this one.
        """
        with torch.no_grad():
            # Even positions of `features` hold the convolutions.
            for slot in range(0, len(self.features), 2):
                dst, src = self.features[slot], net1.features[slot]
                dst.weight.copy_(src.weight)
                dst.bias.copy_(src.bias)

            # Positions 1 and 3 of `classifier` are ReLUs and carry no parameters.
            for slot in range(len(self.classifier)):
                if slot not in (1, 3):
                    dst, src = self.classifier[slot], net1.classifier[slot]
                    dst.weight.copy_(src.weight.view(dst.weight.size()))
                    dst.bias.copy_(src.bias)

    def forward(self, x):
        """Apply the conv stack and then the convolutional classifier."""
        return self.classifier(self.features(x))



model1 = Net1() # model1 randomly initialized
model2 = Net2()
model2.copy_weights_from(model1)

test_dataset = torchvision.datasets.CIFAR10(
    root='./data',
    train=False,
    transform=torchvision.transforms.ToTensor(),
    download=True
)

test_loader = torch.utils.data.DataLoader(
    dataset=test_dataset,
    batch_size=10
)

# Compare both networks on the first batch of test images.
imgs, _ = next(iter(test_loader))
# Inference only: skip autograd bookkeeping for the two forward passes.
with torch.no_grad():
    # model2 returns trailing 1x1 spatial dims; squeeze() drops them before subtracting.
    diff = torch.mean((model1(imgs) - model2(imgs).squeeze()) ** 2)
print(f"Average Pixel Difference: {diff.item()}") # should be small


# Reload the test set resized to 36x38 so that model2 produces a spatial output map.
test_dataset = torchvision.datasets.CIFAR10(
    root='./data',
    train=False,
    transform=torchvision.transforms.Compose([
        torchvision.transforms.Resize((36, 38)),
        torchvision.transforms.ToTensor()
        ]),
    download=True
)

test_loader = torch.utils.data.DataLoader(
    dataset=test_dataset,
    batch_size=10,
    shuffle=False
)

images, _ = next(iter(test_loader))
b, w, h = images.shape[0], images.shape[-1], images.shape[-2]

# model1 is applied to every 32x32 crop of the larger image.
window = 32
rows, cols = h - window + 1, w - window + 1
out1 = torch.empty((b, 10, rows, cols))

# Inference only: without no_grad, every in-place write into out1 would keep
# the autograd graph of that forward pass alive.
with torch.no_grad():
    for i in range(rows):
        for j in range(cols):
            out1[:, :, i, j] = model1(images[:, :, i:i+window, j:j+window])
    # A single pass of model2 over the full image yields the whole output map.
    out2 = model2(images)
diff = torch.mean((out1 - out2) ** 2)

print(f"Average Pixel Diff: {diff.item()}")


Files already downloaded and verified
Average Pixel Difference: 7.968885967768458e-17
Files already downloaded and verified
Average Pixel Diff: 1.87191713774579e-16
