# Combining Two Models

## Links Referred
- [Combining Trained Models in PyTorch](https://discuss.pytorch.org/t/combining-trained-models-in-pytorch/28383)
- [How the pytorch freeze network in some layers, only the rest of the training?](https://discuss.pytorch.org/t/how-the-pytorch-freeze-network-in-some-layers-only-the-rest-of-the-training/7088)
- [Best practice for freezing layers?](https://discuss.pytorch.org/t/best-practice-for-freezing-layers/58156)

## Importing the Libraries

In [170]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

# Using the Two Models

In [171]:
class ModelA(nn.Module):
    
    def __init__(self):
        super().__init__()
        self.f1 = nn.Linear(10, 2)
        
    def forward(self, x):
        out = self.f1(x)
        return out

In [172]:
class ModelB(nn.Module):
    
    def __init__(self):
        super().__init__()
        self.f1 = nn.Linear(10, 2)
        
    def forward(self, x):
        out = self.f1(x)
        return out

In [173]:
class ModelC(nn.Module):
    
    def __init__(self):
        super().__init__()
        self.f1 = nn.Linear(2, 1)
        
    def forward(self, x):
        out = self.f1(x)
        return out

In [174]:
class CombinedModel(nn.Module):
    
    def __init__(self, modela, modelb, modelc):
        super().__init__()
        self.modela = modela
        self.modelb = modelb
        self.modelc = modelc
    
    def forward(self, x1, x2):
        out1 = self.modela(x1)
        out2 = self.modelb(x2)
        combined = out1 + out2
        out = self.modelc(combined)
        return out

In [175]:
model_a = ModelA()
print(model_a.eval())
print('-------------')

model_b = ModelB()
print(model_b.eval())
print('-------------')

model_c = ModelC()
print(model_c.eval())
print('-------------')

model = CombinedModel(model_a, model_b, model_c)
print(model.eval())
print('-------------')

ModelA(
  (f1): Linear(in_features=10, out_features=2, bias=True)
)
-------------
ModelB(
  (f1): Linear(in_features=10, out_features=2, bias=True)
)
-------------
ModelC(
  (f1): Linear(in_features=2, out_features=1, bias=True)
)
-------------
CombinedModel(
  (modela): ModelA(
    (f1): Linear(in_features=10, out_features=2, bias=True)
  )
  (modelb): ModelB(
    (f1): Linear(in_features=10, out_features=2, bias=True)
  )
  (modelc): ModelC(
    (f1): Linear(in_features=2, out_features=1, bias=True)
  )
)
-------------


In [176]:
x_1 = torch.randn((1, 10))
x_2 = torch.randn((1, 10))
out = model(x_1, x_2)
print(out.size())
print(out)

torch.Size([1, 1])
tensor([[1.3534]], grad_fn=<AddmmBackward>)


## Checking if the Gradients Backprpogates

In [177]:
out1 = torch.randn((1, 1))
loss = torch.sum((out1 - out) * (out1 - out))

In [178]:
loss.backward()

In [179]:
for w in model.parameters():
    print(w.grad)

tensor([[-0.5345, -0.0671, -0.6861,  0.2198, -0.4625,  0.1486,  0.3617, -0.1481,
         -0.4088,  0.2953],
        [ 0.8029,  0.1009,  1.0307, -0.3301,  0.6948, -0.2232, -0.5434,  0.2225,
          0.6142, -0.4436]])
tensor([-0.4151,  0.6236])
tensor([[ 0.2569,  0.3994, -0.1701,  0.3060,  0.5866,  0.1089, -0.6538, -0.1235,
         -0.4634,  0.6891],
        [-0.3860, -0.5999,  0.2555, -0.4597, -0.8813, -0.1637,  0.9821,  0.1856,
          0.6962, -1.0352]])
tensor([-0.4151,  0.6236])
tensor([[-0.5263,  1.0358]])
tensor([1.0011])


## Checking If you can Freeze Layers

In [190]:
x_1 = torch.randn((1, 10))
x_2 = torch.randn((1, 10))
out = model(x_1, x_2)
out1 = torch.randn((1, 1))
loss1 = torch.sum((out1 - out) * (out1 - out))

In [191]:
# Freezing the layers in ModelB and ModelC

for param in model.modela.parameters():
    param.grad.zero_()
    param.requires_grad = False

for param in model.modelc.parameters():
    param.grad.zero_()
    param.requires_grad = False

In [192]:
loss1.backward()

In [193]:
for w in model.parameters():
    print(w.grad)

tensor([[0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]])
tensor([0., 0.])
tensor([[-0.4007,  0.3081, -0.3959,  0.2569, -0.8426, -0.7942,  1.0928, -0.8156,
         -0.4102,  0.5447],
        [ 0.6020, -0.4628,  0.5948, -0.3860,  1.2657,  1.1931, -1.6417,  1.2252,
          0.6162, -0.8182]])
tensor([-1.9246,  2.8912])
tensor([[0., 0.]])
tensor([0.])


In [202]:
with torch.no_grad():
    for w in model.parameters():
        print(w)
        print(w.grad)
        w -= w.grad
        print(w)
        print('---------------')

Parameter containing:
tensor([[-0.2723,  0.2184, -0.1530, -0.1236,  0.3158,  0.0755, -0.0315,  0.0718,
          0.0338, -0.1783],
        [ 0.1576, -0.2730,  0.2558,  0.0843,  0.2682,  0.2816,  0.0759, -0.3078,
          0.1048,  0.0202]])
tensor([[0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]])
Parameter containing:
tensor([[-0.2723,  0.2184, -0.1530, -0.1236,  0.3158,  0.0755, -0.0315,  0.0718,
          0.0338, -0.1783],
        [ 0.1576, -0.2730,  0.2558,  0.0843,  0.2682,  0.2816,  0.0759, -0.3078,
          0.1048,  0.0202]])
---------------
Parameter containing:
tensor([-0.1891,  0.0270])
tensor([0., 0.])
Parameter containing:
tensor([-0.1891,  0.0270])
---------------
Parameter containing:
tensor([[ 0.7708, -0.7561,  0.9680, -0.3373,  1.7980,  1.6047, -2.2810,  1.9434,
          0.6652, -1.1358],
        [-0.9095,  0.9213, -1.4506,  0.5033, -2.7141, -2.0701,  3.0423, -2.2158,
         -1.2398,  1.3586]], requires_grad=True)
tensor([[

## Checking if Multiple Outputs give you the Gradients

In [248]:
class CombinedModel(nn.Module):
    
    def __init__(self, modela, modelb, modelc):
        super().__init__()
        self.modela = modela
        self.modelb = modelb
        self.modelc = modelc
    
    def forward(self, x1, x2):
        out1 = self.modela(x1)
        out2 = self.modelb(x2)
        combined = out1 + out2
        out = self.modelc(combined)
        return combined, out

In [249]:
model_a = ModelA()
print(model_a.eval())
print('-------------')

model_b = ModelB()
print(model_b.eval())
print('-------------')

model_c = ModelC()
print(model_c.eval())
print('-------------')

model_ = CombinedModel(model_a, model_b, model_c)
print(model.eval())
print('-------------')

ModelA(
  (f1): Linear(in_features=10, out_features=2, bias=True)
)
-------------
ModelB(
  (f1): Linear(in_features=10, out_features=2, bias=True)
)
-------------
ModelC(
  (f1): Linear(in_features=2, out_features=1, bias=True)
)
-------------
CombinedModel(
  (modela): ModelA(
    (f1): Linear(in_features=10, out_features=2, bias=True)
  )
  (modelb): ModelB(
    (f1): Linear(in_features=10, out_features=2, bias=True)
  )
  (modelc): ModelC(
    (f1): Linear(in_features=2, out_features=1, bias=True)
  )
)
-------------


In [250]:
x_1 = torch.randn((1, 10))
x_2 = torch.randn((1, 10))
combined, out = model_(x_1, x_2)
print(combined.size())
print(out.size())

torch.Size([1, 2])
torch.Size([1, 1])


In [251]:
combined1 = torch.rand((1, 2))
out1 = torch.randn((1, 1))
loss2 = torch.sum((combined1-combined) * (combined1-combined).t()) + torch.sum((out1 - out) * (out1 - out))
print(loss2)

tensor(0.7363, grad_fn=<AddBackward0>)


In [252]:
loss2.backward()

In [254]:
# Freezing the layers in ModelB and ModelC

for param in model_.modela.parameters():
    param.grad.zero_()
    param.requires_grad = False

for param in model_.modelc.parameters():
    param.grad.zero_()
    param.requires_grad = False

In [255]:
for w in model_.parameters():
    print(w.grad)

tensor([[0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]])
tensor([0., 0.])
tensor([[-0.1662,  0.1188, -0.0749,  0.0866,  0.2091, -0.2654,  0.1461,  0.0766,
          0.1266,  0.3058],
        [-0.1266,  0.0905, -0.0571,  0.0660,  0.1593, -0.2022,  0.1113,  0.0583,
          0.0964,  0.2329]])
tensor([-0.1744, -0.1328])
tensor([[0., 0.]])
tensor([0.])
