In [1]:
import json
import torch
import torch.nn.utils.prune as prune
import torch.nn.functional as F
import torch_pruning as tp
import torchvision.models as models
import caption
from torchsummary import summary

In [2]:
class Encoder(torch.nn.Module):
    """
    Image encoder: a ResNet-101 trunk followed by adaptive average pooling.
    """

    def __init__(self, encoded_image_size=14):
        super().__init__()
        self.enc_image_size = encoded_image_size

        # ResNet-101 constructed without requesting pretrained weights
        backbone = models.resnet101()

        # Keep every child except the final two (pooling and linear layers),
        # because the network is not used for classification here
        self.resnet = torch.nn.Sequential(*list(backbone.children())[:-2])

        # Pool to a fixed spatial grid so that input images of varying size are accepted
        pooled_grid = (encoded_image_size, encoded_image_size)
        self.adaptive_pool = torch.nn.AdaptiveAvgPool2d(pooled_grid)

        self.fine_tune()

    def forward(self, images):
        """
        Encode a batch of images.

        :param images: images, a tensor of dimensions (batch_size, 3, image_size, image_size)
        :return: encoded images, channels-last:
                 (batch_size, encoded_image_size, encoded_image_size, channels)
        """
        # Trunk features, then pooled to (batch_size, channels, encoded_image_size, encoded_image_size)
        features = self.adaptive_pool(self.resnet(images))
        # Move the channel axis to the end
        return features.permute(0, 2, 3, 1)

    def fine_tune(self, fine_tune=True):
        """
        Freeze the whole trunk, then allow or prevent gradient computation for
        every child of the trunk from index 5 onwards.

        :param fine_tune: Allow gradients for those children?
        """
        # Phase 1: nothing in the trunk is trainable
        self.resnet.requires_grad_(False)
        # Phase 2: children from index 5 on get the requested flag
        for block in list(self.resnet.children())[5:]:
            for weight in block.parameters():
                weight.requires_grad = fine_tune

In [3]:
# Paths to the vocabulary file and the trained checkpoint, plus inference settings.
word_map_path = "WORDMAP_coco_5_cap_per_img_5_min_word_freq.json"
model = "BEST_checkpoint_coco_5_cap_per_img_5_min_word_freq.pth.tar"
beam_size = 5
device = 'cpu'

# NOTE: the checkpoint holds whole module objects, so torch.load unpickles them;
# only load checkpoint files from a trusted source.
checkpoint = torch.load(model, map_location=device)
decoder = checkpoint['decoder']
decoder = decoder.to(device)
decoder.eval()
encoder = checkpoint['encoder']
encoder = encoder.to(device)
# Switch to inference mode BEFORE summary(): summary() pushes a random batch through
# the network, and in training mode that forward pass would overwrite the BatchNorm
# running statistics stored in the trained encoder.
encoder.eval()
summary(encoder, (3,256,256))

# Load word map (word2ix). The path lives in its own variable so that `word_map`
# only ever refers to the loaded dictionary.
with open(word_map_path, 'r') as word_map_file:
    word_map = json.load(word_map_file)
rev_word_map = {v: k for k, v in word_map.items()}  # ix2word



----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1         [-1, 64, 128, 128]           9,408
       BatchNorm2d-2         [-1, 64, 128, 128]             128
              ReLU-3         [-1, 64, 128, 128]               0
         MaxPool2d-4           [-1, 64, 64, 64]               0
            Conv2d-5           [-1, 64, 64, 64]           4,096
       BatchNorm2d-6           [-1, 64, 64, 64]             128
              ReLU-7           [-1, 64, 64, 64]               0
            Conv2d-8           [-1, 64, 64, 64]          36,864
       BatchNorm2d-9           [-1, 64, 64, 64]             128
             ReLU-10           [-1, 64, 64, 64]               0
           Conv2d-11          [-1, 256, 64, 64]          16,384
      BatchNorm2d-12          [-1, 256, 64, 64]             512
           Conv2d-13          [-1, 256, 64, 64]          16,384
      BatchNorm2d-14          [-1, 256,

In [4]:
# Freshly constructed encoder (Encoder.__init__ requests no pretrained weights),
# switched to inference mode; eval() returns the module itself.
myencoder = Encoder().eval()
# Trained encoder taken from the checkpoint, moved to the target device.
encoder = checkpoint['encoder'].to(device)
# Last expression of the cell: puts the encoder in inference mode and displays it.
encoder.eval()

Encoder(
  (resnet): Sequential(
    (0): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU(inplace=True)
    (3): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (4): Sequential(
      (0): Bottleneck(
        (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu): ReLU(inplace=True)
        (downsample): Sequential(
          (0): Conv2d(64, 2

In [5]:
# sd  -> state dict of the checkpoint encoder
# sd2 -> state dict of the re-created encoder (myencoder's own current weights)
sd, sd2 = encoder.state_dict(), myencoder.state_dict()

dict_keys(['weight'])

In [6]:
# param = dict()
# for key in sd.keys():
#     param[key] = torch.Tensor(sd[key].shape)
# for key in sd.keys():
#     condition = (sd[key] == False)
#     for i in list(condition.nonzero()):
#         param[key][tuple(i)] = False
# # print(param[tuple(i)].dtype)
# # print(sd['resnet.0.weight'])
# # index_list = condition.nonzero()
# # print(index_list.shape)
# # idd = tuple(index_list[0])
# # print(sd['resnet.0.weight'][idd])
# # print(sd['resnet.0.weight'][index_list[0]])
# # print(sd['resnet.0.weight'][index_list[0]-[0,0,0,1]])

In [7]:
# A = torch.tensor([4,15,False])
# c = A[2].type(torch.bool)
# print(c.dtype)

In [8]:
# eps = 1e-4
# for key in sd.keys():
#     sd[key][abs(sd[key])<eps] = 0
#     sd2[key] = sd[key].type(torch.float16)
#     param[key] = torch.Tensor(sd[key].shape)
#     sd[key][abs(sd[key])<eps] = False
# for key in sd.keys():
#     if len(sd[key].shape) == 4:
#         for i in range(sd[key].shape[0]):
#             for j in range(sd[key].shape[1]):
#                 for k in range(sd[key].shape[2]):
#                     for l in range(sd[key].shape[3]):
#                         if sd[key][i,j,k,l] == 0:
#                             param[key][i,j,k,l] = False
#                         else:
#                             param[key][i,j,k,l] = sd[key][i,j,k,l].type(torch.float16)
#     else:
#         param[key] = sd[key].type(torch.float16)
#     
#     sd[key][abs(sd[key])==False]
#     sd2[key] = param

In [26]:
# for key in sd.keys():
#     if 'weight' in key:
#         pruner = prune.ln_structured(sd[key], name='weight',amount=0.1, n=2,dim=0)
#         sd2[key] = pruner.prune(sd[key])
#         pruner.remove(sd2[key], 'weight')

AttributeError: 'Tensor' object has no attribute '_forward_pre_hooks'

In [None]:
# summ = 0
# for key in sd2.keys():
#     condition = sd2[key]== 0
#     summ += len(list(condition.nonzero()))
#     print(key, ' has :', len(list(condition.nonzero())), 'zeroes')
# print('total zeroes deducted: ', summ)
# print(sd2['resnet.0.weight'])
# print(sd2['resnet.0.weight'].dtype)

In [None]:
# Copy the checkpoint encoder's weights (sd) into the re-created encoder.
# sd2 is myencoder's own state dict, so loading sd2 would leave the network
# exactly as it was constructed instead of giving it the trained weights.
# NOTE(review): assumes both encoders expose the same state-dict keys — confirm.
myencoder.load_state_dict(sd)

In [None]:
# Layer-by-layer summary of the re-created encoder for a 3x256x256 input.
summary(myencoder, input_size=(3, 256, 256))

In [None]:
# Count, for every weight tensor of myencoder, how many entries differ from the
# matching tensor of the checkpoint encoder, and print the per-tensor and total counts.
reference_params = dict(encoder.named_parameters())  # built once, not twice per iteration
summ = 0
for name, param in myencoder.named_parameters():
    if 'weight' in name:
        # Pruned parameters are registered as '<name>_orig'; dropping everything after
        # the first underscore recovers the name used by the checkpoint encoder.
        reference = reference_params[name.split('_')[0]]
        # NOTE(review): for pruned layers this compares the '_orig' values, not the
        # masked weights — confirm that this is the intended comparison.
        changed = len(param[param != reference])
        print(changed)
        summ += changed
print(summ)

In [None]:
# Side-by-side summaries: re-created encoder first, checkpoint encoder second.
summary(myencoder, input_size=(3, 256, 256))
summary(encoder, input_size=(3, 256, 256))

In [None]:
# print(sd)
# print(sd.keys())
# print("\nNEW ENCODER\n")
# print(sd2.keys())
# for key in sd.keys():
#     if key not in sd2.keys():
#         print('They are not the same')
# print('END')

In [None]:
# Image to caption. A relative path replaces the former absolute, user-specific
# Windows path so the notebook can run on other machines.
# NOTE(review): assumes img5.jpg sits in the kernel's working directory (the old
# absolute path pointed into the project folder) — adjust if it lives elsewhere.
img = "img5.jpg"
# Caption the same image with the re-created encoder and with the checkpoint encoder.
seq, alphas = caption.caption_image_beam_search(myencoder, decoder, img, word_map, beam_size)
seq_orig, alphas_orig = caption.caption_image_beam_search(encoder, decoder, img, word_map, beam_size)

In [None]:
# Convert both alpha collections returned by the beam search to float tensors.
alphas = torch.FloatTensor(alphas)
alphas_orig = torch.FloatTensor(alphas_orig)

# Translate token indices back into words (ix2word lookup).
words = list(map(rev_word_map.__getitem__, seq))
words_orig = list(map(rev_word_map.__getitem__, seq_orig))

# Every word is prefixed with one space, so each sentence starts with a leading space.
sentence = "".join(" " + word for word in words)
sentence_orig = "".join(" " + word for word in words_orig)

In [None]:
# Caption from the re-created encoder first, caption from the checkpoint encoder second.
print(sentence, sentence_orig, sep="\n")

In [None]:
# Unstructured pruning experiment on the re-created encoder.
# NOTE: random_unstructured picks entries at random; no seed is set in this cell,
# so the resulting masks differ from run to run.
pruner = prune.L1Unstructured(amount=0.7)

conv_layers = 0
linear_layers = 0
for name, module in myencoder.named_modules():
    if isinstance(module, torch.nn.Conv2d):
        # Skip layers that already carry a pruning hook so that re-running this cell
        # does not stack a second random mask on top of the first one.
        if not prune.is_pruned(module):
            prune.random_unstructured(module, name='weight', amount=0.3)
        conv_layers += 1
    elif isinstance(module, torch.nn.Linear):
        # Linear layers are only counted, never pruned.
        linear_layers += 1
# One summary line instead of one debug line per layer.
print("Conv2d layers pruned:", conv_layers, "| Linear layers seen:", linear_layers)

first_conv = myencoder.resnet[0]
# The cell used to call pruner.prune(tensor) with an undefined name (NameError).
# NOTE(review): the first convolution's unmasked weights are used here, as hinted by
# the commented-out line that preceded the call — confirm this was the intended tensor.
pruned_tensor = pruner.prune(first_conv.weight_orig.detach())
print(first_conv.weight_mask)
# Effective weights of the first convolution: original values times the random mask.
torch.mul(first_conv.weight_orig, first_conv.weight_mask)

In [None]:
# print(encoder.resnet[0].weight[0,0,0,0].dtype)
# print(encoder.resnet[1])
# print(encoder.resnet[2])
# print(encoder.resnet[3])
# print(encoder.resnet[4])
# print(encoder.resnet[5])
# print(encoder.resnet[6])
# print(encoder.resnet[7])
# for name, module in myencoder.named_modules():
#     print('Name:',name)#, 'Module:\n',module)
# #     print(module.s)

In [None]:
# List every registered parameter of the re-created encoder with its dtype.
for name, param in myencoder.named_parameters():
    print(f"Name: {name} Parameter:\n {param.dtype}")

In [None]:
# Inspect the first convolution's weights through module attributes.
# Looking both names up in dict(named_parameters()) always failed: an unpruned layer
# has no 'weight_orig' parameter, and a pruned layer no longer registers 'weight'
# as a parameter, so one of the two lookups raised KeyError.
first_conv = myencoder.resnet[0]
if hasattr(first_conv, 'weight_orig'):
    # Only present once the layer has been pruned: the unmasked values.
    print(first_conv.weight_orig)
# The weights the layer actually uses (masked if the layer has been pruned).
print(first_conv.weight)

In [None]:
# Put the re-created encoder in evaluation mode. As the last expression of the cell,
# the module returned by eval() is also displayed.
myencoder.eval()

In [None]:
# from torchtext.data.metrics import bleu_score