# Add to the path

In [1]:
%load_ext autoreload
%autoreload 2

import sys
sys.path.append('..')

# Import Libraries

In [2]:
from Models.basic_components.encoder_block import Encoder as base_encoder
from Models.tensorized_components.encoder_block import Encoder as tensorized_encoder
from Models.basic_components.patch_embedding import PatchEmbedding as base_embedding
from Models.tensorized_components.patch_embedding import PatchEmbedding as tensorized_embedding
from Utils.Num_parameter import count_parameters

import torch
import torch.nn as nn
from torch import optim
import time

# Dummy Data

In [3]:
device = 'cuda'
batch_size = 16
dummy = torch.rand(batch_size, 3, 224, 224).to(device)
print(f'Current shape is : {dummy.shape}')

Current shape is : torch.Size([16, 3, 224, 224])


# Encoder base method

1. Embed using the base patch embedding method
2. Apply it to the Encoder

In [4]:
st = time.time()
embedding = base_embedding(input_size=dummy.shape,
                           patch_size = 16,
                           embed_dim= 16*16*3,
                           bias=True,
                           device=device,
                           ignore_modes=None).to(device)
embedded_base = embedding(dummy)
elapsed = time.time() - st
print(f'output shape of patch embedding : {embedded_base.shape}\nnumber of patches = {embedded_base.shape[1]}')

print(f'This embedding has {count_parameters(embedding)} parameters')
print(f'This embedding took : {elapsed}')

output shape of patch embedding : torch.Size([16, 196, 768])
number of patches = 196
This embedding has 590592 parameters
This embedding took : 0.26332592964172363


In [5]:
for key in embedding.state_dict():
    print(key)

projection.weight
projection.bias


In [6]:
st = time.time()
encoder = base_encoder(input_size=dummy.shape,
               patch_size=16,
               embed_dim= 768,
               num_heads=4,
               mlp_dim=768,
               dropout=0.5,
               bias=True,
               out_embed= True,
               device=device,
               ignore_modes=None
               ).to(device)
result_encode = encoder(embedded_base)
elapsed = time.time() - st
print(f'Encoder output shape is : {result_encode.shape}')
print(f'Input of Encoder shape was : {embedded_base.shape}')
print(f'This Encoder has {count_parameters(encoder)} parameters')
print(f'This Encoder took : {elapsed}')

Encoder output shape is : torch.Size([16, 196, 768])
Input of Encoder shape was : torch.Size([16, 196, 768])
This Encoder has 3546624 parameters
This Encoder took : 0.03625774383544922


In [7]:
new_classifier = nn.Sequential(
    embedding,
    encoder,
    nn.Flatten(),
    nn.Linear(150528,2)
).to(device)

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(new_classifier.parameters())

temp_y = torch.randint(0, 2, (embedded_base.shape[0],)).to(device)

optimizer.zero_grad()    
outputs = new_classifier(dummy)
loss = criterion(outputs, temp_y)
loss.backward()
for p in encoder.parameters():
    print(p.shape, p.device, p.grad.device)
optimizer.step()

print('second backward')
optimizer.zero_grad()    
outputs = new_classifier(dummy)
loss = criterion(outputs, temp_y)
loss.backward()
for p in encoder.parameters():
    print(p.shape, p.device, p.grad.device)
optimizer.step()

torch.Size([768]) cuda:0 cuda:0
torch.Size([768]) cuda:0 cuda:0
torch.Size([768]) cuda:0 cuda:0
torch.Size([768]) cuda:0 cuda:0
torch.Size([768, 768]) cuda:0 cuda:0
torch.Size([768]) cuda:0 cuda:0
torch.Size([768, 768]) cuda:0 cuda:0
torch.Size([768]) cuda:0 cuda:0
torch.Size([768, 768]) cuda:0 cuda:0
torch.Size([768]) cuda:0 cuda:0
torch.Size([768, 768]) cuda:0 cuda:0
torch.Size([768]) cuda:0 cuda:0
torch.Size([768, 768]) cuda:0 cuda:0
torch.Size([768]) cuda:0 cuda:0
torch.Size([768, 768]) cuda:0 cuda:0
torch.Size([768]) cuda:0 cuda:0
second backward
torch.Size([768]) cuda:0 cuda:0
torch.Size([768]) cuda:0 cuda:0
torch.Size([768]) cuda:0 cuda:0
torch.Size([768]) cuda:0 cuda:0
torch.Size([768, 768]) cuda:0 cuda:0
torch.Size([768]) cuda:0 cuda:0
torch.Size([768, 768]) cuda:0 cuda:0
torch.Size([768]) cuda:0 cuda:0
torch.Size([768, 768]) cuda:0 cuda:0
torch.Size([768]) cuda:0 cuda:0
torch.Size([768, 768]) cuda:0 cuda:0
torch.Size([768]) cuda:0 cuda:0
torch.Size([768, 768]) cuda:0 cuda:0
t

# Encoder tensorized method

1. Embed using the tensorized patch embedding method
2. Apply it to the Encoder

In [8]:
st = time.time()
embedding = tensorized_embedding(input_size=dummy.shape,
                                 patch_size = 16,
                                 embed_dim= (16,16,3),
                                 bias=True,
                                 device=device,
                                 ignore_modes=(0,1,2)).to(device)
embedded_tensorized = embedding(dummy)
elapsed = time.time() - st
print(f'output shape of patch embedding : {embedded_tensorized.shape}\nnumber pf patches = {embedded_tensorized.shape[1]} x {embedded_tensorized.shape[2]} = {embedded_tensorized.shape[1]*embedded_tensorized.shape[2]}')

print(f'This embedding has {count_parameters(embedding)} parameters')
print(f'This embedding took : {elapsed}')

output shape of patch embedding : torch.Size([16, 14, 14, 16, 16, 3])
number pf patches = 14 x 14 = 196
This embedding has 1289 parameters
This embedding took : 0.0007266998291015625


In [9]:
for key in embedding.state_dict():
    print(key)

tcl.b
tcl.u0
tcl.u1
tcl.u2


In [10]:
st = time.time()
encoder = tensorized_encoder(input_size=dummy.shape,
               patch_size=16,
               embed_dim= (16,16,3),
               num_heads=(2,2,1),
               mlp_dim=(16,16,3),
               dropout=0.5,
               bias=True,
               out_embed= True,
               device=device,
               ignore_modes=(0,1,2)
               ).to(device)
result_encode = encoder(embedded_tensorized)
elapsed = time.time() - st
print(f'Encoder output shape is : {result_encode.shape}')
print(f'Input of Encoder shape was : {embedded_base.shape}')
print(f'This Encoder has {count_parameters(encoder)} parameters')
print(f'This Encoder took : {elapsed}')

Encoder output shape is : torch.Size([16, 14, 14, 16, 16, 3])
Input of Encoder shape was : torch.Size([16, 196, 768])
This Encoder has 601151 parameters
This Encoder took : 0.0033636093139648438


In [11]:
new_classifier = nn.Sequential(
    embedding,
    encoder,
    nn.Flatten(),
    nn.Linear(150528,2)
).to(device)

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(new_classifier.parameters())

temp_y = torch.randint(0, 2, (embedded_base.shape[0],)).to(device)

optimizer.zero_grad()    
outputs = new_classifier(dummy)
loss = criterion(outputs, temp_y)
loss.backward()
for p in encoder.parameters():
    print(p.shape, p.device, p.grad.device)
optimizer.step()

print('second backward')
optimizer.zero_grad()    
outputs = new_classifier(dummy)
loss = criterion(outputs, temp_y)
loss.backward()
for p in encoder.parameters():
    print(p.shape, p.device, p.grad.device)
optimizer.step()

torch.Size([16, 16, 3]) cuda:0 cuda:0
torch.Size([16, 16, 3]) cuda:0 cuda:0
torch.Size([16, 16, 3]) cuda:0 cuda:0
torch.Size([16, 16, 3]) cuda:0 cuda:0
torch.Size([16, 16, 3]) cuda:0 cuda:0
torch.Size([16, 16]) cuda:0 cuda:0
torch.Size([16, 16]) cuda:0 cuda:0
torch.Size([3, 3]) cuda:0 cuda:0
torch.Size([16, 16, 3]) cuda:0 cuda:0
torch.Size([16, 16]) cuda:0 cuda:0
torch.Size([16, 16]) cuda:0 cuda:0
torch.Size([3, 3]) cuda:0 cuda:0
torch.Size([16, 16, 3]) cuda:0 cuda:0
torch.Size([16, 16]) cuda:0 cuda:0
torch.Size([16, 16]) cuda:0 cuda:0
torch.Size([3, 3]) cuda:0 cuda:0
torch.Size([16, 16, 3]) cuda:0 cuda:0
torch.Size([16, 16]) cuda:0 cuda:0
torch.Size([16, 16]) cuda:0 cuda:0
torch.Size([3, 3]) cuda:0 cuda:0
torch.Size([16, 16, 3]) cuda:0 cuda:0
torch.Size([16, 16]) cuda:0 cuda:0
torch.Size([16, 16]) cuda:0 cuda:0
torch.Size([3, 3]) cuda:0 cuda:0
torch.Size([16, 16, 3]) cuda:0 cuda:0
torch.Size([16, 16, 3, 16, 16, 3]) cuda:0 cuda:0
torch.Size([16, 16]) cuda:0 cuda:0
torch.Size([16, 16]) 