# Add to the path

In [1]:
%load_ext autoreload
%autoreload 2

import sys
sys.path.append('..')

# Import Libraries

In [2]:
from Tensorized_Layers.TCL import TCL
from Utils.Num_parameter import count_parameters

import torch
import torch.nn as nn
from torch import optim
import time

# Dummy Data

In [3]:
device = 'cuda'
batch_size = 5
dummy = torch.rand(batch_size, 14, 14, 16, 16, 3).to(device)
print(f'Current shape is : {dummy.shape}')

Current shape is : torch.Size([5, 14, 14, 16, 16, 3])


# TCL
- input size is (5, 14, 14, 16, 16, 3)
- ranks are (16,16,3) # excluding the ignored modes
- ignored modes are 0: batch , 1:patch index 1, 2: patch index 2
- device is set to cuda
- output shape is ignore modes + ranks

In [4]:
tcl = TCL(input_size=(5,14,14,16,16,3),
            rank=(16,16,3),
            ignore_modes=(0,1,2),
            bias = True,
            device='cuda'
            ).to('cuda')
st = time.time()
output = tcl(dummy)
elapsed = time.time() - st
print(f'output shape of tcl: {output.shape}')

print(f'This tcl has {count_parameters(tcl)} parameters')
print(f'This tcl took : {elapsed}')

output shape of tcl: torch.Size([5, 14, 14, 16, 16, 3])
This tcl has 1289 parameters
This tcl took : 0.027681350708007812


In [5]:
for key in tcl.state_dict():
    print(key, tcl.state_dict()[key].device)

b cuda:0
u0 cuda:0
u1 cuda:0
u2 cuda:0


In [6]:
new_classifier = nn.Sequential(
    tcl,
    nn.Flatten(),
    nn.Linear(150528,2)
).to(device)

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(new_classifier.parameters())

temp_y = torch.randint(0, 2, (dummy.shape[0],)).to(device)

optimizer.zero_grad()    
outputs = new_classifier(dummy)
loss = criterion(outputs, temp_y)
loss.backward()
for p in tcl.parameters():
    print(p.shape, p.device, p.grad.device)
optimizer.step()

print('second backward')
optimizer.zero_grad()    
outputs = new_classifier(dummy)
loss = criterion(outputs, temp_y)
loss.backward()
for p in tcl.parameters():
    print(p.shape, p.device, p.grad.device)
optimizer.step()

torch.Size([16, 16, 3]) cuda:0 cuda:0
torch.Size([16, 16]) cuda:0 cuda:0
torch.Size([16, 16]) cuda:0 cuda:0
torch.Size([3, 3]) cuda:0 cuda:0
second backward
torch.Size([16, 16, 3]) cuda:0 cuda:0
torch.Size([16, 16]) cuda:0 cuda:0
torch.Size([16, 16]) cuda:0 cuda:0
torch.Size([3, 3]) cuda:0 cuda:0


# Compare to tltorch TRL from Tensorly

In [7]:
device = 'cuda'
batch_size = 5
dummy = torch.rand(batch_size, 16, 16, 3).to(device)
print(f'Current shape is : {dummy.shape}')

Current shape is : torch.Size([5, 16, 16, 3])


## Forward Pass

In [8]:
tcl1 = TCL(input_size=dummy.shape,
            rank=(16,16,3),
            ignore_modes=(0,),
            bias=False,
            device = device
            ).to(device)
st = time.time()
output1 = tcl1(dummy)
elapsed = time.time() - st
print(f'output shape of tcl (our method): {output1.shape}')

print(f'This tcl (our method) has {count_parameters(tcl1)} parameters')
print(f'This tcl (our method) took : {elapsed}')

output shape of tcl (our method): torch.Size([5, 16, 16, 3])
This tcl (our method) has 521 parameters
This tcl (our method) took : 0.0002510547637939453


In [9]:
import tltorch # doesnt work with bias

tcl2 = tltorch.TCL(input_shape = (16,16,3), 
                   bias = False,
                   rank = (16,16,3,)).to(device)
st = time.time()
output2 = tcl2(dummy)
elapsed = time.time() - st
print(f'output shape of trl (tltorch method): {output2.shape}')

print(f'This trl (tltorch method) has {count_parameters(tcl2)} parameters')
print(f'This trl (tltorch method) took : {elapsed}')

output shape of trl (tltorch method): torch.Size([5, 16, 16, 3])
This trl (tltorch method) has 521 parameters
This trl (tltorch method) took : 0.00028324127197265625




## Backward Pass

In [10]:
new_classifier = nn.Sequential(
    tcl1,
    nn.Flatten(),
    nn.Linear(16*16*3,2)
).to(device)

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(new_classifier.parameters())

temp_y = torch.randint(0, 2, (dummy.shape[0],)).to(device)

st = time.time()
optimizer.zero_grad()    
outputs = new_classifier(dummy)
loss = criterion(outputs, temp_y)
loss.backward()
optimizer.step()
elapsed = time.time() - st
print(f'This backward tcl (our method) took : {elapsed}')


This backward tcl (our method) took : 0.001107931137084961


In [11]:
new_classifier = nn.Sequential(
    tcl2,
    nn.Flatten(),
    nn.Linear(16*16*3,2)
).to(device)

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(new_classifier.parameters())

temp_y = torch.randint(0, 2, (dummy.shape[0],)).to(device)

st = time.time()
optimizer.zero_grad()    
outputs = new_classifier(dummy)
loss = criterion(outputs, temp_y)
loss.backward()
optimizer.step()
elapsed = time.time() - st
print(f'This backward tcl (tltorch method) took : {elapsed}')


This backward tcl (tltorch method) took : 0.0012278556823730469
