In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import sys, os
# Make the parent directory and the sibling `musco-pytorch-private` checkout
# importable, appending each absolute path only when it is not already listed.
for _relative_dir in ('../', '../../musco-pytorch-private'):
    _absolute_dir = os.path.abspath(_relative_dir)
    if _absolute_dir not in sys.path:
        sys.path.append(_absolute_dir)

In [3]:
from functools import partial
import torch

import numpy as np
from maxvolpy.maxvol import rect_maxvol
import matplotlib.pyplot as plt

In [4]:
from maxvol_compression.sketch_matrix import RandomSums
from maxvol_compression.vmbf import EVBMF, weaken_rank
from maxvol_compression.layers import LinearMaxvol
from utils.dummy import DummyDatasetCifar10, DummyModelCifar10
from musco.pytorch.compressor.layers.conv1d_toeplitz import Conv1Dtoeplitz

# Dataset and Model

In [5]:
# Batch size handed to the dataset wrapper and reused by later cells.
BATCH_SIZE = 10

In [6]:
# Build the reference network and restore its trained parameters.
# map_location='cpu' deserialises the checkpoint onto the CPU, so the load does
# not depend on a usable GPU (the recorded warning shows the installed PyTorch
# cannot drive the GPU of this machine). load_state_dict then copies the values
# into the model's own parameters.
model = DummyModelCifar10()
state_dict = torch.load('data/dummy.weights', map_location='cpu')
model.load_state_dict(state_dict)

# Dataset wrapper exposing the `trainloader` / `testloader` used below.
cifar10 = DummyDatasetCifar10(batch_size=BATCH_SIZE, data_root='../data')

NVIDIA A100-SXM4-40GB with CUDA capability sm_80 is not compatible with the current PyTorch installation.
The current PyTorch install supports CUDA capabilities sm_37 sm_50 sm_60 sm_70.
If you want to use the NVIDIA A100-SXM4-40GB GPU with PyTorch, please check the instructions at https://pytorch.org/get-started/locally/



Files already downloaded and verified
Files already downloaded and verified


In [7]:
# Switch the model to evaluation mode; the cell output is the module summary.
model.eval()

DummyModelCifar10(
  (conv1): Conv1d(3, 15, kernel_size=(100,), stride=(1,))
  (pool): MaxPool1d(kernel_size=4, stride=4, padding=0, dilation=1, ceil_mode=False)
  (conv2): Conv1d(15, 30, kernel_size=(50,), stride=(1,))
  (fc1): Linear(in_features=1350, out_features=500, bias=True)
  (fc2): Linear(in_features=500, out_features=250, bias=True)
  (fc3): Linear(in_features=250, out_features=10, bias=True)
)

In [8]:
# Top-1 accuracy of the current model on the test loader.
correct = 0
all_ = 0
with torch.no_grad():
    for inputs, labels in cifar10.testloader:
        # torch.max along dim 1 returns (values, indices); the indices are the predicted classes.
        _, predicted = torch.max(model(inputs), 1)
        correct += (labels == predicted).sum().item()
        # Count the samples actually seen, so a shorter final batch cannot skew the ratio.
        all_ += labels.size(0)

print(f'accuracy: {correct / all_}')

accuracy: 0.6571


# FC layer maxvol

### computing sketch matrix

In [30]:
# Sketch accumulator for the fc1 activations; it is fed batch by batch from the forward hook registered below.
# NOTE(review): the positional arguments look like (sketch rows, flattened feature size) — fc1 has out_features=500; confirm against RandomSums.
rs = RandomSums(500, 500, keep_original=False)
def update_sketchmatrix(self, input, output, alg):
    """Forward hook that feeds a layer's output into a sketching algorithm.

    Args:
        self: module the hook is attached to (unused).
        input: inputs the module was called with (unused).
        output: tensor produced by the module; flattened from dim 1 onward.
        alg: object exposing ``update(ndarray)``; bound with ``functools.partial``
            when the hook is registered.

    Returns:
        None, so the module output is left unchanged by the hook.
    """
    # detach() keeps the hook usable when autograd is recording: calling
    # .numpy() on a tensor that requires grad raises a RuntimeError.
    activations = torch.flatten(output, 1).detach().cpu().numpy()
    alg.update(activations)
    
# Bind the accumulator as the hook's `alg` argument, then attach the hook to fc1.
# `handle` is kept so the hook can be removed once the sketch is complete.
fc1_sketch_hook = partial(update_sketchmatrix, alg=rs)
handle = model.fc1.register_forward_hook(fc1_sketch_hook)

In [43]:
# handle.remove()  # uncomment to detach the forward hook manually (the sketch loop below also removes it)

In [31]:
%%time
# Stream the training loader through the model so the forward hook can fill the sketch.
# try/finally detaches the hook even when the pass is interrupted or raises;
# otherwise later forward passes would keep updating the accumulator.
try:
    with torch.no_grad():
        for i, (batch, _) in enumerate(cifar10.trainloader, 1):
            model(batch)
            if i % 200 == 0:
                print(f'{i} batches completed')
finally:
    handle.remove()

200 batches completed
400 batches completed
600 batches completed
800 batches completed
1000 batches completed
1200 batches completed
1400 batches completed
1600 batches completed
1800 batches completed
2000 batches completed
2200 batches completed
2400 batches completed
2600 batches completed
2800 batches completed
3000 batches completed
3200 batches completed
3400 batches completed
3600 batches completed
3800 batches completed
4000 batches completed
4200 batches completed
4400 batches completed
4600 batches completed
4800 batches completed
5000 batches completed
CPU times: user 5min 48s, sys: 18.4 s, total: 6min 6s
Wall time: 2min 1s


### EVBMF rank estimation

In [32]:
# Thin SVD of the accumulated sketch; only the singular values and Vt are used afterwards.
_left_vectors, sigma, Vt = np.linalg.svd(rs.sketch_matrix, full_matrices=False)

# EVBMF is given the precomputed factors instead of a raw matrix (first argument is None).
precomputed_svd = (None, sigma, Vt)
_evbmf_0, vbmf_s, _evbmf_2, vbmf_post = EVBMF(None, pretrained_svd=precomputed_svd)

In [33]:
# Shape of the second EVBMF output; its length is passed to weaken_rank as `extreme_rank` in the next cell.
vbmf_s.shape

(113, 113)

In [34]:
# Combine the full rank of Vt with the EVBMF estimate; with weakenen_factor=1.0
# the recorded output equals the EVBMF estimate.
full_rank = min(Vt.shape)
evbmf_rank = len(vbmf_s)
rank = weaken_rank(rank=full_rank, extreme_rank=evbmf_rank, weakenen_factor=1.0)
rank

113

### rect-maxvol

In [35]:
# Keep the first `rank` rows of Vt and store them as the columns of V.
V = Vt[:rank].T
V.shape

(500, 113)

In [36]:
%%time
# Upper bound on the rows rect_maxvol may select: 1.7x the smaller dimension of V, truncated to int.
row_budget = int(1.7 * min(V.shape))
idxs, _ = rect_maxvol(V, maxK=row_budget)

CPU times: user 214 ms, sys: 125 ms, total: 339 ms
Wall time: 200 ms


In [37]:
%%time
# Pseudo-inverse of the submatrix made of the rows of V selected by rect_maxvol.
selected_rows = V[idxs]
invSV = np.linalg.pinv(selected_rows)

CPU times: user 21 ms, sys: 27 µs, total: 21 ms
Wall time: 34.7 ms


In [38]:
# Shapes side by side: pseudo-inverse of the selected rows, then the basis V itself.
invSV.shape, V.shape

((113, 192), (500, 113))

### compression

In [39]:
# Replace fc1 with its maxvol-compressed counterpart built from the selected rows and basis V.
# Guarded so that re-running the cell does not wrap an already compressed layer.
if isinstance(model.fc1, LinearMaxvol):
    print('model.fc1 is already a LinearMaxvol layer; reload the model to compress it again')
else:
    model.fc1 = LinearMaxvol(model.fc1, idxs, V)

In [40]:
# Re-apply evaluation mode after swapping the layer; the cell output shows the updated module summary.
model.eval()

DummyModelCifar10(
  (conv1): Conv1d(3, 15, kernel_size=(100,), stride=(1,))
  (pool): MaxPool1d(kernel_size=4, stride=4, padding=0, dilation=1, ceil_mode=False)
  (conv2): Conv1d(15, 30, kernel_size=(50,), stride=(1,))
  (fc1): LinearMaxvol(in_features=1350, out_features=500, bias=True, idxs_len=192)
  (fc2): Linear(in_features=500, out_features=250, bias=True)
  (fc3): Linear(in_features=250, out_features=10, bias=True)
)

### accuracy drop

In [42]:
# Top-1 accuracy on the test loader after the fc1 replacement.
correct = 0
all_ = 0
with torch.no_grad():
    # each item yielded by the loader unpacks into (inputs, labels)
    for inputs, labels in cifar10.testloader:
        _, predicted = torch.max(model(inputs), 1)
        correct += (labels == predicted).sum().item()
        # Count the samples actually seen, so a shorter final batch cannot skew the ratio.
        all_ += labels.size(0)

print(f'accuracy: {correct / all_}')

accuracy: 0.6457


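# Conv1d layer maxvol

> Note: most execution counts recorded in this section (In [9]–[21]) precede those of the FC section (In [30]–[42]), so the accuracy reported at the end of this section was presumably measured before `model.fc1` was replaced. Running the notebook top to bottom applies this section to the model whose `fc1` is already compressed.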

### computing sketch matrix

In [9]:
# Sketch accumulator for the conv1 activations; it is fed batch by batch from the forward hook registered below.
# NOTE(review): 13875 presumably is the flattened conv1 output size (15 channels x 925 positions for length-1024 inputs) — confirm.
rs = RandomSums(500, 13875, keep_original=False)
def update_sketchmatrix(self, input, output, alg):
    """Forward hook that feeds a layer's output into a sketching algorithm.

    Args:
        self: module the hook is attached to (unused).
        input: inputs the module was called with (unused).
        output: tensor produced by the module; flattened from dim 1 onward.
        alg: object exposing ``update(ndarray)``; bound with ``functools.partial``
            when the hook is registered.

    Returns:
        None, so the module output is left unchanged by the hook.
    """
    # detach() keeps the hook usable when autograd is recording: calling
    # .numpy() on a tensor that requires grad raises a RuntimeError.
    activations = torch.flatten(output, 1).detach().cpu().numpy()
    alg.update(activations)
    
# Bind the accumulator as the hook's `alg` argument, then attach the hook to conv1.
# `handle` is kept so the hook can be removed once the sketch is complete.
conv1_sketch_hook = partial(update_sketchmatrix, alg=rs)
handle = model.conv1.register_forward_hook(conv1_sketch_hook)

In [10]:
# handle.remove()  # uncomment to detach the forward hook manually (the sketch loop below also removes it)

In [48]:
%%time
# Stream the training loader through the model so the forward hook can fill the sketch.
# try/finally detaches the hook even when this long pass is interrupted or raises;
# otherwise later forward passes would keep updating the accumulator.
try:
    with torch.no_grad():
        for i, (batch, _) in enumerate(cifar10.trainloader, 1):
            model(batch)
            if i % 200 == 0:
                print(f'{i} batches completed')
finally:
    handle.remove()

200 batches completed
400 batches completed
600 batches completed
800 batches completed
1000 batches completed
1200 batches completed
1400 batches completed
1600 batches completed
1800 batches completed
2000 batches completed
2200 batches completed
2400 batches completed
2600 batches completed
2800 batches completed
3000 batches completed
3200 batches completed
3400 batches completed
3600 batches completed
3800 batches completed
4000 batches completed
4200 batches completed
4400 batches completed
4600 batches completed
4800 batches completed
5000 batches completed
CPU times: user 13min 53s, sys: 7min 48s, total: 21min 41s
Wall time: 16min 50s


In [50]:
# Persist the conv1 sketch so the slow pass above need not be repeated.
# The recorded output shows the file being written with a '.npy' suffix appended to this name.
rs.save('data/conv1_sketch_matrix')

Sketch matrix was saved to data/conv1_sketch_matrix.npy


### EVBMF rank estimation

In [11]:
# Recreate the accumulator and restore the sketch previously written to disk.
# NOTE(review): keep_original=True here differs from the False used while the sketch was computed — confirm this is intended.
sketch_path = 'data/conv1_sketch_matrix.npy'
rs = RandomSums(500, 13875, keep_original=True)
rs.load(sketch_path)

Sketch matrix was loaded from data/conv1_sketch_matrix.npy


In [12]:
%%time
# Thin SVD of the restored sketch; only the singular values and Vt are used afterwards.
_left_vectors, sigma, Vt = np.linalg.svd(rs.sketch_matrix, full_matrices=False)

# EVBMF is given the precomputed factors instead of a raw matrix (first argument is None).
precomputed_svd = (None, sigma, Vt)
_evbmf_0, vbmf_s, _evbmf_2, vbmf_post = EVBMF(None, pretrained_svd=precomputed_svd)

CPU times: user 1.84 s, sys: 1.02 s, total: 2.86 s
Wall time: 1.48 s


In [13]:
# Shape of the second EVBMF output; its length is passed to weaken_rank as `extreme_rank` in the next cell.
vbmf_s.shape

(404, 404)

In [14]:
# Combine the full rank of Vt with the EVBMF estimate; with weakenen_factor=1.0
# the recorded output equals the EVBMF estimate.
full_rank = min(Vt.shape)
evbmf_rank = len(vbmf_s)
rank = weaken_rank(rank=full_rank, extreme_rank=evbmf_rank, weakenen_factor=1.0)
rank

404

### rect-maxvol

In [15]:
# Keep the first `rank` rows of Vt and store them as the columns of V.
V = Vt[:rank].T
V.shape

(13875, 404)

In [16]:
%%time
# Upper bound on the rows rect_maxvol may select: 1.7x the smaller dimension of V, truncated to int.
row_budget = int(1.7 * min(V.shape))
idxs, _ = rect_maxvol(V, maxK=row_budget)

CPU times: user 1.12 s, sys: 11 ms, total: 1.13 s
Wall time: 1.14 s


In [17]:
%%time
# Pseudo-inverse of the submatrix made of the rows of V selected by rect_maxvol.
selected_rows = V[idxs]
invSV = np.linalg.pinv(selected_rows)

CPU times: user 93.7 ms, sys: 64 ms, total: 158 ms
Wall time: 81.6 ms


In [18]:
# Shapes side by side: pseudo-inverse of the selected rows, then the basis V itself.
invSV.shape, V.shape

((404, 686), (13875, 404))

### compression

In [19]:
# Swap conv1 for a Conv1Dtoeplitz wrapper whose dense layer is maxvol-compressed.
# Guarded so that re-running the cell does not convert an already converted layer.
if isinstance(model.conv1, Conv1Dtoeplitz):
    print('model.conv1 is already a Conv1Dtoeplitz layer; reload the model to compress it again')
    # Keep the name used by the following cells bound to the existing wrapper.
    conv1d_tplz = model.conv1
else:
    # (BATCH_SIZE, 3, 1024) is the input shape handed to the converter.
    conv1d_tplz = Conv1Dtoeplitz(model.conv1, (BATCH_SIZE, 3, 1024))
    conv1d_tplz.dense_layer = LinearMaxvol(conv1d_tplz.dense_layer, idxs, V)
    model.conv1 = conv1d_tplz

In [20]:
# Bias length of the compressed dense layer; the recorded 686 matches the number of rows selected by rect_maxvol.
conv1d_tplz.dense_layer.bias.shape

torch.Size([686])

In [21]:
# Top-1 accuracy on the test loader after the conv1 replacement.
correct = 0
all_ = 0
with torch.no_grad():
    for inputs, labels in cifar10.testloader:
        # torch.max along dim 1 returns (values, indices); the indices are the predicted classes.
        _, predicted = torch.max(model(inputs), 1)
        correct += (labels == predicted).sum().item()
        # Count the samples actually seen, so a shorter final batch cannot skew the ratio.
        all_ += labels.size(0)

print(f'accuracy: {correct / all_}')

accuracy: 0.6276
