## Working space setup

In [1]:
# check GPU
!nvidia-smi

Thu Jan 12 17:01:51 2023       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 460.32.03    Driver Version: 460.32.03    CUDA Version: 11.2     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  A100-SXM4-40GB      Off  | 00000000:00:04.0 Off |                    0 |
| N/A   28C    P0    46W / 400W |      0MiB / 40536MiB |      0%      Default |
|                               |                      |             Disabled |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [2]:
# clone apex
!git clone https://github.com/NVIDIA/apex

Cloning into 'apex'...
remote: Enumerating objects: 10686, done.[K
remote: Counting objects: 100% (208/208), done.[K
remote: Compressing objects: 100% (146/146), done.[K
remote: Total 10686 (delta 120), reused 119 (delta 62), pack-reused 10478[K
Receiving objects: 100% (10686/10686), 15.22 MiB | 9.45 MiB/s, done.
Resolving deltas: 100% (7348/7348), done.


In [3]:
# install apex
!cd apex && pip install -v --disable-pip-version-check --no-cache-dir --global-option="--permutation_search" ./

[0mUsing pip 22.0.4 from /usr/local/lib/python3.8/dist-packages/pip (python 3.8)
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Processing /content/apex
  Running command python setup.py egg_info


  torch.__version__  = 1.13.1+cu116


  running egg_info
  creating /tmp/pip-pip-egg-info-ffby5ej3/apex.egg-info
  writing /tmp/pip-pip-egg-info-ffby5ej3/apex.egg-info/PKG-INFO
  writing dependency_links to /tmp/pip-pip-egg-info-ffby5ej3/apex.egg-info/dependency_links.txt
  writing requirements to /tmp/pip-pip-egg-info-ffby5ej3/apex.egg-info/requires.txt
  writing top-level names to /tmp/pip-pip-egg-info-ffby5ej3/apex.egg-info/top_level.txt
  writing manifest file '/tmp/pip-pip-egg-info-ffby5ej3/apex.egg-info/SOURCES.txt'
  adding license file 'LICENSE'
  writing manifest file '/tmp/pip-pip-egg-info-ffby5ej3/apex.egg-info/SOURCES.txt'
  Preparing metadata (setup.py) ... [?25l[?25hdone
Skipping wheel build for apex, due to binaries being 

In [4]:
# reload modules in .py files
%load_ext autoreload
%autoreload 2

In [5]:
# pull repo
!git clone https://github.com/char-tan/sparsity

Cloning into 'sparsity'...
remote: Enumerating objects: 168, done.[K
remote: Counting objects: 100% (168/168), done.[K
remote: Compressing objects: 100% (94/94), done.[K
remote: Total 168 (delta 81), reused 144 (delta 60), pack-reused 0[K
Receiving objects: 100% (168/168), 85.34 KiB | 9.48 MiB/s, done.
Resolving deltas: 100% (81/81), done.


In [6]:
# change working directory, make dir for models
import os

os.chdir("sparsity")
os.makedirs("models", exist_ok=True)

In [7]:
# checkout branch
!git checkout ct_dev

Branch 'ct_dev' set up to track remote branch 'ct_dev' from 'origin'.
Switched to a new branch 'ct_dev'


## Training config

In [8]:
import torch

from training.training import *
from training.utils import *

from apex.contrib.sparsity import ASP

Found permutation search CUDA kernels
[ASP][Info] permutation_search_kernels can be imported.


In [9]:
config = Config(num_epochs=50)

torch.manual_seed(config.seed)

model = resnet18_small_input().to(config.device)

torch.save(model.state_dict(), "models/init.pt")

optimizer = torch.optim.SGD(
    model.parameters(),
    lr=config.lr,
    momentum=config.momentum,
    weight_decay=config.weight_decay,
)

train_loader, test_loader = cifar10_dataloaders(config)

Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to data/cifar-10-python.tar.gz


  0%|          | 0/170498071 [00:00<?, ?it/s]

Extracting data/cifar-10-python.tar.gz to data


## Phase 1 training

In [10]:
train_phase(model, optimizer, train_loader, test_loader, config)

torch.save(model.state_dict(), "models/phase1.pt")

epoch: 0 | time: 17.75 | train loss: 1.951 | train acc: 26.8 | test loss: 1.757 | test acc: 35.69 | 
epoch: 1 | time: 10.34 | train loss: 1.504 | train acc: 44.01 | test loss: 1.388 | test acc: 48.8 | 
epoch: 2 | time: 10.12 | train loss: 1.164 | train acc: 57.46 | test loss: 1.221 | test acc: 56.15 | 
epoch: 3 | time: 10.09 | train loss: 0.9549 | train acc: 65.75 | test loss: 1.576 | test acc: 53.63 | 
epoch: 4 | time: 10.31 | train loss: 0.8319 | train acc: 70.27 | test loss: 0.877 | test acc: 69.21 | 
epoch: 5 | time: 10.15 | train loss: 0.7159 | train acc: 74.65 | test loss: 0.8309 | test acc: 72.5 | 
epoch: 6 | time: 10.04 | train loss: 0.6297 | train acc: 77.9 | test loss: 0.7852 | test acc: 72.77 | 
epoch: 7 | time: 10.38 | train loss: 0.5631 | train acc: 80.15 | test loss: 0.6142 | test acc: 79.35 | 
epoch: 8 | time: 10.21 | train loss: 0.5114 | train acc: 82.35 | test loss: 0.6075 | test acc: 79.52 | 
epoch: 9 | time: 10.03 | train loss: 0.4688 | train acc: 83.74 | test loss: 

## Prune model, evaluate after pruning

In [11]:
# don't want to have sparsity on downsampling layer
excempt_layers = find_downsample_layers(model)
                
# prune model + applying mask s.t params stay zeroed
ASP.init_model_for_pruning(model, mask_calculator="m4n2_1d", verbosity=2, whitelist=[torch.nn.Linear, torch.nn.Conv2d], allow_recompute_mask=False,
                           disallowed_layer_names=excempt_layers, allow_permutation=False)
ASP.init_optimizer_for_pruning(optimizer)
ASP.compute_sparse_masks()


torch.save(model.state_dict(), "models/phase1_pruned.pt")

[ASP] torchvision is imported, can work with the MaskRCNN/KeypointRCNN from torchvision.
[ASP] Auto skipping pruning conv1::weight of size=torch.Size([64, 3, 3, 3]) and type=torch.float32 for sparsity
[ASP] Auto skipping pruning fc::weight of size=torch.Size([10, 512]) and type=torch.float32 for sparsity
[ASP] Enabled 50.00% sparsity for layer1.0.conv1::weight of size=torch.Size([64, 64, 3, 3]) and type=torch.float32
[ASP] Enabled 50.00% sparsity for layer1.0.conv2::weight of size=torch.Size([64, 64, 3, 3]) and type=torch.float32
[ASP] Enabled 50.00% sparsity for layer1.1.conv1::weight of size=torch.Size([64, 64, 3, 3]) and type=torch.float32
[ASP] Enabled 50.00% sparsity for layer1.1.conv2::weight of size=torch.Size([64, 64, 3, 3]) and type=torch.float32
[ASP] Enabled 50.00% sparsity for layer2.0.conv1::weight of size=torch.Size([128, 64, 3, 3]) and type=torch.float32
[ASP] Enabled 50.00% sparsity for layer2.0.conv2::weight of size=torch.Size([128, 128, 3, 3]) and type=torch.float32
[

In [16]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [12]:
# evaluate on train + test data
train_loss, train_acc = test_epoch(model, train_loader, config.device)
test_loss, test_acc = test_epoch(model, test_loader, config.device)

epoch_summary(
    {
        "train loss": train_loss,
        "train acc": train_acc,
        "test loss": test_loss,
        "test acc": test_acc,
    }
)

train loss: 0.087 | train acc: 97 | test loss: 0.3572 | test acc: 89.91 | 


## Phase 2 training

In [13]:
train_phase(model, optimizer, train_loader, test_loader, config)

torch.save(model.state_dict(), "models/phase2.pt")

epoch: 0 | time: 10.18 | train loss: 0.1666 | train acc: 94.4 | test loss: 0.6319 | test acc: 81.57 | 
epoch: 1 | time: 10.12 | train loss: 0.2421 | train acc: 91.59 | test loss: 0.6717 | test acc: 81.58 | 
epoch: 2 | time: 10.2 | train loss: 0.2069 | train acc: 92.72 | test loss: 0.4761 | test acc: 85.52 | 
epoch: 3 | time: 10.39 | train loss: 0.1943 | train acc: 93.3 | test loss: 0.6244 | test acc: 82.69 | 
epoch: 4 | time: 10.38 | train loss: 0.1856 | train acc: 93.42 | test loss: 0.6585 | test acc: 80.01 | 
epoch: 5 | time: 10.46 | train loss: 0.1802 | train acc: 93.71 | test loss: 0.7504 | test acc: 80.03 | 
epoch: 6 | time: 10.47 | train loss: 0.1738 | train acc: 94.05 | test loss: 0.4759 | test acc: 85.66 | 
epoch: 7 | time: 10.3 | train loss: 0.1711 | train acc: 94.01 | test loss: 0.4754 | test acc: 85.72 | 
epoch: 8 | time: 10.14 | train loss: 0.1653 | train acc: 94.18 | test loss: 0.4272 | test acc: 87.63 | 
epoch: 9 | time: 10.44 | train loss: 0.1574 | train acc: 94.5 | test

## Train from original init with mask (LTH)

In [14]:
# apply mask to init params then load into model
model.load_state_dict(
    mask_checkpoint(torch.load("models/init.pt"), model), strict=False
)

torch.save(model.state_dict(), "models/init_pruned.pt")

train_phase(model, optimizer, train_loader, test_loader, config)

torch.save(model.state_dict(), "models/lottery_ticket.pt")

epoch: 0 | time: 10.08 | train loss: 1.938 | train acc: 27.05 | test loss: 1.943 | test acc: 31.7 | 
epoch: 1 | time: 10.46 | train loss: 1.391 | train acc: 48.36 | test loss: 1.567 | test acc: 48.22 | 
epoch: 2 | time: 10.44 | train loss: 0.9938 | train acc: 64.23 | test loss: 1.034 | test acc: 64.29 | 
epoch: 3 | time: 10.49 | train loss: 0.7829 | train acc: 72.15 | test loss: 0.7701 | test acc: 73.56 | 
epoch: 4 | time: 10.32 | train loss: 0.6306 | train acc: 77.92 | test loss: 0.8387 | test acc: 72.25 | 
epoch: 5 | time: 10.18 | train loss: 0.5306 | train acc: 81.56 | test loss: 0.6712 | test acc: 77.05 | 
epoch: 6 | time: 10.35 | train loss: 0.4608 | train acc: 84.1 | test loss: 0.5529 | test acc: 81.48 | 
epoch: 7 | time: 10.34 | train loss: 0.4183 | train acc: 85.46 | test loss: 0.5076 | test acc: 83.28 | 
epoch: 8 | time: 10.31 | train loss: 0.3778 | train acc: 86.99 | test loss: 0.6754 | test acc: 78.28 | 
epoch: 9 | time: 10.29 | train loss: 0.3455 | train acc: 88.01 | test l

## Train from random init with mask

In [15]:
torch.manual_seed(config.seed + 1)

# produce new initalisation
new_init_params = resnet18_small_input().cuda().state_dict()

torch.save(new_init_params, "models/new_init.pt")

# apply mask to params then load into model
model.load_state_dict(mask_checkpoint(new_init_params, model), strict=False)

torch.save(model.state_dict(), "models/new_init_pruned.pt")

train_phase(model, optimizer, train_loader, test_loader, config)

torch.save(model.state_dict(), "models/random_lottery_ticket.pt")

epoch: 0 | time: 10.22 | train loss: 1.938 | train acc: 27.17 | test loss: 1.85 | test acc: 33.97 | 
epoch: 1 | time: 10.46 | train loss: 1.478 | train acc: 45.24 | test loss: 1.322 | test acc: 52.5 | 
epoch: 2 | time: 10.49 | train loss: 1.154 | train acc: 58.11 | test loss: 1.195 | test acc: 58.29 | 
epoch: 3 | time: 10.26 | train loss: 0.9591 | train acc: 65.8 | test loss: 1.195 | test acc: 61.85 | 
epoch: 4 | time: 10.41 | train loss: 0.8241 | train acc: 70.83 | test loss: 0.8194 | test acc: 71.72 | 
epoch: 5 | time: 10.37 | train loss: 0.7047 | train acc: 75.23 | test loss: 0.7953 | test acc: 73.37 | 
epoch: 6 | time: 10.12 | train loss: 0.6172 | train acc: 78.54 | test loss: 0.717 | test acc: 75.78 | 
epoch: 7 | time: 10.35 | train loss: 0.5601 | train acc: 80.74 | test loss: 0.7638 | test acc: 75.33 | 
epoch: 8 | time: 10.13 | train loss: 0.5049 | train acc: 82.41 | test loss: 0.6087 | test acc: 79.57 | 
epoch: 9 | time: 10.22 | train loss: 0.4692 | train acc: 83.81 | test loss: