In [1]:
# All the required imports

import os
import random
import numpy as np
import torch
import numpy as np
from torchvision import datasets, transforms
import torchvision
import math
from tqdm.notebook import tqdm
from sklearn.metrics import accuracy_score
from torch.utils.data.sampler import SubsetRandomSampler
import string
import pickle
import gc 

In [2]:
def set_seed(seed=27):
    np.random.seed(seed)
    random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    # When running on the CuDNN backend, two further options must be set
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
    # Set a fixed value for the hash seed
    os.environ["PYTHONHASHSEED"] = str(seed)
    print(f"Random seed set as {seed}")

set_seed(seed=27)

Random seed set as 27


In [3]:
if not os.path.exists(f"{os.getcwd()}/saves"):
    os.mkdir(f"{os.getcwd()}/saves")

In [4]:
# Device will determine whether to run the training on GPU or CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(device)

cuda


In [5]:
# train_dataset = datasets.CIFAR10(root="dataset/", train=True, transform=transforms.ToTensor(), download=True)
# test_dataset = datasets.CIFAR10(root="dataset/", train=False, transform=transforms.ToTensor(), download=True)

In [6]:
# print(len(train_dataset)) 
# print(len(test_dataset))

In [7]:
batch_size =512
# dataloader_train = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=0)
# dataloader_val = torch.utils.data.DataLoader(test_dataset, batch_size=batch_size, shuffle=True, num_workers=0)

# CIFAR10 dataset 

def get_train_valid_loader(data_dir,batch_size,augment,random_seed,valid_size=0.1,shuffle=True):
    normalize = transforms.Normalize(
        mean=[0.4914, 0.4822, 0.4465],
        std=[0.2023, 0.1994, 0.2010],
    )

    # define transforms
    valid_transform = transforms.Compose([
            transforms.Resize((227,227)),
            transforms.ToTensor(),
            normalize,
    ])
    if augment:
        train_transform = transforms.Compose([
            transforms.RandomCrop(32, padding=4),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            normalize,
        ])
    else:
        train_transform = transforms.Compose([
            transforms.Resize((227,227)),
            transforms.ToTensor(),
            normalize,
        ])

    # load the dataset
    train_dataset = torchvision.datasets.CIFAR10(root=data_dir, train=True,download=True, transform=train_transform, )

    valid_dataset = torchvision.datasets.CIFAR10(root=data_dir, train=True, download=True, transform=valid_transform, )

    num_train = len(train_dataset)
    indices = list(range(num_train))
    split = int(np.floor(valid_size * num_train))

    if shuffle:
        np.random.seed(random_seed)
        np.random.shuffle(indices)

    train_idx, valid_idx = indices[split:], indices[:split]
    train_sampler = SubsetRandomSampler(train_idx)
    valid_sampler = SubsetRandomSampler(valid_idx)

    train_loader = torch.utils.data.DataLoader(
        train_dataset, batch_size=batch_size, sampler=train_sampler)
 
    valid_loader = torch.utils.data.DataLoader(
        valid_dataset, batch_size=batch_size, sampler=valid_sampler)

    return (train_loader, valid_loader)

def get_test_loader(data_dir,  batch_size, shuffle=True):
    normalize = transforms.Normalize(
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225],
    )

    # define transform
    transform = transforms.Compose([
        transforms.Resize((227,227)),
        transforms.ToTensor(),
        normalize,
    ])

    dataset = torchvision.datasets.CIFAR10(
        root=data_dir, train=False,
        download=True, transform=transform,
    )

    data_loader = torch.utils.data.DataLoader(
        dataset, batch_size=batch_size, shuffle=shuffle
    )

    return data_loader

dataloader_train, dataloader_val = get_train_valid_loader(data_dir = './Cifar10', batch_size = batch_size, augment = False,random_seed = 27)

test_loader = get_test_loader(data_dir = './Cifar10', batch_size = batch_size)

Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./Cifar10/cifar-10-python.tar.gz


  0%|          | 0/170498071 [00:00<?, ?it/s]

Extracting ./Cifar10/cifar-10-python.tar.gz to ./Cifar10
Files already downloaded and verified
Files already downloaded and verified


In [8]:
labels_map = {
    0: "airplane",
    1: "automobile",
    2: "bird",
    3: "cat",
    4: "deer",
    5: "dog",
    6: "frog",
    7: "horse",
    8: "ship",
    9: "truck",
}

#### Let's have a look at the data

In [9]:
# import matplotlib.pyplot as plt
# figure = plt.figure(figsize=(6, 6))
# cols, rows = 5,5
# for i in range(1, cols * rows + 1):
#     sample_idx = torch.randint(len(train_dataset), size=(1,)).item()
#     img, label = train_dataset[sample_idx]
#     figure.add_subplot(rows, cols, i)
#     plt.title(labels_map[label], size=8)
#     plt.axis("off")
#     plt.imshow(img.T);
# plt.show()

#### Creating the CNN model

In [10]:
    
class View(torch.nn.Module):
    def __init__(self, shape):
        super(View, self).__init__()
        self.shape = shape

    def forward(self, x):
        return x.view(*self.shape)
    
class AlexNet(torch.nn.Module):
  def __init__(self, num_classes=10):
    super().__init__()
    # Define the layers
    self.layers = torch.nn.Sequential(*[torch.nn.Conv2d(3, 96, kernel_size=11, stride=4, padding=0),
                                        torch.nn.BatchNorm2d(96), 
                                        torch.nn.ReLU(), 
                                        torch.nn.MaxPool2d(kernel_size = 3, stride = 2), 
                                        torch.nn.Conv2d(96, 256, kernel_size=5, stride=1, padding=2),
            torch.nn.BatchNorm2d(256),
            torch.nn.ReLU(),
            torch.nn.MaxPool2d(kernel_size = 3, stride = 2),
            torch.nn.Conv2d(256, 384, kernel_size=3, stride=1, padding=1),
            torch.nn.BatchNorm2d(384),
            torch.nn.ReLU(),
            torch.nn.Conv2d(384, 384, kernel_size=3, stride=1, padding=1),
            torch.nn.BatchNorm2d(384),
            torch.nn.ReLU(),
            torch.nn.Conv2d(384, 256, kernel_size=3, stride=1, padding=1),
            torch.nn.BatchNorm2d(256),
            torch.nn.ReLU(),
            torch.nn.MaxPool2d(kernel_size = 3, stride = 2),
            View((-1, 9216)),
            torch.nn.Dropout(0.5),
            torch.nn.Linear(9216, 4096),
            torch.nn.ReLU(),
            torch.nn.Dropout(0.5),
            torch.nn.Linear(4096, 4096),
            torch.nn.ReLU(),
            torch.nn.Linear(4096, num_classes)])
    # Define the corresponding masks if the layer has weight
    self.masks = torch.nn.ParameterList([torch.nn.Parameter(torch.Tensor(torch.ones(self.layers[i].weight.data.shape)), requires_grad=False) \
                                         for i in range(len(self.layers)) \
                                         if hasattr(self.layers[i], 'weight')])
    
  def forward(self, x):
    # First multiply each layer's weights with its corresponding mask
    with torch.no_grad():
      j = 0
      for i in range(len(self.layers)):
        if hasattr(self.layers[i], 'weight'):
          self.layers[i].weight.copy_(self.layers[i].weight.data * self.masks[j])
          j += 1
        else:
          pass
    return self.layers(x)



In [11]:
model = AlexNet().to(device)
model

AlexNet(
  (layers): Sequential(
    (0): Conv2d(3, 96, kernel_size=(11, 11), stride=(4, 4))
    (1): BatchNorm2d(96, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (4): Conv2d(96, 256, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (5): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (6): ReLU()
    (7): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (8): Conv2d(256, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (10): ReLU()
    (11): Conv2d(384, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (12): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (13): ReLU()
    (14): Conv2d(384, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    

In [12]:
# Check which layer has weight

for i in range(len(model.layers)):
  print(f"({i}): {model.layers[i]}")
  if hasattr(model.layers[i], 'weight'):
    print(model.layers[i].weight.data.shape)
  else:
    print("No weight")
  print("----------------------------------------")

(0): Conv2d(3, 96, kernel_size=(11, 11), stride=(4, 4))
torch.Size([96, 3, 11, 11])
----------------------------------------
(1): BatchNorm2d(96, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
torch.Size([96])
----------------------------------------
(2): ReLU()
No weight
----------------------------------------
(3): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
No weight
----------------------------------------
(4): Conv2d(96, 256, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
torch.Size([256, 96, 5, 5])
----------------------------------------
(5): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
torch.Size([256])
----------------------------------------
(6): ReLU()
No weight
----------------------------------------
(7): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
No weight
----------------------------------------
(8): Conv2d(256, 384, kernel_size=(3, 3), stride=(1, 1), p

In [13]:
def train_model(epochs, dataloader, device, model, optimizer, loss_function):
    # Set model to training mode in order to unfreeze all layers and allow gradient propagation
    model.train()
    # These two lists will be used to store average loss and accuracy for each epoch
    total_loss, acc = list(), list()
    # Now write out the training procedure
    for epoch in range(epochs):
        print("Epoch:", epoch+1)
        # Each batch produces a loss, predictions and target
        batch_loss, batch_preds, batch_target = 0, list(), list()
        # For each batch, train the model
        for x, y in tqdm(dataloader, total=len(dataloader)):
            # Make sure that data is on the same device as the model
            x, y = x.to(device), y.to(device)
            # Remove all previous gradients
            optimizer.zero_grad()
            # Get predictions by performing a forward pass
            preds = model.forward(x.float())
            # Calculate error
            loss = loss_function(preds, y.to(torch.int64))
            # Calculate all the gradients for each layer
            loss.backward()
            # Finall, update the weights
            optimizer.step()
            # Save the loss
            batch_loss+= loss.item()
            # Save the predictions and target
            batch_preds.extend(np.argmax(preds.cpu().detach().numpy(), axis=1))
            batch_target.extend(y.cpu().detach().numpy())
        # Calculate average loss
        total_loss.append(batch_loss/len(dataloader))
        # Calculate accuracy for this epoch
        acc.append(accuracy_score(batch_target, batch_preds))
        print("Loss:", total_loss[-1], "\tAcc:", acc[-1])
    return model, total_loss, acc

def test_model(dataloader, device, model):
    # Set model to eval mode in order to freeze all layers so that no parameter gets updated during testing
    model.eval()
    # Each batch produces a loss, predictions and target
    batch_preds, batch_target = list(), list()
    # For each batch, train the model
    for x, y in tqdm(dataloader, total=len(dataloader)):
        # Make sure that data is on the same device as the model
        x, y = x.to(device), y.to(device)
        preds = model.forward(x.float())
        # Save the predictions and target
        batch_preds.extend(np.argmax(preds.cpu().detach().numpy(), axis=1))
        batch_target.extend(y.cpu().detach().numpy())
    return accuracy_score(batch_target, batch_preds)

## Getting the baseline performance

In [14]:
epochs = 50
baseline_model = AlexNet().to(device)

loss_function = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(baseline_model.parameters(), lr=0.003)


In [15]:

# baseline_model = AlexNet().to(device)
# loss_function = torch.nn.CrossEntropyLoss()
# optimizer = torch.optim.Adam(baseline_model.parameters(), lr=0.003)
# baseline_model, _, _ = train_model(epochs, dataloader_train, device, baseline_model, optimizer, loss_function)

In [16]:
# # Finding the test accuracy of our baseline model

# test_accuracy = test_model(dataloader_val, device, baseline_model)
# print(f"Validation accuracy of the baseline model is {test_accuracy}")

# #saving the baseline model
# torch.save(baseline_model, f"{os.getcwd()}/saves/baseline-50epoch.pth.tar")

In [17]:

# del baseline_model  #deleting the model 


# # model will still be on cache until its place is taken by other objects so also execute the below lines
# gc.collect()
# torch.cuda.empty_cache() 

## Now performing Lottery Ticket pruning

In [18]:
def lth_pruning(rounds, epochs, sparsity):
    # Set the number of epochs to be used
    epochs = epochs
    # Set the number of rounds
    rounds = rounds
    # Set the sparsity level for each layer
    sparsity = sparsity
    # Create the model
    model = AlexNet().to(device)
    # Define Loss
    loss_function = torch.nn.CrossEntropyLoss()
    # Define Optimizer
    optimizer = torch.optim.Adam(model.parameters(), lr=0.003)

    # First save the model weights that have been initialized
    init_weights = [model.layers[i].weight.data.to(device) for i in range(len(model.layers)) if hasattr(model.layers[i], 'weight')]
    with open(f"{os.getcwd()}" + r"/saves/lt" + str(rounds) + "-" + str(epochs) + "-" + str(sparsity).replace('.', '') + "initial-weights.pickle", 'wb') as handle:
        pickle.dump(init_weights, handle)

    for round_ in range(rounds):
        print("\n\n\nROUND", round_+1, "Started\n----------------------")
        # First train the model for some epochs
        model, _, _ = train_model(epochs, dataloader_train, device, model, optimizer, loss_function)
        if round_ == 0:
            print("Test Accuracy before pruning:", test_model(dataloader_val, device, model))
        else:
            print("Test Accuracy after pruning and retraining:", test_model(dataloader_val, device, model))
        with torch.no_grad():
            # Now prune the model weights
            j = 0
            for i in range(len(model.layers)):
              if hasattr(model.layers[i], 'weight'):
                  flatten_size = model.layers[i].weight.data.flatten().shape[0]
                  # Lottery Ticket Style Pruning
                  indices = torch.argsort(torch.reshape(torch.abs(model.layers[i].weight.data), (1, flatten_size)).squeeze())
                  # Since we already have the indices to prune, first reset the parameters
                  model.layers[i].weight.copy_(init_weights[j])
                  # Now prune
                  model.masks[j] = torch.reshape(model.masks[j], (1, flatten_size)).squeeze()
                  val = ((sparsity*100)**((round_+1)/rounds))/100
                  model.masks[j][indices[:math.ceil(val * flatten_size)]] = 0
                  model.masks[j] = model.masks[j].view(*model.layers[i].weight.data.shape)
                  j += 1
        print("Test Accuracy after pruning but no retraining:", test_model(dataloader_val, device, model))
        
    torch.save(model, f"{os.getcwd()}" + r"/saves/lt" + str(rounds) + "-" + str(epochs) + "-" + str(sparsity).replace('.','') + ".pth.tar")
    return model

### Sparsity : 0.1

In [19]:
# lt_model01 = lth_pruning(5, 5, 0.1)

In [20]:
# torch.save(lt_model01, f"{os.getcwd()}/saves/lt-01.pth.tar")

In [21]:

# del lt_model01  #deleting the model 


# # model will still be on cache until its place is taken by other objects so also execute the below lines
# gc.collect()
# torch.cuda.empty_cache() 

### Sparsity : 0.2

In [22]:
# lth_pruning(5, 5, 0.2)

### Sparsity : 0.3

In [23]:
# lt_model03 = lth_pruning(5, 5, 0.3)


In [24]:
# torch.save(lt_model03, f"{os.getcwd()}/saves/lt-03.pth.tar")

In [25]:

# del lt_model03  #deleting the model 


# # model will still be on cache until its place is taken by other objects so also execute the below lines
# gc.collect()
# torch.cuda.empty_cache() 

### Sparsity : 0.4

In [26]:
# lth_pruning(5, 5, 0.4)

### Sparsity : 0.5

In [27]:
# lt_model05 = lth_pruning(5, 5, 0.5)

In [28]:
# torch.save(lt_model05, f"{os.getcwd()}/saves/lt-05.pth.tar")

In [29]:

# del lt_model05  #deleting the model 


# # model will still be on cache until its place is taken by other objects so also execute the below lines
# gc.collect()
# torch.cuda.empty_cache() 

## Now performing Random pruning

In [30]:
def random_pruning(rounds, epochs, sparsity):
    # Set the number of epochs to be used
    epochs = epochs
    # Set the number of rounds
    rounds = rounds
    # Set the sparsity level for each layer
    sparsity = sparsity
    # Create the model
    model = AlexNet().to(device)
    # Define Loss
    loss_function = torch.nn.CrossEntropyLoss()
    # Define Optimizer
    optimizer = torch.optim.Adam(model.parameters(), lr=0.003)

    # First save the model weights that have been initialized
    init_weights = [model.layers[i].weight.data.to(device) for i in range(len(model.layers)) if hasattr(model.layers[i], 'weight')]
    

    for round_ in range(rounds):
        print("\n\n\nROUND", round_+1, "Started\n----------------------")
        # First train the model for some epochs
        model, _, _ = train_model(epochs, dataloader_train, device, model, optimizer, loss_function)
        if round_ == 0:
            print("Test Accuracy before pruning:", test_model(dataloader_val, device, model))
        else:
            print("Test Accuracy after pruning and retraining:", test_model(dataloader_val, device, model))
        with torch.no_grad():
            # Now prune the model weights
            j = 0
            for i in range(len(model.layers)):
              if hasattr(model.layers[i], 'weight'):
                  flatten_size = model.layers[i].weight.data.flatten().shape[0]
                  # Random pruning
                  indices = np.random.randint(0, flatten_size, flatten_size)
                  # Since we already have the indices to prune, first reset the parameters
                  model.layers[i].weight.copy_(init_weights[j])
                  # Now prune
                  model.masks[j] = torch.reshape(model.masks[j], (1, flatten_size)).squeeze()
                  val = ((sparsity*100)**((round_+1)/rounds))/100
                  model.masks[j][indices[:math.ceil(val * flatten_size)]] = 0
                  model.masks[j] = model.masks[j].view(*model.layers[i].weight.data.shape)
                  j += 1
        print("Test Accuracy after pruning but no retraining:", test_model(dataloader_val, device, model))
    return model

### Sparsity : 0.1

In [31]:
rp01 = random_pruning(5, 5, 0.1)
torch.save(rp01, f"{os.getcwd()}/saves/rp-5-5-01.pth.tar")

del rp01  #deleting the model 


# model will still be on cache until its place is taken by other objects so also execute the below lines
gc.collect()
torch.cuda.empty_cache() 




ROUND 1 Started
----------------------
Epoch: 1


  0%|          | 0/88 [00:00<?, ?it/s]

Loss: 7.634532389315692 	Acc: 0.14933333333333335
Epoch: 2


  0%|          | 0/88 [00:00<?, ?it/s]

Loss: 2.0467687222090634 	Acc: 0.21137777777777778
Epoch: 3


  0%|          | 0/88 [00:00<?, ?it/s]

Loss: 1.8948435133153743 	Acc: 0.2508888888888889
Epoch: 4


  0%|          | 0/88 [00:00<?, ?it/s]

Loss: 1.810800244862383 	Acc: 0.28395555555555557
Epoch: 5


  0%|          | 0/88 [00:00<?, ?it/s]

Loss: 1.7541949979283593 	Acc: 0.3136


  0%|          | 0/10 [00:00<?, ?it/s]

Test Accuracy before pruning: 0.3798


  0%|          | 0/10 [00:00<?, ?it/s]

Test Accuracy after pruning but no retraining: 0.3566



ROUND 2 Started
----------------------
Epoch: 1


  0%|          | 0/88 [00:00<?, ?it/s]

Loss: 1.7128708389672367 	Acc: 0.32795555555555556
Epoch: 2


  0%|          | 0/88 [00:00<?, ?it/s]

Loss: 1.6773320964791558 	Acc: 0.34702222222222223
Epoch: 3


  0%|          | 0/88 [00:00<?, ?it/s]

Loss: 1.6440849114548077 	Acc: 0.3605777777777778
Epoch: 4


  0%|          | 0/88 [00:00<?, ?it/s]

Loss: 1.5880910740657286 	Acc: 0.39082222222222224
Epoch: 5


  0%|          | 0/88 [00:00<?, ?it/s]

Loss: 1.5673689191991633 	Acc: 0.40142222222222224


  0%|          | 0/10 [00:00<?, ?it/s]

Test Accuracy after pruning and retraining: 0.4126


  0%|          | 0/10 [00:00<?, ?it/s]

Test Accuracy after pruning but no retraining: 0.3584



ROUND 3 Started
----------------------
Epoch: 1


  0%|          | 0/88 [00:00<?, ?it/s]

Loss: 1.5462260815230282 	Acc: 0.4116222222222222
Epoch: 2


  0%|          | 0/88 [00:00<?, ?it/s]

Loss: 1.5136909958991138 	Acc: 0.4269777777777778
Epoch: 3


  0%|          | 0/88 [00:00<?, ?it/s]

Loss: 1.4737887341867795 	Acc: 0.44737777777777776
Epoch: 4


  0%|          | 0/88 [00:00<?, ?it/s]

Loss: 1.4525532749566166 	Acc: 0.4553333333333333
Epoch: 5


  0%|          | 0/88 [00:00<?, ?it/s]

Loss: 1.421204623850909 	Acc: 0.47371111111111114


  0%|          | 0/10 [00:00<?, ?it/s]

Test Accuracy after pruning and retraining: 0.4846


  0%|          | 0/10 [00:00<?, ?it/s]

Test Accuracy after pruning but no retraining: 0.3824



ROUND 4 Started
----------------------
Epoch: 1


  0%|          | 0/88 [00:00<?, ?it/s]

Loss: 1.4067592214454303 	Acc: 0.4750888888888889
Epoch: 2


  0%|          | 0/88 [00:00<?, ?it/s]

Loss: 1.372218599373644 	Acc: 0.4903111111111111
Epoch: 3


  0%|          | 0/88 [00:00<?, ?it/s]

Loss: 1.3306725797328083 	Acc: 0.5088888888888888
Epoch: 4


  0%|          | 0/88 [00:00<?, ?it/s]

Loss: 1.2952091883529315 	Acc: 0.5276
Epoch: 5


  0%|          | 0/88 [00:00<?, ?it/s]

Loss: 1.2619593888521194 	Acc: 0.5416666666666666


  0%|          | 0/10 [00:00<?, ?it/s]

Test Accuracy after pruning and retraining: 0.5938


  0%|          | 0/10 [00:00<?, ?it/s]

Test Accuracy after pruning but no retraining: 0.3162



ROUND 5 Started
----------------------
Epoch: 1


  0%|          | 0/88 [00:00<?, ?it/s]

Loss: 1.2718248719518834 	Acc: 0.5413333333333333
Epoch: 2


  0%|          | 0/88 [00:00<?, ?it/s]

Loss: 1.2200462167913264 	Acc: 0.5587333333333333
Epoch: 3


  0%|          | 0/88 [00:00<?, ?it/s]

Loss: 1.1743366488001563 	Acc: 0.5796666666666667
Epoch: 4


  0%|          | 0/88 [00:00<?, ?it/s]

Loss: 1.1482460742646998 	Acc: 0.5889777777777778
Epoch: 5


  0%|          | 0/88 [00:00<?, ?it/s]

Loss: 1.1025136289271442 	Acc: 0.6062222222222222


  0%|          | 0/10 [00:00<?, ?it/s]

Test Accuracy after pruning and retraining: 0.5858


  0%|          | 0/10 [00:00<?, ?it/s]

Test Accuracy after pruning but no retraining: 0.1922


### Sparsity : 0.2

In [32]:
# random_pruning(5, 5, 0.2)

### Sparsity : 0.3

In [33]:
# random_pruning(5, 5, 0.3)

### Sparsity : 0.4

In [34]:
# random_pruning(5, 5, 0.4)

### Sparsity : 0.5

In [35]:
# random_pruning(5, 5, 0.5)
rp05 = random_pruning(5, 5, 0.5)
torch.save(rp05, f"{os.getcwd()}/saves/rp-5-5-05.pth.tar")

del rp05  #deleting the model 


# model will still be on cache until its place is taken by other objects so also execute the below lines
gc.collect()
torch.cuda.empty_cache() 




ROUND 1 Started
----------------------
Epoch: 1


  0%|          | 0/88 [00:00<?, ?it/s]

Loss: 7.218144955960187 	Acc: 0.15357777777777779
Epoch: 2


  0%|          | 0/88 [00:00<?, ?it/s]

Loss: 2.0663797719912096 	Acc: 0.21057777777777778
Epoch: 3


  0%|          | 0/88 [00:00<?, ?it/s]

Loss: 1.9105793495069852 	Acc: 0.2466
Epoch: 4


  0%|          | 0/88 [00:00<?, ?it/s]

Loss: 1.8180937265807933 	Acc: 0.2817777777777778
Epoch: 5


  0%|          | 0/88 [00:00<?, ?it/s]

Loss: 1.7487080246210098 	Acc: 0.3128666666666667


  0%|          | 0/10 [00:00<?, ?it/s]

Test Accuracy before pruning: 0.385


  0%|          | 0/10 [00:00<?, ?it/s]

Test Accuracy after pruning but no retraining: 0.3518



ROUND 2 Started
----------------------
Epoch: 1


  0%|          | 0/88 [00:00<?, ?it/s]

Loss: 1.6824854720722546 	Acc: 0.34186666666666665
Epoch: 2


  0%|          | 0/88 [00:00<?, ?it/s]

Loss: 1.6203594194217161 	Acc: 0.37024444444444443
Epoch: 3


  0%|          | 0/88 [00:00<?, ?it/s]

Loss: 1.5731451931324871 	Acc: 0.39095555555555556
Epoch: 4


  0%|          | 0/88 [00:00<?, ?it/s]

Loss: 1.5438059852881865 	Acc: 0.4042
Epoch: 5


  0%|          | 0/88 [00:00<?, ?it/s]

Loss: 1.5100323476574637 	Acc: 0.41644444444444445


  0%|          | 0/10 [00:00<?, ?it/s]

Test Accuracy after pruning and retraining: 0.49


  0%|          | 0/10 [00:00<?, ?it/s]

Test Accuracy after pruning but no retraining: 0.3896



ROUND 3 Started
----------------------
Epoch: 1


  0%|          | 0/88 [00:00<?, ?it/s]

Loss: 1.4731555689464917 	Acc: 0.4338666666666667
Epoch: 2


  0%|          | 0/88 [00:00<?, ?it/s]

Loss: 1.445161591876637 	Acc: 0.4517333333333333
Epoch: 3


  0%|          | 0/88 [00:00<?, ?it/s]

Loss: 1.408200583674691 	Acc: 0.46828888888888887
Epoch: 4


  0%|          | 0/88 [00:00<?, ?it/s]

Loss: 1.367542028427124 	Acc: 0.48604444444444445
Epoch: 5


  0%|          | 0/88 [00:00<?, ?it/s]

Loss: 1.3387766670097003 	Acc: 0.5000444444444444


  0%|          | 0/10 [00:00<?, ?it/s]

Test Accuracy after pruning and retraining: 0.528


  0%|          | 0/10 [00:00<?, ?it/s]

Test Accuracy after pruning but no retraining: 0.2808



ROUND 4 Started
----------------------
Epoch: 1


  0%|          | 0/88 [00:00<?, ?it/s]

Loss: 1.3903919512575322 	Acc: 0.4844888888888889
Epoch: 2


  0%|          | 0/88 [00:00<?, ?it/s]

Loss: 1.294102504849434 	Acc: 0.525
Epoch: 3


  0%|          | 0/88 [00:00<?, ?it/s]

Loss: 1.2681367546319962 	Acc: 0.5361111111111111
Epoch: 4


  0%|          | 0/88 [00:00<?, ?it/s]

Loss: 1.2268024696545168 	Acc: 0.5534222222222223
Epoch: 5


  0%|          | 0/88 [00:00<?, ?it/s]

Loss: 1.2007837701927533 	Acc: 0.5655555555555556


  0%|          | 0/10 [00:00<?, ?it/s]

Test Accuracy after pruning and retraining: 0.5904


  0%|          | 0/10 [00:00<?, ?it/s]

Test Accuracy after pruning but no retraining: 0.1424



ROUND 5 Started
----------------------
Epoch: 1


  0%|          | 0/88 [00:00<?, ?it/s]

Loss: 1.2889325022697449 	Acc: 0.5361555555555556
Epoch: 2


  0%|          | 0/88 [00:00<?, ?it/s]

Loss: 1.187302279201421 	Acc: 0.5703777777777778
Epoch: 3


  0%|          | 0/88 [00:00<?, ?it/s]

Loss: 1.133449593728239 	Acc: 0.5968666666666667
Epoch: 4


  0%|          | 0/88 [00:00<?, ?it/s]

Loss: 1.088501582091505 	Acc: 0.6117555555555556
Epoch: 5


  0%|          | 0/88 [00:00<?, ?it/s]

Loss: 1.0481591380455277 	Acc: 0.6284222222222222


  0%|          | 0/10 [00:00<?, ?it/s]

Test Accuracy after pruning and retraining: 0.6058


  0%|          | 0/10 [00:00<?, ?it/s]

Test Accuracy after pruning but no retraining: 0.099


In [36]:
# import matplotlib.pyplot as plt
# import seaborn as sns

# sns.lineplot(x=[10, 20, 30, 40, 50], y=[75.10, 75.10, 75.10, 75.10, 75.10], linestyle="--", label="Baseline", color="black")
# sns.lineplot(x=[10, 20, 30, 40, 50], y=[76.72, 74.15, 76.32, 75.37, 77.71], label="Random")
# sns.lineplot(x=[10, 20, 30, 40, 50], y=[77.62, 77.71, 78.08, 75.33, 77.54], label="LTH")
# plt.ylim(70, 80)
# plt.xlabel("Sparsity(%)")
# plt.ylabel("Validation Accuracy")
# sns.despine()
# plt.show()

The LTH performance is always staying above the baseline performance. Hence our hypothesis is correct.

## Effect  of  the  number  of  rounds  on  model performance

### Sparsity = 0.5 (fixed)

#### Lottery Ticket style pruning

##### Round = 1

In [37]:
# lth_pruning(1, 30, 0.5)

##### Rounds = 2

In [38]:
# lth_pruning(2, 15, 0.5)

##### Rounds = 3

In [39]:
# lth_pruning(3, 10, 0.5)

##### Rounds = 4

In [40]:
# lth_pruning(4, 7, 0.5)

##### Rounds = 5

In [41]:
# lth_pruning(5, 6, 0.5)

#### Random pruning

##### Round(s) = 1

In [42]:
# random_pruning(1, 30, 0.5)

##### Round(s) = 2

In [43]:
# random_pruning(2, 15, 0.5)

##### Round(s) = 3

In [44]:
# random_pruning(3, 10, 0.5)

##### Round(s) = 4

In [45]:
# random_pruning(4, 7, 0.5)

##### Round(s) = 5

In [46]:
# random_pruning(5, 6, 0.5)

In [47]:
# import matplotlib.pyplot as plt
# import seaborn as sns

# sns.lineplot(x=[1, 2, 3, 4, 5], y=[75.10, 75.10, 75.10, 75.10, 75.10], linestyle="--", label="Baseline", color="black")
# sns.lineplot(x=[1, 2, 3, 4, 5], y=[77.22, 79.15, 78.59, 76.72, 74.50], label="LTH")
# sns.lineplot(x=[1, 2, 3, 4, 5], y=[78.53, 78.14, 76.64, 77.30, 77.46], label="Random")
# plt.ylim(60, 85)
# plt.xticks((1, 2, 3, 4, 5))
# plt.xlabel("Rounds")
# plt.ylabel("Validation Accuracy")
# sns.despine()

### Run an experiment wherein, instead of resetting the weights back to the initial weights after pruning, we initialize the weights **randomly**.

##### I am using xavier initialization

In [48]:
# epochs = 5
# rounds = 5
# sparsity = 0.5
# model = AlexNet().to(device)
# loss_function = torch.nn.CrossEntropyLoss()
# optimizer = torch.optim.Adam(model.parameters(), lr=0.003)

# for round_ in range(rounds):
#     print("\n\n\nROUND", round_+1, "Started\n----------------------")
#     model, _, _ = train_model(epochs, dataloader_train, device, model, optimizer, loss_function)
#     if round_ == 0:
#         print("Test Accuracy before pruning:", test_model(dataloader_val, device, model))
#     else:
#         print("Test Accuracy after pruning and retraining:", test_model(dataloader_val, device, model))
#         torch.save(model, f"{os.getcwd()}/saves/random_reinit_lt-5-5-05.pth.tar")
#     with torch.no_grad():
#         j = 0
#         for i in range(len(model.layers)):
#           if hasattr(model.layers[i], 'weight'):
#               flatten_size = model.layers[i].weight.data.flatten().shape[0]
#               # Lottery Ticket Style Pruning
#               indices = torch.argsort(torch.reshape(torch.abs(model.layers[i].weight.data), (1, flatten_size)).squeeze())
#               # Since we already have the indices to prune, let's re-initialize the weights randomly
#               torch.nn.init.xavier_uniform_(model.layers[i].weight.data.unsqueeze(0))
#               model.masks[j] = torch.reshape(model.masks[j], (1, flatten_size)).squeeze()
#               val = ((sparsity*100)**((round_+1)/rounds))/100
#               model.masks[j][indices[:math.ceil(val * flatten_size)]] = 0
#               model.masks[j] = model.masks[j].view(*model.layers[i].weight.data.shape)
#               j += 1
#     print("Test Accuracy after pruning but no retraining:", test_model(dataloader_val, device, model))


# del model  #deleting the model 


# # model will still be on cache until its place is taken by other objects so also execute the below lines
# gc.collect()
# torch.cuda.empty_cache() 

In [49]:
# sns.lineplot(x=[1, 2, 3, 4, 5], y=[57.57, 52.92, 56.93, 54.86, 57.88])
# plt.ylim(10, 70)
# plt.xticks((1, 2, 3, 4, 5))
# plt.xlabel("Rounds")
# plt.ylabel("Validation Accuracy")
# sns.despine()

We can see that the there is no significant change in accuracy if we randomly initialize the weights instead of setting it to the initial value. The accuracy just stays at around 57% because in every round its like the model is freshly getting trained.