<a href="https://colab.research.google.com/github/amirmulla/Lottery-Ticket-Hypothesis/blob/main/pruning.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [143]:
###########################
# Mount Google Drive      #
###########################

import os

from google.colab import drive

# Mount the user's Google Drive so checkpoints and metrics outlive the Colab VM.
# force_remount=True refreshes the mount when this cell is re-run.
drive.mount('/content/drive/', force_remount=True)
results_dir = '/content/drive/MyDrive/Deep Learning Project/Results'
model_dir = '/content/drive/MyDrive/Deep Learning Project/Models'

# torch.save does not create missing parent folders, so make sure both output
# locations exist before any training cell tries to write into them.
os.makedirs(results_dir, exist_ok=True)
os.makedirs(model_dir, exist_ok=True)

Mounted at /content/drive/


In [144]:
import torch
from torch import nn
import torch.nn.utils.prune as prune
import numpy as np
from torch import optim
import time

from torchvision import datasets, transforms
from torch.utils.data.sampler import SubsetRandomSampler
import helper

# number of subprocesses to use for data loading
num_workers = 0
# how many samples per batch to load
batch_size = 20

# convert data to torch.FloatTensor
transform = transforms.ToTensor()

# choose the training and test datasets
train_data = datasets.MNIST(root='data', train=True, download=True, transform=transform)
test_data = datasets.MNIST(root='data', train=False, download=True, transform=transform)

# prepare data loaders
# The training loader reshuffles every epoch so the optimizer does not see the
# same batch sequence each time; the test loader keeps a fixed order.
train_loader = torch.utils.data.DataLoader(train_data, batch_size=batch_size, num_workers=num_workers, shuffle=True)
test_loader = torch.utils.data.DataLoader(test_data, batch_size=batch_size, num_workers=num_workers)

In [145]:
# define the NN architecture
class LeNet(nn.Module):
    """Fully connected 784 -> 300 -> 100 -> 10 classifier.

    Each hidden layer is followed by ReLU and dropout (p=0.2); the output
    layer returns raw scores with no activation applied.
    """

    def __init__(self, int_range=0.5):
        # NOTE: `int_range` is accepted for compatibility but is not used here.
        super().__init__()
        in_features = 28 * 28
        width_1, width_2, n_outputs = 300, 100, 10

        # Registration order (fc1, fc2, fc3, dropout, activation) is preserved
        # so parameter ordering and the printed module summary stay the same.
        self.fc1 = nn.Linear(in_features, width_1)
        self.fc2 = nn.Linear(width_1, width_2)
        self.fc3 = nn.Linear(width_2, n_outputs)
        self.dropout = nn.Dropout(0.2)
        self.activation = nn.ReLU()

    def forward(self, x):
        """Flatten `x` to rows of 784 values and return the 10 output scores per row."""
        flat = x.view(-1, 28 * 28)
        hidden_a = self.dropout(self.activation(self.fc1(flat)))
        hidden_b = self.dropout(self.activation(self.fc2(hidden_a)))
        return self.fc3(hidden_b)

# initialize the NN
model = LeNet()
# Show the registered submodules (layers, dropout, activation) of the network.
print(model)

LeNet(
  (fc1): Linear(in_features=784, out_features=300, bias=True)
  (fc2): Linear(in_features=300, out_features=100, bias=True)
  (fc3): Linear(in_features=100, out_features=10, bias=True)
  (dropout): Dropout(p=0.2, inplace=False)
  (activation): ReLU()
)


In [146]:
###########################
# Model Evaluation        #
###########################

def eval_model(model, criterion, loader):
    """Compute the mean loss and the accuracy of `model` over `loader`.

    The function does not switch the model to eval mode or disable gradient
    tracking; callers are expected to do that themselves.

    Args:
        model: module mapping a batch of inputs to per-class scores.
        criterion: loss returning the batch-mean loss for (output, target).
        loader: iterable yielding (data, target) batches.

    Returns:
        (out_loss, out_acc): loss averaged over the evaluated samples, and the
        fraction of samples whose highest-scoring class equals the target.

    Raises:
        ZeroDivisionError: if `loader` yields no samples.
    """
    # Run on whatever device the model already lives on instead of assuming CUDA.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device('cpu')

    running_loss = 0.0
    n_correct = 0
    n_seen = 0

    for data, target in loader:
        data, target = data.to(device), target.to(device)
        output = model(data)
        loss = criterion(output, target)
        # criterion returns a batch mean; weight it by batch size to get a sum.
        running_loss += loss.item() * data.size(0)
        _, pred = torch.max(output, 1)
        # Count hits with one tensor reduction; this also works for a trailing
        # batch of size 1 and for any number of classes.
        n_correct += pred.eq(target.view_as(pred)).sum().item()
        n_seen += target.size(0)

    # Normalise by the samples actually evaluated (robust to drop_last loaders).
    out_loss = running_loss / n_seen
    out_acc = n_correct / n_seen

    return out_loss, out_acc

###########################
# Model Training          #
###########################

def train_model(model, model_name, epochs=20, lr=0.001):
    """Train `model` with Adam and record per-epoch train/test metrics.

    Reads the notebook-level `train_loader`, `test_loader`, `model_dir` and
    `results_dir`. After each epoch the model is evaluated on both loaders;
    its state dict is written to `<model_dir>/<model_name>.pt` whenever the
    test loss is less than or equal to the best value seen so far. When
    training ends, the metric histories are saved as a dict of tensors to
    `<results_dir>/<model_name>.pt`.

    Args:
        model: network to train; it is moved to the training device in place.
        model_name: base file name (no extension) for checkpoint and metrics.
        epochs: number of passes over `train_loader`.
        lr: Adam learning rate.

    Returns:
        None.
    """
    model_res_path = results_dir + '/' + model_name + '.pt'
    model_name = model_dir + '/' + model_name + '.pt'
    print(model_name)

    # Use CUDA when available, otherwise CPU, for the model and the loss.
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model.to(device)

    # Define the loss and optimizer
    criterion = nn.CrossEntropyLoss().to(device)
    optimizer = optim.Adam(model.parameters(), lr=lr)

    train_loss_list = [0.] * epochs
    test_loss_list = [0.] * epochs
    train_acc_list = [0.] * epochs
    test_acc_list = [0.] * epochs

    # float('inf') replaces the np.Inf alias, which NumPy 2.0 removed.
    test_loss_min = float('inf')

    for epoch in range(epochs):
        t0 = time.time()

        # Train the model
        model.train()
        for images, labels in train_loader:
            images, labels = images.to(device), labels.to(device)
            optimizer.zero_grad()
            output = model(images)
            loss = criterion(output, labels)
            loss.backward()
            optimizer.step()

        # Evaluate the model
        model.eval()
        with torch.no_grad():
            train_loss, train_acc = eval_model(model, criterion, train_loader)
            test_loss, test_acc = eval_model(model, criterion, test_loader)

        print('Epoch: {} ({:.2f} seconds) Train Loss: {:.6f} Train Accuracy: {:.6f} Test Loss: {:.6f} Test Accuracy: {:.6f}'
              .format(epoch+1, time.time()-t0, train_loss, train_acc, test_loss, test_acc))

        # Save model if the test loss has not increased.
        # NOTE(review): checkpoints are selected on the test split, so the
        # reported test metrics of the saved model are optimistically biased.
        if test_loss <= test_loss_min:
            print('Test loss decreased ({:.6f} --> {:.6f}).  Saving model ...'.format(test_loss_min, test_loss))
            torch.save(model.state_dict(), model_name)
            test_loss_min = test_loss

        train_loss_list[epoch] = train_loss
        test_loss_list[epoch] = test_loss
        train_acc_list[epoch] = train_acc
        test_acc_list[epoch] = test_acc

    acc_tensor_dict = {'train_loss': torch.tensor(train_loss_list),
                       'test_loss': torch.tensor(test_loss_list),
                       'train_acc': torch.tensor(train_acc_list),
                       'test_acc': torch.tensor(test_acc_list)}

    print('Saving...')
    torch.save(acc_tensor_dict, model_res_path)
    print('Saved')

In [147]:
######################################
# Pruning functions.                 #
######################################

def random_init_model(model, init_range=0.5):
    """Re-draw every parameter of `model` from U(-init_range, init_range).

    Parameters are overwritten in place, in `model.named_parameters()` order.

    Returns:
        A list holding an independent copy of each freshly drawn parameter
        tensor, in that same order.
    """
    # uniform_ fills the tensor in place and returns it, so each parameter is
    # randomised and then copied before moving on to the next one.
    return [
        weights.data.uniform_(-init_range, init_range).clone()
        for _, weights in model.named_parameters()
    ]

def rewind_model_params(model, init_params):
  """Restore the parameters of `model` from a saved snapshot.

  Values are copied in place, so each parameter keeps its current device and
  dtype even when the snapshot was taken elsewhere (e.g. a CPU snapshot
  restored into a model that has since been moved to the GPU).

  Args:
    model: module whose parameters are overwritten.
    init_params: sequence of tensors in `model.named_parameters()` order.
      Entries beyond the number of parameters are ignored.

  Raises:
    IndexError: if `init_params` has fewer entries than the model has
      parameters. The check runs first, so the model is never left half rewound.
  """
  params = [param for _, param in model.named_parameters()]
  if len(init_params) < len(params):
    raise IndexError(
        'init_params has {} tensors but the model has {} parameters'.format(
            len(init_params), len(params)))

  with torch.no_grad():
    for param, saved in zip(params, init_params):
      param.copy_(saved)

def prune_init_model(model):
    """Apply `prune.identity` to the `weight` of every direct nn.Linear child.

    Only immediate children of `model` are visited; other layer types are
    left untouched.
    """
    linear_children = (child for child in model.children() if isinstance(child, nn.Linear))
    for linear in linear_children:
        prune.identity(linear, name='weight')

def prune_model(model, prune_ratio=0.2, prune_method='l1_unstructured'):
  """Prune the `weight` of every direct nn.Linear child of `model`.

  Args:
    model: module whose immediate nn.Linear children are pruned.
    prune_ratio: passed as `amount` to the pruning function, for each layer
      separately.
    prune_method: 'l1_unstructured' (default) or 'random_unstructured'.

  Raises:
    ValueError: if `prune_method` is not one of the supported names.
  """
  pruners = {
      'l1_unstructured': prune.l1_unstructured,
      'random_unstructured': prune.random_unstructured,
  }
  # Reject unknown names rather than silently falling back to L1 pruning.
  if prune_method not in pruners:
    raise ValueError(
        "Unsupported prune_method '{}'; expected one of {}".format(
            prune_method, sorted(pruners)))
  apply_pruning = pruners[prune_method]

  for layer in model.children():
    if isinstance(layer, nn.Linear):
      apply_pruning(layer, name='weight', amount=prune_ratio)

In [148]:
def calc_model_sparsity(model):
    """Return the percentage of exactly-zero weights across direct nn.Linear children.

    Biases are not counted. When at least one Linear layer is present, the
    result is a 0-dim tensor rather than a Python float.
    """
    linear_weights = [child.weight for child in model.children() if isinstance(child, nn.Linear)]
    # Float start values keep the arithmetic identical to a running float sum.
    zero_count = sum((torch.sum(w == 0) for w in linear_weights), 0.0)
    total_count = sum((w.nelement() for w in linear_weights), 0.0)
    return 100. * zero_count / total_count

def print_sparsity(model):
    """Print the value of calc_model_sparsity(model) as a percentage with two decimals."""
    sparsity_pct = calc_model_sparsity(model)
    print("Model sparsity: {:.2f}%".format(sparsity_pct))

In [149]:
##############################
# Run                        #
##############################
epochs = 30
lr = 0.01
# NOTE(review): train_loader/test_loader were already built in the data cell
# with that cell's batch size; reassigning batch_size here does not rebuild them.
batch_size = 64
prune_ratio = 0.85
rounds = 1

model = LeNet()
# Attach the (identity) pruning reparametrization before snapshotting, so the
# saved initial parameters line up with the model's parameter list after pruning.
prune_init_model(model)
init_params = random_init_model(model)

print('Train and evaluate lenet model with Pruning:')

# `prune_round` rather than `round`, so the builtin round() is not shadowed
# for the rest of the notebook session.
for prune_round in range(rounds):
  print('Start prune round: {}'.format(prune_round+1))
  # 1) train the current network, then prune it
  train_model(model, 'model_lenet', epochs=epochs, lr=lr)
  prune_model(model, prune_ratio)
  print_sparsity(model)
  # 2) control: pruned network trained from a fresh random initialisation
  random_init_model(model)
  train_model(model, 'model_prune_lenet_random_init', epochs=epochs, lr=lr)
  # 3) pruned network trained from the original initial parameters
  rewind_model_params(model, init_params)
  train_model(model, 'model_prune_lenet_rewind_init', epochs=epochs, lr=lr)

# NOTE(review): this continues training from the state left by the last round
# (no re-initialisation in between) — confirm that is intended.
print('Final Training:')
t0 = time.time()
train_model(model, 'model_prune_lenet', epochs=epochs, lr=lr)

print(f'lenet model, took {time.time()-t0: .2f} seconds')

Train and evaluate lenet model with Pruning:
Start prune round: 1
/content/drive/MyDrive/Deep Learning Project/Models/model_lenet.pt
Epoch: 1 (23.67 seconds) Train Loss: 0.358376 Train Accuracy: 0.895300 Test Loss: 0.356828 Test Accuracy: 0.898400
Test loss decreased (inf --> 0.356828).  Saving model ...
Epoch: 2 (23.81 seconds) Train Loss: 0.275444 Train Accuracy: 0.927333 Test Loss: 0.271646 Test Accuracy: 0.930900
Test loss decreased (0.356828 --> 0.271646).  Saving model ...
Epoch: 3 (23.09 seconds) Train Loss: 0.291970 Train Accuracy: 0.942300 Test Loss: 0.356526 Test Accuracy: 0.938000
Epoch: 4 (23.59 seconds) Train Loss: 0.260035 Train Accuracy: 0.943317 Test Loss: 0.321230 Test Accuracy: 0.941900
Epoch: 5 (23.79 seconds) Train Loss: 0.220122 Train Accuracy: 0.944017 Test Loss: 0.263843 Test Accuracy: 0.942300
Test loss decreased (0.271646 --> 0.263843).  Saving model ...
Epoch: 6 (23.67 seconds) Train Loss: 0.268574 Train Accuracy: 0.943750 Test Loss: 0.388325 Test Accuracy: 0.

In [141]:
######################################
# Loading Trained model              #
######################################
# Use CUDA when available, otherwise CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
criterion = nn.CrossEntropyLoss().to(device)

# Named `trained_model` so the train_model() function defined earlier in the
# notebook is not overwritten by a model instance.
trained_model = LeNet()
# The checkpoint is written from a model carrying the pruning reparametrization,
# so the same reparametrization must be attached before its state dict can load.
prune_init_model(trained_model)

trained_model.to(device)
model_name = model_dir + '/model_prune_lenet.pt'
# load_state_dict overwrites every parameter and buffer, so no separate
# initialisation is needed; map_location lets the checkpoint load on this device.
trained_model.load_state_dict(torch.load(model_name, map_location=device))

trained_model.eval()
with torch.no_grad():
  test_loss, test_acc = eval_model(trained_model, criterion, test_loader)
  print('Test Loss: {:.6f} Test Accuracy: {:.6f}'.format(test_loss, test_acc))

Test Loss: 0.354958 Test Accuracy: 0.908400


In [128]:
# Total number of elements across all parameters registered on `model`,
# counted with Tensor.numel() so the result is an exact Python int.
params = sum(p.numel() for p in model.parameters())
print(params)

266610
