In [1]:
import torch
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
from transformers import ViTForImageClassification, ViTImageProcessor, AutoFeatureExtractor
from tqdm import tqdm
import time
import torch.nn.functional as F

# Set device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
from torch.profiler import profile, record_function, ProfilerActivity, schedule

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Hyperparameters for training
learning_rate = 0.0002
train_batch_size = 16
eval_batch_size = 8
num_epochs = 4
seed = 42

# Set seed for reproducibility
torch.manual_seed(seed)
if torch.cuda.is_available():
    torch.cuda.manual_seed_all(seed)

# Transform for CIFAR100 dataset
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])
])

In [3]:
# Load Dataset and make ddata loader

trainset = torchvision.datasets.CIFAR100(root='./data', train=True, download=True, transform=transform)
testset = torchvision.datasets.CIFAR100(root='./data', train=False, download=True, transform=transform)

train_loader = DataLoader(trainset, batch_size=train_batch_size, shuffle=True, num_workers=2)
test_loader = DataLoader(testset, batch_size=eval_batch_size, shuffle=False, num_workers=2)

# Load pre-trained ViT model
model = ViTForImageClassification.from_pretrained("google/vit-base-patch16-224-in21k", num_labels=100)
model.to(device)

Files already downloaded and verified
Files already downloaded and verified


Some weights of ViTForImageClassification were not initialized from the model checkpoint at google/vit-base-patch16-224-in21k and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


ViTForImageClassification(
  (vit): ViTModel(
    (embeddings): ViTEmbeddings(
      (patch_embeddings): ViTPatchEmbeddings(
        (projection): Conv2d(3, 768, kernel_size=(16, 16), stride=(16, 16))
      )
      (dropout): Dropout(p=0.0, inplace=False)
    )
    (encoder): ViTEncoder(
      (layer): ModuleList(
        (0-11): 12 x ViTLayer(
          (attention): ViTSdpaAttention(
            (attention): ViTSdpaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.0, inplace=False)
            )
            (output): ViTSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.0, inplace=False)
            )
          )
          (intermediate): ViTIntermediate(
            (dense): Linear(in_fe

In [4]:
# Define optimizer and loss function
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate, betas=(0.9, 0.999), eps=1e-08)
criterion = torch.nn.CrossEntropyLoss()

In [None]:
# Training loop
for epoch in range(num_epochs):
    model.train()
    train_loss = 0.0
    train_correct = 0
    train_total = 0

    for images, labels in tqdm(train_loader, desc=f"Epoch {epoch+1}/{num_epochs}"):
        images, labels = images.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = model(images).logits
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        train_loss += loss.item()
        _, predicted = outputs.max(1)
        train_total += labels.size(0)
        train_correct += predicted.eq(labels).sum().item()

    train_loss /= len(train_loader)
    train_accuracy = train_correct / train_total

    # Evaluation loop
    model.eval()
    eval_loss = 0.0
    eval_correct = 0
    eval_total = 0

    with torch.no_grad():
        for images, labels in tqdm(test_loader, desc="Evaluating"):
            images, labels = images.to(device), labels.to(device)

            outputs = model(images).logits
            loss = criterion(outputs, labels)

            eval_loss += loss.item()
            _, predicted = outputs.max(1)
            eval_total += labels.size(0)
            eval_correct += predicted.eq(labels).sum().item()

    eval_loss /= len(test_loader)
    eval_accuracy = eval_correct / eval_total

    print(f"Epoch {epoch+1}/{num_epochs}")
    print(f"Train Loss: {train_loss:.4f}, Train Accuracy: {train_accuracy:.4f}")
    print(f"Eval Loss: {eval_loss:.4f}, Eval Accuracy: {eval_accuracy:.4f}")
    print()

Epoch 1/4: 100%|██████████| 3125/3125 [29:53<00:00,  1.74it/s]
Evaluating: 100%|██████████| 1250/1250 [01:58<00:00, 10.58it/s]


Epoch 1/4
Train Loss: 1.1488, Train Accuracy: 0.7275
Eval Loss: 0.7587, Eval Accuracy: 0.7886



Epoch 2/4: 100%|██████████| 3125/3125 [29:54<00:00,  1.74it/s]
Evaluating: 100%|██████████| 1250/1250 [01:59<00:00, 10.48it/s]


Epoch 2/4
Train Loss: 0.5075, Train Accuracy: 0.8518
Eval Loss: 0.6755, Eval Accuracy: 0.8113



Epoch 3/4: 100%|██████████| 3125/3125 [30:04<00:00,  1.73it/s]
Evaluating: 100%|██████████| 1250/1250 [01:59<00:00, 10.50it/s]


Epoch 3/4
Train Loss: 0.3714, Train Accuracy: 0.8891
Eval Loss: 0.6988, Eval Accuracy: 0.8056



Epoch 4/4: 100%|██████████| 3125/3125 [30:01<00:00,  1.73it/s]
Evaluating: 100%|██████████| 1250/1250 [01:59<00:00, 10.48it/s]

Epoch 4/4
Train Loss: 0.2958, Train Accuracy: 0.9101
Eval Loss: 0.8210, Eval Accuracy: 0.7794






In [None]:
# Save the fine-tuned model
torch.save(model.state_dict(), "vit_cifar100_finetuned.pth")

In [None]:
# state_dict = torch.load('vit_cifar100_finetuned.pth', weights_only=True)
state_dict = torch.load('vit_cifar100_finetuned.pth', weights_only=True, map_location=torch.device('cpu'))

# Load the state dict into your model
model.load_state_dict(state_dict)

<All keys matched successfully>

In [None]:
# Define evalutation function with latency and accuracy

def eval_op(model, prof):
    model.eval()
    eval_loss = 0.0
    eval_correct_top1 = 0
    eval_correct_top3 = 0
    eval_total = 0

    # Start timing
    if device == 'cuda':
        torch.cuda.synchronize()
    start_time = time.time()

    with torch.no_grad():
        for images, labels in tqdm(test_loader, desc="Evaluating"):
            images, labels = images.to(device), labels.to(device)

            outputs = model(images).logits
            loss = criterion(outputs, labels)

            eval_loss += loss.item()

            # Get the top 1 and top 3 predictions
            _, top1_predicted = outputs.max(1)
            _, top3_predicted = outputs.topk(3, 1, largest=True, sorted=True)

            # Calculate top-1 accuracy
            eval_correct_top1 += top1_predicted.eq(labels).sum().item()

            # Expand labels to match the shape of top3_predicted
            labels_expanded = labels.view(-1, 1).expand_as(top3_predicted)

            # Check if the correct label is in the top 3 predictions
            correct_top3 = top3_predicted.eq(labels_expanded).any(dim=1)

            eval_total += labels.size(0)
            eval_correct_top3 += correct_top3.sum().item()
            prof.step()

    # End timing
    if device == 'cuda':
        torch.cuda.synchronize()
    end_time = time.time()

    # Calculate evaluation time
    eval_time = end_time - start_time

    eval_loss /= len(test_loader)
    eval_accuracy_top1 = eval_correct_top1 / eval_total
    eval_accuracy_top3 = eval_correct_top3 / eval_total

    print(f"Eval Loss: {eval_loss:.4f}")
    print(f"Eval Top-1 Accuracy: {eval_accuracy_top1:.4f}")
    print(f"Eval Top-3 Accuracy: {eval_accuracy_top3:.4f}")
    print(f"Evaluation Time: {eval_time:.2f} seconds")
    

In [6]:
# Function to count parameters
def count_parameters(model):
    return sum(p.numel() for p in model.parameters())# if p.requires_grad)

In [None]:
# Profile evaluation for models with torch profiling

print(count_parameters(model))
with profile(
    activities=[ProfilerActivity.CPU],
    schedule=torch.profiler.schedule(
        wait=1,
        warmup=1,
        active=3,
        repeat=2),
    record_shapes=True,
    profile_memory=True,
    with_stack=True
) as prof:
    eval_op(model, prof)

85875556


Evaluating: 100%|██████████| 1250/1250 [03:28<00:00,  6.00it/s]

Eval Loss: 0.8210
Eval Top-1 Accuracy: 0.7794
Eval Top-3 Accuracy: 0.9228
Evaluation Time: 208.27 seconds





In [21]:
key_averages = prof.key_averages()
total_memory = sum(avg.cpu_memory_usage for avg in key_averages)
print(f"Total CPU Memory Usage: {total_memory / (1024 * 1024*1024):.2f} GB")

Total CPU Memory Usage: 11.13 GB


In [22]:
print(prof.key_averages().table(sort_by="cpu_time_total", row_limit=10))
print(prof.key_averages().table(sort_by="self_cpu_memory_usage", row_limit=10))

-------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                                   Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg       CPU Mem  Self CPU Mem    # of Calls  
-------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                          ProfilerStep*         3.22%      66.993ms       100.00%        2.082s     346.947ms       6.96 Kb      -5.98 Gb             6  
                                           aten::linear         0.43%       9.031ms        57.08%        1.188s       2.713ms       2.92 Gb           0 b           438  
                                            aten::addmm        39.14%     814.859ms        56.16%        1.169s       2.669ms       2.92 Gb       2.92

In [None]:
# Load student model as deit-tiny

teacher_model = ViTForImageClassification.from_pretrained("google/vit-base-patch16-224-in21k", num_labels=100)
teacher_model.to(device)

state_dict = torch.load('vit_cifar100_finetuned.pth', weights_only=True, map_location=torch.device('cpu'))

# Load the state dict into your model
teacher_model.load_state_dict(state_dict)
student_model = ViTForImageClassification.from_pretrained("facebook/deit-tiny-patch16-224")
student_model.classifier = torch.nn.Linear(student_model.classifier.in_features, 100)

Some weights of ViTForImageClassification were not initialized from the model checkpoint at google/vit-base-patch16-224-in21k and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [None]:
# Only train student model
for param in teacher_model.parameters():
    param.requires_grad = False

# Set models to evaluation mode
teacher_model.eval()
student_model.train()

# Load feature extractor
feature_extractor = AutoFeatureExtractor.from_pretrained("google/vit-base-patch16-224-in21k")

In [None]:
# Define optimizer
optimizer = torch.optim.AdamW(student_model.parameters(), lr=1e-4)

# Distillation parameters
temperature = 2.0
alpha = 0.5

In [None]:
# Define distillation loss for student training - combination of cross entropy and KL diverence between teacher and student
def distillation_loss(student_logits, teacher_logits, labels, temperature=2.0, alpha=0.5):
    hard_loss = F.cross_entropy(student_logits, labels)
    soft_loss = F.kl_div(
        F.log_softmax(student_logits / temperature, dim=1),
        F.softmax(teacher_logits / temperature, dim=1),
        reduction='batchmean'
    ) * (temperature ** 2)
    return alpha * hard_loss + (1 - alpha) * soft_loss

# Evaluation function
def evaluate(model, data_loader, device):
    model.eval()
    eval_total = 0
    eval_correct_top1 = 0
    eval_correct_top3 = 0

    with torch.no_grad():
        for images, labels in data_loader:
            images = images.to(device)
            labels = labels.to(device)

            outputs = model(images)

            # Top-1 Accuracy
            _, predicted = torch.max(outputs.logits, 1)
            eval_correct_top1 += (predicted == labels).sum().item()

            # Top-3 Accuracy
            _, top3_predicted = outputs.logits.topk(3, 1, largest=True, sorted=True)
            labels_expanded = labels.view(-1, 1).expand_as(top3_predicted)
            correct = top3_predicted.eq(labels_expanded).any(dim=1)
            eval_correct_top3 += correct.sum().item()

            eval_total += labels.size(0)

    accuracy_top1 = eval_correct_top1 / eval_total
    accuracy_top3 = eval_correct_top3 / eval_total

    return accuracy_top1, accuracy_top3

In [None]:
# Student train loop
num_epochs = 4
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
teacher_model.to(device)
student_model.to(device)

for epoch in range(num_epochs):
    student_model.train()
    total_loss = 0
    for images, labels in tqdm(train_loader, desc=f"Epoch {epoch+1}/{num_epochs}"):
        images = images.to(device)
        labels = labels.to(device)

        # Get teacher predictions
        with torch.no_grad():
            teacher_outputs = teacher_model(images)
            teacher_logits = teacher_outputs.logits

        # Get student predictions
        student_outputs = student_model(images)
        student_logits = student_outputs.logits

        # Calculate loss
        loss = distillation_loss(student_logits, teacher_logits, labels, temperature, alpha)

        # Backpropagation
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        total_loss += loss.item()

    # Calculate test accuracy
    test_accuracy_top1, test_accuracy_top3 = evaluate(student_model, test_loader, device)

    print(f"Epoch {epoch+1}/{num_epochs}, Loss: {total_loss/len(train_loader):.4f}, "
          f"Test Accuracy Top-1: {test_accuracy_top1:.4f}, Top-3: {test_accuracy_top3:.4f}")


Epoch 1/4: 100%|██████████| 3125/3125 [12:56<00:00,  4.02it/s]


Epoch 1/4, Loss: 1.9217, Test Accuracy Top-1: 0.7226, Top-3: 0.9002


Epoch 2/4: 100%|██████████| 3125/3125 [12:56<00:00,  4.03it/s]


Epoch 2/4, Loss: 0.9378, Test Accuracy Top-1: 0.7553, Top-3: 0.9142


Epoch 3/4: 100%|██████████| 3125/3125 [12:55<00:00,  4.03it/s]


Epoch 3/4, Loss: 0.6997, Test Accuracy Top-1: 0.7682, Top-3: 0.9167


Epoch 4/4: 100%|██████████| 3125/3125 [12:57<00:00,  4.02it/s]


Epoch 4/4, Loss: 0.5574, Test Accuracy Top-1: 0.7785, Top-3: 0.9256


In [None]:
torch.save(student_model.state_dict(), 'distilled_deit_tiny.pth')

In [None]:
# Profile student
print(count_parameters(student_model))

with profile(
    activities=[ProfilerActivity.CPU],
    schedule=torch.profiler.schedule(
        wait=1,
        warmup=1,
        active=3,
        repeat=2),
    record_shapes=True,
    profile_memory=True,
    with_stack=True
) as stud_prof:
    eval_op(student_model, stud_prof)

5543716


Evaluating: 100%|██████████| 1250/1250 [01:17<00:00, 16.22it/s]

Eval Loss: 0.7914
Eval Top-1 Accuracy: 0.7785
Eval Top-3 Accuracy: 0.9256
Evaluation Time: 77.09 seconds





In [40]:
key_averages = stud_prof.key_averages()
total_memory = sum(avg.cpu_memory_usage for avg in key_averages)
print(f"Total CPU Memory Usage: {total_memory / (1024 * 1024*1024):.2f} GB")

Total CPU Memory Usage: 2.87 GB


In [41]:
print(stud_prof.key_averages().table(sort_by="cpu_time_total", row_limit=10))
print(stud_prof.key_averages().table(sort_by="self_cpu_memory_usage", row_limit=10))

-------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                                   Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg       CPU Mem  Self CPU Mem    # of Calls  
-------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                          ProfilerStep*         6.46%      44.926ms       100.00%     694.978ms     115.830ms      -9.18 Mb      -1.51 Gb             6  
                                           aten::linear         0.63%       4.387ms        36.48%     253.516ms     578.803us     748.00 Mb           0 b           438  
                                            aten::addmm         9.54%      66.330ms        35.00%     243.219ms     555.293us     748.00 Mb     748.00

In [16]:
# Load pre-trained ViT model
model = ViTForImageClassification.from_pretrained("google/vit-base-patch16-224-in21k", num_labels=100)
model.to(device)

state_dict = torch.load('vit_cifar100_finetuned.pth', weights_only=True, map_location=torch.device('cpu'))

# Load the state dict into your model
model.load_state_dict(state_dict)

## Train model below before running this

student_model = ViTForImageClassification.from_pretrained("facebook/deit-tiny-patch16-224")
student_model.classifier = torch.nn.Linear(student_model.classifier.in_features, 100)
student_model.to(device)

state_dict = torch.load('distilled_deit_tiny.pth', weights_only=True, map_location=torch.device('cpu'))

# Load the state dict into your model
student_model.load_state_dict(state_dict)

Some weights of ViTForImageClassification were not initialized from the model checkpoint at google/vit-base-patch16-224-in21k and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


<All keys matched successfully>

In [None]:
# Function for pruning attention heads with redefinition of forward function

import torch.nn.functional as F
import torch_pruning as tp
from transformers.models.vit.modeling_vit import ViTSelfAttention
import math
import copy

def prune_vit_model(original_model, device="cpu", pruning_ratio=0.1, iterative_steps=5):
    model = copy.deepcopy(original_model)
    
    def new_forward(self, hidden_states, head_mask=None, output_attentions=False):
        batch_size, seq_length, _ = hidden_states.shape
        mixed_query_layer = self.query(hidden_states)
        mixed_key_layer = self.key(hidden_states)
        mixed_value_layer = self.value(hidden_states)
        query_layer = self.transpose_for_scores(mixed_query_layer)
        key_layer = self.transpose_for_scores(mixed_key_layer)
        value_layer = self.transpose_for_scores(mixed_value_layer)
        attention_scores = torch.matmul(query_layer, key_layer.transpose(-1, -2))
        attention_scores = attention_scores / math.sqrt(self.attention_head_size)
        attention_probs = F.softmax(attention_scores, dim=-1)
        attention_probs = self.dropout(attention_probs)
        if head_mask is not None:
            attention_probs = attention_probs * head_mask
        context_layer = torch.matmul(attention_probs, value_layer)
        context_layer = context_layer.permute(0, 2, 1, 3).contiguous()
        new_context_layer_shape = context_layer.size()[:-2] + (self.all_head_size,)
        context_layer = context_layer.view(*new_context_layer_shape)
        outputs = (context_layer, attention_probs) if output_attentions else (context_layer,)
        return outputs

    model.eval().to(device)
    example_inputs = torch.randn(1, 3, 224, 224).to(device)
    num_heads = {}
    ignored_layers = [model.classifier]

    for m in model.modules():
        if isinstance(m, ViTSelfAttention):
            m.forward = new_forward.__get__(m, ViTSelfAttention)
            num_heads[m.query] = m.num_attention_heads

    imp = tp.importance.GroupNormImportance(2)
    pruner = tp.pruner.MetaPruner(
        model,
        example_inputs,
        iterative_steps=iterative_steps,
        global_pruning=False,
        importance=imp,
        ignored_layers=ignored_layers,
        num_heads=num_heads,
        prune_head_dims=False,
        prune_num_heads=True,
        head_pruning_ratio=pruning_ratio,
        round_to=2,
    )

    for i, g in enumerate(pruner.step(interactive=True)):
        g.prune()

    for m in model.modules():
        if isinstance(m, ViTSelfAttention):
            m.num_attention_heads = pruner.num_heads[m.query]
            m.attention_head_size = m.query.out_features // m.num_attention_heads
            m.all_head_size = m.num_attention_heads * m.attention_head_size

    return model

In [24]:
# step 7
model7 = prune_vit_model(model, device="cpu", pruning_ratio=0.1, iterative_steps=7)
print(count_parameters(model7))


with profile(
    activities=[ProfilerActivity.CPU],
    schedule=torch.profiler.schedule(
        wait=1,
        warmup=1,
        active=3,
        repeat=2),
    record_shapes=True,
    profile_memory=True,
    with_stack=True
) as prof7:
    eval_op(model7, prof7)

 Torch-Pruning will prune the last non-singleton dimension of these parameters. If you wish to change this behavior, please provide an unwrapped_parameters argument.


75856508


Evaluating: 100%|██████████| 1250/1250 [04:11<00:00,  4.97it/s]

Eval Loss: 0.9397
Eval Top-1 Accuracy: 0.7445
Eval Top-3 Accuracy: 0.8975
Evaluation Time: 251.68 seconds





In [25]:
key_averages = prof7.key_averages()
total_memory = sum(avg.cpu_memory_usage for avg in key_averages)
print(f"Total CPU Memory Usage: {total_memory / (1024 * 1024*1024):.2f} GB")

Total CPU Memory Usage: 20.28 GB


In [26]:
print(prof7.key_averages().table(sort_by="cpu_time_total", row_limit=10))
print(prof7.key_averages().table(sort_by="self_cpu_memory_usage", row_limit=10))

-------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                                   Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg       CPU Mem  Self CPU Mem    # of Calls  
-------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                          ProfilerStep*         2.10%      80.397ms       100.00%        3.823s     637.193ms      -9.18 Mb      -8.97 Gb             6  
                                            aten::copy_        42.89%        1.640s        42.89%        1.640s       2.024ms           0 b           0 b           810  
                                           aten::linear         0.17%       6.351ms        39.46%        1.509s       3.444ms       2.78 Gb           

In [27]:
# step 6
model6 = prune_vit_model(model, device="cpu", pruning_ratio=0.1, iterative_steps=6)
print(count_parameters(model6))


with profile(
    activities=[ProfilerActivity.CPU],
    schedule=torch.profiler.schedule(
        wait=1,
        warmup=1,
        active=3,
        repeat=2),
    record_shapes=True,
    profile_memory=True,
    with_stack=True
) as prof6:
    eval_op(model6, prof6)

74396196


Evaluating: 100%|██████████| 1250/1250 [03:31<00:00,  5.92it/s]

Eval Loss: 0.9919
Eval Top-1 Accuracy: 0.7346
Eval Top-3 Accuracy: 0.8910
Evaluation Time: 211.22 seconds





In [28]:
key_averages = prof6.key_averages()
total_memory = sum(avg.cpu_memory_usage for avg in key_averages)
print(f"Total CPU Memory Usage: {total_memory / (1024 * 1024*1024):.2f} GB")

Total CPU Memory Usage: 20.19 GB


In [29]:
print(prof6.key_averages().table(sort_by="cpu_time_total", row_limit=10))
print(prof6.key_averages().table(sort_by="self_cpu_memory_usage", row_limit=10))

-------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                                   Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg       CPU Mem  Self CPU Mem    # of Calls  
-------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                          ProfilerStep*         2.39%      78.950ms       100.00%        3.309s     551.536ms      -9.18 Mb      -8.91 Gb             6  
                                           aten::linear         0.18%       6.003ms        52.64%        1.742s       3.977ms       2.76 Gb           0 b           438  
                                            aten::addmm        19.21%     635.692ms        52.22%        1.728s       3.945ms       2.76 Gb       2.76

In [30]:
# step 5
model5 = prune_vit_model(model, device="cpu", pruning_ratio=0.1, iterative_steps=5)
print(count_parameters(model5))


with profile(
    activities=[ProfilerActivity.CPU],
    schedule=torch.profiler.schedule(
        wait=1,
        warmup=1,
        active=3,
        repeat=2),
    record_shapes=True,
    profile_memory=True,
    with_stack=True
) as prof5:
    eval_op(model5, prof5)

72056206


Evaluating: 100%|██████████| 1250/1250 [04:12<00:00,  4.95it/s]

Eval Loss: 1.1247
Eval Top-1 Accuracy: 0.7015
Eval Top-3 Accuracy: 0.8695
Evaluation Time: 252.65 seconds





In [31]:
key_averages = prof5.key_averages()
total_memory = sum(avg.cpu_memory_usage for avg in key_averages)
print(f"Total CPU Memory Usage: {total_memory / (1024 * 1024*1024):.2f} GB")

Total CPU Memory Usage: 20.05 GB


In [32]:
print(prof5.key_averages().table(sort_by="cpu_time_total", row_limit=10))
print(prof5.key_averages().table(sort_by="self_cpu_memory_usage", row_limit=10))

-------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                                   Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg       CPU Mem  Self CPU Mem    # of Calls  
-------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                          ProfilerStep*         2.87%      75.774ms       100.00%        2.640s     440.029ms      -9.18 Mb      -8.83 Gb             6  
                                           aten::linear         0.23%       6.073ms        54.25%        1.432s       3.270ms       2.73 Gb           0 b           438  
                                            aten::addmm        22.62%     597.320ms        53.72%        1.418s       3.238ms       2.73 Gb       2.73

In [33]:
# step 4
model4 = prune_vit_model(model, device="cpu", pruning_ratio=0.1, iterative_steps=4)
print(count_parameters(model4))

with profile(
    activities=[ProfilerActivity.CPU],
    schedule=torch.profiler.schedule(
        wait=1,
        warmup=1,
        active=3,
        repeat=2),
    record_shapes=True,
    profile_memory=True,
    with_stack=True
) as prof4:
    eval_op(model4, prof4)

68951428


Evaluating: 100%|██████████| 1250/1250 [03:57<00:00,  5.26it/s]

Eval Loss: 1.2572
Eval Top-1 Accuracy: 0.6726
Eval Top-3 Accuracy: 0.8476
Evaluation Time: 237.79 seconds





In [34]:
key_averages = prof4.key_averages()
total_memory = sum(avg.cpu_memory_usage for avg in key_averages)
print(f"Total CPU Memory Usage: {total_memory / (1024 * 1024*1024):.2f} GB")

Total CPU Memory Usage: 19.86 GB


In [35]:
print(prof4.key_averages().table(sort_by="cpu_time_total", row_limit=10))
print(prof4.key_averages().table(sort_by="self_cpu_memory_usage", row_limit=10))

-------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                                   Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg       CPU Mem  Self CPU Mem    # of Calls  
-------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                          ProfilerStep*         3.06%      72.869ms       100.00%        2.382s     396.966ms      -4.59 Mb      -8.72 Gb             6  
                                           aten::linear         0.29%       6.888ms        49.13%        1.170s       2.672ms       2.68 Gb           0 b           438  
                                            aten::addmm        25.59%     609.464ms        48.50%        1.155s       2.637ms       2.68 Gb       2.68

In [36]:
# step 3
model3 = prune_vit_model(model, device="cpu", pruning_ratio=0.1, iterative_steps=3)
print(count_parameters(model3))

with profile(
    activities=[ProfilerActivity.CPU],
    schedule=torch.profiler.schedule(
        wait=1,
        warmup=1,
        active=3,
        repeat=2),
    record_shapes=True,
    profile_memory=True,
    with_stack=True
) as prof3:
    eval_op(model3, prof3)

63703268


Evaluating: 100%|██████████| 1250/1250 [03:43<00:00,  5.59it/s]

Eval Loss: 1.7575
Eval Top-1 Accuracy: 0.5660
Eval Top-3 Accuracy: 0.7551
Evaluation Time: 223.65 seconds





In [37]:
key_averages = prof3.key_averages()
total_memory = sum(avg.cpu_memory_usage for avg in key_averages)
print(f"Total CPU Memory Usage: {total_memory / (1024 * 1024*1024):.2f} GB")

Total CPU Memory Usage: 19.52 GB


In [38]:
print(prof3.key_averages().table(sort_by="cpu_time_total", row_limit=10))
print(prof3.key_averages().table(sort_by="self_cpu_memory_usage", row_limit=10))

-------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                                   Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg       CPU Mem  Self CPU Mem    # of Calls  
-------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                          ProfilerStep*         3.69%      84.549ms       100.00%        2.290s     381.619ms      -9.18 Mb      -8.53 Gb             6  
                                           aten::linear         0.28%       6.423ms        49.95%        1.144s       2.611ms       2.60 Gb           0 b           438  
                                            aten::addmm        25.50%     583.974ms        49.26%        1.128s       2.575ms       2.60 Gb       2.60

In [None]:
# Function to quantize a model. Only works on CPU

def quantize(model):
    model = model.to('cpu')
    # Perform dynamic quantization
    quantized_model = torch.quantization.quantize_dynamic(
        model,
            {torch.nn.Linear}, 
        dtype=torch.qint8
    )
    
    # To use the quantized model
    quantized_model.to('cpu')
    quantized_model.eval()

    return quantized_model 
    

In [None]:
# Profile quantized model
quant_model = quantize(model)

print(count_parameters(quant_model))

with profile(
    activities=[ProfilerActivity.CPU],
    schedule=torch.profiler.schedule(
        wait=1,
        warmup=1,
        active=3,
        repeat=2),
    record_shapes=True,
    profile_memory=True,
    with_stack=True
) as quant_prof:
    eval_op(quant_model, quant_prof)

781056


Evaluating: 100%|██████████| 1250/1250 [03:21<00:00,  6.21it/s]

Eval Loss: 0.8766
Eval Top-1 Accuracy: 0.7673
Eval Top-3 Accuracy: 0.9150
Evaluation Time: 201.17 seconds





In [44]:
key_averages = quant_prof.key_averages()
total_memory = sum(avg.cpu_memory_usage for avg in key_averages)
print(f"Total CPU Memory Usage: {total_memory / (1024 * 1024*1024):.2f} GB")

Total CPU Memory Usage: 16.96 GB


In [45]:
print(quant_prof.key_averages().table(sort_by="cpu_time_total", row_limit=10))
print(quant_prof.key_averages().table(sort_by="self_cpu_memory_usage", row_limit=10))

-------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                                   Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg       CPU Mem  Self CPU Mem    # of Calls  
-------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                          ProfilerStep*         8.80%     102.920ms       100.00%        1.169s     194.848ms      -9.18 Mb      -5.98 Gb             6  
                              quantized::linear_dynamic        62.47%     730.347ms        63.64%     743.987ms       1.699ms       2.92 Gb      -2.92 Gb           438  
                                           aten::conv2d         0.01%      60.649us         8.55%      99.906ms      16.651ms      27.56 Mb           

In [14]:
torch.save(quant_model.state_dict(), "vit_cifar100_finetuned_quantized.pth")

In [49]:
quant_model7 = quantize(model7)

print(count_parameters(quant_model7))

with profile(
    activities=[ProfilerActivity.CPU],
    schedule=torch.profiler.schedule(
        wait=1,
        warmup=1,
        active=3,
        repeat=2),
    record_shapes=True,
    profile_memory=True,
    with_stack=True
) as quant_prof7:
    eval_op(quant_model7, quant_prof7)

724104


Evaluating: 100%|██████████| 1250/1250 [03:51<00:00,  5.40it/s]

Eval Loss: 0.9723
Eval Top-1 Accuracy: 0.7360
Eval Top-3 Accuracy: 0.8918
Evaluation Time: 231.27 seconds





In [50]:
key_averages = quant_prof7.key_averages()
total_memory = sum(avg.cpu_memory_usage for avg in key_averages)
print(f"Total CPU Memory Usage: {total_memory / (1024 * 1024*1024):.2f} GB")

Total CPU Memory Usage: 25.84 GB


In [51]:
print(quant_prof7.key_averages().table(sort_by="cpu_time_total", row_limit=10))
print(quant_prof7.key_averages().table(sort_by="self_cpu_memory_usage", row_limit=10))

-------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                                   Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg       CPU Mem  Self CPU Mem    # of Calls  
-------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                          ProfilerStep*         7.03%     124.294ms       100.00%        1.768s     294.584ms      -9.18 Mb      -8.97 Gb             6  
                              quantized::linear_dynamic        54.96%     971.465ms        55.66%     983.735ms       2.246ms       2.78 Gb      -2.78 Gb           438  
                                           aten::matmul         0.20%       3.577ms        12.51%     221.178ms       1.536ms       1.32 Gb    -997.31

In [53]:
quant_model6 = quantize(model6)

print(count_parameters(quant_model6))

with profile(
    activities=[ProfilerActivity.CPU],
    schedule=torch.profiler.schedule(
        wait=1,
        warmup=1,
        active=3,
        repeat=2),
    record_shapes=True,
    profile_memory=True,
    with_stack=True
) as quant_prof6:
    eval_op(quant_model6, quant_prof6)

715968


Evaluating: 100%|██████████| 1250/1250 [03:34<00:00,  5.83it/s]

Eval Loss: 1.0220
Eval Top-1 Accuracy: 0.7261
Eval Top-3 Accuracy: 0.8869
Evaluation Time: 214.25 seconds





In [54]:
key_averages = quant_prof6.key_averages()
total_memory = sum(avg.cpu_memory_usage for avg in key_averages)
print(f"Total CPU Memory Usage: {total_memory / (1024 * 1024*1024):.2f} GB")

Total CPU Memory Usage: 25.71 GB


In [55]:
print(quant_prof6.key_averages().table(sort_by="cpu_time_total", row_limit=10))
print(quant_prof6.key_averages().table(sort_by="self_cpu_memory_usage", row_limit=10))

-------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                                   Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg       CPU Mem  Self CPU Mem    # of Calls  
-------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                          ProfilerStep*         6.08%     147.129ms       100.00%        2.421s     403.491ms      -9.18 Mb      -8.91 Gb             6  
                              quantized::linear_dynamic        56.84%        1.376s        57.49%        1.392s       3.178ms       2.76 Gb      -2.76 Gb           438  
                                       aten::layer_norm         0.05%       1.187ms         9.34%     226.143ms       1.508ms     634.86 Mb      -1.80

In [46]:
quant_model5 = quantize(model5)

print(count_parameters(quant_model5))

with profile(
    activities=[ProfilerActivity.CPU],
    schedule=torch.profiler.schedule(
        wait=1,
        warmup=1,
        active=3,
        repeat=2),
    record_shapes=True,
    profile_memory=True,
    with_stack=True
) as quant_prof5:
    eval_op(quant_model5, quant_prof5)

701730


Evaluating: 100%|██████████| 1250/1250 [03:32<00:00,  5.89it/s]

Eval Loss: 1.1457
Eval Top-1 Accuracy: 0.6997
Eval Top-3 Accuracy: 0.8662
Evaluation Time: 212.28 seconds





In [47]:
key_averages = quant_prof5.key_averages()
total_memory = sum(avg.cpu_memory_usage for avg in key_averages)
print(f"Total CPU Memory Usage: {total_memory / (1024 * 1024*1024):.2f} GB")

Total CPU Memory Usage: 25.50 GB


In [48]:
print(quant_prof5.key_averages().table(sort_by="cpu_time_total", row_limit=10))
print(quant_prof5.key_averages().table(sort_by="self_cpu_memory_usage", row_limit=10))

-------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                                   Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg       CPU Mem  Self CPU Mem    # of Calls  
-------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                          ProfilerStep*         8.82%     131.045ms       100.00%        1.486s     247.698ms      -9.18 Mb      -8.83 Gb             6  
                              quantized::linear_dynamic        42.89%     637.411ms        43.63%     648.407ms       1.480ms       2.73 Gb      -2.73 Gb           438  
                                           aten::matmul         0.27%       3.987ms        21.43%     318.448ms       2.211ms       1.32 Gb    -997.31

In [56]:
quant_model4 = quantize(model4)

print(count_parameters(quant_model4))

with profile(
    activities=[ProfilerActivity.CPU],
    schedule=torch.profiler.schedule(
        wait=1,
        warmup=1,
        active=3,
        repeat=2),
    record_shapes=True,
    profile_memory=True,
    with_stack=True
) as quant_prof4:
    eval_op(quant_model4, quant_prof4)

683424


Evaluating: 100%|██████████| 1250/1250 [03:30<00:00,  5.94it/s]

Eval Loss: 1.2551
Eval Top-1 Accuracy: 0.6718
Eval Top-3 Accuracy: 0.8477
Evaluation Time: 210.57 seconds





In [57]:
key_averages = quant_prof4.key_averages()
total_memory = sum(avg.cpu_memory_usage for avg in key_averages)
print(f"Total CPU Memory Usage: {total_memory / (1024 * 1024*1024):.2f} GB")

Total CPU Memory Usage: 25.21 GB


In [58]:
print(quant_prof4.key_averages().table(sort_by="cpu_time_total", row_limit=10))
print(quant_prof4.key_averages().table(sort_by="self_cpu_memory_usage", row_limit=10))

-------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                                   Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg       CPU Mem  Self CPU Mem    # of Calls  
-------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                          ProfilerStep*         3.40%     156.946ms       100.00%        4.619s     769.886ms      -9.18 Mb      -8.72 Gb             6  
                              quantized::linear_dynamic        42.46%        1.961s        42.84%        1.979s       4.518ms       2.68 Gb      -2.68 Gb           438  
                                             aten::gelu        14.86%     686.491ms        14.86%     686.491ms       9.535ms       1.14 Gb       1.14

In [59]:
quant_model3 = quantize(model3)

print(count_parameters(quant_model3))

with profile(
    activities=[ProfilerActivity.CPU],
    schedule=torch.profiler.schedule(
        wait=1,
        warmup=1,
        active=3,
        repeat=2),
    record_shapes=True,
    profile_memory=True,
    with_stack=True
) as quant_prof3:
    eval_op(quant_model3, quant_prof3)

650880


Evaluating: 100%|██████████| 1250/1250 [03:18<00:00,  6.28it/s]

Eval Loss: 1.7058
Eval Top-1 Accuracy: 0.5770
Eval Top-3 Accuracy: 0.7673
Evaluation Time: 198.94 seconds





In [60]:
key_averages = quant_prof3.key_averages()
total_memory = sum(avg.cpu_memory_usage for avg in key_averages)
print(f"Total CPU Memory Usage: {total_memory / (1024 * 1024*1024):.2f} GB")

Total CPU Memory Usage: 24.71 GB


In [61]:
print(quant_prof3.key_averages().table(sort_by="cpu_time_total", row_limit=10))
print(quant_prof3.key_averages().table(sort_by="self_cpu_memory_usage", row_limit=10))

-------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                                   Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg       CPU Mem  Self CPU Mem    # of Calls  
-------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                          ProfilerStep*         8.34%     132.023ms       100.00%        1.584s     263.992ms      -9.18 Mb      -8.53 Gb             6  
                              quantized::linear_dynamic        52.09%     825.087ms        52.72%     835.018ms       1.906ms       2.60 Gb      -2.60 Gb           438  
                                           aten::matmul         0.21%       3.353ms        15.63%     247.607ms       1.719ms       1.32 Gb    -997.31

In [62]:
quant_stud_model = quantize(student_model)

print(count_parameters(quant_stud_model))

with profile(
    activities=[ProfilerActivity.CPU],
    schedule=torch.profiler.schedule(
        wait=1,
        warmup=1,
        active=3,
        repeat=2),
    record_shapes=True,
    profile_memory=True,
    with_stack=True
) as quant_stud_prof:
    eval_op(quant_stud_model, quant_stud_prof)

195264


Evaluating: 100%|██████████| 1250/1250 [01:36<00:00, 13.02it/s]

Eval Loss: 0.7968
Eval Top-1 Accuracy: 0.7792
Eval Top-3 Accuracy: 0.9251
Evaluation Time: 96.02 seconds





In [None]:
key_averages = quant_stud_prof.key_averages()
total_memory = sum(avg.cpu_memory_usage for avg in key_averages)
print(f"Total CPU Memory Usage: {total_memory / (1024 * 1024*1024):.2f} GB")


Total CPU Memory Usage: 4.33 GB


In [64]:
print(quant_stud_prof.key_averages().table(sort_by="cpu_time_total", row_limit=10))
print(quant_stud_prof.key_averages().table(sort_by="self_cpu_memory_usage", row_limit=10))

-------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                                   Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg       CPU Mem  Self CPU Mem    # of Calls  
-------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                          ProfilerStep*        22.22%      91.765ms       100.00%     412.956ms      68.826ms      -9.18 Mb      -1.51 Gb             6  
                              quantized::linear_dynamic        28.21%     116.509ms        29.74%     122.824ms     280.419us     748.00 Mb    -748.04 Mb           438  
                                           aten::conv2d         0.02%      89.950us        21.58%      89.108ms      14.851ms       6.89 Mb           

In [None]:
import torch
import time


# Evaluation function calculating accuracy, latency and CPU memory
def evaluate_model(model, test_loader=test_loader):
    model.eval()
    correct_top3 = 0
    total = 0
    latency = 0
    gpu_memory = 0
    cpu_memory = 0

    start_time = time.time()
    
    with profile(
        activities=[ProfilerActivity.CPU],
        schedule=torch.profiler.schedule(
            wait=1,
            warmup=1,
            active=3,
            repeat=2),
        record_shapes=True,
        profile_memory=True,
        with_stack=False
    ) as prof:
        with torch.no_grad():
            for inputs, labels in test_loader:
                inputs, labels = inputs.to(device), labels.to(device)
                
                prof.step()
                
                outputs = model(inputs).logits
                
                # Top-3 accuracy
                _, predicted_top3 = outputs.topk(3, 1, largest=True, sorted=True)
                correct_top3 += sum([1 for i, label in enumerate(labels) if label in predicted_top3[i]])
                
                total += labels.size(0)

    end_time = time.time()
    latency = end_time - start_time

    # Measure GPU memory usage
    if torch.cuda.is_available():
        gpu_memory = torch.cuda.max_memory_allocated()
        torch.cuda.reset_peak_memory_stats()  # Reset peak memory stats for next run
    
    key_averages = prof.key_averages()
    cpu_memory = sum(avg.cpu_memory_usage for avg in key_averages)

    accuracy_top3 = 100 * correct_top3 / total

    return accuracy_top3, latency, cpu_memory / (1024 * 1024*1024)

In [None]:
# Define the search space
import numpy as np
search_space = {
    'original': {
        'quantization': [True, False],
        'pruning_steps': [3, 4, 5, 6, 7, 8]
    },
    'student': {
        'quantization': [True, False],
        'pruning_steps': [15, 16, 17, 18, 19, 20]
    }
}

# Define random search strategy based on given inputs
def random_search(original_model, student_model, test_loader, num_iterations=10, max_params=np.inf, max_mem = np.inf):
    best_config = None
    best_score = float('-inf')
    student_params = count_parameters(student_model)
    tried_configs = set()

    model_type = 'original' if max_params > student_params else 'student'
    config_space = search_space[model_type]

    for i in range(num_iterations):
        # Generate a new configuration
        while True:
            config = {
                'model_type': model_type,
                'quantization': np.random.choice(config_space['quantization']),
                'pruning_steps': np.random.choice(config_space['pruning_steps'])
            }
            config_tuple = tuple(config.items())
            if config_tuple not in tried_configs:
                tried_configs.add(config_tuple)
                break

        print(f"Iteration {i+1}:")
        print(f"Config: {config}")

        # Select and copy the appropriate model
        model = copy.deepcopy(original_model if config['model_type'] == 'original' else student_model)

        # Apply pruning
        model = prune_vit_model(model, iterative_steps=config['pruning_steps'])

        # Apply quantization if specified
        if config['quantization']:
            model = quantize(model)

        # Check if the model meets the parameter constraint
        if count_parameters(model) > max_params:
            print("Failed: Exceeds parameter limit")
            continue

        # Evaluate the model
        accuracy, latency, memory = evaluate_model(model, test_loader)
        # Check if the model meets the memory constraint
        if(memory > max_mem):
            print("Failed: Exceeds memory limit")
            continue
            
        print(f"Accuracy: {accuracy:.2f}%, Latency: {latency:.4f}s, Memory: {memory:.2f}GB")

        # Calculate a combined score
        score = accuracy - 0.1 * latency - memory
        print(f"Score: {score:.4f}")

        if score > best_score:
            best_score = score
            best_config = config

    return best_config

In [12]:
def best_model(best_config, original_model, student_model):
    if best_config['model_type'] == 'original':
        final_model = copy.deepcopy(original_model)
    else:
        final_model = copy.deepcopy(student_model)

    final_model = prune_vit_model(final_model, iterative_steps = best_config['pruning_steps'])

    if best_config['quantization']:
        final_model = quantize(final_model)
    
    return final_model


In [112]:
best_config = random_search(model, student_model, test_loader, num_iterations=10, max_params=70000000)

Iteration 1:
Config: {'model_type': 'original', 'quantization': np.False_, 'pruning_steps': np.int64(3)}
Accuracy: 75.51%, Latency: 210.2708s, Memory: 19.52GB
Score: 34.9646
Iteration 2:
Config: {'model_type': 'original', 'quantization': np.True_, 'pruning_steps': np.int64(4)}
Accuracy: 84.77%, Latency: 227.0053s, Memory: 25.21GB
Score: 36.8581
Iteration 3:
Config: {'model_type': 'original', 'quantization': np.False_, 'pruning_steps': np.int64(7)}
Failed: Exceeds parameter limit
Iteration 4:
Config: {'model_type': 'original', 'quantization': np.True_, 'pruning_steps': np.int64(6)}
Accuracy: 88.69%, Latency: 216.6466s, Memory: 25.71GB
Score: 41.3157
Iteration 5:
Config: {'model_type': 'original', 'quantization': np.True_, 'pruning_steps': np.int64(5)}
Accuracy: 86.62%, Latency: 213.7290s, Memory: 25.50GB
Score: 39.7510
Iteration 6:
Config: {'model_type': 'original', 'quantization': np.False_, 'pruning_steps': np.int64(8)}
Failed: Exceeds parameter limit
Iteration 7:
Config: {'model_type

In [113]:
best_config

{'model_type': 'original',
 'quantization': np.True_,
 'pruning_steps': np.int64(6)}

In [114]:
optimized_model = best_model(best_config, model, student_model)
evaluate_model(optimized_model, test_loader)

(88.69, 200.24301671981812, 25.70965152978897)

In [13]:
best_config = random_search(model, student_model, test_loader, num_iterations=10, max_params=5300000)

Iteration 1:
Config: {'model_type': 'student', 'quantization': np.True_, 'pruning_steps': np.int64(19)}


 Torch-Pruning will prune the last non-singleton dimension of these parameters. If you wish to change this behavior, please provide an unwrapped_parameters argument.
[W1211 16:22:12.335792552 CPUAllocator.cpp:249] Memory block of unknown size was allocated before the profiling started, profiler results will not include the deallocation event


Accuracy: 78.24%, Latency: 115.1802s, Memory: 6.61GB
Score: 60.1162
Iteration 2:
Config: {'model_type': 'student', 'quantization': np.True_, 'pruning_steps': np.int64(15)}
Accuracy: 73.00%, Latency: 115.8008s, Memory: 6.59GB
Score: 54.8279
Iteration 3:
Config: {'model_type': 'student', 'quantization': np.False_, 'pruning_steps': np.int64(17)}
Accuracy: 78.39%, Latency: 94.9493s, Memory: 5.18GB
Score: 63.7152
Iteration 4:
Config: {'model_type': 'student', 'quantization': np.False_, 'pruning_steps': np.int64(15)}
Accuracy: 73.57%, Latency: 85.4092s, Memory: 5.17GB
Score: 59.8626
Iteration 5:
Config: {'model_type': 'student', 'quantization': np.False_, 'pruning_steps': np.int64(20)}
Accuracy: 78.37%, Latency: 92.3090s, Memory: 5.17GB
Score: 63.9721
Iteration 6:
Config: {'model_type': 'student', 'quantization': np.True_, 'pruning_steps': np.int64(18)}
Accuracy: 78.24%, Latency: 114.6155s, Memory: 6.61GB
Score: 60.1637
Iteration 7:
Config: {'model_type': 'student', 'quantization': np.False_

In [14]:
best_config

{'model_type': 'student',
 'quantization': np.False_,
 'pruning_steps': np.int64(19)}

In [15]:
optimized_model = best_model(best_config, model, student_model)
evaluate_model(optimized_model, test_loader)


(78.44, 89.92148685455322, 5.164437800645828)