### Author

Recep Fırat Çekinel

In [1]:
from cifar10_models import resnet
import torch
import numpy as np
import pandas as pd
from torch import nn
import torch.nn.utils.prune as prune
import torch.nn.functional as F
import torchvision
import torchvision.transforms as transforms
from tqdm import tqdm
from time import process_time, process_time_ns

In [2]:
# Run on the GPU when PyTorch can see one; otherwise stay on the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
device

device(type='cuda')

In [3]:
import wandb

# Authenticate this kernel with Weights & Biases. To force a fresh login
# (e.g. a different account), run `wandb login --relogin` from a shell.
wandb.login()

[34m[1mwandb[0m: Currently logged in as: [33mfiratc[0m (use `wandb login --relogin` to force relogin)


True

In [4]:
# Hyper-parameters explored by the sweep: only the learning rate is varied.
parameters_dict = {'learning_rate': {'values': [0.001, 0.01]}}

# Grid search over the values listed above.
sweep_config = {'method': 'grid', 'parameters': parameters_dict}

# Register the sweep with W&B; the returned id is what the agent is started with.
sweep_id = wandb.sweep(sweep_config, project="mmi712-term-project")

Create sweep with ID: dvsgvufx
Sweep URL: https://wandb.ai/firatc/mmi712-term-project/sweeps/dvsgvufx


# Loading the CIFAR-10 Dataset

In [3]:
# Training-time augmentation: pad every side by 4 pixels, flip horizontally at
# random, take a random 32x32 crop, then convert the image to a tensor.
transform = transforms.Compose([
    transforms.Pad(4),
    transforms.RandomHorizontalFlip(),
    transforms.RandomCrop(32),
    transforms.ToTensor(),
])

# CIFAR-10 splits read from the local data/ directory (download=False, so
# nothing is fetched by this cell).
train_dataset = torchvision.datasets.CIFAR10(
    root='data/', train=True, transform=transform, download=False)

# The test split is only converted to tensors - no augmentation.
test_dataset = torchvision.datasets.CIFAR10(
    root='data/', train=False, transform=transforms.ToTensor())

# Mini-batches of 100 images; only the training stream is shuffled.
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=100, shuffle=True)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=100, shuffle=False)

# Save & Load the Model

In [4]:
def saveModel(model, model_name):
    """Serialize ``model`` to the path ``model_name`` using ``torch.save``.

    The object is handed to ``torch.save`` unchanged, so whatever the caller
    passes in (in this notebook: the whole module, not a state dict) is what
    ends up in the checkpoint file.
    """
    torch.save(obj=model, f=model_name)

In [5]:
def loadModel(model_name, dev="cuda"):
    """Load a checkpoint written by ``torch.save`` onto the requested device.

    Args:
        model_name: Path (or file-like object) of the checkpoint to read.
        dev: Target device, e.g. ``"cuda"``, ``"cuda:0"``, ``"cpu"`` or a
            ``torch.device``. Any CUDA target falls back to the CPU when
            CUDA is not available.

    Returns:
        The object stored in the checkpoint, with its tensors mapped to the
        resolved device.

    Note:
        ``torch.load`` unpickles the file, which can execute arbitrary code;
        only load checkpoints that come from a trusted source.
    """
    target = torch.device(dev)
    # Compare the parsed device type rather than the raw string, so indexed
    # targets such as "cuda:0" also fall back instead of failing in torch.load.
    if target.type == "cuda" and not torch.cuda.is_available():
        target = torch.device("cpu")
    return torch.load(model_name, map_location=target)

# Test the Model on Eval Dataset

In [6]:
def testModel(model, test_loader):
    # Test the model
    model.eval()
    with torch.no_grad():
        correct = 0
        total = 0
        for images, labels in test_loader:
            images = images.to(device)
            labels = labels.to(device)
            outputs = model(images)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

        print('Accuracy of the model on the test images: {} %'.format(100 * correct / total))

In [7]:
def getInference(instance, model, repetitions=300, warmup=0, timer=process_time_ns):
    """Time repeated CPU forward passes of ``model`` on a single ``instance``.

    Args:
        instance: Input tensor; it is moved to the CPU before timing.
        model: Module to time. It is switched to eval mode but is NOT moved,
            so the caller must already have placed it on the CPU.
        repetitions: Number of timed forward passes.
        warmup: Number of untimed forward passes run before measuring, to
            keep one-off first-call costs out of the statistics. Defaults to
            0, which matches the previous behaviour.
        timer: Zero-argument callable returning a nanosecond counter. The
            default, ``time.process_time_ns``, counts CPU time of the whole
            process, not elapsed time; pass ``time.perf_counter_ns`` to
            measure wall-clock latency instead.

    Returns:
        Tuple ``(mean, std)`` of the per-pass durations, in seconds.
    """
    durations = np.zeros((repetitions, 1))

    model.eval()
    with torch.no_grad():
        instance = instance.to("cpu")

        # Untimed passes: results are discarded on purpose.
        for _ in range(warmup):
            model(instance)

        for rep in range(repetitions):
            t_start = timer()
            model(instance)
            t_stop = timer()
            # Nanoseconds -> seconds.
            durations[rep] = (t_stop - t_start) / 10 ** 9

    mean_syn = np.sum(durations) / repetitions
    std_syn = np.std(durations)

    return (mean_syn, std_syn)

In [8]:
# Single-image loader used only to obtain one sample for the inference timing.
test_loader2 = torch.utils.data.DataLoader(dataset=test_dataset,
                                          batch_size=1,
                                          shuffle=False)

# shuffle=False, so this is always the first test image. next() raises
# StopIteration right here if the dataset is empty, instead of silently
# leaving sample_image / sample_label undefined for later cells.
sample_image, sample_label = next(iter(test_loader2))
print(sample_image, sample_label)

tensor([[[[0.6196, 0.6235, 0.6471,  ..., 0.5373, 0.4941, 0.4549],
          [0.5961, 0.5922, 0.6235,  ..., 0.5333, 0.4902, 0.4667],
          [0.5922, 0.5922, 0.6196,  ..., 0.5451, 0.5098, 0.4706],
          ...,
          [0.2667, 0.1647, 0.1216,  ..., 0.1490, 0.0510, 0.1569],
          [0.2392, 0.1922, 0.1373,  ..., 0.1020, 0.1137, 0.0784],
          [0.2118, 0.2196, 0.1765,  ..., 0.0941, 0.1333, 0.0824]],

         [[0.4392, 0.4353, 0.4549,  ..., 0.3725, 0.3569, 0.3333],
          [0.4392, 0.4314, 0.4471,  ..., 0.3725, 0.3569, 0.3451],
          [0.4314, 0.4275, 0.4353,  ..., 0.3843, 0.3725, 0.3490],
          ...,
          [0.4863, 0.3922, 0.3451,  ..., 0.3804, 0.2510, 0.3333],
          [0.4549, 0.4000, 0.3333,  ..., 0.3216, 0.3216, 0.2510],
          [0.4196, 0.4118, 0.3490,  ..., 0.3020, 0.3294, 0.2627]],

         [[0.1922, 0.1843, 0.2000,  ..., 0.1412, 0.1412, 0.1294],
          [0.2000, 0.1569, 0.1765,  ..., 0.1216, 0.1255, 0.1333],
          [0.1843, 0.1294, 0.1412,  ..., 0

In [10]:
# Load the checkpoint that trainModel writes for learning_rate=0.01
# ('resnet_lr' + str(learning_rate) + '.pth'); the file must already exist.
model = loadModel("resnet_lr0.01.pth")
# Check test accuracy first, while the model is still on the device it was loaded onto.
testModel(model, test_loader)
# getInference moves only the input to the CPU, so the model has to be moved here.
model = model.to("cpu")
# 100 timed forward passes on the single sample image -> (mean, std) in seconds.
getInference(sample_image, model, 100)

Accuracy of the model on the test images: 90.43 %


(0.04692480155, 0.006379768579961289)

# Training a ResNet Model

In [9]:
def trainModel(model, learning_rate, train_loader, num_epochs=80):
    """Train ``model`` with Adam, cross-entropy loss and a stepped LR decay.

    Every 100 steps the current loss is printed and logged with ``wandb.log``
    (init_model calls this function inside a ``wandb.init`` run). Every 20
    epochs the learning rate is divided by 3. After training, the model is
    saved to ``'resnet_lr<learning_rate>.pth'`` and evaluated on the
    notebook-level ``test_loader``.

    Args:
        model: Network to train; expected to already be on ``device``.
        learning_rate: Initial Adam learning rate (also used in the
            checkpoint file name).
        train_loader: Iterable of ``(images, labels)`` training batches.
        num_epochs: Number of passes over ``train_loader``. Defaults to 80.

    Returns:
        None.
    """
    # Loss and optimizer
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

    # Make sure layers such as BatchNorm/Dropout are in training mode even if
    # the model was evaluated earlier (testModel leaves it in eval mode).
    model.train()

    total_step = len(train_loader)
    curr_lr = learning_rate
    for epoch in tqdm(range(num_epochs)):
        for i, (images, labels) in enumerate(train_loader):
            images = images.to(device)
            labels = labels.to(device)

            # Forward pass
            outputs = model(images)
            loss = criterion(outputs, labels)

            # Backward and optimize
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # Report every 100th step (loss of that single batch).
            if (i+1) % 100 == 0:
                print ("Epoch [{}/{}], Step [{}/{}] Loss: {:.4f}".format(epoch+1, num_epochs, i+1, total_step, loss.item()))
                wandb.log({"Epoch": epoch+1, "Loss": loss.item()})

        # Decay the learning rate: divide by 3 after every 20th epoch and
        # push the new value into every optimizer parameter group.
        if (epoch+1) % 20 == 0:
            curr_lr /= 3
            for param_group in optimizer.param_groups:
                param_group['lr'] = curr_lr

    # Save the model checkpoint, then report accuracy on the test split.
    saveModel(model, 'resnet_lr'+str(learning_rate)+'.pth')
    testModel(model, test_loader)

In [10]:
def init_model(config=None):
    """Run one training trial inside its own W&B run.

    A new run is opened with ``wandb.init``, a fresh ``resnet.resnet18`` is
    created on ``device`` and trained via ``trainModel`` using the run's
    ``learning_rate`` and the notebook-level ``train_loader``.

    Args:
        config: Optional configuration forwarded to ``wandb.init``.
    """
    with wandb.init(config=config):
        # When launched through wandb.agent, the sweep controller fills in
        # wandb.config with the hyper-parameters of this trial.
        run_config = wandb.config

        net = resnet.resnet18().to(device)

        trainModel(net, run_config.learning_rate, train_loader)

In [11]:
# Start a sweep agent in this kernel: it calls init_model once for each
# configuration handed out by the sweep registered as sweep_id.
wandb.agent(sweep_id, init_model)

[34m[1mwandb[0m: Agent Starting Run: ef7yabdt with config:
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: wandb version 0.12.9 is available!  To upgrade, please run:
[34m[1mwandb[0m:  $ pip install wandb --upgrade


  0%|          | 0/80 [00:00<?, ?it/s]

Epoch [1/80], Step [100/500] Loss: 1.6570
Epoch [1/80], Step [200/500] Loss: 1.4522
Epoch [1/80], Step [300/500] Loss: 1.2114
Epoch [1/80], Step [400/500] Loss: 1.1740


  1%|▏         | 1/80 [00:18<24:03, 18.27s/it]

Epoch [1/80], Step [500/500] Loss: 1.1244
Epoch [2/80], Step [100/500] Loss: 0.9098
Epoch [2/80], Step [200/500] Loss: 0.8929
Epoch [2/80], Step [300/500] Loss: 1.0156
Epoch [2/80], Step [400/500] Loss: 0.9678


  2%|▎         | 2/80 [00:36<23:21, 17.97s/it]

Epoch [2/80], Step [500/500] Loss: 0.7824
Epoch [3/80], Step [100/500] Loss: 0.7793
Epoch [3/80], Step [200/500] Loss: 0.7750
Epoch [3/80], Step [300/500] Loss: 0.7202
Epoch [3/80], Step [400/500] Loss: 0.5749


  4%|▍         | 3/80 [00:53<22:51, 17.81s/it]

Epoch [3/80], Step [500/500] Loss: 0.6604
Epoch [4/80], Step [100/500] Loss: 0.6899
Epoch [4/80], Step [200/500] Loss: 0.6883
Epoch [4/80], Step [300/500] Loss: 0.7612
Epoch [4/80], Step [400/500] Loss: 0.6042


  5%|▌         | 4/80 [01:11<22:26, 17.72s/it]

Epoch [4/80], Step [500/500] Loss: 0.6170
Epoch [5/80], Step [100/500] Loss: 0.6246
Epoch [5/80], Step [200/500] Loss: 0.5962
Epoch [5/80], Step [300/500] Loss: 0.6270
Epoch [5/80], Step [400/500] Loss: 0.6484


  6%|▋         | 5/80 [01:28<22:07, 17.70s/it]

Epoch [5/80], Step [500/500] Loss: 0.7490
Epoch [6/80], Step [100/500] Loss: 0.4940
Epoch [6/80], Step [200/500] Loss: 0.5683
Epoch [6/80], Step [300/500] Loss: 0.5762
Epoch [6/80], Step [400/500] Loss: 0.5569


  8%|▊         | 6/80 [01:46<21:48, 17.69s/it]

Epoch [6/80], Step [500/500] Loss: 0.6050
Epoch [7/80], Step [100/500] Loss: 0.4597
Epoch [7/80], Step [200/500] Loss: 0.5345
Epoch [7/80], Step [300/500] Loss: 0.6020
Epoch [7/80], Step [400/500] Loss: 0.3917


  9%|▉         | 7/80 [02:04<21:31, 17.69s/it]

Epoch [7/80], Step [500/500] Loss: 0.3860
Epoch [8/80], Step [100/500] Loss: 0.3567
Epoch [8/80], Step [200/500] Loss: 0.4515
Epoch [8/80], Step [300/500] Loss: 0.7240
Epoch [8/80], Step [400/500] Loss: 0.4638


 10%|█         | 8/80 [02:21<21:12, 17.68s/it]

Epoch [8/80], Step [500/500] Loss: 0.5578
Epoch [9/80], Step [100/500] Loss: 0.3886
Epoch [9/80], Step [200/500] Loss: 0.5099
Epoch [9/80], Step [300/500] Loss: 0.3563
Epoch [9/80], Step [400/500] Loss: 0.4894


 11%|█▏        | 9/80 [02:39<20:56, 17.69s/it]

Epoch [9/80], Step [500/500] Loss: 0.5155
Epoch [10/80], Step [100/500] Loss: 0.4485
Epoch [10/80], Step [200/500] Loss: 0.4386
Epoch [10/80], Step [300/500] Loss: 0.4920
Epoch [10/80], Step [400/500] Loss: 0.3521


 12%|█▎        | 10/80 [02:57<20:41, 17.73s/it]

Epoch [10/80], Step [500/500] Loss: 0.3886
Epoch [11/80], Step [100/500] Loss: 0.2135
Epoch [11/80], Step [200/500] Loss: 0.3057
Epoch [11/80], Step [300/500] Loss: 0.2454
Epoch [11/80], Step [400/500] Loss: 0.5966


 14%|█▍        | 11/80 [03:15<20:23, 17.73s/it]

Epoch [11/80], Step [500/500] Loss: 0.4182
Epoch [12/80], Step [100/500] Loss: 0.4511
Epoch [12/80], Step [200/500] Loss: 0.4959
Epoch [12/80], Step [300/500] Loss: 0.2540
Epoch [12/80], Step [400/500] Loss: 0.4591


 15%|█▌        | 12/80 [03:32<20:05, 17.73s/it]

Epoch [12/80], Step [500/500] Loss: 0.4106
Epoch [13/80], Step [100/500] Loss: 0.3010
Epoch [13/80], Step [200/500] Loss: 0.2837
Epoch [13/80], Step [300/500] Loss: 0.2607
Epoch [13/80], Step [400/500] Loss: 0.2987


 16%|█▋        | 13/80 [03:50<19:49, 17.76s/it]

Epoch [13/80], Step [500/500] Loss: 0.5259
Epoch [14/80], Step [100/500] Loss: 0.3289
Epoch [14/80], Step [200/500] Loss: 0.2761
Epoch [14/80], Step [300/500] Loss: 0.2965
Epoch [14/80], Step [400/500] Loss: 0.2231


 18%|█▊        | 14/80 [04:08<19:33, 17.78s/it]

Epoch [14/80], Step [500/500] Loss: 0.4156
Epoch [15/80], Step [100/500] Loss: 0.3295
Epoch [15/80], Step [200/500] Loss: 0.3492
Epoch [15/80], Step [300/500] Loss: 0.2373
Epoch [15/80], Step [400/500] Loss: 0.3037


 19%|█▉        | 15/80 [04:26<19:17, 17.81s/it]

Epoch [15/80], Step [500/500] Loss: 0.2703
Epoch [16/80], Step [100/500] Loss: 0.2526
Epoch [16/80], Step [200/500] Loss: 0.2252
Epoch [16/80], Step [300/500] Loss: 0.4011
Epoch [16/80], Step [400/500] Loss: 0.4907


 20%|██        | 16/80 [04:44<19:01, 17.84s/it]

Epoch [16/80], Step [500/500] Loss: 0.2344
Epoch [17/80], Step [100/500] Loss: 0.2382
Epoch [17/80], Step [200/500] Loss: 0.3255
Epoch [17/80], Step [300/500] Loss: 0.2316
Epoch [17/80], Step [400/500] Loss: 0.2718


 21%|██▏       | 17/80 [05:02<18:44, 17.84s/it]

Epoch [17/80], Step [500/500] Loss: 0.2689
Epoch [18/80], Step [100/500] Loss: 0.2584
Epoch [18/80], Step [200/500] Loss: 0.2509
Epoch [18/80], Step [300/500] Loss: 0.3057
Epoch [18/80], Step [400/500] Loss: 0.2146


 22%|██▎       | 18/80 [05:20<18:26, 17.85s/it]

Epoch [18/80], Step [500/500] Loss: 0.1190
Epoch [19/80], Step [100/500] Loss: 0.3675
Epoch [19/80], Step [200/500] Loss: 0.1619
Epoch [19/80], Step [300/500] Loss: 0.3158
Epoch [19/80], Step [400/500] Loss: 0.2542


 24%|██▍       | 19/80 [05:37<18:09, 17.85s/it]

Epoch [19/80], Step [500/500] Loss: 0.3189
Epoch [20/80], Step [100/500] Loss: 0.1674
Epoch [20/80], Step [200/500] Loss: 0.3432
Epoch [20/80], Step [300/500] Loss: 0.2291
Epoch [20/80], Step [400/500] Loss: 0.2478


 25%|██▌       | 20/80 [05:55<17:51, 17.86s/it]

Epoch [20/80], Step [500/500] Loss: 0.2738
Epoch [21/80], Step [100/500] Loss: 0.1556
Epoch [21/80], Step [200/500] Loss: 0.1749
Epoch [21/80], Step [300/500] Loss: 0.1986
Epoch [21/80], Step [400/500] Loss: 0.1930


 26%|██▋       | 21/80 [06:13<17:34, 17.87s/it]

Epoch [21/80], Step [500/500] Loss: 0.2226
Epoch [22/80], Step [100/500] Loss: 0.0861
Epoch [22/80], Step [200/500] Loss: 0.1235
Epoch [22/80], Step [300/500] Loss: 0.2071
Epoch [22/80], Step [400/500] Loss: 0.2095


 28%|██▊       | 22/80 [06:31<17:16, 17.87s/it]

Epoch [22/80], Step [500/500] Loss: 0.1109
Epoch [23/80], Step [100/500] Loss: 0.0912
Epoch [23/80], Step [200/500] Loss: 0.1451
Epoch [23/80], Step [300/500] Loss: 0.1574
Epoch [23/80], Step [400/500] Loss: 0.1630


 29%|██▉       | 23/80 [06:49<16:59, 17.88s/it]

Epoch [23/80], Step [500/500] Loss: 0.1737
Epoch [24/80], Step [100/500] Loss: 0.0804
Epoch [24/80], Step [200/500] Loss: 0.1233
Epoch [24/80], Step [300/500] Loss: 0.1351
Epoch [24/80], Step [400/500] Loss: 0.1822


 30%|███       | 24/80 [07:07<16:41, 17.88s/it]

Epoch [24/80], Step [500/500] Loss: 0.0722
Epoch [25/80], Step [100/500] Loss: 0.0454
Epoch [25/80], Step [200/500] Loss: 0.1512
Epoch [25/80], Step [300/500] Loss: 0.0592
Epoch [25/80], Step [400/500] Loss: 0.0955


 31%|███▏      | 25/80 [07:25<16:23, 17.88s/it]

Epoch [25/80], Step [500/500] Loss: 0.0755
Epoch [26/80], Step [100/500] Loss: 0.2146
Epoch [26/80], Step [200/500] Loss: 0.1593
Epoch [26/80], Step [300/500] Loss: 0.1650
Epoch [26/80], Step [400/500] Loss: 0.0570


 32%|███▎      | 26/80 [07:43<16:05, 17.89s/it]

Epoch [26/80], Step [500/500] Loss: 0.0647
Epoch [27/80], Step [100/500] Loss: 0.0287
Epoch [27/80], Step [200/500] Loss: 0.1838
Epoch [27/80], Step [300/500] Loss: 0.0826
Epoch [27/80], Step [400/500] Loss: 0.1084


 34%|███▍      | 27/80 [08:00<15:47, 17.88s/it]

Epoch [27/80], Step [500/500] Loss: 0.1262
Epoch [28/80], Step [100/500] Loss: 0.0512
Epoch [28/80], Step [200/500] Loss: 0.1077
Epoch [28/80], Step [300/500] Loss: 0.1438
Epoch [28/80], Step [400/500] Loss: 0.1012


 35%|███▌      | 28/80 [08:18<15:29, 17.88s/it]

Epoch [28/80], Step [500/500] Loss: 0.1142
Epoch [29/80], Step [100/500] Loss: 0.0664
Epoch [29/80], Step [200/500] Loss: 0.1017
Epoch [29/80], Step [300/500] Loss: 0.1359
Epoch [29/80], Step [400/500] Loss: 0.0752


 36%|███▋      | 29/80 [08:36<15:11, 17.87s/it]

Epoch [29/80], Step [500/500] Loss: 0.0253
Epoch [30/80], Step [100/500] Loss: 0.1527
Epoch [30/80], Step [200/500] Loss: 0.0630
Epoch [30/80], Step [300/500] Loss: 0.0936
Epoch [30/80], Step [400/500] Loss: 0.1549


 38%|███▊      | 30/80 [08:54<14:53, 17.87s/it]

Epoch [30/80], Step [500/500] Loss: 0.0844
Epoch [31/80], Step [100/500] Loss: 0.1563
Epoch [31/80], Step [200/500] Loss: 0.0887
Epoch [31/80], Step [300/500] Loss: 0.1148
Epoch [31/80], Step [400/500] Loss: 0.0577


 39%|███▉      | 31/80 [09:12<14:36, 17.88s/it]

Epoch [31/80], Step [500/500] Loss: 0.0507
Epoch [32/80], Step [100/500] Loss: 0.0625
Epoch [32/80], Step [200/500] Loss: 0.1187
Epoch [32/80], Step [300/500] Loss: 0.1641
Epoch [32/80], Step [400/500] Loss: 0.1093


 40%|████      | 32/80 [09:30<14:18, 17.88s/it]

Epoch [32/80], Step [500/500] Loss: 0.2569
Epoch [33/80], Step [100/500] Loss: 0.0692
Epoch [33/80], Step [200/500] Loss: 0.0673
Epoch [33/80], Step [300/500] Loss: 0.0350
Epoch [33/80], Step [400/500] Loss: 0.0816


 41%|████▏     | 33/80 [09:48<13:59, 17.87s/it]

Epoch [33/80], Step [500/500] Loss: 0.0292
Epoch [34/80], Step [100/500] Loss: 0.0776
Epoch [34/80], Step [200/500] Loss: 0.1095
Epoch [34/80], Step [300/500] Loss: 0.0567
Epoch [34/80], Step [400/500] Loss: 0.0443


 42%|████▎     | 34/80 [10:06<13:41, 17.86s/it]

Epoch [34/80], Step [500/500] Loss: 0.1120
Epoch [35/80], Step [100/500] Loss: 0.0808
Epoch [35/80], Step [200/500] Loss: 0.0572
Epoch [35/80], Step [300/500] Loss: 0.2111
Epoch [35/80], Step [400/500] Loss: 0.0206


 44%|████▍     | 35/80 [10:23<13:24, 17.87s/it]

Epoch [35/80], Step [500/500] Loss: 0.1158
Epoch [36/80], Step [100/500] Loss: 0.0458
Epoch [36/80], Step [200/500] Loss: 0.0535
Epoch [36/80], Step [300/500] Loss: 0.1710
Epoch [36/80], Step [400/500] Loss: 0.0280


 45%|████▌     | 36/80 [10:41<13:06, 17.88s/it]

Epoch [36/80], Step [500/500] Loss: 0.0519
Epoch [37/80], Step [100/500] Loss: 0.0157
Epoch [37/80], Step [200/500] Loss: 0.0564
Epoch [37/80], Step [300/500] Loss: 0.0368
Epoch [37/80], Step [400/500] Loss: 0.0719


 46%|████▋     | 37/80 [10:59<12:48, 17.88s/it]

Epoch [37/80], Step [500/500] Loss: 0.0545
Epoch [38/80], Step [100/500] Loss: 0.0142
Epoch [38/80], Step [200/500] Loss: 0.1143
Epoch [38/80], Step [300/500] Loss: 0.0536
Epoch [38/80], Step [400/500] Loss: 0.0683


 48%|████▊     | 38/80 [11:17<12:30, 17.88s/it]

Epoch [38/80], Step [500/500] Loss: 0.0559
Epoch [39/80], Step [100/500] Loss: 0.0767
Epoch [39/80], Step [200/500] Loss: 0.0311
Epoch [39/80], Step [300/500] Loss: 0.0996
Epoch [39/80], Step [400/500] Loss: 0.1269


 49%|████▉     | 39/80 [11:35<12:12, 17.87s/it]

Epoch [39/80], Step [500/500] Loss: 0.0347
Epoch [40/80], Step [100/500] Loss: 0.0660
Epoch [40/80], Step [200/500] Loss: 0.0319
Epoch [40/80], Step [300/500] Loss: 0.0451
Epoch [40/80], Step [400/500] Loss: 0.0590


 50%|█████     | 40/80 [11:53<11:54, 17.87s/it]

Epoch [40/80], Step [500/500] Loss: 0.0614
Epoch [41/80], Step [100/500] Loss: 0.0216
Epoch [41/80], Step [200/500] Loss: 0.0534
Epoch [41/80], Step [300/500] Loss: 0.0195
Epoch [41/80], Step [400/500] Loss: 0.0321


 51%|█████▏    | 41/80 [12:11<11:36, 17.87s/it]

Epoch [41/80], Step [500/500] Loss: 0.0791
Epoch [42/80], Step [100/500] Loss: 0.0394
Epoch [42/80], Step [200/500] Loss: 0.0139
Epoch [42/80], Step [300/500] Loss: 0.0693
Epoch [42/80], Step [400/500] Loss: 0.0093


 52%|█████▎    | 42/80 [12:29<11:19, 17.88s/it]

Epoch [42/80], Step [500/500] Loss: 0.0603
Epoch [43/80], Step [100/500] Loss: 0.0376
Epoch [43/80], Step [200/500] Loss: 0.0319
Epoch [43/80], Step [300/500] Loss: 0.1718
Epoch [43/80], Step [400/500] Loss: 0.0276


 54%|█████▍    | 43/80 [12:46<11:01, 17.87s/it]

Epoch [43/80], Step [500/500] Loss: 0.0128
Epoch [44/80], Step [100/500] Loss: 0.0273
Epoch [44/80], Step [200/500] Loss: 0.0451
Epoch [44/80], Step [300/500] Loss: 0.0158
Epoch [44/80], Step [400/500] Loss: 0.0048


 55%|█████▌    | 44/80 [13:04<10:43, 17.87s/it]

Epoch [44/80], Step [500/500] Loss: 0.0139
Epoch [45/80], Step [100/500] Loss: 0.0067
Epoch [45/80], Step [200/500] Loss: 0.0538
Epoch [45/80], Step [300/500] Loss: 0.0068
Epoch [45/80], Step [400/500] Loss: 0.0220


 56%|█████▋    | 45/80 [13:22<10:25, 17.87s/it]

Epoch [45/80], Step [500/500] Loss: 0.0204
Epoch [46/80], Step [100/500] Loss: 0.0122
Epoch [46/80], Step [200/500] Loss: 0.0232
Epoch [46/80], Step [300/500] Loss: 0.0237
Epoch [46/80], Step [400/500] Loss: 0.0785


 57%|█████▊    | 46/80 [13:40<10:07, 17.87s/it]

Epoch [46/80], Step [500/500] Loss: 0.0175
Epoch [47/80], Step [100/500] Loss: 0.0437
Epoch [47/80], Step [200/500] Loss: 0.0268
Epoch [47/80], Step [300/500] Loss: 0.0210
Epoch [47/80], Step [400/500] Loss: 0.0285


 59%|█████▉    | 47/80 [13:58<09:50, 17.88s/it]

Epoch [47/80], Step [500/500] Loss: 0.0275
Epoch [48/80], Step [100/500] Loss: 0.0200
Epoch [48/80], Step [200/500] Loss: 0.0620
Epoch [48/80], Step [300/500] Loss: 0.0366
Epoch [48/80], Step [400/500] Loss: 0.0214


 60%|██████    | 48/80 [14:16<09:31, 17.87s/it]

Epoch [48/80], Step [500/500] Loss: 0.0192
Epoch [49/80], Step [100/500] Loss: 0.0047
Epoch [49/80], Step [200/500] Loss: 0.0120
Epoch [49/80], Step [300/500] Loss: 0.0325
Epoch [49/80], Step [400/500] Loss: 0.0653


 61%|██████▏   | 49/80 [14:34<09:14, 17.87s/it]

Epoch [49/80], Step [500/500] Loss: 0.0269
Epoch [50/80], Step [100/500] Loss: 0.0059
Epoch [50/80], Step [200/500] Loss: 0.0144
Epoch [50/80], Step [300/500] Loss: 0.0034
Epoch [50/80], Step [400/500] Loss: 0.0084


 62%|██████▎   | 50/80 [14:52<08:56, 17.87s/it]

Epoch [50/80], Step [500/500] Loss: 0.0207
Epoch [51/80], Step [100/500] Loss: 0.0447
Epoch [51/80], Step [200/500] Loss: 0.0198
Epoch [51/80], Step [300/500] Loss: 0.0193
Epoch [51/80], Step [400/500] Loss: 0.0160


 64%|██████▍   | 51/80 [15:09<08:38, 17.87s/it]

Epoch [51/80], Step [500/500] Loss: 0.0658
Epoch [52/80], Step [100/500] Loss: 0.0133
Epoch [52/80], Step [200/500] Loss: 0.0107
Epoch [52/80], Step [300/500] Loss: 0.0706
Epoch [52/80], Step [400/500] Loss: 0.0381


 65%|██████▌   | 52/80 [15:27<08:20, 17.88s/it]

Epoch [52/80], Step [500/500] Loss: 0.0229
Epoch [53/80], Step [100/500] Loss: 0.0281
Epoch [53/80], Step [200/500] Loss: 0.0291
Epoch [53/80], Step [300/500] Loss: 0.0379
Epoch [53/80], Step [400/500] Loss: 0.0941


 66%|██████▋   | 53/80 [15:45<08:02, 17.87s/it]

Epoch [53/80], Step [500/500] Loss: 0.0111
Epoch [54/80], Step [100/500] Loss: 0.0323
Epoch [54/80], Step [200/500] Loss: 0.0207
Epoch [54/80], Step [300/500] Loss: 0.0311
Epoch [54/80], Step [400/500] Loss: 0.0133


 68%|██████▊   | 54/80 [16:03<07:44, 17.87s/it]

Epoch [54/80], Step [500/500] Loss: 0.0030
Epoch [55/80], Step [100/500] Loss: 0.0059
Epoch [55/80], Step [200/500] Loss: 0.0164
Epoch [55/80], Step [300/500] Loss: 0.0012
Epoch [55/80], Step [400/500] Loss: 0.0023


 69%|██████▉   | 55/80 [16:21<07:26, 17.87s/it]

Epoch [55/80], Step [500/500] Loss: 0.0124
Epoch [56/80], Step [100/500] Loss: 0.0042
Epoch [56/80], Step [200/500] Loss: 0.0060
Epoch [56/80], Step [300/500] Loss: 0.0481
Epoch [56/80], Step [400/500] Loss: 0.0161


 70%|███████   | 56/80 [16:39<07:08, 17.87s/it]

Epoch [56/80], Step [500/500] Loss: 0.0196
Epoch [57/80], Step [100/500] Loss: 0.0023
Epoch [57/80], Step [200/500] Loss: 0.0234
Epoch [57/80], Step [300/500] Loss: 0.0107
Epoch [57/80], Step [400/500] Loss: 0.0206


 71%|███████▏  | 57/80 [16:57<06:51, 17.89s/it]

Epoch [57/80], Step [500/500] Loss: 0.0007
Epoch [58/80], Step [100/500] Loss: 0.0234
Epoch [58/80], Step [200/500] Loss: 0.0349
Epoch [58/80], Step [300/500] Loss: 0.0250
Epoch [58/80], Step [400/500] Loss: 0.0046


 72%|███████▎  | 58/80 [17:15<06:33, 17.88s/it]

Epoch [58/80], Step [500/500] Loss: 0.0102
Epoch [59/80], Step [100/500] Loss: 0.0038
Epoch [59/80], Step [200/500] Loss: 0.0032
Epoch [59/80], Step [300/500] Loss: 0.0103
Epoch [59/80], Step [400/500] Loss: 0.0011


 74%|███████▍  | 59/80 [17:32<06:15, 17.87s/it]

Epoch [59/80], Step [500/500] Loss: 0.0076
Epoch [60/80], Step [100/500] Loss: 0.0661
Epoch [60/80], Step [200/500] Loss: 0.0098
Epoch [60/80], Step [300/500] Loss: 0.0397
Epoch [60/80], Step [400/500] Loss: 0.0106


 75%|███████▌  | 60/80 [17:50<05:57, 17.87s/it]

Epoch [60/80], Step [500/500] Loss: 0.0074
Epoch [61/80], Step [100/500] Loss: 0.0197
Epoch [61/80], Step [200/500] Loss: 0.0321
Epoch [61/80], Step [300/500] Loss: 0.0161
Epoch [61/80], Step [400/500] Loss: 0.0054


 76%|███████▋  | 61/80 [18:08<05:39, 17.87s/it]

Epoch [61/80], Step [500/500] Loss: 0.0083
Epoch [62/80], Step [100/500] Loss: 0.0014
Epoch [62/80], Step [200/500] Loss: 0.0558
Epoch [62/80], Step [300/500] Loss: 0.0084
Epoch [62/80], Step [400/500] Loss: 0.0127


 78%|███████▊  | 62/80 [18:26<05:21, 17.88s/it]

Epoch [62/80], Step [500/500] Loss: 0.0085
Epoch [63/80], Step [100/500] Loss: 0.0207
Epoch [63/80], Step [200/500] Loss: 0.0053
Epoch [63/80], Step [300/500] Loss: 0.0027
Epoch [63/80], Step [400/500] Loss: 0.0152


 79%|███████▉  | 63/80 [18:44<05:03, 17.87s/it]

Epoch [63/80], Step [500/500] Loss: 0.0343
Epoch [64/80], Step [100/500] Loss: 0.0056
Epoch [64/80], Step [200/500] Loss: 0.0282
Epoch [64/80], Step [300/500] Loss: 0.0153
Epoch [64/80], Step [400/500] Loss: 0.0082


 80%|████████  | 64/80 [19:02<04:45, 17.87s/it]

Epoch [64/80], Step [500/500] Loss: 0.0139
Epoch [65/80], Step [100/500] Loss: 0.0126
Epoch [65/80], Step [200/500] Loss: 0.0075
Epoch [65/80], Step [300/500] Loss: 0.0222
Epoch [65/80], Step [400/500] Loss: 0.0369


 81%|████████▏ | 65/80 [19:20<04:27, 17.86s/it]

Epoch [65/80], Step [500/500] Loss: 0.0041
Epoch [66/80], Step [100/500] Loss: 0.0183
Epoch [66/80], Step [200/500] Loss: 0.0318
Epoch [66/80], Step [300/500] Loss: 0.0058
Epoch [66/80], Step [400/500] Loss: 0.0121


 82%|████████▎ | 66/80 [19:38<04:10, 17.86s/it]

Epoch [66/80], Step [500/500] Loss: 0.0011
Epoch [67/80], Step [100/500] Loss: 0.0053
Epoch [67/80], Step [200/500] Loss: 0.0229
Epoch [67/80], Step [300/500] Loss: 0.0025
Epoch [67/80], Step [400/500] Loss: 0.0069


 84%|████████▍ | 67/80 [19:55<03:52, 17.86s/it]

Epoch [67/80], Step [500/500] Loss: 0.0011
Epoch [68/80], Step [100/500] Loss: 0.0218
Epoch [68/80], Step [200/500] Loss: 0.0040
Epoch [68/80], Step [300/500] Loss: 0.0024
Epoch [68/80], Step [400/500] Loss: 0.0103


 85%|████████▌ | 68/80 [20:13<03:34, 17.87s/it]

Epoch [68/80], Step [500/500] Loss: 0.0017
Epoch [69/80], Step [100/500] Loss: 0.0029
Epoch [69/80], Step [200/500] Loss: 0.0037
Epoch [69/80], Step [300/500] Loss: 0.0055
Epoch [69/80], Step [400/500] Loss: 0.0373


 86%|████████▋ | 69/80 [20:31<03:16, 17.87s/it]

Epoch [69/80], Step [500/500] Loss: 0.0255
Epoch [70/80], Step [100/500] Loss: 0.0357
Epoch [70/80], Step [200/500] Loss: 0.0023
Epoch [70/80], Step [300/500] Loss: 0.0052
Epoch [70/80], Step [400/500] Loss: 0.0036


 88%|████████▊ | 70/80 [20:49<02:58, 17.88s/it]

Epoch [70/80], Step [500/500] Loss: 0.0122
Epoch [71/80], Step [100/500] Loss: 0.0014
Epoch [71/80], Step [200/500] Loss: 0.0046
Epoch [71/80], Step [300/500] Loss: 0.0139
Epoch [71/80], Step [400/500] Loss: 0.0283


 89%|████████▉ | 71/80 [21:07<02:40, 17.87s/it]

Epoch [71/80], Step [500/500] Loss: 0.0013
Epoch [72/80], Step [100/500] Loss: 0.0186
Epoch [72/80], Step [200/500] Loss: 0.0013
Epoch [72/80], Step [300/500] Loss: 0.0546
Epoch [72/80], Step [400/500] Loss: 0.0157


 90%|█████████ | 72/80 [21:25<02:22, 17.87s/it]

Epoch [72/80], Step [500/500] Loss: 0.0084
Epoch [73/80], Step [100/500] Loss: 0.0122
Epoch [73/80], Step [200/500] Loss: 0.0174
Epoch [73/80], Step [300/500] Loss: 0.0096
Epoch [73/80], Step [400/500] Loss: 0.0015


 91%|█████████▏| 73/80 [21:43<02:05, 17.88s/it]

Epoch [73/80], Step [500/500] Loss: 0.0138
Epoch [74/80], Step [100/500] Loss: 0.0073
Epoch [74/80], Step [200/500] Loss: 0.0176
Epoch [74/80], Step [300/500] Loss: 0.0019
Epoch [74/80], Step [400/500] Loss: 0.0080


 92%|█████████▎| 74/80 [22:01<01:47, 17.88s/it]

Epoch [74/80], Step [500/500] Loss: 0.0072
Epoch [75/80], Step [100/500] Loss: 0.0023
Epoch [75/80], Step [200/500] Loss: 0.0043
Epoch [75/80], Step [300/500] Loss: 0.0133
Epoch [75/80], Step [400/500] Loss: 0.0247


 94%|█████████▍| 75/80 [22:18<01:29, 17.88s/it]

Epoch [75/80], Step [500/500] Loss: 0.0084
Epoch [76/80], Step [100/500] Loss: 0.0024
Epoch [76/80], Step [200/500] Loss: 0.0522
Epoch [76/80], Step [300/500] Loss: 0.0032
Epoch [76/80], Step [400/500] Loss: 0.0380


 95%|█████████▌| 76/80 [22:36<01:11, 17.88s/it]

Epoch [76/80], Step [500/500] Loss: 0.0041
Epoch [77/80], Step [100/500] Loss: 0.0019
Epoch [77/80], Step [200/500] Loss: 0.0042
Epoch [77/80], Step [300/500] Loss: 0.0122
Epoch [77/80], Step [400/500] Loss: 0.0007


 96%|█████████▋| 77/80 [22:54<00:53, 17.88s/it]

Epoch [77/80], Step [500/500] Loss: 0.0031
Epoch [78/80], Step [100/500] Loss: 0.0205
Epoch [78/80], Step [200/500] Loss: 0.0550
Epoch [78/80], Step [300/500] Loss: 0.0006
Epoch [78/80], Step [400/500] Loss: 0.0030


 98%|█████████▊| 78/80 [23:12<00:35, 17.88s/it]

Epoch [78/80], Step [500/500] Loss: 0.0224
Epoch [79/80], Step [100/500] Loss: 0.0024
Epoch [79/80], Step [200/500] Loss: 0.0071
Epoch [79/80], Step [300/500] Loss: 0.0102
Epoch [79/80], Step [400/500] Loss: 0.0278


 99%|█████████▉| 79/80 [23:30<00:17, 17.88s/it]

Epoch [79/80], Step [500/500] Loss: 0.0118
Epoch [80/80], Step [100/500] Loss: 0.0211
Epoch [80/80], Step [200/500] Loss: 0.0028
Epoch [80/80], Step [300/500] Loss: 0.0052
Epoch [80/80], Step [400/500] Loss: 0.0160


100%|██████████| 80/80 [23:48<00:00, 17.85s/it]

Epoch [80/80], Step [500/500] Loss: 0.0042





Accuracy of the model on the test images: 90.78 %


VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
Epoch,80.0
Loss,0.00417
_runtime,1433.0
_timestamp,1642074059.0
_step,399.0


0,1
Epoch,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇▇███
Loss,█▆▅▄▄▃▃▃▃▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
_runtime,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
_timestamp,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███


[34m[1mwandb[0m: Agent Starting Run: mmv6v1vm with config:
[34m[1mwandb[0m: 	learning_rate: 0.01
[34m[1mwandb[0m: wandb version 0.12.9 is available!  To upgrade, please run:
[34m[1mwandb[0m:  $ pip install wandb --upgrade


  0%|          | 0/80 [00:00<?, ?it/s]

Epoch [1/80], Step [100/500] Loss: 1.8705
Epoch [1/80], Step [200/500] Loss: 1.5548
Epoch [1/80], Step [300/500] Loss: 1.7460
Epoch [1/80], Step [400/500] Loss: 1.5215


  1%|▏         | 1/80 [00:18<24:45, 18.80s/it]

Epoch [1/80], Step [500/500] Loss: 1.2853
Epoch [2/80], Step [100/500] Loss: 1.1649
Epoch [2/80], Step [200/500] Loss: 1.3205
Epoch [2/80], Step [300/500] Loss: 1.2435
Epoch [2/80], Step [400/500] Loss: 1.0596


  2%|▎         | 2/80 [00:36<23:58, 18.44s/it]

Epoch [2/80], Step [500/500] Loss: 1.1343
Epoch [3/80], Step [100/500] Loss: 1.1117
Epoch [3/80], Step [200/500] Loss: 0.9604
Epoch [3/80], Step [300/500] Loss: 1.0055
Epoch [3/80], Step [400/500] Loss: 1.1167


  4%|▍         | 3/80 [00:54<23:20, 18.19s/it]

Epoch [3/80], Step [500/500] Loss: 1.1714
Epoch [4/80], Step [100/500] Loss: 0.9478
Epoch [4/80], Step [200/500] Loss: 0.9938
Epoch [4/80], Step [300/500] Loss: 0.9199
Epoch [4/80], Step [400/500] Loss: 0.9141


  5%|▌         | 4/80 [01:12<22:53, 18.07s/it]

Epoch [4/80], Step [500/500] Loss: 0.9743
Epoch [5/80], Step [100/500] Loss: 0.8440
Epoch [5/80], Step [200/500] Loss: 0.7596
Epoch [5/80], Step [300/500] Loss: 0.6812
Epoch [5/80], Step [400/500] Loss: 0.6251


  6%|▋         | 5/80 [01:30<22:30, 18.01s/it]

Epoch [5/80], Step [500/500] Loss: 0.8565
Epoch [6/80], Step [100/500] Loss: 0.5790
Epoch [6/80], Step [200/500] Loss: 0.7890
Epoch [6/80], Step [300/500] Loss: 0.7391
Epoch [6/80], Step [400/500] Loss: 0.6328


  8%|▊         | 6/80 [01:48<22:10, 17.98s/it]

Epoch [6/80], Step [500/500] Loss: 0.6979
Epoch [7/80], Step [100/500] Loss: 0.6249
Epoch [7/80], Step [200/500] Loss: 0.4432
Epoch [7/80], Step [300/500] Loss: 0.5979
Epoch [7/80], Step [400/500] Loss: 0.6927


  9%|▉         | 7/80 [02:06<21:51, 17.96s/it]

Epoch [7/80], Step [500/500] Loss: 0.5835
Epoch [8/80], Step [100/500] Loss: 0.4308
Epoch [8/80], Step [200/500] Loss: 0.6086
Epoch [8/80], Step [300/500] Loss: 0.6151
Epoch [8/80], Step [400/500] Loss: 0.4460


 10%|█         | 8/80 [02:24<21:32, 17.95s/it]

Epoch [8/80], Step [500/500] Loss: 0.6233
Epoch [9/80], Step [100/500] Loss: 0.5789
Epoch [9/80], Step [200/500] Loss: 0.6724
Epoch [9/80], Step [300/500] Loss: 0.4826
Epoch [9/80], Step [400/500] Loss: 0.6243


 11%|█▏        | 9/80 [02:42<21:13, 17.94s/it]

Epoch [9/80], Step [500/500] Loss: 0.5281
Epoch [10/80], Step [100/500] Loss: 0.6073
Epoch [10/80], Step [200/500] Loss: 0.5377
Epoch [10/80], Step [300/500] Loss: 0.3334
Epoch [10/80], Step [400/500] Loss: 0.4197


 12%|█▎        | 10/80 [03:00<20:55, 17.94s/it]

Epoch [10/80], Step [500/500] Loss: 0.4783
Epoch [11/80], Step [100/500] Loss: 0.3497
Epoch [11/80], Step [200/500] Loss: 0.5768
Epoch [11/80], Step [300/500] Loss: 0.3754
Epoch [11/80], Step [400/500] Loss: 0.4450


 14%|█▍        | 11/80 [03:18<20:37, 17.94s/it]

Epoch [11/80], Step [500/500] Loss: 0.5097
Epoch [12/80], Step [100/500] Loss: 0.3322
Epoch [12/80], Step [200/500] Loss: 0.4911
Epoch [12/80], Step [300/500] Loss: 0.4005
Epoch [12/80], Step [400/500] Loss: 0.4246


 15%|█▌        | 12/80 [03:36<20:19, 17.93s/it]

Epoch [12/80], Step [500/500] Loss: 0.3595
Epoch [13/80], Step [100/500] Loss: 0.2803
Epoch [13/80], Step [200/500] Loss: 0.5230
Epoch [13/80], Step [300/500] Loss: 0.3695
Epoch [13/80], Step [400/500] Loss: 0.3835


 16%|█▋        | 13/80 [03:54<20:00, 17.93s/it]

Epoch [13/80], Step [500/500] Loss: 0.4621
Epoch [14/80], Step [100/500] Loss: 0.4542
Epoch [14/80], Step [200/500] Loss: 0.4076
Epoch [14/80], Step [300/500] Loss: 0.3420
Epoch [14/80], Step [400/500] Loss: 0.2537


 18%|█▊        | 14/80 [04:11<19:42, 17.92s/it]

Epoch [14/80], Step [500/500] Loss: 0.3628
Epoch [15/80], Step [100/500] Loss: 0.5903
Epoch [15/80], Step [200/500] Loss: 0.4426
Epoch [15/80], Step [300/500] Loss: 0.3191
Epoch [15/80], Step [400/500] Loss: 0.4297


 19%|█▉        | 15/80 [04:29<19:24, 17.91s/it]

Epoch [15/80], Step [500/500] Loss: 0.3109
Epoch [16/80], Step [100/500] Loss: 0.3008
Epoch [16/80], Step [200/500] Loss: 0.3941
Epoch [16/80], Step [300/500] Loss: 0.3657
Epoch [16/80], Step [400/500] Loss: 0.2420


 20%|██        | 16/80 [04:47<19:06, 17.91s/it]

Epoch [16/80], Step [500/500] Loss: 0.2354
Epoch [17/80], Step [100/500] Loss: 0.4775
Epoch [17/80], Step [200/500] Loss: 0.2534
Epoch [17/80], Step [300/500] Loss: 0.3563
Epoch [17/80], Step [400/500] Loss: 0.4603


 21%|██▏       | 17/80 [05:05<18:48, 17.91s/it]

Epoch [17/80], Step [500/500] Loss: 0.3396
Epoch [18/80], Step [100/500] Loss: 0.2650
Epoch [18/80], Step [200/500] Loss: 0.3230
Epoch [18/80], Step [300/500] Loss: 0.2831
Epoch [18/80], Step [400/500] Loss: 0.3468


 22%|██▎       | 18/80 [05:23<18:29, 17.90s/it]

Epoch [18/80], Step [500/500] Loss: 0.2853
Epoch [19/80], Step [100/500] Loss: 0.1926
Epoch [19/80], Step [200/500] Loss: 0.3007
Epoch [19/80], Step [300/500] Loss: 0.3512
Epoch [19/80], Step [400/500] Loss: 0.2647


 24%|██▍       | 19/80 [05:41<18:11, 17.89s/it]

Epoch [19/80], Step [500/500] Loss: 0.4320
Epoch [20/80], Step [100/500] Loss: 0.3462
Epoch [20/80], Step [200/500] Loss: 0.3016
Epoch [20/80], Step [300/500] Loss: 0.4730
Epoch [20/80], Step [400/500] Loss: 0.2881


 25%|██▌       | 20/80 [05:59<17:53, 17.89s/it]

Epoch [20/80], Step [500/500] Loss: 0.3563
Epoch [21/80], Step [100/500] Loss: 0.1543
Epoch [21/80], Step [200/500] Loss: 0.2008
Epoch [21/80], Step [300/500] Loss: 0.1300
Epoch [21/80], Step [400/500] Loss: 0.1390


 26%|██▋       | 21/80 [06:17<17:35, 17.89s/it]

Epoch [21/80], Step [500/500] Loss: 0.2577
Epoch [22/80], Step [100/500] Loss: 0.1082
Epoch [22/80], Step [200/500] Loss: 0.1829
Epoch [22/80], Step [300/500] Loss: 0.1260
Epoch [22/80], Step [400/500] Loss: 0.0804


 28%|██▊       | 22/80 [06:35<17:17, 17.88s/it]

Epoch [22/80], Step [500/500] Loss: 0.1730
Epoch [23/80], Step [100/500] Loss: 0.2322
Epoch [23/80], Step [200/500] Loss: 0.1516
Epoch [23/80], Step [300/500] Loss: 0.1361
Epoch [23/80], Step [400/500] Loss: 0.0998


 29%|██▉       | 23/80 [06:52<16:59, 17.89s/it]

Epoch [23/80], Step [500/500] Loss: 0.2819
Epoch [24/80], Step [100/500] Loss: 0.1543
Epoch [24/80], Step [200/500] Loss: 0.0442
Epoch [24/80], Step [300/500] Loss: 0.1715
Epoch [24/80], Step [400/500] Loss: 0.1872


 30%|███       | 24/80 [07:10<16:41, 17.89s/it]

Epoch [24/80], Step [500/500] Loss: 0.1602
Epoch [25/80], Step [100/500] Loss: 0.0677
Epoch [25/80], Step [200/500] Loss: 0.1768
Epoch [25/80], Step [300/500] Loss: 0.2317
Epoch [25/80], Step [400/500] Loss: 0.1979


 31%|███▏      | 25/80 [07:28<16:23, 17.88s/it]

Epoch [25/80], Step [500/500] Loss: 0.2222
Epoch [26/80], Step [100/500] Loss: 0.0810
Epoch [26/80], Step [200/500] Loss: 0.0803
Epoch [26/80], Step [300/500] Loss: 0.1202
Epoch [26/80], Step [400/500] Loss: 0.0815


 32%|███▎      | 26/80 [07:46<16:05, 17.88s/it]

Epoch [26/80], Step [500/500] Loss: 0.2169
Epoch [27/80], Step [100/500] Loss: 0.1762
Epoch [27/80], Step [200/500] Loss: 0.1133
Epoch [27/80], Step [300/500] Loss: 0.1512
Epoch [27/80], Step [400/500] Loss: 0.1301


 34%|███▍      | 27/80 [08:04<15:47, 17.89s/it]

Epoch [27/80], Step [500/500] Loss: 0.0687
Epoch [28/80], Step [100/500] Loss: 0.0834
Epoch [28/80], Step [200/500] Loss: 0.1566
Epoch [28/80], Step [300/500] Loss: 0.1350
Epoch [28/80], Step [400/500] Loss: 0.0496


 35%|███▌      | 28/80 [08:22<15:29, 17.88s/it]

Epoch [28/80], Step [500/500] Loss: 0.1205
Epoch [29/80], Step [100/500] Loss: 0.0393
Epoch [29/80], Step [200/500] Loss: 0.3127
Epoch [29/80], Step [300/500] Loss: 0.1177
Epoch [29/80], Step [400/500] Loss: 0.0891


 36%|███▋      | 29/80 [08:40<15:11, 17.88s/it]

Epoch [29/80], Step [500/500] Loss: 0.0816
Epoch [30/80], Step [100/500] Loss: 0.0403
Epoch [30/80], Step [200/500] Loss: 0.1925
Epoch [30/80], Step [300/500] Loss: 0.1241
Epoch [30/80], Step [400/500] Loss: 0.1946


 38%|███▊      | 30/80 [08:58<14:54, 17.90s/it]

Epoch [30/80], Step [500/500] Loss: 0.1125
Epoch [31/80], Step [100/500] Loss: 0.0943
Epoch [31/80], Step [200/500] Loss: 0.1778
Epoch [31/80], Step [300/500] Loss: 0.1309
Epoch [31/80], Step [400/500] Loss: 0.1279


 39%|███▉      | 31/80 [09:16<14:36, 17.89s/it]

Epoch [31/80], Step [500/500] Loss: 0.1836
Epoch [32/80], Step [100/500] Loss: 0.0666
Epoch [32/80], Step [200/500] Loss: 0.0939
Epoch [32/80], Step [300/500] Loss: 0.0945
Epoch [32/80], Step [400/500] Loss: 0.0744


 40%|████      | 32/80 [09:33<14:18, 17.89s/it]

Epoch [32/80], Step [500/500] Loss: 0.0557
Epoch [33/80], Step [100/500] Loss: 0.1748
Epoch [33/80], Step [200/500] Loss: 0.1021
Epoch [33/80], Step [300/500] Loss: 0.0765
Epoch [33/80], Step [400/500] Loss: 0.0420


 41%|████▏     | 33/80 [09:51<14:00, 17.89s/it]

Epoch [33/80], Step [500/500] Loss: 0.0919
Epoch [34/80], Step [100/500] Loss: 0.0645
Epoch [34/80], Step [200/500] Loss: 0.0829
Epoch [34/80], Step [300/500] Loss: 0.1009
Epoch [34/80], Step [400/500] Loss: 0.1196


 42%|████▎     | 34/80 [10:09<13:42, 17.89s/it]

Epoch [34/80], Step [500/500] Loss: 0.0819
Epoch [35/80], Step [100/500] Loss: 0.0751
Epoch [35/80], Step [200/500] Loss: 0.0702
Epoch [35/80], Step [300/500] Loss: 0.2982
Epoch [35/80], Step [400/500] Loss: 0.1013


 44%|████▍     | 35/80 [10:27<13:24, 17.89s/it]

Epoch [35/80], Step [500/500] Loss: 0.1082
Epoch [36/80], Step [100/500] Loss: 0.0754
Epoch [36/80], Step [200/500] Loss: 0.1463
Epoch [36/80], Step [300/500] Loss: 0.1309
Epoch [36/80], Step [400/500] Loss: 0.1185


 45%|████▌     | 36/80 [10:45<13:06, 17.88s/it]

Epoch [36/80], Step [500/500] Loss: 0.1428
Epoch [37/80], Step [100/500] Loss: 0.0818
Epoch [37/80], Step [200/500] Loss: 0.1063
Epoch [37/80], Step [300/500] Loss: 0.1250
Epoch [37/80], Step [400/500] Loss: 0.0685


 46%|████▋     | 37/80 [11:03<12:49, 17.90s/it]

Epoch [37/80], Step [500/500] Loss: 0.0795
Epoch [38/80], Step [100/500] Loss: 0.1580
Epoch [38/80], Step [200/500] Loss: 0.0950
Epoch [38/80], Step [300/500] Loss: 0.0499
Epoch [38/80], Step [400/500] Loss: 0.0412


 48%|████▊     | 38/80 [11:21<12:31, 17.89s/it]

Epoch [38/80], Step [500/500] Loss: 0.0571
Epoch [39/80], Step [100/500] Loss: 0.0309
Epoch [39/80], Step [200/500] Loss: 0.1461
Epoch [39/80], Step [300/500] Loss: 0.0733
Epoch [39/80], Step [400/500] Loss: 0.0595


 49%|████▉     | 39/80 [11:39<12:13, 17.88s/it]

Epoch [39/80], Step [500/500] Loss: 0.1214
Epoch [40/80], Step [100/500] Loss: 0.0688
Epoch [40/80], Step [200/500] Loss: 0.0797
Epoch [40/80], Step [300/500] Loss: 0.1169
Epoch [40/80], Step [400/500] Loss: 0.0785


 50%|█████     | 40/80 [11:57<11:55, 17.88s/it]

Epoch [40/80], Step [500/500] Loss: 0.1330
Epoch [41/80], Step [100/500] Loss: 0.0222
Epoch [41/80], Step [200/500] Loss: 0.0331
Epoch [41/80], Step [300/500] Loss: 0.0617
Epoch [41/80], Step [400/500] Loss: 0.0538


 51%|█████▏    | 41/80 [12:14<11:37, 17.89s/it]

Epoch [41/80], Step [500/500] Loss: 0.0683
Epoch [42/80], Step [100/500] Loss: 0.0142
Epoch [42/80], Step [200/500] Loss: 0.0191
Epoch [42/80], Step [300/500] Loss: 0.0618
Epoch [42/80], Step [400/500] Loss: 0.0452


 52%|█████▎    | 42/80 [12:32<11:19, 17.89s/it]

Epoch [42/80], Step [500/500] Loss: 0.0156
Epoch [43/80], Step [100/500] Loss: 0.0248
Epoch [43/80], Step [200/500] Loss: 0.1281
Epoch [43/80], Step [300/500] Loss: 0.0122
Epoch [43/80], Step [400/500] Loss: 0.0486


 54%|█████▍    | 43/80 [12:50<11:01, 17.89s/it]

Epoch [43/80], Step [500/500] Loss: 0.0112
Epoch [44/80], Step [100/500] Loss: 0.0857
Epoch [44/80], Step [200/500] Loss: 0.0283
Epoch [44/80], Step [300/500] Loss: 0.0141
Epoch [44/80], Step [400/500] Loss: 0.0517


 55%|█████▌    | 44/80 [13:08<10:43, 17.88s/it]

Epoch [44/80], Step [500/500] Loss: 0.0045
Epoch [45/80], Step [100/500] Loss: 0.1082
Epoch [45/80], Step [200/500] Loss: 0.0134
Epoch [45/80], Step [300/500] Loss: 0.0340
Epoch [45/80], Step [400/500] Loss: 0.0872


 56%|█████▋    | 45/80 [13:26<10:25, 17.88s/it]

Epoch [45/80], Step [500/500] Loss: 0.0782
Epoch [46/80], Step [100/500] Loss: 0.0709
Epoch [46/80], Step [200/500] Loss: 0.0357
Epoch [46/80], Step [300/500] Loss: 0.0383
Epoch [46/80], Step [400/500] Loss: 0.0280


 57%|█████▊    | 46/80 [13:44<10:07, 17.88s/it]

Epoch [46/80], Step [500/500] Loss: 0.0130
Epoch [47/80], Step [100/500] Loss: 0.0584
Epoch [47/80], Step [200/500] Loss: 0.0284
Epoch [47/80], Step [300/500] Loss: 0.0407
Epoch [47/80], Step [400/500] Loss: 0.0777


 59%|█████▉    | 47/80 [14:02<09:50, 17.88s/it]

Epoch [47/80], Step [500/500] Loss: 0.0464
Epoch [48/80], Step [100/500] Loss: 0.1071
Epoch [48/80], Step [200/500] Loss: 0.0216
Epoch [48/80], Step [300/500] Loss: 0.0123
Epoch [48/80], Step [400/500] Loss: 0.0184


 60%|██████    | 48/80 [14:20<09:32, 17.88s/it]

Epoch [48/80], Step [500/500] Loss: 0.0320
Epoch [49/80], Step [100/500] Loss: 0.0256
Epoch [49/80], Step [200/500] Loss: 0.0571
Epoch [49/80], Step [300/500] Loss: 0.1409
Epoch [49/80], Step [400/500] Loss: 0.0186


 61%|██████▏   | 49/80 [14:37<09:14, 17.88s/it]

Epoch [49/80], Step [500/500] Loss: 0.0064
Epoch [50/80], Step [100/500] Loss: 0.0006
Epoch [50/80], Step [200/500] Loss: 0.0051
Epoch [50/80], Step [300/500] Loss: 0.0818
Epoch [50/80], Step [400/500] Loss: 0.0035


 62%|██████▎   | 50/80 [14:55<08:56, 17.87s/it]

Epoch [50/80], Step [500/500] Loss: 0.0153
Epoch [51/80], Step [100/500] Loss: 0.0095
Epoch [51/80], Step [200/500] Loss: 0.0178
Epoch [51/80], Step [300/500] Loss: 0.0320
Epoch [51/80], Step [400/500] Loss: 0.0142


 64%|██████▍   | 51/80 [15:13<08:38, 17.87s/it]

Epoch [51/80], Step [500/500] Loss: 0.0733
Epoch [52/80], Step [100/500] Loss: 0.0341
Epoch [52/80], Step [200/500] Loss: 0.0094
Epoch [52/80], Step [300/500] Loss: 0.0092
Epoch [52/80], Step [400/500] Loss: 0.0159


 65%|██████▌   | 52/80 [15:31<08:20, 17.86s/it]

Epoch [52/80], Step [500/500] Loss: 0.0125
Epoch [53/80], Step [100/500] Loss: 0.0187
Epoch [53/80], Step [200/500] Loss: 0.0211
Epoch [53/80], Step [300/500] Loss: 0.0676
Epoch [53/80], Step [400/500] Loss: 0.0532


 66%|██████▋   | 53/80 [15:49<08:02, 17.87s/it]

Epoch [53/80], Step [500/500] Loss: 0.0060
Epoch [54/80], Step [100/500] Loss: 0.0190
Epoch [54/80], Step [200/500] Loss: 0.0557
Epoch [54/80], Step [300/500] Loss: 0.0741
Epoch [54/80], Step [400/500] Loss: 0.1048


 68%|██████▊   | 54/80 [16:07<07:44, 17.87s/it]

Epoch [54/80], Step [500/500] Loss: 0.0237
Epoch [55/80], Step [100/500] Loss: 0.0504
Epoch [55/80], Step [200/500] Loss: 0.0341
Epoch [55/80], Step [300/500] Loss: 0.0698
Epoch [55/80], Step [400/500] Loss: 0.0147


 69%|██████▉   | 55/80 [16:25<07:26, 17.88s/it]

Epoch [55/80], Step [500/500] Loss: 0.0669
Epoch [56/80], Step [100/500] Loss: 0.0320
Epoch [56/80], Step [200/500] Loss: 0.0074
Epoch [56/80], Step [300/500] Loss: 0.0295
Epoch [56/80], Step [400/500] Loss: 0.0458


 70%|███████   | 56/80 [16:43<07:08, 17.87s/it]

Epoch [56/80], Step [500/500] Loss: 0.0117
Epoch [57/80], Step [100/500] Loss: 0.0909
Epoch [57/80], Step [200/500] Loss: 0.0030
Epoch [57/80], Step [300/500] Loss: 0.0102
Epoch [57/80], Step [400/500] Loss: 0.0120


 71%|███████▏  | 57/80 [17:00<06:51, 17.87s/it]

Epoch [57/80], Step [500/500] Loss: 0.0139
Epoch [58/80], Step [100/500] Loss: 0.0154
Epoch [58/80], Step [200/500] Loss: 0.0095
Epoch [58/80], Step [300/500] Loss: 0.0094
Epoch [58/80], Step [400/500] Loss: 0.0361


 72%|███████▎  | 58/80 [17:18<06:33, 17.88s/it]

Epoch [58/80], Step [500/500] Loss: 0.0102
Epoch [59/80], Step [100/500] Loss: 0.0770
Epoch [59/80], Step [200/500] Loss: 0.0349
Epoch [59/80], Step [300/500] Loss: 0.0106
Epoch [59/80], Step [400/500] Loss: 0.0101


 74%|███████▍  | 59/80 [17:36<06:15, 17.88s/it]

Epoch [59/80], Step [500/500] Loss: 0.0366
Epoch [60/80], Step [100/500] Loss: 0.0338
Epoch [60/80], Step [200/500] Loss: 0.0198
Epoch [60/80], Step [300/500] Loss: 0.0118
Epoch [60/80], Step [400/500] Loss: 0.1234


 75%|███████▌  | 60/80 [17:54<05:57, 17.87s/it]

Epoch [60/80], Step [500/500] Loss: 0.0121
Epoch [61/80], Step [100/500] Loss: 0.0186
Epoch [61/80], Step [200/500] Loss: 0.0097
Epoch [61/80], Step [300/500] Loss: 0.0224
Epoch [61/80], Step [400/500] Loss: 0.0154


 76%|███████▋  | 61/80 [18:12<05:39, 17.87s/it]

Epoch [61/80], Step [500/500] Loss: 0.0193
Epoch [62/80], Step [100/500] Loss: 0.0218
Epoch [62/80], Step [200/500] Loss: 0.0375
Epoch [62/80], Step [300/500] Loss: 0.0012
Epoch [62/80], Step [400/500] Loss: 0.0185


 78%|███████▊  | 62/80 [18:30<05:21, 17.87s/it]

Epoch [62/80], Step [500/500] Loss: 0.0411
Epoch [63/80], Step [100/500] Loss: 0.0265
Epoch [63/80], Step [200/500] Loss: 0.0045
Epoch [63/80], Step [300/500] Loss: 0.0240
Epoch [63/80], Step [400/500] Loss: 0.0003


 79%|███████▉  | 63/80 [18:48<05:04, 17.88s/it]

Epoch [63/80], Step [500/500] Loss: 0.0051
Epoch [64/80], Step [100/500] Loss: 0.0138
Epoch [64/80], Step [200/500] Loss: 0.0054
Epoch [64/80], Step [300/500] Loss: 0.0140
Epoch [64/80], Step [400/500] Loss: 0.0117


 80%|████████  | 64/80 [19:06<04:46, 17.88s/it]

Epoch [64/80], Step [500/500] Loss: 0.0078
Epoch [65/80], Step [100/500] Loss: 0.0177
Epoch [65/80], Step [200/500] Loss: 0.0024
Epoch [65/80], Step [300/500] Loss: 0.0454
Epoch [65/80], Step [400/500] Loss: 0.0051


 81%|████████▏ | 65/80 [19:23<04:28, 17.88s/it]

Epoch [65/80], Step [500/500] Loss: 0.0043
Epoch [66/80], Step [100/500] Loss: 0.0063
Epoch [66/80], Step [200/500] Loss: 0.0578
Epoch [66/80], Step [300/500] Loss: 0.0121
Epoch [66/80], Step [400/500] Loss: 0.0023


 82%|████████▎ | 66/80 [19:41<04:10, 17.88s/it]

Epoch [66/80], Step [500/500] Loss: 0.0124
Epoch [67/80], Step [100/500] Loss: 0.0305
Epoch [67/80], Step [200/500] Loss: 0.0197
Epoch [67/80], Step [300/500] Loss: 0.0044
Epoch [67/80], Step [400/500] Loss: 0.0185


 84%|████████▍ | 67/80 [19:59<03:52, 17.87s/it]

Epoch [67/80], Step [500/500] Loss: 0.0085
Epoch [68/80], Step [100/500] Loss: 0.0296
Epoch [68/80], Step [200/500] Loss: 0.0038
Epoch [68/80], Step [300/500] Loss: 0.0247
Epoch [68/80], Step [400/500] Loss: 0.0193


 85%|████████▌ | 68/80 [20:17<03:34, 17.88s/it]

Epoch [68/80], Step [500/500] Loss: 0.0006
Epoch [69/80], Step [100/500] Loss: 0.0177
Epoch [69/80], Step [200/500] Loss: 0.0028
Epoch [69/80], Step [300/500] Loss: 0.0407
Epoch [69/80], Step [400/500] Loss: 0.0234


 86%|████████▋ | 69/80 [20:35<03:16, 17.88s/it]

Epoch [69/80], Step [500/500] Loss: 0.0456
Epoch [70/80], Step [100/500] Loss: 0.0023
Epoch [70/80], Step [200/500] Loss: 0.0022
Epoch [70/80], Step [300/500] Loss: 0.0010
Epoch [70/80], Step [400/500] Loss: 0.0071


 88%|████████▊ | 70/80 [20:53<02:58, 17.89s/it]

Epoch [70/80], Step [500/500] Loss: 0.0072
Epoch [71/80], Step [100/500] Loss: 0.0011
Epoch [71/80], Step [200/500] Loss: 0.0128
Epoch [71/80], Step [300/500] Loss: 0.0234
Epoch [71/80], Step [400/500] Loss: 0.0156


 89%|████████▉ | 71/80 [21:11<02:40, 17.87s/it]

Epoch [71/80], Step [500/500] Loss: 0.0034
Epoch [72/80], Step [100/500] Loss: 0.0091
Epoch [72/80], Step [200/500] Loss: 0.0763
Epoch [72/80], Step [300/500] Loss: 0.0055
Epoch [72/80], Step [400/500] Loss: 0.0182


 90%|█████████ | 72/80 [21:29<02:22, 17.87s/it]

Epoch [72/80], Step [500/500] Loss: 0.0360
Epoch [73/80], Step [100/500] Loss: 0.0179
Epoch [73/80], Step [200/500] Loss: 0.0063
Epoch [73/80], Step [300/500] Loss: 0.0014
Epoch [73/80], Step [400/500] Loss: 0.0139


 91%|█████████▏| 73/80 [21:46<02:05, 17.87s/it]

Epoch [73/80], Step [500/500] Loss: 0.0020
Epoch [74/80], Step [100/500] Loss: 0.0267
Epoch [74/80], Step [200/500] Loss: 0.0079
Epoch [74/80], Step [300/500] Loss: 0.0110
Epoch [74/80], Step [400/500] Loss: 0.0575


 92%|█████████▎| 74/80 [22:04<01:47, 17.90s/it]

Epoch [74/80], Step [500/500] Loss: 0.0006
Epoch [75/80], Step [100/500] Loss: 0.0305
Epoch [75/80], Step [200/500] Loss: 0.0058
Epoch [75/80], Step [300/500] Loss: 0.0094
Epoch [75/80], Step [400/500] Loss: 0.0079


 94%|█████████▍| 75/80 [22:22<01:29, 17.89s/it]

Epoch [75/80], Step [500/500] Loss: 0.0036
Epoch [76/80], Step [100/500] Loss: 0.0031
Epoch [76/80], Step [200/500] Loss: 0.0017
Epoch [76/80], Step [300/500] Loss: 0.0251
Epoch [76/80], Step [400/500] Loss: 0.0016


 95%|█████████▌| 76/80 [22:40<01:11, 17.88s/it]

Epoch [76/80], Step [500/500] Loss: 0.0007
Epoch [77/80], Step [100/500] Loss: 0.0105
Epoch [77/80], Step [200/500] Loss: 0.0250
Epoch [77/80], Step [300/500] Loss: 0.0081
Epoch [77/80], Step [400/500] Loss: 0.0097


 96%|█████████▋| 77/80 [22:58<00:53, 17.88s/it]

Epoch [77/80], Step [500/500] Loss: 0.0012
Epoch [78/80], Step [100/500] Loss: 0.0050
Epoch [78/80], Step [200/500] Loss: 0.0008
Epoch [78/80], Step [300/500] Loss: 0.0011
Epoch [78/80], Step [400/500] Loss: 0.0016


 98%|█████████▊| 78/80 [23:16<00:35, 17.87s/it]

Epoch [78/80], Step [500/500] Loss: 0.0009
Epoch [79/80], Step [100/500] Loss: 0.0493
Epoch [79/80], Step [200/500] Loss: 0.0010
Epoch [79/80], Step [300/500] Loss: 0.0015
Epoch [79/80], Step [400/500] Loss: 0.0615


 99%|█████████▉| 79/80 [23:34<00:17, 17.88s/it]

Epoch [79/80], Step [500/500] Loss: 0.0128
Epoch [80/80], Step [100/500] Loss: 0.0035
Epoch [80/80], Step [200/500] Loss: 0.0390
Epoch [80/80], Step [300/500] Loss: 0.0085
Epoch [80/80], Step [400/500] Loss: 0.0105


100%|██████████| 80/80 [23:52<00:00, 17.90s/it]

Epoch [80/80], Step [500/500] Loss: 0.0133





Accuracy of the model on the test images: 90.43 %


VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
Epoch,80.0
Loss,0.01325
_runtime,1435.0
_timestamp,1642075502.0
_step,399.0


0,1
Epoch,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇▇███
Loss,█▅▄▃▄▄▃▃▃▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
_runtime,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
_timestamp,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Sweep Agent: Exiting.


# Pruning

In [15]:
# Show the network layout before choosing layers to prune.
# NOTE(review): `modules` is referenced here without being called, so the cell
# displays the bound method's repr (which embeds the model's own repr) rather
# than iterating over the sub-modules — confirm this is the intended inspection.
model.modules

<bound method Module.modules of ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
   

In [16]:
# Collect the names of the layers considered for pruning: every Conv2d whose
# name does not contain "downsample", plus every Linear layer. Each Conv2d /
# Linear that is visited is also printed. Nothing is pruned in this cell.
prune_modules = []
for name, module in model.named_modules():
    if not isinstance(module, (torch.nn.Conv2d, torch.nn.Linear)):
        continue
    print(name, module)
    # "downsample" convolutions are listed above but kept out of the candidate list.
    if isinstance(module, torch.nn.Conv2d) and "downsample" in name:
        continue
    prune_modules.append(name)

conv1 Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
layer1.0.conv1 Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
layer1.0.conv2 Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
layer1.1.conv1 Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
layer1.1.conv2 Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
layer2.0.conv1 Conv2d(64, 128, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
layer2.0.conv2 Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
layer2.0.downsample.0 Conv2d(64, 128, kernel_size=(1, 1), stride=(2, 2), bias=False)
layer2.1.conv1 Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
layer2.1.conv2 Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
layer3.0.conv1 Conv2d(128, 256, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1)

In [9]:
def getParams(model):
    """Return the (module, parameter_name) pairs that are subject to pruning.

    The selection is fixed: the stem ``model.conv1``, then ``conv1`` and
    ``conv2`` of blocks 0 and 1 in ``layer1`` .. ``layer4`` (in that order),
    and finally ``model.fc``. Every pair names the ``'weight'`` parameter.

    Args:
        model: object exposing ``conv1``, ``layer1`` .. ``layer4`` (indexable,
            each block having ``conv1`` and ``conv2``) and ``fc``.

    Returns:
        A tuple of 18 ``(module, 'weight')`` tuples.
    """
    layers = [model.conv1]
    for stage_name in ("layer1", "layer2", "layer3", "layer4"):
        stage = getattr(model, stage_name)
        for block_index in range(2):
            block = stage[block_index]
            layers.append(block.conv1)
            layers.append(block.conv2)
    layers.append(model.fc)
    return tuple((layer, 'weight') for layer in layers)

In [10]:
def pruneModel(model, parameters_to_prune, ratio, printStats=True):
    """Apply global L1-magnitude pruning and optionally print sparsity statistics.

    ``parameters_to_prune`` is handed to ``prune.global_unstructured`` with
    ``prune.L1Unstructured`` as the method and ``ratio`` as ``amount``; the
    modules are modified in place.

    When ``printStats`` is true, the share of exactly-zero weights is printed
    for a fixed set of layers of ``model`` (``conv1``, ``conv1``/``conv2`` of
    blocks 0 and 1 in ``layer1`` .. ``layer4``, and ``fc``), followed by the
    sparsity over all of those layers together. The report always covers this
    fixed set, independently of what ``parameters_to_prune`` contains.

    Args:
        model: network exposing ``conv1``, ``layer1`` .. ``layer4`` and ``fc``
            (only read when ``printStats`` is true).
        parameters_to_prune: iterable of ``(module, parameter_name)`` pairs.
        ratio: forwarded unchanged as the ``amount`` argument.
        printStats: print the sparsity report when true; stay silent otherwise.

    Returns:
        None.
    """
    prune.global_unstructured(
        parameters_to_prune,
        pruning_method=prune.L1Unstructured,
        amount=ratio,
    )

    if not printStats:
        return

    # (label, module) pairs in reporting order. All layers are resolved before
    # anything is printed, so a model lacking one of them fails up front
    # instead of after a partial report.
    reported = [("conv1", model.conv1)]
    for stage_name in ("layer1", "layer2", "layer3", "layer4"):
        stage = getattr(model, stage_name)
        for block_index in (0, 1):
            block = stage[block_index]
            prefix = "{}[{}]".format(stage_name, block_index)
            reported.append((prefix + ".conv1", block.conv1))
            reported.append((prefix + ".conv2", block.conv2))
    # NOTE(review): the label reads "fc1" although the attribute is `fc`;
    # kept as-is so the printed report is unchanged.
    reported.append(("fc1", model.fc))

    # Count zeros once per layer and reuse the counts for the global figure.
    total_zeros = 0
    total_weights = 0
    for label, module in reported:
        weight = module.weight
        zeros = int(torch.sum(weight == 0))
        count = weight.nelement()
        total_zeros += zeros
        total_weights += count
        print(
            "Sparsity in {}.weight: {:.2f}%".format(
                label, 100. * float(zeros) / float(count)
            )
        )

    print(
        "Global sparsity: {:.2f}%".format(
            100. * float(total_zeros) / float(total_weights)
        )
    )

In [15]:
def setPruningPermanent(model, modules):
    """Make pruning permanent for every listed parameter.

    Calls ``prune.remove`` on each entry, which drops the pruning
    re-parametrization and leaves the masked tensor as a plain parameter.

    Args:
        model: Not used by this function; kept so existing call sites that
            pass the network keep working.
        modules: Iterable of ``(module, name)`` pairs, the same format that
            ``prune.global_unstructured`` accepts. ``name`` is the parameter
            to finalize (e.g. ``'weight'`` or ``'bias'``).

    Raises:
        ValueError: Propagated from ``prune.remove`` when the named parameter
            of a module is not currently pruned.
    """
    for module, w in modules:
        # Honor the parameter name carried in the pair instead of assuming
        # 'weight'. Fall back to 'weight' for non-string entries so callers
        # that relied on the old hard-coded behavior still work.
        name = w if isinstance(w, str) else 'weight'
        prune.remove(module, name)

# Re-evaluate the model after pruning

In [16]:
# Sweep the global pruning ratio over 0.05, 0.10, ..., 0.80 for both checkpoints.
# The ratios are built from integers: np.arange with a fractional step accumulates
# rounding error (it produced 0.15000000000000002 here) and its endpoint handling
# depends on that rounding.
prune_ratios = np.arange(5, 85, 5) / 100

# (learning rate shown in the log line, checkpoint file) for each model to evaluate.
checkpoints = [
    (0.001, "resnet_lr0.001.pth"),
    (0.01, "resnet_lr0.01.pth"),
]

for ratio in prune_ratios:
    print("Ratio: ", ratio)
    for model_no, (lr, checkpoint) in enumerate(checkpoints, start=1):
        print(f"Model{model_no}: lr= ", lr)
        # Load the checkpoint again for every ratio so each run starts from a
        # freshly loaded model rather than one pruned in a previous iteration.
        model = loadModel(checkpoint)
        parameters_to_prune = getParams(model)
        pruneModel(model, parameters_to_prune, ratio, printStats=False)
        setPruningPermanent(model, parameters_to_prune)
        testModel(model, test_loader)
        # Inference is measured with the model on the CPU.
        model = model.to("cpu")
        # NOTE(review): getInference is defined elsewhere; presumably it reports
        # timing statistics over 100 runs on sample_image — confirm.
        mean, stdev = getInference(sample_image, model, 100)
        print("Inference: ", (mean, stdev))

Ratio:  0.05
Model1: lr=  0.001
Accuracy of the model on the test images: 90.84 %
Inference:  (0.04554151123000001, 0.007384506765080374)
Model2: lr=  0.01
Accuracy of the model on the test images: 90.44 %
Inference:  (0.045861150069999995, 0.005755680796214182)
Ratio:  0.1
Model1: lr=  0.001
Accuracy of the model on the test images: 90.78 %
Inference:  (0.04577782358999999, 0.0057789616452419874)
Model2: lr=  0.01
Accuracy of the model on the test images: 90.42 %
Inference:  (0.045754017550000005, 0.006526924194281888)
Ratio:  0.15000000000000002
Model1: lr=  0.001
Accuracy of the model on the test images: 90.73 %
Inference:  (0.05521959469000001, 0.011002556970250215)
Model2: lr=  0.01
Accuracy of the model on the test images: 90.42 %
Inference:  (0.04616189984999999, 0.006080496278343344)
Ratio:  0.2
Model1: lr=  0.001
Accuracy of the model on the test images: 90.03 %
Inference:  (0.045945243729999995, 0.0055283602325883014)
Model2: lr=  0.01
Accuracy of the model on the test images

In [20]:
# Display the weight tensor of conv1 in the first block of layer1.
# `model` is whatever the kernel last bound to that name — presumably the final
# pruned lr=0.01 model from the sweep above, in which case pruned entries show up
# as 0.0000 / -0.0000 (TODO confirm: execution counts jump from 16 to 20).
# NOTE(review): this prints the entire tensor; a slice or a zero-count summary
# would keep the notebook output shorter.
model.layer1[0].conv1.weight

tensor([[[[-0.4249,  0.2550, -0.0000],
          [ 0.6017,  1.8117,  1.1083],
          [-0.0000,  1.4855,  1.5301]],

         [[-0.5792,  0.4112,  2.8071],
          [ 0.4502,  0.2385,  1.4300],
          [ 0.8258,  0.4430,  1.0861]],

         [[ 0.2909,  0.0000, -0.0000],
          [ 0.0000,  0.2227,  0.0000],
          [-0.0000,  0.0000, -0.1957]],

         ...,

         [[ 0.0000,  0.1557,  0.2102],
          [ 0.1737,  0.2955, -0.1892],
          [-0.2364,  0.0000, -0.1832]],

         [[-0.7010, -1.1919,  1.0778],
          [-0.1431, -1.3694, -0.9160],
          [ 0.3771, -0.1808, -0.6137]],

         [[-0.5214, -2.2948, -0.3137],
          [-0.4003, -5.5395, -2.8928],
          [ 0.7913, -1.9342, -0.7764]]],


        [[[ 0.6393,  0.3993,  0.9330],
          [-0.2474, -0.8675, -0.6761],
          [ 0.2971, -0.2367,  0.2297]],

         [[-0.9551, -0.4279,  0.0000],
          [-1.3315, -1.5427, -0.9220],
          [-0.3729, -1.0323, -0.4277]],

         [[ 0.4903,  0.2804,  0