In [1]:
import torch.nn as nn
import torch
import torchvision
import torchvision.transforms as transforms
import os
import math
# os.environ['CUDA_LAUNCH_BLOCKING'] = "0,1"
# Dataset location and loader settings, kept in one place so they are easy to tune.
IMAGENET_ROOT = '/home/mitlab26/ImageNet'
BATCH_SIZE = 128
NUM_WORKERS = 2

# The pipeline gets its own name so the imported `transforms` module is not
# shadowed by the object built from it.
preprocess = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225]),
])

# Training split is shuffled; the validation split keeps its order.
trainset = torchvision.datasets.ImageNet(root=IMAGENET_ROOT, split='train', transform=preprocess)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=BATCH_SIZE, shuffle=True, num_workers=NUM_WORKERS)
testset = torchvision.datasets.ImageNet(root=IMAGENET_ROOT, split='val', transform=preprocess)
testloader = torch.utils.data.DataLoader(testset, batch_size=BATCH_SIZE, shuffle=False, num_workers=NUM_WORKERS)
# training set: 1,281,167 images, validation set: 50,000 images

In [2]:
import torchvision.models as models
# Three ResNet-50 instances: two start from the pretrained weights, the third is
# randomly initialised (no `pretrained` flag).
Resnet50 = models.resnet50(pretrained=True)
compressed_Resnet50 = models.resnet50(pretrained=True)
encoding_Resnet50 = models.resnet50()

# Use the first GPU when CUDA is available, otherwise the CPU.
device = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu")
print(device)
# compressed_Resnet152 = nn.parallel.DataParallel(compressed_Resnet152)
for network in (Resnet50, compressed_Resnet50):
    network.to(device)
# Kept as the last expression of the cell so the moved module is displayed.
encoding_Resnet50.to(device)

cuda:0


ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 

In [3]:
# Find all convolutional layers and fully-connected layers.
# Each list follows the traversal order of the network's modules, so the same
# position in a `compressed_*` list and its `encoding_*` counterpart refers to
# the same layer of the architecture.
conv_layers = [module for module in compressed_Resnet50.modules()
               if isinstance(module, torch.nn.Conv2d)]
fc_layers = [module for module in compressed_Resnet50.modules()
             if isinstance(module, torch.nn.Linear)]

encoding_conv_layers = [module for module in encoding_Resnet50.modules()
                        if isinstance(module, torch.nn.Conv2d)]
encoding_fc_layers = [module for module in encoding_Resnet50.modules()
                      if isinstance(module, torch.nn.Linear)]

In [4]:
import torch.optim as optim
def train(epochs, learning_rate, model, stop):
    """Train ``model`` on the module-level ``trainloader`` with SGD (momentum 0.9).

    Args:
        epochs: number of passes over ``trainloader``.
        learning_rate: step size used for the first epoch. After every epoch it
            is divided by 100 and the new value is written into the optimizer.
        model: network to optimise in place. Batches are moved to the
            module-level ``device`` before the forward pass.
        stop: zero-based batch index at which an epoch is cut short, so at most
            ``stop + 1`` batches are processed per epoch.

    Returns:
        None. After every batch the running top-1 / top-5 accuracy of the
        current epoch and the batch loss are printed.
    """
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(model.parameters(), lr=learning_rate, momentum=0.9)
    k = 5
    for epoch in range(epochs):  # loop over the dataset multiple times
        total = 0
        top1 = 0
        topk = 0
        for i, (inputs, labels) in enumerate(trainloader):
            inputs, labels = inputs.to(device), labels.to(device)

            # forward + backward + optimize
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            # Accuracy bookkeeping only; no gradient is needed for it.
            _, maxk = torch.topk(outputs.detach(), k, dim=-1)
            total += labels.size(0)
            test_labels = labels.view(-1, 1)  # reshape labels from [n] to [n,1] to compare with [n,k]
            top1 += (test_labels == maxk[:, 0:1]).sum().item()
            topk += (test_labels == maxk).sum().item()
            print('Epoch: %d iteration: %d top1-accuracy: %.3f top5-accuracy: %.3f loss: %.3f'
                  % (epoch + 1, i + 1, 100 * top1 / total, 100 * topk / total, loss.item()))
            if i == stop:
                break

        # The optimizer keeps its own copy of the learning rate, so the decayed
        # value has to be pushed into every parameter group to take effect.
        learning_rate = learning_rate / 100
        for group in optimizer.param_groups:
            group['lr'] = learning_rate

In [5]:
import datetime
def evaluate_accuracy(testloader, model):
    """Print the elapsed time and the top-1 / top-5 accuracy of ``model``.

    Args:
        testloader: iterable yielding ``(images, labels)`` batches.
        model: callable returning one score per class for every image. The
            function does not change the model's train/eval mode.

    Batches are moved to the module-level ``device`` and evaluated with
    gradient tracking disabled. Nothing is returned; two lines are printed.
    """
    k = 5
    seen = 0
    hits_top1 = 0
    hits_topk = 0
    began = datetime.datetime.now()
    with torch.no_grad():
        for images, labels in testloader:
            images = images.to(device)
            labels = labels.to(device)
            # indices of the k highest scores, best first
            _, ranked = torch.topk(model(images), k, dim=-1)
            # labels reshaped from [n] to [n,1] so they broadcast against [n,k]
            matches = ranked == labels.view(-1, 1)
            seen += labels.size(0)
            hits_top1 += matches[:, 0:1].sum().item()
            hits_topk += matches.sum().item()
        elapsed = datetime.datetime.now() - began
        print(str(elapsed) + 'seconds')
        print('Accuracy of the network on total {} test images: top1={}% ; top{}={}%'.format(
            seen, 100 * hits_top1 / seen, k, 100 * hits_topk / seen))

In [6]:
# Linearization L(x)=f(a)+f'(a)(x-a)
def Linearization(model, p):
    """Replace the non-zero weights of a tensor by a piecewise-linear approximation.

    The weights are flattened and sorted. Treating the sorted non-zero values as
    samples f(0), f(1), ..., a finite-difference slope is computed at every
    sample (one-sided at both ends, central in between). The samples are then
    walked in order and grouped into segments: a segment starts at an anchor
    value f(a) with slope f'(a), and every member at offset d from the anchor is
    approximated by f(a) + f'(a) * d. A segment is closed after sample j (which
    still belongs to it) when |f'(a) - slope_j| > p * f'(a).

    Args:
        model: weight tensor of any shape; it is cloned, not modified.
        p: relative slope tolerance used in the segment test above.

    Returns:
        A tuple ``(model, interval, delta_x, all_fixed_points, all_derivatives)``:
            model: clone of the input reshaped to (1, N) in which every non-zero
                weight holds its approximated value.
            interval: int64 tensor of length N created on the CPU; segment id of
                each weight, stored at the weight's original flat position.
            delta_x: int64 tensor of length N on the input's device; offset of
                each weight from its segment anchor, at the original position.
            all_fixed_points: float tensor, anchor value f(a) of every segment.
            all_derivatives: float tensor, slope f'(a) of every segment.

        Weights that are exactly zero are left at zero and their ``interval`` /
        ``delta_x`` entries stay 0.
        NOTE(review): a decoder therefore cannot tell a zero weight from the
        first member of segment 0 using these two tensors alone - confirm how
        zeros are meant to be encoded.
    """
    approx = model.clone().view(1, -1)
    flat = approx[0]  # view: writing to `flat` updates `approx`
    target_device = flat.device
    total = flat.numel()

    # Sorting is done on detached data; it is bookkeeping, not part of any graph.
    ordered, order = torch.sort(flat.detach())
    keep = ordered != 0
    values = ordered[keep]       # sorted non-zero values
    positions = order[keep]      # original flat position of each of them
    count = values.numel()

    interval_out = torch.zeros(total, dtype=torch.long)
    delta_out = torch.zeros(total, dtype=torch.long, device=target_device)
    if count == 0:
        # Nothing to approximate: no segments at all.
        empty = torch.tensor([]).to(target_device)
        return approx, interval_out, delta_out, empty, empty.clone()

    # Slopes at every sample, computed in one shot instead of one device
    # round-trip per element. A single sample gets slope 0.
    slopes = torch.zeros_like(values)
    if count > 1:
        slopes[0] = values[1] - values[0]                 # left end-point
        slopes[-1] = values[-1] - values[-2]              # right end-point
        slopes[1:-1] = 0.5 * (values[2:] - values[:-2])   # interior points
    slope = slopes.tolist()
    value = values.tolist()

    # Partition the sorted samples into segments. The walk is inherently
    # sequential because each decision depends on the current segment's slope.
    derivative = slope[0]
    fixed_point = value[0]
    all_fixed_points = [fixed_point]   # f(a) per segment
    all_derivatives = [derivative]     # f'(a) per segment
    interval = []
    delta_x = []     # x - a
    new_value = []
    interval_index = 0
    distance = 0
    last = count - 1
    for j, current_slope in enumerate(slope):
        # Sample j always belongs to the segment that is currently open.
        interval.append(interval_index)
        delta_x.append(distance)
        new_value.append(fixed_point + distance * derivative)
        if j < last and abs(derivative - current_slope) > p * derivative:
            # Slope drifted too far: the next sample anchors a new segment.
            interval_index += 1
            derivative = slope[j + 1]
            fixed_point = value[j + 1]
            all_derivatives.append(derivative)
            all_fixed_points.append(fixed_point)
            distance = 0
        else:
            distance += 1

    # Scatter the per-sample results back to the original weight positions.
    # Source and destination are distinct tensors on purpose: scattering a
    # tensor into itself would read entries that were already overwritten.
    interval_out[positions.cpu()] = torch.tensor(interval, dtype=torch.long)
    delta_out[positions] = torch.tensor(delta_x, dtype=torch.long, device=target_device)
    flat[positions] = torch.tensor(new_value, dtype=flat.dtype, device=target_device)

    all_fixed_points = torch.tensor(all_fixed_points).to(target_device)
    all_derivatives = torch.tensor(all_derivatives).to(target_device)
    return approx, interval_out, delta_out, all_fixed_points, all_derivatives

In [7]:
# compress convolutional layers
import datetime
compressed_Resnet50.train()
starttime = datetime.datetime.now()
# The loop starts at 1: the first convolution keeps its original weights.
for l in range(1, len(conv_layers)):
    model, interval, distance, all_fixed_points, all_derivatives = Linearization(conv_layers[l].weight, 0.96)
    model = model.view(conv_layers[l].weight.size())  # back to the layer's weight shape
    # Swap the dense weight for its approximation in the compressed network ...
    del conv_layers[l].weight
    conv_layers[l].register_parameter('weight', nn.Parameter(model))
    # ... and store only the encoding in the matching layer of the encoding network.
    del encoding_conv_layers[l].weight
    encoding_conv_layers[l].register_parameter('fixed_points', nn.Parameter(all_fixed_points))
    encoding_conv_layers[l].register_parameter('derivatives', nn.Parameter(all_derivatives))
    # int32 rather than uint8: layers in this network have far more than 255
    # segments (e.g. 1222 fixed points for one layer), so 8-bit ids would wrap
    # around and no longer identify the segment.
    encoding_conv_layers[l].register_parameter('interval', nn.Parameter(interval.type(torch.int32), False))
    encoding_conv_layers[l].register_parameter('distance', nn.Parameter(distance.type(torch.int32), False))
    # The approximated weights are frozen.
    conv_layers[l].weight.requires_grad = False
endtime = datetime.datetime.now()
print(str(endtime - starttime)+'seconds')

	nonzero()
Consider using one of the following signatures instead:
	nonzero(*, bool as_tuple) (Triggered internally at  ../torch/csrc/utils/python_arg_parser.cpp:962.)
  """


tensor([1.1838e-08, 1.2123e-08, 1.2409e-08, 1.2694e-08, 1.2980e-08, 1.3265e-08,
        1.3551e-08, 1.3836e-08, 1.4121e-08, 1.4407e-08, 1.4692e-08, 1.4978e-08,
        1.5263e-08, 1.5549e-08, 1.5834e-08, 1.6119e-08, 1.6405e-08, 1.6690e-08,
        1.6976e-08, 1.7261e-08], device='cuda:0')
slope:[6.877476366184965e-11, 7.41331440679005e-11, 2.9060309714168397e-11, 2.7472246699744574e-11, 8.247580396414378e-11, 2.351430161695589e-10, 2.3098589707615247e-10, 1.3059908710033596e-10, 6.771827543161635e-11, 7.044542726930558e-11, 1.7528467566307881e-10, 1.9481705137991412e-10, 1.4333556563883576e-10, 7.135891877396716e-11, 2.3301360840832785e-11, 4.543920795185841e-11, 7.824318970506283e-11, 7.219735920216408e-11, 4.575717582611105e-11, 2.2884805161993427e-11]
distance:tensor([15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
        33, 34], device='cuda:0')
approximation error:0.009459627459634497
average error:0.014702211097351571
Epoch: 1 iteration: 1 top1-accuracy:

Epoch: 1 iteration: 1 top1-accuracy: 79.688 top5-accuracy: 93.750 loss: 0.779
Epoch: 1 iteration: 2 top1-accuracy: 82.812 top5-accuracy: 96.094 loss: 0.457
Epoch: 1 iteration: 3 top1-accuracy: 79.688 top5-accuracy: 94.792 loss: 0.995
Epoch: 1 iteration: 4 top1-accuracy: 81.055 top5-accuracy: 94.727 loss: 0.721
Epoch: 1 iteration: 5 top1-accuracy: 79.844 top5-accuracy: 93.906 loss: 1.008
Epoch: 1 iteration: 6 top1-accuracy: 78.776 top5-accuracy: 93.229 loss: 1.132
Epoch: 1 iteration: 7 top1-accuracy: 78.906 top5-accuracy: 93.527 loss: 0.852
Epoch: 1 iteration: 8 top1-accuracy: 79.785 top5-accuracy: 93.848 loss: 0.667
Epoch: 1 iteration: 9 top1-accuracy: 79.774 top5-accuracy: 93.837 loss: 0.815
Epoch: 1 iteration: 10 top1-accuracy: 80.078 top5-accuracy: 94.141 loss: 0.616
Epoch: 1 iteration: 11 top1-accuracy: 80.114 top5-accuracy: 94.034 loss: 0.778
Epoch: 1 iteration: 12 top1-accuracy: 79.948 top5-accuracy: 94.141 loss: 0.826
Epoch: 1 iteration: 13 top1-accuracy: 79.567 top5-accuracy: 9

Epoch: 1 iteration: 9 top1-accuracy: 80.556 top5-accuracy: 94.705 loss: 0.750
Epoch: 1 iteration: 10 top1-accuracy: 80.312 top5-accuracy: 95.078 loss: 0.713
Epoch: 1 iteration: 11 top1-accuracy: 80.611 top5-accuracy: 95.384 loss: 0.524
Epoch: 1 iteration: 12 top1-accuracy: 80.469 top5-accuracy: 95.508 loss: 0.707
Epoch: 1 iteration: 13 top1-accuracy: 80.409 top5-accuracy: 95.373 loss: 0.823
Epoch: 1 iteration: 14 top1-accuracy: 80.413 top5-accuracy: 95.480 loss: 0.709
Epoch: 1 iteration: 15 top1-accuracy: 80.260 top5-accuracy: 95.729 loss: 0.729
Epoch: 1 iteration: 16 top1-accuracy: 80.078 top5-accuracy: 95.605 loss: 0.737
Epoch: 1 iteration: 17 top1-accuracy: 79.688 top5-accuracy: 95.588 loss: 0.880
Epoch: 1 iteration: 18 top1-accuracy: 79.948 top5-accuracy: 95.703 loss: 0.580
Epoch: 1 iteration: 19 top1-accuracy: 79.811 top5-accuracy: 95.518 loss: 0.927
Epoch: 1 iteration: 20 top1-accuracy: 80.078 top5-accuracy: 95.547 loss: 0.553
Epoch: 1 iteration: 21 top1-accuracy: 79.985 top5-acc

Epoch: 1 iteration: 16 top1-accuracy: 80.615 top5-accuracy: 96.045 loss: 0.599
Epoch: 1 iteration: 17 top1-accuracy: 80.607 top5-accuracy: 96.140 loss: 0.682
Epoch: 1 iteration: 18 top1-accuracy: 80.339 top5-accuracy: 96.050 loss: 0.846
Epoch: 1 iteration: 19 top1-accuracy: 80.469 top5-accuracy: 96.053 loss: 0.594
Epoch: 1 iteration: 20 top1-accuracy: 80.391 top5-accuracy: 95.898 loss: 0.793
Epoch: 1 iteration: 21 top1-accuracy: 80.580 top5-accuracy: 95.759 loss: 0.879
tensor([-0.0519, -0.0519, -0.0519, -0.0518, -0.0518, -0.0518, -0.0518, -0.0517,
        -0.0517, -0.0517, -0.0517, -0.0516, -0.0516, -0.0516, -0.0516, -0.0515,
        -0.0515, -0.0515, -0.0515, -0.0514], device='cuda:0')
slope:[1.264922320842743e-05, 9.017065167427063e-06, 2.5760382413864136e-05, 2.72262841463089e-05, 6.945803761482239e-06, 1.697242259979248e-05, 1.892074942588806e-05, 1.2828037142753601e-05, 8.56257975101471e-06, 4.816800355911255e-06, 4.056841135025024e-06, 1.3984739780426025e-05, 1.4467164874076843e-

Epoch: 1 iteration: 1 top1-accuracy: 84.375 top5-accuracy: 95.312 loss: 0.585
Epoch: 1 iteration: 2 top1-accuracy: 83.594 top5-accuracy: 95.312 loss: 0.647
Epoch: 1 iteration: 3 top1-accuracy: 81.771 top5-accuracy: 94.531 loss: 0.765
Epoch: 1 iteration: 4 top1-accuracy: 82.031 top5-accuracy: 94.531 loss: 0.711
Epoch: 1 iteration: 5 top1-accuracy: 82.188 top5-accuracy: 94.844 loss: 0.659
Epoch: 1 iteration: 6 top1-accuracy: 81.510 top5-accuracy: 95.312 loss: 0.731
Epoch: 1 iteration: 7 top1-accuracy: 80.915 top5-accuracy: 94.978 loss: 0.830
Epoch: 1 iteration: 8 top1-accuracy: 81.641 top5-accuracy: 95.410 loss: 0.589
Epoch: 1 iteration: 9 top1-accuracy: 81.337 top5-accuracy: 95.226 loss: 0.776
Epoch: 1 iteration: 10 top1-accuracy: 81.094 top5-accuracy: 95.000 loss: 0.882
Epoch: 1 iteration: 11 top1-accuracy: 81.108 top5-accuracy: 94.957 loss: 0.815
Epoch: 1 iteration: 12 top1-accuracy: 80.794 top5-accuracy: 95.052 loss: 0.816
Epoch: 1 iteration: 13 top1-accuracy: 80.589 top5-accuracy: 9

Epoch: 1 iteration: 9 top1-accuracy: 79.948 top5-accuracy: 96.007 loss: 0.556
Epoch: 1 iteration: 10 top1-accuracy: 80.312 top5-accuracy: 96.016 loss: 0.635
Epoch: 1 iteration: 11 top1-accuracy: 80.185 top5-accuracy: 95.881 loss: 0.802
Epoch: 1 iteration: 12 top1-accuracy: 79.948 top5-accuracy: 95.638 loss: 0.914
Epoch: 1 iteration: 13 top1-accuracy: 79.748 top5-accuracy: 95.673 loss: 0.847
Epoch: 1 iteration: 14 top1-accuracy: 79.799 top5-accuracy: 95.592 loss: 0.726
Epoch: 1 iteration: 15 top1-accuracy: 79.583 top5-accuracy: 95.365 loss: 0.894
Epoch: 1 iteration: 16 top1-accuracy: 79.785 top5-accuracy: 95.410 loss: 0.605
Epoch: 1 iteration: 17 top1-accuracy: 80.147 top5-accuracy: 95.450 loss: 0.575
Epoch: 1 iteration: 18 top1-accuracy: 80.035 top5-accuracy: 95.616 loss: 0.639
Epoch: 1 iteration: 19 top1-accuracy: 80.263 top5-accuracy: 95.724 loss: 0.583
Epoch: 1 iteration: 20 top1-accuracy: 80.508 top5-accuracy: 95.781 loss: 0.580
Epoch: 1 iteration: 21 top1-accuracy: 80.766 top5-acc

Epoch: 1 iteration: 17 top1-accuracy: 81.434 top5-accuracy: 95.496 loss: 0.892
Epoch: 1 iteration: 18 top1-accuracy: 81.727 top5-accuracy: 95.530 loss: 0.555
Epoch: 1 iteration: 19 top1-accuracy: 81.785 top5-accuracy: 95.477 loss: 0.665
Epoch: 1 iteration: 20 top1-accuracy: 82.031 top5-accuracy: 95.586 loss: 0.496
Epoch: 1 iteration: 21 top1-accuracy: 82.106 top5-accuracy: 95.610 loss: 0.598
tensor([-0.0460, -0.0459, -0.0459, -0.0459, -0.0459, -0.0459, -0.0458, -0.0458,
        -0.0458, -0.0458, -0.0458, -0.0457, -0.0457, -0.0457, -0.0457, -0.0456,
        -0.0456, -0.0456, -0.0456, -0.0456], device='cuda:0')
slope:[6.776303052902222e-06, 3.7476420402526855e-06, 8.847564458847046e-07, 9.305775165557861e-06, 9.395182132720947e-06, 9.592622518539429e-06, 1.4029443264007568e-05, 9.234994649887085e-06, 6.3069164752960205e-06, 2.4568289518356323e-06, 3.596767783164978e-06, 8.048489689826965e-06, 5.522742867469788e-06, 4.364177584648132e-06, 4.537403583526611e-06, 2.0042061805725098e-06, 8.4

Epoch: 1 iteration: 1 top1-accuracy: 87.500 top5-accuracy: 96.094 loss: 0.587
Epoch: 1 iteration: 2 top1-accuracy: 85.156 top5-accuracy: 95.703 loss: 0.587
Epoch: 1 iteration: 3 top1-accuracy: 85.677 top5-accuracy: 96.354 loss: 0.546
Epoch: 1 iteration: 4 top1-accuracy: 83.789 top5-accuracy: 96.680 loss: 0.706
Epoch: 1 iteration: 5 top1-accuracy: 83.750 top5-accuracy: 96.562 loss: 0.623
Epoch: 1 iteration: 6 top1-accuracy: 83.203 top5-accuracy: 95.703 loss: 0.855
Epoch: 1 iteration: 7 top1-accuracy: 83.147 top5-accuracy: 95.759 loss: 0.698
Epoch: 1 iteration: 8 top1-accuracy: 82.910 top5-accuracy: 95.605 loss: 0.877
Epoch: 1 iteration: 9 top1-accuracy: 82.378 top5-accuracy: 95.573 loss: 0.833
Epoch: 1 iteration: 10 top1-accuracy: 82.188 top5-accuracy: 95.625 loss: 0.689
Epoch: 1 iteration: 11 top1-accuracy: 82.173 top5-accuracy: 95.668 loss: 0.683
Epoch: 1 iteration: 12 top1-accuracy: 81.771 top5-accuracy: 95.443 loss: 0.864
Epoch: 1 iteration: 13 top1-accuracy: 81.490 top5-accuracy: 9

Epoch: 1 iteration: 8 top1-accuracy: 82.324 top5-accuracy: 95.703 loss: 0.645
Epoch: 1 iteration: 9 top1-accuracy: 82.552 top5-accuracy: 95.833 loss: 0.549
Epoch: 1 iteration: 10 top1-accuracy: 82.734 top5-accuracy: 96.094 loss: 0.559
Epoch: 1 iteration: 11 top1-accuracy: 82.812 top5-accuracy: 96.023 loss: 0.610
Epoch: 1 iteration: 12 top1-accuracy: 82.031 top5-accuracy: 96.159 loss: 0.607
Epoch: 1 iteration: 13 top1-accuracy: 81.611 top5-accuracy: 95.974 loss: 0.794
Epoch: 1 iteration: 14 top1-accuracy: 81.473 top5-accuracy: 95.982 loss: 0.686
Epoch: 1 iteration: 15 top1-accuracy: 81.094 top5-accuracy: 95.729 loss: 0.778
Epoch: 1 iteration: 16 top1-accuracy: 81.104 top5-accuracy: 95.752 loss: 0.697
Epoch: 1 iteration: 17 top1-accuracy: 81.112 top5-accuracy: 95.772 loss: 0.640
Epoch: 1 iteration: 18 top1-accuracy: 80.990 top5-accuracy: 95.790 loss: 0.688
Epoch: 1 iteration: 19 top1-accuracy: 81.250 top5-accuracy: 95.847 loss: 0.538
Epoch: 1 iteration: 20 top1-accuracy: 81.289 top5-accu

Epoch: 1 iteration: 16 top1-accuracy: 82.422 top5-accuracy: 96.436 loss: 0.636
Epoch: 1 iteration: 17 top1-accuracy: 82.399 top5-accuracy: 96.415 loss: 0.630
Epoch: 1 iteration: 18 top1-accuracy: 82.292 top5-accuracy: 96.441 loss: 0.626
Epoch: 1 iteration: 19 top1-accuracy: 82.401 top5-accuracy: 96.505 loss: 0.598
Epoch: 1 iteration: 20 top1-accuracy: 82.188 top5-accuracy: 96.445 loss: 0.755
Epoch: 1 iteration: 21 top1-accuracy: 82.254 top5-accuracy: 96.540 loss: 0.552
tensor([-0.0423, -0.0423, -0.0423, -0.0422, -0.0422, -0.0422, -0.0422, -0.0422,
        -0.0422, -0.0422, -0.0422, -0.0422, -0.0422, -0.0422, -0.0421, -0.0421,
        -0.0421, -0.0421, -0.0421, -0.0421], device='cuda:0')
slope:[3.5427510738372803e-06, 3.5259872674942017e-06, 4.388391971588135e-06, 2.078711986541748e-06, 3.1385570764541626e-06, 3.559514880180359e-06, 2.739951014518738e-06, 2.825632691383362e-06, 1.0488554835319519e-05, 1.5009194612503052e-05, 6.0461461544036865e-06, 7.003545761108398e-07, 7.4375420808792

Epoch: 1 iteration: 1 top1-accuracy: 83.594 top5-accuracy: 94.531 loss: 0.745
Epoch: 1 iteration: 2 top1-accuracy: 82.422 top5-accuracy: 94.922 loss: 0.722
Epoch: 1 iteration: 3 top1-accuracy: 82.031 top5-accuracy: 95.312 loss: 0.694
Epoch: 1 iteration: 4 top1-accuracy: 82.227 top5-accuracy: 95.898 loss: 0.589
Epoch: 1 iteration: 5 top1-accuracy: 81.875 top5-accuracy: 96.250 loss: 0.559
Epoch: 1 iteration: 6 top1-accuracy: 81.771 top5-accuracy: 96.354 loss: 0.603
Epoch: 1 iteration: 7 top1-accuracy: 82.031 top5-accuracy: 96.317 loss: 0.522
Epoch: 1 iteration: 8 top1-accuracy: 81.055 top5-accuracy: 95.996 loss: 0.912
Epoch: 1 iteration: 9 top1-accuracy: 80.903 top5-accuracy: 96.094 loss: 0.601
Epoch: 1 iteration: 10 top1-accuracy: 81.016 top5-accuracy: 96.094 loss: 0.678
Epoch: 1 iteration: 11 top1-accuracy: 81.250 top5-accuracy: 96.023 loss: 0.621
Epoch: 1 iteration: 12 top1-accuracy: 81.120 top5-accuracy: 95.964 loss: 0.831
Epoch: 1 iteration: 13 top1-accuracy: 81.550 top5-accuracy: 9

Epoch: 1 iteration: 9 top1-accuracy: 80.469 top5-accuracy: 94.792 loss: 0.725
Epoch: 1 iteration: 10 top1-accuracy: 80.938 top5-accuracy: 95.078 loss: 0.555
Epoch: 1 iteration: 11 top1-accuracy: 81.179 top5-accuracy: 95.170 loss: 0.658
Epoch: 1 iteration: 12 top1-accuracy: 81.250 top5-accuracy: 95.312 loss: 0.620
Epoch: 1 iteration: 13 top1-accuracy: 81.130 top5-accuracy: 95.252 loss: 0.712
Epoch: 1 iteration: 14 top1-accuracy: 81.083 top5-accuracy: 95.424 loss: 0.610
Epoch: 1 iteration: 15 top1-accuracy: 81.146 top5-accuracy: 95.521 loss: 0.646
Epoch: 1 iteration: 16 top1-accuracy: 81.006 top5-accuracy: 95.459 loss: 0.838
Epoch: 1 iteration: 17 top1-accuracy: 81.296 top5-accuracy: 95.542 loss: 0.518
Epoch: 1 iteration: 18 top1-accuracy: 81.380 top5-accuracy: 95.747 loss: 0.615
Epoch: 1 iteration: 19 top1-accuracy: 81.414 top5-accuracy: 95.724 loss: 0.588
Epoch: 1 iteration: 20 top1-accuracy: 81.484 top5-accuracy: 95.703 loss: 0.617
Epoch: 1 iteration: 21 top1-accuracy: 81.696 top5-acc



Epoch: 1 iteration: 13 top1-accuracy: 81.911 top5-accuracy: 96.514 loss: 0.626
Epoch: 1 iteration: 14 top1-accuracy: 82.422 top5-accuracy: 96.484 loss: 0.479
Epoch: 1 iteration: 15 top1-accuracy: 82.396 top5-accuracy: 96.458 loss: 0.680
Epoch: 1 iteration: 16 top1-accuracy: 82.080 top5-accuracy: 96.338 loss: 0.720
Epoch: 1 iteration: 17 top1-accuracy: 81.756 top5-accuracy: 96.324 loss: 0.688
Epoch: 1 iteration: 18 top1-accuracy: 82.161 top5-accuracy: 96.441 loss: 0.493
Epoch: 1 iteration: 19 top1-accuracy: 82.196 top5-accuracy: 96.464 loss: 0.595
Epoch: 1 iteration: 20 top1-accuracy: 82.227 top5-accuracy: 96.406 loss: 0.676
Epoch: 1 iteration: 21 top1-accuracy: 81.882 top5-accuracy: 96.391 loss: 0.807
tensor([-0.0433, -0.0433, -0.0433, -0.0433, -0.0433, -0.0433, -0.0433, -0.0433,
        -0.0433, -0.0432, -0.0432, -0.0432, -0.0432, -0.0432, -0.0432, -0.0432,
        -0.0432, -0.0432, -0.0432, -0.0432], device='cuda:0')
slope:[4.777684807777405e-06, 1.8309801816940308e-06, 6.16535544395

Epoch: 1 iteration: 20 top1-accuracy: 81.914 top5-accuracy: 95.586 loss: 0.621
Epoch: 1 iteration: 21 top1-accuracy: 81.957 top5-accuracy: 95.722 loss: 0.623
tensor([-0.0401, -0.0401, -0.0401, -0.0401, -0.0401, -0.0400, -0.0400, -0.0400,
        -0.0400, -0.0400, -0.0400, -0.0400, -0.0400, -0.0400, -0.0400, -0.0400,
        -0.0400, -0.0400, -0.0400, -0.0400], device='cuda:0')
slope:[8.065253496170044e-07, 7.245689630508423e-07, 7.003545761108398e-07, 7.450580596923828e-07, 1.952052116394043e-06, 3.302469849586487e-06, 1.8905848264694214e-06, 4.675239324569702e-07, 5.5674463510513306e-06, 6.934627890586853e-06, 1.9669532775878906e-06, 9.238719940185547e-07, 7.711350917816162e-07, 1.2870877981185913e-06, 4.5746564865112305e-06, 5.595386028289795e-06, 3.634020686149597e-06, 3.0938535928726196e-06, 1.8309801816940308e-06, 2.4084001779556274e-06]
distance:tensor([23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40,
        41, 42], device='cuda:0')
approximation error:5.

Epoch: 1 iteration: 1 top1-accuracy: 81.250 top5-accuracy: 96.094 loss: 0.683
Epoch: 1 iteration: 2 top1-accuracy: 81.641 top5-accuracy: 95.703 loss: 0.613
Epoch: 1 iteration: 3 top1-accuracy: 83.073 top5-accuracy: 96.354 loss: 0.547
Epoch: 1 iteration: 4 top1-accuracy: 81.836 top5-accuracy: 96.094 loss: 0.692
Epoch: 1 iteration: 5 top1-accuracy: 82.188 top5-accuracy: 96.562 loss: 0.643
Epoch: 1 iteration: 6 top1-accuracy: 81.380 top5-accuracy: 96.224 loss: 0.797
Epoch: 1 iteration: 7 top1-accuracy: 80.804 top5-accuracy: 96.094 loss: 0.805
Epoch: 1 iteration: 8 top1-accuracy: 81.250 top5-accuracy: 96.191 loss: 0.497
Epoch: 1 iteration: 9 top1-accuracy: 81.076 top5-accuracy: 95.833 loss: 0.823
Epoch: 1 iteration: 10 top1-accuracy: 81.250 top5-accuracy: 95.781 loss: 0.650
Epoch: 1 iteration: 11 top1-accuracy: 81.463 top5-accuracy: 95.952 loss: 0.631
Epoch: 1 iteration: 12 top1-accuracy: 81.576 top5-accuracy: 96.029 loss: 0.612
Epoch: 1 iteration: 13 top1-accuracy: 81.791 top5-accuracy: 9

Epoch: 1 iteration: 8 top1-accuracy: 81.641 top5-accuracy: 95.410 loss: 0.658
Epoch: 1 iteration: 9 top1-accuracy: 82.118 top5-accuracy: 95.660 loss: 0.564
Epoch: 1 iteration: 10 top1-accuracy: 82.734 top5-accuracy: 95.938 loss: 0.514
Epoch: 1 iteration: 11 top1-accuracy: 82.599 top5-accuracy: 96.165 loss: 0.637
Epoch: 1 iteration: 12 top1-accuracy: 82.682 top5-accuracy: 95.964 loss: 0.725
Epoch: 1 iteration: 13 top1-accuracy: 82.933 top5-accuracy: 96.034 loss: 0.612
Epoch: 1 iteration: 14 top1-accuracy: 82.812 top5-accuracy: 95.982 loss: 0.684
Epoch: 1 iteration: 15 top1-accuracy: 82.604 top5-accuracy: 95.938 loss: 0.758
Epoch: 1 iteration: 16 top1-accuracy: 82.715 top5-accuracy: 95.898 loss: 0.723
Epoch: 1 iteration: 17 top1-accuracy: 82.767 top5-accuracy: 96.002 loss: 0.573
Epoch: 1 iteration: 18 top1-accuracy: 82.292 top5-accuracy: 96.050 loss: 0.786
Epoch: 1 iteration: 19 top1-accuracy: 82.237 top5-accuracy: 96.012 loss: 0.794
Epoch: 1 iteration: 20 top1-accuracy: 82.148 top5-accu

In [8]:
# compress fully-connected layers
import datetime
starttime = datetime.datetime.now()
for l in range(len(fc_layers)):
    model, interval, distance, all_fixed_points, all_derivatives = Linearization(fc_layers[l].weight, 0.95)
    model = model.view(fc_layers[l].weight.size())  # back to the layer's weight shape
    # Swap the dense weight for its approximation in the compressed network ...
    del fc_layers[l].weight
    fc_layers[l].register_parameter('weight', nn.Parameter(model))
    # ... and store only the encoding in the matching layer of the encoding network.
    del encoding_fc_layers[l].weight
    encoding_fc_layers[l].register_parameter('fixed_points', nn.Parameter(all_fixed_points))
    encoding_fc_layers[l].register_parameter('derivatives', nn.Parameter(all_derivatives))
    # int32 rather than uint8: segment ids and offsets can exceed 255, and
    # 8-bit storage would silently wrap them around.
    encoding_fc_layers[l].register_parameter('interval', nn.Parameter(interval.type(torch.int32), False))
    encoding_fc_layers[l].register_parameter('distance', nn.Parameter(distance.type(torch.int32), False))
    # The approximated weights are frozen.
    fc_layers[l].weight.requires_grad = False
endtime = datetime.datetime.now()
print(str(endtime - starttime)+'seconds')

tensor([-0.0823, -0.0823, -0.0823, -0.0822, -0.0822, -0.0822, -0.0822, -0.0822,
        -0.0822, -0.0822, -0.0822, -0.0822, -0.0822, -0.0822, -0.0821, -0.0821,
        -0.0821, -0.0821, -0.0821, -0.0821], device='cuda:0')
slope:[2.51084566116333e-06, 2.3618340492248535e-06, 1.776963472366333e-06, 5.807727575302124e-06, 1.4387071132659912e-05, 9.752810001373291e-06, 2.294778823852539e-06, 7.033348083496094e-06, 5.576759576797485e-06, 3.829598426818848e-06, 3.956258296966553e-06, 3.6098062992095947e-06, 4.123896360397339e-06, 3.1925737857818604e-06, 5.163252353668213e-06, 3.5651028156280518e-06, 1.0322779417037964e-05, 1.0613352060317993e-05, 3.3192336559295654e-06, 6.295740604400635e-06]
distance:tensor([ 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26,
        27, 28], device='cuda:0')
approximation error:8.948212601396089e-06
average error:1.6969487314010498e-05
Epoch: 1 iteration: 1 top1-accuracy: 80.469 top5-accuracy: 94.531 loss: 0.746
Epoch: 1 iteration: 2 to

In [11]:
# compressed_Resnet50.eval()
# evaluate_accuracy(testloader, compressed_Resnet50)
# Measure the untouched pretrained network in inference mode
# (eval() switches the mode and returns the module itself).
evaluate_accuracy(testloader, Resnet50.eval())

0:03:13.570003seconds
Accuracy of the network on total 50000 test images: top1=74.56% ; top5=92.012%


In [14]:
# Write the state dicts of the compressed network and of the encoding network.
PATH, PATH2 = './96compressed_Resnet50.pth', './96encoding_Resnet50.pth'
for checkpoint_path, network in ((PATH, compressed_Resnet50), (PATH2, encoding_Resnet50)):
    torch.save(network.state_dict(), checkpoint_path)

In [12]:
# One line per parameter of the encoding network: "<name>:<number of elements>".
for name, para in encoding_Resnet50.named_parameters():
    print(f"{name}:{para.numel()}")

conv1.weight:9408
bn1.weight:64
bn1.bias:64
layer1.0.conv1.fixed_points:158
layer1.0.conv1.derivatives:158
layer1.0.conv1.interval:4096
layer1.0.conv1.distance:4096
layer1.0.bn1.weight:64
layer1.0.bn1.bias:64
layer1.0.conv2.fixed_points:1222
layer1.0.conv2.derivatives:1222
layer1.0.conv2.interval:36864
layer1.0.conv2.distance:36864
layer1.0.bn2.weight:64
layer1.0.bn2.bias:64
layer1.0.conv3.fixed_points:558
layer1.0.conv3.derivatives:558
layer1.0.conv3.interval:16384
layer1.0.conv3.distance:16384
layer1.0.bn3.weight:256
layer1.0.bn3.bias:256
layer1.0.downsample.0.fixed_points:545
layer1.0.downsample.0.derivatives:545
layer1.0.downsample.0.interval:16384
layer1.0.downsample.0.distance:16384
layer1.0.downsample.1.weight:256
layer1.0.downsample.1.bias:256
layer1.1.conv1.fixed_points:480
layer1.1.conv1.derivatives:480
layer1.1.conv1.interval:16384
layer1.1.conv1.distance:16384
layer1.1.bn1.weight:64
layer1.1.bn1.bias:64
layer1.1.conv2.fixed_points:1227
layer1.1.conv2.derivatives:1227
layer1

In [13]:
# One line per parameter of the compressed network: "<name>:<number of elements>".
for name, para in compressed_Resnet50.named_parameters():
    print(f"{name}:{para.numel()}")

conv1.weight:9408
bn1.weight:64
bn1.bias:64
layer1.0.conv1.weight:4096
layer1.0.bn1.weight:64
layer1.0.bn1.bias:64
layer1.0.conv2.weight:36864
layer1.0.bn2.weight:64
layer1.0.bn2.bias:64
layer1.0.conv3.weight:16384
layer1.0.bn3.weight:256
layer1.0.bn3.bias:256
layer1.0.downsample.0.weight:16384
layer1.0.downsample.1.weight:256
layer1.0.downsample.1.bias:256
layer1.1.conv1.weight:16384
layer1.1.bn1.weight:64
layer1.1.bn1.bias:64
layer1.1.conv2.weight:36864
layer1.1.bn2.weight:64
layer1.1.bn2.bias:64
layer1.1.conv3.weight:16384
layer1.1.bn3.weight:256
layer1.1.bn3.bias:256
layer1.2.conv1.weight:16384
layer1.2.bn1.weight:64
layer1.2.bn1.bias:64
layer1.2.conv2.weight:36864
layer1.2.bn2.weight:64
layer1.2.bn2.bias:64
layer1.2.conv3.weight:16384
layer1.2.bn3.weight:256
layer1.2.bn3.bias:256
layer2.0.conv1.weight:32768
layer2.0.bn1.weight:128
layer2.0.bn1.bias:128
layer2.0.conv2.weight:147456
layer2.0.bn2.weight:128
layer2.0.bn2.bias:128
layer2.0.conv3.weight:65536
layer2.0.bn3.weight:512
lay