In [1]:
import torch
import torch.nn as nn
from torch.autograd import Variable
from torchvision import transforms, datasets
import torchvision
from torch.utils.data import DataLoader,Dataset
from PIL import Image

import math
import time
import numpy as np

In [2]:
class DarkNet(nn.Module):
    def __init__(self):

        super(DarkNet, self).__init__()
        
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=32, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(32)
        self.relu1 = nn.LeakyReLU(0.1)

        self.pool1 = nn.MaxPool2d(2)

        self.conv2 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(64)
        self.relu2 = nn.LeakyReLU(0.1)

        self.pool2 = nn.MaxPool2d(2)

        self.conv3 = nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn3 = nn.BatchNorm2d(128)
        self.relu3 = nn.LeakyReLU(0.1)

        self.conv4 = nn.Conv2d(in_channels=128, out_channels=64, kernel_size=1, stride=1, padding=1, bias=False)
        self.bn4 = nn.BatchNorm2d(64)
        self.relu4 = nn.LeakyReLU(0.1)

        self.conv5 = nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn5 = nn.BatchNorm2d(128)
        self.relu5 = nn.LeakyReLU(0.1)

        self.pool5 = nn.MaxPool2d(2)

        self.conv6 = nn.Conv2d(in_channels=128, out_channels=256, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn6 = nn.BatchNorm2d(256)
        self.relu6 = nn.LeakyReLU(0.1)

        self.conv7 = nn.Conv2d(in_channels=256, out_channels=128, kernel_size=1, stride=1, padding=1, bias=False)
        self.bn7 = nn.BatchNorm2d(128)
        self.relu7 = nn.LeakyReLU(0.1)

        self.conv8 = nn.Conv2d(in_channels=128, out_channels=256, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn8 = nn.BatchNorm2d(256)
        self.relu8 = nn.LeakyReLU(0.1)

        self.pool8 = nn.MaxPool2d(2)

        self.conv9 = nn.Conv2d(in_channels=256, out_channels=512, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn9 = nn.BatchNorm2d(512)
        self.relu9 = nn.LeakyReLU(0.1)

        self.conv10 = nn.Conv2d(in_channels=512, out_channels=256, kernel_size=1, stride=1, padding=1, bias=False)
        self.bn10 = nn.BatchNorm2d(256)
        self.relu10 = nn.LeakyReLU(0.1)

        self.conv11 = nn.Conv2d(in_channels=256, out_channels=512, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn11 = nn.BatchNorm2d(512)
        self.relu11 = nn.LeakyReLU(0.1)

        self.conv12 = nn.Conv2d(in_channels=512, out_channels=256, kernel_size=1, stride=1, padding=1, bias=False)
        self.bn12 = nn.BatchNorm2d(256)
        self.relu12 = nn.LeakyReLU(0.1)

        self.conv13 = nn.Conv2d(in_channels=256, out_channels=512, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn13 = nn.BatchNorm2d(512)
        self.relu13 = nn.LeakyReLU(0.1)

        self.pool13 = nn.MaxPool2d(2)

        self.conv14 = nn.Conv2d(in_channels=512, out_channels=1024, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn14 = nn.BatchNorm2d(1024)
        self.relu14 = nn.LeakyReLU(0.1)

        self.conv15 = nn.Conv2d(in_channels=1024, out_channels=512, kernel_size=1, stride=1, padding=1, bias=False)
        self.bn15 = nn.BatchNorm2d(512)
        self.relu15 = nn.LeakyReLU(0.1)

        self.conv16 = nn.Conv2d(in_channels=512, out_channels=1024, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn16 = nn.BatchNorm2d(1024)
        self.relu16 = nn.LeakyReLU(0.1)

        self.conv17 = nn.Conv2d(in_channels=1024, out_channels=512, kernel_size=1, stride=1, padding=1, bias=False)
        self.bn17 = nn.BatchNorm2d(512)
        self.relu17 = nn.LeakyReLU(0.1)
        
        self.conv18 = nn.Conv2d(in_channels=512, out_channels=1024, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn18 = nn.BatchNorm2d(1024)
        self.relu18 = nn.LeakyReLU(0.1)

        self.conv19 = nn.Conv2d(in_channels=1024, out_channels=30, kernel_size=3, stride=1, padding=1, bias=False)

        self.pool19 = nn.AvgPool2d(7)
        
    def forward(self, x):
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu1(x)
        x = self.pool1(x)
        x = self.conv2(x)
        x = self.bn2(x)
        x = self.relu2(x)
        x = self.pool2(x)
        x = self.conv3(x)
        x = self.bn3(x)
        x = self.relu3(x)
        x = self.conv4(x)
        x = self.bn4(x)
        x = self.relu4(x)
        x = self.conv5(x)
        x = self.bn5(x)
        x = self.relu5(x)
        x = self.pool5(x)
        x = self.conv6(x)
        x = self.bn6(x)
        x = self.relu6(x)
        x = self.conv7(x)
        x = self.bn7(x)
        x = self.relu7(x)
        x = self.conv8(x)
        x = self.bn8(x)
        x = self.relu8(x)
        x = self.pool8(x)
        x = self.conv9(x)
        x = self.bn9(x)
        x = self.relu9(x)
        x = self.conv10(x)
        x = self.bn10(x)
        x = self.relu10(x)
        x = self.conv11(x)
        x = self.bn11(x)
        x = self.relu11(x)
        x = self.conv12(x)
        x = self.bn12(x)
        x = self.relu12(x)
        x = self.conv13(x)
        x = self.bn13(x)
        x = self.relu13(x)
        x = self.pool13(x)
        x = self.conv14(x)
        x = self.bn14(x)
        x = self.relu14(x)
        x = self.conv15(x)
        x = self.bn15(x)
        x = self.relu15(x)
        x = self.conv16(x)
        x = self.bn16(x)
        x = self.relu16(x)
        x = self.conv17(x)
        x = self.bn17(x)
        x = self.relu17(x)
        x = self.conv18(x)
        x = self.conv19(x)
        x = self.pool19(x)
        x = x.view(x.size(0), -1)
        return x

In [3]:
batchsize = 32
train_data_transform = transforms.Compose([
        transforms.RandomResizedCrop(224),
        #transforms.RandomRotation(10),
        #transforms.ColorJitter(hue=0.1,saturation=0.75),
        transforms.RandomHorizontalFlip(),
        #transforms.RandomVerticalFlip(),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225])
    ])

tset_data_transform = transforms.Compose([
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225])
    ])

train_dataset = torchvision.datasets.ImageFolder(root='/media/lulugay/PC/CCCV-30/train_set',transform=train_data_transform)
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batchsize, shuffle=True, num_workers=8)
 
val_dataset = torchvision.datasets.ImageFolder(root='/media/lulugay/PC/CCCV-30/test_set', transform=tset_data_transform)
val_loader = torch.utils.data.DataLoader(val_dataset, batch_size=batchsize, shuffle=True, num_workers=8)

In [4]:
model = DarkNet()
pruned_model = DarkNet()
model.cuda()
pruned_model.cuda()
loss_func = nn.CrossEntropyLoss()

In [5]:
def test(net,data_loader):
    corret,total = 0,0
    running_loss = 0.0
    for inputs,labels in data_loader:
        inputs = inputs.cuda()
        labels = labels.cuda()
        outputs = net(Variable(inputs))
        _,predicted = torch.max(outputs.data,1)
        total += labels.size(0)
        corret += (predicted == labels).sum()
        accuracy = 100 * float(corret) / float(total)
        loss = loss_func(outputs, labels)
        running_loss += loss.data[0]
    running_loss = running_loss / (15000 / batchsize)
    print('Test Accuracy: %.4f %%, Loss: %.4f' % (accuracy,running_loss))
    return accuracy,float(running_loss)

def poly(base_lr,power,total_epoch,now_epoch):
    return base_lr*(1-math.pow(float(now_epoch)/float(total_epoch),power))

def get_layer_fm_index(index):
    layer_index = 0
    fm_index = 0
    while (index >= model_config[layer_index]):
        index = index - model_config[layer_index]
        layer_index = layer_index + 1
    fm_index = index
    return layer_index,fm_index

def get_index(layer_index,fm_index):
    index = 0
    for i in range (layer_index):
        index = index + model_config[i]
    index = index + fm_index
    return index

In [6]:
#model.load_state_dict(torch.load('epoch39_weight.pkl'))
num_epochs = 80
for epoch in range(0,num_epochs):
    batch_size_start = time.time()
    running_loss = 0.0
    corret,total = 0,0
    print('train epoch%d'%epoch)
    for i,(inputs, labels) in enumerate(train_loader):
        inputs = inputs.cuda()
        labels = labels.cuda()
        inputs = Variable(inputs)
        lables = Variable(labels)
        lr=poly(0.01,4,num_epochs,epoch)
        optimizer = torch.optim.SGD(model.parameters(), lr, momentum=0.9)
        optimizer.zero_grad()
        outputs = model(inputs)
        _,predicted = torch.max(outputs.data,1)
        total += labels.size(0)
        corret += (predicted == labels).sum()
        loss = loss_func(outputs, labels)        #交叉熵
        loss.backward()
        optimizer.step()                          #更新权重
        running_loss += loss.data[0]
    accuracy = 100 * float(corret) / float(total)
    running_loss = running_loss / (45000 / batchsize)
    print('Train Accuracy: %.4f %%, Loss: %.4f' % (accuracy, running_loss))
    print('test epoch%d'%epoch)
    test(model,val_loader)
    torch.save(model.state_dict(),'epoch%d_weight.pkl'%epoch)
    print('epoch [%d/%d] need time %.4f' % (epoch + 1, num_epochs, time.time() - batch_size_start))
    

train epoch0




Train Accuracy: 20.3067 %, Loss: 2.7864
test epoch0


  del sys.path[0]


Test Accuracy: 21.0467 %, Loss: 2.7205
epoch [1/80] need time 212.8345
train epoch1
Train Accuracy: 33.6400 %, Loss: 2.2433
test epoch1
Test Accuracy: 35.1267 %, Loss: 2.1964
epoch [2/80] need time 214.7331
train epoch2
Train Accuracy: 40.5756 %, Loss: 1.9810
test epoch2
Test Accuracy: 40.2400 %, Loss: 1.9778
epoch [3/80] need time 214.7800
train epoch3
Train Accuracy: 45.5578 %, Loss: 1.8056
test epoch3
Test Accuracy: 43.7733 %, Loss: 1.8627
epoch [4/80] need time 214.7582
train epoch4
Train Accuracy: 49.1067 %, Loss: 1.6736
test epoch4
Test Accuracy: 39.6667 %, Loss: 2.0659
epoch [5/80] need time 215.2308
train epoch5
Train Accuracy: 51.5511 %, Loss: 1.5752
test epoch5
Test Accuracy: 49.9467 %, Loss: 1.6453
epoch [6/80] need time 214.9244
train epoch6
Train Accuracy: 54.1667 %, Loss: 1.4851
test epoch6
Test Accuracy: 49.0933 %, Loss: 1.6912
epoch [7/80] need time 215.2587
train epoch7
Train Accuracy: 55.9889 %, Loss: 1.4171
test epoch7
Test Accuracy: 51.0733 %, Loss: 1.6276
epoch [8/

Process Process-440:
Process Process-437:
Process Process-433:
Process Process-439:
Process Process-438:
Process Process-436:
Process Process-434:
Process Process-435:
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
  File "/usr/lib/python2.7/multiprocessing/process.py", line 258, in _bootstrap
Traceback (most recent call last):
  File "/usr/lib/python2.7/multiprocessing/process.py", line 258, in _bootstrap
Traceback (most recent call last):
  File "/usr/lib/python2.7/multiprocessing/process.py", line 258, in _bootstrap
  File "/usr/lib/python2.7/multiprocessing/process.py", line 258, in _bootstrap
  File "/usr/lib/python2.7/multiprocessing/process.py", line 258, in _bootstrap
    self.run()
    self.run()
  File "/usr/lib/python2.7/multiprocessing/process.py", line 258, in _bootstrap
  File "/usr/lib/python2.7/multiprocessing

KeyboardInterrupt: 

In [None]:
accuracy = []
loss = []
for i in range(32):
    print('prune fm %d' % i)
    model.load_state_dict(torch.load('epoch54_weight.pkl'))
    conv1_weight = model.conv1.weight.data.cpu().numpy()
    zeros = np.zeros(conv1_weight[0].shape)
    conv1_weight[i]=zeros
    model.conv1.weight.data=torch.from_numpy(conv1_weight).cuda()
    accuracy_tmp,loss_tmp = test(model,val_loader)
    accuracy.append(accuracy_tmp)
    loss.append(loss_tmp)
for i in range(model.conv2.out_channels):
    print('prune fm %d' % i)
    model.load_state_dict(torch.load('epoch54_weight.pkl'))
    conv_weight = model.conv2.weight.data.cpu().numpy()
    zeros = np.zeros(conv_weight[0].shape)
    conv_weight[i]=zeros
    model.conv2.weight.data=torch.from_numpy(conv_weight).cuda()
    accuracy_tmp,loss_tmp = test(model,val_loader)
    accuracy.append(accuracy_tmp)
    loss.append(loss_tmp)

In [None]:
def prune_weight(in_remain,out_remain,layer,pruned_layer,layer_index,prune_model_config):
    in_channel=0
    out_channel=0
    weight_remain=layer.weight.data.cpu().numpy()[:prune_model_config[layer_index],:prune_model_config[layer_index-1]]
    for i in out_remain:
        for j in in_remain:
            weight_remain[out_channel,in_channel]=weight[i,j]
            in_channel=in_channel+1
        in_channel=0
        out_channel=out_channel+1
    pruned_layer.weight.data = torch.from_numpy(weight_remain).cuda()

def prune_batchnorm(remain,layer,pruned_layer,layer_index,prune_model_config):
    channel=0
    weight_remain=np.zeros(prune_model_config[layer_index])
    bias_remain=np.zeros(prune_model_config[layer_index])
    mean_remain=np.zeros(prune_model_config[layer_index])
    var_remain=np.zeros(prune_model_config[layer_index])
    for i in remain:
        weight_remain[channel]=layer.weight.data.cpu().numpy()[i]
        bias_remain[channel]=layer.bias.data.cpu().numpy()[i]
        mean_remain[channel]=layer.running_mean.data.cpu().numpy()[i]
        var_remain[channel]=layer.running_var.data.cpu().numpy()[i]
        channel = channel + 1
    pruned_layer.weight.data = torch.from_numpy(weight_remain).cuda()
    pruned_layer.bias.data = torch.from_numpy(bias_remain).cuda()
    pruned_layer.running_mean.data = torch.from_numpy(mean_remain).cuda()
    pruned_layer.running_var.data = torch.from_numpy(var_remain).cuda()

In [None]:
model_config = [32,64,128,64,128,256,128,256,512,256,512,256,512,1024,512,1024,512]
prune_model_config = [16,32,64,32,64,128,64,128,256,128,256,128,256,512,256,512,256]
layer_cnt = 58
layer_index = 0

for i in range(layer_cnt):
    _,pruned_layer = pruned_model._modules.items()[i]
    _,layer = model._modules.items()[i]
    if (str(pruned_layer.type).find("Conv2d") >= 0):
        print 'conv%d'%(layer_index+1)
        if(layer_index == 0):
            out_remain=[]
            
            pruned_layer.out_channels = prune_model_config[layer_index]
            #print 'out_channels = %d' % pruned_layer.out_channels
            #weight = layer.weight.data.cpu().numpy()
            prune_weight()
            
        elif(layer_index == 17):
            pruned_layer.in_channels = prune_model_config[layer_index - 1]
            #print 'in_channels = %d' % pruned_layer.in_channels
            #weight = layer.weight.data.cpu().numpy()
            pruned_weight = layer.weight.data.cpu().numpy()[:prune_model_config[layer_index - 1]]
            pruned_layer.weight.data = torch.from_numpy(pruned_weight).cuda()
            
        else:
            pruned_layer.in_channels = prune_model_config[layer_index - 1]
            pruned_layer.out_channels = prune_model_config[layer_index]
            #print 'in_channels = %d' % pruned_layer.in_channels
            #print 'out_channels = %d' % pruned_layer.out_channels
            #weight = layer.weight.data.cpu().numpy()
            pruned_weight = layer.weight.data.cpu().numpy()[:prune_model_config[layer_index]]
            pruned_layer.weight.data = torch.from_numpy(pruned_weight).cuda()
            
    elif (str(pruned_layer.type).find("BatchNorm2d") >= 0):
        print 'bn%d'%(layer_index+1)
        pruned_layer.num_features = prune_model_config[layer_index]
        print 'num_features = %d'% pruned_layer.num_features
        pruned_layer.weight
        
        
        
        
        layer_index = layer_index + 1
    elif (str(pruned_layer.type).find("MaxPool2d") >= 0):
        print 'MaxPool2d'
    elif (str(pruned_layer.type).find("AvgPool2d") >= 0):
        print 'AvgPool2d'
    else:
        print 'LeakyReLU'
print pruned_model

In [42]:

# loss_rank:删掉每个feature map后的loss排名
# model_config:原模型中每个卷积层的层数
# prune_cnt:要被删除的feature map层数
# remain_config:每一个卷积层要保留的feature map的编号
def generate_remain_config(loss_rank,model_config,prune_cnt):
    #准备一个二维数组
    remain_config=[[]]
    for i in range(len(model_config)-1):
        remain_config.append([])
    
    fm_cnt=len(loss_rank) - prune_cnt#要保留的feature map个数
    layer_index=0
    fm_index=0
    for i in range(fm_cnt):
        layer_index,fm_index=get_layer_fm_index(loss_rank[i])
        remain_config[layer_index].append(fm_index)
    
    return remain_config

def generate_prune_model_config(remain_config):
    layer_cnt=len(remain_config)
    prune_model_config=[]
    for i in range(layer_cnt):
        prune_model_config.append(len(remain_config[i]))
    return prune_model_config

In [45]:
loss = np.random.random(6176)
loss_rank = np.argsort(loss)
remain_config=generate_remain_config(loss_rank,model_config,3088)
prune_model_config=generate_prune_model_config(remain_config)
print prune_model_config

[21, 32, 71, 33, 62, 130, 61, 127, 239, 108, 254, 127, 247, 507, 270, 537, 262]


In [12]:
conv1_remain=[0,1,2,4,6,7,8,9,12,15,17,19,20,21,23,24]
conv2_remain=[0,1,2,4,6,7,8,9,12,15,17,19,20,21,23,24,25,26,27,28,29,33,34,35,36,37,39,41,43,44,45,47]
prune_model_config=[18,32,64,32,64,128,64,128,256,128,256,128,256,512,256,512,256]
conv2_weight = model.conv2.weight.data.cpu().numpy()
conv2_weight_remain=prune_weight(conv1_remain,conv2_remain,model.conv2.weight.data.cpu().numpy(),prune_model_config,1)
print conv2_weight_remain.shape


(32, 18, 3, 3)
