| Wide-Resnet 28x10
torch.Size([1, 10])


In [2]:
############### Pytorch CIFAR configuration file ###############
import math
import sys
import os
import functions.BinaryConnect as BC
import functions.DataAugmentation as DA
from functions.AutoAugment import AutoAugment, Cutout
from models.WideResnet_HRank import Wide_ResNet_HRank, wide_basic

# Epoch counter starts at start_epoch; the training loop runs num_epochs epochs.
start_epoch, num_epochs = 1, 140
# Label printed in the training banner; the optimizer itself is built in train().
optim_type = 'SGD'

# Per-channel statistics handed to transforms.Normalize, keyed by dataset name.
mean = dict(
    cifar10=(0.4914, 0.4822, 0.4465),
    cifar100=(0.5071, 0.4867, 0.4408),
)

std = dict(
    cifar10=(0.2023, 0.1994, 0.2010),
    cifar100=(0.2675, 0.2565, 0.2761),
)

# Human-readable class labels -- valid only for cifar-10.
classes = (
    'plane', 'car', 'bird', 'cat', 'deer',
    'dog', 'frog', 'horse', 'ship', 'truck',
)

def learning_rate(init, epoch):
    """Return the step-decayed learning rate to use at ``epoch``.

    The rate starts at ``init`` and is multiplied by 0.2 once for every
    milestone (40, 80, 120) that ``epoch`` is strictly greater than.
    """
    milestones = (40, 80, 120)
    decays_applied = sum(1 for milestone in milestones if epoch > milestone)
    return init * math.pow(0.2, decays_applied)

def get_hms(seconds):
    """Split a duration in seconds into an ``(hours, minutes, seconds)`` tuple.

    The components keep the numeric type produced by ``divmod`` (floats when
    ``seconds`` is a float).
    """
    whole_minutes, secs = divmod(seconds, 60)
    hours, mins = divmod(whole_minutes, 60)
    return (hours, mins, secs)


In [None]:
##### TRAINING CELL #####
from __future__ import print_function

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import torch.backends.cudnn as cudnn
from torch.autograd import Variable

import torchvision
import torchvision.transforms as transforms

import os
import sys
import time
import datetime


#Parameters settings
# Run-level switches read by the rest of this cell.
depth = 40 ##can be 10, 16, 22, 28(default), 34, 40
net_type = 'wide-resnet'  # only used in the "Building net type" log line
lr = 0.1  # initial learning rate, decayed per epoch by learning_rate()
widen_factor = 2 #any number, 10(default)
dropout = 0.3  # passed to the Wide_ResNet_HRank constructor
dataset = 'cifar10'  # 'cifar10' or 'cifar100'
testOnly = False  # True: load the saved checkpoint, evaluate it and exit
resume = False  # True: restart training from the saved checkpoint
bc = False  # True: wrap the network with the BinaryConnect helper
da = True  # True: get the data loaders from DA.DataAugmentation

# Hyper Parameter settings
use_cuda = torch.cuda.is_available()
best_acc = 0

# Resolve the class count once and fail fast on an unsupported dataset name,
# instead of hitting a NameError on num_classes / trainset further down.
_num_classes_by_dataset = {'cifar10': 10, 'cifar100': 100}
if dataset not in _num_classes_by_dataset:
    raise ValueError(
        "Unsupported dataset %r; expected one of %s"
        % (dataset, sorted(_num_classes_by_dataset))
    )
num_classes = _num_classes_by_dataset[dataset]

print('\n[Phase 1] : Data Preparation')
if da:
    print("| Using data augmentation")
    # aa / cut presumably enable AutoAugment and Cutout -- confirm in
    # functions.DataAugmentation.
    to_da = DA.DataAugmentation(dataset,aa=True,cut=True)
    trainset_length, trainloader, testloader = to_da.load_data()
else:
    print("| Using no augmentation")
    # Training images still get a random crop and horizontal flip before
    # normalisation; test images are only normalised.
    transform_train = transforms.Compose([
        transforms.RandomCrop(32, padding=4),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize(mean[dataset], std[dataset]),
    ])

    transform_test = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize(mean[dataset], std[dataset]),
    ])

    if dataset == 'cifar10':
        print("| Preparing CIFAR-10 dataset...")
        dataset_cls = torchvision.datasets.CIFAR10
    else:
        print("| Preparing CIFAR-100 dataset...")
        dataset_cls = torchvision.datasets.CIFAR100
    sys.stdout.write("| ")
    # Only the training split requests a download; the test split is then
    # built from the same ./data root without downloading again.
    trainset = dataset_cls(root='./data', train=True, download=True, transform=transform_train)
    testset = dataset_cls(root='./data', train=False, download=False, transform=transform_test)
    trainset_length = len(trainset)

    trainloader = torch.utils.data.DataLoader(trainset, batch_size=128, shuffle=True, num_workers=2)
    testloader = torch.utils.data.DataLoader(testset, batch_size=128, shuffle=False, num_workers=2)

# Define net
net = Wide_ResNet_HRank(depth, widen_factor, dropout, num_classes)
# Base name of the checkpoint file, e.g. 'wide-resnet-40x2'.
file_name = 'wide-resnet-'+str(depth)+'x'+str(widen_factor)

# Clear the pruning flag on every wide_basic block before baseline training.
for m in net.modules():
    if isinstance(m, wide_basic):
        m.pruning = False

if bc:
    # NOTE(review): BC is bound by `import functions.BinaryConnect as BC`,
    # i.e. to a module; calling it directly will fail unless that import
    # actually yields a callable -- confirm the intended class name.
    to_bc = BC(net)
    net = to_bc.model

# Test only option
# When testOnly is set: load the saved model, report its top-1 accuracy on
# testloader, then exit so the training phases below never run.
if (testOnly):
    print('\n[Test Phase] : Model setup')
    assert os.path.isdir('checkpoint'), 'Error: No checkpoint directory found!'
    # torch.load unpickles the file, so only load checkpoints you trust.
    checkpoint = torch.load('./checkpoint/'+dataset+os.sep+file_name+'.t7')
    # The checkpoint stores the model object itself under 'net'.
    net = checkpoint['net']

    if use_cuda:
        net.cuda()
        # Replicate the model across every visible GPU.
        net = torch.nn.DataParallel(net, device_ids=range(torch.cuda.device_count()))
        cudnn.benchmark = True

    net.eval()
    net.training = False
    test_loss = 0  # never updated in this branch
    correct = 0
    total = 0

    with torch.no_grad():
        for batch_idx, (inputs, targets) in enumerate(testloader):
            if use_cuda:
                inputs, targets = inputs.cuda(), targets.cuda()
            inputs, targets = Variable(inputs), Variable(targets)
            outputs = net(inputs)

            # Index of the largest output along dim 1 is the predicted class.
            _, predicted = torch.max(outputs.data, 1)
            total += targets.size(0)
            correct += predicted.eq(targets.data).cpu().sum()

        acc = 100.*correct/total
        print("| Test Result\tAcc@1: %.2f%%" %(acc))

    sys.exit(0)

# Model
# Either restore the network and its bookkeeping from the saved checkpoint,
# or initialise the freshly built network; then move it to the GPU(s) if any.
print('\n[Phase 2] : Model setup')
if(resume):
    # Load checkpoint
    print('| Resuming from checkpoint...')
    assert os.path.isdir('checkpoint'), 'Error: No checkpoint directory found!'
    # torch.load unpickles the file, so only load checkpoints you trust.
    checkpoint = torch.load('./checkpoint/'+dataset+os.sep+file_name+'.t7')
    net = checkpoint['net']
    best_acc = checkpoint['acc']
    # Training restarts at the epoch number stored with the best model.
    start_epoch = checkpoint['epoch']
else:
    print('| Building net type [' + net_type + ']...')
    # NOTE(review): conv_init is not defined or imported in the visible
    # cells -- confirm it is provided elsewhere before running this branch.
    net.apply(conv_init)

if use_cuda:
    net.cuda()
    # Replicate the model across every visible GPU.
    net = torch.nn.DataParallel(net, device_ids=range(torch.cuda.device_count()))
    cudnn.benchmark = True
    print('| Going fast AF with C U D A *o* !')

# Loss shared by train() and test().
criterion = nn.CrossEntropyLoss()

# Training
def train(epoch):
    """Train ``net`` for one pass over ``trainloader`` and print live stats.

    Args:
        epoch: Epoch number; selects the learning rate through
            ``learning_rate(lr, epoch)`` and is shown in the progress line.

    Reads the module-level ``net``, ``trainloader``, ``criterion``, ``lr``,
    ``use_cuda``, ``bc`` and, when ``bc`` is true, the ``to_bc`` wrapper
    created next to the network. Returns nothing.
    """
    net.train()
    net.training = True
    correct = 0
    total = 0
    epoch_lr = learning_rate(lr, epoch)
    # A new optimizer is built every epoch so the scheduled rate is applied;
    # this also means SGD momentum state does not carry over between epochs.
    optimizer = optim.SGD(net.parameters(), lr=epoch_lr, momentum=0.9, weight_decay=5e-4)

    print('\n=> Training Epoch #%d, LR=%.4f' %(epoch, epoch_lr))
    for batch_idx, (inputs, targets) in enumerate(trainloader):
        if use_cuda:
            inputs, targets = inputs.cuda(), targets.cuda() # GPU settings

        inputs, targets = Variable(inputs), Variable(targets)

        optimizer.zero_grad()
        if bc:
            # `bc` is only the boolean switch; the BinaryConnect operations
            # live on the `to_bc` wrapper (calling them on `bc` raised
            # AttributeError). Presumably: binarize weights for the forward
            # pass, restore them before backward, clip after -- confirm in
            # functions.BinaryConnect.
            to_bc.binarization()
            outputs = net(inputs)       # Forward Propagation
            loss = criterion(outputs,targets)
            to_bc.restore()
            loss.backward()
            to_bc.clip()
            optimizer.step()
        else:
            outputs = net(inputs)
            loss = criterion(outputs,targets)
            loss.backward()
            optimizer.step()

        _, predicted = torch.max(outputs.data, 1)
        total += targets.size(0)
        correct += predicted.eq(targets.data).cpu().sum()

        # '\r' rewrites the same console line on every iteration.
        # NOTE(review): the iteration total assumes a batch size of 128 and
        # over-counts by one when the training set size is a multiple of 128.
        sys.stdout.write('\r')
        sys.stdout.write('| Epoch [%3d/%3d] Iter[%3d/%3d]\t\tLoss: %.4f Acc@1: %.3f%%'
                %(epoch, num_epochs, batch_idx+1,
                    (trainset_length//128)+1, loss.item(), 100.*correct/total))
        sys.stdout.flush()

def test(epoch):
    """Evaluate ``net`` on ``testloader`` and checkpoint it on a new best.

    Prints the loss of the last evaluated batch together with the top-1
    accuracy over the whole loader. When that accuracy exceeds the
    module-level ``best_acc``, the model, accuracy and epoch are saved under
    ``./checkpoint/<dataset>/`` and ``best_acc`` is updated.
    """
    global best_acc
    net.eval()
    net.training = False
    test_loss = 0  # accumulated for parity with the loop; not reported
    n_correct = 0
    n_seen = 0
    with torch.no_grad():
        for images, labels in testloader:
            if use_cuda:
                images = images.cuda()
                labels = labels.cuda()
            images = Variable(images)
            labels = Variable(labels)
            logits = net(images)
            loss = criterion(logits, labels)

            test_loss += loss.item()
            predicted = torch.max(logits.data, 1)[1]
            n_seen += labels.size(0)
            n_correct += predicted.eq(labels.data).cpu().sum()

        acc = 100.*n_correct/n_seen
        print("\n| Validation Epoch #%d\t\t\tLoss: %.4f Acc@1: %.2f%%" %(epoch, loss.item(), acc))

        # Nothing to save unless this epoch beats the best accuracy so far.
        if not acc > best_acc:
            return

        print('| Saving Best model...\t\t\tTop1 = %.2f%%' %(acc))
        # Unwrap DataParallel so the checkpoint holds the bare model.
        model_to_save = net.module if use_cuda else net
        state = dict(net=model_to_save, acc=acc, epoch=epoch)

        save_point = './checkpoint/'+dataset+os.sep
        for directory in ('checkpoint', save_point):
            if not os.path.isdir(directory):
                os.mkdir(directory)
        torch.save(state, save_point+file_name+'.t7')
        best_acc = acc

# Driver: print the run settings, then alternate one training epoch and one
# evaluation pass for num_epochs epochs, reporting cumulative wall-clock time.
print('\n[Phase 3] : Training model')
print('| Training Epochs = ' + str(num_epochs))
print('| Initial Learning Rate = ' + str(lr))
print('| Optimizer = ' + str(optim_type))

elapsed_time = 0
for epoch in range(start_epoch, start_epoch+num_epochs):
    start_time = time.time()

    train(epoch)
    # test() also saves a checkpoint and updates best_acc on improvement.
    test(epoch)

    epoch_time = time.time() - start_time
    elapsed_time += epoch_time
    # get_hms returns an (h, m, s) tuple that fills the three format fields.
    print('| Elapsed time : %d:%02d:%02d'  %(get_hms(elapsed_time)))

print('\n[Phase 4] : Testing model')
# Reports the best accuracy seen during the run; no extra evaluation is done.
print('* Test results : Acc@1 = %.2f%%' %(best_acc))


0it [00:00, ?it/s][A


[Phase 1] : Data Preparation
| Using data augmentation
| Preparing CIFAR-10 dataset...
Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ../data/cifar-10-python.tar.gz



  0%|          | 0/170498071 [00:00<?, ?it/s][A
  0%|          | 24576/170498071 [00:00<13:01, 218155.22it/s][A
  0%|          | 73728/170498071 [00:00<11:00, 258216.57it/s][A
  0%|          | 139264/170498071 [00:00<09:04, 313003.46it/s][A
  0%|          | 270336/170498071 [00:00<07:04, 401073.45it/s][A
  0%|          | 401408/170498071 [00:00<05:40, 499582.89it/s][A
  0%|          | 483328/170498071 [00:01<07:54, 358049.19it/s][A
  1%|          | 974848/170498071 [00:01<05:42, 494495.50it/s][A
  1%|          | 1155072/170498071 [00:01<05:28, 516016.54it/s][A
  1%|          | 1302528/170498071 [00:01<04:57, 569033.10it/s][A
  1%|          | 1695744/170498071 [00:01<03:40, 764512.48it/s][A
  1%|          | 1900544/170498071 [00:02<03:15, 860778.36it/s][A
  1%|          | 2080768/170498071 [00:02<03:45, 747945.43it/s][A
  1%|▏         | 2252800/170498071 [00:02<03:12, 874676.51it/s][A
  1%|▏         | 2531328/170498071 [00:02<02:34, 1087458.36it/s][A
  2%|▏         | 271

 19%|█▉        | 32751616/170498071 [00:28<02:49, 813098.07it/s] [A
 19%|█▉        | 33054720/170498071 [00:28<02:12, 1038322.15it/s][A
 19%|█▉        | 33218560/170498071 [00:28<02:07, 1076801.52it/s][A
 20%|█▉        | 33366016/170498071 [00:28<02:04, 1104447.14it/s][A
 20%|█▉        | 33505280/170498071 [00:29<01:59, 1142543.57it/s][A
 20%|█▉        | 33644544/170498071 [00:29<02:00, 1132725.01it/s][A
 20%|█▉        | 33775616/170498071 [00:29<01:59, 1142151.53it/s][A
 20%|█▉        | 33906688/170498071 [00:29<01:59, 1146360.97it/s][A
 20%|█▉        | 34037760/170498071 [00:29<01:58, 1153511.31it/s][A
 20%|██        | 34168832/170498071 [00:29<01:57, 1162185.65it/s][A
 20%|██        | 34299904/170498071 [00:29<01:56, 1164516.58it/s][A
 20%|██        | 34430976/170498071 [00:29<01:56, 1166287.39it/s][A
 20%|██        | 34562048/170498071 [00:29<01:56, 1170605.90it/s][A
 20%|██        | 34693120/170498071 [00:30<01:56, 1167023.50it/s][A
 20%|██        | 34824192/17049807

 38%|███▊      | 64847872/170498071 [00:55<01:33, 1127201.88it/s][A
 38%|███▊      | 64987136/170498071 [00:55<01:34, 1120352.37it/s][A
 38%|███▊      | 65118208/170498071 [00:55<01:32, 1134468.89it/s][A
 38%|███▊      | 65249280/170498071 [00:56<01:31, 1145131.89it/s][A
 38%|███▊      | 65380352/170498071 [00:56<01:30, 1155998.46it/s][A
 38%|███▊      | 65503232/170498071 [00:56<01:58, 882733.30it/s] [A
 39%|███▊      | 65757184/170498071 [00:56<01:36, 1089816.96it/s][A
 39%|███▊      | 65904640/170498071 [00:56<01:33, 1113215.41it/s][A
 39%|███▊      | 66043904/170498071 [00:56<01:30, 1152818.64it/s][A
 39%|███▉      | 66183168/170498071 [00:56<01:31, 1139670.33it/s][A
 39%|███▉      | 66314240/170498071 [00:56<01:30, 1148691.94it/s][A
 39%|███▉      | 66445312/170498071 [00:57<01:30, 1155288.41it/s][A
 39%|███▉      | 66576384/170498071 [00:57<01:29, 1161665.21it/s][A
 39%|███▉      | 66707456/170498071 [00:57<01:29, 1164510.29it/s][A
 39%|███▉      | 66838528/17049807

 55%|█████▌    | 94461952/170498071 [01:23<01:07, 1126048.96it/s][A
 55%|█████▌    | 94576640/170498071 [01:23<01:08, 1113046.58it/s][A
 56%|█████▌    | 94691328/170498071 [01:23<01:12, 1050496.50it/s][A
 56%|█████▌    | 94806016/170498071 [01:23<01:12, 1051021.41it/s][A
 56%|█████▌    | 94912512/170498071 [01:23<01:15, 1005452.95it/s][A
 56%|█████▌    | 95019008/170498071 [01:23<01:19, 947697.64it/s] [A
 56%|█████▌    | 95117312/170498071 [01:23<01:21, 923203.18it/s][A
 56%|█████▌    | 95215616/170498071 [01:23<01:21, 922410.41it/s][A
 56%|█████▌    | 95313920/170498071 [01:24<01:21, 926715.20it/s][A
 56%|█████▌    | 95412224/170498071 [01:24<01:20, 931915.53it/s][A
 56%|█████▌    | 95510528/170498071 [01:24<01:20, 935313.66it/s][A
 56%|█████▌    | 95625216/170498071 [01:24<01:18, 953913.88it/s][A
 56%|█████▌    | 95739904/170498071 [01:24<01:16, 976295.66it/s][A
 56%|█████▌    | 95854592/170498071 [01:24<01:16, 982049.55it/s][A
 56%|█████▋    | 95969280/170498071 [01:24

 72%|███████▏  | 123166720/170498071 [01:50<00:41, 1151715.23it/s][A
 72%|███████▏  | 123289600/170498071 [01:50<00:40, 1172069.86it/s][A
 72%|███████▏  | 123412480/170498071 [01:50<00:41, 1145109.91it/s][A
 72%|███████▏  | 123543552/170498071 [01:50<00:40, 1152888.28it/s][A
 73%|███████▎  | 123674624/170498071 [01:50<00:40, 1156456.16it/s][A
 73%|███████▎  | 123805696/170498071 [01:50<00:40, 1162238.55it/s][A
 73%|███████▎  | 123936768/170498071 [01:50<00:40, 1163576.21it/s][A
 73%|███████▎  | 124067840/170498071 [01:50<00:39, 1165811.62it/s][A
 73%|███████▎  | 124198912/170498071 [01:50<00:39, 1171096.75it/s][A
 73%|███████▎  | 124329984/170498071 [01:51<00:39, 1170279.85it/s][A
 73%|███████▎  | 124461056/170498071 [01:51<00:39, 1169882.69it/s][A
 73%|███████▎  | 124592128/170498071 [01:51<00:39, 1174040.13it/s][A
 73%|███████▎  | 124723200/170498071 [01:51<00:39, 1172882.78it/s][A
 73%|███████▎  | 124854272/170498071 [01:51<00:38, 1176034.27it/s][A
 73%|███████▎  | 124

 91%|█████████ | 154443776/170498071 [02:16<00:13, 1171806.18it/s][A
 91%|█████████ | 154574848/170498071 [02:16<00:13, 1174534.40it/s][A
 91%|█████████ | 154705920/170498071 [02:17<00:13, 1173285.14it/s][A
 91%|█████████ | 154836992/170498071 [02:17<00:13, 1165112.36it/s][A
 91%|█████████ | 154968064/170498071 [02:17<00:13, 1166042.70it/s][A
 91%|█████████ | 155099136/170498071 [02:17<00:13, 1166253.77it/s][A
 91%|█████████ | 155230208/170498071 [02:17<00:13, 1168967.79it/s][A
 91%|█████████ | 155361280/170498071 [02:17<00:12, 1167419.32it/s][A
 91%|█████████ | 155492352/170498071 [02:17<00:12, 1167990.37it/s][A
 91%|█████████▏| 155623424/170498071 [02:17<00:12, 1172582.37it/s][A
 91%|█████████▏| 155754496/170498071 [02:17<00:12, 1172017.63it/s][A
 91%|█████████▏| 155885568/170498071 [02:18<00:12, 1168995.53it/s][A
 92%|█████████▏| 156016640/170498071 [02:18<00:12, 1171147.54it/s][A
 92%|█████████▏| 156147712/170498071 [02:18<00:12, 1168685.29it/s][A
 92%|█████████▏| 156

Extracting ../data/cifar-10-python.tar.gz to ../data
Files already downloaded and verified
| Wide-Resnet 40x2

[Phase 2] : Model setup
| Building net type [wide-resnet]...
| Going fast AF with C U D A *o* !

[Phase 3] : Training model
| Training Epochs = 140
| Initial Learning Rate = 0.1
| Optimizer = SGD

=> Training Epoch #1, LR=0.1000
| Epoch [  1/140] Iter[ 51/391]		Loss: 2.0855 Acc@1: 16.452%


170500096it [02:48, 1114839.90it/s]                               [A

| Epoch [  1/140] Iter[391/391]		Loss: 1.5936 Acc@1: 29.164%
| Validation Epoch #1			Loss: 1.8587 Acc@1: 45.15%
| Saving Best model...			Top1 = 45.15%
| Elapsed time : 0:00:58

=> Training Epoch #2, LR=0.1000


  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "


| Epoch [  2/140] Iter[391/391]		Loss: 1.4409 Acc@1: 45.130%
| Validation Epoch #2			Loss: 1.5417 Acc@1: 54.12%
| Saving Best model...			Top1 = 54.12%
| Elapsed time : 0:01:56

=> Training Epoch #3, LR=0.1000
| Epoch [  3/140] Iter[391/391]		Loss: 1.3242 Acc@1: 51.240%
| Validation Epoch #3			Loss: 1.0111 Acc@1: 57.99%
| Saving Best model...			Top1 = 57.99%
| Elapsed time : 0:02:54

=> Training Epoch #4, LR=0.1000
| Epoch [  4/140] Iter[391/391]		Loss: 1.0543 Acc@1: 55.264%
| Validation Epoch #4			Loss: 1.3563 Acc@1: 60.34%
| Saving Best model...			Top1 = 60.34%
| Elapsed time : 0:03:53

=> Training Epoch #5, LR=0.1000
| Epoch [  5/140] Iter[391/391]		Loss: 1.1771 Acc@1: 58.230%
| Validation Epoch #5			Loss: 1.2934 Acc@1: 61.17%
| Saving Best model...			Top1 = 61.17%
| Elapsed time : 0:04:51

=> Training Epoch #6, LR=0.1000
| Epoch [  6/140] Iter[391/391]		Loss: 1.1724 Acc@1: 60.468%
| Validation Epoch #6			Loss: 1.2542 Acc@1: 71.13%
| Saving Best model...			Top1 = 71.13%
| Elapsed tim

In [None]:
##Pruning##

from functions.HRankPruningIter import HRank

# Set the pruning flag on every wide_basic block before running HRank.
for m in net.modules():
    if isinstance(m, wide_basic):
        # NOTE(review): this flag was spelled `prunning` here but `pruning`
        # in the training cell; aligned on `pruning` -- confirm it matches
        # the attribute that wide_basic actually reads.
        m.pruning = True

# NOTE(review): pruning_ratios is defined but never handed to HRank --
# confirm whether it should be passed in.
pruning_ratios = [0.25,0.50,0.75]
PR = HRank(net)
PR.pruning_and_training(trainloader)
