In [1]:
import argparse
import os
import random
import shutil
import time
import warnings
import sys
sys.path.append('../')

import torch
import torch.nn as nn
import torch.nn.parallel
import torch.backends.cudnn as cudnn
import torch.distributed as dist
import torch.optim
import torch.multiprocessing as mp
import torch.utils.data
import torch.utils.data.distributed
import torchvision.transforms as transforms
import torchvision.datasets as datasets
import torchvision.models as models
from resnet import *

from main import *


In [2]:
# Hand-built stand-in for an argparse namespace; the parse_args() calls below
# were left disabled.
# args = parser.parse_args(args=[])
# args = parser.parse_args()
import easydict
args = easydict.EasyDict({
    # Identifier-style key so the value is reachable as `args.batch_size`
    # (the name argparse derives from a `--batch-size` flag).
    "batch_size": 256,
    # Original hyphenated spelling, kept for any args["batch-size"] lookup.
    # It cannot be read as an attribute: `args.batch-size` is a subtraction.
    "batch-size": 256,
    "epochs": 100,          # exclusive upper bound of the epoch loop
    "data": 0,              # NOTE(review): not read by any cell shown here -- confirm whether main.py needs it
    'arch': 'resnet18',     # stored in every checkpoint
    'lr': 0.1,              # passed to the SGD optimizer
    'momentum': 0.9,        # passed to the SGD optimizer
    'weight_decay': 1e-4,   # passed to the SGD optimizer
    'start_epoch': 90,      # first epoch index of the epoch loop
    'gpu': 0,               # NOTE(review): presumably read by main.train / main.validate via `args` -- confirm
})


In [3]:
# Device the model and the loss are moved to; switch to the commented line
# to run on GPU instead.
device = 'cpu'
# device = 'cuda'

# Report how many CUDA devices this process can see.
ngpus_per_node = torch.cuda.device_count()
print(ngpus_per_node)

1


In [4]:
# Build torchvision's ResNet-18 with pre-trained weights and move it to `device`.
print("=> using pre-trained model '{}'".format('resnet18'))
model = models.resnet18(pretrained=True)
model.to(device)

# Cross-entropy loss, placed on the same device as the model.
criterion = nn.CrossEntropyLoss()
criterion = criterion.to(device)

# Plain SGD over all model parameters; hyper-parameters come from `args`.
optimizer = torch.optim.SGD(
    model.parameters(),
    lr=args.lr,
    momentum=args.momentum,
    weight_decay=args.weight_decay,
)


=> using pre-trained model 'resnet18'


In [6]:
# Print each state-dict entry name followed by the shape of its tensor.
model_dict = model.state_dict()
for entry_name, entry_tensor in model_dict.items():
    print(entry_name)
    print(entry_tensor.shape)

conv1.weight
torch.Size([64, 3, 7, 7])
bn1.weight
torch.Size([64])
bn1.bias
torch.Size([64])
bn1.running_mean
torch.Size([64])
bn1.running_var
torch.Size([64])
bn1.num_batches_tracked
torch.Size([])
layer1.0.conv1.weight
torch.Size([64, 64, 3, 3])
layer1.0.bn1.weight
torch.Size([64])
layer1.0.bn1.bias
torch.Size([64])
layer1.0.bn1.running_mean
torch.Size([64])
layer1.0.bn1.running_var
torch.Size([64])
layer1.0.bn1.num_batches_tracked
torch.Size([])
layer1.0.conv2.weight
torch.Size([64, 64, 3, 3])
layer1.0.bn2.weight
torch.Size([64])
layer1.0.bn2.bias
torch.Size([64])
layer1.0.bn2.running_mean
torch.Size([64])
layer1.0.bn2.running_var
torch.Size([64])
layer1.0.bn2.num_batches_tracked
torch.Size([])
layer1.1.conv1.weight
torch.Size([64, 64, 3, 3])
layer1.1.bn1.weight
torch.Size([64])
layer1.1.bn1.bias
torch.Size([64])
layer1.1.bn1.running_mean
torch.Size([64])
layer1.1.bn1.running_var
torch.Size([64])
layer1.1.bn1.num_batches_tracked
torch.Size([])
layer1.1.conv2.weight
torch.Size([64, 6

In [7]:
# Switch the model to evaluation mode. The call is the cell's last expression,
# so the module it returns is what the notebook displays as this cell's output.
# NOTE(review): the epoch loop further down presumably relies on main.train to
# switch back to training mode -- confirm.
model.eval()

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
  

In [7]:
# Data loading: root of the ILSVRC CLS-LOC images and its train / val sub-folders.
data_dir = '../ILSVRC/Data/CLS-LOC/'
traindir, valdir = (os.path.join(data_dir, split) for split in ('train', 'val'))

In [8]:
# Per-channel normalisation step, appended to both pipelines below.
normalize = transforms.Normalize(
    mean=[0.485, 0.456, 0.406],
    std=[0.229, 0.224, 0.225],
)

# Training pipeline: random resized crop to 224 and random horizontal flip,
# then tensor conversion and normalisation.
train_transform = transforms.Compose([
    transforms.RandomResizedCrop(224),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    normalize,
])

# Validation pipeline: resize to 256 and centre-crop to 224, with no random
# transforms, then tensor conversion and normalisation.
val_transform = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    normalize,
])

# One ImageFolder dataset per split, each reading from its own directory.
train_dataset = datasets.ImageFolder(traindir, transform=train_transform)
val_dataset = datasets.ImageFolder(valdir, transform=val_transform)

In [9]:
# No custom sampler is used; the training DataLoader built in the next cell
# enables shuffling exactly when this is None.
# Distributed alternative, currently disabled:
# train_sampler = torch.utils.data.distributed.DistributedSampler(train_dataset)
train_sampler = None

In [10]:
# Training batches of 256 images. Shuffling is turned on only when no sampler
# is supplied.
train_loader = torch.utils.data.DataLoader(
    train_dataset,
    batch_size=256,
    sampler=train_sampler,
    shuffle=train_sampler is None,
    num_workers=8,
    pin_memory=True,
)

In [11]:
# Validation batches of 32 images, served in dataset order (no shuffling).
val_loader = torch.utils.data.DataLoader(
    val_dataset,
    batch_size=32,
    shuffle=False,
    num_workers=4,
    pin_memory=True,
)

In [None]:
# Epoch loop over [args.start_epoch, args.epochs). The helpers it calls
# (adjust_learning_rate, train, validate, save_checkpoint) are not defined in
# this notebook -- presumably they come from `from main import *`.
best_acc1 = 0
acc1 = 0
train_loss, val_acc = [], []

for epoch in range(args.start_epoch, args.epochs):
    adjust_learning_rate(optimizer, epoch, args)

    # One pass over the training data, then a pass over the validation set.
    epoch_loss = train(train_loader, model, criterion, optimizer, epoch, args)
    acc1 = validate(val_loader, model, criterion, args)

    # Keep the per-epoch history and report it.
    train_loss.append(epoch_loss)
    val_acc.append(acc1)
    print('************train_loss {} val_acc {}*************'.format(epoch_loss, acc1))

    # Decide whether this epoch is the best so far before updating the record.
    is_best = acc1 > best_acc1
    best_acc1 = max(acc1, best_acc1)

    # A checkpoint is written after every epoch; `is_best` is handed on to
    # save_checkpoint. (A rank check for multi-process runs used to guard
    # this call and is disabled.)
    checkpoint_state = {
        'epoch': epoch + 1,
        'arch': args.arch,
        'state_dict': model.state_dict(),
        'best_acc1': best_acc1,
        'optimizer': optimizer.state_dict(),
    }
    save_checkpoint(checkpoint_state, is_best)

Epoch: [0][   0/5005]	Time  2.857 ( 2.857)	Data  2.362 ( 2.362)	Loss 1.3540e+00 (1.3540e+00)	Acc@1  64.84 ( 64.84)	Acc@5  86.72 ( 86.72)
