In [1]:

from __future__ import print_function, division
%load_ext autoreload
%autoreload 2
import torch
import torch.nn as nn
import torch.optim as optim
from torch.autograd import Variable
from torch.optim import lr_scheduler
from torch.utils.data import Dataset, DataLoader
from torch.utils.data.sampler import SubsetRandomSampler
from torch.nn import init
import torch.nn.functional as F
import functools
import numpy as np
import torchvision
from torchvision import transforms, datasets, models
import os
import time

from torchsummary import summary

from tensorboardX import SummaryWriter

import matplotlib.pyplot as plt

In [2]:
# Training settings
# Mini-batch size shared by the train and test DataLoaders created below.
batch_size = 64

In [4]:
#%%

# Location of the CIFAR-10 files. The original absolute path stays the default;
# set the CIFAR10_ROOT environment variable to run the notebook on another machine.
DATA_ROOT = os.environ.get(
    'CIFAR10_ROOT',
    '/home/lee/Research/3.place_lbp_prediction_ISIE2019/Residual_Attention_Network/create_attention_model/data/')

# Training-time augmentation: random horizontal flip, then a random 32x32 crop
# taken from the image after padding it.
transform = transforms.Compose([
    transforms.RandomHorizontalFlip(),
    transforms.RandomCrop((32, 32), padding=4),   # a single int pads all four borders by 4 px
    transforms.ToTensor()
])
# Evaluation uses the raw images, only converted to tensors.
test_transform = transforms.Compose([
    transforms.ToTensor()
])

# download=True fetches the archive on first use; the test split is read from the same root.
train_dataset = datasets.CIFAR10(root=DATA_ROOT,
                                 train=True,
                                 transform=transform,
                                 download=True)

test_dataset = datasets.CIFAR10(root=DATA_ROOT,
                                train=False,
                                transform=test_transform)

# Data Loader (Input Pipeline)
train_loader = DataLoader(dataset=train_dataset,
                          batch_size=batch_size,
                          shuffle=True, num_workers=4)
test_loader = DataLoader(dataset=test_dataset,
                         batch_size=batch_size,
                         shuffle=False)

# Lookup tables keyed by phase name; the training loop indexes both with 'train' / 'test'.
dataloaders = {
    'train': train_loader,
    'test': test_loader
}
dataset_sizes = {
    'train': len(train_dataset),
    'test': len(test_dataset)
}

# Human-readable class names; their count also sizes the classifier's output layer.
classes = ('plane', 'car', 'bird', 'cat',
           'deer', 'dog', 'frog', 'horse', 'ship', 'truck')


print('train_size: ', dataset_sizes['train'], '\ntest_size: ', dataset_sizes['test'])
print(classes)
print('len class: ', len(classes))



Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to /home/lee/Research/3.place_lbp_prediction_ISIE2019/Residual_Attention_Network/create_attention_model/data/cifar-10-python.tar.gz
train_size:  50000 
test_size:  10000
('plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck')
len class:  10


In [5]:
from ResidualUnit import ResidualBlock
from ASPP_edit_PLACE_cifar10 import ASPP_places
# from attention_module import Attention_step1

In [12]:
class Attention_place(nn.Module):
    """Single-stage residual-attention classifier.

    A stem (conv + BN + ReLU + max-pool) feeds two parallel branches:

    * trunk: 1x1 conv -> two ``ResidualBlock``s -> 1x1 conv, plus a projected
      shortcut from the stem output;
    * mask: strided 1x1 conv -> ``ASPP_places`` -> bilinear resize back to the
      trunk resolution.

    The branches are combined as ``(1 + mask) * trunk``, reduced by strided
    convolutions and pooling, and classified by a linear layer.

    Shape comments below assume a 3x32x32 input. The fixed ``AvgPool2d(4)`` and
    ``Linear(2048, ...)`` head only line up when the feature map reaching the
    head is 4x4.

    NOTE(review): ``ResidualBlock`` and ``ASPP_places`` are defined in other
    modules; their output shapes are assumed to be compatible with the trunk
    (ASPP output must broadcast against 256 channels) -- confirm there.

    Args:
        num_classes: size of the output layer. When ``None`` (the default) the
            length of the module-level ``classes`` tuple is used, which is the
            original behaviour.
    """

    def __init__(self, num_classes=None):
        super(Attention_place, self).__init__()
        if num_classes is None:
            num_classes = len(classes)

        # Stem: halves the spatial resolution (32x32 -> 16x16), 64 channels.
        self.begin_residual_blocks = nn.Sequential(
            nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        )  # 64 x 16 x 16

        # ---- trunk branch ----
        self.trunk_first_conv = nn.Sequential(
            nn.BatchNorm2d(64),
            nn.ReLU(inplace=True),
            nn.Conv2d(64, 64, kernel_size=1)
        )  # 64 x 16 x 16
        self.trunk = nn.Sequential(
            ResidualBlock(64, 256, 1),
            ResidualBlock(256, 256, 1)
        )  # expected 256 x 16 x 16
        self.trunk_last_conv = nn.Sequential(
            nn.BatchNorm2d(256),
            nn.ReLU(inplace=True),
            nn.Conv2d(256, 256, kernel_size=1)
        )  # 256 x 16 x 16
        # Projects the 64-channel stem output to 256 channels for the shortcut sum.
        self.trunk_residual = nn.Sequential(
            nn.Conv2d(64, 256, 1),
            nn.BatchNorm2d(256),
            nn.ReLU(inplace=True)
        )  # 256 x 16 x 16

        # ---- mask branch ----
        self.aspp = ASPP_places()
        self.mask_first_conv = nn.Sequential(
            nn.Conv2d(64, 96, kernel_size=1, stride=2),
            nn.BatchNorm2d(96),
            nn.ReLU(inplace=True)
        )  # 96 x 8 x 8

        # ---- head ----
        self._last_conv = nn.Sequential(
            nn.Conv2d(256, 512, kernel_size=1, stride=2),   # 16x16 -> 8x8
            nn.BatchNorm2d(512),
            nn.Conv2d(512, 2048, kernel_size=1),
            nn.BatchNorm2d(2048),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2, padding=1)  # 8x8 -> 4x4
        )  # 2048 x 4 x 4
        self.mpool = nn.Sequential(
            nn.BatchNorm2d(2048),
            nn.ReLU(inplace=True),
            nn.AvgPool2d(kernel_size=4, stride=1)
        )  # 2048 x 1 x 1
        self.fc = nn.Linear(2048, num_classes)

    def forward(self, x):
        """Run the network and expose every intermediate activation.

        Args:
            x: input batch; the first conv expects 3 channels.

        Returns:
            A 10-tuple ``(out, out_trunk1, out_trunk2, out_trunk3, out_trunk4,
            feature_map, mask, out1, out2, out3)`` where ``out`` holds the class
            scores and the remaining entries are the intermediate tensors in
            the order they are computed.
        """
        x1 = self.begin_residual_blocks(x)  # 64 x 16 x 16

        # ---------------- trunk branch ----------------
        out_trunk1 = self.trunk_first_conv(x1)               # 64 x 16 x 16
        out_trunk2 = self.trunk(out_trunk1)                  # 256 x 16 x 16
        out_trunk3 = self.trunk_last_conv(out_trunk2)        # 256 x 16 x 16
        out_trunk4 = out_trunk3 + self.trunk_residual(x1)    # 256 x 16 x 16

        # ---------------- mask branch ----------------
        feature_map = self.mask_first_conv(x1)               # 96 x 8 x 8
        mask = self.aspp(feature_map)
        # Resize to the trunk's real spatial size rather than int(H / 2): the stem
        # max-pool rounds up for odd inputs, which int(H / 2) would not match.
        # align_corners=False is what F.upsample used by default, minus the warning.
        mask = F.interpolate(mask,
                             size=(out_trunk4.size(2), out_trunk4.size(3)),
                             mode="bilinear",
                             align_corners=False)

        # Residual attention: the mask re-weights the trunk features without
        # being able to zero them out.
        out1 = (1 + mask) * out_trunk4                       # 256 x 16 x 16
        out2 = self._last_conv(out1)                         # 2048 x 4 x 4
        out3 = self.mpool(out2)                              # 2048 x 1 x 1
        out = out3.view(out3.size(0), -1)                    # flatten to (batch, 2048)
        out = self.fc(out)

        return out, out_trunk1, out_trunk2, out_trunk3, out_trunk4, feature_map, mask, out1, out2, out3
        

In [13]:
def weights_init(m):
    """Initialise one module in place; intended for ``model.apply(weights_init)``.

    Modules are matched by class name, as before:

    * name contains ``'Conv'``      -> weight ~ N(0.0, 0.02)
    * name contains ``'BatchNorm'`` -> weight ~ N(1.0, 0.02), bias = 0

    Modules that match by name but have no ``weight`` (for example a container
    class whose name happens to contain ``Conv``, or a BatchNorm built with
    ``affine=False``) are skipped instead of raising. Conv biases are left at
    their existing values.

    Args:
        m: the ``nn.Module`` handed in by ``Module.apply``.
    """
    classname = m.__class__.__name__
    weight = getattr(m, 'weight', None)
    if weight is None:
        # Nothing to initialise on this module.
        return
    if 'Conv' in classname:            # Conv weight init
        nn.init.normal_(weight, 0.0, 0.02)
    elif 'BatchNorm' in classname:     # BatchNorm weight init
        nn.init.normal_(weight, 1.0, 0.02)
        bias = getattr(m, 'bias', None)
        if bias is not None:
            nn.init.constant_(bias, 0)

In [14]:
# Build the network with its default constructor arguments.
model = Attention_place()

In [15]:
# Re-initialise the parameters: Module.apply calls weights_init on every submodule.
model.apply(weights_init)

Attention_place(
  (begin_residual_blocks): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU(inplace)
    (3): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  )
  (trunk_first_conv): Sequential(
    (0): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (1): ReLU(inplace)
    (2): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1))
  )
  (trunk): Sequential(
    (0): ResidualBlock(
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace)
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn3): BatchNorm2d(64, e

In [16]:
# Move the model to the GPU when one is available, then print a layer-by-layer summary.
use_gpu = torch.cuda.is_available()
if use_gpu:
    print("Using CUDA, model")
    model.cuda() #after second other epoch model
# torchsummary creates its probe input on "cuda" unless told otherwise, which
# fails on a CPU-only machine where the model stays on the CPU; pass the device
# that matches where the model actually lives.
summary(model, (3, 32, 32), device="cuda" if use_gpu else "cpu")

Using CUDA, model
----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 64, 32, 32]           1,792
       BatchNorm2d-2           [-1, 64, 32, 32]             128
              ReLU-3           [-1, 64, 32, 32]               0
         MaxPool2d-4           [-1, 64, 16, 16]               0
       BatchNorm2d-5           [-1, 64, 16, 16]             128
              ReLU-6           [-1, 64, 16, 16]               0
            Conv2d-7           [-1, 64, 16, 16]           4,160
       BatchNorm2d-8           [-1, 64, 16, 16]             128
              ReLU-9           [-1, 64, 16, 16]               0
           Conv2d-10           [-1, 64, 16, 16]           4,096
      BatchNorm2d-11           [-1, 64, 16, 16]             128
             ReLU-12           [-1, 64, 16, 16]               0
           Conv2d-13           [-1, 64, 16, 16]          36,864
      BatchNorm2d-14 

In [17]:
# TensorBoard event writer used by the training loop for scalars and histograms.
# No log directory is passed, so tensorboardX chooses its default location.
writer = SummaryWriter()

In [22]:
def train_model(model, scheduler, criterion, optimizer, num_epochs=25):
    """Train ``model`` and return it loaded with its best test-phase weights.

    Each epoch runs a ``'train'`` phase and a ``'test'`` phase over the
    module-level ``dataloaders``; per-phase loss and accuracy are normalised by
    ``dataset_sizes`` and logged to the module-level ``writer``.

    Args:
        model: network whose forward pass returns a sequence with the class
            scores as its first element.
        scheduler: learning-rate scheduler; stepped once at the end of every
            epoch, after that epoch's optimizer updates.
        criterion: loss called as ``criterion(scores, labels)``.
        optimizer: optimizer updating ``model``'s parameters.
        num_epochs: number of epochs to run.

    Returns:
        The same ``model`` object, with the weights from the epoch that reached
        the highest test accuracy loaded into it.
    """
    # Local import so the function does not depend on another cell importing it first.
    import copy

    since = time.time()

    # Feed batches to whatever device the model already lives on, so the loop
    # also runs on CPU-only machines.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device('cpu')

    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0
    best_epoch = 0
    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch + 1, num_epochs))
        print('-' * 10)
        since1 = time.time()
        # Each epoch has a training and an evaluation phase
        for phase in ['train', 'test']:
            current_lr = optimizer.param_groups[0]['lr']
            print("lr:", current_lr)
            if phase == 'train':
                model.train()  # Set model to training mode
            else:
                model.eval()

            running_loss = 0.0
            running_corrects = 0
            num_batches = len(dataloaders[phase])
            for i, (inputs, labels) in enumerate(dataloaders[phase]):
                if i % 100 == 0:
                    print("\rTraining batch {}/{}".format(i, num_batches), end='', flush=True)
                inputs = inputs.to(device)
                labels = labels.to(device)

                optimizer.zero_grad()

                # forward; gradients are tracked only in the train phase
                with torch.set_grad_enabled(phase == 'train'):
                    # The model also returns intermediate activations; only the
                    # class scores are needed here.
                    out = model(inputs)[0]
                    _, preds = torch.max(out, 1)
                    loss = criterion(out, labels)
                    if phase == 'train':
                        loss.backward()
                        optimizer.step()

                # statistics (the loss is a batch mean, so weight it by batch size)
                running_loss += loss.item() * inputs.size(0)
                running_corrects += torch.sum(preds == labels).item()

            epoch_loss = running_loss / dataset_sizes[phase]
            epoch_acc = float(running_corrects) / dataset_sizes[phase]
            if phase == 'train':
                writer.add_scalar('data/train_loss_places', epoch_loss, epoch)
                writer.add_scalar('data/train_acc_places', epoch_acc, epoch)
                # Parameters do not change during the test phase, so one
                # histogram snapshot per epoch is enough.
                for name, param in model.named_parameters():
                    writer.add_histogram(name, param.detach().cpu().numpy(), epoch)
            else:
                writer.add_scalar('data/test_loss_places', epoch_loss, epoch)
                writer.add_scalar('data/test_acc_places', epoch_acc, epoch)

            # Elapsed time is measured from the start of the epoch, so the
            # test-phase figure includes the training time.
            time_elapsed1 = time.time() - since1
            print('\rEpoch process in {:.0f}m {:.0f}s'.format(
                time_elapsed1 // 60, time_elapsed1 % 60))
            print('{} Loss: {:.4f} Acc: {:.4f} lr: {:.8f}'.format(phase, epoch_loss,
                                                                  epoch_acc*100, current_lr))

            # keep a copy of the best-performing weights
            if phase == 'test' and epoch_acc > best_acc:
                best_acc = epoch_acc
                best_epoch = epoch
                best_model_wts = copy.deepcopy(model.state_dict())

        # Advance the schedule only after this epoch's optimizer steps; stepping
        # before the first update makes the decay arrive one epoch early.
        scheduler.step()

        print()

    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(
        time_elapsed // 60, time_elapsed % 60))
    print('Best val Acc: {:4f} at {}'.format(best_acc, best_epoch+1))

    # load best model weights
    model.load_state_dict(best_model_wts)
    return model


In [None]:
import copy
lr = 0.001  # 0.1
criterion = nn.CrossEntropyLoss()
# optimizer = optim.SGD(model.parameters(), lr=lr, momentum=0.9, nesterov=True, weight_decay=0.0001)
optimizer = optim.Adam(model.parameters(), lr=lr, weight_decay=0.0001)
# Multiply the learning rate by 0.1 every 25 scheduler steps.
scheduler = lr_scheduler.StepLR(optimizer, step_size=25, gamma=0.1)
total_epoch = 100

checkpoint_path = './Trained/cifar10_adam.pt'
# Create the output folder before the long run starts, so a missing directory
# cannot make torch.save fail after training has already finished.
os.makedirs(os.path.dirname(checkpoint_path), exist_ok=True)

model_train = train_model(model, scheduler, criterion, optimizer, num_epochs=total_epoch)
torch.save(model_train.state_dict(), checkpoint_path)

Epoch 1/100
----------
lr: 0.001
Epoch process in 0m 28s
train Loss: 1.0614 Acc: 62.4240 lr: 0.00100000
lr: 0.001
Epoch process in 0m 32s
test Loss: 1.0363 Acc: 63.3000 lr: 0.00100000

Epoch 2/100
----------
lr: 0.001
Epoch process in 0m 28s
train Loss: 0.9264 Acc: 67.3060 lr: 0.00100000
lr: 0.001
Epoch process in 0m 32s
test Loss: 1.0577 Acc: 62.2200 lr: 0.00100000

Epoch 3/100
----------
lr: 0.001
Epoch process in 0m 28s
train Loss: 0.8402 Acc: 70.4820 lr: 0.00100000
lr: 0.001
Epoch process in 0m 32s
test Loss: 0.9450 Acc: 67.4500 lr: 0.00100000

Epoch 4/100
----------
lr: 0.001
Epoch process in 0m 29s
train Loss: 0.7594 Acc: 73.4680 lr: 0.00100000
lr: 0.001
Epoch process in 0m 32s
test Loss: 0.9180 Acc: 67.6900 lr: 0.00100000

Epoch 5/100
----------
lr: 0.001
Epoch process in 0m 29s
train Loss: 0.6876 Acc: 76.1080 lr: 0.00100000
lr: 0.001
Epoch process in 0m 33s
test Loss: 0.7069 Acc: 76.0200 lr: 0.00100000

Epoch 6/100
----------
lr: 0.001
Epoch process in 0m 29s
train Loss: 0.6478