In [2]:

from __future__ import print_function, division
%load_ext autoreload
%autoreload 2
import torch
import torch.nn as nn
import torch.optim as optim
from torch.autograd import Variable
from torch.optim import lr_scheduler
from torch.utils.data import Dataset, DataLoader
from torch.utils.data.sampler import SubsetRandomSampler
from torch.nn import init
import torch.nn.functional as F
import functools
import numpy as np
import torchvision
from torchvision import transforms, datasets, models
import os
import time

from torchsummary import summary

from tensorboardX import SummaryWriter

import matplotlib.pyplot as plt

In [3]:
# Training settings
batch_size = 32

In [4]:
#%% 이걸 사용하자
t_image_size=224
transform_train = transforms.Compose([
    transforms.Resize(t_image_size),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor()
])
transform_val = transforms.Compose([
    transforms.Resize(t_image_size),
    transforms.ToTensor()
])
transform_test = transforms.Compose([
    transforms.Resize(t_image_size),
    transforms.ToTensor()
])

data_dir = '/media/lee/E61C94F21C94BECD/place_dataset/Places2/data_256/20'
# data_dir = '/media/lee/E61C94F21C94BECD/place_dataset/Places2/data_class50/'


train_image_datasets = torchvision.datasets.ImageFolder(os.path.join(data_dir),
                                     transform = transform_train)
val_image_datasets = torchvision.datasets.ImageFolder(os.path.join(data_dir),
                                     transform = transform_val)
test_image_datasets = torchvision.datasets.ImageFolder(os.path.join(data_dir),
                                     transform = transform_test)


dataset_size = len(train_image_datasets)
print('total size:', dataset_size)
class_names_train = train_image_datasets.classes
class_names_val = val_image_datasets.classes
class_names_test = test_image_datasets.classes
# print(class_names)
print('# of train class: ', len(class_names_train))
print('# of valid class: ', len(class_names_val))
print('# of test class : ', len(class_names_test))
## SPLIT DATASET
train_split= 0.7
validate_split = 0.20
test_split = 0.10
train_size = int(train_split * dataset_size)
validation_size = int(validate_split * dataset_size)
test_size = int(dataset_size - train_size - validation_size)

# ########### CURRENTLY DOING THIS, WHICH WORKS ###########

indices = list(range(dataset_size))
np.random.shuffle(indices)
train_indices = indices[:train_size]
temp = int(train_size+validation_size)
val_indices = indices[train_size:temp]
test_indices = indices[temp:]
train_sampler = SubsetRandomSampler(train_indices)
valid_sampler = SubsetRandomSampler(val_indices)
test_sampler = SubsetRandomSampler(test_indices)

train_loader = torch.utils.data.DataLoader(
    train_image_datasets, batch_size=batch_size, sampler=train_sampler,
    num_workers=4)
valid_loader = torch.utils.data.DataLoader(
    val_image_datasets, batch_size=batch_size, sampler=valid_sampler,
    num_workers=4)
test_loader = torch.utils.data.DataLoader(
    test_image_datasets, batch_size=batch_size, sampler=test_sampler,
    num_workers=4)

dataloaders = {
    'train': train_loader,
    'valid': valid_loader,
    'test' : test_loader
}
image_datasets = {
    'train': train_sampler,
    'valid': valid_sampler,
    'test' : test_sampler
}

dataset_sizes ={
    'train': len(image_datasets['train']),
    'valid': len(image_datasets['valid']),
    'test' : len(image_datasets['test'])
}

# dataset_sizes = {x: len(image_datasets[x]) for x in ['train', 'val']}
# print('train_class:' len(train_image_datasets.classes))

print('train_size: ',dataset_sizes['train'], '\nvalid_size: ',dataset_sizes['valid'], 
      '\ntest_size : ',dataset_sizes['test'])
# class_names = image_datasets['train'].classes
# print(class_names)
# print('len class: ', len(class_names))



total size: 96675
# of train class:  20
# of valid class:  20
# of test class :  20
train_size:  67672 
valid_size:  19335 
test_size :  9668


In [5]:
from ResidualUnit import ResidualBlock
from ASPP_edit_PLACE import ASPP_places
# from attention_module import Attention_step1

In [6]:
class Attention_place(nn.Module):
    def __init__(self):
        super(Attention_place, self).__init__()
        self.begin_residual_blocks = nn.Sequential(
            nn.Conv2d(3,64,kernel_size=7, stride = 2, padding = 3),
            nn.BatchNorm2d(64),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2, padding = 1)
        )#56x56
        self.trunk_first_conv = nn.Sequential(
            nn.BatchNorm2d(64),
            nn.ReLU(inplace=True),
            nn.Conv2d(64,64, kernel_size=1)
        )#56x56
        self.trunk = nn.Sequential(
            ResidualBlock(64, 256, 1),
            ResidualBlock(256, 256, 1)
        )#56x56
        self.trunk_last_conv = nn.Sequential(
            nn.BatchNorm2d(256),
            nn.ReLU(inplace=True),
            nn.Conv2d(256, 256, kernel_size = 1)
        )#56x56
        self.trunk_residual = nn.Sequential(
            nn.Conv2d(64,256,1),
            nn.BatchNorm2d(256),
            nn.ReLU(inplace=True)
#             nn.MaxPool2d(kernel_size=3, stride = 2, padding = 1)
        )
#         resnet = ResNet18()
#         self.num_classes = len(class_names)
#         self.resnet = nn.Sequential(*list(resnet.children())[:-2])#8x8

        self.aspp = ASPP_places()
        self.mask_first_conv = nn.Sequential(
            nn.Conv2d(64, 96, kernel_size=1, stride = 2),
            nn.BatchNorm2d(96),
            nn.ReLU(inplace=True)
        )#28x28
        self._last_conv = nn.Sequential(
            nn.Conv2d(256, 512, kernel_size=1, stride = 2),
            nn.BatchNorm2d(512),
            nn.Conv2d(512, 2048, kernel_size=1, stride = 2),
            nn.BatchNorm2d(2048),
            nn.ReLU(inplace=True),

#             nn.BatchNorm2d(512),
#             nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride = 2, padding = 1)
        )#7x7
        
        
        self.mpool = nn.Sequential(
            nn.BatchNorm2d(2048),
            nn.ReLU(inplace=True),
            nn.AvgPool2d(kernel_size=7, stride=1)
        )
        self.fc = nn.Linear(2048, len(class_names_train))
    def forward(self, x):
        h = int(x.size()[2] / 4)
        w = int(x.size()[3] / 4)
#         print('h:',h, 'w:',w)
        x1 = self.begin_residual_blocks(x)#56x56
#         print(x1.shape)
#         print(x1.shape)
#################trunk_part#################
        out_trunk1 = self.trunk_first_conv(x1)#56x56
        out_trunk2 = self.trunk(out_trunk1)#56x56
        out_trunk3 = self.trunk_last_conv(out_trunk2)#56x56
#         out_trunk = F.upsample(out_trunk, size=(h,w), mode="bilinear")#56x56x256
        out_trunk4 = out_trunk3 + self.trunk_residual(x1)#56x56
#         out_trunk = F.softmax(out_trunk)
#################trunk_part#################        

#################mask_part#################
        feature_map = self.mask_first_conv(x1)#28x28x256
        mask = self.aspp(feature_map)#28x28x256
#         print(mask.shape)
        mask = F.upsample(mask, size=(h, w), mode="bilinear") #56x56x256
#         mask = F.softmax(mask)
#################mask_part#################
        out1 = (1 + mask) * out_trunk4 #56x56x256
#         print(out.shape)
        out2 = self._last_conv(out1)#7x7x512
#         print(out.shape)
        out3 = self.mpool(out2)#1x1x512
#         print(out.shape)
        out = out3.view(out3.size(0),-1)
#         print(out.shape)
        out = self.fc(out)

        return out, out_trunk1, out_trunk2, out_trunk3, out_trunk4, feature_map, mask, out1, out2, out3
        

In [7]:
def weights_init(m):
    classname = m.__class__.__name__
    if classname.find('Conv') != -1:         # Conv weight init
        m.weight.data.normal_(0.0, 0.02)
    elif classname.find('BatchNorm') != -1:  # BatchNorm weight init
        m.weight.data.normal_(1.0, 0.02)
        m.bias.data.fill_(0)

In [8]:
model = Attention_place()

In [9]:
model.apply(weights_init)

Attention_place(
  (begin_residual_blocks): Sequential(
    (0): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3))
    (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU(inplace)
    (3): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  )
  (trunk_first_conv): Sequential(
    (0): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (1): ReLU(inplace)
    (2): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1))
  )
  (trunk): Sequential(
    (0): ResidualBlock(
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace)
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn3): BatchNorm2d(64, e

In [10]:
use_gpu = torch.cuda.is_available()
if use_gpu:
    print("Using CUDA, model")
    model.cuda() #after second other epoch model
summary(model,(3,224,224))

Using CUDA, model
----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1         [-1, 64, 112, 112]           9,472
       BatchNorm2d-2         [-1, 64, 112, 112]             128
              ReLU-3         [-1, 64, 112, 112]               0
         MaxPool2d-4           [-1, 64, 56, 56]               0
       BatchNorm2d-5           [-1, 64, 56, 56]             128
              ReLU-6           [-1, 64, 56, 56]               0
            Conv2d-7           [-1, 64, 56, 56]           4,160
       BatchNorm2d-8           [-1, 64, 56, 56]             128
              ReLU-9           [-1, 64, 56, 56]               0
           Conv2d-10           [-1, 64, 56, 56]           4,096
      BatchNorm2d-11           [-1, 64, 56, 56]             128
             ReLU-12           [-1, 64, 56, 56]               0
           Conv2d-13           [-1, 64, 56, 56]          36,864
      BatchNorm2d-14 

  "See the documentation of nn.Upsample for details.".format(mode))


In [11]:
writer = SummaryWriter()

In [12]:
def train_model(model, scheduler, criterion, optimizer, num_epochs=25):
    since = time.time()

    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0
    best_epoch = 0
    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch + 1, num_epochs))
        print('-' * 10)
        since1 = time.time()
        # Each epoch has a training and validation phase
        train_batches = len(dataloaders['train'])
        for phase in ['train','valid']:
            
            print("lr:", optimizer.param_groups[0]['lr'])
            if phase == 'train':
                scheduler.step()
                model.train()  # Set model to training mode
            else:
                model.eval()
                
            running_loss = 0.0
            running_corrects = 0
            for i, data in enumerate(dataloaders[phase]):
                if i % 100 == 0:
                    print("\rTraining batch {}/{}".format(i, len(dataloaders[phase])), end='', flush=True)
                # Use half training dataset
                if i >= len(dataloaders[phase]):
                    break    
                inputs, labels = data
                inputs, labels = Variable(inputs.cuda()),Variable(labels.cuda())

                optimizer.zero_grad()

                    # forward
                    # track history if only in train
                with torch.set_grad_enabled(phase == 'train'):
                    out, out_trunk1, out_trunk2, out_trunk3, out_trunk4, feature_map, mask, out1, out2, out3 = model(inputs)
                    _, preds = torch.max(out, 1)
                    loss = criterion(out, labels)
#                     print(loss)
                    if phase == 'train':
                        loss.backward()
                        optimizer.step()


                # statistics
                running_loss += loss.item() * inputs.size(0)
                running_corrects += torch.sum(preds == labels.data)
            epoch_loss = running_loss / dataset_sizes[phase]
            epoch_acc = running_corrects.double() / dataset_sizes[phase]
            if phase == 'train':
                writer.add_scalar('data/train_loss_places', epoch_loss, epoch)
                writer.add_scalar('data/train_acc_places', epoch_acc, epoch)
            else:
                writer.add_scalar('data/val_loss_places', epoch_loss, epoch)
                writer.add_scalar('data/val_acc_places', epoch_acc, epoch)
            for name, param in model.named_parameters():
                writer.add_histogram(name, param.clone().cpu().data.numpy(), epoch)
            time_elapsed1 = time.time() - since1
            print('\rEpoch process in {:.0f}m {:.0f}s'.format(
            time_elapsed1 // 60, time_elapsed1 % 60))
            print('{} Loss: {:.4f} Acc: {:.4f} lr: {:.4f}'.format(phase, epoch_loss, 
                                                                  epoch_acc, optimizer.param_groups[0]['lr']))
#             csvfile = open(os.path.join('./csv/resnet18/', '20190102resnet18_64_data_15_places_{}{}_class{}_epoch{}.csv'.format(optimizer_name, learning_rate, len(class_names), num_epochs)), 'a', newline='')
#             csv_writer = csv.writer(csvfile, delimiter=';', quotechar='|', quoting=csv.QUOTE_MINIMAL)
#             csv_writer.writerow(['class', len(class_names), 'epoch', epoch, phase, epoch_loss, epoch_acc])
#             csvfile.close()
#             deep copy the model
            if phase == 'valid' and epoch_acc > best_acc:
                best_acc = epoch_acc
                best_epoch = epoch
                best_model_wts = copy.deepcopy(model.state_dict())

        print()

    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(
        time_elapsed // 60, time_elapsed % 60))
    print('Best val Acc: {:4f} at {}'.format(best_acc, best_epoch+1))

    # load best model weights
    model.load_state_dict(best_model_wts)
    return model


In [13]:
import copy
lr = 0.001  # 0.1
criterion = nn.CrossEntropyLoss()
# optimizer = optim.SGD(model.parameters(), lr=lr, momentum=0.9, nesterov=True, weight_decay=0.0001)
optimizer = optim.Adam(model.parameters(), lr=lr, weight_decay=0.0001)
scheduler = lr_scheduler.StepLR(optimizer, step_size=25, gamma=0.1)
total_epoch = 100
model_train = train_model(model, scheduler, criterion, optimizer, num_epochs=total_epoch)
torch.save(model_train.state_dict(), './Trained/place_50_adam.pt')

Epoch 1/100
----------
lr: 0.001
Epoch process in 11m 11s
train Loss: 1.7244 Acc: 0.4594 lr: 0.0010
lr: 0.001
Epoch process in 14m 21s
valid Loss: 1.8863 Acc: 0.4427 lr: 0.0010

Epoch 2/100
----------
lr: 0.001
Epoch process in 4m 21s5
train Loss: 1.3416 Acc: 0.5726 lr: 0.0010
lr: 0.001
Epoch process in 5m 0s
valid Loss: 1.3825 Acc: 0.5620 lr: 0.0010

Epoch 3/100
----------
lr: 0.001
Epoch process in 4m 19s5
train Loss: 1.2150 Acc: 0.6136 lr: 0.0010
lr: 0.001
Epoch process in 4m 59s
valid Loss: 1.2032 Acc: 0.6160 lr: 0.0010

Epoch 4/100
----------
lr: 0.001
Epoch process in 4m 21s5
train Loss: 1.1272 Acc: 0.6406 lr: 0.0010
lr: 0.001
Epoch process in 4m 56s
valid Loss: 1.1865 Acc: 0.6191 lr: 0.0010

Epoch 5/100
----------
lr: 0.001
Epoch process in 4m 21s5
train Loss: 1.0704 Acc: 0.6578 lr: 0.0010
lr: 0.001
Epoch process in 4m 58s
valid Loss: 1.8137 Acc: 0.4765 lr: 0.0010

Epoch 6/100
----------
lr: 0.001
Epoch process in 4m 18s5
train Loss: 1.0171 Acc: 0.6744 lr: 0.0010
lr: 0.001
Epoch

Epoch process in 4m 17s5
train Loss: 0.2116 Acc: 0.9379 lr: 0.0001
lr: 0.0001
Epoch process in 4m 51s
valid Loss: 0.8974 Acc: 0.7520 lr: 0.0001

Epoch 48/100
----------
lr: 0.0001
Epoch process in 4m 17s5
train Loss: 0.2051 Acc: 0.9397 lr: 0.0001
lr: 0.0001
Epoch process in 4m 54s
valid Loss: 0.8933 Acc: 0.7529 lr: 0.0001

Epoch 49/100
----------
lr: 0.0001
Epoch process in 4m 17s5
train Loss: 0.1975 Acc: 0.9426 lr: 0.0001
lr: 0.0001
Epoch process in 4m 52s
valid Loss: 0.9083 Acc: 0.7482 lr: 0.0001

Epoch 50/100
----------
lr: 0.0001
Epoch process in 4m 17s5
train Loss: 0.1899 Acc: 0.9452 lr: 0.0001
lr: 0.0001
Epoch process in 4m 51s
valid Loss: 0.9196 Acc: 0.7512 lr: 0.0001

Epoch 51/100
----------
lr: 0.0001
Epoch process in 4m 17s5
train Loss: 0.1616 Acc: 0.9564 lr: 0.0000
lr: 1.0000000000000003e-05
Epoch process in 4m 55s
valid Loss: 0.9089 Acc: 0.7520 lr: 0.0000

Epoch 52/100
----------
lr: 1.0000000000000003e-05
Epoch process in 4m 17s5
train Loss: 0.1571 Acc: 0.9592 lr: 0.0000
l

Epoch process in 4m 57s
valid Loss: 0.9283 Acc: 0.7511 lr: 0.0000

Epoch 87/100
----------
lr: 1.0000000000000002e-06
Epoch process in 4m 17s5
train Loss: 0.1233 Acc: 0.9722 lr: 0.0000
lr: 1.0000000000000002e-06
Epoch process in 4m 51s
valid Loss: 0.9381 Acc: 0.7499 lr: 0.0000

Epoch 88/100
----------
lr: 1.0000000000000002e-06
Epoch process in 4m 18s5
train Loss: 0.1235 Acc: 0.9717 lr: 0.0000
lr: 1.0000000000000002e-06
Epoch process in 4m 52s
valid Loss: 0.9225 Acc: 0.7503 lr: 0.0000

Epoch 89/100
----------
lr: 1.0000000000000002e-06
Epoch process in 4m 18s5
train Loss: 0.1232 Acc: 0.9730 lr: 0.0000
lr: 1.0000000000000002e-06
Epoch process in 4m 56s
valid Loss: 0.9355 Acc: 0.7517 lr: 0.0000

Epoch 90/100
----------
lr: 1.0000000000000002e-06
Epoch process in 4m 17s5
train Loss: 0.1237 Acc: 0.9712 lr: 0.0000
lr: 1.0000000000000002e-06
Epoch process in 4m 54s
valid Loss: 0.9272 Acc: 0.7519 lr: 0.0000

Epoch 91/100
----------
lr: 1.0000000000000002e-06
Epoch process in 4m 17s5
train Los