In [1]:
import torch
import torchvision
from torchvision import datasets, models, transforms
import torch.nn as nn
import torch.optim as optim
import time
import os
import copy
import torch.nn.functional as F


mean, std = (0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)

preprocess_augment = transforms.Compose([
    transforms.Resize(256),
    transforms.RandomCrop(64),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(mean, std)])

dataset = datasets.ImageFolder(root = "../../share_dataset/six_objects/stimuli_objects/", transform=preprocess_augment)

In [2]:
data_loader = torch.utils.data.DataLoader(dataset, batch_size=20, shuffle=True , num_workers=2)

In [3]:
# for i in data_loader:
#     print(i)
#     break

In [4]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)

cuda:0


In [5]:
class SemanticImageExtractor(nn.Module):
    """
    This class expected image as input with size (64x64x3)
    """

    def __init__(self, output_class_num, feature_size=200):
        super(SemanticImageExtractor, self).__init__()
        self.alx_layer1 = nn.Sequential(
            nn.Conv2d(3, 64, kernel_size=11, stride=4, padding=2),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=3, stride=2)
        )
        self.alx_layer2 = nn.Sequential(
            nn.Conv2d(64, 192, kernel_size=5, padding=2),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=3, stride=2)
        )
        self.alx_layer3 = nn.Sequential(
            nn.Conv2d(192, 384, kernel_size=3, padding=1),
            nn.ReLU()
        )
        self.alx_layer4 = nn.Sequential(
            nn.Conv2d(384, 256, kernel_size=3, padding=1),
            nn.ReLU()
        )
        self.alx_layer5 = nn.Sequential(
            nn.Conv2d(256, 256, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=3, stride=2)
        )
        self.avg_pool = nn.AdaptiveAvgPool2d((6, 6))
        # return the same number of features but change width and height of img

        self.fc06 = nn.Sequential(
            nn.Dropout(),
            nn.Linear(256 * 6 * 6, 4096),
            nn.ReLU()
        )

        self.fc07 = nn.Sequential(
            nn.Dropout(),
            nn.Linear(4096, feature_size),
            nn.ReLU()
        )

        self.fc08 = nn.Sequential(
            nn.Linear(feature_size, output_class_num),
            nn.Softmax())

    def forward(self, x):
        x = self.alx_layer1(x)
        x = self.alx_layer2(x)
        x = self.alx_layer3(x)
        x = self.alx_layer4(x)
        x = self.avg_pool(x)
        x = torch.flatten(x, start_dim=1)
        x = self.fc06(x)
        semantic_features = self.fc07(x)
        p_label = self.fc08(semantic_features)
        return semantic_features, p_label

In [7]:
model = SemanticImageExtractor(6)
model.eval()

SemanticImageExtractor(
  (alx_layer1): Sequential(
    (0): Conv2d(3, 64, kernel_size=(11, 11), stride=(4, 4), padding=(2, 2))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (alx_layer2): Sequential(
    (0): Conv2d(64, 192, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (alx_layer3): Sequential(
    (0): Conv2d(192, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
  )
  (alx_layer4): Sequential(
    (0): Conv2d(384, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
  )
  (alx_layer5): Sequential(
    (0): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (avg_pool): AdaptiveAvgPool2d(output_size=(6, 6))
  (fc06): Sequential(
    (0): Dropout(p=0.5, inp

In [8]:
model = model.to(device)
criterion = nn.CrossEntropyLoss()
params_to_update = model.parameters()
optimizer = optim.SGD(params_to_update , lr=0.001, momentum=0.9)

In [9]:
def train_model(model, dataloaders, criterion, optimizer, num_epochs=25, weights_name='weight_save', is_inception=False):
    since = time.time()

    val_acc_history = []
    loss_acc_history = []

    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0

    for epoch in range(num_epochs):
        epoch_start = time.time()

        print(f'Epoch {epoch}/{num_epochs - 1}')
        print('-' * 10)

        # Each epoch has a training and validation phase
        for phase in ['train']:
            if phase == 'train':
                model.train()  # Set model to training mode
            else:
                model.eval()   # Set model to evaluate mode

            running_loss = 0.0
            running_corrects = 0

            for inputs, labels in dataloaders[phase]:
                inputs = inputs.to(device)
                labels = labels.to(device)
                optimizer.zero_grad()
                with torch.set_grad_enabled(phase == 'train'):
                    if is_inception and phase == 'train':
                        # From https://discuss.pytorch.org/t how-to-optimize-inception-model-with-auxiliary-classifiers/7958
                        _,outputs = model(inputs)
                        loss1 = criterion(outputs, labels)
                        # loss2 = criterion(aux4a, labels)
                        # loss3 = criterion(aux4d, labels)
                        loss = loss1 #+ (0.3 * loss2) + (0.3 * loss3)
                    else:
                        outputs = model(inputs)
                        loss = criterion(outputs, labels)

                    _, preds = torch.max(outputs, 1)
                    # Backpropagate only if in training phase
                    if phase == 'train':
                        loss.backward()
                        optimizer.step()
                # Gather our summary statistics
                running_loss += loss.item() * inputs.size(0)
                running_corrects += torch.sum(preds == labels.data)

            epoch_loss = running_loss / len(dataloaders[phase].dataset)
            epoch_acc = running_corrects.double() / len(dataloaders[phase].dataset)
            epoch_end = time.time()
            
            elapsed_epoch = epoch_end - epoch_start

            print('{} Loss: {:.4f} Acc: {:.4f}'.format(phase, epoch_loss, epoch_acc))
            print("Epoch time taken: ", elapsed_epoch)

            # If this is the best model on the validation set so far, deep copy it
            if phase == 'val' and epoch_acc > best_acc:
                best_acc = epoch_acc
                best_model_wts = copy.deepcopy(model.state_dict())
                torch.save(model.state_dict(), weights_name + ".pth")
            if phase == 'val':
                val_acc_history.append(epoch_acc)
            if phase == 'train':
                loss_acc_history.append(epoch_loss)

        print()

    # Output summary statistics, load the best weight set, and return results
    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(time_elapsed // 60, time_elapsed % 60))
    print('Best val Acc: {:4f}'.format(best_acc))
    model.load_state_dict(best_model_wts)
    return model, val_acc_history, loss_acc_history

In [11]:
dataloaders = { 'train': data_loader}
best_model, val_acc_history, loss_acc_history = train_model(model, dataloaders, criterion, optimizer, 9000, 'google_softmax_lr_0.001_bestsofar', is_inception=True)

Epoch 0/8999
----------
train Loss: 1.5950 Acc: 0.4167
Epoch time taken:  0.6492619514465332

Epoch 1/8999
----------
train Loss: 1.6226 Acc: 0.3583
Epoch time taken:  0.6904113292694092

Epoch 2/8999
----------
train Loss: 1.5901 Acc: 0.4000
Epoch time taken:  0.6369082927703857

Epoch 3/8999
----------
train Loss: 1.5902 Acc: 0.4417
Epoch time taken:  0.6552829742431641

Epoch 4/8999
----------
train Loss: 1.6012 Acc: 0.3750
Epoch time taken:  0.6684300899505615

Epoch 5/8999
----------
train Loss: 1.6128 Acc: 0.4083
Epoch time taken:  0.6652665138244629

Epoch 6/8999
----------
train Loss: 1.6366 Acc: 0.3583
Epoch time taken:  0.5855779647827148

Epoch 7/8999
----------
train Loss: 1.6266 Acc: 0.4167
Epoch time taken:  0.6230096817016602

Epoch 8/8999
----------
train Loss: 1.5950 Acc: 0.4333
Epoch time taken:  0.6874463558197021

Epoch 9/8999
----------
train Loss: 1.6119 Acc: 0.4000
Epoch time taken:  0.7001528739929199

Epoch 10/8999
----------
train Loss: 1.6014 Acc: 0.4000
Epoc