In [12]:
# here are the necessary imports
from __future__ import print_function, division

import torch
import torch.nn as nn
import torch.optim as optim
from torch.autograd import Variable
import numpy as np
import torchvision
from torchvision import datasets, models, transforms
import matplotlib.pyplot as plt
import time
import os
import seaborn as sns

plt.ion()   

In [14]:
class B_CNN(nn.Module):
    def __init__(self, num_classes, model, pretrained=True, ):
        super(B_CNN, self).__init__()
        assert (model == "vgg16" or model == "resnet34")
        features = torchvision.models.resnet34(pretrained=pretrained)
        if model == "vgg16":
            features = torchvision.models.vgg16(pretrained=pretrained)
        # Remove the pooling layer and full connection layer
        self.conv = nn.Sequential(*list(features.children())[:-2])
        self.fc = nn.Linear(512 * 512, num_classes)
        self.softmax = nn.Softmax()

        if pretrained:
            for parameter in self.conv.parameters():
                parameter.requires_grad = False
            nn.init.kaiming_normal_(self.fc.weight.data)
            nn.init.constant_(self.fc.bias, val=0)

    def forward(self, input):
        features = self.conv(input)
        # Cross product operation
        features = features.view(features.size(0), 512, 14 * 14)
        features_T = torch.transpose(features, 1, 2)
        features = torch.bmm(features, features_T) / (14 * 14)
        features = features.view(features.size(0), 512 * 512)
        # The signed square root
        features = torch.sign(features) * torch.sqrt(torch.abs(features) + 1e-12)
        # L2 regularization
        features = torch.nn.functional.normalize(features)

        out = self.fc(features)
        softmax = self.softmax(out)
        return out


In [15]:
# Data augmentation and normalization for training
# Just normalization for validation
data_transforms = {
    'train': transforms.Compose([
#         transforms.Resize(256),
#         transforms.RandomRotation(45),
#         transforms.RandomResizedCrop(224),
        transforms.Resize(448),
        transforms.CenterCrop(448),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
    'test': transforms.Compose([
        transforms.Resize(448),
        transforms.CenterCrop(448),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
}

data_dir = 'CUB_200_2011/CUB_200_2011'
# loading datasets with PyTorch ImageFolder
image_datasets = {x: datasets.ImageFolder(os.path.join(data_dir, x),
                                          data_transforms[x])
                  for x in ['train', 'test']}
dataset_sizes = {x: len(image_datasets[x]) for x in ['train', 'test']}
class_names = image_datasets['train'].classes

use_gpu = torch.cuda.is_available()

Sizes of training and test datasets

In [16]:
dataset_sizes

{'train': 5994, 'test': 5794}

In [19]:
def train_model(model, criterion, optimizer, model_name, Bilinear, num_epochs=10, base_batch_size=8, step = 1):
    assert (model_name == "vgg16" or model_name == "resnet34")
    assert isinstance(Bilinear, bool) 
    since = time.time()
    if Bilinear:
        version = 'Bilinear'
    else:
        version = 'Original'
    best_model_wts = model.state_dict()
    best_acc = 0.0
    
    # defining data loaders to load data using image_datasets and transforms, here we also specify batch size for the mini batch
    dataloders = {x: torch.utils.data.DataLoader(image_datasets[x], batch_size= base_batch_size,
                                             shuffle=True, num_workers=4)
                  for x in ['train', 'test']}
    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        print('-' * 10)

        # Each epoch has a training and validation phase
        for phase in ['train', 'test']:
            if phase == 'train':
                #scheduler.step()
                model.train(True)  # Set model to training mode
            else:
                model.train(False)  # Set model to evaluate mode

            running_loss = 0.0
            running_corrects = 0

            # Iterate over data.
            for data in dataloders[phase]:
                # get the inputs
                inputs, labels = data

                # wrap them in Variable
                if use_gpu:
                    inputs = Variable(inputs.cuda())
                    labels = Variable(labels.cuda())
                else:
                    inputs, labels = Variable(inputs), Variable(labels)

                # zero the parameter gradients
                optimizer.zero_grad()

                # forward
                outputs = model(inputs)
                
                _, preds = torch.max(outputs.data, 1)
                loss = criterion(outputs, labels)

                # backward + optimize only if in training phase
                if phase == 'train':
                    loss.backward()
                    optimizer.step()

                # statistics
                running_loss += loss.item()
                running_corrects += torch.sum(preds == labels.data)

            epoch_loss = running_loss / dataset_sizes[phase]
            epoch_acc = running_corrects.float() / dataset_sizes[phase]

            print('{} Loss: {:.4f} Acc: {:.4f}'.format(
                phase, epoch_loss, epoch_acc))

            # deep copy the model
            if phase == 'test' and epoch_acc > best_acc:
                best_acc = epoch_acc
                best_model_wts = model.state_dict()
                state = {'model':model.state_dict(),'optim':optimizer.state_dict()}
                torch.save(state,'CUB_200_2011/CUB_200_2011/%s_%s_best_step_%d_epoch_%d_acc_%g.pth' % (version, model_name, step, epoch, best_acc))
        print()

    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(
        time_elapsed // 60, time_elapsed % 60))
    print('Best test Acc: {:4f}'.format(best_acc))

    # load best model weights
    model.load_state_dict(best_model_wts)
    return model

Traning process for original CNN with basenet Resnet34:

In [20]:
model = "resnet34"
model_ft = models.resnet34(pretrained=True) # loading a pre-trained(trained on image net) resnet18 model from torchvision models
num_ftrs = model_ft.fc.in_features
model_ft.fc = nn.Linear(num_ftrs, 200)      # changing the last layer for this dataset by setting last layer neurons to 200 as this dataset has 200 categories
 
if use_gpu:                                 # if gpu is available then use it
    model_ft = model_ft.cuda()       
#model_ft = model_ft.float()
criterion = nn.CrossEntropyLoss()           # defining loss function

# Observe that all parameters are being optimized
optimizer_ft = optim.SGD(model_ft.parameters(), lr=0.0001, momentum=0.9)

Downloading: "https://download.pytorch.org/models/resnet34-333f7ec4.pth" to /tmp/xdg-cache/torch/checkpoints/resnet34-333f7ec4.pth
100%|██████████| 83.3M/83.3M [00:05<00:00, 16.4MB/s]


In [21]:
model_ft = train_model(model_ft, criterion, optimizer_ft, model, False, num_epochs=50, base_batch_size=8, step = 1)

Epoch 0/49
----------
train Loss: 0.6682 Acc: 0.0102


KeyboardInterrupt: 

Traning process for original CNN with basenet Vgg16:

In [22]:
model = "vgg16"
model_ft = models.resnet34(pretrained=True) # loading a pre-trained(trained on image net) resnet18 model from torchvision models
num_ftrs = model_ft.fc.in_features
model_ft.fc = nn.Linear(num_ftrs, 200)      # changing the last layer for this dataset by setting last layer neurons to 200 as this dataset has 200 categories
 
if use_gpu:                                 # if gpu is available then use it
    model_ft = model_ft.cuda()       
#model_ft = model_ft.float()
criterion = nn.CrossEntropyLoss()           # defining loss function

# Observe that all parameters are being optimized
optimizer_ft = optim.SGD(model_ft.parameters(), lr=0.0001, momentum=0.9)

In [24]:
model_ft = train_model(model_ft, criterion, optimizer_ft, model, False, num_epochs=50, base_batch_size=8, step = 1)

Epoch 0/49
----------
train Loss: 0.6681 Acc: 0.0078
test Loss: 0.6478 Acc: 0.0192

Epoch 1/49
----------


KeyboardInterrupt: 

Traning process for Bilinear-CNN with basenet Resnet34:

The first training process

In [26]:
model = "resnet34"
model_ft = B_CNN(200, model, pretrained=True)
num_ftrs = model_ft.fc.in_features

if use_gpu:                                 # if gpu is available then use it
    model_ft = model_ft.cuda()       
#model_ft = model_ft.float()
criterion = nn.CrossEntropyLoss()           # defining loss function


You can load a checkpoint from your my drive or any other place if you have saved it. you have to load weights of model and optimizer

In [27]:
# checkpoint = torch.load('path to model')
#checkpoint = torch.load('drive//Bird/point_resnet_best.pth')
# model_ft.load_state_dict(checkpoint['model'])
# optimizer_ft.load_state_dict(checkpoint['optim'])

#Train and evaluate

In [29]:
# Observe that all parameters are being optimized
optimizer_ft = optim.SGD(model_ft.fc.parameters(), lr=0.1, momentum=0.9)
model_ft = train_model(model_ft, criterion, optimizer_ft, model, True, num_epochs=50, base_batch_size=8, step = 1)



Epoch 0/49
----------




train Loss: 0.6663 Acc: 0.0052
test Loss: 0.6546 Acc: 0.0081

Epoch 1/49
----------


KeyboardInterrupt: 

The second training process:

In [31]:
model_ft = B_CNN(200, model, pretrained=False)
record = torch.load(os.path.join('CUB_200_2011/CUB_200_2011',
                                              'Bilinear_resnet34_best_step_1_epoch_0_acc_0.00811184.pth'),
                                              map_location=lambda storage, loc: storage)
model_ft.load_state_dict(record['model'])
if use_gpu:                                 # if gpu is available then use it
    model_ft = model_ft.cuda()  

In [32]:
optimizer_ft = optim.SGD(model_ft.parameters(), lr=0.001, momentum=0.9)
model_ft = train_model(model_ft, criterion, optimizer_ft, model, True, num_epochs=50, base_batch_size = 8, step = 2)
   
    

Epoch 0/49
----------




train Loss: 0.6493 Acc: 0.0130
test Loss: 0.6428 Acc: 0.0216

Epoch 1/49
----------


KeyboardInterrupt: 

Traning process for Bilinear-CNN with basenet Vgg16:

The first training process

In [33]:
model = "vgg16"
model_ft = B_CNN(200, model, pretrained=True)
num_ftrs = model_ft.fc.in_features

if use_gpu:                                 # if gpu is available then use it
    model_ft = model_ft.cuda()       
#model_ft = model_ft.float()
criterion = nn.CrossEntropyLoss()           # defining loss function

Downloading: "https://download.pytorch.org/models/vgg16-397923af.pth" to /tmp/xdg-cache/torch/checkpoints/vgg16-397923af.pth
100%|██████████| 528M/528M [00:16<00:00, 32.9MB/s] 


In [34]:
# Observe that all parameters are being optimized
optimizer_ft = optim.SGD(model_ft.fc.parameters(), lr=0.1, momentum=0.9)
model_ft = train_model(model_ft, criterion, optimizer_ft, model, True, num_epochs=50, base_batch_size=8, step = 1)

Epoch 0/49
----------




train Loss: 0.6479 Acc: 0.0294
test Loss: 0.6220 Acc: 0.0549

Epoch 1/49
----------


KeyboardInterrupt: 

The second training process

In [None]:
model_ft = B_CNN(200, model, pretrained=False)
record = torch.load(os.path.join('CUB_200_2011/CUB_200_2011',
                                              'Bilinear_vgg16_best_step_1_acc_0.517604.pth'),
                                              map_location=lambda storage, loc: storage)
model_ft.load_state_dict(record['model'])
if use_gpu:                                 # if gpu is available then use it
    model_ft = model_ft.cuda()  

In [None]:
optimizer_ft = optim.SGD(model_ft.parameters(), lr=0.001, momentum=0.9)
model_ft = train_model(model_ft, criterion, optimizer_ft, model, True, num_epochs=50, base_batch_size = 8, step = 2)