In [2]:
import torch, torchvision
from torchvision import datasets, models, transforms
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torch.utils.data import Subset
import time
from torchsummary import summary
from torch.optim import lr_scheduler
import copy

import numpy as np
import matplotlib.pyplot as plt
import os

from PIL import Image
from collections import OrderedDict
import shutil 

In [4]:
# Load the Data
# ImageFolder expects one sub-folder per class under each split directory.
data_dir = './data/tiny-imagenet-200'
num_classes = 200

# Create the training data generator
batch_size = 500
im_height = 64
im_width = 64

# ToTensor() already rescales pixel values to [0, 1]; Normalize then subtracts
# a mean of 0 and divides every channel by sqrt(255).
# NOTE(review): this is not the ImageNet mean/std normalisation that the
# pretrained torchvision weights were trained with. It is left untouched
# because the checkpoints loaded later in this notebook were presumably
# trained with exactly this transform -- confirm before changing it.
data_transforms = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0, 0, 0), tuple(np.sqrt((255, 255, 255)))),
])

phases = ['train', 'val', 'test']

# Load Data from folders: one ImageFolder per split, all sharing the transform.
image_datasets = {
    x: datasets.ImageFolder(os.path.join(data_dir, x), transform=data_transforms)
    for x in phases
}

# Only the training split is shuffled. The evaluation metrics are sums over the
# whole split, so shuffling 'val'/'test' would only add nondeterminism.
dataloaders = {
    x: DataLoader(image_datasets[x], batch_size=batch_size, shuffle=(x == 'train'))
    for x in phases
}
dataset_sizes = {x: len(image_datasets[x]) for x in phases}
class_names = image_datasets['train'].classes
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(dataloaders['train'])
print(dataset_sizes)

<torch.utils.data.dataloader.DataLoader object at 0x7fd8f50f4d10>
{'train': 100000, 'val': 10000, 'test': 10000}


In [5]:
def train_model(model, criterion, optimizer, scheduler, num_epochs=25):
    """Train `model` and restore the weights of its best validation epoch.

    Every epoch runs one pass over ``dataloaders['train']`` with parameter
    updates, followed by one pass over ``dataloaders['val']`` under
    ``torch.no_grad()``. The scheduler is stepped once per epoch, right after
    the training pass. `dataloaders`, `dataset_sizes` and `device` are read
    from the notebook's global namespace.

    Args:
        model: network to optimise; it is updated in place.
        criterion: loss callable invoked as ``criterion(outputs, labels)``.
        optimizer: optimizer whose ``zero_grad()``/``step()`` drive the updates.
        scheduler: learning-rate scheduler, stepped once per epoch.
        num_epochs: number of epochs to run.

    Returns:
        ``(model, tr_acc, val_acc, tr_loss, val_loss)``. `model` holds the
        weights of the epoch with the highest validation accuracy. The four
        lists have one entry per epoch: accuracies are the tensors produced by
        dividing the correct-count tensor by the split size, losses are floats.
    """
    started_at = time.time()

    # One history list per phase; unpacked again in the return statement.
    acc_log = {'train': [], 'val': []}
    loss_log = {'train': [], 'val': []}

    top_acc = 0.0
    top_weights = copy.deepcopy(model.state_dict())

    def run_batch(batch, targets):
        """Forward one batch; return (predicted class indices, loss)."""
        scores = model(batch)
        predicted = torch.max(scores, 1)[1]
        return predicted, criterion(scores, targets)

    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        print('-' * 10)

        # Training pass first, then validation pass.
        for phase in ('train', 'val'):
            training = phase == 'train'
            if training:
                model.train()
            else:
                model.eval()

            loss_sum = 0.0
            hits = 0

            for inputs, labels in dataloaders[phase]:
                inputs, labels = inputs.to(device), labels.to(device)

                # Clear gradients left over from the previous batch.
                optimizer.zero_grad()

                if training:
                    preds, loss = run_batch(inputs, labels)
                    loss.backward()
                    optimizer.step()
                else:
                    # Validation: no graph is built, no weights are touched.
                    with torch.no_grad():
                        preds, loss = run_batch(inputs, labels)

                # The loss is scaled by the batch size so that dividing by the
                # split size below yields a per-sample figure.
                loss_sum += loss.item() * inputs.size(0)
                hits += torch.sum(preds == labels.data)

            if training:
                scheduler.step()

            n_samples = dataset_sizes[phase]
            epoch_loss = loss_sum / n_samples
            epoch_acc = hits.double() / n_samples
            acc_log[phase].append(epoch_acc)
            loss_log[phase].append(epoch_loss)

            print('{} Loss: {:.4f} Acc: {:.4f}'.format(
                phase, epoch_loss, epoch_acc))

            # Snapshot the weights whenever validation accuracy improves.
            if not training and epoch_acc > top_acc:
                top_acc = epoch_acc
                top_weights = copy.deepcopy(model.state_dict())
        print()

    minutes, seconds = divmod(time.time() - started_at, 60)
    print('Training complete in {:.0f}m {:.0f}s'.format(
        minutes, seconds))
    print('Best val Acc: {:4f}'.format(top_acc))

    # Hand back the best-performing weights rather than the last epoch's.
    model.load_state_dict(top_weights)
    return model, acc_log['train'], acc_log['val'], loss_log['train'], loss_log['val']

In [10]:
# Load the pretrained model
model = models.resnet18(pretrained=True)
# Freeze model parameters to train only the last layer.
# Comment out this loop if you want to fine tune the whole network
for param in model.parameters():
    param.requires_grad = False

# Replace the classification head with a fresh layer for our classes; a newly
# constructed nn.Linear is trainable by default. The input width is read from
# the model itself so this cell does not depend on a `num_ftrs` left behind by
# some other cell.
num_ftrs = model.fc.in_features
model.fc = nn.Linear(num_ftrs, num_classes)
model = model.to(device)
model.eval()

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
  

In [12]:
# Define Optimizer and Loss Function
criterion = nn.CrossEntropyLoss()
# learning_rates = [0.00001, 0.0001, 0.001]
# Give the optimizer only the parameters that are still trainable. Frozen
# parameters never receive gradients, so listing them adds nothing; if nothing
# is trainable Adam fails immediately instead of training silently doing nothing.
trainable_params = [p for p in model.parameters() if p.requires_grad]
optimizer_ft = optim.Adam(trainable_params, lr=0.001)
# Decay LR by a factor of 0.1 every 5 epochs
exp_lr_scheduler = lr_scheduler.StepLR(optimizer_ft, step_size=5, gamma=0.1)
model, tr_acc, val_acc, tr_loss, val_loss = train_model(
    model, criterion, optimizer_ft, exp_lr_scheduler, num_epochs=10)

Epoch 0/9
----------
train Loss: 3.7194 Acc: 0.2345
val Loss: 3.1500 Acc: 0.3170

Epoch 1/9
----------
train Loss: 2.9385 Acc: 0.3467
val Loss: 3.0045 Acc: 0.3407

Epoch 2/9
----------
train Loss: 2.7716 Acc: 0.3736
val Loss: 2.9721 Acc: 0.3381

Epoch 3/9
----------
train Loss: 2.6766 Acc: 0.3886
val Loss: 2.9419 Acc: 0.3480

Epoch 4/9
----------
train Loss: 2.6167 Acc: 0.3986
val Loss: 2.9417 Acc: 0.3502

Epoch 5/9
----------
train Loss: 2.4794 Acc: 0.4265
val Loss: 2.9084 Acc: 0.3559

Epoch 6/9
----------
train Loss: 2.4716 Acc: 0.4283
val Loss: 2.9089 Acc: 0.3565

Epoch 7/9
----------
train Loss: 2.4670 Acc: 0.4300
val Loss: 2.9055 Acc: 0.3581

Epoch 8/9
----------
train Loss: 2.4624 Acc: 0.4305
val Loss: 2.9155 Acc: 0.3543

Epoch 9/9
----------
train Loss: 2.4564 Acc: 0.4319
val Loss: 2.9058 Acc: 0.3553

Training complete in 46m 10s
Best val Acc: 0.358100


In [14]:
# Persist the fine-tuned model. Passing the module itself pickles the whole
# object (not just a state_dict), so loading the file later requires the same
# class definitions to be importable.
os.makedirs('./models', exist_ok=True)  # torch.save does not create missing directories
torch.save(model, './models/resnet18_model2.pt')

## Ensemble

In [10]:
# Load the model and saved state_dict
# The ImageNet weights are not downloaded (pretrained=False): the strict
# load_state_dict below replaces every parameter and buffer anyway.
vgg_model = models.vgg11_bn(pretrained=False)
num_ftrs = vgg_model.classifier[6].in_features
vgg_model.classifier[6] = nn.Linear(num_ftrs, num_classes)
# map_location lets a checkpoint that was saved from a GPU be read on a
# CPU-only machine as well.
vgg_model.load_state_dict(
    torch.load('./models/vgg11_bn_best_model_state_dict.pt', map_location=device))
vgg_model.eval()

VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU(inplace=True)
    (3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (4): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (5): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (6): ReLU(inplace=True)
    (7): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (8): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (10): ReLU(inplace=True)
    (11): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (12): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (13): ReLU(inplace=True)
    (14): MaxPool2d(ke

In [17]:
# Rebuild the ResNet-18 architecture with our classification head and restore
# the fine-tuned weights. The ImageNet weights are not downloaded
# (pretrained=False): the strict load_state_dict below replaces every
# parameter and buffer anyway.
resnet_model = models.resnet18(pretrained=False)
# Read the head's input width from this model, not from the unrelated global
# `model`, which may not exist (or may be something else) in a fresh kernel.
num_ftrs = resnet_model.fc.in_features
resnet_model.fc = nn.Linear(num_ftrs, num_classes)
# map_location lets a checkpoint that was saved from a GPU be read on a
# CPU-only machine as well.
resnet_model.load_state_dict(
    torch.load('./models/resnet18_model_state_dict.pt', map_location=device))
resnet_model.eval()

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
  

In [18]:
# The ensemble members are only evaluated: switch off gradient tracking for
# every weight of both networks.
for frozen_net in (resnet_model, vgg_model):
    for weight in frozen_net.parameters():
        weight.requires_grad = False
# Move both models to `device` (the GPU when one is available).
resnet_model, vgg_model = resnet_model.to(device), vgg_model.to(device)


In [19]:
class Ensemble():
    """Evaluate several models as one by averaging their outputs.

    For every batch the raw outputs of all member models are summed and divided
    by the number of models; the averaged tensor is used both for the
    prediction (arg-max over dim 1) and for the loss.
    """

    def __init__(self, models):
        """Store the member models.

        Args:
            models: sized collection of models; each must be callable on a
                batch of inputs and provide ``eval()``.
        """
        self.models = models
        # Results of the most recent evaluate_all() call.
        self.loss = 0.0
        self.acc = 0.0

    def evaluate_all(self, criterion, phase='val', loader=None, eval_device=None):
        """Score the averaged ensemble output on one data split.

        Args:
            criterion: loss callable invoked as ``criterion(outputs, labels)``.
                Its result is multiplied by the batch size before being
                accumulated, i.e. it is assumed to be a per-batch mean (the
                default reduction of ``nn.CrossEntropyLoss``).
            phase: key into the global `dataloaders`; only used when `loader`
                is not given. Defaults to ``'val'``.
            loader: optional iterable of ``(inputs, labels)`` batches to use
                instead of ``dataloaders[phase]``.
            eval_device: optional device the batches are moved to; defaults to
                the global `device`. The models must already live there.

        Returns:
            ``(accuracy, loss)``: accuracy as a 0-dim double tensor, loss as a
            float, both averaged over the samples actually seen. The same
            values are stored in ``self.acc`` and ``self.loss``.

        Raises:
            ValueError: if the ensemble has no models or the loader yields no
                samples.
        """
        if len(self.models) == 0:
            raise ValueError("Ensemble needs at least one model to evaluate")
        if loader is None:
            loader = dataloaders[phase]
        if eval_device is None:
            eval_device = device

        for m in self.models:
            m.eval()

        running_loss = 0.0
        running_corrects = 0
        n_seen = 0
        with torch.no_grad():
            for inputs, labels in loader:
                inputs = inputs.to(eval_device)
                labels = labels.to(eval_device)

                # Take mean of the output to make prediction
                outputs = None
                for m in self.models:
                    member_out = m(inputs)
                    outputs = member_out if outputs is None else outputs + member_out
                outputs = outputs / len(self.models)

                _, preds = torch.max(outputs, 1)
                loss = criterion(outputs, labels)

                # statistics
                batch_n = inputs.size(0)
                running_loss += loss.item() * batch_n
                running_corrects += torch.sum(preds == labels)
                n_seen += batch_n

        if n_seen == 0:
            raise ValueError("loader yielded no samples to evaluate")

        # Normalise by the samples actually processed, so the figures stay
        # correct for loaders that cover only part of a dataset.
        self.loss = running_loss / n_seen
        self.acc = running_corrects.double() / n_seen
        return self.acc, self.loss

In [20]:
# Score the two-member ensemble (resnet_model + vgg_model) on the validation split.
criterion = nn.CrossEntropyLoss()
ensemble_solver = Ensemble([resnet_model, vgg_model])
ensemble_metrics = ensemble_solver.evaluate_all(criterion)
val_acc, val_loss = ensemble_metrics
print("validation accuracy", val_acc)
print("validation loss", val_loss)

validation accuracy tensor(0.5321, device='cuda:0', dtype=torch.float64)
validation loss 1.9333796083927155


In [28]:
# Rebuild VGG-19 with our classification head and restore the fine-tuned
# weights. The ImageNet weights are not downloaded (pretrained=False): the
# strict load_state_dict below replaces every parameter and buffer anyway.
# NOTE(review): the output recorded for this cell lists BatchNorm2d layers,
# which torchvision's plain `vgg19` does not contain -- the output looks stale;
# confirm which architecture the checkpoint was saved from.
vgg19_model = torch.hub.load('pytorch/vision:v0.5.0', 'vgg19', pretrained=False)
num_ftrs = vgg19_model.classifier[6].in_features
vgg19_model.classifier[6] = nn.Linear(num_ftrs, num_classes)
# map_location lets a checkpoint that was saved from a GPU be read on a
# CPU-only machine as well.
vgg19_model.load_state_dict(
    torch.load('./models/vgg19_model_state_dict.pt', map_location=device))
# Ensemble.evaluate_all moves every batch to `device`, so the model has to live
# there too; otherwise the forward pass fails with a device mismatch on GPU.
vgg19_model = vgg19_model.to(device)
vgg19_model.eval()

VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU(inplace=True)
    (3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (4): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (5): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (6): ReLU(inplace=True)
    (7): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (8): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (10): ReLU(inplace=True)
    (11): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (12): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (13): ReLU(inplace=True)
    (14): MaxPool2d(ke

In [None]:
# Score the three-member ensemble (resnet_model + vgg_model + vgg19_model) on
# the validation split.
criterion = nn.CrossEntropyLoss()
ensemble_solver = Ensemble([resnet_model, vgg_model, vgg19_model])
ensemble_metrics = ensemble_solver.evaluate_all(criterion)
val_acc, val_loss = ensemble_metrics
print("validation accuracy", val_acc)
print("validation loss", val_loss)