In [140]:
from __future__ import print_function
from __future__ import division
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import torchvision
from torchvision import datasets, models, transforms
from torch.utils.data import random_split
import matplotlib.pyplot as plt
import time
import os
import copy
print("PyTorch Version: ",torch.__version__)
print("Torchvision Version: ",torchvision.__version__)

PyTorch Version:  1.8.1+cu111
Torchvision Version:  0.9.1+cu111


In [141]:
resnet_path = "saved-models/resnet/resnet.pth"
densenet_path = "saved-models/densenet/densenet.pth"
vit_dir = "saved-models/vit/flame-dataset-vit-1"
model_paths = [resnet_path, densenet_path, vit_dir]

In [171]:
num_classes = 2
use_pretrained = True
resnet_model = models.resnet18(pretrained=use_pretrained).to(device)
num_ftrs = resnet_model.fc.in_features
resnet_model.fc = nn.Linear(num_ftrs, num_classes).to(device)

resnet_model_features = models.resnet18(pretrained=use_pretrained).to(device)
resnet_model_features.fc = nn.Identity()
num_resnet_ftrs = num_ftrs

In [172]:
densenet_model = models.densenet121(pretrained=use_pretrained).to(device)
num_ftrs = densenet_model.classifier.in_features
densenet_model.classifier = nn.Linear(num_ftrs, num_classes).to(device)

densenet_model_features= models.densenet121(pretrained=use_pretrained).to(device)
densenet_model_features.fc = nn.Identity()
num_densenet_ftrs = num_ftrs

In [173]:
from transformers import ViTForImageClassification
vit_model = ViTForImageClassification.from_pretrained(vit_dir)

vit_model_features = copy.deepcopy(vit_model)
vit_model_features.classifier = nn.Identity()

num_vit_ftrs = vit_model.classifier.in_features

class VitWrapper(nn.Module):
    def __init__(self, huggingface_model):
        super(VitWrapper, self).__init__()
        self.huggingface_model = huggingface_model
    
    def forward(self, x):
        return self.huggingface_model(x).logits.to(device) #grab logits

vit_model = VitWrapper(vit_model)
vit_model = vit_model.to(device)

vit_model_features = VitWrapper(vit_model_features)
vit_model_features = vit_model_features.to(device)

In [145]:
resnet_model.load_state_dict(torch.load(resnet_path, map_location=device))
densenet_model.load_state_dict(torch.load(densenet_path, map_location=device))

<All keys matched successfully>

In [146]:
# load in data
train_folder = "../train-val/Training/"
test_folder = "../test/Test/"
import torch.utils.data as data
from datasets import load_dataset
ds = load_dataset("imagefolder", ignore_verifications=True, data_files={"train": f"{train_folder}**", "test": f"{test_folder}**"})
import datasets
train_valid = ds['train'].train_test_split(test_size=0.2)
ds = datasets.DatasetDict({
    'train': train_valid['train'],
    'val': train_valid['test'],
    'test': ds['test']
})
ds

Resolving data files:   0%|          | 0/39375 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/8617 [00:00<?, ?it/s]

Using custom data configuration default-320a3a01cff974fc
Reusing dataset image_folder (/home/ubuntu/.cache/huggingface/datasets/image_folder/default-320a3a01cff974fc/0.0.0/48efdc62d40223daee675ca093d163bcb6cb0b7d7f93eb25aebf5edca72dc597)


  0%|          | 0/2 [00:00<?, ?it/s]

DatasetDict({
    train: Dataset({
        features: ['image', 'label'],
        num_rows: 31500
    })
    val: Dataset({
        features: ['image', 'label'],
        num_rows: 7875
    })
    test: Dataset({
        features: ['image', 'label'],
        num_rows: 8617
    })
})

In [147]:
# copied over from resnet-densenet
model_name = "resnet"
num_classes = 2
batch_size = 8
num_epochs = 15
feature_extract = True
data_dir = "train-val/Training"
test_dir = "test/Test"
percent_val = 0.1
save_path = "saved-models"
input_size = 224

In [148]:
data_transforms = {
    'train': transforms.Compose([
        transforms.Resize(input_size),
        transforms.ToTensor(),
        #transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
    'val': transforms.Compose([
        transforms.Resize(input_size),
        transforms.ToTensor(),
        #transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
    'test':transforms.Compose([
        transforms.Resize(input_size),
        transforms.ToTensor(),
        #transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
    'vit_train': transforms.Compose([
        transforms.Resize(input_size),
        transforms.ToTensor(),
        transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])
    ]),
    'vit_val': transforms.Compose([
        transforms.Resize(input_size),
        transforms.ToTensor(),
        transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])
    ]),
    'vit_test':transforms.Compose([
        transforms.Resize(input_size),
        transforms.ToTensor(),
        transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])
    ]),
    'torchvision_train': transforms.Compose([
        transforms.Resize(input_size),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
    'torchvision_val': transforms.Compose([
        transforms.Resize(input_size),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
    'torchvision_test':transforms.Compose([
        transforms.Resize(input_size),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
}

In [159]:
from torchvision import datasets
image_datasets = {}
orig_set = datasets.ImageFolder(train_folder)
n = len(orig_set)
n_val = int(percent_val * n)
train_dataset, val_dataset = random_split(orig_set, [n-n_val, n_val])
train_dataset_vit, val_dataset_vit = random_split(orig_set, [n-n_val, n_val])
train_dataset_torchvision, val_dataset_torchvision = random_split(orig_set, [n-n_val, n_val])
val_dataset.dataset.transform = data_transforms["val"]
train_dataset.dataset.transform = data_transforms["train"]
val_dataset_vit.dataset.transform = data_transforms["vit_val"]
train_dataset_vit.dataset.transform = data_transforms["vit_train"]
val_dataset_vit.dataset.transform = data_transforms["torchvision_val"]
train_dataset_vit.dataset.transform = data_transforms["torchvision_train"]
image_datasets["val"] = val_dataset
image_datasets["train"] = train_dataset
image_datasets["test"] = datasets.ImageFolder(test_folder, data_transforms["test"])
image_datasets["vit_val"] = val_dataset_vit
image_datasets["vit_train"] = train_dataset_vit
image_datasets["vit_test"] = datasets.ImageFolder(test_folder, data_transforms["vit_test"])
image_datasets["torchvision_val"] = val_dataset_torchvision
image_datasets["torchvision_train"] = train_dataset_torchvision
image_datasets["torchvision_test"] = datasets.ImageFolder(test_folder, data_transforms["torchvision_test"])

# Create training, validation, and test dataloaders
dataloaders_dict = {x: torch.utils.data.DataLoader(image_datasets[x], batch_size=batch_size, shuffle=True, num_workers=4) for x in ['train', 'val', 'test', 'vit_train', 'vit_val', 'vit_test', 'torchvision_train', 'torchvision_val', 'torchvision_test']}
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
device

device(type='cuda', index=0)

In [156]:
class VotingEnsemble(nn.Module):
    def __init__(self, model_list):
        super(VotingEnsemble, self).__init__()
        self.model_list = model_list
    
    def forward(self, x):
        # normalize according to the type of model
        results_list = []
        for model_i in self.model_list:
            # normalize according to the type of model
            if model_i.__class__.__name__ == "VitWrapper":
                normalize_transform = transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])
            else:
                normalize_transform = transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
            x_norm = normalize_transform(x.clone()).to(device)
            scores = model_i(x_norm)
            _, preds = scores.max(1)
            results_list.append(preds)
        combined_results = torch.stack(results_list)
        voted_pred = torch.mode(combined_results, dim=0)[0]
        return torch.mode(combined_results, dim=0)[0]

In [160]:
# check accuracy on test set
voting_ensemble = VotingEnsemble([resnet_model, densenet_model, vit_model])
def test_ensemble_accuracy(loader, ensemble, is_val):
    if is_val:
        print('Checking accuracy on validation set')
    else:
        print('Checking accuracy on test set')   
    num_correct = 0
    num_samples = 0
    ensemble.eval()  # set model to evaluation mode
    with torch.no_grad():
        for x, y in loader:
            x = x.to(device=device)  # move to device, e.g. GPU
            y = y.to(device=device, dtype=torch.long)
            preds = ensemble(x)
            num_correct += (preds == y).sum()
            num_samples += preds.size(0)
        acc = float(num_correct) / num_samples
        print('Got %d / %d correct (%.2f)' % (num_correct, num_samples, 100 * acc)) 

In [152]:
test_ensemble_accuracy(dataloaders_dict["val"], voting_ensemble, True)
test_ensemble_accuracy(dataloaders_dict["test"], voting_ensemble, False)

Checking accuracy on validation set
Got 3878 / 3937 correct (98.50)
Checking accuracy on test set
Got 6484 / 8617 correct (75.25)


In [161]:
def test_accuracy(loader, model, is_val):
    if is_val:
        print('Checking accuracy on validation set')
    else:
        print('Checking accuracy on test set')   
    num_correct = 0
    num_samples = 0
    model.eval()  # set model to evaluation mode
    with torch.no_grad():
        for x, y in loader:
            x = x.to(device=device)  # move to device, e.g. GPU
            y = y.to(device=device, dtype=torch.long)
            scores = model(x)
            _, preds = scores.max(1)
            num_correct += (preds == y).sum()
            num_samples += preds.size(0)
        acc = float(num_correct) / num_samples
        print('Got %d / %d correct (%.2f)' % (num_correct, num_samples, 100 * acc))  

In [162]:
test_accuracy(dataloaders_dict["vit_test"], vit_model, False)
test_accuracy(dataloaders_dict["torchvision_test"], resnet_model, False)
test_accuracy(dataloaders_dict["torchvision_test"], densenet_model, False)

Checking accuracy on test set
Got 6743 / 8617 correct (78.25)
Checking accuracy on test set
Got 6275 / 8617 correct (72.82)
Checking accuracy on test set
Got 6126 / 8617 correct (71.09)


In [210]:
class ConcatEnsemble(nn.Module):
    def __init__(self, model_list, num_features, num_classes):
        super(ConcatEnsemble, self).__init__()
        self.model_list = model_list
        self.num_features = num_features
        self.classifier = nn.Linear(num_features, num_classes).to(device)
        
    def forward(self, x):
        # normalize according to the type of model
        scores_list = []
        for model_i in self.model_list:
            # normalize according to the type of model
            if model_i.__class__.__name__ == "VitWrapper":
                normalize_transform = transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])
            else:
                normalize_transform = transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
            x_norm = normalize_transform(x.clone()).to(device)
            
            scores = model_i(x_norm)
            scores_list.append(scores)
        combined_results = torch.cat(scores_list, dim=1)
        out = self.classifier(torch.nn.functional.relu(combined_results))
        
        #_, preds = out.max(1)
        
        return out
        

In [211]:
for param in resnet_model_features.parameters():
    param.requires_grad = (False)
for param in densenet_model_features.parameters():
    param.requires_grad = (False)
for param in vit_model_features.parameters():
    param.requires_grad = (False)    

In [212]:
concat_ensemble = ConcatEnsemble([resnet_model_features, densenet_model_features, vit_model_features], 2280, 2)

In [217]:
def train_model(model, dataloaders, criterion, optimizer, num_epochs=25, is_inception=False):
    since = time.time()

    val_acc_history = []

    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0

    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        print('-' * 10)

        # Each epoch has a training and validation phase
        for phase in ['train', 'val']:
            if phase == 'train':
                model.train()  # Set model to training mode
            else:
                model.eval()   # Set model to evaluate mode

            running_loss = 0.0
            running_corrects = 0

            # Iterate over data.
            for inputs, labels in dataloaders[phase]:
                inputs = inputs.to(device)
                labels = labels.to(device)

                # zero the parameter gradients
                optimizer.zero_grad()

                # forward
                # track history if only in train
                with torch.set_grad_enabled(phase == 'train'):
                    # Get model outputs and calculate loss
                    # Special case for inception because in training it has an auxiliary output. In train
                    #   mode we calculate the loss by summing the final output and the auxiliary output
                    #   but in testing we only consider the final output.
                    if is_inception and phase == 'train':
                        # From https://discuss.pytorch.org/t/how-to-optimize-inception-model-with-auxiliary-classifiers/7958
                        outputs, aux_outputs = model(inputs)
                        loss1 = criterion(outputs, labels)
                        loss2 = criterion(aux_outputs, labels)
                        loss = loss1 + 0.4*loss2
                    else:
                        outputs = model(inputs)
                        loss = criterion(outputs, labels)

                    _, preds = torch.max(outputs, 1)

                    # backward + optimize only if in training phase
                    if phase == 'train':
                        loss.backward()
                        optimizer.step()

                # statistics
                running_loss += loss.item() * inputs.size(0)
                running_corrects += torch.sum(preds == labels.data)

            epoch_loss = running_loss / len(dataloaders[phase].dataset)
            epoch_acc = running_corrects.double() / len(dataloaders[phase].dataset)

            print('{} Loss: {:.4f} Acc: {:.4f}'.format(phase, epoch_loss, epoch_acc))

            # deep copy the model
            if phase == 'val' and epoch_acc > best_acc:
                best_acc = epoch_acc
                best_model_wts = copy.deepcopy(model.state_dict())
            if phase == 'val':
                val_acc_history.append(epoch_acc)

        print()

    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(time_elapsed // 60, time_elapsed % 60))
    print('Best val Acc: {:4f}'.format(best_acc))

    # load best model weights
    model.load_state_dict(best_model_wts)
    return model, val_acc_history

In [None]:
# Setup the loss fxn
optimizer_ft = optim.SGD(concat_ensemble.parameters(), lr=0.001, momentum=0.9)
criterion = nn.CrossEntropyLoss()
num_epochs = 15
# Train and evaluate
concat_ensemble, hist = train_model(concat_ensemble, dataloaders_dict, criterion, optimizer_ft, num_epochs=num_epochs, is_inception=(model_name=="inception"))
torch.save(concat_ensemble.state_dict(), os.path.join(os.path.join(save_path, "concat-ensemble"), "concat-ensemble.pth"))

Epoch 0/14
----------
train Loss: 0.0083 Acc: 0.9970
val Loss: 0.0112 Acc: 0.9949

Epoch 1/14
----------
train Loss: 0.0081 Acc: 0.9975
val Loss: 0.0187 Acc: 0.9936

Epoch 2/14
----------
train Loss: 0.0069 Acc: 0.9979
val Loss: 0.0076 Acc: 0.9977

Epoch 3/14
----------
train Loss: 0.0062 Acc: 0.9981
val Loss: 0.0100 Acc: 0.9957

Epoch 4/14
----------
train Loss: 0.0070 Acc: 0.9977
val Loss: 0.0067 Acc: 0.9977

Epoch 5/14
----------
train Loss: 0.0059 Acc: 0.9980
val Loss: 0.0073 Acc: 0.9970

Epoch 6/14
----------
train Loss: 0.0057 Acc: 0.9983
val Loss: 0.0057 Acc: 0.9980

Epoch 7/14
----------
train Loss: 0.0054 Acc: 0.9984
val Loss: 0.0102 Acc: 0.9970

Epoch 8/14
----------
train Loss: 0.0052 Acc: 0.9982
val Loss: 0.0101 Acc: 0.9975

Epoch 9/14
----------
train Loss: 0.0055 Acc: 0.9983
val Loss: 0.0115 Acc: 0.9959

Epoch 10/14
----------
train Loss: 0.0049 Acc: 0.9984
val Loss: 0.0062 Acc: 0.9975

Epoch 11/14
----------
train Loss: 0.0048 Acc: 0.9986
val Loss: 0.0090 Acc: 0.9972

Ep

In [223]:
test_accuracy(dataloaders_dict["val"], concat_ensemble, True)
test_accuracy(dataloaders_dict["test"], concat_ensemble, False)

Checking accuracy on validation set
Got 3922 / 3937 correct (99.62)
Checking accuracy on test set
Got 6151 / 8617 correct (71.38)
