In [1]:
from __future__ import print_function, division

import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler

import torchvision
import torchvision.transforms as transforms
from torchvision import datasets, models

import skorch
from skorch import NeuralNetClassifier

from pathlib import Path
import os
import sys
import time
import copy

import pandas as pd
import matplotlib.pylab as plt

import numpy as np
from numpy import dstack

# Local modules
from cub_tools.train import train_model
from cub_tools.visualize import imshow, visualize_model
from cub_tools.utils import unpickle, save_pickle
from cub_tools.transforms import makeDefaultTransforms
#from cub_tools.ensembles import stackedEnsemble

In [2]:
# Script runtime options
model_names = ['resnet152', 'resnext101_32x8d', 'inception_v3', 'googlenet']
data_parallel = {'resnet152' : False,
                 'inceptionv4' : True,
                 'resnext101_64x4d' : True, 
                 'pnasnet5large' : True, 
                 'googlenet' : False,
                 'inception_v3' : False, 
                 'resnext101_32x8d' : False}
data_root_dir = '../data'
model_root_dir = '../models'
stages = ['train', 'test']


# Paths setup
data_dir = os.path.join(data_root_dir,'images')

# Ensemble setup
run_train_data_stack = False
run_test_data_stack = False

In [3]:
# Get data transforms
data_transforms = makeDefaultTransforms()

In [4]:
# Setup data loaders with augmentation transforms
image_datasets = {x: datasets.ImageFolder(os.path.join(data_dir, x), data_transforms[x])
                  for x in stages}
dataloaders = {x: torch.utils.data.DataLoader(image_datasets[x], batch_size=16,
                                             shuffle=True, num_workers=4)
              for x in stages}
dataset_sizes = {x: len(image_datasets[x]) for x in stages}
class_names = image_datasets[stages[0]].classes

In [5]:
# Setup the device to run the computations
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print('Device::', device)

Device:: cuda:0


In [6]:
models = {}

for model_name in model_names:
    
    print('[INFO] Loading model {}'.format(model_name))
    
    # Paths
    output_dir = os.path.join(model_root_dir,'classification/{}'.format(model_name))
    model_file = os.path.join(output_dir, 'caltech_birds_{}_full.pth'.format(model_name))
    
    # Load the best model from file
    models[model_name] = torch.load(model_file)
    if data_parallel[model_name]:
        models[model_name] = torch.nn.DataParallel(model[model_name])
    models[model_name] = models[model_name].to(device)

[INFO] Loading model resnet152
[INFO] Loading model resnext101_32x8d
[INFO] Loading model inception_v3
[INFO] Loading model googlenet


# Ensemble Methods Part 1 - Stacking Torchvision models

Different approaches for ensembling, first lets try stacking.

## Stacking ensemble bespoke implementation for PyTorch

Following the tutorial here: https://machinelearningmastery.com/stacking-ensemble-for-deep-learning-neural-networks/

In [7]:
class stackedEnsemble():
    
    def __init__(self, meta_learner=None, meta_learner_options=None, models=None, device=None):
        
        if meta_learner is None:
            from sklearn.linear_model import LogisticRegression
            self.meta_learner = LogisticRegression
        else:
            self.meta_learner = meta_learner
        self.meta_learner_options = meta_learner_options
        self.models = models
        self.device = device
        self.meta_learner_fit = False
        
    
    
    
    def fit(self, dataloader=None, stackX=None, labels=None):
        
        # create dataset using ensemble
        if (stackX is None) or (labels is None):
            self.models = models
            self.train_dataloader = dataloader
            print('[INFO] Creating the meta learner inputs (probabilities from individual models) as none provided.')
            self.stackX, self.labels = _stacked_dataset_from_dataloader(self.models, self.dataloader, self.device)
        else:
            self.train_dataloader = None
            self.stackX = stackX
            self.labels = labels
            print('[INFO] Stacked input table and labels found, using these to train meta learner.')

        # fit standalone model
        print('[INFO] Training the meta learner...', end='')
        if self.meta_learner_options is not None:
            self.meta_model = self.meta_learner(**self.meta_learner_options)
        else:
            self.meta_model = self.meta_learner()
        
        self.meta_model.fit(self.stackX, self.labels)
        
        self.meta_learner_fit = True
        print('..Complete')
        
    
    
    def predict(self, dataloader=None, stackX=None):
        assert self.meta_learner_fit is True, 'Meta Leaner has not been fit. Please run the stackedEnsemble.fit() method to fit the meta learner before trying to predict'
        self.test_dataloader=dataloader
        self.test_stackX=stackX
        # create dataset using ensemble
        if (self.test_stackX is None) or (self.test_dataloader is not None):
            print('[INFO] Creating the meta learner inputs (probabilities from individual models) as none provided.')
            self.test_stackX, _ = _stacked_dataset_from_dataloader(self.models, self.test_dataloader, self.device)
        else:
            print('[INFO] Stacked input table and labels found, using these to train meta learner.')

        # predict using the trained meta learner
        print('[INFO] Predicting with the meta learner...', end='')
        self.yhat = self.meta_model.predict(X=self.test_stackX)
        
        
        print('..Complete')
        
        
    def class_report(self, y_true):
        from sklearn.metrics import classification_report
        print(classification_report(y_pred=self.yhat, y_true=y_true))
        
        
    def _stacked_dataset_from_dataloader(models, dataloader, device):
        stackX = None
        stacky = None
        print('[INFO] Starting StackX', end='')
        with torch.no_grad():
            for i, (inputs, labels) in enumerate(dataloader):
                if i < (len(dataloader)-1):
                    temp_stack = None
                    for model_name, model in models.items():
                        # make prediction
                        if isinstance(model, skorch.classifier.NeuralNetClassifier):
                            yhat = model.predict_proba(inputs)
                        else:
                            model.eval()
                            inputs = inputs.to(device)
                            yhat = model(inputs)
                            yhat = yhat.cpu().numpy()

                        # Convert score to probability
                        for ind in np.arange(0, yhat.shape[0], 1):
                            yhat[ind, ::] = softmax(yhat[ind,::])

                        # stack predictions into [rows, members, probabilities]
                        if temp_stack is None:
                            temp_stack = yhat
                        else:
                            temp_stack = dstack((temp_stack, yhat))

                    # flatten predictions to [rows, members x probabilities]
                    temp_stack = temp_stack.reshape((temp_stack.shape[0], temp_stack.shape[1]*temp_stack.shape[2]))
                    # stack the batch of model probabilities onto the bottom of the results table
                    if stackX is None:
                        stackX = temp_stack
                    else:
                        stackX = np.vstack((stackX, temp_stack))

                    # stack the output truth labels to bottom of truth labels table
                    if stacky is None:
                        stacky = labels.cpu().numpy().ravel()
                    else:
                        stacky = np.vstack((stacky, labels.cpu().numpy().ravel()))

                    if i % 5 == 0:
                        print('..{}'.format(i), end='')

        print('..Complete')
        return stackX, stacky

## Creating the stacked datasets for train and test sets

This process pushes the training and test images through the CNN models trained on the image classification problem, and stacks their predicted class probabilities into a single array. Assuming there N images in the set, and M models in the ensemble, the expected size of the array is as follows:

nx (cols): number of classes by number of models e.g. 4 models and 200 classes, 800 columns.
ny (rows): number of images

The array is arrange as follows:

    image 1: |---200 class probability columns model 1---|---200 class probability columns model 2---|---***---|---200 class probability columns model M---|
    image 2: |---200 class probability columns model 1---|---200 class probability columns model 2---|---***---|---200 class probability columns model M---|
    ***
    ***
    image N: |---200 class probability columns model 1---|---200 class probability columns model 2---|---***---|---200 class probability columns model M---| 

In [8]:
try:
    if run_train_data_stack:
        # Generate the stacked dataset from the ensemble models for the training data
        stackX, stacky = stacked_dataset_from_dataloader(models, dataloaders['train'], device)

        # Save to pikle file
        save_pickle(stackX, os.path.join(model_root_dir,'classification/caltech_birds_ensemble_1_stackX.pkl'))
        save_pickle(stacky, os.path.join(model_root_dir,'classification/caltech_birds_ensemble_1_stacky.pkl'))
    else:
        stackX = unpickle(os.path.join(model_root_dir,'classification/caltech_birds_ensemble_1_stackX.pkl'))
        stacky = unpickle(os.path.join(model_root_dir,'classification/caltech_birds_ensemble_1_stacky.pkl'))

    print('[INFO] Loaded stacked datasets...')
except:
    print('[INFO] Error trying to create or load datasets')

[INFO] Loaded stacked datasets...


In [9]:
try:
    if run_test_data_stack:
        # Generate the stacked dataset from the ensemble models for the training data
        test_stackX, test_stacky = stacked_dataset_from_dataloader(models, dataloaders['test'], device)

        # Save to pikle file
        save_pickle(test_stackX, os.path.join(model_root_dir,'classification/caltech_birds_ensemble_1_test_stackX.pkl'))
        save_pickle(test_stacky, os.path.join(model_root_dir,'classification/caltech_birds_ensemble_1_test_stacky.pkl'))
    else:
        test_stackX = unpickle(os.path.join(model_root_dir,'classification/caltech_birds_ensemble_1_test_stackX.pkl'))
        test_stacky = unpickle(os.path.join(model_root_dir,'classification/caltech_birds_ensemble_1_test_stacky.pkl'))
        print('[INFO] Loaded stacked datasets...')
except:
    print('[INFO] Error trying to create or load datasets')

[INFO] Loaded stacked datasets...


## Fit the ensemble of models

The stacking ensemble takes either the set of PyTorch trained models and the PyTorch dataloader object and runs the dataset stacking prior to fitting the meta learner of the ensemble, or it takes the pre-stacked dataset of class predictions from the image set and simply fits the classifier object.

The classifier object as default is a Logistic Regression. Any scikit-learn type classifier object can be passed to the fitting function, as long as it has a **fit** and **predict** method. Optional hyperparameter arguments can be provided to initialise the classifier object, and these can be passed in a dictionary with the *meta_learner_options* variable. 

In [10]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report

from xgboost import XGBClassifier, XGBRFClassifier

### Default Logistic Regression

Run predictions using the test set stacked dataset and run classification metrics to assess the model performance

In [11]:
mtlearner1 = stackedEnsemble()
mtlearner1.fit(stackX=stackX, labels=stacky.ravel())
mtlearner1.predict(stackX=test_stackX)
mtlearner1.class_report(y_true=test_stacky.ravel())

[INFO] Stacked input table and labels found, using these to train meta learner.
[INFO] Training the meta learner.....Complete
[INFO] Stacked input table and labels found, using these to train meta learner.
[INFO] Predicting with the meta learner.....Complete
              precision    recall  f1-score   support

           0       0.84      0.87      0.85        30
           1       0.93      0.87      0.90        30
           2       0.84      0.96      0.90        28
           3       0.94      0.97      0.95        30
           4       0.93      0.93      0.93        14
           5       0.91      0.91      0.91        11
           6       1.00      0.96      0.98        23
           7       0.81      0.94      0.87        18
           8       0.59      0.45      0.51        29
           9       0.97      0.93      0.95        30
          10       0.65      0.67      0.66        30
          11       0.93      0.96      0.94        26
          12       0.97      0.97     

## SKLearn Random Forests

In [None]:
mtlearner2 = stackedEnsemble(meta_learner=RandomForestClassifier,
                             meta_learner_options={'n_estimators' : 250, 'verbose' : 10, 'n_jobs' : -1})
mtlearner2.fit(stackX=stackX, labels=stacky.ravel())
mtlearner2.predict(stackX=test_stackX)
mtlearner2.class_report(y_true=test_stacky.ravel())

## XGBoost Classifiers

Look at Random Forest and Gradient Boosting Machine classifiers

In [12]:
mtlearner2 = stackedEnsemble(meta_learner=XGBClassifier,
                             meta_learner_options={'n_estimators' : 250, 
                                                   'verbosity' : 1, 
                                                   'n_jobs' : 6, 
                                                   'objective' : 'multi:softmax', 
                                                   'num_class' : 200,
                                                   'tree_method' : 'gpu_hist'})
mtlearner2.fit(stackX=stackX, labels=stacky.ravel())
mtlearner2.predict(stackX=test_stackX)
mtlearner2.class_report(y_true=test_stacky.ravel())

[INFO] Stacked input table and labels found, using these to train meta learner.
[INFO] Training the meta learner.....Complete
[INFO] Stacked input table and labels found, using these to train meta learner.
[INFO] Predicting with the meta learner...

TypeError: predict() got an unexpected keyword argument 'X'