In [4]:
from __future__ import print_function, division

import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler

import torchvision
import torchvision.transforms as transforms
from torchvision import datasets, models

import skorch
from skorch import NeuralNetClassifier

from pathlib import Path
import os
import sys
import time
import copy

import pandas as pd
import matplotlib.pylab as plt
import numpy as np

# Local modules
from cub_tools.train import train_model
from cub_tools.visualize import imshow, visualize_model
from cub_tools.utils import unpickle, save_pickle
from cub_tools.transforms import makeDefaultTransforms

In [5]:
# ---- Script runtime options ----

# Architectures whose saved checkpoints are combined into the ensemble.
model_names = [
    'resnet152',
    'resnext101_32x8d',
    'inception_v3',
    'googlenet',
]

# Per-architecture flag: wrap the loaded model in torch.nn.DataParallel?
# (Contains more architectures than are listed in `model_names`.)
data_parallel = {
    'resnet152': False,
    'inceptionv4': True,
    'resnext101_64x4d': True,
    'pnasnet5large': True,
    'googlenet': False,
    'inception_v3': False,
    'resnext101_32x8d': False,
}

# Dataset splits; each is used as a sub-directory name of the image folder.
stages = ['train', 'test']

# ---- Paths setup ----
data_root_dir = '../data'
model_root_dir = '../models'
data_dir = os.path.join(data_root_dir, 'images')

In [6]:
# Get data transforms from the project helper; the result is indexed by stage
# name ('train' / 'test') when the image datasets are built further down.
data_transforms = makeDefaultTransforms()

In [7]:
# One ImageFolder dataset per stage, read from <data_dir>/<stage> and passed the
# transform registered for that stage.
image_datasets = {
    stage: datasets.ImageFolder(os.path.join(data_dir, stage), data_transforms[stage])
    for stage in stages
}

# Shuffled mini-batch loaders (16 images per batch, 4 worker processes) for every stage.
dataloaders = {
    stage: torch.utils.data.DataLoader(
        image_datasets[stage],
        batch_size=16,
        shuffle=True,
        num_workers=4,
    )
    for stage in stages
}

# Number of images per stage, and the class names taken from the first stage.
dataset_sizes = {stage: len(dataset) for stage, dataset in image_datasets.items()}
class_names = image_datasets[stages[0]].classes

In [8]:
# Pick the compute device: the first CUDA GPU when one is available, otherwise the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")
print('Device::', device)

Device:: cuda:0


In [9]:
# Load every ensemble member from its saved checkpoint and move it to `device`.
#
# NOTE: the name `models` rebinds the `torchvision.models` module imported at the top
#       of the notebook; it is kept because later cells refer to this dictionary.
# NOTE(security): `torch.load` unpickles the file, so only load checkpoints you trust.
models = {}

for model_name in model_names:

    print('[INFO] Loading model {}'.format(model_name))

    # Paths
    output_dir = os.path.join(model_root_dir,'classification/{}'.format(model_name))
    model_file = os.path.join(output_dir, 'caltech_birds_{}_full.pth'.format(model_name))

    # Load the best model from file. `map_location` lets a checkpoint that was saved
    # on a GPU be loaded on a machine where only the CPU is available.
    models[model_name] = torch.load(model_file, map_location=device)
    if data_parallel[model_name]:
        # Fixed: previously referenced the undefined name `model`, which raised a
        # NameError for any architecture flagged for DataParallel.
        models[model_name] = torch.nn.DataParallel(models[model_name])
    models[model_name] = models[model_name].to(device)

[INFO] Loading model resnet152
[INFO] Loading model resnext101_32x8d
[INFO] Loading model inception_v3
[INFO] Loading model googlenet


In [None]:
# Wrap every loaded model in a skorch classifier so that it exposes the
# scikit-learn style predict / predict_proba interface.
net = {}
for model_name in model_names:
    # skorch defaults to device='cpu' and moves the wrapped module to its device on
    # initialize(). The wrapped module is the same object stored in `models`, so the
    # compute device is passed explicitly to keep the models where they were loaded.
    net[model_name] = NeuralNetClassifier(models[model_name], device=device)
    net[model_name].initialize()

## Ensemble Methods

This section tries different approaches to ensembling the predictions of the models loaded above.

### Stacking ensemble bespoke implementation

Following the tutorial here: https://machinelearningmastery.com/stacking-ensemble-for-deep-learning-neural-networks/

In [45]:
from numpy import dstack
from sklearn.linear_model import LogisticRegression

def softmax(x):
    """Compute softmax values for each set of scores in x.

    The normalisation runs along axis 0, so a 1-D vector of scores is turned into
    values that sum to 1. For inputs with more dimensions each slice along the
    first axis is normalised independently.

    The maximum along axis 0 is subtracted before exponentiating. This leaves the
    result mathematically unchanged but prevents ``np.exp`` from overflowing to
    ``inf`` (and the result becoming ``nan``) for large scores.

    Parameters
    ----------
    x : array_like
        Scores to normalise.

    Returns
    -------
    numpy.ndarray
        Array with the same shape as ``x`` holding the softmax values.
    """
    scores = np.asarray(x)
    if scores.size == 0:
        # Nothing to normalise; np.max would raise on an empty array.
        return np.exp(scores)
    shifted = scores - np.max(scores, axis=0, keepdims=True)
    exp_scores = np.exp(shifted)
    return exp_scores / np.sum(exp_scores, axis=0, keepdims=True)

# NOTE: `stacked_dataset` only handles a single batch of inputs that is already in memory.
#       `stacked_dataset_from_dataloader` (defined below) is the dataloader-based variant: it iterates
#       over the dataset in an outer loop and runs each model in an inner loop, so that every model
#       sees the same images, and it also keeps the labels and returns them.
def stacked_dataset(models, inputs):
    """Build the meta-learner input table for one batch of inputs.

    Every model's ``predict_proba`` output is passed row by row through ``softmax``
    and the per-model tables are combined into a single 2-D feature table.

    Parameters
    ----------
    models : dict
        Maps a model name to an object exposing ``predict_proba(inputs)`` that
        returns a 2-D array (one row per input).
    inputs
        Batch of inputs handed unchanged to every model.

    Returns
    -------
    numpy.ndarray
        Array of shape ``[rows, probabilities * members]``.

    Raises
    ------
    ValueError
        If ``models`` is empty.
    """
    if not models:
        raise ValueError('stacked_dataset requires at least one model.')

    member_outputs = []
    for model_name, model in models.items():
        # make prediction
        yhat = model.predict_proba(inputs)
        # normalise every row of scores
        for i in np.arange(0, yhat.shape[0], 1):
            yhat[i, ::] = softmax(yhat[i, ::])
        member_outputs.append(yhat)
        print('{}...'.format(model_name), end='')

    # Stack predictions into [rows, probabilities, members]. Stacking the whole list
    # at once always yields a 3-D array, so a single-model ensemble no longer fails
    # on the reshape below (previously `stackX.shape[2]` raised an IndexError).
    stackX = dstack(member_outputs)
    # flatten predictions to [rows, probabilities x members]
    stackX = stackX.reshape((stackX.shape[0], stackX.shape[1] * stackX.shape[2]))
    return stackX

def stacked_dataset_from_dataloader(models, dataloader, device):
    """Build the meta-learner input table and labels for a whole dataloader.

    Every batch is run through every model, so that all models see the same
    images. The softmax-normalised outputs of the models are combined per batch
    and the batches are concatenated row-wise.

    Changes compared with the previous implementation:

    * The final batch is no longer dropped. It used to be skipped because
      ``np.vstack`` cannot stack label arrays of different lengths. Labels are now
      concatenated into one flat array, which also keeps them aligned one-to-one
      with the rows of the feature table.
    * A single-model ensemble works (the reshape used to fail on a 2-D array).
    * Batches are collected in lists and concatenated once instead of growing the
      arrays with ``np.vstack`` on every iteration.
    * ``inputs`` is no longer rebound inside the model loop, so every model
      receives a tensor that does not depend on the models that ran before it.

    Parameters
    ----------
    models : dict
        Maps a model name to either a skorch ``NeuralNetClassifier`` or a callable
        torch module.
    dataloader
        Iterable yielding ``(inputs, labels)`` batches of torch tensors.
    device
        Device the inputs are moved to before calling a plain torch module.

    Returns
    -------
    tuple
        ``(stackX, stacky)``: ``stackX`` has shape ``[rows, probabilities * members]``
        and ``stacky`` is a 1-D array with one label per row. Both are ``None`` when
        the dataloader yields no batches.

    Raises
    ------
    ValueError
        If ``models`` is empty.
    """
    if not models:
        raise ValueError('stacked_dataset_from_dataloader requires at least one model.')

    batch_tables = []   # one [rows, probabilities x members] table per batch
    batch_labels = []   # one flat label array per batch
    print('[INFO] Starting StackX', end='')
    with torch.no_grad():
        for i, (inputs, labels) in enumerate(dataloader):
            member_outputs = []
            device_inputs = None  # moved to `device` lazily, at most once per batch
            for model_name, model in models.items():
                # make prediction
                if isinstance(model, skorch.classifier.NeuralNetClassifier):
                    yhat = model.predict_proba(inputs)
                else:
                    model.eval()
                    if device_inputs is None:
                        device_inputs = inputs.to(device)
                    yhat = model(device_inputs)
                    yhat = yhat.cpu().numpy()

                # Convert score to probability
                # NOTE(review): this is also applied to the skorch `predict_proba`
                # output, as before; confirm that those values are raw scores.
                for ind in np.arange(0, yhat.shape[0], 1):
                    yhat[ind, ::] = softmax(yhat[ind, ::])

                member_outputs.append(yhat)

            # stack predictions into [rows, probabilities, members]; always 3-D
            batch_stack = dstack(member_outputs)
            # flatten predictions to [rows, probabilities x members]
            batch_tables.append(
                batch_stack.reshape((batch_stack.shape[0],
                                     batch_stack.shape[1] * batch_stack.shape[2])))

            # keep the truth labels of this batch
            batch_labels.append(labels.cpu().numpy().ravel())

            if i % 5 == 0:
                print('..{}'.format(i), end='')

    print('..Complete')
    if not batch_tables:
        return None, None
    return np.vstack(batch_tables), np.concatenate(batch_labels)

# fit a model based on the outputs from the ensemble members
def fit_stacked_model(models, dataloader=None, stackX=None, labels=None, 
                      meta_learner=LogisticRegression, meta_learner_options=None,
                      device=None):
    """Fit a meta learner on the outputs of the ensemble members.

    Parameters
    ----------
    models : dict
        Ensemble members; only used when the stacked table has to be generated.
    dataloader : optional
        Source of ``(inputs, labels)`` batches, required when ``stackX`` or
        ``labels`` is not supplied.
    stackX : optional
        Pre-computed meta-learner input table.
    labels : optional
        Truth labels matching the rows of ``stackX``.
    meta_learner : callable
        Estimator class (or factory) whose instances provide ``fit(X, y)``.
    meta_learner_options : dict, optional
        Keyword arguments used to construct the meta learner.
    device : optional
        Device used when the stacked table is generated from ``dataloader``.
        Defaults to the first CUDA device when available, otherwise the CPU.

    Returns
    -------
    The fitted meta learner instance.

    Raises
    ------
    ValueError
        If the stacked table must be generated but no ``dataloader`` was given.
    """
    # create dataset using ensemble
    if (stackX is None) or (labels is None):
        if dataloader is None:
            raise ValueError('Provide either `stackX` and `labels`, or a `dataloader`.')
        if device is None:
            device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
        print('[INFO] Creating the meta learner inputs (probabilities from individual models) as none provided.')
        # Fixed: `device` is a required argument of stacked_dataset_from_dataloader
        # and was previously not passed, which raised a TypeError.
        stackX, labels = stacked_dataset_from_dataloader(models, dataloader, device)
        # The meta learner expects one label per row of stackX.
        labels = np.ravel(labels)
    else:
        print('[INFO] Stacked input table and labels found, using these to train meta learner.')

    # fit standalone model
    print('[INFO] Training the meta learner...', end='')
    options = meta_learner_options if meta_learner_options is not None else {}
    meta_model = meta_learner(**options)
    meta_model.fit(stackX, labels)
    print('..Complete')
    return meta_model

In [22]:
# Set to False to re-use the stacked tables saved by a previous run instead of
# pushing the whole training set through every model again.
run_train_data_stack = True
if not run_train_data_stack:
    # Load the previously saved stacked dataset and labels
    stackX = unpickle(os.path.join(model_root_dir,'classification/caltech_birds_ensemble_1_stackX.pkl'))
    stacky = unpickle(os.path.join(model_root_dir,'classification/caltech_birds_ensemble_1_stacky.pkl'))
else:
    # Generate the stacked dataset from the ensemble models for the training data
    stackX, stacky = stacked_dataset_from_dataloader(models, dataloaders['train'], device)

    # Save to pickle file
    save_pickle(stackX, os.path.join(model_root_dir,'classification/caltech_birds_ensemble_1_stackX.pkl'))
    save_pickle(stacky, os.path.join(model_root_dir,'classification/caltech_birds_ensemble_1_stacky.pkl'))

[INFO] Starting StackX..0..5..10..15..20..25..30..35..40..45..50..55..60..65..70..75..80..85..90..95..100..105..110..115..120..125..130..135..140..145..150..155..160..165..170..175..180..185..190..195..200..205..210..215..220..225..230..235..240..245..250..255..260..265..270..275..280..285..290..295..300..305..310..315..320..325..330..335..340..345..350..355..360..365..370Complete


In [47]:
from sklearn.ensemble import RandomForestClassifier

In [50]:
# Train a random forest as the meta learner on the pre-computed stacked table.
meta_model = fit_stacked_model(
    models=models,
    stackX=stackX,
    labels=stacky.ravel(),
    meta_learner=RandomForestClassifier,
    meta_learner_options=dict(n_estimators=250, verbose=10, n_jobs=-1),
)

[INFO] Stacked input table and labels found, using these to train meta learner.
[INFO] Training the meta learner...building tree 1 of 250
building tree 2 of 250
building tree 3 of 250
building tree 4 of 250
building tree 5 of 250
building tree 6 of 250


[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 6 concurrent workers.


building tree 7 of 250
building tree 8 of 250
building tree 9 of 250
building tree 10 of 250


[Parallel(n_jobs=-1)]: Done   1 tasks      | elapsed:    2.2s


building tree 11 of 250
building tree 12 of 250


[Parallel(n_jobs=-1)]: Done   6 tasks      | elapsed:    2.7s


building tree 13 of 250
building tree 14 of 250
building tree 15 of 250
building tree 16 of 250
building tree 17 of 250
building tree 18 of 250


[Parallel(n_jobs=-1)]: Done  13 tasks      | elapsed:    6.7s


building tree 19 of 250
building tree 20 of 250
building tree 21 of 250
building tree 22 of 250
building tree 23 of 250
building tree 24 of 250
building tree 25 of 250
building tree 26 of 250
building tree 27 of 250


[Parallel(n_jobs=-1)]: Done  20 tasks      | elapsed:    9.0s


building tree 28 of 250
building tree 29 of 250
building tree 30 of 250
building tree 31 of 250
building tree 32 of 250
building tree 33 of 250
building tree 34 of 250
building tree 35 of 250


[Parallel(n_jobs=-1)]: Done  29 tasks      | elapsed:   11.8s


building tree 36 of 250
building tree 37 of 250
building tree 38 of 250
building tree 39 of 250
building tree 40 of 250
building tree 41 of 250
building tree 42 of 250
building tree 43 of 250
building tree 44 of 250


[Parallel(n_jobs=-1)]: Done  38 tasks      | elapsed:   15.7s


building tree 45 of 250
building tree 46 of 250
building tree 47 of 250
building tree 48 of 250
building tree 49 of 250
building tree 50 of 250
building tree 51 of 250
building tree 52 of 250
building tree 53 of 250
building tree 54 of 250


[Parallel(n_jobs=-1)]: Done  49 tasks      | elapsed:   20.3s


building tree 55 of 250
building tree 56 of 250
building tree 57 of 250
building tree 58 of 250
building tree 59 of 250
building tree 60 of 250
building tree 61 of 250
building tree 62 of 250
building tree 63 of 250
building tree 64 of 250
building tree 65 of 250


[Parallel(n_jobs=-1)]: Done  60 tasks      | elapsed:   23.9s


building tree 66 of 250
building tree 67 of 250
building tree 68 of 250
building tree 69 of 250
building tree 70 of 250
building tree 71 of 250
building tree 72 of 250
building tree 73 of 250
building tree 74 of 250
building tree 75 of 250
building tree 76 of 250
building tree 77 of 250
building tree 78 of 250


[Parallel(n_jobs=-1)]: Done  73 tasks      | elapsed:   29.2s


building tree 79 of 250
building tree 80 of 250
building tree 81 of 250
building tree 82 of 250
building tree 83 of 250
building tree 84 of 250
building tree 85 of 250
building tree 86 of 250
building tree 87 of 250
building tree 88 of 250
building tree 89 of 250
building tree 90 of 250
building tree 91 of 250


[Parallel(n_jobs=-1)]: Done  86 tasks      | elapsed:   34.4s


building tree 92 of 250
building tree 93 of 250
building tree 94 of 250
building tree 95 of 250
building tree 96 of 250
building tree 97 of 250
building tree 98 of 250
building tree 99 of 250
building tree 100 of 250
building tree 101 of 250
building tree 102 of 250
building tree 103 of 250
building tree 104 of 250
building tree 105 of 250
building tree 106 of 250


[Parallel(n_jobs=-1)]: Done 101 tasks      | elapsed:   39.6s


building tree 107 of 250
building tree 108 of 250
building tree 109 of 250
building tree 110 of 250
building tree 111 of 250
building tree 112 of 250
building tree 113 of 250
building tree 114 of 250
building tree 115 of 250
building tree 116 of 250
building tree 117 of 250
building tree 118 of 250
building tree 119 of 250
building tree 120 of 250
building tree 121 of 250


[Parallel(n_jobs=-1)]: Done 116 tasks      | elapsed:   45.7s


building tree 122 of 250
building tree 123 of 250
building tree 124 of 250
building tree 125 of 250
building tree 126 of 250
building tree 127 of 250
building tree 128 of 250
building tree 129 of 250
building tree 130 of 250
building tree 131 of 250
building tree 132 of 250
building tree 133 of 250
building tree 134 of 250
building tree 135 of 250
building tree 136 of 250
building tree 137 of 250
building tree 138 of 250


[Parallel(n_jobs=-1)]: Done 133 tasks      | elapsed:   52.3s


building tree 139 of 250
building tree 140 of 250
building tree 141 of 250
building tree 142 of 250
building tree 143 of 250
building tree 144 of 250
building tree 145 of 250
building tree 146 of 250
building tree 147 of 250
building tree 148 of 250
building tree 149 of 250
building tree 150 of 250
building tree 151 of 250
building tree 152 of 250
building tree 153 of 250
building tree 154 of 250
building tree 155 of 250


[Parallel(n_jobs=-1)]: Done 150 tasks      | elapsed:   58.3s


building tree 156 of 250
building tree 157 of 250
building tree 158 of 250
building tree 159 of 250
building tree 160 of 250
building tree 161 of 250
building tree 162 of 250
building tree 163 of 250
building tree 164 of 250
building tree 165 of 250
building tree 166 of 250
building tree 167 of 250
building tree 168 of 250
building tree 169 of 250
building tree 170 of 250
building tree 171 of 250
building tree 172 of 250
building tree 173 of 250
building tree 174 of 250


[Parallel(n_jobs=-1)]: Done 169 tasks      | elapsed:  1.1min


building tree 175 of 250
building tree 176 of 250
building tree 177 of 250
building tree 178 of 250
building tree 179 of 250
building tree 180 of 250
building tree 181 of 250
building tree 182 of 250
building tree 183 of 250
building tree 184 of 250
building tree 185 of 250
building tree 186 of 250
building tree 187 of 250
building tree 188 of 250
building tree 189 of 250
building tree 190 of 250
building tree 191 of 250
building tree 192 of 250
building tree 193 of 250


[Parallel(n_jobs=-1)]: Done 188 tasks      | elapsed:  1.2min


building tree 194 of 250
building tree 195 of 250
building tree 196 of 250
building tree 197 of 250
building tree 198 of 250
building tree 199 of 250
building tree 200 of 250
building tree 201 of 250
building tree 202 of 250
building tree 203 of 250
building tree 204 of 250
building tree 205 of 250
building tree 206 of 250
building tree 207 of 250
building tree 208 of 250
building tree 209 of 250
building tree 210 of 250
building tree 211 of 250
building tree 212 of 250
building tree 213 of 250
building tree 214 of 250


[Parallel(n_jobs=-1)]: Done 209 tasks      | elapsed:  1.3min


building tree 215 of 250
building tree 216 of 250
building tree 217 of 250
building tree 218 of 250
building tree 219 of 250
building tree 220 of 250
building tree 221 of 250
building tree 222 of 250
building tree 223 of 250
building tree 224 of 250
building tree 225 of 250
building tree 226 of 250
building tree 227 of 250
building tree 228 of 250
building tree 229 of 250
building tree 230 of 250
building tree 231 of 250
building tree 232 of 250
building tree 233 of 250
building tree 234 of 250
building tree 235 of 250


[Parallel(n_jobs=-1)]: Done 230 tasks      | elapsed:  1.5min


building tree 236 of 250
building tree 237 of 250
building tree 238 of 250
building tree 239 of 250
building tree 240 of 250
building tree 241 of 250
building tree 242 of 250
building tree 243 of 250
building tree 244 of 250
building tree 245 of 250
building tree 246 of 250
building tree 247 of 250
building tree 248 of 250
building tree 249 of 250
building tree 250 of 250
..Complete


[Parallel(n_jobs=-1)]: Done 250 out of 250 | elapsed:  1.6min finished


In [51]:
def stacked_prediction(meta_learner, models, device=None, dataloader=None, stackX=None):
    """Predict with a fitted meta learner on stacked ensemble outputs.

    Parameters
    ----------
    meta_learner
        Fitted estimator providing ``predict(X)``.
    models : dict
        Ensemble members; only used when the stacked table has to be generated.
    device : optional
        Device used when the stacked table is generated from ``dataloader``.
        Defaults to the first CUDA device when available, otherwise the CPU.
    dataloader : optional
        Source of ``(inputs, labels)`` batches, required when ``stackX`` is None.
    stackX : optional
        Pre-computed meta-learner input table.

    Returns
    -------
    ``yhat`` when ``stackX`` was supplied, otherwise the tuple ``(yhat, ytrue)``
    where ``ytrue`` holds the labels collected from ``dataloader``.

    Raises
    ------
    ValueError
        If neither ``stackX`` nor ``dataloader`` is given.
    """
    # create dataset using ensemble
    return_ytrue = False
    if stackX is None:
        if dataloader is None:
            raise ValueError('Provide either `stackX` or a `dataloader`.')
        if device is None:
            device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
        print('[INFO] Creating the meta learner inputs (probabilities from individual models) as none provided.')
        stackX, ytrue = stacked_dataset_from_dataloader(models, dataloader, device)
        return_ytrue = True
    else:
        # Fixed: the message used to claim the table was being used for training.
        print('[INFO] Stacked input table found, using it as the meta learner input.')

    # predict with the fitted meta learner
    print('[INFO] Predicting with the meta learner...', end='')
    yhat = meta_learner.predict(stackX)
    print('..Complete')
    if return_ytrue:
        return yhat, ytrue
    else:
        return yhat

In [52]:
# Run the test split through the ensemble and the meta learner; because no stacked
# table is supplied, the truth labels are returned alongside the predictions.
yhat, ytrue = stacked_prediction(
    meta_model,
    models,
    device=device,
    dataloader=dataloaders['test'],
)

[INFO] Creating the meta learner inputs (probabilities from individual models) as none provided.
[INFO] Starting StackX..0..5..10..15..20..25..30..35..40..45..50..55..60..65..70..75..80..85..90..95..100..105..110..115..120..125..130..135..140..145..150..155..160..165..170..175..180..185..190..195..200..205..210..215..220..225..230..235..240..245..250..255..260..265..270..275..280..285..290..295..300..305..310..315..320..325..330..335..340..345..350..355..360..Complete
[INFO] Predicting with the meta learner...

[Parallel(n_jobs=6)]: Using backend ThreadingBackend with 6 concurrent workers.
[Parallel(n_jobs=6)]: Done   1 tasks      | elapsed:    0.0s
[Parallel(n_jobs=6)]: Done   6 tasks      | elapsed:    0.0s
[Parallel(n_jobs=6)]: Done  13 tasks      | elapsed:    0.0s
[Parallel(n_jobs=6)]: Done  20 tasks      | elapsed:    0.1s
[Parallel(n_jobs=6)]: Done  29 tasks      | elapsed:    0.1s
[Parallel(n_jobs=6)]: Done  38 tasks      | elapsed:    0.1s
[Parallel(n_jobs=6)]: Done  49 tasks      | elapsed:    0.1s
[Parallel(n_jobs=6)]: Done  60 tasks      | elapsed:    0.2s
[Parallel(n_jobs=6)]: Done  73 tasks      | elapsed:    0.2s
[Parallel(n_jobs=6)]: Done  86 tasks      | elapsed:    0.2s
[Parallel(n_jobs=6)]: Done 101 tasks      | elapsed:    0.3s
[Parallel(n_jobs=6)]: Done 116 tasks      | elapsed:    0.3s
[Parallel(n_jobs=6)]: Done 133 tasks      | elapsed:    0.3s
[Parallel(n_jobs=6)]: Done 150 tasks      | elapsed:    0.4s
[Parallel(n_jobs=6)]: Done 169 tasks      | elapsed:    0.4s


..Complete


[Parallel(n_jobs=6)]: Done 188 tasks      | elapsed:    0.5s
[Parallel(n_jobs=6)]: Done 209 tasks      | elapsed:    0.5s
[Parallel(n_jobs=6)]: Done 230 tasks      | elapsed:    0.6s
[Parallel(n_jobs=6)]: Done 250 out of 250 | elapsed:    0.6s finished


In [53]:
from sklearn.metrics import classification_report

In [54]:
# Per-class precision / recall / F1 of the stacked ensemble on the test split.
print(classification_report(ytrue.ravel(), yhat))

              precision    recall  f1-score   support

           0       0.89      0.83      0.86        30
           1       0.96      0.86      0.91        29
           2       0.82      1.00      0.90        28
           3       0.91      0.97      0.94        30
           4       0.87      0.93      0.90        14
           5       0.83      0.91      0.87        11
           6       1.00      0.91      0.95        23
           7       0.80      0.89      0.84        18
           8       0.65      0.38      0.48        29
           9       1.00      0.97      0.98        30
          10       0.72      0.70      0.71        30
          11       0.93      0.96      0.94        26
          12       0.97      0.97      0.97        30
          13       0.93      0.93      0.93        30
          14       0.93      0.96      0.95        28
          15       0.90      0.96      0.93        28
          16       0.96      1.00      0.98        27
          17       0.94    

In [None]:
# Take a single batch from the training loader and compare the prediction of
# every individual model against the truth label.
inputs, classes = next(iter(dataloaders['train']))

net = {}
output = {}
with torch.no_grad():
    for model_name in model_names:

        # Fixed: previously referenced the undefined name `model` (NameError).
        # The device is passed explicitly because skorch defaults to 'cpu' and
        # would otherwise move the shared model off the GPU on initialize().
        net[model_name] = NeuralNetClassifier(models[model_name], device=device)
        net[model_name].initialize()

        output[model_name] = net[model_name].predict(inputs)

# One line per image: the truth label followed by each model's predicted label.
for i_res, truth in enumerate(classes.numpy()):
    res_str = 'Truth: {:5}  Pred:'.format(truth)
    for model_name in model_names:
        res_str = res_str + ' {:5}'.format(output[model_name][i_res])
    print(res_str)

In [None]:
from combo.models.classifier_comb import SimpleClassifierAggregator
#from sklearn.ensemble import VotingClassifier

In [None]:
# Majority-vote aggregator over the already-fitted skorch classifiers.
combo_model = SimpleClassifierAggregator(
    base_estimators=[classifier for classifier in net.values()],
    pre_fitted=True,
    method='majority_vote',
)

In [None]:
# Aggregated prediction for the batch held in `inputs`; as the last expression of
# the cell, its value is what the notebook displays.
combo_model.predict(inputs)