# Focal Diversity-based Ensemble Selection

This demo provides examples of focal diversity-based ensemble selection on CIFAR-10 and ImageNet.

In [None]:
import os
import time
import torch
import numpy as np
from itertools import combinations
import timeit

# EnsembleBench modules
from EnsembleBench.frameworks.pytorchUtility import (
    calAccuracy,
    calAveragePredictionVectorAccuracy,
    calNegativeSamplesSet,
    calDisagreementSamplesOneTargetNegative,
    filterModelsFixed,
)

%load_ext autoreload
%autoreload 2

## Dataset Configurations

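You can download the extracted predictions for CIFAR-10 and ImageNet from the following Google Drive folder. Place the files under `./cifar10/prediction` or `./imagenet/prediction`, which are the directories the next cell reads from.
https://drive.google.com/drive/folders/18rEcjSpMSy-XN2bUQ3PfsBppwb874B8q?usp=sharing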

In [None]:
# Use the extracted prediction results to calculate the diversity scores and
# perform ensemble selection.

# Which benchmark to run; must be 'cifar10' or 'imagenet'.
dataset = 'cifar10'
# Names of the diversity metrics to compute, in the column order used later.
diversityMetricsList = ['CK', 'QS', 'BD', 'FK', 'KW', 'GD']

# Per-dataset settings:
#   predictionDir - directory holding one saved prediction file per model
#   models        - base model names (file name = model name + suffix)
#   maxModel      - presumably the index in `models` of the best single
#                   model, with maxModelAcc its accuracy -- TODO confirm
#   targetAcc     - accuracy of the entire ensemble
if dataset == 'cifar10':
    predictionDir = './cifar10/prediction'
    models = ['densenet-L190-k40', 'densenetbc-100-12', 'resnext8x64d', 'wrn-28-10-drop', 'vgg19_bn', 
              'resnet20', 'resnet32', 'resnet44', 'resnet56', 'resnet110']
    maxModel = 0
    maxModelAcc = 96.68
    targetAcc = 96.33 # accuracy of entire ensemble
elif dataset == 'imagenet':
    predictionDir = './imagenet/prediction'
    models = np.array(['AlexNet', 'DenseNet', 'EfficientNetb0', 'ResNeXt50', 'Inception3', 'ResNet152', 'ResNet18', 'SqueezeNet', 'VGG16', 'VGG19bn'])
    maxModel = 5
    maxModelAcc = 78.25
    targetAcc = 79.82 # accuracy of entire ensemble

else:
    # ValueError is still an Exception, so existing handlers keep working;
    # the message now names the rejected value and the accepted ones.
    raise ValueError(
        "Dataset not supported: {!r} (expected 'cifar10' or 'imagenet')".format(dataset)
    )

# File extension of the saved prediction files.
suffix = '.pt'

In [None]:
# Load every base model's saved outputs and collect, per model, the softmax of
# its prediction vectors, its label vector, and its standalone accuracy.
labelVectorsList = list()
predictionVectorsList = list()
tmpAccList = list()
for m in models:
    predictionPath = os.path.join(predictionDir, m+suffix)
    # Deserialize straight onto the CPU: everything below is moved to the CPU
    # anyway, and without map_location a file saved from a CUDA session cannot
    # be loaded on a machine that has no GPU.
    prediction = torch.load(predictionPath, map_location='cpu')
    predictionVectors = prediction['predictionVectors']
    # The stored vectors are normalized with a softmax over the last axis.
    predictionVectorsList.append(torch.nn.functional.softmax(predictionVectors, dim=-1).cpu())
    labelVectors = prediction['labelVectors']
    labelVectorsList.append(labelVectors.cpu())
    # Accuracy is computed on the stored (pre-softmax) vectors; only the first
    # element of calAccuracy's result is kept.
    tmpAccList.append(calAccuracy(predictionVectors, labelVectors)[0].cpu())
    print(tmpAccList[-1])

# Min / mean / max single-model accuracy, passed to the final statistics report.
minAcc = np.min(tmpAccList)
avgAcc = np.mean(tmpAccList)
maxAcc = np.max(tmpAccList)

In [None]:
# Preprocessing: enumerate every team of two or more models and build
#   teamAccuracyDict: team name -> first element of the result of
#                     calAveragePredictionVectorAccuracy for that team
#   teamNameDict:     team name -> tuple of member model indices
#   modelTeamDict:    member key -> set of team names containing that member
teamAccuracyDict = dict()
modelTeamDict = dict()
teamNameDict = dict()
startTime = timeit.default_timer()
modelIndices = list(range(len(models)))
for n in range(2, len(models)+1):
    for selectedModels in combinations(modelIndices, n):
        ensembleResult = calAveragePredictionVectorAccuracy(predictionVectorsList, labelVectorsList[0], modelsList=selectedModels)
        # The team name is the member indices concatenated without a separator.
        teamName = "".join(str(idx) for idx in selectedModels)
        teamNameDict[teamName] = selectedModels
        teamAccuracyDict[teamName] = ensembleResult[0].cpu().item()
        # Every character of the team name is registered as one member key.
        for memberKey in teamName:
            modelTeamDict.setdefault(memberKey, set()).add(teamName)
endTime = timeit.default_timer()
print("Time: ", endTime-startTime)

In [None]:
# calculate the diversity measures for all configurations
#
# For every target model, take the samples returned by
# calDisagreementSamplesOneTargetNegative (presumably the samples the target
# model gets wrong -- confirm against EnsembleBench) and, for every team that
# contains the target model, record
#   teamDiversityMetricMap[teamName][target] -> {metric name: score}
#   negAccuracyDict[teamName][target]        -> team accuracy on those samples
import numpy as np
from EnsembleBench.groupMetrics import *
np.random.seed(0)
nRandomSamples = 100          # samples drawn per cross-validation round
crossValidation = True        # average the metrics over several random draws
crossValidationTimes = 3      # number of random draws to average

teamDiversityMetricMap = dict()
negAccuracyDict = dict()
startTime = timeit.default_timer()
for oneTargetModel in range(len(models)):
    sampleID, sampleTarget, predictions, predVectors = calDisagreementSamplesOneTargetNegative(predictionVectorsList, labelVectorsList[0], oneTargetModel)
    if len(predictions) == 0:
        # Nothing to measure for this target model; it gets no entries.
        print("negative sample not found")
        continue
    # Convert to arrays so the sampled index arrays below can be used directly.
    sampleID = np.array(sampleID)
    sampleTarget = np.array(sampleTarget)
    predictions = np.array(predictions)
    predVectors = np.array([np.array([np.array(pp) for pp in p]) for p in predVectors])
    # Walk the teams in sorted order. modelTeamDict stores them in a set of
    # strings, whose iteration order changes between interpreter sessions
    # (string hash randomization). Every team consumes random draws below, so
    # an unordered walk would make the seeded sampling irreproducible.
    for teamName in sorted(modelTeamDict[str(oneTargetModel)]):
        selectedModels = teamNameDict[teamName]
        # presumably restricts the per-sample data to the team's members -- TODO confirm
        teamSampleID, teamSampleTarget, teamPredictions, teamPredVectors = filterModelsFixed(sampleID, sampleTarget, predictions, predVectors, selectedModels)
        if crossValidation:
            tmpMetrics = list()
            for _ in range(crossValidationTimes):
                # np.random.choice samples with replacement by default.
                randomIdx = np.random.choice(np.arange(teamPredictions.shape[0]), nRandomSamples)
                tmpMetrics.append(calAllDiversityMetrics(teamPredictions[randomIdx], teamSampleTarget[randomIdx], diversityMetricsList))
            # Element-wise mean of the metric vectors over the rounds.
            tmpMetrics = np.mean(np.array(tmpMetrics), axis=0)
        else:
            tmpMetrics = np.array(calAllDiversityMetrics(teamPredictions, teamSampleTarget, diversityMetricsList))
        diversityMetricDict = {diversityMetricsList[i]:tmpMetrics[i].item()  for i in range(len(tmpMetrics))}
        teamDiversityMetricMap.setdefault(teamName, dict())[str(oneTargetModel)] = diversityMetricDict

        # Average the team's prediction vectors over their original axis 1
        # (presumably the model axis) and score the result against the targets.
        tmpNegAccuracy = calAccuracy(torch.tensor(np.mean(np.transpose(teamPredVectors, (1, 0, 2)), axis=0)), torch.tensor(teamSampleTarget))[0].cpu().item()
        negAccuracyDict.setdefault(teamName, dict())[str(oneTargetModel)] = tmpNegAccuracy

endTime = timeit.default_timer()
print("Time: ", endTime-startTime)

In [None]:
# Build targetTeamSizeDict: target model key -> team size key -> dict with
#   'TeamList'        - names of the teams of that size containing the target
#   'DiversityMatrix' - one row per team in 'TeamList' (same order), one
#                       column per entry of diversityMetricsList
startTime = timeit.default_timer()
targetTeamSizeDict = dict()
for oneTargetModel in range(len(models)):
    targetKey = str(oneTargetModel)
    for teamName in modelTeamDict[targetKey]:
        # The team size is the length of the team name string.
        sizeKey = str(len(teamName))

        # diversity measures of this team for the current target, as a 1 x M row
        perMetric = teamDiversityMetricMap[teamName][targetKey]
        diversityRow = np.array([perMetric[dm] for dm in diversityMetricsList])[np.newaxis, :]

        sizeBucket = targetTeamSizeDict.setdefault(targetKey, dict()).setdefault(sizeKey, dict())
        sizeBucket.setdefault('TeamList', list()).append(teamName)

        existingRows = sizeBucket.get('DiversityMatrix', None)
        if existingRows is None:
            sizeBucket['DiversityMatrix'] = diversityRow
        else:
            sizeBucket['DiversityMatrix'] = np.concatenate((existingRows, diversityRow), axis=0)
endTime = timeit.default_timer()
print("Time: ", endTime-startTime)

In [None]:
# For every target model and every team size from 2 up to len(models)-1:
#   1. derive one threshold per diversity metric,
#   2. store a rescaled copy of the diversity matrix,
#   3. split the teams, per metric, into those above the threshold
#      (teamSelectedFQDict) and the rest (teamSelectedFQOutDict).
# Both result dicts are keyed target -> team size -> metric name -> set of teams.
teamSelectedFQDict = dict()
teamSelectedFQOutDict = dict()
from EnsembleBench.teamSelection import *
for oneTargetModel in range(len(models)):
    targetKey = str(oneTargetModel)
    targetFQDict = dict()
    targetFQOutDict = dict()
    for teamSize in range(2, len(models)):
        sizeKey = str(teamSize)
        fixedTeamDict = targetTeamSizeDict[targetKey][sizeKey]
        teamList = fixedTeamDict['TeamList']
        diversityMatrix = fixedTeamDict['DiversityMatrix']
        accuracyList = [teamAccuracyDict[name] for name in teamList]
        metricColumns = range(len(diversityMetricsList))

        # The threshold is the clustering threshold, but never below the column mean.
        thresholds = list()
        kmeans = list()
        for col in metricColumns:
            clusterThreshold, fittedKMeans = getThresholdClusteringKMeans(accuracyList, diversityMatrix[:, col], kmeansInit='strategic')
            thresholds.append(max(np.mean(diversityMatrix[:, col]), clusterThreshold))
            kmeans.append(fittedKMeans)
        fixedTeamDict['Threshold'] = thresholds
        fixedTeamDict['KMeans'] = kmeans

        # calculate scaled diversity scores, one normalize01 call per metric column
        fixedTeamDict['ScaledDiversityMatrix'] = np.stack(
            [normalize01(diversityMatrix[:, col]) for col in metricColumns], axis=1)
        targetTeamSizeDict[targetKey][sizeKey] = fixedTeamDict

        selectedByMetric = dict()
        rejectedByMetric = dict()
        for row, teamName in enumerate(teamList):
            for col, metricName in enumerate(diversityMetricsList):
                # Touch both sets so each metric key exists in both dicts.
                selectedTeams = selectedByMetric.setdefault(metricName, set())
                rejectedTeams = rejectedByMetric.setdefault(metricName, set())
                # The raw score is compared against the threshold rounded to 3 decimals.
                if diversityMatrix[row, col] > round(thresholds[col], 3):
                    selectedTeams.add(teamName)
                else:
                    rejectedTeams.add(teamName)

        targetFQDict[sizeKey] = selectedByMetric
        targetFQOutDict[sizeKey] = rejectedByMetric

    teamSelectedFQDict[targetKey] = targetFQDict
    teamSelectedFQOutDict[targetKey] = targetFQOutDict

In [None]:
# Combine the per-target selections into one final set of teams per diversity
# metric: teamSelectedFQAllDict maps metric name -> set of selected team names.
teamSelectedFQAllDict = dict()
for j, dm in enumerate(diversityMetricsList):
    # j is the metric's column index in the (scaled) diversity matrices.
    teamSelectedFQAllDiversitySet = teamSelectedFQAllDict.get(dm, set())
    for teamSize in range(2, len(models)):
        teamSizeSelectedTeamsSet = set()
        tmpTeamDict = dict() # teamName & Metric
        # Candidates are the teams selected for this metric and size under at
        # least one target model; each candidate is scored only once.
        for oneTargetModel in range(len(models)):
            for teamName in teamSelectedFQDict[str(oneTargetModel)][str(teamSize)][dm]:
                if teamName in tmpTeamDict:
                    continue
                tmpMetricList = list()
                # teamName is read character by character: each character is
                # converted to an int and used as a model index.
                teamModelIdx = map(int, [modelName for modelName in teamName])
                teamModelAcc = [tmpAccList[modelIdx].item() for modelIdx in teamModelIdx]
                # NOTE(review): np.argsort returns the indices that would sort
                # the accuracies, not each member's rank, and always assigns
                # weight 0 to one member. If rank-based weights were intended,
                # this would need a second argsort -- confirm with the authors.
                teamModelWeights = np.argsort(teamModelAcc)
                tmpModelWeights = list()
                for (k, modelName) in enumerate(teamName):
                    # Look at the team from the viewpoint of member k as target model.
                    fixedTeamDict = targetTeamSizeDict[modelName][str(teamSize)]
                    # Linear search for the team's row in that member's team list.
                    for i, tmpTeamName in enumerate(fixedTeamDict['TeamList']):
                        if tmpTeamName == teamName:
                            tmpMetricList.append(fixedTeamDict['ScaledDiversityMatrix'][i, j])
                            tmpModelWeights.append(teamModelWeights[k])
                # Team score: weighted average of the scaled scores over its members.
                tmpTeamDict[teamName] = np.average(tmpMetricList, weights=tmpModelWeights)
        if len(tmpTeamDict) > 0:
            accuracyList = np.array([teamAccuracyDict[teamName] for teamName in tmpTeamDict])
            metricList = np.array([tmpTeamDict[teamName] for teamName in tmpTeamDict])
            # Only the threshold is used; the second return value is discarded.
            tmpThreshold, _ = getThresholdClusteringKMeansCenter(accuracyList, metricList, kmeansInit='strategic')
            # Keep the teams whose score is strictly above the threshold.
            for teamName in tmpTeamDict:
                if tmpTeamDict[teamName] > tmpThreshold:
                    teamSizeSelectedTeamsSet.add(teamName)
        teamSelectedFQAllDiversitySet.update(teamSizeSelectedTeamsSet)
    teamSelectedFQAllDict[dm] = teamSelectedFQAllDiversitySet


# print the ensemble selection results
for dm in diversityMetricsList:
    print(dm, getNTeamStatistics(list(teamSelectedFQAllDict[dm]), teamAccuracyDict, minAcc, avgAcc, maxAcc, tmpAccList))
    
    